In [1]:
# Imports
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler

In [2]:
# Loading the dataset
columns = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',
    'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss',
    'hours-per-week', 'native-country', 'income'
]

train_data = pd.read_csv('../1_Data/adult.data', header=None, names=columns, na_values=' ?', skipinitialspace=True)
test_data = pd.read_csv('../1_Data/adult.test', header=0, names=columns, na_values=' ?', skipinitialspace=True, comment='|')

In [3]:
test_data['income'] = test_data['income'].str.replace('.', '', regex=False)

data_cleaned = pd.concat([train_data, test_data], axis=0).dropna().reset_index(drop=True)

categorical_cols = [
    'workclass', 'education', 'marital-status', 'occupation',
    'relationship', 'race', 'sex', 'native-country'
]
data_encoded = pd.get_dummies(data_cleaned, columns=categorical_cols)

scaler = StandardScaler()
continuous_cols = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
data_encoded[continuous_cols] = scaler.fit_transform(data_encoded[continuous_cols])

data_encoded['income'] = data_encoded['income'].apply(lambda x: 1 if x.strip() == '>50K' else 0)

In [4]:
train_size = len(train_data.dropna())
X_train = data_encoded.iloc[:train_size].drop(columns=['income']).values
y_train = data_encoded.iloc[:train_size]['income'].values.reshape(-1, 1)
X_test = data_encoded.iloc[train_size:].drop(columns=['income']).values
y_test = data_encoded.iloc[train_size:]['income'].values.reshape(-1, 1)

In [5]:
def initialize_parameters(layer_dims):
    np.random.seed(42)
    parameters = {}
    for l in range(1, len(layer_dims)):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]).astype(np.float64) * np.sqrt(2. / layer_dims[l-1])
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1), dtype=np.float64)
    return parameters

def relu(Z):
    return np.maximum(0, Z)

def relu_derivative(Z):
    return (Z > 0).astype(float)

def sigmoid(Z):
    Z = np.array(Z, dtype=np.float64)  # Ensure Z is a NumPy array with correct dtype
    return 1 / (1 + np.exp(-Z))

def forward_propagation(X, parameters):
    caches = {}
    A = np.array(X.T, dtype=np.float64)  # Ensure input is NumPy array with float64 type
    L = len(parameters) // 2
    for l in range(1, L):
        Z = np.dot(parameters['W' + str(l)], A) + parameters['b' + str(l)]
        A = relu(Z)
        caches['A' + str(l)] = A
        caches['Z' + str(l)] = Z
    ZL = np.dot(parameters['W' + str(L)], A) + parameters['b' + str(L)]
    AL = sigmoid(ZL)
    caches['A' + str(L)] = AL
    caches['Z' + str(L)] = ZL
    return AL, caches

def compute_loss(AL, Y):
    m = Y.shape[0]
    loss = -np.sum(Y.T * np.log(AL + 1e-8) + (1 - Y.T) * np.log(1 - AL + 1e-8)) / m
    return np.squeeze(loss)

def backward_propagation(X, Y, parameters, caches):
    grads = {}
    m = X.shape[0]
    L = len(parameters) // 2
    Y = Y.T
    dZL = caches['A' + str(L)] - Y
    grads['dW' + str(L)] = (1/m) * np.dot(dZL, caches['A' + str(L-1)].T).astype(np.float64)
    grads['db' + str(L)] = (1/m) * np.sum(dZL, axis=1, keepdims=True).astype(np.float64)
    for l in reversed(range(1, L)):
        dA = np.dot(parameters['W' + str(l+1)].T, dZL)
        dZ = dA * relu_derivative(caches['Z' + str(l)])
        A_prev = X.T if l == 1 else caches['A' + str(l-1)]
        grads['dW' + str(l)] = (1/m) * np.dot(dZ, A_prev.T).astype(np.float64)
        grads['db' + str(l)] = (1/m) * np.sum(dZ, axis=1, keepdims=True).astype(np.float64)
        dZL = dZ
    return grads

def update_parameters(parameters, grads, learning_rate, velocities, beta=0.9):
    L = len(parameters) // 2
    for l in range(1, L + 1):
        velocities['dW' + str(l)] = beta * velocities['dW' + str(l)] + (1 - beta) * grads['dW' + str(l)]
        velocities['db' + str(l)] = beta * velocities['db' + str(l)] + (1 - beta) * grads['db' + str(l)]
        parameters['W' + str(l)] -= learning_rate * velocities['dW' + str(l)]
        parameters['b' + str(l)] -= learning_rate * velocities['db' + str(l)]
    return parameters, velocities

def model(X, Y, layer_dims, learning_rate=0.01, epochs=20, batch_size=128):
    parameters = initialize_parameters(layer_dims)
    velocities = {f'dW{l}': np.zeros_like(parameters[f'W{l}'], dtype=np.float64) for l in range(1, len(layer_dims))}
    velocities.update({f'db{l}': np.zeros_like(parameters[f'b{l}'], dtype=np.float64) for l in range(1, len(layer_dims))})
    
    for epoch in range(epochs):
        permutation = np.random.permutation(X.shape[0])
        X_shuffled, Y_shuffled = X[permutation], Y[permutation]

        for i in range(0, X.shape[0], batch_size):
            X_batch, Y_batch = X_shuffled[i:i+batch_size], Y_shuffled[i:i+batch_size]
            AL, caches = forward_propagation(X_batch, parameters)
            loss = compute_loss(AL, Y_batch)
            grads = backward_propagation(X_batch, Y_batch, parameters, caches)
            parameters, velocities = update_parameters(parameters, grads, learning_rate, velocities)

        if epoch % 5 == 0:
            print(f"Epoch {epoch}/{epochs}, Loss: {loss:.4f}")

    return parameters

In [6]:
layer_dims = [108, 64, 32, 16, 1]
parameters_trained = model(X_train, y_train, layer_dims, learning_rate=0.01, epochs=25, batch_size=128)

def predict(X, parameters):
    AL, _ = forward_propagation(X, parameters)
    predictions = (AL > 0.5).astype(int)
    return predictions

y_pred_test = predict(X_test, parameters_trained)
accuracy_test = np.mean(y_pred_test.T == y_test)

print(f"Test Accuracy: {accuracy_test * 100:.2f}%")

Epoch 0/25, Loss: 0.3980
Epoch 5/25, Loss: 0.2816
Epoch 10/25, Loss: 0.2916
Epoch 15/25, Loss: 0.4038
Epoch 20/25, Loss: 0.2791
Test Accuracy: 85.53%
