In [128]:
import pandas as pd
import numpy as np

## Preprocessing

In [129]:
df = pd.read_csv("data/titanic.csv")
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [130]:
df.columns = df.columns.str.lower()
df.head()

Unnamed: 0,passengerid,survived,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [131]:
df = df.drop(columns=['name', 'passengerid', 'ticket', 'cabin'])
df.isna().count()

survived    891
pclass      891
sex         891
age         891
sibsp       891
parch       891
fare        891
embarked    891
dtype: int64

In [132]:
df['embarked']= df['embarked'].fillna(df['embarked'].mode())
df['age'] = df['age'].fillna(df['age'].mean())
df = pd.get_dummies(df, columns=['sex', 'embarked'])
df.head()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare,sex_female,sex_male,embarked_C,embarked_Q,embarked_S
0,0,3,22.0,1,0,7.25,False,True,False,False,True
1,1,1,38.0,1,0,71.2833,True,False,True,False,False
2,1,3,26.0,0,0,7.925,True,False,False,False,True
3,1,1,35.0,1,0,53.1,True,False,False,False,True
4,0,3,35.0,0,0,8.05,False,True,False,False,True


In [133]:
y = df['survived'].values.reshape(-1, 1)  
X = df.drop(columns=['survived'])

In [134]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
numeric_cols = ['pclass', 'age', 'fare']
X[numeric_cols] = scaler.fit_transform(X[numeric_cols])

X = X.astype(float)
X

Unnamed: 0,pclass,age,sibsp,parch,fare,sex_female,sex_male,embarked_C,embarked_Q,embarked_S
0,0.827377,-0.592481,1.0,0.0,-0.502445,0.0,1.0,0.0,0.0,1.0
1,-1.566107,0.638789,1.0,0.0,0.786845,1.0,0.0,1.0,0.0,0.0
2,0.827377,-0.284663,0.0,0.0,-0.488854,1.0,0.0,0.0,0.0,1.0
3,-1.566107,0.407926,1.0,0.0,0.420730,1.0,0.0,0.0,0.0,1.0
4,0.827377,0.407926,0.0,0.0,-0.486337,0.0,1.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...
886,-0.369365,-0.207709,0.0,0.0,-0.386671,0.0,1.0,0.0,0.0,1.0
887,-1.566107,-0.823344,0.0,0.0,-0.044381,1.0,0.0,0.0,0.0,1.0
888,0.827377,0.000000,1.0,2.0,-0.176263,1.0,0.0,0.0,0.0,1.0
889,-1.566107,-0.284663,0.0,0.0,-0.044381,0.0,1.0,1.0,0.0,0.0


In [135]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Implementing Neural Network From Scratch

In [136]:
def init_params(input_size):
    params = {}
    params['W1'] = np.random.randn(input_size, 64) * np.sqrt(1 / input_size)
    params['W2'] = np.random.randn(64, 32) * np.sqrt(1 / 64)
    params['W3'] = np.random.randn(32, 16) * np.sqrt(1 / 32)
    params['W4'] = np.random.randn(16, 1) * np.sqrt(1 / 16)
    
    params['b1'] = np.zeros((1, 64))
    params['b2'] = np.zeros((1, 32))
    params['b3'] = np.zeros((1, 16))
    params['b4'] = np.zeros((1, 1))

    for key in ['W1', 'W2', 'W3', 'W4', 'b1', 'b2', 'b3', 'b4']:
        params[f'm_{key}'] = np.zeros_like(params[key])
        params[f'v_{key}'] = np.zeros_like(params[key])

    return params

In [137]:
def ReLU(Z):
    return np.maximum(1, Z)

def sigmoid(Z):
    Z = np.asarray(Z, dtype=np.float64)  # Ensure Z is a NumPy array with float64 dtype
    Z = np.clip(Z, -500, 500)  # Adjust these limits as needed
    return 1 / (1 + np.exp(-Z))

def forward_prop(X, params, keep_prob=1.0):
    cache = {} 

    Z1 = np.dot(X, params['W1']) + params['b1']
    A1 = ReLU(Z1)

    if keep_prob < 1.0:
        D1 = np.random.randn(*A1.shape) < keep_prob
        A1 *= D1 
        A1 /= keep_prob
        cache['D1'] = D1
    
    Z2 = np.dot(A1, params['W2']) + params['b2']
    A2 = ReLU(Z2)

    if keep_prob < 1.0:
        D2 = np.random.rand(*A2.shape) < keep_prob
        A2 *= D2
        A2 /= keep_prob
        cache['D2'] = D2

    Z3 = np.dot(A2, params['W3']) + params['b3']
    A3 = sigmoid(Z3)
    

    cache['Z1'], cache['A1'], cache['Z2'], cache['A2'], cache['Z3'], cache['A3'] = Z1, A1, Z2, A2, Z3, A3

    return A3, cache


In [138]:
def create_mini_batches(X, y, batch_size=32):
    batches = []
    m = X.shape[0]  # Number of samples

    permutation = np.random.permutation(m)  # Shuffle indices

    X, y = np.array(X), np.array(y)
    X_shuffled = X[permutation]
    y_shuffled = y[permutation]  

    num_batches = m // batch_size

    for i in range(num_batches):
        X_mini = X_shuffled[i * batch_size:(i + 1) * batch_size]
        y_mini = y_shuffled[i * batch_size:(i + 1) * batch_size]
        batches.append((X_mini, y_mini))

    if m % batch_size != 0:  # Add remaining samples if any
        X_mini = X_shuffled[num_batches * batch_size:]
        y_mini = y_shuffled[num_batches * batch_size:]
        batches.append((X_mini, y_mini))

    return batches


In [139]:
def compute_loss(y_pred, y, params, lambda_reg=0.01):
    m = y.shape[0]
    cross_entropy_loss = 1/m * np.sum(-y*np.log(y_pred+1e-10)+(1-y)*np.log(1-y_pred+1e-10))
    l2_loss = lambda_reg * (np.sum(params['W1']**2) + np.sum(params['W2']**2) + np.sum(params['W3']**2))
    return cross_entropy_loss + l2_loss

In [140]:
def back_prop(X, y, params, cache, keep_prob=1.0):
   
    m = X.shape[0]  # Number of samples
    grads = {}

    A1, A2, A3 = cache['A1'], cache['A2'], cache['A3']
    Z1, Z2, Z3 = cache['Z1'], cache['Z2'], cache['Z3']

    dZ3 = A3 - y  # Derivative of binary cross-entropy loss w.r.t Z3
    dW3 = (1/m) * np.dot(A2.T, dZ3)
    db3 = (1/m) * np.sum(dZ3, axis=0, keepdims=True)

    dA2 = np.dot(dZ3, params['W3'].T)
    dZ2 = dA2 * (Z2 > 0)  # ReLU derivative
    dW2 = (1/m) * np.dot(A1.T, dZ2)
    db2 = (1/m) * np.sum(dZ2, axis=0, keepdims=True)

    if keep_prob < 1.0:
        dA2 *= cache['D2']
        dA2 /= keep_prob

    dA1 = np.dot(dZ2, params['W2'].T)
    dZ1 = dA1 * (Z1 > 0)  # ReLU derivative
    dW1 = (1/m) * np.dot(X.T, dZ1)
    db1 = (1/m) * np.sum(dZ1, axis=0, keepdims=True)

    if keep_prob < 1.0:
        dA1 *= cache['D1']
        dA1 /= keep_prob

    grads['dW1'], grads['db1'] = dW1, db1
    grads['dW2'], grads['db2'] = dW2, db2
    grads['dW3'], grads['db3'] = dW3, db3

    return grads

In [141]:
def update_params(params, grads, learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8, t=1):
    updated_params = params.copy()  # Create a copy of params to avoid modifying the original

    # Update first and second moments for weights and biases
    for key in ['W1', 'W2', 'W3', 'b1', 'b2', 'b3']:
        # Update first moment (momentum)
        updated_params[f'm_{key}'] = beta1 * params[f'm_{key}'] + (1 - beta1) * grads[f'd{key}']
        
        # Update second moment (RMSprop)
        updated_params[f'v_{key}'] = beta2 * params[f'v_{key}'] + (1 - beta2) * (grads[f'd{key}'] ** 2)
        
        # Bias correction for first and second moments
        m_hat = updated_params[f'm_{key}'] / (1 - beta1 ** t)
        v_hat = updated_params[f'v_{key}'] / (1 - beta2 ** t)
        
        # Update parameters
        updated_params[key] = params[key] - learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)

    return updated_params

In [142]:
def model(X, y, params, epochs, decay_rate=0.2, learning_rate=0.01, batch_size=32, keep_prob=0.8, beta1=0.09, beta2=0.999):
    learning_rate0 = learning_rate
    t = 1  # Initialize time step for Adam

    for i in range(epochs):
        batches = create_mini_batches(X, y, batch_size=batch_size)
        epoch_loss = 0  

        for X_mini, y_mini in batches:
            A3, cache = forward_prop(X_mini, params, keep_prob)
            loss = compute_loss(A3, y_mini, params)
            epoch_loss += loss  
            grads = back_prop(X_mini, y_mini, params, cache, keep_prob)
            params = update_params(params, grads, learning_rate, beta1, beta2, t)
            t += 1  # Increment time step
        
        learning_rate = (1 / (1 + decay_rate * i)) * learning_rate0
        epoch_loss /= len(batches)

        if (i + 1) % 100 == 0:
            print(f"Epoch {i + 1} Done")
            print("Learning rate: ", learning_rate)

    return params


In [143]:
predict = forward_prop

In [144]:
from sklearn.model_selection import ParameterSampler, train_test_split

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)


param_grid = {
    'learning_rate': [0.001, 0.01, 0.1],
    'keep_prob': [0.7, 0.8, 0.9],
    'beta1': [0.9, 0.95],
    'beta2': [0.999, 0.995],
    'decay_rate': [0.0, 0.001, 0.01],
    'batch_size': [32, 64, 128]
}


param_samples = list(ParameterSampler(param_grid, n_iter=10, random_state=42))

best_accuracy = 0
best_params = {}

params = init_params(X.shape[1])

for params_sample in param_samples:
    print("Testing params: ", params_sample)

    model_params = model(
        X_train, y_train,
        params=params,
        epochs=2000,
        learning_rate=params_sample['learning_rate'],
        keep_prob=params_sample['keep_prob'],
        beta1=params_sample['beta1'],
        beta2=params_sample['beta2'],
        decay_rate=params_sample['decay_rate']
    )

    y_pred, _ = predict(X_val, model_params)
    accuracy = np.mean((y_pred > 0.5) == y_val) * 100
    print(f"Validation Accuracy: {accuracy:.2f}%")
    
    # Track best params
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = params_sample

Testing params:  {'learning_rate': 0.001, 'keep_prob': 0.9, 'decay_rate': 0.01, 'beta2': 0.999, 'beta1': 0.9, 'batch_size': 64}
Epoch 100 Done
Learning rate:  0.0005025125628140703
Epoch 200 Done
Learning rate:  0.00033444816053511704
Epoch 300 Done
Learning rate:  0.0002506265664160401
Epoch 400 Done
Learning rate:  0.00020040080160320639
Epoch 500 Done
Learning rate:  0.0001669449081803005
Epoch 600 Done
Learning rate:  0.00014306151645207438
Epoch 700 Done
Learning rate:  0.0001251564455569462
Epoch 800 Done
Learning rate:  0.00011123470522803114
Epoch 900 Done
Learning rate:  0.00010010010010010009
Epoch 1000 Done
Learning rate:  9.099181073703367e-05
Epoch 1100 Done
Learning rate:  8.340283569641367e-05
Epoch 1200 Done
Learning rate:  7.698229407236336e-05
Epoch 1300 Done
Learning rate:  7.14796283059328e-05
Epoch 1400 Done
Learning rate:  6.6711140760507e-05
Epoch 1500 Done
Learning rate:  6.253908692933083e-05
Epoch 1600 Done
Learning rate:  5.8858151854031775e-05
Epoch 1700 Don

In [145]:
params = init_params(X.shape[1])
params = model(X_train, y_train, params, 5000, **best_params)

Epoch 100 Done
Learning rate:  0.1
Epoch 200 Done
Learning rate:  0.1
Epoch 300 Done
Learning rate:  0.1
Epoch 400 Done
Learning rate:  0.1
Epoch 500 Done
Learning rate:  0.1
Epoch 600 Done
Learning rate:  0.1
Epoch 700 Done
Learning rate:  0.1
Epoch 800 Done
Learning rate:  0.1
Epoch 900 Done
Learning rate:  0.1
Epoch 1000 Done
Learning rate:  0.1
Epoch 1100 Done
Learning rate:  0.1
Epoch 1200 Done
Learning rate:  0.1
Epoch 1300 Done
Learning rate:  0.1
Epoch 1400 Done
Learning rate:  0.1
Epoch 1500 Done
Learning rate:  0.1
Epoch 1600 Done
Learning rate:  0.1
Epoch 1700 Done
Learning rate:  0.1
Epoch 1800 Done
Learning rate:  0.1
Epoch 1900 Done
Learning rate:  0.1
Epoch 2000 Done
Learning rate:  0.1
Epoch 2100 Done
Learning rate:  0.1
Epoch 2200 Done
Learning rate:  0.1
Epoch 2300 Done
Learning rate:  0.1
Epoch 2400 Done
Learning rate:  0.1
Epoch 2500 Done
Learning rate:  0.1
Epoch 2600 Done
Learning rate:  0.1
Epoch 2700 Done
Learning rate:  0.1
Epoch 2800 Done
Learning rate:  0.1
E

In [146]:
y_pred, cahce = predict(X_test, params)

accuracy = np.mean((y_pred > 0.5) == y_test) *100
print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 81.18%
