In [33]:
import pandas as pd
import numpy as np

def load_and_preprocess_data():
    """Load ``train.csv`` and return scaled features and labels.

    Missing ``Age`` and ``Fare`` values are replaced by the column mean and
    missing ``Embarked`` values by the most frequent value. ``Sex`` and
    ``Embarked`` are mapped to integer codes, then every feature column is
    min-max scaled to ``[0, 1]``.

    Returns:
        A tuple ``(X, y)``. ``X`` has one row per feature and one column per
        passenger (shape ``(7, m)``); ``y`` holds the ``Survived`` column
        with shape ``(1, m)``.
    """
    train_data = pd.read_csv('train.csv')

    # Assign the filled column back instead of calling
    # ``fillna(inplace=True)`` on a column selection: the chained in-place
    # form is deprecated and leaves the frame untouched under copy-on-write.
    train_data['Age'] = train_data['Age'].fillna(train_data['Age'].mean())
    train_data['Embarked'] = train_data['Embarked'].fillna(
        train_data['Embarked'].mode()[0]
    )
    train_data['Fare'] = train_data['Fare'].fillna(train_data['Fare'].mean())

    train_data['Sex'] = train_data['Sex'].map({'male': 0, 'female': 1})
    train_data['Embarked'] = train_data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

    features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
    X = train_data[features].to_numpy(dtype=float)
    y = train_data['Survived'].values.reshape(1, -1)

    # Normalize data using min-max scaling. A constant column has a zero
    # range; dividing by 1 instead maps it to 0 rather than NaN.
    col_min = X.min(axis=0)
    col_range = X.max(axis=0) - col_min
    col_range[col_range == 0] = 1.0
    X = (X - col_min) / col_range

    return X.T, y

def preprocess_test_data():
    """Load ``test.csv`` and return the scaled feature matrix.

    Missing ``Age`` and ``Fare`` values are replaced by the column mean and
    missing ``Embarked`` values by the most frequent value. ``Sex`` and
    ``Embarked`` are mapped to integer codes, then every feature column is
    min-max scaled to ``[0, 1]``.

    NOTE(review): the scaling uses the min/max of ``test.csv`` itself, so a
    given raw value is not guaranteed to scale to the same number as it
    would with statistics taken from another file.

    Returns:
        Array with one row per feature and one column per passenger
        (shape ``(7, m)``).
    """
    test_data = pd.read_csv('test.csv')

    # Assign the filled column back instead of calling
    # ``fillna(inplace=True)`` on a column selection: the chained in-place
    # form is deprecated and leaves the frame untouched under copy-on-write.
    test_data['Age'] = test_data['Age'].fillna(test_data['Age'].mean())
    test_data['Fare'] = test_data['Fare'].fillna(test_data['Fare'].mean())
    # Without this, a missing port maps to NaN and poisons the whole column
    # once it is min-max scaled.
    if test_data['Embarked'].isna().any():
        test_data['Embarked'] = test_data['Embarked'].fillna(
            test_data['Embarked'].mode()[0]
        )

    test_data['Sex'] = test_data['Sex'].map({'male': 0, 'female': 1})
    test_data['Embarked'] = test_data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

    features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
    X_test = test_data[features].to_numpy(dtype=float)

    # Normalize data using min-max scaling. A constant column has a zero
    # range; dividing by 1 instead maps it to 0 rather than NaN.
    col_min = X_test.min(axis=0)
    col_range = X_test.max(axis=0) - col_min
    col_range[col_range == 0] = 1.0
    X_test = (X_test - col_min) / col_range

    return X_test.T

def initialize_parameters(n_inputs=7, n_hidden=4, n_outputs=1, seed=42):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution and scaled by
    0.01; biases start at zero.

    Args:
        n_inputs: Number of input features (columns of ``w1``).
        n_hidden: Number of hidden-layer neurons.
        n_outputs: Number of output neurons.
        seed: Value passed to ``np.random.seed`` before sampling. This
            resets NumPy's global random state. Pass ``None`` to leave the
            global state as it is.

    Returns:
        Tuple ``(w1, b1, w2, b2)`` with shapes ``(n_hidden, n_inputs)``,
        ``(n_hidden, 1)``, ``(n_outputs, n_hidden)`` and ``(n_outputs, 1)``.
    """
    if seed is not None:
        np.random.seed(seed)

    # Hidden layer: draw w1 first so the default call reproduces the same
    # random stream as a fixed (4, 7) then (1, 4) draw.
    w1 = np.random.randn(n_hidden, n_inputs) * 0.01
    b1 = np.zeros((n_hidden, 1))

    # Output layer, fed by the hidden layer activations.
    w2 = np.random.randn(n_outputs, n_hidden) * 0.01
    b2 = np.zeros((n_outputs, 1))

    return w1, b1, w2, b2

def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The exponential is only ever taken of a non-positive number, so large
    negative inputs do not overflow ``np.exp`` the way the direct formula
    does.

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        Float array with the same shape as ``z`` and values in ``[0, 1]``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # always in (0, 1], never overflows
    # For z >= 0 this is the textbook formula; for z < 0 it is the same
    # value rewritten as exp(z) / (1 + exp(z)).
    return np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

def tanh(z):
    """Apply the hyperbolic tangent to ``z`` element-wise via NumPy."""
    activated = np.tanh(z)
    return activated

def forward_pass(X, w1, b1, w2, b2):
    """Run the inputs through the hidden and output layers.

    The hidden layer applies ``tanh`` to ``w1 . X + b1``; the output layer
    applies ``sigmoid`` to ``w2 . a1 + b2``.

    Returns:
        Tuple ``(z1, a1, z2, y_hat)``: hidden pre-activation, hidden
        activation, output pre-activation and output activation.
    """
    hidden_linear = np.dot(w1, X) + b1
    hidden_activation = tanh(hidden_linear)

    output_linear = np.dot(w2, hidden_activation) + b2
    output_activation = sigmoid(output_linear)

    return hidden_linear, hidden_activation, output_linear, output_activation

def compute_cost(y, y_hat):
    """Return the mean binary cross-entropy between ``y`` and ``y_hat``.

    Args:
        y: Labels; ``y.shape[1]`` is used as the number of examples.
        y_hat: Predicted probabilities, broadcastable against ``y``.

    Returns:
        The cost with all length-one axes squeezed away.
    """
    m = y.shape[1]
    # Keep predictions strictly inside (0, 1): a prediction of exactly 0 or 1
    # would otherwise hit log(0), and the 0 * -inf that follows is NaN.
    eps = 1e-15
    y_hat = np.clip(y_hat, eps, 1 - eps)
    cost = -1/m * np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
    return np.squeeze(cost)

def backpropagation(X, y, z1, a1, z2, y_hat, w1, w2):
    """Compute the gradients of the cost for both layers.

    ``z1``, ``z2`` and ``w1`` are accepted for signature symmetry with the
    forward pass but are not read here.

    Returns:
        Tuple ``(dw1, db1, dw2, db2)``, each averaged over the ``X.shape[1]``
        examples.
    """
    inv_m = 1/X.shape[1]

    # Output layer: the error term is prediction minus label.
    output_error = y_hat - y
    grad_w2 = inv_m * np.dot(output_error, a1.T)
    grad_b2 = inv_m * np.sum(output_error, axis=1, keepdims=True)

    # Hidden layer: push the error back through w2 and scale it by the
    # tanh derivative, 1 - a1^2.
    tanh_slope = 1 - np.power(a1, 2)
    hidden_error = np.dot(w2.T, output_error) * tanh_slope
    grad_w1 = inv_m * np.dot(hidden_error, X.T)
    grad_b1 = inv_m * np.sum(hidden_error, axis=1, keepdims=True)

    return grad_w1, grad_b1, grad_w2, grad_b2

def update_parameters(w1, b1, w2, b2, dw1, db1, dw2, db2, learning_rate):
    """Take one gradient-descent step on every parameter.

    Each parameter is moved against its gradient by ``learning_rate`` times
    that gradient.

    Returns:
        Tuple ``(w1, b1, w2, b2)`` holding the updated values.
    """
    current = (w1, b1, w2, b2)
    gradients = (dw1, db1, dw2, db2)
    return tuple(
        value - learning_rate * slope
        for value, slope in zip(current, gradients)
    )

def train(X, y, num_iterations=1000, learning_rate=0.05):
    """Fit the network with full-batch gradient descent.

    Every iteration runs a forward pass, records the cost, backpropagates
    and updates the parameters. The cost is printed every 100th iteration.

    Returns:
        Tuple ``(w1, b1, w2, b2, costs)``, where ``costs`` lists the cost
        measured at the start of each iteration.
    """
    params = initialize_parameters()
    costs = []

    for step in range(num_iterations):
        w1, b1, w2, b2 = params

        z1, a1, z2, y_hat = forward_pass(X, w1, b1, w2, b2)
        cost = compute_cost(y, y_hat)
        costs.append(cost)

        grads = backpropagation(X, y, z1, a1, z2, y_hat, w1, w2)
        params = update_parameters(w1, b1, w2, b2, *grads, learning_rate)

        if not step % 100:
            i = step
            print(f"Iteration {i}: Cost {cost}")

    w1, b1, w2, b2 = params
    return w1, b1, w2, b2, costs

def predict(X, w1, b1, w2, b2):
    """Return 0/1 class predictions for the columns of ``X``.

    A forward pass yields the output activations; values strictly above 0.5
    become 1 and everything else becomes 0.
    """
    *_, probabilities = forward_pass(X, w1, b1, w2, b2)
    above_threshold = probabilities > 0.5
    return above_threshold.astype(int)

def save_predictions(predictions):
    """Write ``predictions`` next to the passenger ids as ``submission.csv``.

    ``test.csv`` is re-read to obtain the ``PassengerId`` column, and the
    predictions are flattened to one value per row. The CSV is written
    without the index column.
    """
    passenger_ids = pd.read_csv('test.csv')["PassengerId"]
    columns = {
        "PassengerId": passenger_ids,
        "Survived": predictions.flatten(),
    }
    submission = pd.DataFrame(columns)
    submission.to_csv('submission.csv', index=False)

# Load train.csv and build the scaled feature matrix and label row vector.
X_train, y_train = load_and_preprocess_data()

# Train the model with full-batch gradient descent.
# The learning rate was chosen by trial and error; the cost is printed every
# 100 iterations, and `costs` keeps the value from every iteration.
w1, b1, w2, b2, costs = train(X_train, y_train, num_iterations=100000, learning_rate=2)

# Preprocess test.csv the same way and predict 0/1 labels for it.
X_test = preprocess_test_data()
predictions = predict(X_test, w1, b1, w2, b2)

# Save predictions to submission.csv for the Kaggle submission.
save_predictions(predictions)


Iteration 0: Cost 0.6931288902529148
Iteration 100: Cost 0.4305455213199182
Iteration 200: Cost 0.4323672836427741
Iteration 300: Cost 0.42379179275471685
Iteration 400: Cost 0.4228987142131999
Iteration 500: Cost 0.42036692851855373
Iteration 600: Cost 0.41782843161647204
Iteration 700: Cost 0.4143359867786057
Iteration 800: Cost 0.4108671197691011
Iteration 900: Cost 0.4079915047219374
Iteration 1000: Cost 0.4056090961019272
Iteration 1100: Cost 0.4035793919907909
Iteration 1200: Cost 0.40180050442001763
Iteration 1300: Cost 0.4002343094824928
Iteration 1400: Cost 0.39886718912400865
Iteration 1500: Cost 0.3976842496911947
Iteration 1600: Cost 0.3966621056344416
Iteration 1700: Cost 0.3957717089846095
Iteration 1800: Cost 0.3949838410440775
Iteration 1900: Cost 0.39427327480136637
Iteration 2000: Cost 0.39362110134752093
Iteration 2100: Cost 0.3930151216671277
Iteration 2200: Cost 0.39244838970511725
Iteration 2300: Cost 0.39191696826876693
Iteration 2400: Cost 0.3914181729667143
Ite