In [20]:
import pandas as pd
import numpy as np
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')


In [21]:
# Display the first few rows of the training data
print(train_data.head())

# Display the first few rows of the test data
print(test_data.head())


   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  
  

In [22]:
# Fill missing 'Age' with the median value
train_data['Age'].fillna(train_data['Age'].median())
train_data['Fare'] = train_data['Fare'].fillna(train_data['Fare'].median())

# Fill missing 'Embarked' with the mode (most frequent value)
train_data['Embarked'].fillna(train_data['Embarked'].mode()[0])

# Drop the 'Cabin' column due to too many missing values
train_data.drop(columns=['Cabin'])


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,S
...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C


In [23]:
from sklearn.preprocessing import StandardScaler

# Initialize the scaler
scaler = StandardScaler()

# Normalize 'Age' and 'Fare'
train_data[['Age', 'Fare']] = scaler.fit_transform(train_data[['Age', 'Fare']])

# Check the result
print(train_data[['Age', 'Fare']].head())


        Age      Fare
0 -0.530377 -0.502445
1  0.571831  0.786845
2 -0.254825 -0.488854
3  0.365167  0.420730
4  0.365167 -0.486337


In [24]:
# Convert 'Sex' column to numerical (0 = female, 1 = male)
train_data['Sex'] = train_data['Sex'].map({'male': 1, 'female': 0})

# One-hot encode 'Embarked' and 'Pclass'
train_data = pd.get_dummies(train_data, columns=['Embarked', 'Pclass'], drop_first=True)

# Check the result
print(train_data.head())


   PassengerId  Survived                                               Name  \
0            1         0                            Braund, Mr. Owen Harris   
1            2         1  Cumings, Mrs. John Bradley (Florence Briggs Th...   
2            3         1                             Heikkinen, Miss. Laina   
3            4         1       Futrelle, Mrs. Jacques Heath (Lily May Peel)   
4            5         0                           Allen, Mr. William Henry   

   Sex       Age  SibSp  Parch            Ticket      Fare Cabin  Embarked_Q  \
0    1 -0.530377      1      0         A/5 21171 -0.502445   NaN       False   
1    0  0.571831      1      0          PC 17599  0.786845   C85       False   
2    0 -0.254825      0      0  STON/O2. 3101282 -0.488854   NaN       False   
3    0  0.365167      1      0            113803  0.420730  C123       False   
4    1  0.365167      0      0            373450 -0.486337   NaN       False   

   Embarked_S  Pclass_2  Pclass_3  
0       

In [25]:
train_data

Unnamed: 0,PassengerId,Survived,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked_Q,Embarked_S,Pclass_2,Pclass_3
0,1,0,"Braund, Mr. Owen Harris",1,-0.530377,1,0,A/5 21171,-0.502445,,False,True,False,True
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,0.571831,1,0,PC 17599,0.786845,C85,False,False,False,False
2,3,1,"Heikkinen, Miss. Laina",0,-0.254825,0,0,STON/O2. 3101282,-0.488854,,False,True,False,True
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,0.365167,1,0,113803,0.420730,C123,False,True,False,False
4,5,0,"Allen, Mr. William Henry",1,0.365167,0,0,373450,-0.486337,,False,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,"Montvila, Rev. Juozas",1,-0.185937,0,0,211536,-0.386671,,False,True,True,False
887,888,1,"Graham, Miss. Margaret Edith",0,-0.737041,0,0,112053,-0.044381,B42,False,True,False,False
888,889,0,"Johnston, Miss. Catherine Helen ""Carrie""",0,,1,2,W./C. 6607,-0.176263,,False,True,False,True
889,890,1,"Behr, Mr. Karl Howell",1,-0.254825,0,0,111369,-0.044381,C148,False,False,False,False


In [26]:
# Step 1: Divide the training data
Xtrain = train_data.drop(columns=['Survived'])  # Input features
Ytrain = train_data['Survived']  # Labels

# Step 2: Prepare the test data
test_data = pd.read_csv('test.csv')

# Handle missing values in test data similarly as in train data
test_data['Age'] = test_data['Age'].fillna(train_data['Age'].median())
test_data['Fare'] = test_data['Fare'].fillna(test_data['Fare'].median())
test_data['Embarked'] = test_data['Embarked'].fillna(test_data['Embarked'].mode()[0])

# Drop unnecessary columns in test data
test_data = test_data.drop(columns=['Cabin', 'PassengerId'])

# Convert 'Sex' to numerical values in test data
test_data['Sex'] = test_data['Sex'].map({'male': 1, 'female': 0})

# One-hot encode 'Embarked' and 'Pclass' in test data
Xtest = pd.get_dummies(test_data, columns=['Embarked', 'Pclass'], drop_first=True)

# Ensure consistency between Xtrain and Xtest columns
Xtrain, Xtest = Xtrain.align(Xtest, join='left', axis=1, fill_value=0)

# Verify shapes
print("Xtrain shape:", Xtrain.shape)
print("Ytrain shape:", Ytrain.shape)
print("Xtest shape:", Xtest.shape)


Xtrain shape: (891, 13)
Ytrain shape: (891,)
Xtest shape: (418, 13)


In [27]:
import random

# Function to initialize weights and biases
def initialize_parameters(n_inputs, n_hidden, n_outputs):
    # Initialize weights with small random values
    W1 = [[random.uniform(-0.01, 0.01) for _ in range(n_hidden)] for _ in range(n_inputs)]
    b1 = [0.0 for _ in range(n_hidden)]  # Initialize biases to 0
    
    W2 = [[random.uniform(-0.01, 0.01)] for _ in range(n_hidden)]
    b2 = 0.0  # Initialize bias for output layer to 0
    
    return W1, b1, W2, b2

# Example usage: 3 input features, 4 hidden units, 1 output unit
n_inputs = 3
n_hidden = 4
n_outputs = 1

W1, b1, W2, b2 = initialize_parameters(n_inputs, n_hidden, n_outputs)

# Print the initialized weights and biases
print("Weights from Input to Hidden Layer (W1):", W1)
print("Biases for Hidden Layer (b1):", b1)
print("Weights from Hidden to Output Layer (W2):", W2)
print("Bias for Output Layer (b2):", b2)


Weights from Input to Hidden Layer (W1): [[-0.0028815960158764557, -0.0012053978874985924, -0.001995256099090221, -0.0008306523563759074], [0.008709794891299543, -0.0081235815439863, 0.0054144609232092585, 0.004251388117417786], [-0.007945443723433178, -0.009880295734946672, 0.004519014129042195, 0.0007265829261429713]]
Biases for Hidden Layer (b1): [0.0, 0.0, 0.0, 0.0]
Weights from Hidden to Output Layer (W2): [[-0.004893589069369706], [-0.0032742030050955506], [0.006821589451114754], [0.004753089568059898]]
Bias for Output Layer (b2): 0.0


In [28]:
import math

# Activation functions
def relu(z):
    return [max(0, value) for value in z]  # Element-wise ReLU

def sigmoid(z):
    return [1 / (1 + math.exp(-value)) for value in z]  # Element-wise Sigmoid

# Linear function: Z = W * X + b
def linear_forward(W, A_prev, b):
    Z = []
    for j in range(len(W[0])):  # Loop through neurons in the current layer
        z_j = sum(W[i][j] * A_prev[i] for i in range(len(A_prev))) + b[j]
        Z.append(z_j)
    return Z

# Forward pass function
def forward_pass(X, parameters):
    # Extract parameters
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    # Layer 1: Input to Hidden
    Z1 = linear_forward(W1, X, b1)  # Linear step
    A1 = relu(Z1)                   # Activation step (ReLU)
    
    # Layer 2: Hidden to Output
    Z2 = linear_forward(W2, A1, b2)  # Linear step
    A2 = sigmoid(Z2)                 # Activation step (Sigmoid for binary classification)
    
    return A2  # Output of the network

In [29]:
import numpy as np

# Sigmoid activation function and its derivative
def sigmoid(z):
    return [1 / (1 + np.exp(-value)) for value in z]  # Element-wise Sigmoid

def sigmoid_derivative(z):
    sig = sigmoid(z)
    return [sig[i] * (1 - sig[i]) for i in range(len(sig))]  # Derivative of Sigmoid

# Backpropagation function
def backpropagation(X, Y, parameters, cache):
    # Extract cached values from forward pass
    A1, A2, Z1, Z2 = cache
    
    # Number of examples
    m = len(Y)
    
    # Gradients for output layer
    dZ2 = [(A2[i] - Y[i]) for i in range(m)]  # Derivative of loss w.r.t Z2
    dW2 = [[(A1[i][j] * dZ2[i]) for j in range(len(A1[0]))] for i in range(m)]  # Gradients for W2
    db2 = [sum(dZ2) / m]  # Gradient for b2
    
    # Gradients for hidden layer with Sigmoid
    dZ1 = [[sigmoid_derivative(Z1[i])[j] * sum([dZ2[i] * parameters['W2'][j][0]]) for j in range(len(Z1[0]))] for i in range(m)]
    dW1 = [[(X[i][k] * dZ1[i][j]) for j in range(len(dZ1[0]))] for i in range(m) for k in range(len(X[0]))]
    db1 = [sum(dZ1[i][j] for i in range(m)) / m for j in range(len(dZ1[0]))]
    
    return dW1, db1, dW2, db2

# Example usage
# Assuming parameters and forward pass cache are available
# X is input, Y is target (0 or 1), cache contains forward pass results




In [30]:
def update_parameters(parameters, dW1, db1, dW2, db2, learning_rate):
    # Update weights and biases for the first layer (hidden layer)
    parameters['W1'] = [[parameters['W1'][i][j] - learning_rate * dW1[i][j] for j in range(len(parameters['W1'][0]))] for i in range(len(parameters['W1']))]
    parameters['b1'] = [parameters['b1'][i] - learning_rate * db1[i] for i in range(len(parameters['b1']))]
    
    # Update weights and biases for the second layer (output layer)
    parameters['W2'] = [[parameters['W2'][i][j] - learning_rate * dW2[i][j] for j in range(len(parameters['W2'][0]))] for i in range(len(parameters['W2']))]
    parameters['b2'] = [parameters['b2'][i] - learning_rate * db2[i] for i in range(len(parameters['b2']))]
    
    return parameters


In [31]:
import numpy as np

# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Forward pass function (used for prediction)
def forward_pass(X, parameters):
    # Layer 1 (hidden layer)
    Z1 = np.dot(X, parameters['W1']) + parameters['b1']  # Linear transformation
    A1 = sigmoid(Z1)  # Activation function (sigmoid for hidden layer)
    
    # Layer 2 (output layer)
    Z2 = np.dot(A1, parameters['W2']) + parameters['b2']  # Linear transformation
    A2 = sigmoid(Z2)  # Activation function (sigmoid for output layer)
    
    return A2  # A2 is the final output (prediction probability)

# Prediction function
def predict(X, parameters):
    # Perform a forward pass to get the output probability
    A2 = forward_pass(X, parameters)
    
    # Convert the output probability into a binary prediction (0 or 1)
    predictions = [1 if output >= 0.5 else 0 for output in A2]
    
    return predictions


In [32]:
import numpy as np
import pandas as pd

# Sigmoid activation function and its derivative
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(a):
    return a * (1 - a)

# Forward pass
def forward_pass(X, parameters):
    Z1 = np.dot(X, parameters['W1']) + parameters['b1']  # Layer 1 linear
    A1 = sigmoid(Z1)  # Layer 1 activation (sigmoid)
    
    Z2 = np.dot(A1, parameters['W2']) + parameters['b2']  # Layer 2 linear
    A2 = sigmoid(Z2)  # Layer 2 activation (sigmoid)
    
    return Z1, A1, Z2, A2

# Backpropagation
def backpropagation(X, Y, Z1, A1, Z2, A2, parameters):
    m = X.shape[0]  # Number of training examples
    
    # Output layer gradients
    dZ2 = A2 - Y.reshape(-1, 1)  # Binary cross-entropy loss derivative w.r.t Z2
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m
    
    # Hidden layer gradients
    dZ1 = np.dot(dZ2, parameters['W2'].T) * sigmoid_derivative(A1)
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m
    
    return dW1, db1, dW2, db2

# Update parameters using gradient descent
def update_parameters(parameters, dW1, db1, dW2, db2, learning_rate):
    parameters['W1'] -= learning_rate * dW1
    parameters['b1'] -= learning_rate * db1
    parameters['W2'] -= learning_rate * dW2
    parameters['b2'] -= learning_rate * db2
    return parameters

# Prediction function
def predict(X, parameters):
    _, _, _, A2 = forward_pass(X, parameters)
    return (A2 >= 0.5).astype(int)  # Convert probabilities to binary predictions

# Binary cross-entropy loss function
def compute_loss(A2, Y):
    m = Y.shape[0]
    loss = -1/m * np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2))
    return loss

# Training and evaluation function
def train_model(Xtrain, Ytrain, Xtest, num_iterations, learning_rate):
    np.random.seed(42)  # For reproducibility

    # Initialize parameters
    n_x = Xtrain.shape[1]  # Number of features
    n_h = 5  # Number of hidden units
    n_y = 1  # Output unit (binary classification)
    
    parameters = {
        'W1': np.random.randn(n_x, n_h) * 0.01,
        'b1': np.zeros((1, n_h)),
        'W2': np.random.randn(n_h, n_y) * 0.01,
        'b2': np.zeros((1, n_y))
    }

    for i in range(num_iterations):
        # Forward pass
        Z1, A1, Z2, A2 = forward_pass(Xtrain, parameters)

        # Compute loss
        loss = compute_loss(A2, Ytrain)
        if i % 100 == 0:
            print(f"Iteration {i}: Loss = {loss}")

        # Backpropagation
        dW1, db1, dW2, db2 = backpropagation(Xtrain, Ytrain, Z1, A1, Z2, A2, parameters)

        # Update parameters
        parameters = update_parameters(parameters, dW1, db1, dW2, db2, learning_rate)
    
    # Predict on the test set
    Ytest_pred = predict(Xtest, parameters)

    return Ytest_pred, parameters

# Save predictions in the format required for Kaggle submission
def save_predictions(Ytest_pred, passenger_ids, filename='submission.csv'):
    submission = pd.DataFrame({
        'PassengerId': passenger_ids,
        'Survived': Ytest_pred.flatten()  # Flatten to a 1D array
    })
    submission.to_csv(filename, index=False)
    print(f"Predictions saved to {filename}")

# Example usage
if __name__ == "__main__":
    # Load training and test data (assuming they are already preprocessed)
    train_data = pd.read_csv('train.csv')  # Load your processed train data
    test_data = pd.read_csv('test.csv')  # Load your processed test data

    # Extract features and labels
    Xtrain = train_data.drop(columns=['Survived']).values
    Ytrain = train_data['Survived'].values
    Xtest = test_data.drop(columns=['PassengerId']).values  # Exclude PassengerId in features
    passenger_ids = test_data['PassengerId'].values  # Keep PassengerId for submission

    # Train the model
    Ytest_pred, trained_parameters = train_model(Xtrain, Ytrain, Xtest, num_iterations=1000, learning_rate=0.01)

    # Save predictions in a CSV file
    save_predictions(Ytest_pred, passenger_ids)


TypeError: can't multiply sequence by non-int of type 'float'