In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

In [6]:
train_data = pd.read_csv('./train.csv')
train_data = train_data.drop(["Cabin", "PassengerId", "Name", "Ticket"], axis=1)
test_data = pd.read_csv('./test.csv')
test_data = test_data.drop(["Cabin", "PassengerId", "Name", "Ticket"], axis=1)

In [7]:
train_data = train_data.dropna()
test_data = test_data.dropna()

In [8]:
train_data.head()
# train_data.describe()
# print(test_data)
# print(train_data)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


In [11]:
embarked_mapping = {embarked: i for i, embarked in enumerate(train_data['Embarked'].unique())}
gender_mapping = {sex: i for i, sex in enumerate(train_data['Sex'].unique())}

embarked_mapping.update({embarked: i for i, embarked in enumerate(test_data['Embarked'].unique())})
gender_mapping.update({sex: i for i, sex in enumerate(test_data['Sex'].unique())})

In [12]:
# Step 2: Encode categorical variables
train_data['Embarked'] = train_data['Embarked'].map(embarked_mapping)
train_data['Sex'] = train_data['Sex'].map(gender_mapping)

test_data['Embarked'] = test_data['Embarked'].map(embarked_mapping)
test_data['Sex'] = test_data['Sex'].map(gender_mapping)

In [13]:
selected_features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

In [14]:
train_data[selected_features] = (train_data[selected_features] - train_data[selected_features].mean()) / train_data[selected_features].std()
test_data = (test_data - test_data.mean()) / test_data.std()

In [15]:
X_train = train_data.drop('Survived', axis=1).values
y_train = train_data['Survived'].values

In [48]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(len(selected_features), 50)
        self.fc2 = nn.Linear(50, 1)
        self.fc3 = nn.Linear(1, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.sigmoid(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

In [59]:
def train_model(model, criterion, optimizer, X_train, y_train, epochs=1000):
    X_train = torch.Tensor(X_train)
    y_train = torch.Tensor(y_train)

    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train.float().unsqueeze(1))
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}')

In [60]:
model = NeuralNetwork()

In [61]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [62]:
train_model(model, criterion, optimizer, X_train, y_train, epochs=1000)

Epoch [10/1000], Loss: 0.6085190176963806
Epoch [20/1000], Loss: 0.5236503481864929
Epoch [30/1000], Loss: 0.46910974383354187
Epoch [40/1000], Loss: 0.44890153408050537
Epoch [50/1000], Loss: 0.4456418454647064
Epoch [60/1000], Loss: 0.4414485991001129
Epoch [70/1000], Loss: 0.4389791488647461
Epoch [80/1000], Loss: 0.43631166219711304
Epoch [90/1000], Loss: 0.43330204486846924
Epoch [100/1000], Loss: 0.42985770106315613
Epoch [110/1000], Loss: 0.42593929171562195
Epoch [120/1000], Loss: 0.4215456545352936
Epoch [130/1000], Loss: 0.41671809554100037
Epoch [140/1000], Loss: 0.4114323854446411
Epoch [150/1000], Loss: 0.4056311547756195
Epoch [160/1000], Loss: 0.399372398853302
Epoch [170/1000], Loss: 0.39285808801651
Epoch [180/1000], Loss: 0.3862791061401367
Epoch [190/1000], Loss: 0.3798447847366333
Epoch [200/1000], Loss: 0.37367093563079834
Epoch [210/1000], Loss: 0.367669016122818
Epoch [220/1000], Loss: 0.3617028594017029
Epoch [230/1000], Loss: 0.3558383584022522
Epoch [240/1000]

In [63]:
with torch.no_grad():
    model.eval()
    X_test_tensor = torch.from_numpy(test_data.values).float()
    outputs = torch.sigmoid(model(X_test_tensor)).squeeze().numpy()

print(outputs)

[1.10604007e-04 1.91147009e-08 3.67513451e-08 1.71365380e-01
 1.68203442e-08 4.04275239e-01 1.41737640e-01 6.09164520e-13
 3.96305621e-01 5.32182111e-12 3.13457608e-01 1.00000000e+00
 2.49257596e-07 1.00000000e+00 1.00000000e+00 4.59492876e-04
 9.34079468e-01 2.68262182e-03 3.06272305e-08 2.37671398e-02
 9.99879241e-01 9.99998569e-01 9.99984384e-01 5.07976150e-09
 1.00000000e+00 9.55528557e-01 5.24325967e-01 2.01915941e-06
 2.28007464e-03 1.66060213e-13 7.14972317e-01 4.84450758e-01
 6.72997057e-01 8.82547796e-02 9.68492532e-05 1.53020501e-01
 9.99166012e-01 1.00000000e+00 1.48511291e-01 8.09482634e-01
 9.99990821e-01 3.02770699e-04 8.54749791e-03 2.58608583e-08
 8.51682425e-01 9.99998689e-01 2.80261803e-11 1.53856194e-02
 1.61711350e-01 1.00000000e+00 1.76193550e-01 2.82117039e-01
 1.39689863e-01 3.11753362e-01 9.85376346e-07 4.75588769e-01
 1.68773700e-02 5.56654530e-03 2.20275178e-05 1.95700690e-01
 9.51359868e-02 6.54845715e-01 3.86608328e-04 1.00000000e+00
 9.00565982e-01 1.000000