In [23]:
import torch
from torch import nn
import pandas as pd
import numpy as np
from tqdm import tqdm

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [3]:
data = pd.read_csv("data/train.csv")
y = data["Survived"]

In [4]:
X = data.drop("Name", axis=1)
X.drop("Ticket", inplace=True, axis=1)
X.drop("Cabin", inplace=True, axis=1)
X.drop("Survived", inplace=True, axis=1)
X.drop("PassengerId", inplace=True, axis=1)
X

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,male,22.0,1,0,7.2500,S
1,1,female,38.0,1,0,71.2833,C
2,3,female,26.0,0,0,7.9250,S
3,1,female,35.0,1,0,53.1000,S
4,3,male,35.0,0,0,8.0500,S
...,...,...,...,...,...,...,...
886,2,male,27.0,0,0,13.0000,S
887,1,female,19.0,0,0,30.0000,S
888,3,female,,1,2,23.4500,S
889,1,male,26.0,0,0,30.0000,C


In [8]:
X["Sex"] = X["Sex"].apply(lambda x: 0 if x == "male" else 1)

le = LabelEncoder()
X["Embarked"] = le.fit_transform(X["Embarked"])
X

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,1,22.0,1,0,7.2500,2
1,1,1,38.0,1,0,71.2833,0
2,3,1,26.0,0,0,7.9250,2
3,1,1,35.0,1,0,53.1000,2
4,3,1,35.0,0,0,8.0500,2
...,...,...,...,...,...,...,...
886,2,1,27.0,0,0,13.0000,2
887,1,1,19.0,0,0,30.0000,2
888,3,1,,1,2,23.4500,2
889,1,1,26.0,0,0,30.0000,0


In [9]:
X["Age"].fillna(X["Age"].median(), inplace=True)
X

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,1,22.0,1,0,7.2500,2
1,1,1,38.0,1,0,71.2833,0
2,3,1,26.0,0,0,7.9250,2
3,1,1,35.0,1,0,53.1000,2
4,3,1,35.0,0,0,8.0500,2
...,...,...,...,...,...,...,...
886,2,1,27.0,0,0,13.0000,2
887,1,1,19.0,0,0,30.0000,2
888,3,1,28.0,1,2,23.4500,2
889,1,1,26.0,0,0,30.0000,0


In [74]:
ageScaler = StandardScaler()
fareScaler = StandardScaler()
X["Fare"] = fareScaler.fit_transform(X["Fare"].values.reshape(-1, 1))
X["Age"] = ageScaler.fit_transform(X["Age"].values.reshape(-1, 1))

In [10]:
for col in X.columns:
    n = X[col].isna().sum()
    print(f"{col}: {n}")

Pclass: 0
Sex: 0
Age: 0
SibSp: 0
Parch: 0
Fare: 0
Embarked: 0


In [75]:
X.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,1,-0.565736,1,0,-0.502445,2
1,1,1,0.663861,1,0,0.786845,0
2,3,1,-0.258337,0,0,-0.488854,2
3,1,1,0.433312,1,0,0.42073,2
4,3,1,0.433312,0,0,-0.486337,2


In [76]:
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size=0.2)
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32)

In [77]:
X_train.shape, y_train.shape

(torch.Size([712, 7]), torch.Size([712]))

In [89]:

class TitanicModel(nn.Module):
    def __init__(self, input, hidden, output) -> None:
        super().__init__()
        self.fc1 = nn.Linear(input, 256)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 32)
        self.fc4 = nn.Linear(32, output)
        self.sigmoid = nn.Sigmoid()
        
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.relu(out)
        out = self.fc4(out)
        return self.sigmoid(out)

In [90]:
model = TitanicModel(7, 16, 1)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [91]:
epochs = 1000
batch_size = 100
for epoch in tqdm(range(epochs)):
    for i in range(0, len(X_train), 100):
        inputs = X_train[i:i+batch_size]
        labels = y_train[i:i+batch_size].unsqueeze(1)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
    if(epoch % 200 == 199):
        model.eval()
        with torch.no_grad():
            total_loss = 0
            total_correct = 0
            total_samples = 0
            for i in range(0, len(X_test), batch_size):
                inputs = X_test[i:i+batch_size]
                labels = y_test[i:i+batch_size].unsqueeze(1)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                total_loss += loss.item() * len(inputs)
                total_correct += ((outputs > 0.5).float() == labels).sum().item()
                total_samples += len(inputs)
            validation_acc = total_correct / total_samples
            validation_loss = total_loss / total_samples
            print(f"Epoch {epoch}: Accuracy: {validation_acc}, Loss: {validation_loss}")
            model.train()

 20%|██        | 202/1000 [00:08<00:32, 24.42it/s]

Epoch 199: Accuracy: 0.7094972067039106, Loss: 0.9899554792063197


 40%|████      | 402/1000 [00:17<00:24, 24.77it/s]

Epoch 399: Accuracy: 0.6983240223463687, Loss: 2.2572973680229826


 60%|██████    | 602/1000 [00:28<00:16, 24.16it/s]

Epoch 599: Accuracy: 0.7094972067039106, Loss: 2.98134958011478


 81%|████████  | 808/1000 [00:36<00:04, 45.05it/s]

Epoch 799: Accuracy: 0.7039106145251397, Loss: 4.073295880962351


100%|██████████| 1000/1000 [00:40<00:00, 24.73it/s]

Epoch 999: Accuracy: 0.7094972067039106, Loss: 4.258076835611013



