In [49]:
pip install kaggle torch numpy matplotlib pandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [50]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cpu device


Loading Data

In [82]:
train_file_path = r"titanic/train.csv"
test_file_path = r"titanic/test.csv"
train = pd.read_csv(train_file_path)
test = pd.read_csv(test_file_path)

Data Preprocessing

In [83]:
train['Age'].fillna(train['Age'].median(), inplace=True)
test['Age'].fillna(test['Age'].median(), inplace=True)
train.drop('Cabin', axis=1, inplace=True)
test.drop('Cabin', axis=1, inplace=True)

features = ['Pclass', 'Sex', 'Age', 'Parch']
y_train = train['Survived'].to_numpy(dtype='float32')
x_train = pd.get_dummies(train[features], drop_first=True, dtype='float32')
x_test = pd.get_dummies(test[features], drop_first=True, dtype='float32')
x_train_tensor = torch.tensor(x_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
x_test_tensor = torch.tensor(x_test.values, dtype=torch.float32)

print(f'x_train_tensor shape: {x_train_tensor.shape}')

x_train_tensor shape: torch.Size([891, 4])


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train['Age'].fillna(train['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test['Age'].fillna(test['Age'].median(), inplace=True)


In [94]:
from torch import nn

class SurvivalClassifier(nn.Module):
    def __init__(self, input_features):
        super().__init__()
        self.layer_1 = nn.Linear(input_features, 8)
        self.layer_2 = nn.Linear(8, 32)
        self.layer_3 = nn.Linear(32, 8)
        self.layer_4 = nn.Linear(8, 1)
        self.relu = nn.ReLU()
    def forward(self, x):
        return self.layer_4(self.relu(self.layer_3(self.relu(self.layer_2(self.relu(self.layer_1(x)))))))

input_features = x_train_tensor.shape[1]
model = SurvivalClassifier(input_features).to(device)

In [95]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

In [96]:
def accuracy_fn(y_true, y_pred):
    predicted = torch.round(torch.sigmoid(y_pred))
    correct = (predicted == y_true).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc

In [99]:
epochs = 10000

x_train_tensor = x_train_tensor.to(device)
y_train_tensor = y_train_tensor.to(device)

for epoch in range(epochs):
    model.train()
    y_logits = model(x_train_tensor)
    loss = loss_fn(y_logits, y_train_tensor)
    acc = accuracy_fn(y_train_tensor, y_logits)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 1000 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Acc: {acc:.2f}%")

Epoch: 0 | Loss: 0.40075, Acc: 82.27%
Epoch: 1000 | Loss: 0.41379, Acc: 81.71%
Epoch: 2000 | Loss: 0.46150, Acc: 77.33%
Epoch: 3000 | Loss: 0.39345, Acc: 83.16%
Epoch: 4000 | Loss: 0.38983, Acc: 83.16%
Epoch: 5000 | Loss: 0.39176, Acc: 82.72%
Epoch: 6000 | Loss: 0.38940, Acc: 83.28%
Epoch: 7000 | Loss: 0.38842, Acc: 83.16%
Epoch: 8000 | Loss: 0.38240, Acc: 83.61%
Epoch: 9000 | Loss: 0.37650, Acc: 83.50%


In [92]:
import csv
model.eval()
with torch.inference_mode():
     logits = model(x_test_tensor).squeeze()
     preds = torch.round(torch.sigmoid(logits))

results = preds.numpy().astype(dtype=np.int32)
data = []
passengerId = test["PassengerId"].to_numpy(dtype="int32")
for index,result in enumerate(results):
     data.append({"PassengerId":passengerId[index],"Survived":result})
df = pd.DataFrame(data)
df.to_csv("output.csv",index=False)
print("Output Saved")

Output Saved
