In [2]:
# Titanic Problem using PyTorch

# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
from sklearn.model_selection import train_test_split

import os
# Print the full path of every file found under the Kaggle input directory
for root, _subdirs, names in os.walk('/kaggle/input'):
    for fullPath in (os.path.join(root, name) for name in names):
        print(fullPath)



/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv


In [3]:
# Report the installed PyTorch version
print(f"Torch Version: {torch.__version__}")

# Device-agnostic setup: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Device: {device}")

Torch Version: 2.0.0+cpu
Device: cpu


In [4]:
# Load the training split from the Kaggle input directory and preview it

trainData = pd.read_csv(filepath_or_buffer="/kaggle/input/titanic/train.csv")
trainData.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
# Load the test split from the Kaggle input directory and preview it
testData = pd.read_csv(filepath_or_buffer="/kaggle/input/titanic/test.csv")
testData.head(5)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [16]:
# Drop rows that have a NaN in any of the columns used as model inputs.
# NOTE(review): the saved output for this cell and the jump in execution
# counts suggest it was last run against frames already filtered by an
# earlier version of the cell -- re-run on a fresh kernel to confirm the shapes.

requiredCols = ['Pclass', 'Age', 'SibSp', 'Parch', 'Sex']
for frame in (trainData, testData):
    # Same in-place filter applied to both splits
    frame.dropna(subset=requiredCols, inplace=True)

trainData.shape, testData.shape

((183, 12), (87, 11))

In [17]:
# Select the model inputs straight from the cleaned training frame
featureCols = ["Pclass", "Age", "SibSp", "Parch", "Sex"]
X = trainData[featureCols]

# One-hot encode the string-valued Sex column; numeric columns pass through
X = pd.get_dummies(X)

# Target label for each remaining passenger
y = trainData["Survived"]

In [18]:
# Sanity check: the feature frame and the label series should have the same number of rows
X.shape, y.shape

((183, 6), (183,))

In [19]:
# View the first rows of the one-hot encoded input features
X.head()

Unnamed: 0,Pclass,Age,SibSp,Parch,Sex_female,Sex_male
1,1,38.0,1,0,1,0
3,1,35.0,1,0,1,0
6,1,54.0,0,0,0,1
10,3,4.0,1,1,1,0
11,1,58.0,0,0,1,0


In [20]:
# View the first target labels (the Survived column)
y.head()

1     1
3     1
6     0
10    1
11    1
Name: Survived, dtype: int64

In [21]:
# Convert the pandas feature frame and label series to float32 tensors.
# Casting through to_numpy(dtype=...) yields one homogeneous numeric array even
# when the dummy columns are boolean (the get_dummies default in pandas >= 2.0);
# in that case X.values would be an object array that torch.tensor cannot convert.
X = torch.tensor(X.to_numpy(dtype=np.float32), dtype=torch.float32)
y = torch.tensor(y.to_numpy(dtype=np.float32), dtype=torch.float32)

In [22]:
# Train Test Split
# Hold out 20% of the rows; the fixed random_state keeps the shuffle repeatable
splitParts = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)
XTrain, XTest, yTrain, yTest = splitParts
len(XTrain), len(yTrain), len(XTest), len(yTest)

(146, 146, 37, 37)

In [23]:
# Shape of the full (pre-split) feature and label tensors
X.shape, y.shape

(torch.Size([183, 6]), torch.Size([183]))

In [24]:
# First 5 rows of X and y (the full tensors, not the train/test splits)

print(f"First five rows of X:\n{X[:5]}")
print(f"\nFirst five rows of y:\n{y[:5]}")

First five rows of X:
tensor([[ 1., 38.,  1.,  0.,  1.,  0.],
        [ 1., 35.,  1.,  0.,  1.,  0.],
        [ 1., 54.,  0.,  0.,  0.,  1.],
        [ 3.,  4.,  1.,  1.,  1.,  0.],
        [ 1., 58.,  0.,  0.,  1.,  0.]])

First five rows of y:
tensor([1., 1., 0., 1., 1.])


In [25]:
# Peek at the first 20 rows of each feature split
print(f"20 rows of XTrain:\n{XTrain[:20]}")
print(f"\n20 rows of XTest:\n{XTest[:20]}")

20 rows of XTrain:
tensor([[ 1.0000, 45.5000,  0.0000,  0.0000,  0.0000,  1.0000],
        [ 1.0000, 29.0000,  1.0000,  0.0000,  0.0000,  1.0000],
        [ 2.0000,  3.0000,  1.0000,  1.0000,  0.0000,  1.0000],
        [ 3.0000, 25.0000,  0.0000,  0.0000,  0.0000,  1.0000],
        [ 1.0000, 37.0000,  1.0000,  1.0000,  0.0000,  1.0000],
        [ 1.0000, 21.0000,  0.0000,  0.0000,  1.0000,  0.0000],
        [ 1.0000, 49.0000,  1.0000,  0.0000,  0.0000,  1.0000],
        [ 1.0000, 56.0000,  0.0000,  0.0000,  0.0000,  1.0000],
        [ 1.0000, 47.0000,  0.0000,  0.0000,  0.0000,  1.0000],
        [ 1.0000, 22.0000,  0.0000,  2.0000,  1.0000,  0.0000],
        [ 1.0000, 38.0000,  1.0000,  0.0000,  0.0000,  1.0000],
        [ 1.0000, 18.0000,  0.0000,  2.0000,  1.0000,  0.0000],
        [ 1.0000, 35.0000,  1.0000,  0.0000,  1.0000,  0.0000],
        [ 1.0000, 25.0000,  1.0000,  0.0000,  0.0000,  1.0000],
        [ 3.0000,  6.0000,  0.0000,  1.0000,  0.0000,  1.0000],
        [ 1.0000, 54.

In [51]:
# Creating a Neural Network

class Titanic(nn.Module):
    """Small fully connected binary classifier for tabular passenger features.

    The network is three hidden ``Linear`` + ``ReLU`` stages followed by a
    single-unit ``Linear`` output. The output is a raw logit: no sigmoid is
    applied inside the model.

    Args:
        in_features: Number of input columns per sample. Defaults to 6, the
            width this class was originally hard-coded to.
        hidden_units: Width of each of the three hidden layers. Defaults to 16.
    """

    def __init__(self, in_features: int = 6, hidden_units: int = 16):
        super().__init__()

        # Layer names and creation order are kept stable so that seeded
        # initialisation and state_dict keys match the fixed-size version.
        self.layer1 = nn.Linear(in_features=in_features, out_features=hidden_units)
        self.layer2 = nn.Linear(in_features=hidden_units, out_features=hidden_units)
        self.layer3 = nn.Linear(in_features=hidden_units, out_features=hidden_units)
        self.layer4 = nn.Linear(in_features=hidden_units, out_features=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of shape (N, in_features) to logits of shape (N, 1)."""
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        return self.layer4(x)

In [52]:
# Seed PyTorch's RNG right before constructing the model so the initial weights are repeatable
torch.manual_seed(42)
# Build the network and move its parameters to the selected device
model0 = Titanic().to(device)
model0

Titanic(
  (layer1): Linear(in_features=6, out_features=16, bias=True)
  (layer2): Linear(in_features=16, out_features=16, bias=True)
  (layer3): Linear(in_features=16, out_features=16, bias=True)
  (layer4): Linear(in_features=16, out_features=1, bias=True)
  (relu): ReLU()
)

In [53]:
# Predictions with untrained model
# This is a forward-only sanity check, so run it without autograd tracking.
with torch.inference_mode():
    untrainedPred = model0(XTest.to(device))

# The model output keeps a trailing axis of size 1, unlike the flat label tensor
print(f"Length of predictions: {len(untrainedPred)}, shape: {untrainedPred.shape}")
print(f"Length of test samples: {len(yTest)}, shape: {yTest.shape}")

# These values are raw logits, not probabilities or 0/1 labels
print(f"\nFirst 10 predictions: {untrainedPred[:10]}")
print(f"\nFirst 10 test labels: {yTest[:10]}")

Length of predicitons: 37, shape: torch.Size([37, 1])
Length of test samples: 37, shape: torch.Size([37])

First 10 predictions: tensor([[0.7955],
        [0.8960],
        [0.7266],
        [1.3541],
        [1.3117],
        [1.8002],
        [0.8032],
        [1.1682],
        [1.1251],
        [1.4608]], grad_fn=<SliceBackward0>)

First 10 test labels: tensor([0., 0., 1., 0., 1., 0., 0., 1., 1., 0.])


In [54]:
# Loss and optimizer setup

# The network's last layer emits raw logits, so use the logits-aware BCE loss
lossFn = nn.BCEWithLogitsLoss()

# Plain SGD over every parameter of model0
optimizer = torch.optim.SGD(model0.parameters(), lr=0.1)

In [55]:
# Accuracy Function

def accuracyFn(yTrue, yPred):
    """Return the percentage of predictions that match the labels.

    Both tensors are flattened before comparison so that a column vector of
    shape (N, 1) and a flat vector of shape (N,) are compared element by
    element instead of being broadcast against each other into an (N, N)
    grid, which would inflate the count of matches.

    Args:
        yTrue: Tensor of ground-truth labels.
        yPred: Tensor of predicted labels with the same number of elements.

    Returns:
        Accuracy as a float percentage in [0, 100]; 0.0 when there are no
        samples to score.

    Raises:
        ValueError: If the two tensors hold different numbers of elements.
    """
    # reshape(-1) also turns a 0-dim tensor into a one-element vector
    yTrue = yTrue.reshape(-1)
    yPred = yPred.reshape(-1)

    total = yPred.numel()
    if yTrue.numel() != total:
        raise ValueError(
            f"yTrue has {yTrue.numel()} elements but yPred has {total}"
        )
    if total == 0:
        return 0.0

    correct = torch.eq(yTrue, yPred).sum().item()
    acc = (correct / total) * 100
    return acc

In [59]:
# Training Loop
# NOTE: this cell trains model0 in place. Re-running it continues from the
# current weights; re-create the model and optimizer first for a fresh run.

epochs = 200

# Keep every tensor on the same device as the model
XTrain, yTrain = XTrain.to(device), yTrain.to(device)
XTest, yTest = XTest.to(device), yTest.to(device)

for epoch in range(epochs):
    model0.train()
    # Drop only the trailing size-1 axis: (N, 1) -> (N,). A bare squeeze()
    # would also collapse the batch axis when N == 1, so the logits would no
    # longer have the same shape as the labels.
    yLogits = model0(XTrain).squeeze(dim=1)
    # Logits -> probabilities -> hard 0/1 predictions (used for accuracy only)
    yPred = torch.round(torch.sigmoid(yLogits))
    
    # The loss takes the raw logits, not the rounded predictions
    loss = lossFn(yLogits, yTrain)
    acc = accuracyFn(yTrue=yTrain, yPred=yPred)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Evaluate on the held-out split without autograd tracking
    model0.eval()
    with torch.inference_mode():
        testLogits = model0(XTest).squeeze(dim=1)
        testPred = torch.round(torch.sigmoid(testLogits))
        testLoss = lossFn(testLogits, yTest)
        testAcc = accuracyFn(yTrue=yTest, yPred=testPred)
    
    # Report every 10th epoch
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {testLoss:.5f}, Test Accuracy: {testAcc:.2f}%")

Epoch: 0 | Loss: 0.54120, Accuracy: 70.55% | Test Loss: 0.55277, Test Accuracy: 62.16%
Epoch: 10 | Loss: 0.60190, Accuracy: 71.23% | Test Loss: 0.58286, Test Accuracy: 62.16%
Epoch: 20 | Loss: 0.54204, Accuracy: 70.55% | Test Loss: 0.53728, Test Accuracy: 67.57%
Epoch: 30 | Loss: 0.68311, Accuracy: 68.49% | Test Loss: 0.77526, Test Accuracy: 62.16%
Epoch: 40 | Loss: 0.64843, Accuracy: 68.49% | Test Loss: 0.72408, Test Accuracy: 62.16%
Epoch: 50 | Loss: 0.62808, Accuracy: 68.49% | Test Loss: 0.68338, Test Accuracy: 62.16%
Epoch: 60 | Loss: 0.60755, Accuracy: 68.49% | Test Loss: 0.61863, Test Accuracy: 62.16%
Epoch: 70 | Loss: 0.60476, Accuracy: 68.49% | Test Loss: 0.61299, Test Accuracy: 62.16%
Epoch: 80 | Loss: 0.60230, Accuracy: 67.81% | Test Loss: 0.60402, Test Accuracy: 62.16%
Epoch: 90 | Loss: 0.60042, Accuracy: 67.81% | Test Loss: 0.60048, Test Accuracy: 62.16%
Epoch: 100 | Loss: 0.59883, Accuracy: 67.81% | Test Loss: 0.59684, Test Accuracy: 62.16%
Epoch: 110 | Loss: 0.59703, Accu