In [2]:
# Titanic Problem using PyTorch

# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
from sklearn.model_selection import train_test_split

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))



/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv


In [3]:
# Torch Version
print(f"Torch Version: {torch.__version__}")

# Device Agonistic Code
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

Torch Version: 2.0.0+cpu
Device: cpu


In [4]:
# Reading train data

trainData = pd.read_csv("/kaggle/input/titanic/train.csv")
trainData.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
testData = pd.read_csv("/kaggle/input/titanic/test.csv")
testData.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [6]:
X = pd.DataFrame({"Pclass": trainData["Pclass"],
                  "Age": trainData["Age"],
                  "SibSp": trainData["SibSp"],
                  "Parch": trainData["Parch"],
                  "Sex": trainData["Sex"]
                 })

X = pd.get_dummies(X)
y = trainData["Survived"]

In [7]:
# View Input Features
X.head()

Unnamed: 0,Pclass,Age,SibSp,Parch,Sex_female,Sex_male
0,3,22.0,1,0,0,1
1,1,38.0,1,0,1,0
2,3,26.0,0,0,1,0
3,1,35.0,1,0,1,0
4,3,35.0,0,0,0,1


In [8]:
y.head()

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64

In [9]:
# Convert Numpy Arrays to Tensors
X = torch.tensor(X.values, dtype=torch.float32)
y = torch.tensor(y.values, dtype=torch.float32)

In [19]:
# Train Test Split
XTrain, XTest, yTrain, yTest = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)
len(XTrain), len(yTrain), len(XTest), len(yTest)

(712, 712, 179, 179)

In [14]:
# Shape of tensors
X.shape, y.shape

(torch.Size([891, 6]), torch.Size([891]))

In [18]:
# First 5 rows of X and y

print(f"First five rows of X:\n{X[:5]}")
print(f"\nFirst five rows of y:\n{y[:5]}")

First five rows of X:
tensor([[ 3., 22.,  1.,  0.,  0.,  1.],
        [ 1., 38.,  1.,  0.,  1.,  0.],
        [ 3., 26.,  0.,  0.,  1.,  0.],
        [ 1., 35.,  1.,  0.,  1.,  0.],
        [ 3., 35.,  0.,  0.,  0.,  1.]])

First five rows of y:
tensor([0., 1., 1., 1., 0.])


In [20]:
# Creating a Neural Network

INPUT_FEATURES = 5

class Titanic(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.layerStack = nn.Sequential(
            nn.Linear(in_features=INPUT_FEATURES, out_features=16),
            nn.ReLU(),
            nn.Linear(in_features=16, out_features=16),
            nn.ReLU(),
            nn.Linear(in_features=16, out_features=1),
            nn.ReLU()
        )
        
    def forward(self, x):
        return self.layerStack(x)

In [21]:
torch.manual_seed(42)
model1 = Titanic()
model1.to(device)
model1

Titanic(
  (layerStack): Sequential(
    (0): Linear(in_features=5, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=1, bias=True)
    (5): ReLU()
  )
)

In [22]:
# Setup loss function and optimizer

lossFn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params=model1.parameters(), lr=0.1)

In [23]:
# Training Loop

epochs = 100

XTrain, yTrain = XTrain.to(device), yTrain.to(device)
XTest, yTest = XTest.to(device), yTest.to(device)

for epoch in range(epochs):
    model1.train()
    yLogits = model1(XTrain).squeeze()
    yPred = torch.round(sigmoid(yLogits))
    
    loss = lossFn(yPred, yTrain)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    model1.eval()
    with torch.inference_mode():
        testLogits = model1(XTest)
        testPred = torch.round(sigmoid(testLogits))
        testLoss = lossFn(testPred, yTest)
    
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f} | Test Loss: {testLoss:.5f}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (712x6 and 5x16)