In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import torch
import seaborn as sns
import os
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


# Dataset

In [2]:
# Directory containing the Titanic train.csv / test.csv files.
# Set the TITANIC_DATA_DIR environment variable to point somewhere else; the
# fallback is the original hard-coded location so existing runs keep working.
loc = os.environ.get('TITANIC_DATA_DIR', '/home/arjun/Desktop/Datasets/titanic')

In [3]:
# Load the training split from the dataset directory.
df = pd.read_csv(f"{loc}/train.csv")

In [4]:
# Discard Name, Ticket and Cabin; only the remaining columns are used below.
df = df.drop(columns=['Name', 'Ticket', 'Cabin'])

In [5]:
# Preview the first rows of the frame after the column drop.
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,male,22.0,1,0,7.25,S
1,2,1,1,female,38.0,1,0,71.2833,C
2,3,1,3,female,26.0,0,0,7.925,S
3,4,1,1,female,35.0,1,0,53.1,S
4,5,0,3,male,35.0,0,0,8.05,S


In [6]:
# Encode the two categorical columns as numbers.
# pd.to_numeric makes the numeric dtype explicit instead of relying on
# replace() silently downcasting the object column, which recent pandas
# releases deprecate. Missing Embarked values stay NaN and are imputed later.
df['Sex'] = pd.to_numeric(df['Sex'].replace({'male': 0, 'female': 1}))
df['Embarked'] = pd.to_numeric(df['Embarked'].replace({'S':1, 'C':2, 'Q':3}))

In [7]:
# Fill every remaining NaN with the mean of its column, then preview the result.
df = df.fillna(df.mean())
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,0,22.0,1,0,7.25,1.0
1,2,1,1,1,38.0,1,0,71.2833,2.0
2,3,1,3,1,26.0,0,0,7.925,1.0
3,4,1,1,1,35.0,1,0,53.1,1.0
4,5,0,3,0,35.0,0,0,8.05,1.0


In [8]:
# Separate the model inputs from the 'Survived' target column.
# NOTE(review): PassengerId stays in X as a feature - confirm that an
# identifier column is meant to be a model input.
X = df.drop(columns='Survived')
y = df['Survived']

In [9]:
# Display the feature frame (every column of df except 'Survived').
X

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,3,0,22.000000,1,0,7.2500,1.0
1,2,1,1,38.000000,1,0,71.2833,2.0
2,3,3,1,26.000000,0,0,7.9250,1.0
3,4,1,1,35.000000,1,0,53.1000,1.0
4,5,3,0,35.000000,0,0,8.0500,1.0
...,...,...,...,...,...,...,...,...
886,887,2,0,27.000000,0,0,13.0000,1.0
887,888,1,1,19.000000,0,0,30.0000,1.0
888,889,3,1,29.699118,1,2,23.4500,1.0
889,890,1,0,26.000000,0,0,30.0000,2.0


In [10]:
# Display the label Series taken from the 'Survived' column.
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [11]:
# Convert features and labels to tensors by way of NumPy arrays; the tensor
# dtypes follow whatever dtypes the arrays have.
X, y = [torch.tensor(np.array(values)) for values in (X, y)]

In [12]:
# Hold out half of the rows for validation. random_state pins the shuffle so
# the split, and every metric computed from it, is the same on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.5, random_state=42
)
X_train.shape, X_test.shape ,y_train.shape, y_test.shape

(torch.Size([445, 8]),
 torch.Size([446, 8]),
 torch.Size([445]),
 torch.Size([446]))

# Model Creation

In [13]:
# Learning rate (the same value the optimiser cell passes to Adam).
lr = 1e-4
# Seed PyTorch's global RNG so weight initialisation and batch shuffling start
# from a fixed state after a kernel restart.
torch.manual_seed(0)
# Prefer the GPU when one is available; otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [14]:
class NeuralNet(nn.Module):
    """Fully connected network: Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        inp_size: width of each input row.
        h1: width of the first hidden layer.
        h2: width of the second hidden layer.
        out_size: number of values produced per row; they are returned
            without any activation applied.
    """

    def __init__(self, inp_size, h1, h2, out_size):
        super().__init__()
        self.inp_size = inp_size
        # Attribute names are kept exactly (including the jump from lay4 to
        # lay7) because they determine the parameter names of the module.
        self.lay1 = nn.Linear(inp_size, h1)
        self.lay2 = nn.ReLU()
        self.lay3 = nn.Linear(h1, h2)
        self.lay4 = nn.ReLU()
        self.lay7 = nn.Linear(h2, out_size)

    def forward(self, x):
        """Pass `x` through the five layers in order and return the result."""
        for layer in (self.lay1, self.lay2, self.lay3, self.lay4, self.lay7):
            x = layer(x)
        return x
    
# Size the input layer from the training data rather than a literal 8, so the
# network keeps matching X_train if the set of feature columns changes.
model = NeuralNet(X_train.shape[1], 100, 100, 2).to(device)

In [15]:
# Cross-entropy loss over the model outputs. Adam takes the `lr` defined in the
# configuration cell instead of repeating the value as a second literal, so
# changing `lr` there actually changes the training run.
lossCategory = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(model.parameters(), lr=lr)

# Training Model

In [16]:
# Pack each split into a single 2-D tensor whose last column is the label, so
# one DataLoader batch carries the features and the target together.
train_ds = torch.cat([X_train, y_train.unsqueeze(1)], dim=1)
val_ds = torch.cat([X_test, y_test.unsqueeze(1)], dim=1)

In [17]:
# Batches of 100 rows, reshuffled on every pass, for both splits.
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=100, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_ds, batch_size=100, shuffle=True)

In [33]:
num_epochs = 1000
model.train()
for epoch in range(num_epochs):
    # Running sum of the per-batch losses for this epoch, kept as a Python
    # float: adding the loss tensor itself would keep autograd history
    # attached to the running total.
    avg_loss = 0.0
    for batch in train_loader:
        # The last column of each batch row is the label, the rest are features.
        # Batch-local names are used so the dataset-level X / y are not overwritten.
        targets = batch[:, -1].to(device).long()
        features = batch[:, :-1].to(device).to(torch.float32)

        # Clear gradients before the backward pass so nothing left over from an
        # earlier step (or an interrupted run of this cell) leaks into this one.
        optimiser.zero_grad()
        loss = lossCategory(model(features), targets)
        loss.backward()
        optimiser.step()
        avg_loss += loss.item()

    mean_loss = avg_loss / len(train_loader)
    # Stop early once the mean batch loss of an epoch drops below 0.3.
    if mean_loss < .3:
        break
    # Report progress every 100th epoch.
    if epoch % 100 == 0:
        print(f"Epoch:{epoch+1}/{num_epochs} Loss:{mean_loss}")

Epoch:1/1000 Loss:0.3315698206424713
Epoch:101/1000 Loss:0.39021608233451843
Epoch:201/1000 Loss:0.33550509810447693
Epoch:301/1000 Loss:0.32042834162712097
Epoch:401/1000 Loss:0.3204706311225891
Epoch:501/1000 Loss:0.32023313641548157
Epoch:601/1000 Loss:0.35392919182777405
Epoch:701/1000 Loss:0.3486168384552002
Epoch:801/1000 Loss:0.33790773153305054
Epoch:901/1000 Loss:0.3300239145755768


# Checking Accuracy

In [35]:
# Accuracy on the training split.
correct = 0
tot = 0
model.eval()
# Inference only: no_grad avoids building an autograd graph for every batch.
with torch.no_grad():
    for batch in train_loader:
        # The last column is the label, the rest are features. Batch-local names
        # keep the dataset-level X / y from being overwritten.
        targets = batch[:, -1].to(device).long()
        features = batch[:, :-1].to(device).to(torch.float32)
        y_pred = torch.argmax(model(features), dim=1)
        # Tensor-side reduction instead of Python's sum() iterating elementwise.
        correct += (y_pred == targets).sum().item()
        tot += len(targets)

print('Accuracy:', correct * 100 / tot)

Accuracy: 84.9438247680664


In [34]:

# Accuracy on the held-out validation split.
correct = 0
tot = 0
model.eval()
# Inference only: no_grad avoids building an autograd graph for every batch.
with torch.no_grad():
    for batch in val_loader:
        # The last column is the label, the rest are features. Batch-local names
        # keep the dataset-level X / y from being overwritten.
        targets = batch[:, -1].to(device).long()
        features = batch[:, :-1].to(device).to(torch.float32)
        y_pred = torch.argmax(model(features), dim=1)
        # Tensor-side reduction instead of Python's sum() iterating elementwise.
        correct += (y_pred == targets).sum().item()
        tot += len(targets)

print('Accuracy:', correct * 100 / tot)

Accuracy: 76.45740509033203


# Running Test Dataset

In [21]:
# Load the test split and apply the same preprocessing steps as for training.
df = pd.read_csv(f"{loc}/test.csv")
df = df.drop(columns=['Name', 'Ticket', 'Cabin'])
# pd.to_numeric makes the numeric dtype explicit instead of relying on
# replace() silently downcasting the object column, which recent pandas
# releases deprecate.
df['Sex'] = pd.to_numeric(df['Sex'].replace({'male': 0, 'female': 1}))
df['Embarked'] = pd.to_numeric(df['Embarked'].replace({'S':1, 'C':2, 'Q':3}))
# NOTE(review): NaNs are filled with this file's own column means, not the
# means of the training data - confirm that is intended.
df = df.fillna(df.mean())
ds = torch.tensor(np.array(df))
# shuffle=False keeps the predictions in the same order as the rows of df.
test_loader = torch.utils.data.DataLoader(dataset=ds, shuffle=False, batch_size=100)

In [22]:
# Compare the tensor's shape with that of the DataFrame it was built from.
ds.shape,df.shape

(torch.Size([418, 8]), (418, 8))

In [23]:
# Predict a class for every test row and write the submission file.
l = []
model.eval()
# Inference only: no_grad avoids building an autograd graph for every batch.
with torch.no_grad():
    for batch in test_loader:
        features = batch.to(device).to(torch.float32)
        y_pred = torch.argmax(model(features), dim=1)
        # tolist() yields plain Python ints, so no per-element .item() pass is needed.
        l.extend(y_pred.tolist())

# Take the ids from the loaded test frame rather than a hard-coded 892..1309
# range, so each prediction is labelled with the id of the row it came from.
id_ = df['PassengerId'].tolist()
ans = pd.DataFrame({'PassengerId':id_,'Survived':l})
ans.to_csv('./submission.csv', index=False)
print("Saved new O/P")
ans.head(20)

Saved new O/P


Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,0
5,897,0
6,898,0
7,899,0
8,900,0
9,901,0
