In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import torch
import seaborn as sns
import os
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


# Dataset

In [2]:
loc = '/home/arjun/Desktop/Datasets/titanic'

In [3]:
df = pd.read_csv(loc + '/train.csv')

In [4]:
df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)

In [5]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,male,22.0,1,0,7.25,S
1,2,1,1,female,38.0,1,0,71.2833,C
2,3,1,3,female,26.0,0,0,7.925,S
3,4,1,1,female,35.0,1,0,53.1,S
4,5,0,3,male,35.0,0,0,8.05,S


In [6]:
df['Sex'] = df['Sex'].replace({'male': 0, 'female': 1})
df['Embarked'] = df['Embarked'].replace({'S':1, 'C':2, 'Q':3})

In [7]:
df.fillna(df.mean(), inplace=True)
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,0,22.0,1,0,7.25,1.0
1,2,1,1,1,38.0,1,0,71.2833,2.0
2,3,1,3,1,26.0,0,0,7.925,1.0
3,4,1,1,1,35.0,1,0,53.1,1.0
4,5,0,3,0,35.0,0,0,8.05,1.0


In [8]:
y = df['Survived']
X = df.drop('Survived', axis=1)

In [9]:
X.describe()

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,446.0,2.308642,0.352413,29.699118,0.523008,0.381594,32.204208,1.362205
std,257.353842,0.836071,0.47799,13.002015,1.102743,0.806057,49.693429,0.635442
min,1.0,1.0,0.0,0.42,0.0,0.0,0.0,1.0
25%,223.5,2.0,0.0,22.0,0.0,0.0,7.9104,1.0
50%,446.0,3.0,0.0,29.699118,0.0,0.0,14.4542,1.0
75%,668.5,3.0,1.0,35.0,1.0,0.0,31.0,2.0
max,891.0,3.0,1.0,80.0,8.0,6.0,512.3292,3.0


In [10]:
X = X/X.max()

In [11]:
X = torch.tensor(np.array(X))
y = torch.tensor(np.array(y))

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.1)
X_train.shape, X_test.shape ,y_train.shape, y_test.shape

(torch.Size([801, 8]),
 torch.Size([90, 8]),
 torch.Size([801]),
 torch.Size([90]))

# Model Creation

In [13]:
lr =.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

  return torch._C._cuda_getDeviceCount() > 0


device(type='cpu')

In [14]:
class NeuralNet(nn.Module):
    def __init__(self, inp_size, h1, h2, out_size):
        super(NeuralNet, self).__init__()
        self.inp_size = inp_size
        self.lay1 = nn.Linear(inp_size, h1)
        self.lay2 = nn.ReLU()
        self.lay3 = nn.Linear(h1, h2)
        self.lay4 = nn.ReLU()
        self.lay7 = nn.Linear(h2, out_size)
        
    def forward(self,x):
        out = self.lay1(x)
        out = self.lay2(out)
        out = self.lay3(out)
        out = self.lay4(out)
        out = self.lay7(out)  
        return out
    
model = NeuralNet(8,100,100,2).to(device)

In [15]:
lossCategory = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(model.parameters(), lr = 1e-4)

# Training Model

In [16]:
train_ds = torch.cat((X_train, y_train.reshape(-1,1)), dim=-1)  
val_ds = torch.cat((X_test, y_test.reshape(-1,1)), dim=-1)  

In [17]:
train_loader = torch.utils.data.DataLoader(dataset=train_ds, shuffle=True, batch_size=500)
val_loader = torch.utils.data.DataLoader(dataset=val_ds, shuffle=True, batch_size=500)

In [24]:
num_epochs = 1000
for epoch in range(num_epochs):
    avg_loss = 0
    for i,batch in enumerate(train_loader):
 
        y = batch[:,-1].to(device).long()
        X = batch[:, :-1].to(device).to(torch.float32)
        output = model(X)
        loss = lossCategory(output, y)
        loss.backward()
        optimiser.step()
        optimiser.zero_grad()
        avg_loss += loss

    if avg_loss/len(train_loader) < .35: break
    if epoch%100 == 0:    
        print(f"Epoch:{epoch+1}/{num_epochs} Loss:{avg_loss/len(train_loader)}")

Epoch:1/1000 Loss:0.39758455753326416
Epoch:101/1000 Loss:0.3761930465698242
Epoch:201/1000 Loss:0.3859157860279083
Epoch:301/1000 Loss:0.3857325315475464
Epoch:401/1000 Loss:0.3761509358882904
Epoch:501/1000 Loss:0.37623167037963867
Epoch:601/1000 Loss:0.368654727935791
Epoch:701/1000 Loss:0.3674357533454895
Epoch:801/1000 Loss:0.3682706952095032
Epoch:901/1000 Loss:0.36305394768714905


# Checking Accuracy

In [25]:
correct = 0
tot = 0
for i,batch in enumerate(train_loader):
    
    y = batch[:,-1].to(device).long()
    X = batch[:, :-1].to(device).to(torch.float32)
    y_pred = torch.argmax(model(X), dim=1)
    s = sum(y_pred == y)
    correct += s
    tot += len(y)
    
print('Accuracy:', (correct*100/tot).item()) 

Accuracy: 84.64419555664062


In [26]:

correct = 0
tot = 0
for i,batch in enumerate(val_loader):
    y = batch[:,-1].to(device).long()
    X = batch[:, :-1].to(device).to(torch.float32)
    y_pred = torch.argmax(model(X), dim=1)
    s = sum(y_pred == y)
    correct += s
    tot += len(y)
    
print('Accuracy:', (correct*100/tot).item()) 

Accuracy: 87.77777862548828


# Running Test Dataset

In [21]:
df = pd.read_csv(loc + '/test.csv')
df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)
df['Sex'] = df['Sex'].replace({'male': 0, 'female': 1})
df['Embarked'] = df['Embarked'].replace({'S':1, 'C':2, 'Q':3})
df.fillna(df.mean(), inplace=True)
df = df/df.max()
ds = torch.tensor(np.array(df))
test_loader = torch.utils.data.DataLoader(dataset=ds, shuffle=False, batch_size=100)

In [22]:
ds.shape,df.shape

(torch.Size([418, 8]), (418, 8))

In [23]:
l = []
id_ = []
for i,batch in enumerate(test_loader):
        X = batch.to(device).to(torch.float32)
        y_pred = torch.argmax(model(X), dim=1)

        l.extend(list(y_pred))
   
id_ = [i for i in range(892,1310 )]
l = [i.item() for i in l]
ans = pd.DataFrame({'PassengerId':id_,'Survived':l})
ans.to_csv('./submission.csv', index=False)
print("Saved new O/P")
ans.head(20)

Saved new O/P


Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,0
5,897,0
6,898,1
7,899,0
8,900,1
9,901,0
