In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# importing train and test sets from google drive
train_set_up = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Kaggle/Titanic/train.csv', index_col='PassengerId')
test_set_up = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Kaggle/Titanic/test.csv', index_col='PassengerId')


**Pre Processing:**

In [7]:
## Droping cabin since 77% is NaN, and Ticket and Name are not informative.

def pre_proccess(up_set, isTrain=True):
  # Setting all NaN ages to the average age. The same with Fare.
  avg_age = np.floor(up_set['Age'][up_set['Age'].isna()==False].mean())
  up_set['Age'][up_set['Age'].isna()] = avg_age
  avg_fare = np.floor(up_set['Fare'][up_set['Fare'].isna()==False].mean())
  up_set['Fare'][up_set['Fare'].isna()] = avg_fare

  # Droping irrelevant columns, and changing catagorical columns to numeric vals.
  up_set_proc = up_set.drop(columns=['Ticket','Embarked', 'Name', 'Cabin'])
  up_set_proc['Sex'][up_set_proc['Sex']=='male'] = 0
  up_set_proc['Sex'][up_set_proc['Sex']=='female'] = 1
  up_set_proc['Sex'] = up_set_proc['Sex'].astype(int)


  # Max Abs normalization:
  for col in ['Age', 'Fare', 'SibSp', 'Parch']:
    up_set_proc[col] = up_set_proc[col] /up_set_proc[col].abs().max()


  return up_set_proc




class MyDataset(Dataset):
  def __init__(self,df, isTest=False):
    self.isTest = isTest
    self.indexes = df.index.to_numpy()
    if self.isTest==False:
      y=df['Survived'].to_numpy()
      self.y_train=torch.tensor(y,dtype=torch.float32,requires_grad=True)
      x=df.drop(columns='Survived').to_numpy()
      self.x_train=torch.tensor(x,dtype=torch.float32,requires_grad=True)
    else:
      x=df.to_numpy()
      self.x_train=torch.tensor(x,dtype=torch.float32, requires_grad=True)
 
  def __len__(self):
    return self.x_train.shape[0]
   
  def __getitem__(self,idx):
    if self.isTest==False:
      return self.indexes[idx],self.x_train[idx],self.y_train[idx]
    else:
      return self.indexes[idx],self.x_train[idx]


def errors_per_batch(outputs, labels):
  error = 0
  for i in range(len(labels)):
    if outputs[i] != labels[i]:
      error += 1
  return error

**Model Creation:**

In [202]:
class Net(nn.Module):
    def __init__(self, dout1 = 0.5,dout2 = 0.5,dout3 = 0.5):
      super(Net, self).__init__()
      self.layer1 = nn.Sequential(
          nn.Linear(6, 10),
          nn.Dropout1d(dout1),
          nn.ReLU()
      )

      self.layer2 = nn.Sequential(
          nn.Linear(10, 20),
          nn.Dropout1d(dout1),
          nn.ReLU()
      )

      self.layer3 = nn.Sequential(
        nn.Linear(20, 40),
        nn.Dropout1d(dout2),
        nn.ReLU()
      )

      self.layer4 = nn.Sequential(
        nn.Linear(40, 2),
        nn.Dropout1d(dout3),
        nn.Sigmoid()
      )

      self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
      out = self.layer1(x)
      out = self.layer2(out)
      out = self.layer3(out)
      out = self.layer4(out)
      return self.softmax(out)


# **Model Training**

In [174]:
def train(d_out, learning_rate, epochs ,train_loader, val_loader):
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  net_model = Net(d_out, d_out ,d_out)
  net_model = net_model.cuda(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(net_model.parameters(), lr=learning_rate)

  for epoch in range(epochs):
    for i, (_,points, labels) in enumerate(train_loader):
      if torch.cuda.is_available():
        points = points.cuda()
        labels = labels.cuda()

      outputs = torch.argmax(net_model(points), dim=1)
      outputs = outputs.type(torch.float32)
      labels.retain_grad()
      loss = criterion(outputs, labels)
      # optimizer.zero_grad()
      loss.backward()
      optimizer.step()

  net_model.eval()
  t_loss = 0
  total_loss = [t_loss]
  loss_per_batch = []
  val_one_loss_total = []
  train_one_loss_total = []
  with torch.no_grad():
    for i, (_,points, labels) in enumerate(val_loader):
      if torch.cuda.is_available():
        points = points.cuda()
        labels = labels.cuda()
      outputs = torch.argmax(net_model(points), dim=1)
      outputs = outputs.type(torch.float32)
      loss = criterion(outputs, labels)
      error_loss = errors_per_batch(outputs, labels)
      val_one_loss_total.append(error_loss)
      loss = loss.data.cpu().detach().numpy()
      t_loss += loss
      total_loss.append(t_loss)
      loss_per_batch.append(loss)

    for i, (_,points, labels) in enumerate(train_loader):
      if torch.cuda.is_available():
        points = points.cuda()
        labels = labels.cuda()
      outputs = torch.argmax(net_model(points), dim=1)
      outputs = outputs.type(torch.float32)
      loss = criterion(outputs, labels)
      error_loss = errors_per_batch(outputs, labels)
      train_one_loss_total.append(error_loss)
      loss = loss.data.cpu().detach().numpy()
      t_loss += loss
      total_loss.append(t_loss)
      loss_per_batch.append(loss)

  train_loss = sum(train_one_loss_total)/len(train_set)
  val_loss = sum(val_one_loss_total)/len(val_set)

  return net_model, (train_loss, val_loss)

In [56]:
def predict(net_model,test_loader):
  y_pred = pd.DataFrame(index=test_set_p.index, columns=['Survived'])
  with torch.no_grad():
    for i, (idx,points) in enumerate(test_loader):
      if torch.cuda.is_available():
        points = points.cuda()
      outputs = torch.argmax(net_model(points), dim=1)
      outputs = outputs.cpu().detach().numpy()
      idx = idx.cpu().detach().numpy()
      for j in range(idx.shape[0]):
        y_pred.loc[idx[j]]['Survived'] = outputs[j]
  return y_pred

# Prepering Train/Val/Test sets:

*Spliting to Train and Validation and Creaing Data Loaders*

In [None]:
train_set_p = pre_proccess(train_set_up)
test_set_p = pre_proccess(test_set_up, isTrain=False)

train_data, val_data = train_test_split(train_set_p,test_size=200, random_state=42)
train_set = MyDataset(train_data)
val_set = MyDataset(val_data)
test_set = MyDataset(test_set_p, isTest=True)

**Fine Tuning Hyper Parameters**

In [205]:
lrs = [0.01,0.001,0.0001,0.00001]
epochs = [10,15,20,25,30,50,100]
b_size = [5,10,15,20,30,50]
dropout_rate = [0.1,0.3,0.5,0.7]

loss_arr = []
for b in b_size:
  train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                            batch_size=b,
                                            shuffle=True)
  val_loader = torch.utils.data.DataLoader(dataset=val_set,
                                            batch_size=b,
                                            shuffle=True)
  test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                            batch_size=b,
                                             shuffle=True)
  for l in lrs:
    for e in epochs:
      for d_out in dropout_rate:
        learning_rate = l
        num_epochs = e
        batch_size = b
        model, (t_loss,v_loss)= train(d_out,learning_rate, num_epochs,train_loader,val_loader)
        if v_loss<0.3:
          print(f'Learning Rate: {l}, epochs: {e}, batch size: {b},d_out: {d_out}, val loss: {v_loss}')
        y_predict = predict(model, test_loader)
        loss_arr.append((l,e,b,t_loss,v_loss))


Learning Rate: 1e-05, epochs: 25, batch size: 10,d_out: 0.1, val loss: 0.25
Learning Rate: 0.01, epochs: 10, batch size: 30,d_out: 0.5, val loss: 0.29
Learning Rate: 0.01, epochs: 50, batch size: 30,d_out: 0.3, val loss: 0.275
Learning Rate: 0.001, epochs: 100, batch size: 30,d_out: 0.1, val loss: 0.24
Learning Rate: 0.01, epochs: 25, batch size: 50,d_out: 0.1, val loss: 0.285
Learning Rate: 0.01, epochs: 30, batch size: 50,d_out: 0.1, val loss: 0.255
Learning Rate: 1e-05, epochs: 10, batch size: 50,d_out: 0.5, val loss: 0.29
Learning Rate: 1e-05, epochs: 50, batch size: 50,d_out: 0.1, val loss: 0.285


*Finding the best hyper parameters combinations by sorting w.r.t validation loss*

In [None]:
loss_arr.sort(key= lambda x: x[4])
loss_arr[:10]

**Predict:**

In [None]:
b = 30
train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                      batch_size=b,
                                      shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_set,
                                          batch_size=b,
                                          shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                            batch_size=b,
                                            shuffle=True)

i=1
while i>0:
  d_out = 0.7
  learning_rate = 0.001
  num_epochs = 15
  model, (t_loss,v_loss)= train(d_out,learning_rate, num_epochs,train_loader,val_loader)
  print(f'loss: {v_loss}')
  if v_loss<0.2:
    break
y_predict = predict(model, test_loader)
y_predict.to_csv('prediction_simple_NN.csv')
print(t_loss,v_loss)