# Neural Network

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader

from utils import DataHandlerTitantic

Load the data and split into train and validation sets.

In [2]:
# Run settings, named once so they are easy to find and tune.
HANDLER_SEED = 34545234  # presumably the RNG seed behind shuffle_split -- confirm in utils
TRAIN_CSV = "data/train.csv"
TEST_CSV = "data/test.csv"
SPLIT_FRACTION = 0.8  # presumably the share of rows kept for training -- confirm in utils

# Build the handler, read both CSV files, then split into train/validation sets.
dh = DataHandlerTitantic(HANDLER_SEED)
dh.load_data(TRAIN_CSV, TEST_CSV)
dh.shuffle_split(SPLIT_FRACTION)

# Preview the first rows of the full training table.
dh.full_train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


# First model

We'll use socio-economic class, gender, age, fare, port of embarkation, number of siblings/spouses aboard, and number of parents/children aboard as features. Let's impute the missing values and do a little preprocessing.

In [3]:
# Columns handed to the imputer: passenger-class dummies first, then the
# per-passenger fields, then the port dummies.
pclass_dummies = ["Pclass_1", "Pclass_2", "Pclass_3"]
embarked_dummies = ["Embarked_C", "Embarked_Q", "Embarked_S"]
impute_columns = [
    *pclass_dummies,
    "IsFemale",
    "Age",
    "SibSp",
    "Parch",
    "Fare",
    *embarked_dummies,
]

# Preprocessing chain: the displayed result has an IsFemale column in place of
# Sex and Pclass_*/Embarked_* indicator columns in place of Pclass/Embarked.
# The listed columns are then imputed with strategy="knn" (presumably a
# k-nearest-neighbours imputer -- confirm in utils).
dh1 = (
    dh.to_is_female()
    .make_dummies(["Pclass", "Embarked"])
    .impute_values(impute_columns, strategy="knn")
)

# Preview the preprocessed training split.
dh1.train.head()

Unnamed: 0,PassengerId,Survived,Name,IsFemale,Age,SibSp,Parch,Ticket,Fare,Cabin,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
763,764,1,"Carter, Mrs. William Ernest (Lucile Polk)",1.0,36.0,1.0,2.0,113760,120.0,B96 B98,1.0,0.0,0.0,0.0,0.0,1.0
632,633,1,"Stahelin-Maeglin, Dr. Max",0.0,32.0,0.0,0.0,13214,30.5,B50,1.0,0.0,0.0,1.0,0.0,0.0
237,238,1,"Collyer, Miss. Marjorie ""Lottie""",1.0,8.0,0.0,2.0,C.A. 31921,26.25,,0.0,1.0,0.0,0.0,0.0,1.0
491,492,0,"Windelov, Mr. Einar",0.0,21.0,0.0,0.0,SOTON/OQ 3101317,7.25,,0.0,0.0,1.0,0.0,0.0,1.0
590,591,0,"Rintamaki, Mr. Matti",0.0,35.0,0.0,0.0,STON/O 2. 3101273,7.125,,0.0,0.0,1.0,0.0,0.0,1.0


Now set up the data sets and data loaders.

In [4]:
batch_size = 64

# The model is fed exactly the columns that were imputed during preprocessing,
# so derive the feature list from `impute_columns` (as an independent copy)
# instead of keeping a second hand-typed list that could drift out of sync.
feature_columns = list(impute_columns)

train_dataset = dh1.get_train_pytorch_dataset(feature_columns)
eval_dataset = dh1.get_eval_pytorch_dataset(feature_columns)

# Reshuffle the training samples every epoch so mini-batches differ between
# epochs; DataLoader defaults to shuffle=False, which would replay the same
# batches in the same order each time. A dedicated seeded generator keeps the
# shuffling reproducible under Restart & Run All.
# NOTE(review): shuffle=True requires a map-style dataset -- confirm that
# get_train_pytorch_dataset does not return an IterableDataset.
shuffle_generator = torch.Generator().manual_seed(34545234)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    generator=shuffle_generator,
)

# Evaluation keeps the default sequential (unshuffled) order.
eval_dataloader = DataLoader(eval_dataset, batch_size=batch_size)