In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
data_train = pd.read_csv('titanic/train.csv')
data_test = pd.read_csv('titanic/test.csv')
data_train

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [3]:
X_train = data_train.drop(['Survived', 'PassengerId', 'Name', 'SibSp', 'Parch',
                'Ticket', 'Cabin', 'Embarked'], axis=1)
X_test = data_test.drop(['PassengerId', 'Name', 'SibSp', 'Parch',
                'Ticket', 'Cabin', 'Embarked'], axis=1)

In [4]:
X_train = pd.get_dummies(X_train)
X_train = X_train.drop(['Sex_male'], axis=1)
X_train['Age'].fillna(X_train['Age'].median(), inplace=True)
X_train['Sex_female'] = (X_train['Sex_female']).astype(np.int8)
X_train['Family'] = data_train['SibSp'] + data_train['Parch'] + 1
X_train['Fare'].fillna(X_train['Fare'].median(), inplace=True)
X_train

Unnamed: 0,Pclass,Age,Fare,Sex_female,Family
0,3,22.0,7.2500,0,2
1,1,38.0,71.2833,1,2
2,3,26.0,7.9250,1,1
3,1,35.0,53.1000,1,2
4,3,35.0,8.0500,0,1
...,...,...,...,...,...
886,2,27.0,13.0000,0,1
887,1,19.0,30.0000,1,1
888,3,28.0,23.4500,1,4
889,1,26.0,30.0000,0,1


In [5]:
X_test = pd.get_dummies(X_test)
X_test = X_test.drop(['Sex_male'], axis=1)
X_test['Age'].fillna(X_test['Age'].median(), inplace=True)
X_test['Sex_female'] = (X_test['Sex_female']).astype(np.int8)
X_test['Family'] = data_test['SibSp'] + data_test['Parch']
X_test['Fare'].fillna(X_test['Fare'].median(), inplace=True)
X_test

Unnamed: 0,Pclass,Age,Fare,Sex_female,Family
0,3,34.5,7.8292,0,0
1,3,47.0,7.0000,1,1
2,2,62.0,9.6875,0,0
3,3,27.0,8.6625,0,0
4,3,22.0,12.2875,1,2
...,...,...,...,...,...
413,3,27.0,8.0500,0,0
414,1,39.0,108.9000,1,0
415,3,38.5,7.2500,0,0
416,3,27.0,8.0500,0,0


In [6]:
y_train = data_train.iloc[:, 1].values

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
one_hot_encoder = OneHotEncoder()
X_train = one_hot_encoder.fit_transform(X_train).toarray()
X_test = one_hot_encoder.fit_transform(X_test).toarray()

In [9]:
X_train

array([[0., 0., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.]])

In [10]:
x_train, x_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=4)

In [11]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(350, 32)
        self.f_act = nn.ReLU()
        self.linear2 = nn.Linear(32, 3)
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, X):
        res = self.linear1(X)
        res = self.f_act(res)
        res = self.linear2(res)
        res = self.softmax(res)
        return res
net = Net()

In [12]:
batch_size = 20
num_epochs = 50
learning_rate = 0.001
batch_no = len(x_train) // batch_size

In [13]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [14]:
from sklearn.utils import shuffle
from torch.autograd import Variable

for epoch in range(num_epochs):
    x_train, y_train = shuffle(x_train, y_train)
    for i in range(batch_no):
        start = i * batch_size
        end = start + batch_size
        x_var = Variable(torch.FloatTensor(x_train[start:end]))
        y_var = Variable(torch.LongTensor(y_train[start:end]))
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        ypred_var = net(x_var)
        loss =criterion(ypred_var, y_var)
        loss.backward()
        optimizer.step()

In [15]:
test_var = Variable(torch.FloatTensor(x_val), requires_grad=True)
with torch.no_grad():
    result = net(test_var)
values, labels = torch.max(result, 1)
num_right = np.sum(labels.data.numpy() == y_val)
print('Accuracy {:.22f}'.format(num_right / len(y_val)))

Accuracy 0.8555555555555555136138
