In [2]:
import numpy as np
import pandas as pd

train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')
submission = pd.read_csv('./gender_submission.csv')

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Linear(5, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [6]:
data_set = pd.concat((train.drop(['Survived'], axis = 1), test), axis = 0)

data_set = data_set.drop(['PassengerId', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis = 1)
data_set = data_set.fillna(data_set.mean())

n_train = train.shape[0]
train_x, test_x = data_set[:n_train], data_set[n_train:]
train_y = train['Survived']

train_x = train_x[train_x.keys()].values
test_x = test_x[test_x.keys()].values
train_y = train_y.values

import torch.optim as optim
from torch.autograd import Variable

simple_nn = SimpleNN()
optimizer = optim.Adam(simple_nn.parameters(), lr=0.01)
error = nn.BCELoss()

batch_size = 99
batch_count = int(len(train_x) / batch_size)

for epoch in range(300):
    train_loss = 0
    num_right = 0
    for i in range(batch_count):
        start = i * batch_size
        end = start + batch_size
        tensor_x = torch.FloatTensor(train_x[start:end])
        tensor_y = torch.FloatTensor(train_y[start:end]).reshape(-1, 1)

        optimizer.zero_grad()
        output = simple_nn(tensor_x)
        loss = error(output, tensor_y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * batch_size
        result = [1 if out >= 0.5 else 0 for out in output]
        num_right += np.sum(np.array(result) == train_y[start:end])

    train_loss = train_loss / len(train_x)
    accuracy = num_right / len(train_x)

    if epoch % 25 == 0:
        print('Loss: {} Accuracy: {}% Epoch:{}'.format(train_loss, accuracy, epoch))

print('Training Ended')

Loss: 0.6433323555522494 Accuracy: 0.6554433221099888% Epoch:0
Loss: 0.5325872169600593 Accuracy: 0.7474747474747475% Epoch:25
Loss: 0.4781406886047787 Accuracy: 0.7687991021324355% Epoch:50
Loss: 0.40701249572965836 Accuracy: 0.8114478114478114% Epoch:75
Loss: 0.41228268212742275 Accuracy: 0.819304152637486% Epoch:100
Loss: 0.38357147243287826 Accuracy: 0.8237934904601572% Epoch:125
Loss: 0.3328804605536991 Accuracy: 0.8552188552188552% Epoch:150
Loss: 0.31955427593655056 Accuracy: 0.8709315375982043% Epoch:175
Loss: 0.25873688360055286 Accuracy: 0.8922558922558923% Epoch:200
Loss: 0.27872639894485474 Accuracy: 0.877665544332211% Epoch:225
Loss: 0.24741005731953514 Accuracy: 0.9034792368125701% Epoch:250
Loss: 0.21650268137454987 Accuracy: 0.9124579124579124% Epoch:275
Training Ended


In [7]:
tensor_test_x = torch.FloatTensor(test_x)
with torch.no_grad():
  test_output = simple_nn(tensor_test_x)
  result = np.array([1 if out >= 0.5 else 0 for out in test_output])
  submission = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': result})
  submission.to_csv('submission.csv', index = False)