In [4]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.
train = pd.read_csv("../input/train.csv")
print(train.shape)

# Data processing part is based on code from https://www.kaggle.com/sashr07/kaggle-titanic-tutorial/
def process_age(df,cut_points,label_names):
    df["Age"] = df["Age"].fillna(-0.5)
    df["Age_categories"] = pd.cut(df["Age"],cut_points,labels=label_names)
    return df

def create_dummies(db, column_name):
    dummies = pd.get_dummies(db[column_name],prefix=column_name)
    db = pd.concat([db,dummies],axis=1)
    return db

def process_table(db):
    cut_points = [-1,0, 1, 4, 6, 12, 18, 28, 45, 60, 100]
    label_names = ["Missing", 'Infant', 'Baby', 'Toddler', "Child", 'Teenager', "Young Adult", 'Adult', 'Adult2', 'Senior']
    # process age column
    db = process_age(db,cut_points,label_names)
    db = create_dummies(db, "Age_categories")
    #process sex column
    db = create_dummies(db, 'Sex')
    #process class column
    db = create_dummies(db,"Pclass")
    
    columns = ['Pclass_1', 'Pclass_2', 'Pclass_3', 'Sex_female', 'Sex_male',
               'Age_categories_Missing','Age_categories_Infant',
               'Age_categories_Baby', 'Age_categories_Toddler',
               'Age_categories_Child', 'Age_categories_Teenager',
               'Age_categories_Young Adult', 'Age_categories_Adult',
               'Age_categories_Adult2','Age_categories_Senior']
    return db[columns], db['Survived']

class Net(nn.Module):
    def __init__(self, hidden_number):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(15, hidden_number)
        self.ac1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_number, 2)
        self.ac2 = nn.Sigmoid()
        self.dropout = nn.Dropout(p=0.2)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.dropout(self.ac1(x))
        x = self.fc2(x)
        x = self.ac2(x)
        return x

hidden_number = 45
epoches = 50
batch_size = 89
batches = 9
model = Net(hidden_number)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)    

data, labels = process_table(train)
train_data, test_data, train_labels, test_labels = train_test_split(
   data, labels, test_size=0.1,random_state=0)

train_data, test_data, train_labels, test_labels = torch.Tensor(train_data.values), torch.Tensor(test_data.values), torch.Tensor(train_labels.values), torch.Tensor(test_labels.values)
    
print(train_data.shape)
print(test_data.shape)

for epoch in range(epoches):
    for i in range(batches):
        optimizer.zero_grad()
        start = i * batch_size
        end = start + batch_size
        output = model(train_data[start:end])
        loss = criterion(output, train_labels[start:end].long())
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        model.eval()
        output = model(test_data)
        output = torch.max(output, 1)[1]
        accuracy = torch.mean((output == test_labels.long()).float())
        print('Accuracy {:.2f}'.format(accuracy * 100.0))
    model.train()


['train.csv', 'gender_submission.csv', 'test.csv']
(891, 12)
torch.Size([801, 15])
torch.Size([90, 15])
Accuracy 58.89
Accuracy 58.89
Accuracy 65.56
Accuracy 65.56
Accuracy 66.67
Accuracy 77.78
Accuracy 77.78
Accuracy 77.78
Accuracy 78.89
Accuracy 80.00
Accuracy 80.00
Accuracy 78.89
Accuracy 78.89
Accuracy 77.78
Accuracy 77.78
Accuracy 77.78
Accuracy 77.78
Accuracy 77.78
Accuracy 77.78
Accuracy 77.78
Accuracy 78.89
Accuracy 78.89
Accuracy 78.89
Accuracy 78.89
Accuracy 78.89
Accuracy 78.89
Accuracy 78.89
Accuracy 78.89
Accuracy 78.89
Accuracy 78.89
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
Accuracy 80.00
