In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the three Titanic CSV files (training set, test set, sample submission)
# in one pass; the tuple order matches the unpacking order on the left.
df_train, df_test, df_sub = map(
    pd.read_csv,
    (
        './titanic/train.csv',
        './titanic/test.csv',
        './titanic/gender_submission.csv',
    ),
)

In [3]:
# Feature preparation.
#
# Every statistic used here (imputation means, scaling mean/std) is learned
# from the training features only and then re-applied to the test features, so
# both sets end up on the same scale. The identifier and the target are never
# scaled: `Survived` stays an integer 0/1 label and `PassengerId` stays intact.
ID_COL, TARGET_COL = 'PassengerId', 'Survived'


def _encode(frame):
    """Drop the free-text columns and one-hot encode `Sex` and `Embarked`.

    The first level of each categorical is dropped (`drop_first=True`).
    Returns a new frame; the input frame is not modified.
    """
    frame = frame.drop(['Name', 'Ticket', 'Cabin'], axis=1)
    sex = pd.get_dummies(frame['Sex'], drop_first=True)
    embark = pd.get_dummies(frame['Embarked'], drop_first=True)
    return pd.concat([frame.drop(['Sex', 'Embarked'], axis=1), sex, embark], axis=1)


df_train = _encode(df_train)
df_test = _encode(df_test)

# Model inputs are every training column except the identifier and the target.
feature_columns = [c for c in df_train.columns if c not in (ID_COL, TARGET_COL)]

# Give the test frame exactly the training feature columns, in the same order.
# A dummy column that never occurs in the test data is created and filled with 0.
df_test = df_test.reindex(columns=[ID_COL] + feature_columns, fill_value=0)

# Impute missing values in both sets with the training-set column means.
train_features = df_train[feature_columns].astype(float)
test_features = df_test[feature_columns].astype(float)
train_means = train_features.mean()
train_features = train_features.fillna(train_means)
test_features = test_features.fillna(train_means)

# Fit the scaler on the training features only, then reuse it for the test set.
Scaler1 = StandardScaler()
Scaler2 = Scaler1  # alias kept so any existing reference to `Scaler2` still works

train_scaled = pd.DataFrame(
    Scaler1.fit_transform(train_features),
    columns=feature_columns,
    index=df_train.index,
)
test_scaled = pd.DataFrame(
    Scaler1.transform(test_features),
    columns=feature_columns,
    index=df_test.index,
)

# Rebuild the frames with the original positional layout:
#   df_train -> [PassengerId, Survived, <features...>]
#   df_test  -> [PassengerId, <features...>]
df_train = pd.concat([df_train[[ID_COL, TARGET_COL]], train_scaled], axis=1)
df_test = pd.concat([df_test[[ID_COL]], test_scaled], axis=1)

train_columns = df_train.columns
test_columns = df_test.columns

x_train = df_train[feature_columns].values
y_train = df_train[TARGET_COL].to_numpy(dtype='int64')

In [4]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Run on the MPS backend when this PyTorch build reports it as available;
# otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device('mps:0')
else:
    device = torch.device('cpu')

In [5]:
class TitanicDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        features: Indexable, sized container of feature rows.
        labels: Indexable, sized container of labels, one per feature row.

    Raises:
        ValueError: If `features` and `labels` have different lengths.
    """

    def __init__(self, features, labels):
        # __len__ is derived from `features` alone, so a mismatch would
        # otherwise go unnoticed until an index error (or silently ignored
        # labels) much later during training.
        if len(features) != len(labels):
            raise ValueError(
                'features and labels must have the same length, got {} and {}'
                .format(len(features), len(labels))
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the `(features, label)` pair stored at position `idx`."""
        return self.features[idx], self.labels[idx]

# Wrap the training arrays in a dataset and serve them as reshuffled
# mini-batches of 64 samples.
train_loader = DataLoader(
    TitanicDataset(x_train, y_train),
    batch_size=64,
    shuffle=True,
)
train_dataset = train_loader.dataset

In [81]:
class Net(nn.Module):
    """Fully connected classifier: in_features -> 512 -> 512 -> 64 -> n_classes.

    Each hidden layer is followed by ReLU and dropout. The output layer
    returns raw logits (no softmax).

    Args:
        in_features: Number of input features per sample (default 8).
        n_classes: Number of output logits (default 2).
        dropout: Dropout probability applied after each hidden layer
            (default 0.2).

    With the defaults, `Net()` builds the same layers, under the same
    attribute names (`fc1`..`fc4`, `dropout`), as the original fixed-size
    network, so previously saved state dicts remain loadable.
    """

    def __init__(self, in_features=8, n_classes=2, dropout=0.2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 64)
        self.fc4 = nn.Linear(64, n_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map a `(batch, in_features)` float tensor to `(batch, n_classes)` logits."""
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = self.dropout(F.relu(hidden(x)))
        return self.fc4(x)

# Build the network and move its parameters to the selected device; the bare
# name on the last line makes the notebook display the layer summary.
model = Net()
model = model.to(device)
model

Net(
  (fc1): Linear(in_features=8, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=2, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [84]:
# Cross-entropy loss over the network's logits, minimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.002)

In [85]:
# NOTE: `batch_size` is not read by this loop. The effective batch size is the
# one `train_loader` was constructed with, so changing this value has no effect.
batch_size = 64
n_epochs = 500

# Lowest epoch-level training loss seen so far. `np.inf` is used because the
# `np.Inf` alias was removed in NumPy 2.0.
train_loss_min = np.inf
n_samples = len(train_loader.dataset)

model.train()
for epoch in range(n_epochs):
    train_loss = 0.0
    num_correct = 0
    for data, target in train_loader:
        # Cast on the host first, then move to the device: inputs become
        # float32 and class indices become int64, as CrossEntropyLoss expects.
        data = data.to(torch.float32).to(device)
        target = target.to(torch.long).to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # `loss` is a batch mean; weight it by the batch size so the epoch
        # average below stays correct when the last batch is smaller.
        train_loss += loss.item() * data.size(0)
        predicted = output.argmax(dim=1)
        num_correct += (predicted == target).sum().item()

    train_loss = train_loss / n_samples

    # Checkpoint whenever the *training* loss reaches a new minimum. No
    # validation split exists in this notebook, so the message says
    # "Training" rather than "Validation".
    if train_loss <= train_loss_min:
        print('Training loss decreased ({:.6f} -> {:.6f}). Saving the model..'
              .format(train_loss_min, train_loss))
        torch.save(model.state_dict(), 'model.pt')
        train_loss_min = train_loss
    if (epoch + 1) % 100 == 0:
        print('Epoch: {} \tTraining Loss: {:.6f} \tTraining Accuracy: {:.2f}%'
              .format(epoch + 1, train_loss, num_correct / n_samples * 100))

print('Training ended!')

Validation loss decreased (inf -> 0.243769). Saving the model..
Validation loss decreased (0.243769 -> 0.228435). Saving the model..
Validation loss decreased (0.228435 -> 0.225800). Saving the model..
Validation loss decreased (0.225800 -> 0.214875). Saving the model..
Validation loss decreased (0.214875 -> 0.210970). Saving the model..
Validation loss decreased (0.210970 -> 0.210644). Saving the model..
Validation loss decreased (0.210644 -> 0.210495). Saving the model..
Validation loss decreased (0.210495 -> 0.200579). Saving the model..
Epoch: 100 	Training Loss: 0.201792 	Training Accuracy: 90.46%
Validation loss decreased (0.200579 -> 0.196435). Saving the model..
Validation loss decreased (0.196435 -> 0.193713). Saving the model..
Validation loss decreased (0.193713 -> 0.192775). Saving the model..
Epoch: 200 	Training Loss: 0.201501 	Training Accuracy: 91.69%
Validation loss decreased (0.192775 -> 0.184310). Saving the model..
Validation loss decreased (0.184310 -> 0.180078). S

In [27]:
# Predict on the test set. Column 0 of `df_test` is skipped; the remaining
# columns are fed to the model as features.
x_test = df_test.iloc[:, 1:].values

# A plain float32 tensor replaces the deprecated `torch.autograd.Variable`
# wrapper; gradients are disabled by the `no_grad` context below.
x_test_var = torch.as_tensor(x_test, dtype=torch.float32).to(device)

model.eval()  # switch dropout off for inference
with torch.no_grad():
    test_result = model(x_test_var)

# `values` holds the winning logit per row, `labels` the predicted class index.
values, labels = torch.max(test_result, 1)
survived = labels.cpu().numpy()

In [28]:
# Pair the passenger ids from the sample submission with the predicted labels
# and write them out without the index column.
submission = df_sub[['PassengerId']].assign(Survived=survived)
submission.to_csv('submission.csv', index=False)