In [130]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm


In [131]:
train, test = pd.read_csv("/home/ryler/Datasets/Titanic-Starter-Competition/train.csv"), pd.read_csv("/home/ryler/Datasets/Titanic-Starter-Competition/test.csv")

In [132]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [133]:
def clean_data_for_titanic_training(dataset):
    data = dataset.drop(["PassengerId", "Embarked", "Parch", "SibSp", "Ticket", "Name", "Cabin"], axis=1)
    age_missing_values = data["Age"].isna()
    data.loc[age_missing_values, "Age"] = -1

    sex_mapping = {
        "male": 0,
        "female": 1
    }

    data["Sex"] = data["Sex"].map(sex_mapping)

    return data

In [134]:
cleaned_train = clean_data_for_titanic_training(train)
cleaned_test = clean_data_for_titanic_training(test)

y_train = torch.tensor(cleaned_train["Survived"], dtype=torch.int32)
x_train = torch.tensor(cleaned_train.drop("Survived", axis=1).to_numpy(), dtype=torch.float32)

x_test = torch.tensor(cleaned_test.to_numpy(), dtype=torch.float32)

In [143]:
train_dataloader = DataLoader(TensorDataset(x_train, y_train), batch_size=4, shuffle=True)
features, labels = next(iter(train_dataloader))

features.shape, labels.shape

(torch.Size([4, 4]), torch.Size([4]))

In [144]:
class Classifier(nn.Module):
    def __init__(self):
        super().__init__()

        self.inference = nn.Sequential(
            nn.Linear(in_features=4, out_features=16),
            nn.BatchNorm1d(num_features=16),
            nn.ReLU(),
            nn.Linear(in_features=16, out_features=32),
            nn.BatchNorm1d(num_features=32),
            nn.ReLU(),
            nn.Linear(in_features=32, out_features=1),
        )

    def forward(self, inputs):
        return self.inference(inputs)

In [None]:
model = Classifier()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001, weight_decay=0.001)
loss_fn = nn.BCEWithLogitsLoss()

In [150]:
epochs = 50

for epoch in range(1, epochs+1):

    print("Starting Training...\n")

    running_train_loss = 0
    train_correct = 0
    total_train = 0

    model.train()
    for features, labels in tqdm(train_dataloader):
        optimizer.zero_grad()
        logits = model(features).squeeze()
        loss = loss_fn(logits, labels.float())
        loss.backward()
        optimizer.step()

        total_train += len(features)
        preds = torch.sigmoid(logits)
        preds = (preds >= 0.5).int()
        train_correct += (preds == labels).sum().item()
        running_train_loss += loss.item()

    print(f"Avg loss: {(running_train_loss / total_train):.2f}, Accuracy: {(train_correct / total_train):.3f}")

    

Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1184.90it/s]


Avg loss: 0.14, Accuracy: 0.724
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1052.29it/s]


Avg loss: 0.14, Accuracy: 0.735
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1192.93it/s]


Avg loss: 0.14, Accuracy: 0.751
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1238.24it/s]


Avg loss: 0.14, Accuracy: 0.737
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1203.21it/s]


Avg loss: 0.14, Accuracy: 0.724
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1151.18it/s]


Avg loss: 0.14, Accuracy: 0.710
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1181.12it/s]


Avg loss: 0.14, Accuracy: 0.723
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1198.90it/s]


Avg loss: 0.14, Accuracy: 0.758
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1183.67it/s]


Avg loss: 0.14, Accuracy: 0.745
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1201.59it/s]


Avg loss: 0.14, Accuracy: 0.703
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1244.75it/s]


Avg loss: 0.14, Accuracy: 0.741
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1266.18it/s]


Avg loss: 0.15, Accuracy: 0.689
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1246.10it/s]


Avg loss: 0.14, Accuracy: 0.734
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1188.05it/s]


Avg loss: 0.14, Accuracy: 0.745
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1260.55it/s]


Avg loss: 0.14, Accuracy: 0.737
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1261.04it/s]


Avg loss: 0.14, Accuracy: 0.750
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1255.39it/s]


Avg loss: 0.14, Accuracy: 0.715
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1270.82it/s]


Avg loss: 0.14, Accuracy: 0.735
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1228.29it/s]


Avg loss: 0.14, Accuracy: 0.714
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1230.02it/s]


Avg loss: 0.14, Accuracy: 0.722
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1254.35it/s]


Avg loss: 0.14, Accuracy: 0.716
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1276.70it/s]


Avg loss: 0.14, Accuracy: 0.736
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1250.96it/s]


Avg loss: 0.14, Accuracy: 0.714
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1227.91it/s]


Avg loss: 0.15, Accuracy: 0.699
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1215.11it/s]


Avg loss: 0.14, Accuracy: 0.718
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1258.08it/s]


Avg loss: 0.14, Accuracy: 0.725
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1221.89it/s]


Avg loss: 0.14, Accuracy: 0.731
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1207.68it/s]


Avg loss: 0.14, Accuracy: 0.742
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1256.94it/s]


Avg loss: 0.15, Accuracy: 0.690
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1266.11it/s]


Avg loss: 0.15, Accuracy: 0.703
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1248.10it/s]


Avg loss: 0.15, Accuracy: 0.703
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1268.13it/s]


Avg loss: 0.15, Accuracy: 0.673
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1211.29it/s]


Avg loss: 0.14, Accuracy: 0.717
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1214.12it/s]


Avg loss: 0.15, Accuracy: 0.694
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1241.84it/s]


Avg loss: 0.15, Accuracy: 0.691
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1194.45it/s]


Avg loss: 0.14, Accuracy: 0.721
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1241.64it/s]


Avg loss: 0.14, Accuracy: 0.723
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1216.41it/s]


Avg loss: 0.14, Accuracy: 0.742
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1258.87it/s]


Avg loss: 0.14, Accuracy: 0.730
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1216.77it/s]


Avg loss: 0.13, Accuracy: 0.756
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1162.54it/s]


Avg loss: 0.15, Accuracy: 0.713
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1237.60it/s]


Avg loss: 0.14, Accuracy: 0.719
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1225.84it/s]


Avg loss: 0.14, Accuracy: 0.719
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1186.35it/s]


Avg loss: 0.15, Accuracy: 0.705
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1190.81it/s]


Avg loss: 0.14, Accuracy: 0.734
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1252.03it/s]


Avg loss: 0.14, Accuracy: 0.733
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1249.52it/s]


Avg loss: 0.14, Accuracy: 0.724
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1251.35it/s]


Avg loss: 0.15, Accuracy: 0.690
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1250.28it/s]


Avg loss: 0.14, Accuracy: 0.717
Starting Training...



100%|██████████| 223/223 [00:00<00:00, 1246.35it/s]

Avg loss: 0.14, Accuracy: 0.736



