In [130]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm


In [131]:
train, test = pd.read_csv("/home/ryler/Datasets/Titanic-Starter-Competition/train.csv"), pd.read_csv("/home/ryler/Datasets/Titanic-Starter-Competition/test.csv")

In [132]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [133]:
def clean_data_for_titanic_training(dataset):
    data = dataset.drop(["PassengerId", "Embarked", "Parch", "SibSp", "Ticket", "Name", "Cabin"], axis=1)
    age_missing_values = data["Age"].isna()
    data.loc[age_missing_values, "Age"] = -1

    sex_mapping = {
        "male": 0,
        "female": 1
    }

    data["Sex"] = data["Sex"].map(sex_mapping)

    return data

In [134]:
cleaned_train = clean_data_for_titanic_training(train)
cleaned_test = clean_data_for_titanic_training(test)

y_train = torch.tensor(cleaned_train["Survived"], dtype=torch.int32)
x_train = torch.tensor(cleaned_train.drop("Survived", axis=1).to_numpy(), dtype=torch.float32)

x_test = torch.tensor(cleaned_test.to_numpy(), dtype=torch.float32)

In [140]:
train_dataloader = DataLoader(TensorDataset(x_train, y_train), batch_size=1, shuffle=True)
features, labels = next(iter(train_dataloader))

features.shape, labels.shape

(torch.Size([1, 4]), torch.Size([1]))

In [136]:
class Classifier(nn.Module):
    def __init__(self):
        super().__init__()

        self.inference = nn.Sequential(
            nn.Linear(in_features=4, out_features=16),
            nn.BatchNorm1d(num_features=16),
            nn.ReLU(),
            nn.Linear(in_features=16, out_features=32),
            nn.BatchNorm1d(num_features=32),
            nn.ReLU(),
            nn.Linear(in_features=32, out_features=1),
        )

    def forward(self, inputs):
        return self.inference(inputs)

In [141]:
model = Classifier()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001, weight_decay=0.001)
loss_fn = nn.BCEWithLogitsLoss()

In [142]:
epochs = 20

for epoch in range(1, epochs+1):

    print("Starting Training...\n")

    running_train_loss = 0
    train_correct = 0
    total_train = 0

    model.train()
    for features, labels in tqdm(train_dataloader):
        optimizer.zero_grad()
        logits = model(features)
        loss = loss_fn(logits, labels.float())
        loss.backward()
        optimizer.step()

        total_train += len(features)
        preds = torch.sigmoid(logits)
        preds = (preds >= 0.5).int()
        train_correct += (preds == labels).sum().item()
        running_train_loss += loss.item()

    print(f"Avg loss: {(running_train_loss / total_train):.2f}, Accuracy: {(train_correct / total_train):.3f}")

    

Starting Training...



  0%|          | 0/891 [00:00<?, ?it/s]


ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 16])