In [None]:
import pandas as pd
import torch
from torch import nn, optim
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader 
import numpy as np

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
from pathlib import Path

In [None]:
# Read the train and test CSVs from the local spaceship-titanic data directory.
train_csv = Path('./data/spaceship-titanic/train.csv')
test_csv = Path('./data/spaceship-titanic/test.csv')
dftrain = pd.read_csv(train_csv)
dftest = pd.read_csv(test_csv)

In [None]:
# Preview only the first rows instead of rendering the whole training frame.
dftrain.head()

In [None]:
# Number of missing (NaN) entries in each column of the raw training frame.
missing_counts = dftrain.isna().sum()
missing_counts

In [None]:
# Count the NaNs left after filling every column with its most frequent value.
# `DataFrame.mode()` returns a DataFrame (one row per tied mode) and `fillna`
# aligns a DataFrame argument on the row index, so passing it directly would
# only fill the first row(s). Row 0 gives one fill value per column instead.
dftrain.fillna(dftrain.mode().iloc[0]).isna().sum()

In [None]:
# Columns that clean_df one-hot encodes with pd.get_dummies
# (Cabin is first reduced to its leading character).
cats = [
    'HomePlanet',
    'Cabin',
    'Destination',
]

# Columns that clean_df casts to float and transforms with np.log1p.
conts = [
    'CryoSleep',
    'Age',
    'VIP',
    'RoomService',
    'FoodCourt',
    'ShoppingMall',
    'Spa',
    'VRDeck',
]

In [None]:
# Distinct leading characters of the Cabin strings (NaN entries, if any, stay NaN).
cabin_prefix = dftrain['Cabin'].str.get(0)
cabin_prefix.unique()

In [None]:
# Distinct values observed in the Destination column.
destination_col = dftrain['Destination']
destination_col.unique()

In [None]:
def clean_df(df, fillmode='median', cat_cols=None, cont_cols=None):
    """Turn a raw passenger frame into an all-numeric feature frame.

    Steps, in order:
      1. drop the 'PassengerId' and 'Name' columns;
      2. reduce 'Cabin' to its first character;
      3. one-hot encode the categorical columns with ``pd.get_dummies``;
      4. fill remaining NaNs with a per-column statistic;
      5. apply ``np.log1p`` to the continuous columns (after casting to float).

    Args:
        df: raw frame; must contain 'PassengerId', 'Name', 'Cabin' and every
            column named in `cat_cols` / `cont_cols`.
        fillmode: name of a DataFrame reduction method used to compute the
            fill values, e.g. 'median', 'mean' or 'mode'.
        cat_cols: columns to one-hot encode; defaults to the module-level `cats`.
        cont_cols: columns to log1p-transform; defaults to the module-level `conts`.

    Returns:
        A new, cleaned DataFrame.
    """
    # Fall back to the notebook-level column lists only when none are given,
    # so the function can also be used with explicit columns.
    cat_cols = cats if cat_cols is None else cat_cols
    cont_cols = conts if cont_cols is None else cont_cols

    df = df.drop(['PassengerId', 'Name'], axis=1)
    df['Cabin'] = df['Cabin'].str[0]
    df = pd.get_dummies(df, columns=cat_cols)

    fill_values = getattr(df, fillmode)()
    # Reductions such as median()/mean() return one value per column (a Series),
    # but mode() returns a DataFrame with one row per tied mode. fillna() aligns
    # a DataFrame on the row index, which would only fill the first row(s), so
    # collapse it to its first row to get a per-column fill value.
    if isinstance(fill_values, pd.DataFrame) and not fill_values.empty:
        fill_values = fill_values.iloc[0]
    df = df.fillna(fill_values)

    for cont in cont_cols:
        df[cont] = np.log1p(df[cont].astype(float))

    return df

In [None]:
# Sanity-check the cleaning step: cast the cleaned frame to float and show its first rows.
cleaned_preview = clean_df(dftrain).astype(float)
cleaned_preview.head()

In [None]:
class SpaceTitanic(Dataset):
    """Map-style Dataset over a frame cleaned with `clean_df`.

    Args:
        df: raw passenger frame, passed straight to `clean_df`.
        fillmode: NaN fill strategy forwarded to `clean_df`.
        train: when True (default) items are ``(x, y)`` pairs and the cleaned
            frame must contain a 'Transported' column; when False items are
            the feature tensor ``x`` only, so unlabelled data can be wrapped.

    Attributes:
        clean_ds: the cleaned DataFrame the items are read from.
    """
    def __init__(self, df, fillmode='median', train=True):
        self.train = train
        self.clean_ds = clean_df(df, fillmode)

    def __getitem__(self, i):
        """Return row `i` as float32 tensors: ``(x, y)`` if `train`, else ``x``."""
        row = self.clean_ds.iloc[i].astype(np.float32)

        if not self.train:
            # Unlabelled data has no target column; ignore it if absent.
            features = row.drop(labels=['Transported'], errors='ignore')
            return torch.tensor(features.to_numpy())

        # Go through NumPy rather than having torch iterate a pandas Series.
        x = torch.tensor(row.drop(labels=['Transported']).to_numpy())
        y = torch.tensor(row['Transported'])
        return x, y

    def __len__(self):
        """Number of rows in the cleaned frame."""
        return len(self.clean_ds)

In [None]:
class Block(nn.Module):
    """Linear -> ReLU -> LayerNorm building block.

    Args:
        n_in: number of input features of the linear layer.
        n_out: number of output features of the linear layer; this is also
            the size the LayerNorm normalizes over.
    """
    def __init__(self, n_in, n_out):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_in, n_out),
            nn.ReLU(),
            # The norm sees the linear layer's output, so it must be sized by
            # n_out; sizing it by n_in fails whenever n_in != n_out.
            nn.LayerNorm(n_out),
        )

    def forward(self, x):
        """Apply the block to `x`, whose last dimension must be `n_in`."""
        return self.net(x)

class SpaceTitanicModel(nn.Module):
    """MLP made of `Block`s followed by a plain linear output layer.

    Args:
        n_in: number of input features.
        n_out: number of output values per sample.
        n_h: width of every hidden layer.
        n_hidden_layers: number of `Block(n_h, n_h)` layers placed between
            the input block and the output layer.
    """
    def __init__(self, n_in, n_out, n_h, n_hidden_layers=5):
        super().__init__()
        self.model = nn.Sequential(
            Block(n_in, n_h),
            *[Block(n_h, n_h) for _ in range(n_hidden_layers)],
            # Output head is a bare Linear: a Block here would push the outputs
            # through ReLU and LayerNorm, and a LayerNorm over a single feature
            # (n_out=1) maps every input to the same value (its bias).
            nn.Linear(n_h, n_out),
        )

    def forward(self, x):
        """Return the raw (un-activated) outputs for `x`."""
        return self.model(x)

In [None]:
# Wrap the training frame in the Dataset, spelling out the constructor defaults.
ds = SpaceTitanic(dftrain, fillmode='median', train=True)

In [None]:
def normalize(tns):
    """Standardize `tns` with the mean and std taken over all of its elements.

    Both statistics are scalars computed across the whole tensor (not per
    column), so every element is shifted and scaled by the same amounts.
    """
    centered = tns - tns.mean()
    return centered / tns.std()

In [None]:
class Learner():
    """Minimal training-loop wrapper around a model, a batch iterable and an optimizer.

    Args:
        model: ``nn.Module`` to train.
        dls: iterable yielding ``(x, y)`` batches (e.g. a ``DataLoader``).
        opt_fn: optimizer constructor, called as ``opt_fn(params, lr=lr)``.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        lr: learning rate handed to `opt_fn`; the default keeps the value
            that used to be hard-coded.
    """
    def __init__(self, model, dls, opt_fn=optim.SGD, loss_fn=F.mse_loss, lr=1e-3):
        self.model = model
        self.dls = dls
        self.opt = opt_fn(model.parameters(), lr=lr)
        self.loss_fn = loss_fn

    def fit(self, epochs=10):
        """Run `epochs` passes over `self.dls`, printing the loss of every batch."""
        for epoch in range(epochs):
            for x, y in self.dls:
                # Inputs are standardized per batch before the forward pass.
                x = normalize(x)
                # Flatten predictions to 1-D before they are compared with y.
                pred = self.model(x).view(-1)
                loss = self.loss_fn(pred, y)
                loss.backward()
                self.opt.step()
                # Clear gradients so they do not accumulate into the next batch.
                self.opt.zero_grad()

                print(loss.item())

In [None]:
# Training setup: a mini-batch loader and a small MLP sized to the cleaned features.
bs = 32
# Reshuffle every epoch so mini-batches are not always drawn in file order.
dl = DataLoader(ds, batch_size=bs, shuffle=True)
# Every column of the cleaned frame except the 'Transported' target is a model input.
n_features = ds.clean_ds.shape[1] - 1
model = SpaceTitanicModel(n_features, n_out=1, n_h=10, n_hidden_layers=1)

In [None]:
# Train with AdamW against the logit-based binary cross-entropy loss.
learn = Learner(
    model=model,
    dls=dl,
    opt_fn=optim.AdamW,
    loss_fn=F.binary_cross_entropy_with_logits,
)

In [None]:
# Run the training loop for the default number of epochs, stated explicitly.
learn.fit(epochs=10)

In [None]:
# Preview the first rows of the cleaned frame instead of rendering all of it.
ds.clean_ds.head()