In [23]:
import pandas as pd
import torch
from torch import nn, optim
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader 
import numpy as np

In [24]:
from pathlib import Path

In [25]:
dftrain = pd.read_csv(Path('./data/spaceship-titanic/train.csv'))
dftest = pd.read_csv(Path('./data/spaceship-titanic/test.csv'))

In [26]:
dftrain

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,9276_01,Europa,False,A/98/P,55 Cancri e,41.0,True,0.0,6819.0,0.0,1643.0,74.0,Gravior Noxnuther,False
8689,9278_01,Earth,True,G/1499/S,PSO J318.5-22,18.0,False,0.0,0.0,0.0,0.0,0.0,Kurta Mondalley,False
8690,9279_01,Earth,False,G/1500/S,TRAPPIST-1e,26.0,False,0.0,0.0,1872.0,1.0,0.0,Fayey Connon,True
8691,9280_01,Europa,False,E/608/S,55 Cancri e,32.0,False,0.0,1049.0,0.0,353.0,3235.0,Celeon Hontichre,False


In [27]:
dftrain.isna().sum()

PassengerId       0
HomePlanet      201
CryoSleep       217
Cabin           199
Destination     182
Age             179
VIP             203
RoomService     181
FoodCourt       183
ShoppingMall    208
Spa             183
VRDeck          188
Name            200
Transported       0
dtype: int64

In [42]:
dftrain.fillna(dftrain.mode()).isna().sum()

PassengerId       0
HomePlanet      201
CryoSleep       217
Cabin           199
Destination     182
Age             179
VIP             203
RoomService     181
FoodCourt       183
ShoppingMall    208
Spa             183
VRDeck          188
Name            200
Transported       0
dtype: int64

In [43]:
cats = ['HomePlanet', 'Cabin', 'Destination']
conts = ['CryoSleep', 'Age', 'VIP', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']

In [59]:
dftrain['Cabin'].str[0].unique()

array(['B', 'F', 'A', 'G', nan, 'E', 'D', 'C', 'T'], dtype=object)

In [60]:
dftrain['Destination'].unique()

array(['TRAPPIST-1e', 'PSO J318.5-22', '55 Cancri e', nan], dtype=object)

In [64]:
def clean_df(df, fillmode='median'):
    df = df.drop(['PassengerId', 'Name'], axis = 1)
    
    df['Cabin'] = df['Cabin'].str[0]

    df = pd.get_dummies(df, columns=cats)
    
    fill_method = getattr(df, fillmode)
    df = df.fillna(fill_method())
    
    for cont in conts:
        df[cont] = np.log1p(df[cont].astype(float))
    
    
    return df

In [66]:
clean_df(dftrain).astype(float).head()

Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,HomePlanet_Earth,...,Cabin_B,Cabin_C,Cabin_D,Cabin_E,Cabin_F,Cabin_G,Cabin_T,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e
0,0.0,3.688879,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,3.218876,0.0,4.70048,2.302585,3.258097,6.309918,3.806662,1.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,0.0,4.077537,0.693147,3.78419,8.18228,0.0,8.812248,3.912023,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,3.526361,0.0,0.0,7.157735,5.918894,8.110728,5.267858,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,2.833213,0.0,5.717028,4.26268,5.023881,6.338594,1.098612,1.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


In [None]:
class SpaceTitanic(Dataset):
    def __init__(self, df, fillmode='median', train=True):
        self.train = train
        self.clean_ds = clean_df(df, fillmode)
        
    def __getitem__(self, i):
        row = self.clean_ds.iloc[i].astype(np.float64)
        return torch.tensor(row.drop(['Transported'])), torch.tensor(row.iloc[:'Transported'])
        
    def __len__(self):
        return len(self.clean_ds)

In [106]:
class Block(nn.Module):
    def __init__(self, n_in, n_out):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_in, n_out),
            nn.ReLU()
        )
        
    def forward(self, x):
        return self.net(x)

class SpaceTitanicModel(nn.Module):
    def __init__(self, n_in, n_out, n_h, n_hidden_layers=5):
        super().__init__()
        self.model = nn.Sequential(
            Block(n_in, n_h),
            *[Block(n_h, n_h) for i in range(n_hidden_layers)],
            Block(n_in, n_out)
        )
        
    def forward(self, x):
        return self.model(x)

In [107]:
ds = SpaceTitanic(dftrain)

In [108]:
bs = 32
dl = DataLoader(ds, batch_size=bs)
n_features = ds.clean_ds.shape[1]
model = SpaceTitanicModel(n_features, 1, 30)

In [109]:
class Learner():
    def __init__(self, model, dls, opt_fn=optim.SGD, loss_fn=F.mse_loss):
        self.model = model
        self.dls = dls
        self.opt = opt_fn(model.parameters(), lr=1e-3)
        self.loss_fn = loss_fn
        
    def fit(self, epochs=10):
        for epoch in range(epochs):
            for x, y in self.dls:
                pred = self.model(x)
                loss = self.loss_fn(pred, y)
                loss.backward()
                self.opt.step()
                self.opt.zero_grad()
                
                print(loss.item())

In [110]:
learn = Learner(model, dl, loss_fn=F.binary_cross_entropy_with_logits)

In [111]:
learn.fit()


  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transpo

RuntimeError: mat1 and mat2 must have the same dtype, but got Double and Float

In [112]:
next(iter(dl))

  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transported'])), torch.tensor(row.Transported)
  return torch.tensor(row.drop(['Transpo

[tensor([[0.0000, 3.6889, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 1.0000],
         [0.0000, 3.2189, 0.0000, 4.7005, 2.3026, 3.2581, 6.3099, 3.8067, 1.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 1.0000],
         [0.0000, 4.0775, 0.6931, 3.7842, 8.1823, 0.0000, 8.8122, 3.9120, 0.0000,
          1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 1.0000],
         [0.0000, 3.5264, 0.0000, 0.0000, 7.1577, 5.9189, 8.1107, 5.2679, 0.0000,
          1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 1.0000],
         [0.0000, 2.8332, 0.0000, 5.7170, 4.2627, 5.0239, 6.3386, 1.0986, 1.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000,
        