In [1]:
import os
import torch
import numpy as np
import pandas as pd

from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


# Configuration

In [14]:
# Global experiment settings; cells look values up by key.
config = dict(
    # Directory holding the CSV files (passed to TitanicDataset as data_root).
    data_root='./data',
    # Loader settings -- presumably for the DataLoader section; no visible cell reads them yet.
    batch_size=64,
    num_worker=2,
    # Training settings -- presumably for the Trainer section, which is still empty; confirm once written.
    trainer='Adam',
    epoch=5000,
    early_stop=10,
    save_period=1,
    device='mps',
)

# Utils

In [3]:
from torch.utils.data import random_split

In [4]:
def train_valid_split(full_dataset, valid_ratio, seed=None):
    """Randomly split a dataset into a training part and a validation part.

    Args:
        full_dataset: Dataset to split; handed to
            ``torch.utils.data.random_split`` unchanged.
        valid_ratio: Fraction of the samples assigned to the validation part;
            the remaining ``1 - valid_ratio`` go to the training part.
        seed: Optional integer. When given, the shuffle uses a dedicated
            ``torch.Generator`` seeded with it, so the same seed reproduces
            the same split. When ``None`` (the default) torch's global random
            state is used, exactly as before this parameter existed.

    Returns:
        Tuple ``(train_set, valid_set)`` holding the two subsets produced by
        ``random_split``.
    """
    fractions = [1 - valid_ratio, valid_ratio]
    if seed is None:
        train_set, valid_set = random_split(full_dataset, fractions)
    else:
        # A private generator keeps the split reproducible without touching
        # the global RNG that the rest of the notebook draws from.
        generator = torch.Generator().manual_seed(seed)
        train_set, valid_set = random_split(full_dataset, fractions, generator=generator)
    return train_set, valid_set

# Dataset

In [5]:
from torch.utils.data import Dataset

In [9]:
class TitanicDataset(Dataset):
    """Map-style dataset over the Titanic ``train.csv`` / ``test.csv`` files.

    Every sample is a dict ``{'feature': ..., 'label': ...}``:

    * ``feature`` -- float64 array of length 10 laid out as
      ``[pclass, female, male, age, sibsp, parch, fare,
      embarked(C), embarked(Q), embarked(S)]``. A missing ``Age`` is encoded
      as ``-1``; a missing or unlisted ``Sex`` / ``Embarked`` value gives an
      all-zero one-hot group. ``Name``, ``Ticket`` and ``Cabin`` are not used.
    * ``label`` -- ``Survived`` as a Python float when ``training`` is true,
      otherwise ``None``.

    Args:
        data_root: Directory containing the CSV files.
        training: Read ``train.csv`` (with labels) when true, else ``test.csv``.
        transform: Optional callable applied to each sample dict; its return
            value is what ``__getitem__`` returns.
    """

    # Fixed category order, so the feature layout never depends on which
    # values happen to occur in a particular CSV file.
    SEX_CATEGORIES = ('female', 'male')
    EMBARKED_CATEGORIES = ('C', 'Q', 'S')

    def __init__(self, data_root, training=False, transform=None):
        self.data_root = data_root
        self.training = training
        self.transform = transform

        file_name = 'train.csv' if training else 'test.csv'
        self.dataset = pd.read_csv(os.path.join(data_root, file_name))

        # Encode the whole table once; __getitem__ then only slices a row
        # instead of re-running the one-hot encoding for every sample.
        self._features = self._encode_features(self.dataset)
        self._labels = (
            self.dataset['Survived'].to_numpy(dtype=np.float64) if training else None
        )

    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the sample at integer position ``idx``.

        Raises:
            IndexError: If ``idx`` is outside the dataset.
        """
        # Copy so that in-place edits by a transform cannot corrupt the cache.
        feature = self._features[idx].copy()
        label = float(self._labels[idx]) if self.training else None

        sample = {'feature': feature, 'label': label}
        if self.transform:
            return self.transform(sample)
        return sample

    @staticmethod
    def _one_hot(column, categories):
        """Return an ``(n_rows, len(categories))`` float64 indicator matrix."""
        return np.column_stack(
            [column.eq(category).to_numpy(dtype=np.float64) for category in categories]
        )

    @classmethod
    def _encode_features(cls, frame):
        """Build the ``(n_rows, 10)`` float64 feature matrix for ``frame``."""

        def numeric(column_name):
            return frame[column_name].to_numpy(dtype=np.float64)

        return np.column_stack([
            numeric('Pclass'),
            cls._one_hot(frame['Sex'], cls.SEX_CATEGORIES),
            # Missing ages are encoded with the sentinel -1.
            frame['Age'].fillna(-1).to_numpy(dtype=np.float64),
            numeric('SibSp'),
            numeric('Parch'),
            numeric('Fare'),
            cls._one_hot(frame['Embarked'], cls.EMBARKED_CATEGORIES),
        ])

In [10]:
# Build the labelled dataset without a transform, so samples stay as NumPy
# arrays, then hold out 10% of it for validation.
dataset = TitanicDataset(data_root=config['data_root'], training=True, transform=None)
train_set, valid_set = train_valid_split(dataset, 0.1)
# Show one sample as a sanity check (last expression of the cell).
dataset[3]

{'feature': array([ 1. ,  1. ,  0. , 35. ,  1. ,  0. , 53.1,  0. ,  0. ,  1. ]),
 'label': 1.0}

# Data Loader

In [11]:
from torch.utils.data import DataLoader

  warn(f"Failed to load image Python extension: {e}")


## Transforms

In [12]:
class ToTensor():
    """Transform that converts the values of a sample dict to ``torch.Tensor``.

    Each value is cast to ``torch.get_default_dtype()``. NumPy arrays, NumPy
    scalars and plain Python numbers are accepted; ``None`` values (for
    example the label of an unlabelled sample) are left as ``None``.
    """

    def __init__(self) -> None:
        pass

    def __call__(self, data: dict) -> dict:
        """Convert ``data`` in place and return it.

        Args:
            data: Sample dict whose values are array-likes, numbers or ``None``.

        Returns:
            The same dict object, with every non-``None`` value replaced by a
            tensor of the default floating-point dtype.
        """
        target_dtype = torch.get_default_dtype()
        for key, value in data.items():
            if value is None:
                # Nothing to convert; keep the marker for "no label".
                continue
            # as_tensor also handles scalars, which torch.from_numpy rejects.
            data[key] = torch.as_tensor(value, dtype=target_dtype)
        return data

# Network

In [15]:
import torch.nn as nn

In [17]:
class SurvivalNet(nn.Module):
    """Multi-layer perceptron whose output is a softmax over the last layer.

    ``mlp_arch`` lists the layer widths from input to output, e.g.
    ``[10, 16, 8, 2]``: every consecutive pair except the last becomes a
    ``Linear -> ReLU`` block, and the final pair becomes ``Linear -> Softmax``.

    NOTE(review): the outputs are already softmax-normalised, whereas
    ``nn.CrossEntropyLoss`` is documented to expect unnormalised logits --
    confirm how this network is meant to be paired with the loss.

    Args:
        mlp_arch: Sequence of at least two integer layer widths.

    Raises:
        ValueError: If ``mlp_arch`` has fewer than two entries.
    """

    def __init__(self, mlp_arch):
        # nn.Module must be initialised before sub-modules can be assigned.
        super().__init__()
        if len(mlp_arch) < 2:
            raise ValueError('mlp_arch needs at least an input and an output width')

        hidden_blocks = [
            self.mlp_block(mlp_arch[i - 1], mlp_arch[i])
            for i in range(1, len(mlp_arch) - 1)
        ]
        # nn.Sequential takes its modules as positional arguments, not a list.
        self.mlp = nn.Sequential(*hidden_blocks)
        self.output_layer = nn.Sequential(
            nn.Linear(mlp_arch[-2], mlp_arch[-1]),
            # Normalise over the class (last) dimension explicitly.
            nn.Softmax(dim=-1),
        )

    def forward(self, x):
        """Run ``x`` through the hidden blocks and then the output layer."""
        x = self.mlp(x)
        x = self.output_layer(x)
        return x

    def mlp_block(self, dim_input, dim_output):
        """Return one hidden block: ``Linear(dim_input, dim_output)`` then ``ReLU``."""
        return nn.Sequential(
            nn.Linear(dim_input, dim_output),
            nn.ReLU(),
        )

# Loss

In [18]:
# Classification criterion; presumably consumed by the Trainer section, which is not written yet.
# NOTE(review): per the PyTorch docs nn.CrossEntropyLoss expects unnormalised logits, but
# SurvivalNet.output_layer already ends in nn.Softmax -- confirm which side should change.
loss = nn.CrossEntropyLoss()

# Metrics

In [19]:
def accurancy(prediction, target):
    """Return the fraction of samples whose rounded prediction equals the target.

    The misspelled function name is kept so existing references keep working.

    Args:
        prediction: Tensor of per-sample scores; each value is rounded to the
            nearest integer before comparison. It is compared element-wise
            with ``target``, so it is assumed to have the same shape --
            TODO confirm against the caller.
        target: Tensor of ground-truth labels.

    Returns:
        0-dim tensor holding ``number of matches / len(target)``.
    """
    assert len(prediction) == len(target)
    predicted_label = torch.round(prediction)
    # Count exact matches; summing |prediction - target| measured error instead.
    correct = torch.sum(predicted_label == target)
    return correct / len(target)

# Trainer

# Train