### DATA GENERATION

In [108]:
import os
import torch, numpy as np, pandas as pd
from pathlib import Path
from torch import tensor

path = Path('titanic')
if not path.exists():
    import zipfile,kaggle
    kaggle.api.competition_download_cli(str(path))
    zipfile.ZipFile(f'{path}.zip').extractall(path)

In [109]:
df = pd.read_csv(path/'train.csv')
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [110]:
df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


### Data Preparation

In [None]:
# Fill missing values with most common values
df.fillna(df.mode().iloc[0], inplace=True)
# Convert categorical variables to numerical
df = pd.get_dummies(df, columns=["Pclass", "Sex", "Embarked"])

# Normalize fare column
df['LogFare'] = np.log(df['Fare']+1)

# select dependent and independent variables
indep_cols = ['Age', 'SibSp', 'Parch', 'LogFare', 'Sex_male', 'Sex_female', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S']
dep_cols = ['Survived']

# Convert to numpy arrays
df[indep_cols] = df[indep_cols].astype(float)

#Prepare predictors and targets
predictors = tensor(df[indep_cols].values, dtype=torch.float)
targets = tensor(df[dep_cols].values, dtype=torch.float)  # Ensure targets are float

#Normalize predictors
vals, indices = predictors.max(dim=0)
predictors = predictors/vals

from fastai.data.transforms import RandomSplitter
from fastai.data.all import range_of

# Use RandomSplitter to get train/val indices
splitter = RandomSplitter(valid_pct=0.2, seed=42)
train_idx, val_idx = splitter(range_of(predictors))

train_predictors = predictors[train_idx]
train_targets = targets[train_idx]
val_predictors = predictors[val_idx]
val_targets = targets[val_idx]



### Create Neural Network from scratch

In [112]:
from typing import List, Tuple

def initialize_coeffs(layer_sizes):
    layers: List[torch.Tensor] = []
    constants: List[torch.Tensor] = []
    for i in range(len(layer_sizes)-1):
        layers.append((torch.rand(layer_sizes[i], layer_sizes[i+1])-0.3) / layer_sizes[i+1] * 4)
        constants.append(torch.rand(1)[0]*0.1)
    for l in layers+constants: l.requires_grad_()
    return layers, constants

def calc_preds(coeffs: Tuple[List[torch.Tensor]], predictors: List[torch.Tensor]):
        layers, constants = coeffs
        n = len(layers)
        res = predictors
        for i in range(n):
            res = res@layers[i] + constants[i]
            if i != n-1: res = torch.relu(res)
        return torch.sigmoid(res)

def calc_loss(coeffs: Tuple[List[torch.Tensor]], predictors: List[torch.Tensor], targets: List[torch.Tensor]): return torch.abs(calc_preds(coeffs, predictors) -targets).mean()

def update_coeffs(coeffs: Tuple[List[torch.Tensor]], lr: int):
    layers, constants = coeffs
    with torch.no_grad(): 
        for layer in layers + constants:
            layer.sub_(layer.grad * lr)
            layer.grad.zero_()

def one_epoch(coeffs: Tuple[List[torch.Tensor]], predictors: List[torch.Tensor], targets: List[torch.Tensor], lr: int):
    loss = calc_loss(coeffs, predictors, targets)
    loss.backward()
    update_coeffs(coeffs, lr)
    print(f"{loss:.3f}", end="; ")

def train_model(predictors, targets, layer_sizes, epochs=10, lr=0.1):
    coeffs = initialize_coeffs(layer_sizes)
    for i in range(epochs):
        one_epoch(coeffs, predictors, targets, lr)
    return coeffs

In [113]:

def calc_accuracy(coeffs: Tuple[List[torch.Tensor]], predictors: List[torch.Tensor], targets: List[torch.Tensor]) -> float:
    return (targets.bool()==(calc_preds(coeffs, predictors)>0.5)).float().mean()


### Train the Network

In [114]:
# One hidden layer with 10 neurons
coeffs = train_model(train_predictors, train_targets, [len(indep_cols), 10,  1], epochs=20, lr=0.5)
calc_accuracy(coeffs, val_predictors, val_targets)

0.609; 0.568; 0.453; 0.397; 0.357; 0.333; 0.310; 0.291; 0.277; 0.267; 0.259; 0.252; 0.247; 0.243; 0.240; 0.237; 0.235; 0.232; 0.231; 0.229; 

tensor(0.8034)

In [119]:
# Train the model with specified layer sizes
coeffs = train_model(predictors, targets, [len(indep_cols), 10, 10, 1], epochs=20, lr=0.5)
calc_accuracy(coeffs, val_predictors, val_targets)

0.603; 0.585; 0.511; 0.497; 0.491; 0.478; 0.469; 0.483; 0.462; 0.439; 0.435; 0.439; 0.474; 0.463; 0.436; 0.396; 0.388; 0.380; 0.374; 0.368; 

tensor(0.7640)

In [116]:
# Three hidden layers with 10, neurons each
coeffs = train_model(predictors, targets, [len(indep_cols), 10, 10, 10, 1], epochs=20, lr=0.5)
calc_accuracy(coeffs, val_predictors, val_targets)

0.594; 0.555; 0.503; 0.500; 0.494; 0.489; 0.486; 0.516; 0.494; 0.490; 0.488; 0.487; 0.485; 0.483; 0.482; 0.480; 0.478; 0.477; 0.475; 0.474; 

tensor(0.5955)

In [117]:
# Five hidden layers with 10, neurons each
coeffs = train_model(predictors, targets, [len(indep_cols), 10, 10, 10, 10, 10, 1], epochs=20, lr=0.5)
calc_accuracy(coeffs, val_predictors, val_targets)

0.615; 0.615; 0.615; 0.613; 0.606; 0.504; 0.502; 0.500; 0.498; 0.497; 0.495; 0.493; 0.492; 0.490; 0.488; 0.487; 0.485; 0.483; 0.482; 0.480; 

tensor(0.5955)

### Conclusion

As we can see the loss and accuracy is coming down with each layer, this might be caused, by not big enough dataset, and also, in that case the data cant have enough features to support the amount of neurons, the problem just doesnt need such a big amount of neurons