In [1]:
import torch
import numpy as np
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
from pandas.api.types import is_numeric_dtype
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')

In [2]:
# Read the training data from the working directory and preview the first rows.
# The feature-engineering cell further down adds columns to `df`, so re-run this
# cell first whenever you need to start again from the raw file contents.
df = pd.read_csv("train.csv")
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# Pin the NumPy and PyTorch (CPU and CUDA) random number generators so the
# randomly initialised network weights are reproducible after a kernel restart.
SEED = 442
for _seed_rng in (
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_rng(SEED)

# Feature Engineering / Data Cleaning

In [4]:
# Fill every missing value with the most frequent value of its column
# (first row of `DataFrame.mode()`), rebinding `df` instead of mutating in place.
df = df.fillna(df.mode().iloc[0])

# Treat passenger class as a categorical label so get_dummies one-hot encodes it.
df['Pclass'] = df['Pclass'].astype('str')

# Log-transform the fare; the +1 keeps zero fares finite.
df['LogFare'] = np.log(df['Fare'] + 1)

# One-hot encode the categorical predictors as 0/1 integer columns.
temp = pd.get_dummies(df[["Sex", "Pclass", "Embarked"]]).astype(int)

# Drop any dummy columns left over from a previous run of this cell before
# concatenating. Without this, re-running the cell appends a second copy of
# every dummy column and `df[indep_cols]` silently returns duplicated features.
df = pd.concat([df.drop(columns=temp.columns, errors="ignore"), temp], axis=1)

# Target vector.
y = torch.tensor(df['Survived'].values)

added_cols = ['Sex_male', 'Sex_female', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S']
indep_cols = ['Age', 'SibSp', 'Parch', 'LogFare'] + added_cols

X = torch.tensor(df[indep_cols].values, dtype=torch.float64)
assert X.shape[1] == len(indep_cols), "duplicate feature columns in df"

# Scale each feature by its column maximum.
# NOTE(review): the maxima (and the mode fill above) are computed on the full
# dataset, i.e. before the train/test split that follows in the notebook.
vals, index = X.max(dim=0)
X = X / vals

# Split the data

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Create NN from scratch

![Two-or-more-hidden-layers-comprise-a-Deep-Neural-Network.webp](attachment:60c1548e-1e65-4f29-870e-ed230e0bb68e.webp)

In [6]:
import torch.nn.functional as F

def init_coe(n1, n2, n_in=None):
    """Create randomly initialised parameters for a network with two hidden layers.

    Args:
        n1: Number of units in the first hidden layer.
        n2: Number of units in the second hidden layer.
        n_in: Number of input features. When omitted, it is read from the
            notebook-level ``X_train`` (``X_train.shape[1]``), which matches the
            previous behaviour.

    Returns:
        A ``(layers, consts)`` tuple. ``layers`` holds one weight matrix per
        layer transition (``n_in -> n1 -> n2 -> 1``); ``consts`` holds one
        scalar bias per transition. Every tensor has ``requires_grad`` enabled.
    """
    if n_in is None:
        n_in = X_train.shape[1]
    sizes = [n_in, n1, n2, 1]

    # All weight matrices are drawn before any bias so that, for a given seed,
    # the random numbers are consumed in the same order as before.
    # Weights: uniform [0, 1) shifted by -0.3, divided by the layer's output
    # width and multiplied by 4.
    layers = [
        (torch.rand(fan_in, fan_out) - 0.3) / fan_out * 4
        for fan_in, fan_out in zip(sizes, sizes[1:])
    ]
    # Biases: one scalar per layer, uniform in [-0.5, 0.5).
    consts = [torch.rand(1)[0] - 0.5 for _ in range(len(sizes) - 1)]

    # Enable gradient tracking on every parameter so backward() populates .grad.
    for param in layers + consts:
        param.requires_grad_()
    return layers, consts
    
# Turn the targets into column vectors so they line up with the single-column
# output of the network when the loss subtracts them element-wise.
# reshape(-1, 1) is used instead of [:, None] so that re-running this cell is
# harmless: indexing with None would add yet another axis on every run.
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

def predict(w, x):
    """Run a forward pass through the network.

    Args:
        w: ``(layers, consts)`` pair of weight matrices and per-layer biases.
        x: Input tensor; it is cast to ``float32`` before use.

    Returns:
        The sigmoid of the final layer's output. ReLU is applied after every
        layer except the last one.
    """
    layers, consts = w
    depth = len(layers)
    out = x.to(torch.float32)
    for idx in range(depth):
        out = out @ layers[idx] + consts[idx]
        # Hidden layers get a ReLU; the output layer is left linear for the sigmoid.
        if idx + 1 < depth:
            out = F.relu(out)
    return torch.sigmoid(out)

def update_weights(w, lr):
    """Apply one gradient-descent step to every parameter and reset its gradient.

    Args:
        w: ``(layers, consts)`` pair; every tensor must already have ``.grad``
            populated by a preceding ``backward()`` call.
        lr: Learning rate that scales the gradient step.
    """
    layers, consts = w
    # In-place arithmetic on leaf tensors that require grad is only allowed
    # while autograd is disabled. Doing it here makes the function safe to call
    # on its own; calling it inside an outer no_grad block still works.
    with torch.no_grad():
        for param in layers + consts:
            param -= param.grad * lr
            param.grad.zero_()


def mae(pre, acts):
    """Return the mean absolute error between predictions and actual values."""
    diff = pre - acts
    return torch.mean(torch.abs(diff))
    
def one_epoch(coe, lr):
    """Perform a single gradient-descent step using the whole training set.

    Args:
        coe: ``(layers, consts)`` parameters, updated in place.
        lr: Learning rate passed on to ``update_weights``.
    """
    preds = predict(coe, X_train)
    loss = mae(preds, y_train)
    loss.backward()
    # Parameter updates must not be recorded by autograd.
    with torch.no_grad():
        update_weights(coe, lr)

In [7]:
def train(epoch=100, n_hidden1=10, n_hidden2=10, lr=0.1):
    """Initialise a fresh network, train it, and report the final training loss.

    Args:
        epoch: Number of gradient-descent steps to run.
        n_hidden1: Width of the first hidden layer.
        n_hidden2: Width of the second hidden layer.
        lr: Learning rate used for every step.

    Returns:
        The trained ``(layers, consts)`` parameters.
    """
    coe = init_coe(n_hidden1, n_hidden2)
    for _ in range(epoch):
        one_epoch(coe, lr)
    # Report the mean absolute error on the training set after the last step.
    final_loss = mae(predict(coe, X_train), y_train)
    print(final_loss)
    return coe

In [8]:
# Train for 1000 full-batch steps with the default 10/10 hidden-layer widths.
coe = train(epoch=1000, lr=0.2)

tensor(0.1835, grad_fn=<MeanBackward0>)
