First, I'll reproduce the code from Jeremy Howard's notebook:
https://www.kaggle.com/code/jhoward/how-does-a-neural-net-really-work

In [171]:
from matplotlib import pyplot
import numpy
import torch
from ipywidgets import interact
from fastai.basics import *

plt.rc('figure', dpi=90)

def plot_function(f, title=None, min=-2.1, max=2.1, color='r', ylim=None):
    x = torch.linspace(min,max, 100)[:,None]
    if ylim: plt.ylim(ylim)
    plt.plot(x, f(x), color)
    if title is not None: plt.title(title)


def f(x): return 3*x**2 + 2*x + 1
def quad(a, b, c, x): return a * x ** 2 + b * x + c
def make_quad(a, b, c): return partial(quad, a, b, c)
def mean_absolute_error(predictions, actuals): return torch.abs(predictions - actuals).mean()
def noise(x, scale): return numpy.random.normal(scale=scale, size=x.shape)
def add_noise(x, mult, add): return x * (1 + noise(x, mult)) + noise(x, add)
numpy.random.seed(42)
x = torch.linspace(-2, 2, steps=20)[:,None]
y = add_noise(f(x), 0.15, 1.5)
def quad_mean_absolute_error(params):
	f = make_quad(*params)
	return mean_absolute_error(f(x), y)


In [172]:
abc = torch.tensor([1.1, 1.1, 1.1])
abc.requires_grad_()

loss = quad_mean_absolute_error(abc)
loss

tensor(2.4219, dtype=torch.float64, grad_fn=<MeanBackward0>)

In [173]:
loss.backward()

In [174]:
abc.grad

tensor([-1.3529, -0.0316, -0.5000])

In [175]:
with torch.no_grad():
    abc -= abc.grad*0.01
    loss = quad_mean_absolute_error(abc)
    
print(f'loss={loss:.2f}')

loss=2.40


In [176]:
for i in range(10):
    loss = quad_mean_absolute_error(abc)
    loss.backward()
    with torch.no_grad(): abc -= abc.grad*0.01
    print(f'Step={i}; loss={loss:.2f}')

Step=0; loss=2.40
Step=1; loss=2.36
Step=2; loss=2.30
Step=3; loss=2.21
Step=4; loss=2.11
Step=5; loss=1.98
Step=6; loss=1.85
Step=7; loss=1.72
Step=8; loss=1.58
Step=9; loss=1.46


In [177]:
def rectified_linear(m,b,x):
    y = m*x+b
    return torch.clip(y, 0.)

In [178]:
@interact(m=1.5,b=1.5)
def plot_relu(m,b):
    plot_function(partial(rectified_linear, m,b))

interactive(children=(FloatSlider(value=1.5, description='m', max=4.5, min=-1.5), FloatSlider(value=1.5, descr…

### Now let's try and "manually" (meaning: without a predefined nn architecture) create a super-basic NN and optimize it for the Titanic Competition
https://www.kaggle.com/competitions/titanic

In [179]:
import pandas as pd

# import csv
csv = pd.read_csv("data/titanic/train.csv")

# cleanup superfluous data and convert columns to usable numbers
dataframe = pd.DataFrame(csv)
dataframe = dataframe.drop(columns=["PassengerId","Ticket", "Name","Cabin"])
# drop rows with empty cells
dataframe = dataframe.dropna()

dataframe['isMale'] = dataframe['Sex'].apply(lambda gender : 1 if gender == 'male' else 0)
dataframe['isEmbarkedS'] = dataframe['Embarked'].apply(lambda harbour: 1 if harbour == 'S' else 0)
dataframe['isEmbarkedC'] = dataframe['Embarked'].apply(lambda harbour: 1 if harbour == 'C' else 0)
dataframe['ageNormalized'] = dataframe['Age'].apply(lambda age: age / dataframe['Age'].max())
dataframe['fareNormalized'] = dataframe['Fare'].apply(lambda fare: numpy.log10(fare))
dataframe

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,isMale,isEmbarkedS,isEmbarkedC,ageNormalized,fareNormalized
0,0,3,male,22.0,1,0,7.2500,S,1,1,0,0.2750,0.860338
1,1,1,female,38.0,1,0,71.2833,C,0,0,1,0.4750,1.852988
2,1,3,female,26.0,0,0,7.9250,S,0,1,0,0.3250,0.898999
3,1,1,female,35.0,1,0,53.1000,S,0,1,0,0.4375,1.725095
4,0,3,male,35.0,0,0,8.0500,S,1,1,0,0.4375,0.905796
...,...,...,...,...,...,...,...,...,...,...,...,...,...
885,0,3,female,39.0,0,5,29.1250,Q,0,0,0,0.4875,1.464266
886,0,2,male,27.0,0,0,13.0000,S,1,1,0,0.3375,1.113943
887,1,1,female,19.0,0,0,30.0000,S,0,1,0,0.2375,1.477121
889,1,1,male,26.0,0,0,30.0000,C,1,0,1,0.3250,1.477121


#### Initialize our 2 layer NN with random values

In [180]:
params = pd.DataFrame(np.random.random((2,8)))
params

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.969585,0.775133,0.939499,0.894827,0.5979,0.921874,0.088493,0.195983
1,0.045227,0.32533,0.388677,0.271349,0.828738,0.356753,0.280935,0.542696


#### Some preparations: Create a dataframe that contains only the values we need for easier iteration, prepare the functions for creating predictions and loss, prepare some DFs to hold our results

In [242]:
cleanDf = dataframe[['Pclass','SibSp','Parch','isMale','isEmbarkedS','isEmbarkedC','ageNormalized','fareNormalized']]
resultsLayer1, resultsLayer2 = pd.DataFrame(),pd.DataFrame()

for i in range(0,8):
  resultsLayer1[i] = cleanDf.iloc[:,i:i+1].apply(lambda x : x * params.iat[0,i])


resultsLayer1




Unnamed: 0,0,1,2,3,4,5,6,7
0,2.908754,0.775133,0.000000,0.894827,0.5979,0.000000,0.024335,0.168612
1,0.969585,0.775133,0.000000,0.000000,0.0000,0.921874,0.042034,0.363154
2,2.908754,0.000000,0.000000,0.000000,0.5979,0.000000,0.028760,0.176188
3,0.969585,0.775133,0.000000,0.000000,0.5979,0.000000,0.038715,0.338089
4,2.908754,0.000000,0.000000,0.894827,0.5979,0.000000,0.038715,0.177520
...,...,...,...,...,...,...,...,...
885,2.908754,0.000000,4.697495,0.000000,0.0000,0.000000,0.043140,0.286971
886,1.939169,0.000000,0.000000,0.894827,0.5979,0.000000,0.029866,0.218314
887,0.969585,0.000000,0.000000,0.000000,0.5979,0.000000,0.021017,0.289490
889,0.969585,0.000000,0.000000,0.894827,0.0000,0.921874,0.028760,0.289490


In [None]:
# todo: 
# - create reLU with clip that takes an entry of the param and always +1
# - use tensors for params / NN layers with gradient 
# - mean absolute error (see below)
# - gradient function
# - optimization loop

# this is still incorrect
result = pd.DataFrame(resultLayer2.apply(sum, axis=1),columns=['prediction'])
result['actual'] = dataframe['Survived']
result['loss'] = result.sum(axis=1)
result
resultLayer1 = cleanDf.mul(np.array(params.iloc[[0]]), axis=1)
resultLayer2 = cleanDf.mul(np.array(params.iloc[[1]]), axis=1)

def mae(preds, acts): return (torch.abs(preds-acts)).mean()
def rectified_linear(m,b,x):
    y = m*x+b
    return torch.clip(y, 0.)