First, I'll reproduce the code from Jeremy Howard's notebook:
https://www.kaggle.com/code/jhoward/how-does-a-neural-net-really-work

In [86]:
from matplotlib import pyplot
import numpy
import torch
from ipywidgets import interact
from fastai.basics import *

plt.rc('figure', dpi=90)

def plot_function(f, title=None, min=-2.1, max=2.1, color='r', ylim=None):
    x = torch.linspace(min,max, 100)[:,None]
    if ylim: plt.ylim(ylim)
    plt.plot(x, f(x), color)
    if title is not None: plt.title(title)


def f(x): return 3*x**2 + 2*x + 1
def quad(a, b, c, x): return a * x ** 2 + b * x + c
def make_quad(a, b, c): return partial(quad, a, b, c)
def mean_absolute_error(predictions, actuals): return torch.abs(predictions - actuals).mean()
def noise(x, scale): return numpy.random.normal(scale=scale, size=x.shape)
def add_noise(x, mult, add): return x * (1 + noise(x, mult)) + noise(x, add)
numpy.random.seed(42)
x = torch.linspace(-2, 2, steps=20)[:,None]
y = add_noise(f(x), 0.15, 1.5)
def quad_mean_absolute_error(params):
	f = make_quad(*params)
	return mean_absolute_error(f(x), y)


In [87]:
abc = torch.tensor([1.1, 1.1, 1.1])
abc.requires_grad_()

loss = quad_mean_absolute_error(abc)
loss

tensor(2.4219, dtype=torch.float64, grad_fn=<MeanBackward0>)

In [88]:
loss.backward()

In [89]:
abc.grad

tensor([-1.3529, -0.0316, -0.5000])

In [90]:
with torch.no_grad():
    abc -= abc.grad*0.01
    loss = quad_mean_absolute_error(abc)
    
print(f'loss={loss:.2f}')

loss=2.40


In [91]:
for i in range(10):
    loss = quad_mean_absolute_error(abc)
    loss.backward()
    with torch.no_grad(): abc -= abc.grad*0.01
    print(f'Step={i}; loss={loss:.2f}')

Step=0; loss=2.40
Step=1; loss=2.36
Step=2; loss=2.30
Step=3; loss=2.21
Step=4; loss=2.11
Step=5; loss=1.98
Step=6; loss=1.85
Step=7; loss=1.72
Step=8; loss=1.58
Step=9; loss=1.46


In [92]:
def rectified_linear(m,b,x):
    y = m*x+b
    return torch.clip(y, 0.)

In [93]:
@interact(m=1.5,b=1.5)
def plot_relu(m,b):
    plot_function(partial(rectified_linear, m,b))

interactive(children=(FloatSlider(value=1.5, description='m', max=4.5, min=-1.5), FloatSlider(value=1.5, descr…

### Now let's try and "manually" (meaning: without a predefined nn architecture) create a super-basic NN and optimize it for the Titanic Competition
https://www.kaggle.com/competitions/titanic

In [94]:
import pandas as pd

# import csv
csv = pd.read_csv("data/titanic/train.csv")

# cleanup superfluous data and convert columns to usable numbers
dataframe = pd.DataFrame(csv)
dataframe = dataframe.drop(columns=["PassengerId","Ticket", "Name","Cabin"])
# drop rows with empty cells
dataframe = dataframe.dropna()

dataframe['pClass1'] = dataframe['Pclass'].apply(lambda pclass : 1 if pclass == 1 else 0 )
dataframe['pClass2'] = dataframe['Pclass'].apply(lambda pclass : 1 if pclass == 2 else 0)
dataframe['isMale'] = dataframe['Sex'].apply(lambda gender : 1 if gender == 'male' else 0)
dataframe['isEmbarkedS'] = dataframe['Embarked'].apply(lambda harbour: 1 if harbour == 'S' else 0)
dataframe['isEmbarkedC'] = dataframe['Embarked'].apply(lambda harbour: 1 if harbour == 'C' else 0)
dataframe['ageNormalized'] = dataframe['Age'].apply(lambda age: age / dataframe['Age'].max())
dataframe['fareNormalized'] = dataframe['Fare'].apply(lambda fare: numpy.log10(fare))
dataframe

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,pClass1,pClass2,isMale,isEmbarkedS,isEmbarkedC,ageNormalized,fareNormalized
0,0,3,male,22.0,1,0,7.2500,S,0,0,1,1,0,0.2750,0.860338
1,1,1,female,38.0,1,0,71.2833,C,1,0,0,0,1,0.4750,1.852988
2,1,3,female,26.0,0,0,7.9250,S,0,0,0,1,0,0.3250,0.898999
3,1,1,female,35.0,1,0,53.1000,S,1,0,0,1,0,0.4375,1.725095
4,0,3,male,35.0,0,0,8.0500,S,0,0,1,1,0,0.4375,0.905796
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
885,0,3,female,39.0,0,5,29.1250,Q,0,0,0,0,0,0.4875,1.464266
886,0,2,male,27.0,0,0,13.0000,S,0,1,1,1,0,0.3375,1.113943
887,1,1,female,19.0,0,0,30.0000,S,1,0,0,1,0,0.2375,1.477121
889,1,1,male,26.0,0,0,30.0000,C,1,0,1,0,1,0.3250,1.477121


#### Initialize our 2 layer NN with random values

In [99]:
params = torch.tensor(pd.DataFrame(np.random.random((2,10))).values)
params.requires_grad_()

tensor([[0.2898, 0.1612, 0.9297, 0.8081, 0.6334, 0.8715, 0.8037, 0.1866, 0.8926,
         0.5393],
        [0.8074, 0.8961, 0.3180, 0.1101, 0.2279, 0.4271, 0.8180, 0.8607, 0.0070,
         0.5107]], dtype=torch.float64, requires_grad=True)

#### Some preparations: Create a cleaned up dataframe with only the data we need and layers for receiving our calculations
#### Apply reLU to cleaned df

In [100]:
cleanDf = dataframe[['SibSp','Parch','pClass1','pClass2','isMale','isEmbarkedS','isEmbarkedC','ageNormalized','fareNormalized']]
layer1,layer2,predictions = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

def mae(preds, acts): return (torch.abs(preds-acts)).mean()

# make two layers of reLUs and sum them up to get our predictions
for i in range(0,9):
  layer1[i] = cleanDf.iloc[:,i:i+1].apply(lambda x : params[0,i].item() * x + params[0,9].item()).clip(lower=0)
  layer2[i] = cleanDf.iloc[:,i:i+1].apply(lambda x : params[1,i].item() * x + params[1,9].item()).clip(lower=0)
    
predictions = layer1.add(layer2).sum(axis=1)
# calculate loss based on predictions - actual survival rate
loss = mae(torch.tensor(predictions.values), torch.tensor(dataframe['Survived'].values))
loss.backward()

# todo: 
# - tensors, gradient - how to multiply in a way that results in usable tensors?
# - gradient function
# - optimization loop


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn