In [400]:
import torch

import pandas as pd
import numpy as np

In [401]:
df = pd.read_csv("../data/boston.csv")
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [402]:
def model(W, x, b):
    return x @ W + b

In [403]:
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

In [404]:
def initialize_layer_parameters(n_in, n_out):
    W = torch.rand(n_in, n_out) * torch.sqrt(torch.tensor(2.0/n_in))
    b = torch.zeros((1, n_out))

    W.requires_grad_(True)
    b.requires_grad_(True)
    return W, b

In [405]:
def sigmoid(Z):
    return 1 / (1 + torch.exp(-Z))

def relu(Z):
    return torch.maximum(Z, torch.tensor(0))


In [406]:
X = df.drop("TAX", axis=1).to_numpy(dtype='float32')
y = df["TAX"].to_numpy(dtype='float32')

In [407]:
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X = (X - X_mean) / X_std

y_mean = y.mean()
y_std = y.std()
y = (y - y_mean) / y_std

In [408]:
X = torch.from_numpy(X)
y = torch.from_numpy(y.reshape(-1, 1))

In [409]:
parameters = {}
layers = [X.shape[1], 15, 10, 3, 1]
activations = ["relu", "relu", "relu", "linear"]

In [410]:
for i in range(1, len(layers)):
    W, b = initialize_layer_parameters(layers[i-1], layers[i])
    parameters[i] = {
        "W": W,
        "b": b
    }

In [411]:
layers_size = len(parameters)

for i in range(1500):
    A = X
    for l in range(1, layers_size+1):
        params = parameters[l]
        W = params["W"]
        b = params["b"]


        Z = model(W, A, b)
        parameters[l]["Z"] = Z

        activation = activations[l-1]
        if activation == "sigmoid":
            A = sigmoid(Z)
        elif activation == "relu":
            A = relu(Z)
        elif activation == "linear":
            A = Z
        
        parameters[l]["A"] = A

    A = parameters[layers_size]["A"]
    loss = mse(y, A)
    loss.backward()

    with torch.no_grad():
        for l in range(layers_size, 0, -1): 
            W = parameters[l]["W"]
            b = parameters[l]["b"]
            
            W -= W.grad * 1e-2
            b -= b.grad * 1e-2

            W.grad.zero_()
            b.grad.zero_()

In [412]:
loss.item()

0.12804614007472992