In [533]:
import torch

import pandas as pd
import numpy as np

In [534]:
df = pd.read_csv("../data/boston.csv")
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [535]:
def model(W, x, b):
    return x @ W + b

In [536]:
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

In [537]:
def initialize_layer_parameters(n_in, n_out):
    W = torch.rand(n_in, n_out) * torch.sqrt(torch.tensor(2.0/n_in))
    b = torch.zeros((1, n_out))

    W.requires_grad_(True)
    b.requires_grad_(True)
    return W, b

In [538]:
def sigmoid(Z):
    return 1 / (1 + torch.exp(-Z))

def relu(Z):
    return torch.maximum(Z, torch.tensor(0))


In [539]:
def gradient_descent(parameters):
    W = parameters["W"]
    b = parameters["b"]
    
    W -= W.grad * 5e-3
    b -= b.grad * 5e-3

    W.grad.zero_()
    b.grad.zero_()

In [540]:
X = df.drop("TAX", axis=1).to_numpy(dtype='float32')
y = df["TAX"].to_numpy(dtype='float32')

In [541]:
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X = (X - X_mean) / X_std

y_mean = y.mean()
y_std = y.std()
y = (y - y_mean) / y_std

In [542]:
X = torch.from_numpy(X)
y = torch.from_numpy(y.reshape(-1, 1))

In [543]:
parameters = {}
layers = [X.shape[1], 15, 10, 3, 1]
activations = ["relu", "relu", "relu", "linear"]

In [544]:
for i in range(1, len(layers)):
    W, b = initialize_layer_parameters(layers[i-1], layers[i])
    parameters[i] = {
        "W": W,
        "b": b
    }

In [545]:
EPOCHS = 1500
layers_size = len(parameters)

In [546]:
def train():
    for i in range(EPOCHS):
        A = X
        for l in range(1, layers_size+1):
            params = parameters[l]
            W = params["W"]
            b = params["b"]


            Z = model(W, A, b)
            parameters[l]["Z"] = Z

            activation = activations[l-1]
            if activation == "sigmoid":
                A = sigmoid(Z)
            elif activation == "relu":
                A = relu(Z)
            elif activation == "linear":
                A = Z
            
            parameters[l]["A"] = A

        A = parameters[layers_size]["A"]
        loss = mse(y, A)
        loss.backward()

        with torch.no_grad():
            for l in range(layers_size, 0, -1): 
                gradient_descent(parameters[l])

        if i % 100 == 0:
            print(f"{i}/{EPOCHS} - Train Loss: {loss}")

In [547]:
def test():
    A = X
    for l in range(1, layers_size+1):
        params = parameters[l]
        W = params["W"]
        b = params["b"]


        Z = model(W, A, b)

        activation = activations[l-1]
        if activation == "sigmoid":
            A = sigmoid(Z)
        elif activation == "relu":
            A = relu(Z)
        elif activation == "linear":
            A = Z

    print((A * y_std) + y_mean)

In [548]:
train()

0/1500 - Train Loss: 14.517006874084473


100/1500 - Train Loss: 0.4296911656856537
200/1500 - Train Loss: 0.3401240110397339
300/1500 - Train Loss: 0.2915806770324707
400/1500 - Train Loss: 0.25967568159103394
500/1500 - Train Loss: 0.2366703301668167
600/1500 - Train Loss: 0.2188119888305664
700/1500 - Train Loss: 0.20447972416877747
800/1500 - Train Loss: 0.19268180429935455
900/1500 - Train Loss: 0.1828320324420929
1000/1500 - Train Loss: 0.1745544821023941
1100/1500 - Train Loss: 0.16753098368644714
1200/1500 - Train Loss: 0.16158123314380646
1300/1500 - Train Loss: 0.1564985066652298
1400/1500 - Train Loss: 0.15214747190475464


In [549]:
test()

tensor([[302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [307.6311],
        [320.1954],
        [302.8670],
        [307.8325],
        [302.9834],
        [302.9834],
        [302.8875],
        [304.1556],
        [302.9834],
        [302.9834],
        [306.4360],
        [302.9834],
        [302.9834],
        [330.9617],
        [309.7644],
        [328.0709],
        [335.9742],
        [321.7625],
        [315.3808],
        [314.4230],
        [324.7657],
        [325.4342],
        [320.4720],
        [335.4705],
        [316.9410],
        [359.3977],
        [319.8822],
        [345.9177],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [302.9834],
        [315.0491],
        [302.9834],
