# Imports

In [1]:
import random
from value import Value
from graph_visualization_code import draw_dot

# Training Loop

In [2]:
class Neuron:
    """
    One unit of the network: a weight per input plus a bias, squashed by tanh.
    """

    def __init__(self, nin):
        """
        nin: how many inputs feed this neuron (one weight is created per input)
        """
        # Weights are drawn first, then the bias, each uniformly from [-1, 1].
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1, 1)))
        self.w = weights
        self.b = Value(random.uniform(-1, 1)) # offset added to the weighted sum

    def __call__(self, x):
        """
        Forward pass of the neuron: tanh(w . x + b)

        x: the inputs, paired with the weights position by position

        Steps:
            1. Start the running total at the bias
            2. Add each pairwise product wi * xi to it
            3. Squash the total with tanh and return the result

        zip stops at the shorter of w and x, so surplus weights or inputs are
        silently left out of the sum.
        """
        total = self.b
        for wi, xi in zip(self.w, x):
            total = total + wi * xi
        return total.tanh()

    def parameters(self):
        """
        All trainable values of this neuron, weights first and bias last
        (mirrors the parameters call PyTorch has on every module)
        """
        params = list(self.w) # copy, so the caller cannot mutate self.w through the result
        params.append(self.b)
        return params

class Layer:
    """
    A group of neurons that all receive the same input

    nin: number of inputs each neuron in the layer takes
    nout: number of neurons in the layer (and so the number of outputs)
    """

    def __init__(self, nin, nout):
        neurons = []
        for _ in range(nout):
            neurons.append(Neuron(nin))
        self.neurons = neurons

    def __call__(self, x):
        """
        Feed x to every neuron and collect the results in neuron order.

        A layer with exactly one neuron returns that single output directly
        rather than a one-element list.
        """
        activations = [neuron(x) for neuron in self.neurons]
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        """
        Flat list of every neuron's parameters, in neuron order
        """
        params = []
        for neuron in self.neurons:
            params.extend(neuron.parameters())
        return params
        
class MLP:
    """
    A multi-layer perceptron: a chain of Layers where each layer's output is
    the next layer's input.
    """

    def __init__(self, nin, nouts):
        """
        nin: number of inputs to the first layer
        nouts: the size (number of neurons) of every layer, in order; the last
               entry is the size of the output layer. Any iterable of sizes is
               accepted (list, tuple, range, ...), not only a list.
        """
        # Materialise nouts once so tuples and one-shot iterables work too.
        sz = [nin] + list(nouts)
        # Layer i maps sz[i] inputs to sz[i + 1] outputs; pairing the list with
        # itself shifted by one yields exactly those (inputs, outputs) pairs.
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz, sz[1:])]

    def __call__(self, x):
        """
        Forward pass: run x through every layer in order and return the output
        of the last one (x itself if there are no layers).
        """
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """
        Flat list of the parameters of every layer, in layer order
        """
        return [p for layer in self.layers for p in layer.parameters()]

The network structure:

In [3]:
# One example with three features, used to try out the forward pass.
x = [2.0, 3.0, -1.0]
# The input width comes from the example; two layers of 4 neurons feed a single output neuron.
n = MLP(len(x), [4, 4, 1])
n(x)

Value(data=-0.5978655363471771)

The data: four training examples (three inputs each) and their target outputs.

In [4]:
# Four training examples; each row holds the three inputs fed to the network.
xs = [
    [2.0, 3.0, -1.0], # desired output: 1.0
    [3.0, -1.0, 0.5], # desired output: -1.0
    [0.5, 1.0, 1.0], # desired output: -1.0
    [1.0, 1.0, -1.0], # desired output: 1.0
]
ys = [1.0, -1.0, -1.0, 1.0] # desired targets, in the same order as the rows of xs

Our training loop:

In [5]:
for k in range(20):
    # forward pass: one prediction per training example, then the sum of squared errors
    ypred = list(map(n, xs))
    loss = sum((y_output - y_ground_truth) ** 2 for y_output, y_ground_truth in zip(ypred, ys))

    # backward pass: clear the gradients left over from the previous step before backpropagating
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # gradient descent: move every parameter a small step against its gradient
    for p in n.parameters():
        p.data -= 0.05 * p.grad

    # report the step number and the loss measured before this step's update
    print(k, loss.data)

0 5.226138135169745
1 1.9463119241537454
2 1.1348461224920454
3 0.6575899537017964
4 0.4081322564768234
5 0.2815404514170492
6 0.21012713395765253
7 0.16472114190415132
8 0.13384088388732432
9 0.11176560464612696
10 0.09535826438928993
11 0.08277734820717694
12 0.07288099761774844
13 0.06492883626834328
14 0.05842268887336329
15 0.05301672222853071
16 0.04846436880990715
17 0.044585728915217775
18 0.04124686742831188
19 0.038346276450447726


In [6]:
# Predictions from the last iteration's forward pass; they were computed before that iteration's parameter update.
ypred

[Value(data=0.9008488908408923),
 Value(data=-0.8850568105053157),
 Value(data=-0.918501485522933),
 Value(data=0.9069334145367586)]

Neural nets can be tricky: they can appear to work in spite of bugs in the code, such as forgetting to reset the gradients to zero before each backward pass.

# Summary

In [None]:
# TODO: left off at 2:14:00