In [79]:
import math
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [80]:
class Value:
  """Node of a dynamically built computation graph with reverse-mode autodiff.

  Every arithmetic operation returns a new ``Value`` that remembers its operands
  (``_prev``), the operation that produced it (``_op``) and a closure
  (``_backward``) that pushes its own gradient onto the operands.

  Attributes:
    data: the numeric payload (a scalar for every op except ``softmax``,
      which also accepts a NumPy array).
    grad: derivative of the node ``backward()`` was called on with respect to
      this node; starts at 0.0 and is accumulated with ``+=``.
    label: free-form name, not used by the class itself.
  """

  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    self._backward = lambda: None  # leaves have nothing to propagate
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"

  def __add__(self, other):
    """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')

    def _backward():
      # d(a+b)/da = d(a+b)/db = 1
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward

    return out

  def __mul__(self, other):
    """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')

    def _backward():
      # d(a*b)/da = b, d(a*b)/db = a
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward

    return out

  def __pow__(self, other):
    """Return ``self ** other`` for a constant int/float exponent."""
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
      # power rule: d(x**n)/dx = n * x**(n-1)
      self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward

    return out

  def __rmul__(self, other): # other * self
    return self * other

  def __truediv__(self, other): # self / other
    return self * other**-1

  def __rtruediv__(self, other): # other / self
    return other * self**-1

  def __neg__(self): # -self
    return self * -1

  def __sub__(self, other): # self - other
    return self + (-other)

  def __rsub__(self, other): # other - self
    return other + (-self)

  def __radd__(self, other): # other + self
    return self + other

  def tanh(self):
    """Return the hyperbolic tangent of this (scalar) value.

    ``math.tanh`` is already safe for large magnitudes, so no max-shift is
    needed; shifting a scalar by its own maximum would turn the exponent into
    0 and make the result 0 for every input.
    """
    x = self.data
    t = math.tanh(x)
    out = Value(t, (self, ), 'tanh')

    def _backward():
      # d tanh(x)/dx = 1 - tanh(x)**2
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward

    return out

  def relu(self):
    """Return ``max(0.0, self)`` for this (scalar) value."""
    x = self.data
    t = max(0.0, x)
    out = Value(t, (self, ), 'relu')

    def _backward():
      # gradient passes through for x >= 0 (including exactly 0), else it is blocked
      self.grad += (1 if x >= 0 else 0) * out.grad
    out._backward = _backward

    return out

  def softmax(self):
    """Return the numerically stable softmax of ``data``.

    ``data`` may be a NumPy array or a plain scalar. For scalar data the
    result is always 1 and the gradient is always 0, because a single
    element is normalised against itself.
    """
    x = np.asarray(self.data)  # lets plain Python numbers through as well
    shifted = np.exp(x - x.max())  # subtract the max so exp() cannot overflow
    t = shifted / shifted.sum()
    out = Value(t, (self, ), 'softmax')

    def _backward():
      # Full vector-Jacobian product: dL/dx_j = s_j * (g_j - sum_i g_i * s_i).
      # For scalar data this reduces to s * (1 - s) * g.
      g = out.grad
      self.grad += t * (g - np.sum(g * t))
    out._backward = _backward

    return out

  def exp(self):
    """Return ``e ** self`` for this (scalar) value."""
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')

    def _backward():
      # d exp(x)/dx = exp(x), which is out.data; accumulate with +=
      self.grad += out.data * out.grad
    out._backward = _backward

    return out

  def backward(self):
    """Back-propagate from this node through every node it depends on.

    Sets ``self.grad`` to 1.0 and then runs each node's ``_backward`` in
    reverse topological order. Gradients are accumulated, so nodes that are
    reused across calls must have ``grad`` reset by the caller.
    """
    # Iterative post-order DFS: an explicit stack replaces recursion so that
    # long operation chains cannot hit Python's recursion limit.
    topo = []
    visited = {self}
    stack = [(self, iter(self._prev))]
    while stack:
      node, children = stack[-1]
      for child in children:
        if child not in visited:
          visited.add(child)
          stack.append((child, iter(child._prev)))
          break
      else:
        # every operand of `node` has been emitted already
        topo.append(node)
        stack.pop()

    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

In [81]:
class Neuron:
  """One unit: bias plus the weighted sum of its inputs, passed through an activation."""

  def __init__(self, nin, activation):
    # nin weights followed by one bias, each drawn uniformly from [-1, 1]
    weights = []
    for _ in range(nin):
      weights.append(Value(random.uniform(-1,1)))
    self.w = weights
    self.b = Value(random.uniform(-1,1))
    self.activation = activation

  def __call__(self, x):
    """Return activation(b + sum_i w_i * x_i) for the input sequence ``x``."""
    # start from the bias and add one product at a time
    pre_activation = self.b
    for weight, value in zip(self.w, x):
      pre_activation = pre_activation + weight * value

    if self.activation == "relu":
      return pre_activation.relu()
    if self.activation == "softmax":
      return pre_activation.softmax()
    # "tanh" and every unrecognised name both end up here
    return pre_activation.tanh()

  def parameters(self):
    """Return the trainable values: all weights, then the bias."""
    return [*self.w, self.b]

class Layer:
  """A group of ``nout`` neurons that all read the same ``nin`` inputs."""

  def __init__(self, nin, nout, activation):
    self.neurons = []
    for _ in range(nout):
      self.neurons.append(Neuron(nin, activation))

  def __call__(self, x):
    """Feed ``x`` to every neuron; a single-neuron layer returns the bare output."""
    outputs = []
    for neuron in self.neurons:
      outputs.append(neuron(x))
    if len(outputs) == 1:
      return outputs[0]
    return outputs

  def parameters(self):
    """Return the parameters of all neurons as one flat list, in neuron order."""
    collected = []
    for neuron in self.neurons:
      for p in neuron.parameters():
        collected.append(p)
    return collected

class MLP:
  """Multi-layer perceptron: a chain of ``Layer`` objects.

  ``nouts`` is a sequence of ``[size, activation]`` entries, one per layer;
  each layer's input width is the previous layer's size (``nin`` for the first).
  """

  def __init__(self, nin, nouts):
    self.layers = []
    fan_in = nin
    for spec in nouts:
      width, activation = spec[0], spec[1]
      self.layers.append(Layer(fan_in, width, activation))
      fan_in = width  # this layer's outputs feed the next layer

  def __call__(self, x):
    """Pass ``x`` through every layer in order and return the last output."""
    signal = x
    for layer in self.layers:
      signal = layer(signal)
    return signal

  def parameters(self):
    """Return the parameters of all layers as one flat list, in layer order."""
    collected = []
    for layer in self.layers:
      for p in layer.parameters():
        collected.append(p)
    return collected

In [82]:
# MNIST dataset: load, flatten, scale and one-hot encode

from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# flatten every image into a row of 784 values, then scale by 1/255 as float32
train_images = train_images.reshape(-1, 784).astype('float32') / 255
test_images = test_images.reshape(-1, 784).astype('float32') / 255

# turn each label into a one-hot vector
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

In [85]:
# Network layout: 784 inputs (one per pixel of a flattened image),
# two hidden layers of 256 units and a 10-unit output layer.
# NOTE(review): the output layer also uses ReLU while the targets are one-hot
# vectors scored with a squared error; the recorded run shows outputs in the
# hundreds and then all zeros after one update. Confirm this is intended.
model = MLP(784, [[256, "relu"], [256, "relu"], [10, "relu"]])

N_SAMPLES = 5  # size of the training subset used for this experiment

# Slice first, then convert: only the kept rows are turned into Python lists.
# list(row) keeps the individual NumPy scalars, exactly like appending them
# one by one would.
xs = [list(image) for image in train_images[:N_SAMPLES]]  # input list
ys = [list(label) for label in train_labels[:N_SAMPLES]]  # expected result list

progress = 0
acum = 0

ypred = [] # predicted results from MLP for each group of inputs
for acum, x in enumerate(xs, start=1):
    print("Predicting ...")
    prediction = model(x)
    print("Saving prediction...")
    ypred.append(prediction)
    progress = (acum*100)/len(xs)
    print("Progress: " + str(progress) + "%")

def meanSquare(listA, listB):
    """Return the sum of squared element-wise differences ``(b - a)**2``.

    Despite the name, the sum is not divided by the number of elements.
    ``listB`` supplies the left operand of the subtraction, so it must hold
    the ``Value`` predictions and ``listA`` the targets.
    """
    return sum([(listOut - listIn)**2 for listIn, listOut in zip(listA, listB)])

# total loss over the subset
loss = sum([meanSquare(ygt,yout) for ygt, yout in zip(ys, ypred)])
# This populates p.grad on every parameter; the training loop zeroes the
# gradients again before its own backward pass.
loss.backward()
print(ypred[0])
print({"loss": loss.data})

Predicting ...
Saving prediction...
Progress: 20.0%
Predicting ...
Saving prediction...
Progress: 40.0%
Predicting ...
Saving prediction...
Progress: 60.0%
Predicting ...
Saving prediction...
Progress: 80.0%
Predicting ...
Saving prediction...
Progress: 100.0%
[Value(data=364.6312158483422), Value(data=0.0), Value(data=0.0), Value(data=0.0), Value(data=0.0), Value(data=166.97836302121814), Value(data=70.59367634697287), Value(data=0.0), Value(data=0.0), Value(data=0.0)]
{'loss': 723111.7109347673}


In [86]:
iterations_training = 10  # number of gradient-descent steps
learning_rate = 0.05  # step size applied to every parameter update

# Uses model, xs, ys and meanSquare from the previous cell; the loss helper is
# not redefined on every iteration.

print("Expected first result:\n")
print(ys[0])
print("---------------------------------------\n")
for k in range(iterations_training):
    # Forward pass
    progress = 0
    acum = 0

    ypred = [] # predicted results from MLP for each group of inputs
    for acum, x in enumerate(xs, start=1):
        print("Predicting ...")
        prediction = model(x)
        print("Saving prediction...")
        ypred.append(prediction)
        progress = (acum*100)/len(xs)
        print("Progress: " + str(progress) + "%")

    loss = sum([meanSquare(ygt,yout) for ygt, yout in zip(ys, ypred)])
    # zero grad to flush previous gradients; only the parameters persist
    # between iterations, every other node is rebuilt by the forward pass
    for p in model.parameters():
        p.grad = 0.0
    # Backward pass
    loss.backward()
    # Update: step against the gradient
    for p in model.parameters():
        p.data -= learning_rate * p.grad
    # Show status
    print(ypred[0])
    print(k, {"loss": loss.data})

Expected first result:

[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
---------------------------------------

Predicting ...
Saving prediction...
Progress: 20.0%
Predicting ...
Saving prediction...
Progress: 40.0%
Predicting ...
Saving prediction...
Progress: 60.0%
Predicting ...
Saving prediction...
Progress: 80.0%
Predicting ...
Saving prediction...
Progress: 100.0%
[Value(data=364.6312158483422), Value(data=0.0), Value(data=0.0), Value(data=0.0), Value(data=0.0), Value(data=166.97836302121814), Value(data=70.59367634697287), Value(data=0.0), Value(data=0.0), Value(data=0.0)]
0 {'loss': 723111.7109347673}
Predicting ...
Saving prediction...
Progress: 20.0%
Predicting ...
Saving prediction...
Progress: 40.0%
Predicting ...
Saving prediction...
Progress: 60.0%
Predicting ...
Saving prediction...
Progress: 80.0%
Predicting ...
Saving prediction...
Progress: 100.0%
[Value(data=0.0), Value(data=0.0), Value(data=0.0), Value(data=0.0), Value(data=0.0), Value(data=0.0), Value(data=0.0)