In [39]:
import numpy as np

# Compute  graph with each node maitainng a state for later backprop
import math

class Param:
  def __init__(self, value, parents=(), op=None):
    self.value = value
    # book keeping
    self.op = op
    self.parents = parents
    self.grad = 0.0
    self.backward = lambda: None

  def __repr__(self):
    return f'v: {self.value}'

  def __add__(self, other):
    other = other if isinstance(other, Param) else Param(other)
    out = Param(self.value + other.value, (self, other),  '+')
    def __backward():
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out.backward = __backward
    return out

  def __neg__(self):
    return self * -1

  def __sub__(self, other):
    other = other if isinstance(other, Param) else Param(other)
    return self + (-other)

  def __truediv__(self, other):
    other = other if isinstance(other, Param) else Param(other)
    return self * (other ** -1)

  def __mul__(self, other):
    other = other if isinstance(other, Param) else Param(other)
    out = Param(self.value * other.value, (self, other), '*')
    def __backward():
      self.grad +=  other.value * out.grad
      other.grad += self.value * out.grad
    out.backward = __backward
    return out

  def __pow__(self, other):
    other = other if isinstance(other, Param) else Param(other)
    out = Param(self.value * other.value, (self, ), f'**{other}')
    def __backward():
      self.grad += other * (self.value ** (other - 1)) * out.grad
    out.backward = __backward
    return out

  def exp(self):
    x = self.value
    out = Param(math.exp(x), (self, ), 'exp')

    def __backward():
      self.grad += out.value * out.grad # NOTE: in the video I incorrectly used = instead of +=. Fixed here.
    out.backward = __backward

    return out

  def __radd__(self, other):
    return self + other

  def __rmul__(self, other):
    return self * other

  def __gt__(self, other):
    other = other if isinstance(other, Param) else Param(other)
    return self.value > other.value

  def __lt__(self, other):
    other = other if isinstance(other, Param) else Param(other)
    return self.value < other.value

  def backward(self):
    print(f'calling backward on {self.value}')
    topo = []
    visited = set()
    def build_topo(v):
      if v not in visited:
        visited.add(v)
        for child in v.parents:
          build_topo(child)
        topo.append(v)
    build_topo(self)

    self.grad = 1.0
    for node in reversed(topo):
      node.__backward()


class Neuron:
  def __init__(self, n):
    self.w = [Param(np.random.rand()) for _ in range(n)]
    self.b = Param(np.random.rand())

  def __call__(self, x):
    return self.forward(x)

  def forward(self, x):
    self.x = x
    return np.sum([wi * xi for wi, xi in zip(self.w, x)], axis=0) + self.b

  def parameters(self):
    return self.w + [self.b]

  def __repr__(self):
    return f'weights: {[ wei for wei in self.w]} bias: {self.b}'


class LinearLayer:
  def __init__(self, ni, no):
    self.ni = ni
    self.no = no
    self.layer = [Neuron(ni) for _ in range(no)]

  def __call__(self, x):
    return self.forward(x)

  def forward(self, x):
    return [n(x) for n in self.layer]

  def __repr__(self):
    return f'neurons: {[n for n in self.layer]}'

  def parameters(self):
    return [p for neuron in self.layer for p in neuron.parameters()]

class ReluLayer:
  def __call__(self, x):
    return self.forward(x)

  def forward(self, x):
    self.x = x
    return [max(xi, 0) for xi in x]


class MSE:
  def __call__(self, y_pred, y):
    return self.forward(y_pred, y)

  def forward(self, y_pred, y):
    self.y_pred = y_pred
    self.y = y
    return sum(((y_predi - yi) ** 2 for y_predi, yi in zip(y_pred, y)), Param(0)) / len(y_pred)


class FCN:  # 3-layer fully connected network
  def __init__(self, input_size, hidden_size, output_size):
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.layer1 = LinearLayer(input_size, hidden_size)
    self.relu = ReluLayer()
    self.layer2 = LinearLayer(hidden_size, output_size)

  def __call__(self, x):
    return self.forward(x)

  def forward(self, x):
    x = self.layer1(x)
    print(f'layer1 output {x}')
    x = self.relu(x)
    print(f'relu ouput {x}')
    x = self.layer2(x)
    print(f'layer2 ouput {x}')
    return x

  def parameters(self):
    return self.layer1.parameters() + self.layer2.parameters()




In [35]:
fcn = FCN(10, 20, 1)


In [36]:
x = np.random.rand(10)
print(f'x={x}')
y = fcn(x)
print(f'y={y}')

x=[0.51143676 0.37881268 0.07476352 0.12542999 0.09052774 0.80227874
 0.61861968 0.59827223 0.21331327 0.2579352 ]
layer1 output [v: 2.843134157655485, v: 1.94607170593777, v: 3.547586680175322, v: 2.988933169627365, v: 3.2816506525605513, v: 2.1246783933348175, v: 2.537511170965553, v: 2.7947818701819105, v: 2.2154946808813913, v: 1.929000972987342, v: 2.3945781818156737, v: 1.8940648234816753, v: 2.506871474682456, v: 2.658393505785049, v: 2.3856710737437368, v: 3.019398344852406, v: 2.343521663372198, v: 2.6882822253102736, v: 2.5566589016033228, v: 2.1913103894243062]
relu ouput [v: 2.843134157655485, v: 1.94607170593777, v: 3.547586680175322, v: 2.988933169627365, v: 3.2816506525605513, v: 2.1246783933348175, v: 2.537511170965553, v: 2.7947818701819105, v: 2.2154946808813913, v: 1.929000972987342, v: 2.3945781818156737, v: 1.8940648234816753, v: 2.506871474682456, v: 2.658393505785049, v: 2.3856710737437368, v: 3.019398344852406, v: 2.343521663372198, v: 2.6882822253102736, v: 2.5

In [37]:
x = np.random.rand(10)
y = np.random.rand(1)
print(f'x={x}')
y_pred = fcn(x)
print(f'y={y_pred}')

x=[0.64354011 0.06671703 0.63076598 0.73818818 0.89614523 0.11321184
 0.87706697 0.38689226 0.10784903 0.2915324 ]
layer1 output [v: 3.51773706167005, v: 2.606764510747487, v: 3.464564611877407, v: 2.7869297251624534, v: 3.374301641843973, v: 2.8246682285585556, v: 2.878051681936111, v: 3.3770991157847243, v: 2.895902175143295, v: 2.412533016007809, v: 3.3170558901878398, v: 3.0645246490902203, v: 3.239747550221692, v: 4.030739518695705, v: 2.863287092367913, v: 3.7888113031052066, v: 3.0278360334566115, v: 3.177052823461179, v: 3.504169356654999, v: 2.58243456564035]
relu ouput [v: 3.51773706167005, v: 2.606764510747487, v: 3.464564611877407, v: 2.7869297251624534, v: 3.374301641843973, v: 2.8246682285585556, v: 2.878051681936111, v: 3.3770991157847243, v: 2.895902175143295, v: 2.412533016007809, v: 3.3170558901878398, v: 3.0645246490902203, v: 3.239747550221692, v: 4.030739518695705, v: 2.863287092367913, v: 3.7888113031052066, v: 3.0278360334566115, v: 3.177052823461179, v: 3.504169

In [43]:
epochs = 50
lr = 0.1
loss_fn = MSE()
for epoch in range(epochs):
  y_pred = fcn(x)
  # Zero gradients
  loss = loss_fn(y_pred, y)
  loss.backward()
  for param in fcn.parameters():
    param.value = param.value - (lr * param.grad)
    #param.grad = 0
  print(f'Epoch {epoch+1}, Loss: {loss}')


layer1 output [v: v: 3.51773706167005, v: v: 2.606764510747487, v: v: 3.464564611877407, v: v: 2.7869297251624534, v: v: 3.374301641843973, v: v: 2.8246682285585556, v: v: 2.878051681936111, v: v: 3.3770991157847243, v: v: 2.895902175143295, v: v: 2.412533016007809, v: v: 3.3170558901878398, v: v: 3.0645246490902203, v: v: 3.239747550221692, v: v: 4.030739518695705, v: v: 2.863287092367913, v: v: 3.7888113031052066, v: v: 3.0278360334566115, v: v: 3.177052823461179, v: v: 3.504169356654999, v: v: 2.58243456564035]
relu ouput [v: v: 3.51773706167005, v: v: 2.606764510747487, v: v: 3.464564611877407, v: v: 2.7869297251624534, v: v: 3.374301641843973, v: v: 2.8246682285585556, v: v: 2.878051681936111, v: v: 3.3770991157847243, v: v: 2.895902175143295, v: v: 2.412533016007809, v: v: 3.3170558901878398, v: v: 3.0645246490902203, v: v: 3.239747550221692, v: v: 4.030739518695705, v: v: 2.863287092367913, v: v: 3.7888113031052066, v: v: 3.0278360334566115, v: v: 3.177052823461179, v: v: 3.5041