In [5]:
import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline

My google colab with all of the tests. 
https://colab.research.google.com/drive/1An24HbS5hTjBQjdIYkG4RBTOwdRbcUfK?usp=sharing

# Create Parameter class 

In [2]:
class Parameter:
    def __init__(self, _value, _children =(), _op='', name = '') -> None:
      self._value = _value
      self.grad = 0.0
      self._backward = lambda: None
      self._prev = set(_children)
      self._op = _op
      self.name = name

    def __repr__(self) -> str:
        return f"Parameter {self.name} = {self._value}; dL/d[{self.name}] = {self.grad}"

    def __add__(self, other):
        # Add string to check if the other is Parameter if not make it Parameter, so we can add numbers to Parameter now
        other = other if isinstance(other, Parameter) else Parameter(other)
        result = Parameter(self._value + other._value, (self, other), '+')


        def _backward():
          self.grad += 1.0 * result.grad
          other.grad += 1.0 * result.grad
        result._backward = _backward

        return result

    def __mul__(self, other):
        # Add string to check if the other is Parameter if not make it Parameter, so we can multiply numbers with Parameter now
        other = other if isinstance(other, Parameter) else Parameter(other)
        result = Parameter(self._value * other._value, (self, other), '*')

        def _backward():
          self.grad += other._value * result.grad
          other.grad += self._value * result.grad
        result._backward = _backward

        return result

    def __neg__(self):
      return self * -1

    def __sub__(self,other):
      return self + (-other)
    # Now lets make activation functions from lab1
    # So, I'll do ReLU and Softplus

    def __pow__(self, other):
      assert isinstance(other, (int, float))
      out = Parameter(self._value ** other, (self,), f'**{other}')

      def _backward():
        self.grad += other * self._value** (other - 1) * out.grad

      out._backward = _backward
      return out

    def ReLU(self):
      result = Parameter (max(0.0, self._value), (self, ), 'ReLU')

      def _backward():
        temp =  1 if self._value > 0 else 0
        self.grad += temp * result.grad

      result._backward = _backward
      return result

    def Softplus(self):
      result  = Parameter(
        np.log(1 + np.exp(self._value)),
       (self, ),
       'Softplus')

      def _backward():
        self.grad = (np.exp(self._value))/(1 + np.exp(self._value) ) * result.grad

      result._backward = _backward
      return result

    # Now I'll do automatic backpropagation
    # I will use the topological sort
    # to use it I add _children - paramiter to our Parameter
    def backward(self):

      topo = []
      visited = set()
      def build_topo(v):
        if v not in visited:
          visited.add(v)
          for child in v._prev:
            build_topo(child)
          topo.append(v)
      build_topo(self)

      self.grad = 1.0
      for node in reversed(topo):
        node._backward()




# make fun. to draw Parameter 

In [20]:
from graphviz import Digraph

def trace(root):
  # builds a set of all nodes and edges in a graph
  nodes, edges = set(), set()
  def build(v):
    if v not in nodes:
      nodes.add(v)
      for child in v._prev:
        edges.add((child, v))
        build(child)
  build(root)
  return nodes, edges

def draw_dot(root):
  dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) # LR = left to right

  nodes, edges = trace(root)
  for n in nodes:
    uid = str(id(n))
    # for any value in the graph, create a rectangular ('record') node for it
    #
    dot.node(name = uid, label = "{ %s | data %.4f | grad %.4f }" % (n.name, n._value, n.grad), shape='record')
    if n._op:
      # if this value is a result of some operation, create an op node for it
      dot.node(name = uid + n._op, label = n._op)
      # and connect this node to it
      dot.edge(uid + n._op, uid)

  for n1, n2 in edges:
    # connect n1 to the op node of n2
    dot.edge(str(id(n1)), str(id(n2)) + n2._op)

  return dot

# Test 

In [21]:
a = Parameter(3.0, name = 'a')
b = Parameter(2.0, name = 'b')
c = Parameter(5.0, name = 'c')
d = Parameter(5.0, name = 'd')
q = a * b; q.name = 'q'
w = q.Softplus(); w.name = 'w'
w

Parameter w = 6.00247568513773; dL/d[w] = 0.0

In [22]:
w.backward()

In [25]:
a

Parameter a = 3.0; dL/d[a] = 1.9950547536867305

# Gradient descent 

In [13]:
def gradient_descent(pars : list[Parameter], lr = 0.001):
   for par in pars:
      par._value -= lr * par.grad

The proof that we don't need the return in the function can be seen in my collabnotebook. There you can see all of the examples and tests. notebook(https://colab.research.google.com/drive/1An24HbS5hTjBQjdIYkG4RBTOwdRbcUfK?usp=sharing)

# Test grad.desc 

In [14]:
x1 = Parameter(4.0, name ="x1")
w1 = Parameter(0.5, name = "w1")
b1 = Parameter(1.0, name = "b1")
#out1 = x1 * w1 + b1
#print(out1) 


x2 = Parameter(6.0, name ="x2")
w2 = Parameter(-4.0, name = "w2")
b2 = Parameter(0.76, name = "b2")
#out2 = x2 * w2 + b2

In [15]:
iter = 682
lr = 1e-8 
out_pred = Parameter(2.0, name = "out_pred") 

for i in range(iter):
  w1._grad = 0
  w2._grad = 0
  b1._grad = 0
  b2._grad = 0

  #loss = (out_pred - out1._value) ** 2
  # forward 
  out1 = x1   * w1 + b1
  out2 = out1 * w2 + b2
  loss = (out_pred - out2) ** 2
  # backward 
  loss.backward()

  #grad 
  gradient_descent([x1,w1,b1,w2,b2], lr)
  #print(f'Epoch {i}: a1={a1._value}, a2={a2._value}, b1={b1._value}, b2={b2._value}')
  print(f"Epoch{i} : Out2 = {out2._value}, Loss = {loss._value}")
print(out2)

Epoch0 : Out2 = -11.24, Loss = 175.29760000000002
Epoch1 : Out2 = -11.23992426722349, Loss = 175.29559460181346
Epoch2 : Out2 = -11.23977280229158, Loss = 175.2915838562998
Epoch3 : Out2 = -11.239545606446471, Loss = 175.28556786517606
Epoch4 : Out2 = -11.239242681551444, Loss = 175.27754678101346
Epoch5 : Out2 = -11.238864030090808, Loss = 175.26752080723222
Epoch6 : Out2 = -11.23840965516986, Loss = 175.25549019809458
Epoch7 : Out2 = -11.237879560514816, Loss = 175.24145525869594
Epoch8 : Out2 = -11.237273750472736, Loss = 175.22541634495454
Epoch9 : Out2 = -11.236592230011448, Loss = 175.20737386359943
Epoch10 : Out2 = -11.235835004719434, Loss = 175.1873282721563
Epoch11 : Out2 = -11.23500208080573, Loss = 175.165280078932
Epoch12 : Out2 = -11.234093465099784, Loss = 175.1412298429968
Epoch13 : Out2 = -11.233109165051346, Loss = 175.11517817416592
Epoch14 : Out2 = -11.232049188730286, Loss = 175.08712573297782
Epoch15 : Out2 = -11.230913544826464, Loss = 175.0570732306724
Epoch16 :