In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
import random
%matplotlib inline

In [17]:
class Value:
  """A scalar that remembers how it was computed so gradients can be back-propagated.

  Attributes:
    data:  the wrapped number.
    grad:  accumulated derivative of the node `backward()` was called on with
           respect to this value; starts at 0.0.
    label: optional human-readable name.
    _prev: set of the Value operands that produced this value (empty for leaves).
    _op:   short string naming the operation that produced this value.
  """

  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self._prev = set(_children)   # operands this value was computed from
    self._op = _op                # operation that produced this value
    self.label = label
    self.grad = 0.0
    self._backward = lambda: None  # leaves have nothing to propagate

  def __repr__(self):
    """Text shown when a Value is printed or echoed by the notebook."""
    return f"Value(data={self.data})"

  def __add__(self, other):
    """self + other; plain numbers are wrapped into a Value first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')

    def _backward():
      # d(a+b)/da = d(a+b)/db = 1. Use += so a value that is used several
      # times in the graph accumulates the contribution of every use.
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward  # store the function; it is called later by backward()
    return out

  def __mul__(self, other):
    """self * other; plain numbers are wrapped into a Value first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')

    def _backward():
      # d(a*b)/da = b and d(a*b)/db = a
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
    return out

  def __pow__(self, other):
    """self ** other for a constant int/float exponent."""
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
      # d(x**n)/dx = n * x**(n-1)
      self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward
    return out

  def __radd__(self, other):  # other + self
    return self + other

  def __rmul__(self, other):  # other * self, e.g. 2 * a
    return self * other

  def __neg__(self):  # -self
    return self * -1

  def __sub__(self, other):  # self - other
    return self + (-other)

  def __rsub__(self, other):  # other - self, e.g. 1 - a
    return other + (-self)

  def __truediv__(self, other):  # self / other
    return self * other**-1

  def __rtruediv__(self, other):  # other / self, e.g. 1 / a
    return other * self**-1

  def tanh(self):
    """Hyperbolic tangent of this value.

    Uses math.tanh instead of (e^(2x) - 1) / (e^(2x) + 1) so that large
    positive inputs saturate at 1.0 rather than overflowing in math.exp.
    """
    t = math.tanh(self.data)
    out = Value(t, (self,), 'tanh')

    def _backward():
      # d/dx tanh(x) = 1 - tanh(x)^2
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward
    return out

  def exp(self):
    """e raised to this value."""
    out = Value(math.exp(self.data), (self,), 'exp')

    def _backward():
      # d/dx e^x = e^x, which is exactly out.data
      self.grad += out.data * out.grad
    out._backward = _backward
    return out

  def backward(self):
    """Back-propagate from this value, accumulating into `.grad` of every node that feeds it.

    Sets self.grad to 1.0 and then runs each node's stored `_backward` in
    reverse topological order. Gradients are added to, not reset, so callers
    that reuse nodes across passes must zero `.grad` themselves.
    """
    # Build the topological order with an explicit stack (post-order DFS) so
    # long operation chains do not hit Python's recursion limit.
    topo = []
    visited = {self}
    stack = [(self, iter(self._prev))]
    while stack:
      node, children = stack[-1]
      for child in children:
        if child not in visited:
          visited.add(child)
          stack.append((child, iter(child._prev)))
          break
      else:
        # every operand of `node` has been emitted, so `node` can follow them
        topo.append(node)
        stack.pop()

    # Walk from the output back towards the leaves.
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

In [24]:
class Neuron:
  """A single tanh unit: out = tanh(sum(w_i * x_i) + b).

  Example:
    n = Neuron(3)
    n([2.0, 3.0, -1.0])   # calling the object runs __call__
  """

  def __init__(self, nin):
    """Create `nin` weights and one bias, each drawn uniformly from [-1, 1]."""
    weights = []
    for _ in range(nin):
      weights.append(Value(random.uniform(-1, 1)))
    self.w = weights
    self.b = Value(random.uniform(-1, 1))

  def __call__(self, x):
    """Return the activation for inputs `x`.

    Weights and inputs are paired with zip, so pairing stops at the shorter
    of the two sequences.
    """
    # start from the bias and add one weighted input at a time
    act = self.b
    for wi, xi in zip(self.w, x):
      act = act + wi * xi
    return act.tanh()

  def parameters(self):
    """Return the trainable values: all weights followed by the bias."""
    return [*self.w, self.b]

class Layer:
  """A group of `nout` neurons that all receive the same `nin` inputs."""

  def __init__(self, nin, nout):
    neurons = []
    for _ in range(nout):
      neurons.append(Neuron(nin))
    self.neurons = neurons

  def __call__(self, x):
    """Evaluate every neuron on `x`.

    Returns the list of outputs, except that a layer with exactly one neuron
    returns that single output directly instead of a one-element list.
    """
    outs = []
    for neuron in self.neurons:
      outs.append(neuron(x))
    if len(outs) == 1:
      return outs[0]
    return outs

  def parameters(self):
    """Return the parameters of every neuron in the layer as one flat list."""
    params = []
    for neuron in self.neurons:
      params.extend(neuron.parameters())
    return params

class MLP:
  """A multi-layer perceptron: a chain of Layers applied one after another."""

  def __init__(self, nin, nouts):
    """Build the network.

    nin:   number of inputs to the first layer.
    nouts: sequence with the number of neurons in each layer, in order
           (any sequence is accepted, e.g. a list or a tuple).
    """
    nouts = list(nouts)  # allow tuples and other sequences, not only lists
    sz = [nin] + nouts
    # layer i maps sz[i] inputs to sz[i+1] outputs
    self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(nouts))]

  def __call__(self, x):
    """Feed `x` through every layer in order and return the last layer's output."""
    last = len(self.layers) - 1
    for i, layer in enumerate(self.layers):
      x = layer(x)
      # A one-neuron layer hands back a bare value rather than a list; wrap it
      # again so the next layer can iterate over its inputs. The final
      # layer's output is returned exactly as the layer produced it.
      if i < last and not isinstance(x, list):
        x = [x]
    return x

  def parameters(self):
    """Return the parameters of every layer as one flat list."""
    return [p for layer in self.layers for p in layer.parameters()]

In [25]:
# Seed the RNG first so the randomly initialised weights, and therefore the
# whole training run, are the same on every Restart & Run All.
random.seed(1337)
n = MLP(3, [4, 4, 1])  # 3 inputs -> two hidden layers of 4 neurons -> 1 output
# Example forward pass: n([2.0, 3.0, -1.0])

In [26]:
# Training inputs: four samples with three features each.
xs = [[2.0, 3.0, -1.0], [3.0, -1.0, 0.5], [0.5, 1.0, 1.0], [1.0, 1.0, -1.0]]
# Desired target for each sample, in the same order as xs.
ys = [1.0, -1.0, -1.0, 1.0]

In [33]:
for k in range(20):

  # forward pass: predict every sample, then total the squared errors
  ypred = list(map(n, xs))
  loss = sum([(yout - ygt)**2 for ygt, yout in zip(ys, ypred)])

  # backward pass: clear the gradients left over from the previous step
  # before back-propagating, because backward() accumulates into .grad
  for p in n.parameters():
    p.grad = 0.0
  loss.backward()

  # gradient-descent update: step each parameter against its gradient
  for p in n.parameters():
    p.data -= 0.1 * p.grad

  print(k, loss.data)
  

0 0.004921237157403436
1 0.004866417015855336
2 0.004812751596240523
3 0.004760205284152089
4 0.004708743906108499
5 0.004658334657655214
6 0.004608946035719791
7 0.004560547774929466
8 0.004513110787624062
9 0.004466607107316527
10 0.004421009835372695
11 0.004376293090698443
12 0.004332431962238757
13 0.00428940246410739
14 0.004247181493178795
15 0.0042057467889862095
16 0.0041650768957817545
17 0.00412515112662316
18 0.004085949529362787
19 0.004047452854422305


In [34]:
# Show the predictions from the last training iteration's forward pass
# (computed before that iteration's weight update); compare against ys.
ypred

[Value(data=0.9735857493954889),
 Value(data=-0.9672480651927889),
 Value(data=-0.9623345556311127),
 Value(data=0.9707021282973858)]