In [74]:
import numpy as np
import matplotlib.pyplot as plt
import math
%matplotlib inline
import torch

In [76]:
class value:
    def __init__(self,data, _children=(), _oper ='', label=''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None
        self._prev = set(_children)
        self._oper = _oper
        self.label = label
    def __repr__(self):
        return f"value(data={self.data})"
        
    def __add__(self,other):
        other = other if isinstance(other, value) else value(other)
        out = value(self.data + other.data, (self,other), '+')
        
        def _backward():
            self.grad += 1*out.grad
            other.grad += 1*out.grad
        out._backward = _backward    

        return out
    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = value(self.data**other, (self,), f'**{other}')

        def _backward():
            self.grad += other * (self.data ** (other - 1)) * out.grad
        out._backward = _backward

        return out
  
    def __rmul__(self, other): # other * self
        return self * other

    def __truediv__(self, other): # self / other
        return self * other**-1

    def __neg__(self): # -self
        return self * -1

    def __sub__(self, other): # self - other
        return self + (-other)

    def __radd__(self, other): # other + self
        return self + other
       
    
    def __mul__(self, other):
        other = other if isinstance(other, value) else value(other)
        out = value(self.data * other.data, (self,other), '*' )
        def _backward():
            self.grad += other.data*out.grad
            other.grad+= self.data*out.grad
        out._backward = _backward    

        return out 
        
    
    def tanh(self):
        x= self.data
        t = ((math.exp(2*x)-1)/(math.exp(2*x)+1))
        out = value(t, (self, ), 'tanh')
        def _backward():
            self.grad = (1-t**2)*out.grad
        out._backward = _backward
        return out
    
     
    def exp(self):
        x = self.data
        out = value(math.exp(x), (self, ), 'exp')
    
        def _backward():
            self.grad += out.data * out.grad # NOTE: in the video I incorrectly used = instead of +=. Fixed here.
        out._backward = _backward
    
        return out 
    
    def backward(self):
     topo = []
     visited = set()
     def build_topo(v):
       if v not in visited:
         visited.add(v)
         for child in v._prev:
           build_topo(child)
         topo.append(v)
     build_topo(self)
    
     self.grad = 1.0
     for node in reversed(topo):
       node._backward()   

        

In [77]:
from graphviz import Digraph

def trace(root):
  # builds a set of all nodes and edges in a graph
  nodes, edges = set(), set()
  def build(v):
    if v not in nodes:
      nodes.add(v)
      for child in v._prev:
        edges.add((child, v))
        build(child)
  build(root)
  return nodes, edges

def draw_dot(root):
  dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) # LR = left to right
  
  nodes, edges = trace(root)
  for n in nodes:
    uid = str(id(n))
    # for any value in the graph, create a rectangular ('record') node for it
    dot.node(name = uid, label = "{ %s | data %.4f | grad %.4f }" % (n.label, n.data, n.grad), shape='record')
    if n._oper:
      # if this value is a result of some operation, create an op node for it
      dot.node(name = uid + n._oper, label = n._oper)
      # and connect this node to it
      dot.edge(uid + n._oper, uid)

  for n1, n2 in edges:
    # connect n1 to the op node of n2
    dot.edge(str(id(n1)), str(id(n2)) + n2._oper)

  return dot

In [78]:
#test case
x1 = torch.Tensor([2.0]).double()                ; x1.requires_grad = True    #bydeafult pytorch keeps grads off for scalars & x1 was typecasted to double to be consistent with python which uses float64
x2 = torch.Tensor([0.0]).double()                ; x2.requires_grad = True
w1 = torch.Tensor([-3.0]).double()               ; w1.requires_grad = True
w2 = torch.Tensor([1.0]).double()                ; w2.requires_grad = True
b = torch.Tensor([6.8813735870195432]).double()  ; b.requires_grad = True
n = x1*w1 + x2*w2 + b
o = torch.tanh(n)

print(o.data.item())
o.backward()

print('---')
print('x2', x2.grad.item())
print('w2', w2.grad.item())
print('x1', x1.grad.item())
print('w1', w1.grad.item())

0.7071066904050358
---
x2 0.5000001283844369
w2 0.0
x1 -1.5000003851533106
w1 1.0000002567688737


In [79]:
#Here we are tyring to build a neural network where we feed some data x's and then weight is multiplies to  to it, then a summation of wixi is done with the addition of a bias(activation function) and then passed thorugh tanh. For calculating the best w's we'll be implementing loss function on which we can perform optimisation which will directly effect the weights to give us better answer


import random


class Neurons:

    def __init__(self, nin):
        self.w = [value(random.uniform(-1,1)) for _ in range(nin)]
        self.b = value(random.uniform(-1,1))
    def __call__(self,x):
        activation = sum((wi*xi for wi,xi in zip(self.w,x)), self.b)
        out  = activation.tanh()
        return out
    def parameters(self):
        return self.w + [self.b]
#Now defining layers of mlp
class Layers:
    def __init__(self, nin, nout):
        self.neurons = [Neurons(nin) for _ in range(nout)]
    def __call__ (self, x):
        outs= [n(x) for n in self.neurons]   
        return outs[0] if len(outs) == 1 else outs 
    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters()]
class MLP:
  
    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [layers(sz[i], sz[i+1]) for i in range(len(nouts))]
  
    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x
    def parameters(self):
        return[ p for layer in self.layers for p in layer.parameters() ]



In [80]:
x = [2.0, 3.0, -1.0]
n = MLP(3, [4, 4, 1])
n(x)
len(n.parameters())


41

In [81]:
#test case for the MLP

xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0]




In [86]:
# defining loss function and applying gradient descent via forward-> backward->update step
for k in range(15):
    #forward pass
    ypred = [n(x) for x in xs]
    mse_loss = sum((yout-yreal)**2 for yout,yreal in zip(ypred, ys))

#backward pass
    for p in n.parameters():
        p.grad = 0
    mse_loss.backward()

#update

    for p in n.parameters():
        p.data += -0.6*p.grad  
    print(k, mse_loss.data)     

0 3.809928734170398e-05
1 3.806026247876363e-05
2 3.802131679097869e-05
3 3.7982450038509405e-05
4 3.7943661982492435e-05
5 3.79049523850139e-05
6 3.786632100912145e-05
7 3.7827767618820554e-05
8 3.778929197904946e-05
9 3.775089385569891e-05
10 3.771257301559196e-05
11 3.767432922648368e-05
12 3.76361622570598e-05
13 3.759807187692828e-05
14 3.756005785661362e-05


In [87]:
ypred

[value(data=0.9977838099248567),
 value(data=-0.9981817860291162),
 value(data=-0.9960119813578201),
 value(data=0.9963341624868035)]