In [12]:
!pip3 install graphviz

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [13]:
import math 
import torch 
import numpy as np
import matplotlib.pyplot as plt
import graphviz

Autograd engine: the `Tensor` class

In [14]:
class Tensor:
    """Value node of a small reverse-mode automatic-differentiation engine.

    Every arithmetic operation or activation returns a new ``Tensor`` that
    remembers the operands it was built from (``_prev``), the name of the
    operation (``_op``) and a closure (``_backward``) that adds this node's
    contribution to the ``grad`` of those operands.  Calling
    :meth:`backward` on the final node runs the closures in reverse
    topological order.

    Attributes:
        data: The wrapped value (a number, or a NumPy array for the
            NumPy-based activations).
        grad: Gradient accumulated for this node; starts at ``0.0``.
        label: Optional human-readable name for this node.
    """

    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data                 # wrapped value
        self._op = _op                   # operation that produced this node ('+', '*', ...)
        self._prev = set(_children)      # operand nodes this value was computed from
        self.grad = 0.0                  # accumulated gradient, zero until backward() runs
        self._backward = lambda: None    # leaf nodes have nothing to propagate
        self.label = label               # optional name for this node

    def __add__(self, other):
        """Return ``self + other``; a non-Tensor operand is wrapped first."""
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1: the gradient passes straight through.
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward

        return out

    def __mul__(self, other):
        """Return ``self * other``; a non-Tensor operand is wrapped first."""
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data * other.data, (self, other), '*')

        def _backward():
            # d(a * b)/da = b and d(a * b)/db = a.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def tanh(self):
        """Hyperbolic tangent of a scalar value."""
        # math.tanh saturates to +/-1 for large |x|; the previous
        # (exp(2x) - 1) / (exp(2x) + 1) form raised OverflowError there.
        t = math.tanh(self.data)
        out = Tensor(t, (self, ), 'tanh')

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)^2
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward

        return out

    def relu(self):
        """Rectified linear unit: ``max(0, x)``."""
        x = self.data
        t = np.maximum(0, x)
        out = Tensor(t, (self,), 'relu')

        def _backward():
            # The gradient flows only where the output is strictly positive.
            self.grad += (t > 0) * out.grad
        out._backward = _backward

        return out

    def gelu(self):
        """GELU activation using the tanh approximation.

        ``0.5 * x * (1 + tanh(k * (x + 0.044715 * x^3)))`` with
        ``k = sqrt(2 / pi)``.
        """
        x = self.data
        k = np.sqrt(2 / np.pi)
        tanh_inner = np.tanh(k * (x + 0.044715 * np.power(x, 3)))
        t = 0.5 * x * (1 + tanh_inner)
        out = Tensor(t, (self,), 'gelu')

        def _backward():
            # Chain rule on the inner polynomial:
            #   d/dx [k * (x + 0.044715 x^3)] = k * (1 + 3 * 0.044715 * x^2)
            # (k multiplies the whole bracket; it is not added to the x^2 term).
            d_inner = k * (1 + 3 * 0.044715 * np.power(x, 2))
            derivative = (0.5 * (1 + tanh_inner)
                          + 0.5 * x * (1 - np.square(tanh_inner)) * d_inner)
            self.grad += derivative * out.grad
        out._backward = _backward

        return out

    def sigmoid(self):
        """Logistic sigmoid: ``1 / (1 + exp(-x))``."""
        x = self.data
        t = 1 / (1 + np.exp(-x))
        out = Tensor(t, (self,), 'sigmoid')

        def _backward():
            # d sigma(x)/dx = sigma(x) * (1 - sigma(x))
            self.grad += t * (1 - t) * out.grad
        out._backward = _backward

        return out

    def softmax(self):
        """Softmax normalised over all elements of ``data``."""
        x = self.data
        exps = np.exp(x - np.max(x))  # subtract the max for numerical stability
        t = exps / np.sum(exps)
        out = Tensor(t, (self,), 'softmax')

        def _backward():
            # Full Jacobian-vector product, including the off-diagonal terms:
            #   dL/dx_i = t_i * (g_i - sum_j g_j * t_j)
            # Rebinding (instead of indexing into self.grad) also works while
            # self.grad is still the scalar default 0.0.
            g = out.grad
            self.grad = self.grad + t * (g - np.sum(g * t))
        out._backward = _backward

        return out

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent."""
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Tensor(self.data**other, (self,), f'**{other}')

        def _backward():
            # d(x^n)/dx = n * x^(n - 1)
            self.grad += (other * self.data**(other-1)) * out.grad
        out._backward = _backward

        return out

    def backward(self):
        """Backpropagate from this node through every node it depends on.

        Sets ``self.grad`` to ``1.0`` and then calls each node's ``_backward``
        closure in reverse topological order.  Gradients are accumulated, so
        they must be reset before a second pass over the same nodes.
        """
        # Iterative post-order DFS: a recursive walk overflows Python's
        # recursion limit on long operation chains.
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # Every operand of `node` has been emitted already.
                topo.append(node)
                stack.pop()

        self.grad = 1.0  # seed: d(self)/d(self) = 1
        for node in reversed(topo):
            node._backward()

    def __neg__(self):  # -self
        return self * -1

    def __radd__(self, other):  # other + self
        return self + other

    def __sub__(self, other):  # self - other
        return self + (-other)

    def __rsub__(self, other):  # other - self
        return other + (-self)

    def __rmul__(self, other):  # other * self
        return self * other

    def __truediv__(self, other):  # self / other
        return self * other**-1

    def __rtruediv__(self, other):  # other / self
        return other * self**-1

    def __repr__(self):
        return f"Tensor(data={self.data})"

In [15]:
# Worked example: L = (a * b + c) * f, with every intermediate labelled.
# Leaf inputs first.
a = Tensor(2.0, label='a')
b = Tensor(-3.0, label='b')
c = Tensor(10.0, label='c')
f = Tensor(-2.0, label='f')

# Intermediate nodes.
e = a * b
e.label = 'e'
d = e + c
d.label = 'd'

# Final output node; the bare name on the last line displays it.
L = d * f
L.label = 'L'
L

Tensor(data=-8.0)

In [16]:
from graphviz import Digraph

def trace(root):
  """Collect every node and edge reachable from ``root`` via ``_prev``.

  Args:
    root: Node whose ``_prev`` attribute is an iterable of operand nodes.

  Returns:
    ``(nodes, edges)``: ``nodes`` is the set of reachable nodes (including
    ``root``) and ``edges`` is a set of ``(child, parent)`` pairs, one per
    operand link.
  """
  nodes, edges = set(), set()
  # An explicit stack replaces recursion so that very deep graphs do not
  # exceed Python's recursion limit.
  pending = [root]
  while pending:
    v = pending.pop()
    if v in nodes:
      continue
    nodes.add(v)
    for child in v._prev:
      edges.add((child, v))
      pending.append(child)
  return nodes, edges

def draw_dot(root):
  """Build a left-to-right Graphviz ``Digraph`` of the graph ending at ``root``.

  Each value becomes a record-shaped node showing its label, data and grad.
  A value produced by an operation additionally gets a small operation node
  that feeds into it, and the operands are wired to that operation node.
  """
  graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR = left to right

  nodes, edges = trace(root)
  for node in nodes:
    node_id = str(id(node))
    record = "{ %s | data %.4f | grad %.4f }" % (node.label, node.data, node.grad)
    graph.node(name=node_id, label=record, shape='record')
    if not node._op:
      # Leaf value: nothing produced it, so there is no operation node.
      continue
    op_id = node_id + node._op
    graph.node(name=op_id, label=node._op)
    graph.edge(op_id, node_id)

  for src, dst in edges:
    # Operands connect to the operation node of the value they produce.
    graph.edge(str(id(src)), str(id(dst)) + dst._op)

  return graph

Now let's build the neural network

In [24]:
import random

class Module:
    """Base class for objects that own trainable parameters."""

    def zero_grad(self):
        """Reset the gradient of every parameter before a new backward pass."""
        for p in self.parameters():
            # Float zero, matching the default a freshly built Tensor gets.
            p.grad = 0.0

    def parameters(self):
        """Return the list of trainable parameters; subclasses override this."""
        return []

class Neuron(Module):
    """A single unit: weighted sum of its inputs plus a bias, optionally ReLU."""

    def __init__(self, nin, nonlin=True):
        """Create ``nin`` weights drawn uniformly from [-1, 1] and a zero bias."""
        self.nonlin = nonlin
        self.b = Tensor(0)
        self.w = [Tensor(random.uniform(-1,1)) for _ in range(nin)]

    def __call__(self, x):
        """Return the activation for the input sequence ``x``."""
        products = [wi * xi for wi, xi in zip(self.w, x)]
        act = sum(products, self.b)  # the bias is the starting value of the sum
        if self.nonlin:
            return act.relu()
        return act

    def parameters(self):
        """Return the weights followed by the bias."""
        return [*self.w, self.b]

    def __repr__(self):
        kind = 'ReLU' if self.nonlin else 'Linear'
        return f"{kind}Neuron({len(self.w)})"

class Layer(Module):
    """A group of ``nout`` neurons that all read the same ``nin`` inputs."""

    def __init__(self, nin, nout, **kwargs):
        """Build ``nout`` neurons; extra keyword arguments go to ``Neuron``."""
        neurons = []
        for _ in range(nout):
            neurons.append(Neuron(nin, **kwargs))
        self.neurons = neurons

    def __call__(self, x):
        """Apply every neuron to ``x``; a single-neuron layer returns a bare value."""
        outputs = [neuron(x) for neuron in self.neurons]
        if len(outputs) == 1:
            return outputs[0]
        return outputs

    def parameters(self):
        """Return the parameters of all neurons, in neuron order."""
        params = []
        for neuron in self.neurons:
            params.extend(neuron.parameters())
        return params

    def __repr__(self):
        described = ', '.join(map(str, self.neurons))
        return f"Layer of [{described}]"

class MLP(Module):
    """Multi-layer perceptron: a stack of ``Layer`` objects applied in order.

    Every layer except the last is built with ``nonlin=True``; the last one
    is built with ``nonlin=False``.
    """

    def __init__(self, nin, nouts):
        """Build the layers.

        Args:
            nin: Number of inputs to the first layer.
            nouts: Sizes of the successive layers (any iterable of ints,
                e.g. a list or a tuple).
        """
        # list(...) lets callers pass a tuple; `[nin] + nouts` only accepted a list.
        sz = [nin] + list(nouts)
        n_layers = len(sz) - 1
        self.layers = [
            Layer(sz[i], sz[i + 1], nonlin=i != n_layers - 1)
            for i in range(n_layers)
        ]

    def __call__(self, x):
        """Run ``x`` through every layer and return the last layer's output."""
        last = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # A single-neuron layer returns a bare value rather than a list.
            # Re-wrap it between layers so the next layer can iterate over
            # its inputs; the final output keeps its original form.
            if i != last and not isinstance(x, list):
                x = [x]
        return x

    def parameters(self):
        """Return the parameters of all layers, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]

    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"
    