In [17]:
import math
import numpy as np
import matplotlib.pyplot as plt


In [236]:
class Value:

  # _ op tells us wich operation created this value
  #_children contains konse 2 elements ke beech operation ka result hai yeh no
  def __init__(self,data, _children = (), _op = '') -> None: # like a constructor
    self.data = data
    self._backward = lambda : None # backward is a function which will be called once the entire neural net has been made
    self.grad = 0.0 # this is actually the derivative of final expression with respect to current value (intitally it is zero that means this value has no effect on the final result)
    self._prev = set(_children)
    self._op = _op


  def __repr__(self) -> str: # used to define string representation of an object
    return f"Value(data={self.data})"

  def __add__(self,other):
    if isinstance(other,int) or isinstance(other,float):
      other = Value(other)
    out = Value(self.data + other.data,(self,other),'+')

    def _backward(): ## it is a closure function and captures all the local variables that is it will capture the reference of out, self and every other object defined here
      self.grad += 1.0*out.grad
      other.grad += 1.0*out.grad

    out._backward = _backward

    return out

  def __mul__(self,other):
    if isinstance(other,int) or isinstance(other,float):
      other = Value(other)
    out = Value(self.data * other.data, (self,other),'*')

    def _backward():
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad

    out._backward = _backward

    return out

  def tanh(self):
    n = self.data
    t = (math.exp(2*n) - 1) / (math.exp(2*n)+1) # this is tanh formula
    out = Value(t,(self,),'tanh')

    def _backward():
      self.grad += (1-t**2) * out.grad
    out._backward = _backward
    return out

  def exp(self):
    x = self.data
    out = Value(math.exp(x),(self,),'exp')

    def _backward():
      self.grad += out.data*out.grad

    out._backward = _backward

  def __truediv__(self,other):
    return self * other**-1

  def __neg__(self):
    return self* Value(-1)

  def __sub__(self,other):
    return self + (-other)


  def __pow__(self,other):
    if isinstance(other,float) or isinstance(other,int):
      other = Value(other)
    out = Value(self.data**other.data,(self,),f"**{other.data}") # we here assume other is of type integer or float

    def _backward():
      self.grad  += other.data * (self.data ** (other.data - 1)) * out.grad

    out._backward = _backward
    return out

  def topo(self):

    self.grad = 1
    topo = []
    visited = set()
    que = [self]

    while len(que) > 0:
      print(que)
      size = len(que)
      while size > 0:
        size-=1
        v = que.pop(0) # we can improve the tc by implementing a que
        if v in visited:
          continue
        visited.add(v)
        topo.append(v)
        for child in v._prev:
          que.append(child)

    print(len(topo))
    for node in topo:
      print(node.grad)
      node._backward()




# backpropogation
# we want to find out the derivative of final expression with respect to each value (weights) in the neural nets, this will tell us how each value (weights) influences the final outcome.

# The





In [3]:
import torch
import random

In [2]:
# lets try the same implementation with torch library
# tensors are nothing but 2 d arrays
# torch simplifies the whole process and is much more optimized
x1 = torch.tensor([2.0]).double()
x2 = torch.tensor([0.0]).double()
w1 = torch.tensor([-3.0]).double()
w2 = torch.tensor([1.0]).double()
b = torch.tensor([6.881373]).double()

x1.requires_grad = True
x2.requires_grad = True
w1.requires_grad = True
w2.requires_grad = True


n = x1*w1 + x2*w2 + b

o = torch.tanh(n)
o.backward() # backpropogation

print('x2',x2.grad.item())






x2 0.500000465559325


In [327]:
class Neuron:
  def __init__(self,nin): # nin basically tells us how many inputs comes to this Neuron
    self.w = [Value(random.uniform(-1.0,1.0)) for _ in range(nin)] # for each input we have one corresponding weight
    self.b = Value(random.uniform(-1.0,1.0)) # each neuron will have its own bias

  def __call__(self,x): # this method is called directly from object variables like for ex obj = Neuron, then it will be called as obj(x)
    activation = sum([w*x for w, x in zip(self.w,x)],self.b)
    out = activation.tanh()
    return out
  def parameters(self): # these all the parameters which we can fine tune to improve the result
    return self.w + [self.b]



class Layer:
  def __init__(self,nin,nout): #nout tells us how many neurons we want in that layer
    self.neurons = [Neuron(nin) for _ in range(nout)]

  def __call__(self,x):
    outs = [n(x) for n in self.neurons]

    return outs[0] if len(outs) == 1 else outs

  def parameters(self):
    p = []
    for neuorn in self.neurons:
      ps = neuorn.parameters()
      p.extend(ps)
    return p





class MLP: # every feeds in to another layer

  def __init__(self,nin,nouts): # nouts basically tells how many layers we want in each layer
    sz = [nin] + nouts # so basically the first layer will have nin inputs to each neuron but later on each neuron will get nout number of inputs
    self.layers = [Layer(sz[i],sz[i+1]) for i in range(len(sz)-1)]

    # sz[i] -> are the number of inputs
    # sz[i+1] -> number of neurons in that layer

  def __call__(self,x):
    for layer in self.layers: # here we basically pass outputs of one layer to another layers neurons
      x = layer(x)
    return x
  def parameters(self):
    p = []
    for layer in self.layers:
      ps = layer.parameters()
      p.extend(ps)
    return p




x = [Value(2.0),Value(3.0),Value(-1.0)]
n = MLP(3,[4,4,1])
n(x)




Value(data=-0.9405298027166158)

In [328]:
## creating loss function

xs = [
  [2.0,3.0,-1.0],
  [3.0,-1.0,0.5],
  [0.5,1.0,1.0],
  [1.0,1.0,-1.0]
]
ys = [1.0,-1.0,-1.0,1.0] # these are the desired targets for the 4 inputs
ypred = [n(x) for x in xs]
ypred

[Value(data=-0.9405298027166158),
 Value(data=-0.8957813567916313),
 Value(data=-0.8960674094667208),
 Value(data=-0.9388468974715499)]

In [329]:
loss = [(Value(ygt) - yout)**2 for ygt, yout in zip(ys,ypred)]

total_loss = Value(0.0)
for l in loss:
  total_loss += l

total_loss




Value(data=7.546446716033593)

In [368]:
ypred = [n(x) for x in xs]
loss = [(Value(ygt) - yout)**2 for ygt, yout in zip(ys,ypred)]

total_loss = Value(0.0)
for l in loss:
  total_loss += l

total_loss


Value(data=6.891288750725499)

In [369]:
for p in n.parameters():
  p.grad = 0.0
total_loss.topo()

[Value(data=6.891288750725499)]
[Value(data=3.0105177673465615), Value(data=3.8807709833789374)]
[Value(data=1.7350843689419144), Value(data=0.0005172545285702123), Value(data=3.8802537288503673)]
[Value(data=1.0), Value(data=0.7350843689419145), Value(data=-0.022743230389947078), Value(data=0.01928296282604084), Value(data=3.8609707660243267)]
[Value(data=-1), Value(data=-0.7350843689419145), Value(data=0.9772567696100529), Value(data=-1.0), Value(data=-0.1388631082254781), Value(data=0.0), Value(data=3.8609707660243267)]
[Value(data=-0.9396999584509773), Value(data=-1), Value(data=-0.9772567696100529), Value(data=0.8611368917745219), Value(data=-1.0), Value(data=1.9649353083560606)]
[Value(data=-0.5670010201711435), Value(data=-0.37269893827983386), Value(data=-2.23259907865264), Value(data=-0.8611368917745219), Value(data=-1), Value(data=1.0), Value(data=0.9649353083560606)]
[Value(data=0.6691916576761641), Value(data=-0.8472924216361454), Value(data=-0.4991928124216387), Value(data

In [331]:
n.layers[0].neurons[0].w[1].grad
len(n.parameters())


41

In [371]:
# steps
# we produce the predicted values
# get the gradient of each parameter and loss
# update the data based on gradient to reduce loss
# re run
for i in range(0,20):



  ypred = [n(x) for x in xs]
  loss = [(Value(ygt) - yout)**2 for ygt, yout in zip(ys,ypred)]

  total_loss = Value(0.0)
  for l in loss:
    total_loss += l
  print(total_loss)

  for p in n.parameters():
    p.grad = 0.0
  total_loss.topo()

  for p in n.parameters():
    p.data += -0.05 * p.grad



Value(data=5.772732784218359)
[Value(data=5.772732784218359)]
[Value(data=1.9178646296473674), Value(data=3.8548681545709917)]
[Value(data=1.38486989628895), Value(data=3.8539059187823677), Value(data=0.0009622357886239699)]
[Value(data=0.38486989628895013), Value(data=1.0), Value(data=0.12856405816928038), Value(data=3.7253418606130873), Value(data=-0.03101992567083245)]
[Value(data=-1), Value(data=-0.38486989628895013), Value(data=-0.3585583051182616), Value(data=0.0), Value(data=3.7253418606130873), Value(data=0.9689800743291676), Value(data=-1.0)]
[Value(data=-0.40576384016451), Value(data=0.6414416948817384), Value(data=-1.0), Value(data=1.930114468266866), Value(data=-1), Value(data=-0.9689800743291676)]
[Value(data=-0.4832664064786344), Value(data=0.07750256631412439), Value(data=-0.6414416948817384), Value(data=-1), Value(data=1.0), Value(data=0.9301144682668661), Value(data=-2.0753205986604737)]
[Value(data=0.5969574673424841), Value(data=-0.809549143643389), Value(data=-0.327

In [373]:
ypred = [n(x) for x in xs]
loss = [(Value(ygt) - yout)**2 for ygt, yout in zip(ys,ypred)]
total_loss = Value(0.0)
for l in loss:
  total_loss += l
print(total_loss)
print(ypred)

Value(data=0.05672828010280238)
[Value(data=0.8947597159664947), Value(data=-0.8782247554850983), Value(data=-0.8682842443263509), Value(data=0.8839202332445272)]
