In [47]:
import math
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [48]:
class Value:
  
  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    self._backward = lambda: None
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"
  
  def __add__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')
    
    def _backward():
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward
    
    return out

  def __mul__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')
    
    def _backward():
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
      
    return out
  
  def __pow__(self, other):
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
        self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward

    return out
  
  def __rmul__(self, other): # other * self
    return self * other

  def __truediv__(self, other): # self / other
    return self * other**-1

  def __neg__(self): # -self
    return self * -1

  def __sub__(self, other): # self - other
    return self + (-other)

  def __radd__(self, other): # other + self
    return self + other

  def tanh(self):
    x = self.data
    t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
    out = Value(t, (self, ), 'tanh')
    
    def _backward():
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward
    
    return out
  
  def exp(self):
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')
    
    def _backward():
      self.grad += out.data * out.grad # NOTE: in the video I incorrectly used = instead of +=. Fixed here.
    out._backward = _backward
    
    return out
  
  
  def backward(self):
    
    topo = []
    visited = set()
    def build_topo(v):
      if v not in visited:
        visited.add(v)
        for child in v._prev:
          build_topo(child)
        topo.append(v)
    build_topo(self)
    
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()



In [49]:
from graphviz import Digraph

def trace(root):
  # builds a set of all nodes and edges in a graph
  nodes, edges = set(), set()
  def build(v):
    if v not in nodes:
      nodes.add(v)
      for child in v._prev:
        edges.add((child, v))
        build(child)
  build(root)
  return nodes, edges

def draw_dot(root):
  dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) # LR = left to right
  
  nodes, edges = trace(root)
  for n in nodes:
    uid = str(id(n))
    # for any value in the graph, create a rectangular ('record') node for it
    dot.node(name = uid, label = "{ %s | data %.4f | grad %.4f }" % (n.label, n.data, n.grad), shape='record')
    if n._op:
      # if this value is a result of some operation, create an op node for it
      dot.node(name = uid + n._op, label = n._op)
      # and connect this node to it
      dot.edge(uid + n._op, uid)

  for n1, n2 in edges:
    # connect n1 to the op node of n2
    dot.edge(str(id(n1)), str(id(n2)) + n2._op)

  return dot


In [50]:
import torch

In [51]:

x1 = torch.Tensor([2.0]).double()                ; x1.requires_grad = True
x2 = torch.Tensor([0.0]).double()                ; x2.requires_grad = True
w1 = torch.Tensor([-3.0]).double()               ; w1.requires_grad = True
w2 = torch.Tensor([1.0]).double()                ; w2.requires_grad = True
b = torch.Tensor([6.8813735870195432]).double()  ; b.requires_grad = True
n = x1*w1 + x2*w2 + b
o = torch.tanh(n)

print(o.data.item())
o.backward()

print('---')
print('x2', x2.grad.item())
print('w2', w2.grad.item())
print('x1', x1.grad.item())
print('w1', w1.grad.item())

0.7071066904050358
---
x2 0.5000001283844369
w2 0.0
x1 -1.5000003851533106
w1 1.0000002567688737


## Now Lets build the Neural Net Library from scratch(but we'll follow the naming as like PyTorch library)

### First we will be building the basic building block - **A Neuron** 
### Then we will build - **A Layer of Neurons**
### Then we will build - **A Multi-Layered Perceptron (MLP)** which will be our Neural Network


- The code is self explanatory for Neuron class(nin - number of inputs/ Dimention of the neuron)
- Each Layer  will have a number of Neurons which are not connected to each other in the same Layer but fully connected to the next or previous layer
<br>
nin - number of inputs to the layer<br>
nout - number of outputs from the layer

<br>

![alt text](image-5.png)

<br>

# Neural Network Building Blocks

## 1. `Value` Class
The foundation of your autograd engine. Each parameter (weight, bias) is wrapped in a `Value` object to track its value and gradient during backpropagation.

## 2. `Neuron` Class
Represents a single neuron that:
* Takes `nin` inputs
* Has `nin` weights (one for each input)
* Has 1 bias term
* Computes: output = tanh(w₁x₁ + w₂x₂ + ... + wₙxₙ + b)

## 3. `Layer` Class
A collection of neurons where:
* All neurons in the layer receive the same inputs
* Each neuron produces its own output
* The layer outputs all neuron outputs as a list

## 4. `MLP` Class
Connects multiple layers sequentially where:
* The output of one layer becomes the input to the next
* The first layer receives the external input `x`
* The last layer produces the final output


In [52]:
#Defining a class for a Neuron
class Neuron:
  
  def __init__(self, nin):  #nin- number of inputs that come to a neuron
    self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]   #initiating each inputs with random numbers and assign them to the weights matrix
    self.b = Value(random.uniform(-1,1))                         #initiating with a random bias
  
  def __call__(self, x):
    # w * x + b
    act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b) #value of sum can be started with self.b , that is why 2nd parameter of sum() is self.b
    out = act.tanh()
    return out
  
  def parameters(self):
    return self.w + [self.b]

class Layer:
  
  def __init__(self, nin, nout):  #nout -Number of neurons in a single layer/How many neurons do you want in your layer , nin - Dimension of the neurons
    self.neurons = [Neuron(nin) for _ in range(nout)]
  
  def __call__(self, x):
    outs = [n(x) for n in self.neurons]
    return outs[0] if len(outs) == 1 else outs
  
  def parameters(self):
    return [p for neuron in self.neurons for p in neuron.parameters()]

class MLP:
  
  def __init__(self, nin, nouts): #nouts - this is a list consisting the number of Neurons in each layers. [4,4] means 2 layers with each layer having 4 neurons
                                  #nin - number of input neurons
    sz = [nin] + nouts            #sz list - see code explanation for better understanding
    self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(nouts))]   #creating each layers- see code explanation 
  
  def __call__(self, x):
    for layer in self.layers:
      x = layer(x)
    return x
  
  def parameters(self):
    return [p for layer in self.layers for p in layer.parameters()]


In [None]:
# x = [2.0, 3.0, -1.0]
# n = MLP(3, [4, 4, 1])
# x = [2.0 , 3.0]
# n = Layer(2 , 3)
# n = Neuron(2)
# n(x)




#Lets create the neural network of the above image 
#3 input neuron , 2 layer with four neuron in each , 1 output neuron
x = [2.0 , 3.0  , -1.0] #3 inputs to the input neuron
n = MLP(3 , [4 , 4, 1])  #3 inputs , 2 layers , each with 4 neurons , 1 output
n(x)






Value(data=0.6747410739946735)

In [None]:
#you can also visualize this neural network by running --
# draw_dot(n(x))

#as this is a untrained neural net , all grad will be 0.0000. we'll train it later

# What's Happening Inside this Neural Network

## When you create `MLP(3, [4, 4, 1])`:

1. The network has 3 layers:
   * **Layer 1**: 4 neurons, each taking 3 inputs
   * **Layer 2**: 4 neurons, each taking 4 inputs (outputs from Layer 1)
   * **Layer 3**: 1 neuron, taking 4 inputs (outputs from Layer 2)

2. When you call `n(x)`:
   * `x` [2.0, 3.0, -1.0] is fed to Layer 1
   * Layer 1 produces 4 outputs (one from each neuron)
   * These 4 outputs are fed to Layer 2
   * Layer 2 produces 4 outputs
   * These 4 outputs are fed to Layer 3
   * Layer 3 produces the final output (a single value)


## Some Code explanation:
### The sz List Creation
First, `sz = [nin] + nouts` - creates a new list that contains all layer sizes in order. This line is using list concatenation to join the input size with the sizes of all subsequent layers.<br>
For example, if you create a network with:<br>
```python
MLP(3, [4, 4, 1])
```
Then:

`nin` is 3 (the number of input features)
`nouts` is [4, 4, 1] (the sizes of all layers)
`sz` becomes [3, 4, 4, 1]

### The `self.layers` List Creation
Next,
```python
 `self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(sz)-1)]`
 ```
<br>

This creates the actual layer objects. For each iteration:<br>

- i goes from 0 to `len(nouts)-1` (in our example, 0 to 2)
- For each i, we create a ``Layer(sz[i], sz[i+1])``

Following our example with ``sz = [3, 4, 4, 1]:`` <br>

When ``i=0:`` <br>

- We create ``Layer(sz[0], sz[1])`` which is ``Layer(3, 4)`` <br>
- This means: a layer with 4 neurons, each taking 3 inputs


When `i=1:`<br>

- We create `Layer(sz[1], sz[2])` which is `Layer(4, 4)`<br>
- This means: a layer with 4 neurons, each taking 4 inputs<br>


When` i=2:`<br>

- We create `Layer(sz[2], sz[3])` which is `Layer(4, 1)`<br>
- This means: a layer with 1 neuron, taking 4 inputs<br>



The resulting `self.layers` becomes `[Layer(3,4), Layer(4,4), Layer(4,1)]`, which is exactly what we need for our network architecture.


## Now Lets build a tiny dataset to test our Neural Network and write a loss function to train the network

### This is our dataset(very tiny) , something like binary classifier

In [None]:
xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0] # desired targets

In [55]:

for k in range(20):
  
  # forward pass
  ypred = [n(x) for x in xs]
  loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))
  
  # backward pass
  for p in n.parameters():
    p.grad = 0.0
  loss.backward()
  
  # update
  for p in n.parameters():
    p.data += -0.1 * p.grad
  
  print(k, loss.data)
  

0 4.3574567008266465
1 2.7199641200315434
2 0.9781222846958535
3 0.3969974848312069
4 0.18528950185017162
5 0.11716424996316788
6 0.085788514153105
7 0.06761609161931967
8 0.05576333151862019
9 0.04742125933592265
10 0.0412309061843707
11 0.036454795077672096
12 0.032658074629093294
13 0.029567689110198875
14 0.027003503943843547
15 0.02484186106990222
16 0.022995044289188908
17 0.021399099780921152
18 0.02000629668114786
19 0.018780287982270598


In [56]:
ypred

[Value(data=0.9605560941789921),
 Value(data=-0.9768273003021039),
 Value(data=-0.896746698590858),
 Value(data=0.9223710877785986)]