In [55]:
import math
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [38]:
from graphviz import Digraph

def trace(root):
    """Collect every node and edge reachable from `root` through `_prev` links.

    Args:
        root: graph node exposing a `_prev` iterable of the nodes it was
            computed from.

    Returns:
        A `(nodes, edges)` pair of sets. `nodes` holds every reachable node
        (including `root`); `edges` holds `(child, parent)` tuples, one per
        `_prev` link.
    """
    nodes, edges = set(), set()

    # An explicit stack replaces recursion so that very deep graphs (e.g. a
    # long chain of additions) do not hit Python's recursion limit.
    pending = [root]
    while pending:
        v = pending.pop()
        if v in nodes:
            continue  # already expanded through another path
        nodes.add(v)
        for child in v._prev:
            edges.add((child, v))
            pending.append(child)

    return nodes, edges

def draw_dot(root):
    """Build a left-to-right Graphviz digraph (SVG format) of the graph under `root`.

    Every node returned by `trace(root)` is drawn as a record showing its
    label, data and grad. A node with a non-empty `_op` additionally gets a
    small operation node that feeds into it, and incoming edges are attached
    to that operation node rather than to the record itself.

    Returns:
        The populated `Digraph` object.
    """
    graph = Digraph(format='svg', graph_attr={'rankdir':'LR'})

    all_nodes, all_edges = trace(root)
    for value in all_nodes:
        value_id = str(id(value))
        record = "{% s | data %.4f | grad %.4f}" % (value.label, value.data, value.grad)
        graph.node(name=value_id, label=record, shape='record')

        if not value._op:
            continue  # nothing produced this node, so no operation node is needed

        # The operation node's id is the value id suffixed with the op string.
        op_id = value_id + value._op
        graph.node(name=op_id, label=value._op)
        graph.edge(op_id, value_id)

    for src, dst in all_edges:
        # Operands point at the consumer's operation node, not its record.
        graph.edge(str(id(src)), str(id(dst)) + dst._op)

    return graph


In [127]:
import math
import random

class Value:
    """A scalar that remembers how it was computed so gradients can be back-propagated.

    Attributes:
        data: numeric value held by this node.
        grad: accumulated derivative with respect to this node; starts at 0.0
            and is only ever added to (`+=`) by the `_backward` closures.
        _prev: set of operand `Value`s this node was computed from.
        _op: short string naming the operation that produced the node
            ('' when the node was created directly).
        label: optional display name.
    """

    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data
        self.grad = 0.0
        # Closure that pushes this node's grad into its operands. Nodes that
        # were not produced by an operation keep this no-op.
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op
        self.label = label

    def __repr__(self):
        return f"Value(data={self.data})"

    def __add__(self, other):
        """Return `self + other`; a non-Value `other` is wrapped in a Value."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward
        return out

    def __mul__(self, other):
        """Return `self * other`; a non-Value `other` is wrapped in a Value."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            # d(a * b)/da = b and d(a * b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def __pow__(self, other):
        """Return `self ** other` for a plain int/float exponent.

        Raises:
            AssertionError: if `other` is not an int or float.
        """
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Value(self.data**other, (self,), f'**{other}')

        def _backward():
            # Power rule: d(x**k)/dx = k * x**(k - 1)
            self.grad += other * (self.data**(other - 1)) * out.grad
        out._backward = _backward
        return out

    def tanh(self):
        """Return the hyperbolic tangent of this value as a new node."""
        # math.tanh saturates to +/-1.0 for large |x|. The explicit
        # (e^{2x} - 1) / (e^{2x} + 1) form raises OverflowError as soon as
        # math.exp(2x) no longer fits in a float.
        t = math.tanh(self.data)
        out = Value(t, (self,), 'tanh')

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)**2
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward
        return out

    def exp(self):
        """Return e raised to this value as a new node."""
        x = self.data
        out = Value(math.exp(x), (self,), 'exp')

        def _backward():
            # d e**x / dx = e**x, which is exactly out.data
            self.grad += out.data * out.grad
        out._backward = _backward
        return out

    def __neg__(self):
        return self * -1

    def __radd__(self, other):
        # Reached for `number + Value`; this is also what lets the built-in
        # sum() start from the integer 0.
        return self + other

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        return other + (-self)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        return self * other**-1

    def __rtruediv__(self, other):
        return other * self**-1

    def backward(self):
        """Back-propagate from this node through everything it depends on.

        Sets `self.grad` to 1.0 and then runs each reachable node's
        `_backward` closure in reverse topological order, so a node's grad is
        complete before it is pushed on to its operands. Gradients are
        accumulated with `+=`, so grads left over from an earlier pass are
        added to rather than overwritten.
        """
        topo = []
        visited = set()

        # Iterative post-order DFS: a node is emitted only after all of its
        # operands. An explicit stack is used instead of recursion so that
        # deep graphs do not exceed Python's recursion limit.
        stack = [(self, False)]
        while stack:
            node, operands_done = stack.pop()
            if operands_done:
                topo.append(node)
            elif node not in visited:
                visited.add(node)
                stack.append((node, True))  # revisit once the operands are emitted
                stack.extend(
                    (child, False) for child in node._prev if child not in visited
                )

        self.grad = 1.0
        for node in reversed(topo):
            node._backward()

In [128]:
class Neuron:
    """One tanh unit: a weight per input plus a bias, all drawn from uniform(-1, 1)."""

    def __init__(self,nin):
        # One weight per input, created in order, followed by the bias.
        self.w = []
        for _ in range(nin):
            self.w.append(Value(random.uniform(-1,1)))
        self.b = Value(random.uniform(-1,1))

    def __call__(self,x):
        """Return tanh(b + sum_i w_i * x_i) for the inputs `x`.

        Weights and inputs are paired with zip, so pairing stops at the
        shorter of the two.
        """
        # Start from the bias and fold in each weighted input, left to right.
        pre_activation = self.b
        for weight, value in zip(self.w, x):
            pre_activation = pre_activation + weight * value
        return pre_activation.tanh()

    def parameters(self):
        """Return a new list holding the weights followed by the bias."""
        return [*self.w, self.b]

class Layer:
    """A group of `nout` neurons that all read the same `nin` inputs."""

    def __init__(self, nin, nout):
        self.neurons = []
        for _ in range(nout):
            self.neurons.append(Neuron(nin))

    def __call__(self, x):
        """Evaluate every neuron on `x`.

        Returns the bare output when the layer has exactly one neuron,
        otherwise the list of outputs in neuron order.
        """
        activations = [neuron(x) for neuron in self.neurons]
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        """Return the parameters of all neurons as one flat list, in neuron order."""
        return [param for neuron in self.neurons for param in neuron.parameters()]

class MLP:
    """A stack of `Layer`s applied one after another.

    Args:
        nin: number of inputs to the first layer.
        nouts: iterable of layer widths, one entry per layer. Any iterable is
            accepted (list, tuple, generator, ...), not only a list.
    """

    def __init__(self, nin, nouts):
        # Materialise first: `[nin] + nouts` would raise TypeError for a tuple
        # or any other non-list iterable.
        sz = [nin] + list(nouts)
        # Consecutive size pairs give each layer's (inputs, outputs).
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz, sz[1:])]

    def __call__(self, x):
        """Feed `x` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers as one flat list, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]


In [129]:
# Seed the stdlib RNG before the network draws its initial weights so that a
# fresh "Restart & Run All" reproduces the same model and the same numbers.
random.seed(1337)

# A single 3-feature input and a 3 -> 4 -> 3 -> 1 network to try it on.
x = [2.0,3.0,-1.0]
n = MLP(3, [4,3,1])
n(x)

Value(data=-0.287531305789408)

In [130]:
# Toy training set: three inputs of three features each ...
xs = [
    [2.0, 3.0, -4.0],
    [3.0, -1.0, 2.0],
    [5.0, 6.0, -3.0],
]
# ... and one target per input, in the same order.
ys = [1.0, -1.0, 1.0]

In [131]:
# Training hyperparameters, named so they can be tuned in one place.
N_STEPS = 20
LEARNING_RATE = 0.1

# parameters() returns the same Value objects on every call and the update
# below mutates them in place, so the list only needs to be built once.
params = n.parameters()

for k in range(N_STEPS):

    # Forward pass: predict every example and sum the squared errors.
    ypred = [n(x) for x in xs]
    loss = sum((yout-ygt)**2 for yout,ygt in zip(ypred,ys))

    # Backward pass: Value accumulates gradients with +=, so the parameter
    # grads must be cleared before each new backward() call.
    for p in params:
        p.grad = 0.0

    loss.backward()

    # Update: step each parameter against its gradient.
    for p in params:
        p.data -= LEARNING_RATE * p.grad

    # The value printed is the loss measured before this step's update.
    print(f"step {k} loss {loss.data}")


step 0 loss 4.6603969167011305
step 1 loss 1.1938075476560992
step 2 loss 0.44837103742362483
step 3 loss 0.23637641798456208
step 4 loss 0.15463636766268396
step 5 loss 0.11316504610712137
step 6 loss 0.08849037172051513
step 7 loss 0.07226554035956047
step 8 loss 0.0608457724275088
step 9 loss 0.05240290067988227
step 10 loss 0.045924557199500005
step 11 loss 0.04080702656181902
step 12 loss 0.0366689726947795
step 13 loss 0.03325819474769501
step 14 loss 0.030401508979218426
step 15 loss 0.02797620995697951
step 16 loss 0.025893024175523027
step 17 loss 0.024085510081040654
step 18 loss 0.022503236252516448
step 19 loss 0.02110725878155211


In [114]:
import torch

# 1. Build the tensors
# Inputs: four samples with one feature each.
x = torch.tensor([[1.0], [2.0], [3.0], [4.0]])

# Targets, one per input row.
y = torch.tensor([[2.0], [4.0], [6.0], [8.0]])

# Model parameters to be learned. Both start at zero, and
# requires_grad=True asks PyTorch to compute gradients for them.
w = torch.zeros((1, 1), requires_grad=True)
b = torch.zeros((1, 1), requires_grad=True)

# 2. Model and loss function
def forward(x):
    """Linear model y_pred = x @ w + b, reading the module-level `w` and `b`."""
    weighted = x @ w  # @ is matrix multiplication
    return weighted + b

def loss(y, y_pred):
    """Mean squared error between the predictions and the targets."""
    squared_error = (y_pred - y)**2
    return squared_error.mean()

# 3. Forward pass, then measure how far off the prediction is
y_pred = forward(x)
L = loss(y, y_pred)

# 4. Automatic differentiation
# backward() fills in .grad on every tensor created with
# requires_grad=True that the loss depends on (here w and b).
L.backward()

# Report the prediction, the loss and the gradients that were just computed.
print(f"Initial Prediction: {y_pred.detach().numpy().flatten()}")
print(f"Actual Target: {y.numpy().flatten()}")
print(f"Loss: {L.item()}")
print("--- Gradients ---")
print(f"Gradient of w: {w.grad.item()}")
print(f"Gradient of b: {b.grad.item()}")

# A real training loop would now use these gradients to update
# w and b, and then repeat the whole process.

Initial Prediction: [0. 0. 0. 0.]
Actual Target: [2. 4. 6. 8.]
Loss: 30.0
--- Gradients ---
Gradient of w: -30.0
Gradient of b: -10.0
