In [33]:
import math
import numpy as np
import matplotlib.pyplot as plt
import torch
import random
from typing import List

from sgrad import Value

%matplotlib inline

In [87]:
class Neuron:
    """A single tanh unit computing ``tanh(w . x + b)`` on ``Value`` scalars.

    Every weight and the bias start as ``Value(random.uniform(-1, 1))``.
    """

    def __init__(self, num_in):
        """Create ``num_in`` random weights, then one random bias."""
        weights = []
        for _ in range(num_in):
            weights.append(Value(random.uniform(-1, 1)))
        self.w = weights
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Return the neuron's activation for one input vector ``x``.

        Weights and inputs are paired with ``zip``, so surplus entries on
        either side are silently ignored.
        """
        total = self.b
        for weight, feature in zip(self.w, x):
            # Plain ``+`` rather than ``+=`` so the accumulation creates a
            # new object at every step, exactly as ``sum`` does.
            total = total + weight * feature
        return total.tanh()

    def parameters(self):
        """Return the trainable values as a new list: weights, then bias."""
        return [*self.w, self.b]
    
    
class Layer:
    """A fully connected layer: ``num_out`` neurons taking ``num_in`` inputs each.

    Calling the layer maps a batch (a sequence of input vectors) to a list
    with one entry per sample.
    """

    def __init__(self, num_in, num_out):
        """Store the layer sizes and create ``num_out`` fresh neurons."""
        self.num_in = num_in
        self.num_out = num_out

        self.neurons = [Neuron(num_in) for _ in range(num_out)]

    def __call__(self, x):
        """Apply every neuron to every sample of the batch ``x``.

        Returns a list with one row per sample, each row holding one output
        per neuron. When the layer has exactly one neuron the rows are
        unwrapped, so the result is a flat list with one scalar per sample.
        An empty batch yields an empty list.
        """
        out = [[n(xi) for n in self.neurons] for xi in x]
        # Check ``out`` first: indexing ``out[0]`` on an empty batch would
        # raise IndexError.
        if out and len(out[0]) == 1:
            out = [o[0] for o in out]
        return out

    def parameters(self):
        """Return the parameters of all neurons as one flat list."""
        return [p for neuron in self.neurons for p in neuron.parameters()]
    
    
class MLP:
    """A multi-layer perceptron: a chain of ``Layer`` objects applied in order."""

    def __init__(self, layout: List[int]):
        """Build one layer for each adjacent pair of sizes in ``layout``.

        ``layout[0]`` is the input width and every later entry is the width
        of one layer, e.g. ``[2, 3, 1]`` gives a 2->3 layer then a 3->1 layer.
        """
        self.layers = [Layer(layout[i], layout[i+1]) for i in range(len(layout)-1)]

    def __call__(self, x):
        """Run the batch ``x`` through every layer and return the last output.

        The result has whatever shape the final layer returns. With ``Layer``
        that is one row per sample, or one scalar per sample when the final
        layer has a single neuron.
        """
        last = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # A single-neuron Layer unwraps its rows into scalars. That is
            # fine for the final output, but the next layer iterates over
            # each sample, so re-wrap the scalars into 1-element rows.
            if i != last and x and not isinstance(x[0], list):
                x = [[xi] for xi in x]
        return x

    def parameters(self):
        """Return the parameters of all layers as one flat list."""
        return [p for layer in self.layers for p in layer.parameters()]

    def zero_grad(self):
        """Call ``zero_grad()`` on every parameter of the network."""
        for p in self.parameters():
            p.zero_grad()
            
        
# TODO: SelfAttentionHead is only sketched here and is not implemented yet.
# class SelfAttentionHead:
#     def __init__(self, ):

In [88]:
# Network with 2 inputs, hidden layers of 3, 5 and 3 neurons, and 1 output neuron.
m = MLP([2, 3, 5, 3, 1])

In [89]:
# NOTE(review): `x` is first assigned in a cell further down, so on a fresh
# top-to-bottom run this cell raises NameError.
m(x)

[Value(data=0.00014473180744377273),
 Value(data=0.00014473180744377273),
 Value(data=0.3682260830412507),
 Value(data=-0.8762380858787088)]

In [85]:
# Four samples with two features each, and one target per sample.
x = [[1, 2], [1, 2], [1, 4], [347, 3]]
y = [-1, -1, 1, 1]
# This result is discarded: only the last expression of a cell is displayed.
m(x)
#len(m(x))
# Show every trainable parameter of the network.
m.parameters()

[Value(data=-0.403338344799319),
 Value(data=-0.5576798775137295),
 Value(data=-0.7584471858059523),
 Value(data=0.15287907506789206),
 Value(data=0.8251107688615915),
 Value(data=0.7663666984786376),
 Value(data=0.8985714831370212),
 Value(data=0.6281431290386401),
 Value(data=0.35236006706983153),
 Value(data=0.958501003834656),
 Value(data=-0.25656202030434294),
 Value(data=-0.60805050740469),
 Value(data=-0.6919918092749205),
 Value(data=0.9623852849359836),
 Value(data=0.49102874124898643),
 Value(data=-0.6372161724179853),
 Value(data=0.4226421547980547),
 Value(data=0.4693956598370029),
 Value(data=0.812900787245067),
 Value(data=0.5546960540093653),
 Value(data=0.8890465548608445),
 Value(data=-0.36768699177720005),
 Value(data=-0.7257710245204582),
 Value(data=0.8255117937815438),
 Value(data=0.12242840126990404),
 Value(data=0.8962488325676017),
 Value(data=-0.787483829488462),
 Value(data=0.07060390932423323),
 Value(data=0.21496956146642263),
 Value(data=0.03822045401263027

In [84]:
# One plain gradient-descent update: move each parameter against its gradient.
# NOTE(review): relies on gradients produced by `loss.backward()`, which
# appears in a later cell of this notebook.
lr = 0.01
for p in m.parameters():
    p.data -= lr*p.grad

In [79]:
# Forward pass over the whole batch.
ypred = m(x)

In [80]:
# NOTE(review): `mse` is defined in the following cell, so on a fresh
# top-to-bottom run this cell raises NameError.
loss = mse(ypred, y)

In [81]:
def mse(pred, gt, reduction="sum"):
    """Squared-error loss between predictions and ground-truth targets.

    Args:
        pred: Iterable of predictions.
        gt: Iterable of targets, paired with ``pred`` via ``zip`` (surplus
            entries on either side are ignored).
        reduction: ``"sum"`` (default) returns the sum of squared errors,
            which is what this function has always returned despite its
            name. ``"mean"`` divides that sum by the number of pairs.

    Returns:
        The reduced loss. With no pairs the result is the integer ``0``.

    Raises:
        ValueError: If ``reduction`` is neither ``"sum"`` nor ``"mean"``.
    """
    if reduction not in ("sum", "mean"):
        raise ValueError(f"reduction must be 'sum' or 'mean', got {reduction!r}")

    count = 0
    cost = 0
    for p, target in zip(pred, gt):
        # The difference is deliberately computed twice, as before, so the
        # product never multiplies a single node by itself.
        cost = cost + (p - target) * (p - target)
        count += 1

    # Skip the division for an empty input to avoid dividing by zero.
    if reduction == "mean" and count:
        cost = cost * (1.0 / count)
    return cost
    

In [380]:
# Inputs, weights and bias of a two-input neuron, as float64 leaf tensors
# that track gradients.
x1 = torch.tensor([2.0], dtype=torch.float64, requires_grad=True)
x2 = torch.tensor([0.0], dtype=torch.float64, requires_grad=True)
w1 = torch.tensor([-3.0], dtype=torch.float64, requires_grad=True)
w2 = torch.tensor([1.0], dtype=torch.float64, requires_grad=True)

# Built directly as float64: torch.Tensor([...]) creates a float32 tensor,
# which rounds this constant before .double() can widen it.
b = torch.tensor([6.8813735870195432], dtype=torch.float64, requires_grad=True)




In [381]:
# Single-neuron forward pass: weighted sum of the inputs plus bias, then tanh.
n = x1*w1 + x2*w2 + b
o = torch.tanh(n)

In [82]:
# Backpropagate from the loss computed by mse(ypred, y).
loss.backward()

In [387]:
# Backpropagate through the PyTorch graph ending at `o`; this fills .grad on
# the leaf tensors that have requires_grad set.
o.backward()

In [346]:
# NOTE(review): `x1w1` is not defined anywhere in the visible notebook;
# probably left over from an earlier version of the cells above. Confirm or remove.
x1w1.grad

0.4999999999999999

In [347]:
# NOTE(review): `x2w2` is not defined anywhere in the visible notebook;
# probably left over from an earlier version of the cells above. Confirm or remove.
x2w2.grad

0.4999999999999999

In [348]:
# NOTE(review): `x1w1x2w2` is not defined anywhere in the visible notebook;
# probably left over from an earlier version of the cells above. Confirm or remove.
x1w1x2w2.grad

0.4999999999999999

In [349]:
# NOTE(review): the float shown below presumably comes from an earlier run
# where `n` was not the torch tensor built above. PyTorch does not keep .grad
# on non-leaf tensors by default. Confirm and re-run.
n.grad

0.4999999999999999

In [350]:
# NOTE(review): the value shown below presumably comes from an earlier run
# where `o` was not the torch tensor built above. Confirm and re-run.
o.grad

1

In [316]:
# Operands for checking Value division.
# NOTE: this rebinds `b`, which the torch cell earlier in the notebook also assigns.
a = Value(3)
b = Value(6)

In [317]:
# Division under test.
c = a / b

In [318]:
# Expected value: 3 / 6 = 0.5.
c

Value(data=0.5)

In [319]:
# Backpropagate from c so that a.grad and b.grad can be inspected below.
c.backward()

In [320]:
# Analytically dc/da = 1/b = 1/6.
a.grad

0.16666666666666666

In [321]:
# Analytically dc/db = -a/b**2 = -3/36 = -1/12.
b.grad

-0.08333333333333333

In [121]:
# Scratch arithmetic check.
-0.12*25

-3.0

In [122]:
# Scratch arithmetic check (inverse of the multiplication in the previous cell).
-3/25

-0.12