In [84]:
import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import torch


In [None]:
class Value:
  """Scalar that records the operations applied to it so gradients can be backpropagated.

  Attributes:
    data: the wrapped number.
    grad: accumulated derivative of the node `backward()` was called on with
      respect to this value; starts at 0.0 and is only ever added to.
    _backward: closure that pushes this node's `grad` onto its operands.
    _prev: set of the operand nodes this value was computed from.
    _op: short text tag of the operation that produced this value.
    label: optional free-form name; not used by any computation in this class.
  """

  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    # Leaves have nothing to propagate; every operator below overwrites this.
    self._backward = lambda: None
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"

  def __add__(self, other):
    """Return `self + other`; a non-Value operand is wrapped in a Value first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')

    def _backward():
      # Both partial derivatives are 1. `+=` (not `=`) so that a node feeding
      # several consumers sums the contributions from each of them.
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward

    return out

  def __mul__(self, other):
    """Return `self * other`; a non-Value operand is wrapped in a Value first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')

    def _backward():
      # Product rule: each operand's gradient is scaled by the other operand.
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward

    return out

  def __pow__(self, other):
    """Return `self ** other` for a constant int/float exponent.

    Raises:
      AssertionError: if `other` is not an int or float.
    """
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
      # Power rule: d(x**n)/dx = n * x**(n-1).
      self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward

    return out

  def __rmul__(self, other):
    """Handle `number * Value` by delegating to `__mul__`."""
    return self * other

  def __truediv__(self, other):
    """Return `self / other`, expressed as multiplication by `other ** -1`."""
    return self * other**-1

  def __rtruediv__(self, other):
    """Handle `number / Value` (previously a TypeError)."""
    return self**-1 * other

  def __neg__(self):
    """Return `-self`, expressed as multiplication by -1."""
    return self * -1

  def __sub__(self, other):
    """Return `self - other`, expressed as addition of the negation."""
    return self + (-other)

  def __rsub__(self, other):
    """Handle `number - Value` (previously a TypeError)."""
    return (-self) + other

  def __radd__(self, other):
    """Handle `number + Value`; this is what lets built-in `sum()` start from 0."""
    return self + other

  def tanh(self):
    """Return a new Value holding tanh(self.data)."""
    # math.tanh saturates to +/-1.0 for large |x|; the exp-based formula
    # (e^{2x} - 1) / (e^{2x} + 1) raised OverflowError there.
    t = math.tanh(self.data)
    out = Value(t, (self, ), 'tanh')

    def _backward():
      # d tanh(x)/dx = 1 - tanh(x)^2
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward

    return out

  def exp(self):
    """Return a new Value holding e ** self.data."""
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')

    def _backward():
      # d e^x/dx = e^x, which is exactly out.data.
      self.grad += out.data * out.grad
    out._backward = _backward

    return out

  def backward(self):
    """Backpropagate from this node, adding d(self)/d(node) into every upstream `grad`.

    Gradients are accumulated, not reset: zero the `grad` fields first if the
    same nodes took part in an earlier backward pass.
    """
    # Post-order DFS with an explicit stack instead of recursion, so graphs
    # deeper than the interpreter's recursion limit (e.g. a long running sum)
    # no longer raise RecursionError. Visiting order matches the recursive form.
    topo = []
    visited = {self}
    stack = [(self, iter(self._prev))]
    while stack:
      node, pending = stack[-1]
      for child in pending:
        if child not in visited:
          visited.add(child)
          stack.append((child, iter(child._prev)))
          break
      else:
        # All operands of `node` are already in `topo`; emit it after them.
        topo.append(node)
        stack.pop()

    # Seed d(self)/d(self) = 1, then walk from the output back to the leaves.
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

In [91]:
import random
import math



class Neuron:
    """One tanh unit: a weight per input plus a bias, all stored as `Value`s."""

    def __init__(self, nin):
        # The `nin` weights are drawn first, then the bias, each uniformly from [-1, 1].
        self.w = []
        for _ in range(nin):
            self.w.append(Value(random.uniform(-1, 1)))
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Return tanh(b + sum_i w_i * x_i) as a `Value`; plain numbers in `x` are wrapped."""
        inputs = [xi if isinstance(xi, Value) else Value(xi) for xi in x]

        # Start the running sum at the bias and add one weighted input at a time.
        # zip() stops at the shorter of weights/inputs.
        act = self.b
        for wi, xi in zip(self.w, inputs):
            act = act + wi * xi
        return act.tanh()

    def parameters(self):
        """Return a new list holding every weight followed by the bias."""
        return [*self.w, self.b]


class Layer:
    """A group of `nout` neurons that all read the same `nin`-sized input."""

    def __init__(self, nin, nout):
        self.neurons = []
        for _ in range(nout):
            self.neurons.append(Neuron(nin))

    def __call__(self, x):
        """Feed `x` to every neuron; return the bare output for a one-neuron layer, else a list."""
        activations = []
        for neuron in self.neurons:
            activations.append(neuron(x))
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        """Return the parameters of all neurons as one flat list, in neuron order."""
        flat = []
        for neuron in self.neurons:
            flat.extend(neuron.parameters())
        return flat


class MLP:
    """A stack of `Layer`s applied one after another.

    Args:
        nin: number of inputs to the first layer.
        nouts: iterable of layer widths; its last entry is the output width.
    """

    def __init__(self, nin, nouts):
        # Unpack rather than `[nin] + nouts` so tuples and other iterables are
        # accepted as well as lists (list + tuple raises TypeError).
        sz = [nin, *nouts]
        # Consecutive size pairs (in, out) define one layer each.
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz, sz[1:])]

    def __call__(self, x):
        """Pass `x` through every layer in order and return the last layer's output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers as one flat list, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]


In [92]:
# Toy dataset: four rows of three input features, with one target per row in `ys`.
xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0]
# 3 inputs -> layers of 4, 5, 2 and 1 neurons. The final layer has a single
# neuron, so n(x) returns one Value rather than a list.
n = MLP(3, [4, 5, 2, 1])
# Forward pass only, before any training: print the prediction for each row.
# NOTE(review): no random seed is set in the visible cells, so these values
# differ from run to run.
for x in xs:
    y_pred = n(x)
    print(y_pred)


Value(data=0.5699685524526016)
Value(data=0.14811880062450147)
Value(data=0.5289931002653167)
Value(data=0.42139440515858645)


In [93]:
# Count the trainable parameters, then rebuild the network so training starts
# from freshly drawn weights.
a = len(n.parameters())
n = MLP(3, [4, 5, 2, 1])
# Keep `a` as the final expression: a notebook cell only displays the value of
# its last expression, so a bare `a` in the middle of the cell showed nothing.
a


In [111]:
for k in range(50):
    # Forward pass: one prediction per row, then the summed squared error.
    y_pred = [n(x) for x in xs]
    loss = sum((yout - ygt) ** 2 for yout, ygt in zip(y_pred, ys))

    # Gradients accumulate across backward() calls, so clear them first.
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # Gradient-descent step with a fixed step size of 0.05.
    for p in n.parameters():
        p.data -= 0.05 * p.grad
    print(k, loss.data)

0 0.018867732977056288
1 0.01843671885637243
2 0.018024288805281067
3 0.017629284299847746
4 0.017250640277716077
5 0.016887375958309633
6 0.016538586719216212
7 0.016203436890146868
8 0.015881153346209733
9 0.015571019799294795
10 0.015272371700709563
11 0.014984591680308097
12 0.014707105457595633
13 0.014439378168986447
14 0.01418091106279086
15 0.013931238519824932
16 0.013689925362944357
17 0.013456564423440844
18 0.013230774336234648
19 0.013012197539235736
20 0.012800498455223519
21 0.012595361837172752
22 0.01239649126019069
23 0.01220360774517958
24 0.012016448501035892
25 0.011834765773682157
26 0.011658325791526439
27 0.01148690779808325
28 0.011320303163491597
29 0.011158314567546831
30 0.011000755247640819
31 0.01084744830569015
32 0.010698226068741726
33 0.010552929498481058
34 0.010411407645348637
35 0.010273517143393009
36 0.01013912174236847
37 0.010008091873921526
38 0.009880304249012697
39 0.009755641483987955
40 0.009633991752956697
41 0.009515248464347693
42 0.0093

In [112]:
# Print the network's current prediction for each input row (targets are in `ys`).
for x in xs:
    y_pred = n(x)
    print(y_pred)

Value(data=0.9558038100925752)
Value(data=-0.967130930232524)
Value(data=-0.9527358312321814)
Value(data=0.9426029923508877)


In [123]:
for k in range(50):
    # Forward pass: one prediction per row, then the summed squared error.
    y_pred = [n(x) for x in xs]
    loss = sum((yout - ygt) ** 2 for yout, ygt in zip(y_pred, ys))

    # Gradients accumulate across backward() calls, so clear them first.
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # Gradient-descent step with a fixed step size of 0.05.
    for p in n.parameters():
        p.data -= 0.05 * p.grad
    print(k, loss.data)

0 0.0014906258624189127
1 0.0014874566106961402
2 0.001484300506134243
3 0.00148115746785167
4 0.0014780274156256322
5 0.0014749102698854628
6 0.0014718059517060118
7 0.00146871438280112
8 0.0014656354855172127
9 0.0014625691828269209
10 0.0014595153983228294
11 0.0014564740562112068
12 0.0014534450813059787
13 0.0014504283990225482
14 0.0014474239353719385
15 0.0014444316169547502
16 0.001441451370955445
17 0.0014384831251364385
18 0.0014355268078325308
19 0.00143258234794516
20 0.001429649674936848
21 0.0014267287188257614
22 0.0014238194101802003
23 0.0014209216801132315
24 0.001418035460277392
25 0.0014151606828594213
26 0.0014122972805750726
27 0.0014094451866639647
28 0.0014066043348845364
29 0.0014037746595090164
30 0.00140095609531848
31 0.0013981485775979227
32 0.0013953520421314297
33 0.0013925664251974162
34 0.0013897916635638326
35 0.00138702769448357
36 0.001384274455689771
37 0.0013815318853912441
38 0.0013787999222680404
39 0.0013760785054668738
40 0.0013733675745967627


BEFORE:
Value(data=0.9558038100925752)
Value(data=-0.967130930232524)
Value(data=-0.9527358312321814)
Value(data=0.9426029923508877)

In [124]:
# Print the network's current prediction for each input row (targets are in `ys`).
for x in xs:
    y_pred = n(x)
    print(y_pred)

Value(data=0.9827963814814136)
Value(data=-0.9855447945318717)
Value(data=-0.9806587764557552)
Value(data=0.9783708149811414)


In [None]:
# Notes to revisit: parameters; why __pow__, __mul__ and the rest are needed; the topological sort in Value.
# Neuron, Layer and MLP, and their parameters.