In [1]:
import math
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class Value:
    """A scalar that records the operations applied to it so gradients can be backpropagated.

    Every arithmetic operation returns a new ``Value`` that remembers its operand
    nodes (``_prev``), the operation symbol (``_op``) and a closure (``_backward``)
    that adds this node's contribution to the operands' ``grad``.

    Attributes:
        data: the wrapped number.
        grad: accumulated derivative; filled in by ``backward()`` and never reset
            automatically (callers zero it themselves between passes).
        label: optional display name, shown by ``__repr__``.
    """

    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data
        self.grad = 0.0
        # Leaves keep this no-op; operations overwrite it with their chain-rule step.
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op
        self.label = label

    def __repr__(self):
        return f"Value(label={self.label},data={self.data:.4f},op={self._op})"

    @staticmethod
    def _as_value(other):
        """Return ``other`` unchanged if it is a Value, else wrap it as a labelled leaf."""
        if isinstance(other, Value):
            return other
        return Value(other, (), _op='', label=str(other))

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped into leaf Values."""
        other = Value._as_value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a+b)/da = d(a+b)/db = 1
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward
        return out

    def __radd__(self, other):
        return self + other

    def __neg__(self):
        return self * -1

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        return other + (-self)

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped into leaf Values."""
        other = Value._as_value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            # d(a*b)/da = b, d(a*b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        return self * (other**-1)

    def __rtruediv__(self, other):
        """Return ``other / self`` for a plain-number numerator (e.g. ``1 / v``)."""
        return (self**-1) * other

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent."""
        assert isinstance(other, (int, float))
        out = Value(self.data ** other, (self,), f'**{other}')

        def _backward():
            if other == 0:
                # x**0 is constant, so it contributes no gradient; skipping also
                # avoids evaluating 0 * (0.0 ** -1), which raises ZeroDivisionError.
                return
            self.grad += other * (self.data ** (other-1)) * out.grad
        out._backward = _backward
        return out

    def tanh(self):
        """Return the hyperbolic tangent of this value as a new node."""
        # math.tanh saturates to +/-1.0 for large inputs, whereas the explicit
        # (e^{2x} - 1) / (e^{2x} + 1) form overflows in math.exp for large x.
        out = Value(math.tanh(self.data), (self,), 'tanh')

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)^2
            self.grad += (1-out.data**2) * out.grad
        out._backward = _backward
        return out

    def exp(self):
        """Return ``e ** self`` as a new node."""
        out = Value(math.exp(self.data), (self,), 'exp')

        def _backward():
            # d e^x/dx = e^x, which is exactly out.data
            self.grad += out.data * out.grad
        out._backward = _backward
        return out

    def backward(self):
        """Backpropagate from this node, accumulating ``grad`` on every ancestor.

        Sets ``self.grad`` to 1.0 and then runs each node's ``_backward`` closure
        in reverse topological order. Existing ``grad`` values on ancestors are
        added to, not cleared.
        """
        # Iterative post-order DFS: an explicit stack keeps deep graphs (long
        # chains of operations) from exhausting Python's recursion limit.
        topo = []
        visited = set()
        stack = [(self, False)]
        while stack:
            node, children_done = stack.pop()
            if children_done:
                topo.append(node)
                continue
            if node in visited:
                continue
            visited.add(node)
            # Revisit the node only after everything it depends on was emitted.
            stack.append((node, True))
            for child in node._prev:
                if child not in visited:
                    stack.append((child, False))

        self.grad = 1.0
        for node in reversed(topo):
            node._backward()


In [3]:
from graphviz import Digraph

def trace_graph (root):
    """Collect every node reachable from ``root`` via ``_prev`` and the edges between them.

    Args:
        root: an object exposing an iterable ``_prev`` of its input nodes
            (each of which exposes ``_prev`` in turn).

    Returns:
        ``(nodes, edges)`` where ``nodes`` is the set of reachable nodes
        (including ``root``) and ``edges`` is a set of ``(input, node)`` pairs.
    """
    nodes = set()
    edges = set()

    # Each node is expanded exactly once. Without the membership check a shared
    # sub-graph is re-walked once per path leading to it, which is exponential
    # for layered graphs; the explicit stack also avoids deep recursion.
    pending = [root]
    while pending:
        node = pending.pop()
        if node in nodes:
            continue
        nodes.add(node)
        for parent in node._prev:
            edges.add((parent, node))
            pending.append(parent)
    return nodes, edges

def draw_graph(root):
    """Build a left-to-right Graphviz ``Digraph`` of the graph that ends at ``root``.

    Every node returned by ``trace_graph`` becomes a record-shaped box showing
    its label, data and grad. A node with a non-empty ``_op`` also gets a small
    operation node that feeds into it, and edges from its inputs are routed to
    that operation node.

    Returns:
        The populated ``Digraph`` (configured for SVG output).
    """
    dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) # LR = left to right
    nodes, edges = trace_graph(root)

    for value in nodes:
        value_id = str(id(value))
        record = "{ %s | data %.4f | grad %.4f }" % (value.label, value.data, value.grad)
        dot.node(name=value_id, label=record, shape='record')
        if not value._op:
            continue
        # The op node's name is the value id with the op symbol appended.
        op_id = value_id + value._op
        dot.node(name=op_id, label=f"{value._op}")
        dot.edge(op_id, value_id)

    for src, dst in edges:
        tail = str(id(src))
        head = str(id(dst)) + dst._op
        dot.edge(tail, head)
        # NOTE(review): the Value operators in this file emit '+', '*',
        # '**<exponent>', 'tanh' and 'exp', so none of these three symbols seems
        # to occur and this duplicate edge looks unreachable - confirm intent.
        if dst._op in ('++', '**', '--'):
            dot.edge(tail, head)

    return dot

In [4]:
class Neuron:
    """One tanh unit: ``tanh(b + sum_i w[i] * x[i])``.

    ``w`` holds ``nin`` weights and ``b`` the bias, each a ``Value`` initialised
    with a uniform random number in [-1, 1].
    """

    def __init__(self, nin):
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1, 1)))
        self.w = weights
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Return the activation for input sequence ``x`` (paired with ``w`` via zip)."""
        # Start from the bias and fold in one weighted input at a time.
        act = self.b
        for wi, xi in zip(self.w, x):
            act = act + wi * xi
        return act.tanh()

    def parameters (self):
        """Return a new list with all weights followed by the bias."""
        return [*self.w, self.b]
    
class Layer:
    """A group of ``nout`` independent ``Neuron`` objects that all read the same input."""

    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        """Apply every neuron to ``x``.

        Returns the list of outputs, or the bare output itself when the layer
        holds exactly one neuron.
        """
        outs = [neuron(x) for neuron in self.neurons]
        if len(outs) == 1:
            return outs[0]
        return outs

    def parameters(self):
        """Return the parameters of all neurons as one flat list, in neuron order."""
        params = []
        for neuron in self.neurons:
            params.extend(neuron.parameters())
        return params
    
class MLP:
    """A stack of ``Layer`` objects applied one after another.

    Args:
        nin: number of inputs to the first layer.
        nouts: sequence with the number of neurons in each successive layer
            (any iterable is accepted, e.g. a list or a tuple).
    """

    def __init__(self, nin, nouts):
        # list() lets callers pass a tuple; list + tuple would raise TypeError.
        sizes = [nin] + list(nouts)
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]

    def __call__(self, x):
        """Feed ``x`` through every layer and return the last layer's output.

        The final result is whatever the last layer returns (a bare value for a
        single-neuron layer, otherwise a list).
        """
        last = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # A single-neuron layer hands back a bare value rather than a list;
            # re-wrap it so the next layer can still iterate over its input.
            if i != last and not isinstance(x, (list, tuple)):
                x = [x]
        return x

    def parameters(self):
        """Return the parameters of all layers as one flat list, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]

In [10]:
# Read the dataset, one entry per line. The context manager closes the file
# handle as soon as the read finishes instead of leaving it to the garbage collector.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

# Sorted list of the distinct characters that occur anywhere in `words`.
chrs = sorted(set(''.join(words)))

In [16]:
# Character -> integer index, following the sorted order of `chrs`.
stoi = {s:i for i, s in enumerate(chrs)}
# '.' marks the start/end of a word. Give it the next free index rather than a
# hard-coded 26, so the mapping stays contiguous (0 .. len(stoi)-1) whatever the
# number of distinct characters; if '.' already occurs in the data its index is kept.
stoi.setdefault('.', len(chrs))
# Inverse mapping: integer index -> character.
itos = {i:s for s, i in stoi.items()}

In [18]:
# Seed Python's RNG so the random weight initialisation, and therefore every
# loss value printed during training, is reproducible after a kernel restart.
SEED = 42
random.seed(SEED)

# One scalar input (the index of the current character) and one output per
# vocabulary entry; the hidden widths are 4, 10, 30, 50 and 30.
n = MLP(1, [4, 10, 30, 50, 30, len(stoi)])

In [52]:
# Settings for this training run.
NUM_TRAIN_WORDS = 40   # how many leading entries of `words` to train on
LEARNING_RATE = 0.01   # step size of the gradient-descent update
VERBOSE = False        # True: also print each character pair, prediction and target

step = 0
for w in words[:NUM_TRAIN_WORDS]:
    # Surround the word with '.' so the first and last transitions are trained too.
    w = ['.'] + list(w) + ['.']
    for chr1, chr2 in zip(w, w[1:]):
        step += 1

        # forward pass: the current character's index is the single network input
        inp = [float(stoi[chr1])]
        ypred = list(n(inp))

        # one-hot target for the next character, built from plain Python floats
        # so no numpy scalar types end up inside the autograd graph
        ygt = [0.0] * len(stoi)
        ygt[stoi[chr2]] = 1.0

        # sum of squared errors over all outputs
        loss = sum((ys-yt)**2 for ys, yt in zip(ypred, ygt))

        # zero grad (Value.backward accumulates into .grad)
        for p in n.parameters():
            p.grad = 0.0

        # backward pass
        loss.backward()

        # gradient-descent update
        for p in n.parameters():
            p.data -= LEARNING_RATE * p.grad

        print (f'{step=}, {loss.data=:.4f}')
        if VERBOSE:
            print (f'{chr1=}, {chr2=}')
            print (f'ypred = ({[y.data for y in ypred]})')
            print (f'yout = ({ygt})')


step=1, loss.data=12.9931
chr1='.', chr2='e'
ypred = ([0.999973098486918, -1.6554555886228065e-05, 0.9999879456105902, -3.5560202878027385e-05, 0.99997926042271, 2.6302067099644366e-05, -2.339551227921466e-05, 0.9998823759290001, 0.9999821200577562, -2.9948358502807232e-05, -2.5999917129657327e-05, 0.9999762881994336, 0.9999998865933093, -3.87985028535391e-07, 0.00024014322531199888, 0.9999464424576866, -4.337227589017303e-05, -0.9999414532675938, 0.999982804285072, 0.999998277896247, -1.782527387139434e-05, 0.9999939597450961, -5.2203602055132e-05, 1.0685981305508965e-05, 0.9969679850524091, 2.359384686396759e-05, 0.9999009472737757])
yout = ([0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
step=2, loss.data=12.9930
chr1='e', chr2='m'
ypred = ([0.9999730972486567, 2.5560233043620828e-05, 0.9999879452396523, 5.596575285519296e-05, 0.9999792593990011, -3.5989841084777174e-05, 2.316739324858705e-05, 0

KeyboardInterrupt: 