In [1]:
from sklearn.datasets import fetch_openml

# Load the MNIST digit dataset (OpenML 'mnist_784', version 1).
# as_frame=True pins the pandas DataFrame/Series return types that the `.iloc`
# indexing in later cells needs, instead of relying on the 'auto' default.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)
X, y = mnist.data, mnist.target

In [None]:
import matplotlib.pyplot as plt

# Preview the first 10 samples: each row is reshaped to 28x28 and titled with
# its label. Reads from `mnist` (not X/y) so the cell still works after X and y
# have been replaced by plain lists below.
fig, axes = plt.subplots(1, 10, figsize=(10, 3))
for i, ax in enumerate(axes):
    ax.imshow(mnist.data.iloc[i].to_numpy().reshape(28, 28), cmap='gray')
    ax.axis('off')
    ax.set_title(mnist.target.iloc[i])

plt.show()

# Build the plain-list inputs from `mnist` rather than from X/y themselves, so
# re-running this cell neither divides by 255 twice nor fails on `.iloc` once X
# is a list. The whole table is scaled and reshaped in one vectorised pass
# instead of one `.iloc` lookup per row; the result is the same list of 28x28
# nested lists of Python floats.
X = (mnist.data.to_numpy() / 255).reshape(-1, 28, 28).tolist()
y = mnist.target.tolist()
print(f'X: {len(X)}  Y: {len(y)}')

In [3]:
import os
import random
import sys

# Make the parent of the working directory importable so `src` resolves.
# Guarded so re-running the cell does not append the same entry again.
_parent_dir = os.path.abspath("..")
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from graphviz import Digraph
from src.nn import CNN

# Seed Python's RNG so the random batch windows drawn later are repeatable
# after a kernel restart.
# NOTE(review): this also fixes CNN's initial weights only if src.nn draws them
# from the `random` module — confirm.
SEED = 42
random.seed(SEED)

# NOTE(review): the meaning of CNN's two list arguments is defined in
# src/nn.py and is not visible from this notebook.
network = CNN([(28, 28), (5, 5), 2], [32, 16, 16, 1])

In [4]:
# Tracing and drawing graph

def trace(root):
    """Collect every node and edge reachable from ``root`` through ``_prev``.

    Side effect: any visited node with an empty ``_prev`` that is not one of
    ``network.parameters()`` has its ``label`` attribute set to ``"input"``.

    Reads the notebook-global ``network``.

    Args:
        root: Node to start from; its ``_prev`` links are followed
            transitively.

    Returns:
        A ``(nodes, edges)`` tuple: the set of visited nodes, and the set of
        ``(source, consumer)`` pairs where ``source`` is in
        ``consumer._prev``.
    """
    nodes, edges = set(), set()
    # A set gives O(1) membership tests; nodes are hashable since they are
    # stored in `nodes` as well.
    param_nodes = set(network.parameters())

    # Explicit stack instead of recursion: long operation chains would
    # otherwise exceed Python's recursion limit.
    pending = [root]
    while pending:
        node = pending.pop()
        if node in nodes:
            continue
        nodes.add(node)

        if len(node._prev) == 0 and node not in param_nodes:
            node.label = "input"

        for n in node._prev:
            edges.add((n, node))
            if n not in nodes:
                pending.append(n)

    return nodes, edges

def build_dot(root):
    """Render the graph reachable from ``root`` as a left-to-right SVG Digraph.

    Each node returned by ``trace(root)`` becomes a record-shaped graph node
    showing its label, data and grad (four decimals). A node with a truthy
    ``_op`` additionally gets a small operation node that points at it, and
    every traced edge is routed from the source node to the consumer's
    operation node.

    Args:
        root: Node passed on to ``trace``.

    Returns:
        The populated ``Digraph``.
    """
    graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'})
    nodes, edges = trace(root)

    for value in nodes:
        value_id = str(id(value))
        caption = f'{value.label} | data {value.data:.4f} | grad {value.grad:.4f}'
        graph.node(name=value_id, label=caption, shape='record')

        if not value._op:
            continue

        # Separate node for the operation that produced this value
        op_id = value_id + value._op
        graph.node(name=op_id, label=value._op)
        graph.edge(op_id, value_id)

    for source, consumer in edges:
        graph.edge(str(id(source)), str(id(consumer)) + consumer._op)

    return graph

In [5]:
# Index boundaries of the two sample pools inside X / y:
# [0, TRAIN_END) is used for training, [TRAIN_END, DATA_END) for testing.
TRAIN_END = 50000
DATA_END = 70000


def _random_window(start, stop, n):
    """Return ``n`` consecutive samples and labels starting at a random index in ``[start, stop - n]``."""
    if n < 0 or n > stop - start:
        raise ValueError(f'n must be between 0 and {stop - start}, got {n}')
    # randrange excludes its upper bound, so +1 keeps the last valid start
    # (the window ending exactly at `stop`) reachable.
    r = random.randrange(start, stop - n + 1)
    return X[r:r+n], y[r:r+n]


def fetch_training_data(n=1000):
    """Return a random contiguous batch of ``n`` samples from the training pool.

    Args:
        n: Batch size; must not exceed the pool size (``TRAIN_END``).

    Returns:
        ``(inputs, labels)`` slices of the notebook-global ``X`` and ``y``.

    Raises:
        ValueError: If ``n`` is negative or larger than the pool.
    """
    return _random_window(0, TRAIN_END, n)


def fetch_testing_data(n=1000):
    """Return a random contiguous batch of ``n`` samples from the testing pool.

    Args:
        n: Batch size; must not exceed the pool size
            (``DATA_END - TRAIN_END``).

    Returns:
        ``(inputs, labels)`` slices of the notebook-global ``X`` and ``y``.

    Raises:
        ValueError: If ``n`` is negative or larger than the pool.
    """
    return _random_window(TRAIN_END, DATA_END, n)

In [None]:
# Settings for the gradient-descent run
training_cycles = 100
step = 0.01
batch_size = 100
# One scalar target per class index 0-9: the odd integers from -9 to 9
targets = list(range(-9, 10, 2))

for i in range(training_cycles):
    x_batch, y_batch = fetch_training_data(batch_size)

    # Forward pass over the batch, then the summed squared error between each
    # output and the target assigned to its label
    outs = list(map(network, x_batch))
    L = sum((pred - targets[int(label)]) ** 2 for pred, label in zip(outs, y_batch))

    # Reset parameter gradients before backpropagating this batch's loss
    for p in network.parameters():
        p.grad = 0

    L.backward()

    # Move every parameter against its gradient
    for p in network.parameters():
        p.data -= step * p.grad

    print(f'Training step {i+1}: {L.data} loss')