# Learning MNIST

In [17]:
import os, gzip, tarfile
import numpy as np

from tinygrad import Tensor, nn
from tinygrad.nn.state import get_parameters
from tinygrad.helpers import fetch

from tqdm import trange


In [2]:
def parse(file):
  """Decompress a gzip file and return its contents as a 1-D uint8 array.

  Args:
    file: anything ``gzip.open`` accepts (a path or an open binary file object).

  Returns:
    A writable ``np.uint8`` array holding a private copy of the decompressed bytes.
  """
  # The context manager closes the gzip handle as soon as the bytes are read,
  # instead of leaving it open until the garbage collector gets to it.
  with gzip.open(file) as fh:
    payload = fh.read()
  # np.frombuffer returns a read-only view over the bytes object; copy() makes
  # the result writable and independent of `payload`.
  return np.frombuffer(payload, dtype=np.uint8).copy()
# Mirror hosting the four gzip-compressed MNIST archives.
BASE_URL = "https://storage.googleapis.com/cvdf-datasets/mnist/"   # http://yann.lecun.com/exdb/mnist/ lacks https

def _mnist_images(url):
  """Download an image archive and return it as float32 rows of 28*28 values."""
  raw = parse(fetch(url))
  # The first 0x10 bytes are skipped (presumably the IDX file header - confirm against the format spec).
  pixels = raw[0x10:]
  return pixels.reshape((-1, 28*28)).astype(np.float32)

def _mnist_labels(url):
  """Download a label archive and return it as a flat int8 array."""
  raw = parse(fetch(url))
  # The first 8 bytes are skipped (presumably the IDX file header - confirm against the format spec).
  return raw[8:].astype(np.int8)

X_train = _mnist_images(f"{BASE_URL}train-images-idx3-ubyte.gz")
Y_train = _mnist_labels(f"{BASE_URL}train-labels-idx1-ubyte.gz")
X_test = _mnist_images(f"{BASE_URL}t10k-images-idx3-ubyte.gz")
Y_test = _mnist_labels(f"{BASE_URL}t10k-labels-idx1-ubyte.gz")

  


## Tinygrad

In [7]:
class LinearNet:
  """Two-layer fully connected network without biases: input -> ReLU hidden layer -> output scores."""

  def __init__(self, in_features: int = 784, hidden: int = 128, out_features: int = 10):
    """Create the two weight matrices.

    Args:
      in_features: width of each input row (default 784, i.e. a flattened 28*28 image).
      hidden: width of the hidden layer.
      out_features: number of output scores per input row.
    """
    self.l1 = Tensor.scaled_uniform(in_features, hidden)
    self.l2 = Tensor.scaled_uniform(hidden, out_features)

  def forward(self, x: "Tensor") -> "Tensor":
    """Map a batch `x` (rows of `in_features` values) to rows of `out_features` scores."""
    hidden = x.dot(self.l1).relu()
    return hidden.dot(self.l2)

  def __call__(self, x: "Tensor") -> "Tensor":
    """Alias for `forward`, so the model can also be invoked as `model(x)`."""
    return self.forward(x)

In [8]:
# Build the network instance that the following cells inspect and train.
model = LinearNet()

In [15]:
# Show the tensors get_parameters finds on the model; the saved output lists
# the two weight matrices, (784, 128) and (128, 10).
get_parameters(model)

[<Tensor <LB METAL (784, 128) float (<BinaryOps.MUL: 3>, None)> on METAL with grad None>,
 <Tensor <LB METAL (128, 10) float (<BinaryOps.MUL: 3>, None)> on METAL with grad None>]

In [16]:
# SGD over the model's tensors. No learning rate or other options are passed,
# so whatever defaults nn.optim.SGD defines are in effect.
optimizer = nn.optim.SGD(get_parameters(model))
# Bare last expression so the notebook displays the optimizer's repr
# (the saved output shows a tinygrad.nn.optim.LARS instance).
optimizer

<tinygrad.nn.optim.LARS at 0x107d8a390>

In [67]:
def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=lambda out, y: out.sparse_categorical_crossentropy(y),
          transform=lambda x: x, target_transform=lambda x: x, noloss=False, allow_jit=True):
  """Run `steps` minibatch updates on `model` and return the per-step metrics.

  Every step draws `BS` sample indices uniformly at random (with replacement),
  runs `model.forward`, evaluates `lossfn(out, y)`, backpropagates and calls
  `optim.step()`.

  Args:
    model: object exposing `forward(x)`.
    X_train: training inputs; indexed with an integer index array, `shape[0]` is the sample count.
    Y_train: training targets, indexed with the same index array.
    optim: optimizer exposing `zero_grad()` and `step()`.
    steps: number of update steps to run.
    BS: batch size.
    lossfn: callable `(out, y) -> loss`.
    transform: applied to each sampled input batch before it is wrapped in a Tensor.
    target_transform: applied to each sampled target batch before it is wrapped in a Tensor.
    noloss: NOTE(review): accepted but not used by this implementation.
    allow_jit: NOTE(review): accepted but not used by this implementation.

  Returns:
    `[losses, accuracies]`: two lists holding one `.numpy()` value per step.
  """

  def train_step(x, y):
    """One forward/backward/update pass; returns the realized (loss, accuracy)."""
    out = model.forward(x)
    loss = lossfn(out, y)
    optim.zero_grad()
    loss.backward()
    optim.step()
    # Accuracy of the predictions made before this step's weight update was applied.
    cat = out.argmax(axis=-1)
    accuracy = (cat == y).mean()
    return loss.realize(), accuracy.realize()

  with Tensor.train():
    losses, accuracies = [], []
    for _ in (t := trange(steps)):
      samp = np.random.randint(0, X_train.shape[0], size=BS)
      # Apply the caller-supplied hooks to the raw batches; the defaults are identity functions.
      x = Tensor(transform(X_train[samp]), requires_grad=False)
      y = Tensor(target_transform(Y_train[samp]))

      loss, accuracy = train_step(x, y)
      loss, accuracy = loss.numpy(), accuracy.numpy()
      losses.append(loss)
      accuracies.append(accuracy)
      t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))
  return [losses, accuracies]

In [None]:
def evaluate(model, X_test, Y_test, BS=128):
  """Return the classification accuracy of `model` on a held-out set.

  The inputs are pushed through `model.forward` in slices of `BS` rows, the
  predicted class of each row is the arg-max over the last axis of the output,
  and the result is the fraction of rows whose prediction equals the label.

  Args:
    model: object exposing `forward(x)` whose result has a `.numpy()` method.
    X_test: array of inputs, one row per sample.
    Y_test: array of integer labels, one per row of `X_test`.
    BS: number of rows per forward pass.

  Returns:
    Accuracy as a Python float in [0, 1].

  Raises:
    ValueError: if the inputs are empty or their lengths differ.
  """
  if len(X_test) != len(Y_test):
    raise ValueError("X_test and Y_test must contain the same number of samples")
  if len(Y_test) == 0:
    raise ValueError("cannot evaluate on an empty test set")

  predictions = []
  for start in range(0, len(X_test), BS):
    batch = Tensor(X_test[start:start + BS], requires_grad=False)
    scores = model.forward(batch).numpy()
    predictions.append(np.argmax(scores, axis=-1))
  predictions = np.concatenate(predictions)
  return float((predictions == Y_test).mean())

In [68]:
# 1000 training steps with batches of 256. train() returns [losses, accuracies],
# but the value is not bound to a name here, so the per-step history is discarded.
train(model, X_train, Y_train, optimizer, 1000, BS=256)
# Last expression of the cell, so its return value is what the notebook displays.
# NOTE(review): the saved output below looks like train()'s return list, so it is
# probably stale relative to this cell - re-run to confirm.
evaluate(model, X_test, Y_test)


loss 0.10 accuracy 1.00: 100%|██████████████| 1000/1000 [00:19<00:00, 50.06it/s]


[[array(2.0270782, dtype=float32),
  array(1.7643367, dtype=float32),
  array(1.4866745, dtype=float32),
  array(1.4672384, dtype=float32),
  array(1.2727242, dtype=float32),
  array(1.1927449, dtype=float32),
  array(1.1844066, dtype=float32),
  array(1.2419747, dtype=float32),
  array(1.0048064, dtype=float32),
  array(0.9636627, dtype=float32),
  array(0.76890194, dtype=float32),
  array(0.62453306, dtype=float32),
  array(0.588806, dtype=float32),
  array(0.5072289, dtype=float32),
  array(0.57968366, dtype=float32),
  array(0.4835639, dtype=float32),
  array(0.52716947, dtype=float32),
  array(0.5791708, dtype=float32),
  array(0.48027998, dtype=float32),
  array(0.528171, dtype=float32),
  array(0.483884, dtype=float32),
  array(0.5443866, dtype=float32),
  array(0.5665873, dtype=float32),
  array(0.73633665, dtype=float32),
  array(0.53120613, dtype=float32),
  array(0.58719534, dtype=float32),
  array(0.609508, dtype=float32),
  array(0.55613464, dtype=float32),
  array(0.42259