# Learning MNIST

In [3]:
import os, gzip, tarfile
import numpy as np

from tinygrad import Tensor, nn
from tinygrad.nn.state import get_parameters
from tinygrad.helpers import fetch

from tqdm import trange


In [4]:
def parse(file):
    """Read a gzip-compressed file and return its decompressed bytes as a uint8 array.

    Args:
        file: anything `gzip.open` accepts (a path string, a path-like object, or a file object).

    Returns:
        A one-dimensional, writable `np.uint8` array holding every decompressed byte.
    """
    # The context manager closes the handle deterministically; the previous
    # one-liner never closed it explicitly.
    with gzip.open(file) as fh:
        raw = fh.read()
    # np.frombuffer gives a read-only view over `raw`; copy() returns an owned, writable array.
    return np.frombuffer(raw, dtype=np.uint8).copy()
# HTTPS mirror of the MNIST archives (http://yann.lecun.com/exdb/mnist/ lacks https).
BASE_URL = "https://storage.googleapis.com/cvdf-datasets/mnist/"

# Image archives: drop the leading 16 bytes (presumably the IDX header), then view the
# remaining bytes as rows of 28*28 values converted to float32.
# Label archives: drop the leading 8 bytes and convert the remainder to int8.
X_train = (
    parse(fetch(f"{BASE_URL}train-images-idx3-ubyte.gz"))[16:]
    .reshape(-1, 28 * 28)
    .astype(np.float32)
)
Y_train = (
    parse(fetch(f"{BASE_URL}train-labels-idx1-ubyte.gz"))[8:]
    .astype(np.int8)
)
X_test = (
    parse(fetch(f"{BASE_URL}t10k-images-idx3-ubyte.gz"))[16:]
    .reshape(-1, 28 * 28)
    .astype(np.float32)
)
Y_test = (
    parse(fetch(f"{BASE_URL}t10k-labels-idx1-ubyte.gz"))[8:]
    .astype(np.int8)
)

  


## Tinygrad

In [5]:
class LinearNet:
    """Two-layer network made of plain weight matrices (no bias terms): 784 -> 128 -> 10."""

    def __init__(self):
        # Attribute order is kept as l1 then l2, the order in which they are assigned.
        self.l1 = Tensor.scaled_uniform(784, 128)
        self.l2 = Tensor.scaled_uniform(128, 10)

    def forward(self, x: Tensor) -> Tensor:
        """Multiply by `l1`, apply ReLU, then multiply by `l2` and return the result."""
        hidden = x.dot(self.l1).relu()
        return hidden.dot(self.l2)

In [6]:
# Build the network; constructing it creates its two weight tensors (l1 and l2).
model = LinearNet()

In [7]:
# Display the tensors that get_parameters() collects from the model
# (the output below lists a (784, 128) and a (128, 10) tensor).
get_parameters(model)

[<Tensor <LB METAL (784, 128) float (<BinaryOps.MUL: 3>, None)> on METAL with grad None>,
 <Tensor <LB METAL (128, 10) float (<BinaryOps.MUL: 3>, None)> on METAL with grad None>]

In [8]:
# Create the optimiser over the model's parameters; no hyperparameters are passed,
# so whatever defaults nn.optim.SGD defines are used.
# Note: the object displayed below is a `tinygrad.nn.optim.LARS` instance even though
# `SGD` is what gets called here.
optimizer = nn.optim.SGD(get_parameters(model))
optimizer

<tinygrad.nn.optim.LARS at 0x10cfa1eb0>

In [9]:
def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=lambda out, y: out.sparse_categorical_crossentropy(y),
          transform=lambda x: x, target_transform=lambda x: x, noloss=False, allow_jit=True):
  """Train `model` on random mini-batches and record the loss and accuracy of every step.

  Args:
    model: object exposing `forward(x)`.
    X_train: inputs; indexed with an integer index array, and `X_train.shape[0]` bounds the sampling range.
    Y_train: targets, indexed with the same index array as `X_train`.
    optim: optimiser exposing `zero_grad()` and `step()`.
    steps: number of mini-batches to process.
    BS: number of samples drawn per mini-batch.
    lossfn: callable `(out, y) -> loss`; the result must support `backward()`, `realize()` and `numpy()`.
    transform: applied to each sampled input batch before it is wrapped in a Tensor.
    target_transform: applied to each sampled target batch before it is wrapped in a Tensor.
    noloss: accepted for signature compatibility; not read by this implementation.
    allow_jit: accepted for signature compatibility; not read by this implementation.

  Returns:
    `[losses, accuracies]`, two lists with one entry per step (the `.numpy()` values of the step's
    loss and accuracy).
  """

  def train_step(x, y):
    # Forward pass and loss, then clear old gradients before back-propagating and updating.
    out = model.forward(x)
    loss = lossfn(out, y)
    optim.zero_grad()
    loss.backward()
    optim.step()
    # Fraction of samples whose highest-scoring class matches the target.
    accuracy = (out.argmax(axis=-1) == y).mean()
    return loss.realize(), accuracy.realize()

  with Tensor.train():
    losses, accuracies = [], []
    for _ in (t := trange(steps)):
      # Indices are drawn independently, so a sample may appear more than once in a batch.
      samp = np.random.randint(0, X_train.shape[0], size=(BS))
      # Apply the caller-supplied transforms; previously these parameters were silently ignored.
      x = Tensor(transform(X_train[samp]), requires_grad=False)
      y = Tensor(target_transform(Y_train[samp]))

      loss, accuracy = train_step(x, y)
      loss, accuracy = loss.numpy(), accuracy.numpy()
      losses.append(loss)
      accuracies.append(accuracy)
      t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))
  return [losses, accuracies]

In [16]:
def evaluate(model, X_test, Y_test, BS=128):
  """Compute and print the classification accuracy of `model` on a test set.

  Args:
    model: object exposing `forward(x)` whose result has `.numpy()` returning per-class scores
      with the classes on the last axis.
    X_test: inputs, sliced along axis 0 in chunks of `BS`.
    Y_test: integer labels aligned with `X_test` (assumed one-dimensional; TODO confirm for other callers).
    BS: number of samples evaluated per forward pass.

  Returns:
    The fraction of samples whose arg-max prediction equals the label.
  """
  # Clear tinygrad's global training flag; it is left cleared after the call, as before.
  Tensor.training = False

  num_samples = len(Y_test)
  # Store only the predicted class per sample. The earlier version sized a score buffer from
  # Y_test.max()+1, which broke whenever the labels present did not cover every model output class.
  Y_test_preds = np.empty(num_samples, dtype=np.int64)
  for i in trange((num_samples - 1) // BS + 1):
    batch = slice(i * BS, (i + 1) * BS)
    out = model.forward(Tensor(X_test[batch]))
    Y_test_preds[batch] = out.numpy().argmax(axis=-1)

  acc = (Y_test == Y_test_preds).mean()
  print(f"test set accuracy is {acc}")
  return acc
  

In [17]:
# Train for 1000 mini-batches of 256 samples each, then measure accuracy on the test split.
# The [losses, accuracies] list returned by train() is discarded here; evaluate()'s return
# value is the last expression and therefore becomes the cell output.
train(model, X_train, Y_train, optimizer, 1000, BS=256)
evaluate(model, X_test, Y_test)


loss 0.05 accuracy 1.00: 100%|██████████████| 1000/1000 [00:17<00:00, 57.92it/s]
100%|██████████████████████████████████████████| 79/79 [00:00<00:00, 339.75it/s]

test set accuracy is 0.9772





0.9772