# ConvNet

In [2]:
import os, gzip, tarfile
import numpy as np

from tinygrad import Tensor, nn
from tinygrad.nn.state import get_parameters
from tinygrad.helpers import fetch

from tqdm import trange


In [5]:
parse = lambda file: np.frombuffer(gzip.open(file).read(), dtype=np.uint8).copy()
BASE_URL = "https://storage.googleapis.com/cvdf-datasets/mnist/"   # http://yann.lecun.com/exdb/mnist/ lacks https
X_train = parse(fetch(f"{BASE_URL}train-images-idx3-ubyte.gz"))[0x10:].reshape((-1, 28*28)).astype(np.float32)
Y_train = parse(fetch(f"{BASE_URL}train-labels-idx1-ubyte.gz"))[8:].astype(np.int8)
X_test = parse(fetch(f"{BASE_URL}t10k-images-idx3-ubyte.gz"))[0x10:].reshape((-1, 28*28)).astype(np.float32)
Y_test = parse(fetch(f"{BASE_URL}t10k-labels-idx1-ubyte.gz"))[8:].astype(np.int8)


In [10]:
def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=lambda out, y: out.sparse_categorical_crossentropy(y),
          transform=lambda x: x, target_transform=lambda x: x, noloss=False, allow_jit=True):

  def train_step(x, y):
    out = model.forward(x)
    loss = lossfn(out, y)
    optim.zero_grad() 
    loss.backward()
    optim.step()
    cat = out.argmax(axis=-1)
    accuracy = (cat==y).mean()
    return loss.realize(), accuracy.realize()

  with Tensor.train():
    losses, accuracies = [], []
    for i in (t := trange(steps)):
      samp = np.random.randint(0, X_train.shape[0], size=(BS))
      x = Tensor(X_train[samp], requires_grad=False)
      y = Tensor(Y_train[samp])

      loss, accuracy = train_step(x, y)
      loss, accuracy = loss.numpy(), accuracy.numpy()
      losses.append(loss)
      accuracies.append(accuracy)
      t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))
  return [losses, accuracies]

def evaluate(model, X_test, Y_test):
  BS = 128
  Tensor.training = False 

  def numpy_eval(Y_test, num_classes):
    Y_test_preds_out = np.zeros(list(Y_test.shape) + [num_classes])
    for i in trange((len(Y_test)-1)//BS+1):
      x = Tensor(X_test[i*BS:(i+1)*BS])
      out = model.forward(x)
      Y_test_preds_out[i*BS:(i+1)*BS] = out.numpy()
    Y_test_preds = np.argmax(Y_test_preds_out, axis=-1)
    return (Y_test == Y_test_preds).mean(), Y_test_preds

  num_classes = Y_test.max().astype(int)+1
  acc, Y_test_pred = numpy_eval(Y_test, num_classes)
  print(f"test set accuracy is {acc}")
  return acc
  

In [19]:
class ConvNet:
  def __init__(self):
    self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
    self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
    self.conv3 = nn.Conv2d(32, 256, kernel_size=5)
    self.linear = Tensor.scaled_uniform(256, 10)

  def forward(self, x: Tensor) -> Tensor:
    x = self.conv1(x).relu()
    x = self.conv2(x).relu()
    x = self.conv3(x).relu()
    x = x.dot(self.linear)
    return x

  

In [20]:
model = ConvNet()

In [21]:
optimizer = nn.optim.SGD(get_parameters(model))


In [22]:
train(model, X_train, Y_train, optimizer, 500, BS=256)

  0%|                                                  | 0/1000 [00:00<?, ?it/s]


AssertionError: Input Tensor shape (256, 784) does not match the shape of the weights (32, 1, 5, 5). (1 vs. 784)

In [None]:
evaluate(model, X_test, Y_test