In [2]:
from tinygrad import Device
# Show every backend tinygrad reports as usable on this machine,
# followed by the one it selects by default.
print(list(Device.get_available_devices()))
print(Device.DEFAULT)

['GPU']
GPU


In [3]:
from tinygrad import Tensor, nn

class Model:
  """Small convolutional classifier: two conv/ReLU/max-pool stages and a linear head.

  The head emits 10 values per sample (one score per class).
  """

  def __init__(self):
    """Create the three trainable layers."""
    # The linear head expects 1600 flattened features = 64 channels * 5 * 5,
    # which is what the two conv+pool stages leave for the 28x28 single-channel
    # images this notebook feeds in.
    self.l1 = nn.Conv2d(1, 32, kernel_size=(3,3))
    self.l2 = nn.Conv2d(32, 64, kernel_size=(3,3))
    self.l3 = nn.Linear(1600, 10)

  def __call__(self, x:Tensor) -> Tensor:
    """Run the forward pass on a batch of images and return per-class scores."""
    stage1 = self.l1(x).relu().max_pool2d((2,2))
    stage2 = self.l2(stage1).relu().max_pool2d((2,2))
    # Keep the batch axis, flatten everything else, then apply dropout (p=0.5)
    # ahead of the classifier; the training step enables it via Tensor.training.
    flat = stage2.flatten(1).dropout(0.5)
    return self.l3(flat)

In [4]:
from tinygrad.nn.datasets import mnist
# Fetch the four MNIST tensors: training images/labels and test images/labels.
X_train, Y_train, X_test, Y_test = mnist()
# Sanity check of the training split; both parts are printed on a single line.
print(X_train.shape, X_train.dtype, end=" ")
print(Y_train.shape, Y_train.dtype)
# expected: (60000, 1, 28, 28) dtypes.uchar (60000,) dtypes.uchar

https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz: 47.0MB [00:00, 50.5MB
https://storage.googleapis.com/cvdf-datasets/mnist/train-labels-idx1-ubyte.gz: 60.0kB [00:00, 10.1MB
https://storage.googleapis.com/cvdf-datasets/mnist/t10k-images-idx3-ubyte.gz: 7.84MB [00:00, 27.8MB/
https://storage.googleapis.com/cvdf-datasets/mnist/t10k-labels-idx1-ubyte.gz: 10.0kB [00:00, 1.98MB/


(60000, 1, 28, 28) dtypes.uchar (60000,) dtypes.uchar


In [5]:
model = Model()
# Predicted class = index of the largest score for each test image.
predictions = model(X_test).argmax(axis=1)
# Fraction of test images whose predicted class matches the label.
acc = (predictions == Y_test).mean()
# NOTE: tinygrad is lazy -- nothing above has been computed yet; .item() below triggers it
print(acc.item())  # ~10% accuracy, as expected from a random model

0.07909999787807465


In [6]:
# Adam over every parameter tensor found on the model.
optim = nn.optim.Adam(nn.state.get_parameters(model))
batch_size = 128

def step():
  """Perform one optimisation step on a random mini-batch and return its loss tensor.

  Reads the notebook globals `model`, `optim`, `batch_size`, `X_train` and
  `Y_train`, and switches `Tensor.training` on as a side effect.
  """
  Tensor.training = True  # makes dropout work
  # Draw `batch_size` random row indices below the number of training samples.
  batch_idx = Tensor.randint(batch_size, high=X_train.shape[0])
  inputs = X_train[batch_idx]
  targets = Y_train[batch_idx]
  optim.zero_grad()
  loss = model(inputs).sparse_categorical_crossentropy(targets)
  loss.backward()
  optim.step()
  return loss

In [8]:
import timeit
# Time five individual calls of the un-jitted step (one call per measurement).
timeit.Timer(step).repeat(repeat=5, number=1)
# Reference timings in seconds (hardware dependent):
#[0.08268719699981375,
# 0.07478952900009972,
# 0.07714716600003158,
# 0.07785399599970333,
# 0.07605237000007037]

[0.14294536099998822,
 0.1373435109999832,
 0.13323673700000427,
 0.1391792169999917,
 0.13211539400001016]

In [9]:
from tinygrad import GlobalCounters, Context
# Start from clean global counters so the trace covers only this one step.
GlobalCounters.reset()
# DEBUG=2 applies only inside the with-block; it prints one line per kernel run.
with Context(DEBUG=2):
  step()

scheduled 49 kernels
*** CUDA       1 E_[90mn11[0m                                     arg  1 mem  0.06 GB tm     30.14us/     0.03ms (     0.00 GFLOPS    0.0|0.0     GB/s) ['__imul__']
*** CUDA       2 E_[90mn12[0m                                     arg  1 mem  0.06 GB tm     22.43us/     0.05ms (     0.00 GFLOPS    0.0|0.0     GB/s) ['__imul__']
*** CUDA       3 E_[90mn6[0m                                      arg  1 mem  0.06 GB tm     24.61us/     0.08ms (     0.00 GFLOPS    0.0|0.0     GB/s) ['randint']
*** CUDA       4 r_[34m625[0m[90m_[0m[36m32[0m[90m_[0m[31m15000[0m[90m_[0m[33m3[0m[90m_[0m[35m4[0m[90m[0m                        arg  1 mem  0.06 GB tm     22.56us/     0.10ms (    20.39 GFLOPS   10.6|10.6    GB/s) ['__getitem__']
*** CUDA       5 r_[34m5[0m[90m_[0m[36m2[0m[90m_[0m[35m10[0m[90mn1[0m                                arg  1 mem  0.06 GB tm     41.09us/     0.14ms (     0.01 GFLOPS    0.0|0.0     GB/s) ['sparse_categorical_crossent

In [10]:
from tinygrad import TinyJit
# Wrap the training step in TinyJit. Judging by the reference timings in the
# next cell, the first couple of calls are slow and later calls are far faster
# (presumably capture/compile first, then replay -- see the TinyJit docs).
jit_step = TinyJit(step)

In [12]:
import timeit
# Time five individual calls of the jitted step (one call per measurement).
timeit.Timer(jit_step).repeat(repeat=5, number=1)
# Reference timings in seconds from a fresh JIT (hardware dependent); the
# first two calls are much slower than the rest:
# [0.2596786549997887,
#  0.08989566299987928,
#  0.0012115650001760514,
#  0.001010227999813651,
#  0.0012164899999334011]

[0.0004929050000157531,
 0.0001609649999920748,
 0.00018581100002279527,
 9.445599999935439e-05,
 7.041399999252462e-05]

In [13]:
# Main training loop: 7000 jitted optimisation steps, evaluating every 100.
# The counter is named `i`, not `step`: `step` is the training function defined
# earlier, and rebinding it to an int would break any cell that calls or wraps
# `step` when re-run after this one.
for i in range(7000):
  loss = jit_step()
  if i%100 == 0:
    # Switch training mode off (disables dropout) for the evaluation forward
    # pass over the whole test set.
    Tensor.training = False
    acc = (model(X_test).argmax(axis=1) == Y_test).mean().item()
    print(f"step {i:4d}, loss {loss.item():.2f}, acc {acc*100.:.2f}%")

step    0, loss 1.48, acc 83.76%
step  100, loss 0.31, acc 94.99%
step  200, loss 0.16, acc 96.41%
step  300, loss 0.23, acc 97.14%
step  400, loss 0.28, acc 97.29%
step  500, loss 0.21, acc 97.50%
step  600, loss 0.15, acc 97.70%
step  700, loss 0.18, acc 97.53%
step  800, loss 0.20, acc 97.36%
step  900, loss 0.32, acc 97.72%
step 1000, loss 0.17, acc 98.02%
step 1100, loss 0.15, acc 98.20%
step 1200, loss 0.05, acc 98.15%
step 1300, loss 0.03, acc 98.28%
step 1400, loss 0.14, acc 98.07%
step 1500, loss 0.07, acc 98.04%
step 1600, loss 0.09, acc 98.13%
step 1700, loss 0.20, acc 98.35%
step 1800, loss 0.06, acc 98.48%
step 1900, loss 0.08, acc 98.38%
step 2000, loss 0.07, acc 98.27%
step 2100, loss 0.13, acc 98.42%
step 2200, loss 0.13, acc 98.41%
step 2300, loss 0.07, acc 98.43%
step 2400, loss 0.10, acc 98.54%
step 2500, loss 0.10, acc 98.44%
step 2600, loss 0.02, acc 98.60%
step 2700, loss 0.14, acc 98.60%
step 2800, loss 0.12, acc 98.56%
step 2900, loss 0.07, acc 98.36%
step 3000,