1. Load Dataset
2. Define Parametrized Model
3. Instantiate Model
4. Instantiate Optimizer
5. Training/Eval Loop
    1. Set Gradients to Zero
    2. Compute our Estimation (Forward Pass)
    3. Compute our Loss (Objective Function to Minimize)
    4. Compute our Gradients (Backward Pass)
    5. Update our Weights
    6. Monitoring (Visualize Training Loss / Eval Loss)
6. Save our Weights
7. Export our Model for WebGPU

In [None]:
from typing import Callable
from tinygrad import Tensor, TinyJit
from tinygrad.nn.datasets import mnist
from tinygrad.nn.optim import Adam
from tinygrad.helpers import trange
import tinygrad.nn as nn

In [None]:
# Define the Model
class Model:
    """Fully connected classifier: flatten, then 784 -> 512 -> 512 -> 10.

    A ReLU follows each of the first two linear layers; the last linear
    layer's 10 outputs are returned as-is.
    """

    def __init__(self):
        def flatten(x: Tensor) -> Tensor:
            # Collapse every axis after the first (28x28 input -> 784).
            return x.flatten(1)

        # Linear layers are created in forward order.
        fc_in = nn.Linear(784, 512)
        fc_hidden = nn.Linear(512, 512)
        fc_out = nn.Linear(512, 10)

        # Single list attribute holding every stage, in forward order.
        self.layers: list[Callable[[Tensor], Tensor]] = [
            flatten,
            fc_in, Tensor.relu,
            fc_hidden, Tensor.relu,
            fc_out,
        ]

    def __call__(self, x: Tensor) -> Tensor:
        """Feed ``x`` through each entry of ``self.layers`` in turn."""
        out = x
        for stage in self.layers:
            out = stage(out)
        return out

In [4]:
# Dataset tensors, unpacked in the order mnist() returns them.
X_train, Y_train, X_test, Y_test = mnist()

# Fresh model, plus an Adam optimizer over the parameters found on it.
model = Model()
model_params = nn.state.get_parameters(model)
opt = Adam(model_params)

In [5]:
@TinyJit
@Tensor.train()
def train_step() -> Tensor:
    """Run one optimizer step on 512 randomly indexed training samples.

    Uses the module-level ``model``, ``opt``, ``X_train`` and ``Y_train``.

    Returns:
        The realized loss tensor for this step.
    """
    opt.zero_grad()

    # 512 random indices drawn below the size of the training set's first axis.
    batch_idx = Tensor.randint(512, high=int(X_train.shape[0]))

    logits = model(X_train[batch_idx])
    loss = logits.sparse_categorical_crossentropy(Y_train[batch_idx])
    loss.backward()

    # Realize the loss together with whatever the optimizer step scheduled.
    return loss.realize(*opt.schedule_step())

@TinyJit
def get_test_acc() -> Tensor:
    preds = model(X_test).argmax(axis=1)
    # cast to float (0/1), average to scalar, scale to %
    return (preds == Y_test).float().mean() * 100

In [6]:
# Training loop: 100 steps; test accuracy is refreshed on steps 9, 19, ..., 99
# and shown as NaN until the first refresh.
test_acc = float('nan')
t = trange(100)
for step in t:
    loss = train_step().item()
    if step % 10 == 9:
        test_acc = get_test_acc().item()
    t.set_description(f"loss: {loss:4.2f} test acc: {test_acc:4.2f}%")

loss: 0.13 test acc: 95.06%: 100%|████████████| 100/100 [00:18<00:00,  5.50it/s]


In [None]:
import json
import numpy as np
from pathlib import Path
from tinygrad.device import Device
from tinygrad.nn.state import safe_save, safe_load, load_state_dict
from export_model import export_model

# Save the trained weights, then rebuild the model with WEBGPU as the default
# device and export it as a JS program plus a safetensors weight file.
model_name = "model_nn1"
dir_name = Path("models")
dir_name.mkdir(exist_ok=True)

# 1) Persist the weights of the model trained above.
state_dict = nn.state.get_state_dict(model)
safe_save(state_dict, dir_name / f"{model_name}_model.safetensors")

# 2) Switch the default device before re-creating the model.
# NOTE(review): presumably so the fresh model's tensors are created on WEBGPU;
# confirm against tinygrad's device semantics.
Device.DEFAULT = "WEBGPU"

model = Model()
state_dict = safe_load(dir_name / f"{model_name}_model.safetensors")
load_state_dict(model, state_dict)

# 3) Export using a random float32 example input of shape (1, 28, 28).
# Named `sample_input` rather than `input` so the builtin input() is not
# shadowed for the rest of the session.
sample_input = Tensor(np.random.randn(1, 28, 28).astype(np.float32))
prg, *_, state = export_model(model, Device.DEFAULT.lower(), sample_input, model_name=model_name)
safe_save(state, dir_name / f"{model_name}.webgpu.safetensors")

# Write the generated JS with an explicit encoding so the output does not
# depend on the platform's default locale.
with open(dir_name / f"{model_name}.js", "w", encoding="utf-8") as f:
    f.write(prg)

AttributeError: 'str' object has no attribute 'mkdir'