In [5]:
#Import MNIST working code

import tinygrad
import numpy as np
import argparse
import time
#Boilerplate
#Load Data
##Struct Network
#train
#run

#struct network
from tinygrad.nn import Linear
from tinygrad.tensor import Tensor
from tinygrad.lazy import Device
Device.DEFAULT = "CPU"



class TinyDense:
  def __init__(self):
    self.l1 = Linear(784, 128, bias=False)
    self.l2 = Linear(128, 10, bias=False)

  def __call__(self, x):
    x = self.l1(x)
    x = x.leakyrelu()
    x = self.l2(x)
    return x.log_softmax()

net = TinyDense()
Tensor.training = True #boilerplatish?

from tinygrad.nn.optim import SGD
opt = SGD([net.l1.weight, net.l2.weight], lr=3e-4)

from extra.datasets import fetch_mnist, fetch_cifar
X_train, Y_train, X_test, Y_test = fetch_mnist()

from extra.training import sparse_categorical_crossentropy  
from tinygrad.state import safe_save, safe_load, get_state_dict, load_state_dict
def cross_entropy(out, Y):
  num_classes = out.shape[-1]
  YY = Y.flatten().astype(np.int32)
  y = np.zeros((YY.shape[0], num_classes), np.float32)
  y[range(y.shape[0]),YY] = -1.0*num_classes
  y = y.reshape(list(Y.shape)+[num_classes])
  y = Tensor(y)
  return out.mul(y).mean()

#load model

state_dict = safe_load("TinyDense.safetensors")
load_state_dict(net, state_dict)

for step in range(8000):
    #random sample batch??
    samp = np.random.randint(0, X_train.shape[0], size=(128))
    batch = Tensor(X_train[samp], requires_grad=True)
    #labels for the same random sample? Batch of 64?
    labels = Y_train[samp]
    #fordward pass
    out = net (batch)

    #compute loss
    loss = cross_entropy(out, labels)
    #zero gradients
    opt.zero_grad()
    #backward   
    loss.backward()
    #update param
    opt.step()

    #calculate accuracy
    pred = np.argmax(out.numpy(), axis=-1)
    acc = (pred == labels).mean()
    if acc >= 1:
        break
    if step % 100 == 0:
        print(f'step{step} | Loss: {loss.numpy()} | Accuracy: {acc}')

#save model
state_dict = get_state_dict(net)
safe_save(state_dict, "TinyDense.safetensors")


#test model
Tensor.training = False
av_acc = 0 #reset acc
st = time.perf_counter()
print(X_test.shape)
testamount = 1
for step in range(testamount):
    #test is just fordward?
    samp = np.random.randint(0, X_test.shape[0], size=1)
    batch = Tensor(X_test[samp], requires_grad=True)
    #get labels
    labels = Y_test[samp]
    #forward pass
    out = net(batch)
  
    pred = np.argmax(out.numpy(), axis=-1)
    av_acc += (pred == labels).mean()
    print(out.shape)
    print(np.argmax(out.numpy()))
    print(Y_test[samp])
    print(batch.numpy())
 
print(f"Test Accuracy: {av_acc / testamount}")
print(f"Time: {time.perf_counter() - st}")
#94% MNIST? 

ram used:  0.00 GB, l2.weight                                         : 100%|██████████| 2/2 [00:00<00:00, 1771.62it/s]

loaded weights in 2.65 ms, 0.00 GB loaded at 0.93 GB/s
step0 | Loss: 0.07643339037895203 | Accuracy: 0.9765625
(10000, 784)
(1, 10)
1
[1]
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  55.
  236. 107.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. 169.
  252. 185.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0




In [6]:
#open up CIFAR
X, Y = fetch_cifar(train=True)
#reshape
Xr = X.reshape(50000, -1)
print(Xr.shape)

(50000, 3072)


In [7]:
class TinyDense:
  def __init__(self):
    self.l1 = Linear(3072, 128, bias=False)
    self.l2 = Linear(128, 10, bias=False)

  def __call__(self, x):
    x = self.l1(x)
    x = x.leakyrelu()
    x = self.l2(x)
    return x.log_softmax()

net = TinyDense()
Tensor.training = True #boilerplatish?

from tinygrad.nn.optim import SGD
opt = SGD([net.l1.weight, net.l2.weight], lr=3e-3)

from extra.datasets import fetch_mnist, fetch_cifar
X_train, Y_train, X_test, Y_test = fetch_mnist()

from extra.training import sparse_categorical_crossentropy  
from tinygrad.state import safe_save, safe_load, get_state_dict, load_state_dict
def cross_entropy(out, Y):
  num_classes = out.shape[-1]
  YY = Y.flatten().astype(np.int32)
  y = np.zeros((YY.shape[0], num_classes), np.float32)
  y[range(y.shape[0]),YY] = -1.0*num_classes
  y = y.reshape(list(Y.shape)+[num_classes])
  y = Tensor(y)
  return out.mul(y).mean()

In [8]:
for step in range(8000):
    #random sample batch??
    samp = np.random.randint(0, Xr.shape[0], size=(64))
    batch = Tensor(Xr[samp], requires_grad=True)
    #labels for the same random sample? Batch of 64?
    labels = Y[samp]
    #fordward pass
    out = net (batch)

    #compute loss
    loss = cross_entropy(out, labels)
    #zero gradients
    opt.zero_grad()
    #backward   
    loss.backward()
    #update param
    opt.step()

    #calculate accuracy
    pred = np.argmax(out.numpy(), axis=-1)
    acc = (pred == labels).mean()
    if acc >= 1:
        break
    if step % 100 == 0:
        print(f'step{step} | Loss: {loss.numpy()} | Accuracy: {acc}')


step0 | Loss: 183.7943572998047 | Accuracy: 0.078125
step100 | Loss: 56959924.0 | Accuracy: 0.140625
step200 | Loss: 28365152.0 | Accuracy: 0.109375
step300 | Loss: 17038890.0 | Accuracy: 0.1171875
step400 | Loss: 8207320.0 | Accuracy: 0.078125
step500 | Loss: 8522882.0 | Accuracy: 0.1328125
step600 | Loss: 5506623.5 | Accuracy: 0.140625
step700 | Loss: 2399960.75 | Accuracy: 0.1484375
step800 | Loss: 2082001.0 | Accuracy: 0.21875
step900 | Loss: 1874370.0 | Accuracy: 0.1875
step1000 | Loss: 1470141.625 | Accuracy: 0.2265625
step1100 | Loss: 843535.0 | Accuracy: 0.1796875
step1200 | Loss: 929060.0 | Accuracy: 0.28125
step1300 | Loss: 1124453.125 | Accuracy: 0.265625
step1400 | Loss: 687603.4375 | Accuracy: 0.1796875
step1500 | Loss: 730008.4375 | Accuracy: 0.2578125
step1600 | Loss: 505249.71875 | Accuracy: 0.1875
step1700 | Loss: 10449819.0 | Accuracy: 0.0859375
step1800 | Loss: 5109804.5 | Accuracy: 0.1640625
step1900 | Loss: 3291413.25 | Accuracy: 0.1171875
step2000 | Loss: 1509549.