[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ML-Village/Giza-Orion-Experiments/blob/main/notebooks/pytorch-mnist.ipynb)

In [1]:
#@title Install libs
!pip install torch torchvision



In [2]:
# Print the interpreter version so the recorded outputs can be tied to a Python release.
!python --version

Python 3.10.12


In [8]:
#@title Import Dependencies
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [11]:
# Show the installed PyTorch version (bare expression, so it is rendered as the cell output).
torch.__version__

'2.1.0+cu121'

In [13]:
#@title Set ze Params

# Hyper-parameters and run-time switches, collected in a single dict literal.
args = {
    'batch_size': 1000,        # samples per training mini-batch
    'test_batch_size': 1000,   # samples per evaluation mini-batch
    'epochs': 10,              # number of full passes over the training set
    'lr': 0.01,                # learning rate: how fast the optimiser descends
    'momentum': 0.5,           # SGD momentum: moving average of the gradients (helps keep direction)
    'seed': 1,                 # random seed
    'log_interval': 10,        # the training loop prints the loss every this many batches
    'cuda': False,             # when True, the model and the batches are moved to the GPU
}

# Extra keyword arguments forwarded to the DataLoader constructors (none by default).
kwargs = {}

#vs
# input_size = 784 # img_size = (28,28) ---> 28*28=784 in total
# hidden_size = 500 # number of nodes at hidden layer
# num_classes = 10 # number of output classes discrete range [0,9]
# num_epochs = 20 # number of times which the entire dataset is passed throughout the model
# batch_size = 100 # the size of input data took for one iteration
# lr = 1e-3 # size of step

In [15]:
#@title Load ze data

# Preprocessing shared by both splits: convert the image to a tensor, then
# normalise with mean 0.1307 / std 0.3081 (presumably the MNIST training-set
# statistics -- TODO confirm).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: downloaded into ../data when missing and reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=args['batch_size'], shuffle=True, **kwargs)

# Test split: read from the same ../data directory. Shuffling brings nothing to
# evaluation, so the order is kept fixed; this also makes the first batch
# (and anything sampled from it) deterministic.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=args['test_batch_size'], shuffle=False, **kwargs)

In [16]:
#@title Define ze ModeL

class Net(nn.Module):
    """Small convolutional classifier mapping 1x28x28 images to 10 class scores.

    Pipeline: conv(1->10, k=5) -> max-pool(2) -> ReLU -> conv(10->20, k=5) ->
    channel dropout -> max-pool(2) -> ReLU -> flatten -> fc(320->50) -> ReLU ->
    dropout -> fc(50->10) -> log-softmax.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()  # drops whole feature maps while training
        # 320 = 20 channels * 4 * 4, the feature-map size reached from a 28x28 input.
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10).

        x: image batch of shape (batch, 1, 28, 28). Any other spatial size
           fails at ``fc1`` with a shape error instead of being silently
           folded into a different batch size.
        """
        # Convolution -> pooling -> activation
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        # Convolution -> dropout -> pooling -> activation
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten per sample. Keeping the batch dimension explicit (rather than
        # view(-1, 320)) prevents samples from being mixed on unexpected shapes.
        x = x.view(x.size(0), -1)
        # Fully connected layer -> activation
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        # Output layer (raw class scores)
        x = self.fc2(x)
        # log_softmax yields log-probabilities, the input expected by F.nll_loss.
        return F.log_softmax(x, dim=1)

# class Net(nn.Module):
#   def __init__(self, input_size, hidden_size, num_classes):
#     super(Net,self).__init__()
#     self.fc1 = nn.Linear(input_size, hidden_size)
#     self.relu = nn.ReLU()
#     self.fc2 = nn.Linear(hidden_size, num_classes)

#   def forward(self,x):
#     out = self.fc1(x)
#     out = self.relu(out)
#     out = self.fc2(out)
#     return out

In [24]:
#@title Define training routine/flow

def train(epoch):
    """Run one pass over ``train_loader`` and update ``model`` in place.

    Reads the notebook-level globals ``model``, ``optimizer``, ``train_loader``
    and ``args`` (keys ``cuda`` and ``log_interval``).

    epoch: epoch number, used only in the progress message.
    """
    model.train()  # training mode
    for batch_idx, (data, target) in enumerate(train_loader):
        if args['cuda']:
            data, target = data.cuda(), target.cuda()
        # Tensors take part in autograd directly; the deprecated Variable
        # wrapper is no longer needed.
        # Zero out the gradients accumulated for the previous batch.
        optimizer.zero_grad()
        output = model(data)
        # Negative log-likelihood loss, useful for a classification problem with C classes.
        loss = F.nll_loss(output, target)
        # Back-propagate: d(loss)/d(parameter) for every trainable parameter.
        loss.backward()
        # One optimisation step on the parameters.
        optimizer.step()
        # Print the loss periodically.
        if batch_idx % args['log_interval'] == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args['cuda']:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [25]:
#@title Train ze Model
# Seed PyTorch's global RNG before building the model so that weight
# initialisation, CPU-side shuffling and dropout repeat on every run of this
# cell. args['seed'] is not applied anywhere else in the visible notebook.
torch.manual_seed(args['seed'])

model = Net()

if args['cuda']:
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=args['lr'], momentum=args['momentum'])
# loss_function = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam( net.parameters(), lr=lr)

# One training pass followed by one evaluation pass per epoch (epochs are 1-based).
for epoch in range(1, args['epochs'] + 1):
    train(epoch)
    test()



  data, target = Variable(data, volatile=True), Variable(target)



Test set: Average loss: 2.2560, Accuracy: 2156/10000 (22%)


Test set: Average loss: 2.0671, Accuracy: 5482/10000 (55%)


Test set: Average loss: 1.2439, Accuracy: 7438/10000 (74%)


Test set: Average loss: 0.6962, Accuracy: 8236/10000 (82%)


Test set: Average loss: 0.4920, Accuracy: 8659/10000 (87%)


Test set: Average loss: 0.3894, Accuracy: 8872/10000 (89%)


Test set: Average loss: 0.3358, Accuracy: 9067/10000 (91%)


Test set: Average loss: 0.2981, Accuracy: 9142/10000 (91%)


Test set: Average loss: 0.2610, Accuracy: 9265/10000 (93%)


Test set: Average loss: 0.2401, Accuracy: 9331/10000 (93%)



In [None]:
#@title Evaluating the accuracy of the model

# Measure classification accuracy over the whole test loader.
model.eval()  # dropout must be off, whatever mode the model was left in

correct = 0
total = 0
with torch.no_grad():  # inference only, no autograd bookkeeping needed
    for images, labels in test_loader:
        if args['cuda']:
            images, labels = images.cuda(), labels.cuda()

        output = model(images)
        # Predicted class = index of the largest log-probability.
        predicted = output.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

# Divide by the real number of samples: the former `total + 1` denominator
# under-reported the accuracy. An empty loader is reported as 0 %.
accuracy = 100.0 * correct / total if total else 0.0
print('Accuracy of the model: %.3f %%' % accuracy)

In [29]:
# Persist only the model's state_dict (not the module object) to checkpoint.pth.
torch.save(model.state_dict(), 'checkpoint.pth')

In [30]:
#@title Save MoDEL

# Colab-specific helper module for file transfer (this import only resolves on Google Colab)
from google.colab import files

# Download the checkpoint file from the runtime through the browser
files.download('checkpoint.pth')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [37]:
#@title Export to ze Onnx MoDEL
!pip install onnx onnxscript

Collecting onnxscript
  Downloading onnxscript-0.1.0.dev20231222-py3-none-any.whl (550 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m550.7/550.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnxscript
Successfully installed onnxscript-0.1.0.dev20231222


In [38]:
# Take the first batch from the test loader: `inputs` are the images, `classes` their labels.
inputs, classes = next(iter(test_loader))

In [39]:
# Export in inference mode: Net only applies dropout while model.training is
# True, so the mode is set explicitly instead of relying on whichever of
# train()/test() happened to run last.
model.eval()
onnx_program = torch.onnx.dynamo_export(model, inputs)



In [41]:
# Write the exported program to mnisttorch.onnx in the working directory.
onnx_program.save("mnisttorch.onnx")
# Download the ONNX file through the browser (`files` is the google.colab helper imported in an earlier cell).
files.download('mnisttorch.onnx')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>