In [1]:
# from IPython.display import display, HTML

# display(HTML(data="""
# <style>
#     div#notebook-container    { width: 95%; }
#     div#menubar-container     { width: 65%; }
#     div#maintoolbar-container { width: 99%; }
# </style>
# """))

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import os
import glob
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import torch.optim as optim
from model_utils import run_training, create_train_folders
from data_utils import MnistDataset, get_dataloader
import numpy as np

In [4]:
# Constants
TRAIN_BATCH_SIZE = 512
TEST_BATCH_SIZE = 512

## Load dataset

In [5]:
transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])
dataset1 = datasets.MNIST('./ml_outputs/', train=True, transform=transform, download=True)
dataset2 = datasets.MNIST('./ml_outputs/', train=False, transform=transform, download=True)

In [6]:
len(dataset1), len(dataset2)

(60000, 10000)

In [7]:
# get Data loaders

train_loader = get_dataloader(dataset1, TRAIN_BATCH_SIZE)
test_loader = get_dataloader(dataset2, TRAIN_BATCH_SIZE, False)

### MODELS CLASSES

In [8]:
class Net1(nn.Module):
    def __init__(self):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        return x

class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.bn2 = nn.BatchNorm2d(64)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.bn1(self.conv1(x))
        x = F.relu(x)
        x = self.bn2(self.conv2(x))
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        return x

### RUN 1

In [None]:
ckpt_dir, log_dir = create_train_folders("ml_outputs/run1")
model = Net1()
opt = optim.Adam(model.parameters(), lr=1e-2)
sched = optim.lr_scheduler.StepLR(opt, 20, 0.6)
conf1 = {
    "model": model,
    "optimizer": opt,
    "train_loader": train_loader,
    "val_loader": test_loader,
    "epochs": 100,
    "ckpt_dir": ckpt_dir,
    "log_dir": log_dir,
    "scheduler": sched,
    "patience": 20,
    "fname": "run1",
    "label_smooth": 0.0
}

In [None]:
%%time

run_training(**conf1)

# CPU times: user 4min 34s, sys: 13.4 s, total: 4min 48s
# Wall time: 4min 43s
# {'train_loss': 0.10791252932548523,
#  'val_loss': 0.10492422745227814,
#  'train_acc': 0.9678,
#  'val_acc': 0.9659}

### RUN2

In [None]:
ckpt_dir, log_dir = create_train_folders("ml_outputs/run2")
model = Net1()
opt = optim.Adam(model.parameters(), lr=1e-2)
sched = optim.lr_scheduler.StepLR(opt, 20, 0.6)
conf1 = {
    "model": model,
    "optimizer": opt,
    "train_loader": train_loader,
    "val_loader": test_loader,
    "epochs": 100,
    "ckpt_dir": ckpt_dir,
    "log_dir": log_dir,
    "scheduler": sched,
    "patience": 20,
    "fname": "run1",
    "label_smooth": 0.1
}

In [None]:
%%time

run_training(**conf1)

# CPU times: user 4min 24s, sys: 12.8 s, total: 4min 36s
# Wall time: 4min 31s
# {'train_loss': 0.7046732180277506,
#  'val_loss': 0.6972992223739624,
#  'train_acc': 0.9482833333333334,
#  'val_acc': 0.9503}


### RUN3

In [None]:
ckpt_dir, log_dir = create_train_folders("ml_outputs/run3")
model = Net1()
opt = optim.RMSprop(model.parameters(), lr=1e-2)
sched = optim.lr_scheduler.StepLR(opt, 10, 0.8)
conf1 = {
    "model": model,
    "optimizer": opt,
    "train_loader": train_loader,
    "val_loader": test_loader,
    "epochs": 100,
    "ckpt_dir": ckpt_dir,
    "log_dir": log_dir,
    "scheduler": sched,
    "patience": 30,
    "fname": "run1",
    "label_smooth": 0.1
}

In [None]:
%%time

run_training(**conf1)

# CPU times: user 6min 23s, sys: 18.5 s, total: 6min 42s
# Wall time: 6min 34s
# {'train_loss': 1.173741497039795,
#  'val_loss': 1.165649961090088,
#  'train_acc': 0.8259333333333333,
#  'val_acc': 0.8339}

### RUN4

In [None]:
ckpt_dir, log_dir = create_train_folders("ml_outputs/run4")
model = Net1()
opt = optim.Adam(model.parameters(), lr=1e-2)
sched = optim.lr_scheduler.StepLR(opt, 10, 0.8)
conf1 = {
    "model": model,
    "optimizer": opt,
    "train_loader": train_loader,
    "val_loader": test_loader,
    "epochs": 100,
    "ckpt_dir": ckpt_dir,
    "log_dir": log_dir,
    "scheduler": sched,
    "patience": 30,
    "fname": "run1",
    "label_smooth": 0.1
}

In [None]:
%%time

run_training(**conf1)

# CPU times: user 5min 53s, sys: 17.2 s, total: 6min 10s
# Wall time: 6min 4s
# {'train_loss': 0.7110810436248779,
#  'val_loss': 0.7032975393295288,
#  'train_acc': 0.9472333333333334,
#  'val_acc': 0.9516}

### RUN5

In [None]:
ckpt_dir, log_dir = create_train_folders("ml_outputs/run5")
model = Net1()
opt = optim.Adam(model.parameters(), lr=1e-3)
sched = None
conf1 = {
    "model": model,
    "optimizer": opt,
    "train_loader": train_loader,
    "val_loader": test_loader,
    "epochs": 100,
    "ckpt_dir": ckpt_dir,
    "log_dir": log_dir,
    "scheduler": sched,
    "patience": 30,
    "fname": "run1",
    "label_smooth": 0.0
}

In [None]:
%%time

run_training(**conf1)

# CPU times: user 6min 37s, sys: 19.3 s, total: 6min 56s
# Wall time: 6min 48s
# {'train_loss': 0.007712779863427083,
#  'val_loss': 0.025506638836860655,
#  'train_acc': 0.9981666666666666,
#  'val_acc': 0.9916}
# 1
# ​

### RUN6

In [None]:
ckpt_dir, log_dir = create_train_folders("ml_outputs/run6")
model = Net1()
opt = optim.Adam(model.parameters(), lr=1e-3)
sched = None
conf1 = {
    "model": model,
    "optimizer": opt,
    "train_loader": train_loader,
    "val_loader": test_loader,
    "epochs": 100,
    "ckpt_dir": ckpt_dir,
    "log_dir": log_dir,
    "scheduler": sched,
    "patience": 30,
    "fname": "run1",
    "label_smooth": 0.1
}

In [None]:
%%time

run_training(**conf1)

# CPU times: user 16min 59s, sys: 54.1 s, total: 17min 53s
# Wall time: 17min 33s
# {'train_loss': 0.5094659843444824,
#  'val_loss': 0.5336426429748535,
#  'train_acc': 0.9999833333333333,
#  'val_acc': 0.9929}
# 1


### RUN7

In [None]:
ckpt_dir, log_dir = create_train_folders("ml_outputs/run7")
model = Net2()
opt = optim.Adam(model.parameters(), lr=1e-3)
sched = None
conf1 = {
    "model": model,
    "optimizer": opt,
    "train_loader": train_loader,
    "val_loader": test_loader,
    "epochs": 100,
    "ckpt_dir": ckpt_dir,
    "log_dir": log_dir,
    "scheduler": sched,
    "patience": 30,
    "fname": "run1",
    "label_smooth": 0.0
}

In [None]:
%%time

run_training(**conf1)

# U times: user 9min, sys: 21.7 s, total: 9min 22s
# Wall time: 9min 13s
# {'train_loss': 0.0065507115011413895,
#  'val_loss': 0.029933473259210586,
#  'train_acc': 0.9981833333333333,
#  'val_acc': 0.9909}
# 1
# ​

### RUN8

In [None]:
ckpt_dir, log_dir = create_train_folders("ml_outputs/run8")
model = Net2()
opt = optim.Adam(model.parameters(), lr=1e-3)
sched = None
conf1 = {
    "model": model,
    "optimizer": opt,
    "train_loader": train_loader,
    "val_loader": test_loader,
    "epochs": 100,
    "ckpt_dir": ckpt_dir,
    "log_dir": log_dir,
    "scheduler": sched,
    "patience": 30,
    "fname": "run1",
    "label_smooth": 0.1
}

In [None]:
%%time

run_training(**conf1)

# PU times: user 19min 30s, sys: 49.4 s, total: 20min 19s
# Wall time: 19min 59s
# {'train_loss': 0.5131719445546468,
#  'val_loss': 0.5369959331512452,
#  'train_acc': 0.9999333333333333,
#  'val_acc': 0.9927}
# 1
# ​

### Model Quantization

In [14]:
import torch.onnx
import torch.quantization
from ignite.metrics import Accuracy, Loss
from ignite.engine import create_supervised_evaluator

In [15]:
def _pretty_print(msg):
    print("*"*50)
    print(msg)
    print("*"*50 + "\n")
    

def print_size_of_model(model, label=""):
    torch.save(model.state_dict(), "temp.p")
    size = os.path.getsize("temp.p")
    _pretty_print(f"model: {label} \t Size (KB): {size/1e3}")
    os.remove('temp.p')


def quantize_trained_model(model, ckpt_dir, test_loader):
#     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device = torch.device("cpu")
    metrics = {"loss": Loss(nn.CrossEntropyLoss()), "acc": Accuracy()}
    
    model.load_state_dict(torch.load(glob.glob(os.path.join(ckpt_dir, "*.pth"))[0], map_location=device)["model"])
    val_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)
    val_evaluator.run(test_loader)
    _pretty_print(f"Metrics before quantization : {val_evaluator.state.metrics}")

    quantized_model = torch.quantization.quantize_dynamic(
        model, {nn.Conv2d, nn.Linear, nn.Dropout2d, F.relu, F.max_pool2d}, dtype=torch.qint8
    )
    
    val_evaluator = create_supervised_evaluator(quantized_model, metrics=metrics, device=device)
    val_evaluator.run(test_loader)
    _pretty_print(f"Metrics after quantization : {val_evaluator.state.metrics}")

    print_size_of_model(model, "full model")
    print_size_of_model(quantized_model, "quantized model")
    
    return model, quantized_model

In [16]:
model = Net1()
ckpt_dir = "./ml_outputs/run6/cpkts/"

test_loader = get_dataloader(dataset2, TRAIN_BATCH_SIZE, False)

In [17]:
model, quantized_model = quantize_trained_model(model, ckpt_dir, test_loader)

**************************************************
Metrics before quantization : {'loss': 0.11961191914081573, 'acc': 0.9929}
**************************************************

**************************************************
Metrics after quantization : {'loss': 0.11956824572086334, 'acc': 0.9928}
**************************************************

**************************************************
model: full model 	 Size (KB): 4802.487
**************************************************

**************************************************
model: quantized model 	 Size (KB): 1261.083
**************************************************



In [None]:
quantized_model

### ONNX export

In [18]:
model.eval()
model = model.to(torch.device("cpu"))

In [19]:
traced_model = torch.jit.trace(model, dataset1[0][0].unsqueeze(0))

In [20]:
print(traced_model)

Net1(
  original_name=Net1
  (conv1): Conv2d(original_name=Conv2d)
  (conv2): Conv2d(original_name=Conv2d)
  (dropout1): Dropout2d(original_name=Dropout2d)
  (dropout2): Dropout2d(original_name=Dropout2d)
  (fc1): Linear(original_name=Linear)
  (fc2): Linear(original_name=Linear)
)


In [21]:
print(traced_model.code)

def forward(self,
    input: Tensor) -> Tensor:
  _0 = self.fc2
  _1 = self.dropout2
  _2 = self.fc1
  _3 = self.dropout1
  _4 = self.conv2
  input0 = torch.relu((self.conv1).forward(input, ))
  input1 = torch.relu((_4).forward(input0, ))
  input2 = torch.max_pool2d(input1, [2, 2], annotate(List[int], []), [0, 0], [1, 1], False)
  input3 = torch.flatten((_3).forward(input2, ), 1, -1)
  input4 = torch.relu((_2).forward(input3, ))
  _5 = (_0).forward((_1).forward(input4, ), )
  return _5



In [22]:
with torch.no_grad():
    tmp = model(dataset1[0][0].unsqueeze(0))

In [23]:
tmp

tensor([[-1.2790, -1.3119, -1.2866, -0.5685, -1.4096,  3.7656, -0.9280, -0.8405,
         -1.2271, -0.7966]])

In [24]:
F.softmax(tmp)

  """Entry point for launching an IPython kernel.


tensor([[0.0060, 0.0058, 0.0060, 0.0122, 0.0053, 0.9309, 0.0085, 0.0093, 0.0063,
         0.0097]])

In [25]:
dataset1[0][1]

5

In [36]:
torch.onnx.export(model,               # model being run
                  dataset1[0][0].unsqueeze(0),                         # model input (or a tuple for multiple inputs)
                  "../../public/static/ml_models/mnist.onnx",   # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=9,          # the ONNX version to export the model to
#                   example_outputs=tmp,
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={'input' : {0 : 'batch_size'},    # variable lenght axes
                                'output' : {0 : 'batch_size'}})

In [37]:
import onnx
import onnxruntime

In [38]:
onnx_model = onnx.load("../../public/static/ml_models/mnist.onnx")
onnx.checker.check_model(onnx_model)

In [39]:
def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

In [40]:
ort_session = onnxruntime.InferenceSession("../../public/static/ml_models/mnist.onnx")

In [41]:
# compute ONNX Runtime output prediction
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(dataset1[0][0].unsqueeze(0))}
ort_outs = ort_session.run(None, ort_inputs)


In [42]:
ort_outs

[array([[-1.2789826 , -1.3118939 , -1.2866195 , -0.5685066 , -1.409559  ,
          3.7656217 , -0.9280032 , -0.84054834, -1.2270917 , -0.7966455 ]],
       dtype=float32)]

In [43]:
print(tmp)

tensor([[-1.2790, -1.3119, -1.2866, -0.5685, -1.4096,  3.7656, -0.9280, -0.8405,
         -1.2271, -0.7966]])


In [44]:
# compare ONNX Runtime and PyTorch results
np.testing.assert_allclose(to_numpy(tmp), ort_outs[0], rtol=1e-03, atol=1e-05)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

Exported model has been tested with ONNXRuntime, and the result looks good!
