In [17]:
!pip install onnxscript


Collecting onnxscript
  Downloading onnxscript-0.5.6-py3-none-any.whl.metadata (13 kB)
Collecting onnx_ir<2,>=0.1.12 (from onnxscript)
  Downloading onnx_ir-0.1.12-py3-none-any.whl.metadata (3.2 kB)
Collecting onnx>=1.16 (from onnxscript)
  Downloading onnx-1.20.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Downloading onnxscript-0.5.6-py3-none-any.whl (683 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m683.0/683.0 kB[0m [31m39.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx-1.20.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (18.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.1/18.1 MB[0m [31m124.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx_ir-0.1.12-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.3/129.3 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx, onnx_ir, onnxscript
Successfully install

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import torch.nn.utils.prune as prune
import torch.nn.functional as F
import os

# Device and save directory
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SAVE_DIR = "rpi_models"
os.makedirs(SAVE_DIR, exist_ok=True)

# Training parameters
BATCH_SIZE = 64
EPOCHS = 5


In [12]:
# Transform and load CIFAR-10
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_dataset = datasets.CIFAR10(root="data", train=True, download=True, transform=transform)
test_dataset  = datasets.CIFAR10(root="data", train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [13]:
def evaluate(model):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            out = model(x)
            pred = out.argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.size(0)
    return correct / total

def export_to_onnx(model, name, input_shape=(1,3,224,224)):
    model.eval()
    os.makedirs(SAVE_DIR, exist_ok=True)
    dummy_input = torch.randn(*input_shape)
    onnx_path = os.path.join(SAVE_DIR, name + ".onnx")
    torch.onnx.export(
        model.to("cpu"),
        dummy_input,
        onnx_path,
        export_params=True,
        opset_version=12,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={'input': {0:'batch_size'}, 'output': {0:'batch_size'}}
    )
    print(f"[ONNX] Saved: {onnx_path}")


In [14]:
def train_model(model, epochs=EPOCHS):
    model = model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        running_loss = 0
        for x, y in train_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        acc = evaluate(model)
        print(f"Epoch {epoch+1}: loss={running_loss/len(train_loader):.4f}, test_acc={acc:.4f}")
    return model


In [15]:
# Pruning
def prune_model(model, amount=0.3):
    for m in model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            prune.l1_unstructured(m, "weight", amount=amount)
            prune.remove(m, "weight")
    return model

# Dynamic Quantization
def quantize_model(model):
    return torch.quantization.quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)

# Distillation
def train_distilled(student, teacher, epochs=EPOCHS):
    teacher.eval()
    optimizer = optim.Adam(student.parameters(), lr=1e-3)
    alpha, T = 0.5, 4.0
    for epoch in range(epochs):
        student.train()
        for x, y in train_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            s_out = student(x)
            with torch.no_grad():
                t_out = teacher(x)
            loss = alpha*F.cross_entropy(s_out, y) + (1-alpha)*F.kl_div(
                F.log_softmax(s_out/T, dim=1),
                F.softmax(t_out/T, dim=1),
                reduction="batchmean"
            )*(T*T)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        acc = evaluate(student)
        print(f"[Distilled] Epoch {epoch+1}: test_acc={acc:.4f}")
    return student


In [18]:
#first cell of training model
model = models.mobilenet_v2(weights="MobileNet_V2_Weights.IMAGENET1K_V1")
model.classifier[1] = nn.Linear(1280, 10)
baseline = train_model(model)
torch.save(baseline.state_dict(), os.path.join(SAVE_DIR, "baseline_fp32.pt"))
export_to_onnx(baseline, "baseline_fp32")  # mandatory for RPi


Epoch 1: loss=0.5268, test_acc=0.8495
Epoch 2: loss=0.3345, test_acc=0.8861
Epoch 3: loss=0.2750, test_acc=0.9017
Epoch 4: loss=0.2324, test_acc=0.9000
Epoch 5: loss=0.2008, test_acc=0.9058


  torch.onnx.export(
W1208 00:13:05.079000 994 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 12 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_version
    return onnx.version_converter.convert_version(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnx/version_converter.py", line 39, in convert_version
    converted_model_str = C.convert_version(model_str, target_version)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: /github/workspace/onnx/version_converter/adapters/axes_input_to_attribute.h:65: adapt: Asserti

[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 104 of general pattern rewrite rules.
[ONNX] Saved: rpi_models/baseline_fp32.onnx


In [19]:
#Quant should happen on RPI doesn't work otherwise
quant = quantize_model(baseline)
torch.save(quant.state_dict(), os.path.join(SAVE_DIR, "quantized_int8.pt"))
export_to_onnx(quant, "quantized_int8")


For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  return torch.quantization.quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)
  torch.onnx.export(
W1208 00:17:55.040000 994 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 12 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to 

[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`... ❌
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=True)`...
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=True)`... ❌


TorchExportError: Failed to export the model with torch.export. [96mThis is step 1/3[0m of exporting the model to ONNX. Next steps:
- Modify the model code for `torch.export.export` to succeed. Refer to https://pytorch.org/docs/stable/generated/exportdb/index.html for more information.
- Debug `torch.export.export` and submit a PR to PyTorch.
- Create an issue in the PyTorch GitHub repository against the [96m*torch.export*[0m component and attach the full error stack as well as reproduction scripts.

## Exception summary

<class 'AttributeError'>: __torch__.torch.classes.quantized.LinearPackedParamsBase (of Python compilation unit at: 0) does not have a field with name '__obj_flatten__'

(Refer to the full stack trace above for more information.)

In [20]:
pruned = prune_model(model)
pruned = train_model(pruned)
torch.save(pruned.state_dict(), os.path.join(SAVE_DIR, "pruned_30.pt"))
export_to_onnx(pruned, "pruned_30")


Epoch 1: loss=0.2060, test_acc=0.9040
Epoch 2: loss=0.1722, test_acc=0.9007
Epoch 3: loss=0.1524, test_acc=0.9083
Epoch 4: loss=0.1395, test_acc=0.9083


  torch.onnx.export(
W1208 00:43:30.189000 994 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 12 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


Epoch 5: loss=0.1252, test_acc=0.9105
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_version
    return onnx.version_converter.convert_version(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnx/version_converter.py", line 39, in convert_version
    converted_model_str = C.convert_version(model_str, target_version)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: /github/workspace/onnx/version_converter/adapters/axes_input_to_attribute.h:65: adapt: Asserti

[torch.onnx] Translate the graph into ONNX... ✅
Applied 104 of general pattern rewrite rules.
[ONNX] Saved: rpi_models/pruned_30.onnx


In [22]:
student = model
baseline = baseline.to(DEVICE)
distilled = train_distilled(student, baseline)
torch.save(distilled.state_dict(), os.path.join(SAVE_DIR, "distilled.pt"))
export_to_onnx(distilled, "distilled")


[Distilled] Epoch 1: test_acc=0.9104
[Distilled] Epoch 2: test_acc=0.9168
[Distilled] Epoch 3: test_acc=0.9223
[Distilled] Epoch 4: test_acc=0.9211


  torch.onnx.export(
W1208 01:08:00.423000 994 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 12 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[Distilled] Epoch 5: test_acc=0.9155
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_version
    return onnx.version_converter.convert_version(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnx/version_converter.py", line 39, in convert_version
    converted_model_str = C.convert_version(model_str, target_version)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: /github/workspace/onnx/version_converter/adapters/axes_input_to_attribute.h:65: adapt: Asserti

[torch.onnx] Translate the graph into ONNX... ✅
Applied 104 of general pattern rewrite rules.
[ONNX] Saved: rpi_models/distilled.onnx


In [2]:
#entering models that have more than one feature
# Start from a trained baseline
hybrid_model = prune_model(model)
hybrid_model = train_model(hybrid_model)  # optional fine-tuning after pruning
hybrid_model = quantize_model(hybrid_model)

# Save and export
torch.save(hybrid_model.state_dict(), os.path.join(SAVE_DIR, "pruned_quantized.pt"))
export_to_onnx(hybrid_model, "pruned_quantized")


In [None]:
student = model
baseline = baseline.to(DEVICE)
student = train_distilled(student, baseline)  # distillation
student = quantize_model(student)

torch.save(student.state_dict(), os.path.join(SAVE_DIR, "distilled_quantized.pt"))
export_to_onnx(student, "distilled_quantized")


In [24]:
student = prune_model(model)
baseline = baseline.to(DEVICE)
student = train_distilled(student, baseline)

torch.save(student.state_dict(), os.path.join(SAVE_DIR, "pruned_distilled.pt"))
export_to_onnx(student, "pruned_distilled")


[Distilled] Epoch 1: test_acc=0.9087
[Distilled] Epoch 2: test_acc=0.9202
[Distilled] Epoch 3: test_acc=0.9161
[Distilled] Epoch 4: test_acc=0.9177


  torch.onnx.export(
W1208 01:32:14.393000 994 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 12 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[Distilled] Epoch 5: test_acc=0.9106
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_version
    return onnx.version_converter.convert_version(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/onnx/version_converter.py", line 39, in convert_version
    converted_model_str = C.convert_version(model_str, target_version)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: /github/workspace/onnx/version_converter/adapters/axes_input_to_attribute.h:65: adapt: Asserti

[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 104 of general pattern rewrite rules.
[ONNX] Saved: rpi_models/pruned_distilled.onnx


In [None]:
student = prune_model(model)
baseline = baseline.to(DEVICE)
student = train_distilled(student, baseline)
student = quantize_model(student)

torch.save(student.state_dict(), os.path.join(SAVE_DIR, "pruned_distilled_quantized.pt"))
export_to_onnx(student, "pruned_distilled_quantized")
