In [125]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.models import resnet50, ResNet50_Weights
import scipy
import matplotlib.pyplot as plt
from tqdm.notebook import trange, tqdm
from torchinfo import summary
import timeit
import os
from prettytable import PrettyTable, SINGLE_BORDER

This notebook will demonstrate model compression techniques and export an iOS compatible coreML model to load into mobile devices

In [126]:
if torch.cuda.is_available():
    device_name = 'cuda'
elif torch.backends.mps.is_available():
    device_name = 'mps'
else:
    device_name = 'cpu'

device = torch.device(device_name)
print(device)

mps


Code from https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

In [127]:
batch_size=128

In [128]:
train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
    ])
    
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

In [129]:
# Dataloader

# Define the directory for the dataset
data_dir = "data"

# Create the directory if it doesn't exist
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

training_data = datasets.CIFAR10(
    root="data",
    train=True,
    download=True,
    transform=train_transform
)

testing_data = datasets.CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=val_transform
)


train_data_loader  = torch.utils.data.DataLoader(training_data,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=2)

test_data_loader  = torch.utils.data.DataLoader(testing_data,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=2)


classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [130]:
# Define the directory for the dataset
model_dir = "model"

# Create the directory if it doesn't exist
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
else:
    if len(os.listdir(model_dir)) == 0:
        mode = 'train'
        print("no model checkpoints exist")
    else:
        mode = 'eval'
        print("model checkpoints already exist")
print(mode, "mode")

no model checkpoints exist
train mode


In [131]:
epochs = 5

## Teacher Model

Resnet50 (Modified for CIFAR10)

In [132]:
class ResNet50SmallPretrained(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet50SmallPretrained, self).__init__()
        # Load pretrained ResNet50
        resnet = resnet50(weights=ResNet50_Weights.DEFAULT)
        
        # Method 1: Modify first convolution layer
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        
        # Copy weights from pretrained model with adaptation
        with torch.no_grad():
            # Adapt the weights by averaging over the original kernel size
            original_weights = resnet.conv1.weight
            new_weights = torch.mean(original_weights.view(64, 3, 7*7), dim=2).view(64, 3, 1, 1)
            self.conv1.weight = nn.Parameter(new_weights)
        
        # Remove the original first maxpool layer as it's too aggressive for small images
        self.bn1 = resnet.bn1
        self.relu = resnet.relu
        
        # Keep the rest of the architecture
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4
        
        # Adjust the final layers
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(2048, num_classes)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        
        # No maxpool
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        
        return x

In [133]:
teacher_model = ResNet50SmallPretrained().to(device)

In [134]:
teacher_model_stats = summary(teacher_model, input_size=(batch_size, 3, 32, 32), device=device)
teacher_model_stats

Layer (type:depth-idx)                   Output Shape              Param #
ResNet50SmallPretrained                  [128, 10]                 --
├─Conv2d: 1-1                            [128, 64, 34, 34]         192
├─BatchNorm2d: 1-2                       [128, 64, 34, 34]         128
├─ReLU: 1-3                              [128, 64, 34, 34]         --
├─Sequential: 1-4                        [128, 256, 34, 34]        --
│    └─Bottleneck: 2-1                   [128, 256, 34, 34]        --
│    │    └─Conv2d: 3-1                  [128, 64, 34, 34]         4,096
│    │    └─BatchNorm2d: 3-2             [128, 64, 34, 34]         128
│    │    └─ReLU: 3-3                    [128, 64, 34, 34]         --
│    │    └─Conv2d: 3-4                  [128, 64, 34, 34]         36,864
│    │    └─BatchNorm2d: 3-5             [128, 64, 34, 34]         128
│    │    └─ReLU: 3-6                    [128, 64, 34, 34]         --
│    │    └─Conv2d: 3-7                  [128, 256, 34, 34]        16,384


In [135]:
# optimizer = optim.SGD(teacher_model.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(teacher_model.parameters())

In [136]:
def standard_training(epochs, data_loader, model, device, optimizer='adam', criterion='ce'):
    if optimizer == 'adam':
        optimizer = optim.Adam(model.parameters())
    else:
        raise NotImplementedError("optimizer string matcher is not implemented yet other than adam")
    if criterion == 'ce':
        criterion = nn.CrossEntropyLoss()
    else:
        raise NotImplementedError("optimizer string matcher is not implemented yet other than CrossEntropy")
    
    model.to(device)
    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(tqdm(data_loader), 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device) # send to accelerator

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

        # Calculate average loss for the epoch
        avg_loss = running_loss / len(train_data_loader)
        # Print average loss for the epoch
        print(f'Epoch [{epoch + 1}/{epochs}], Average Loss: {avg_loss:.4f}')
    print('Finished Training')
    return model

In [None]:
if mode == 'train':
    teacher_model = standard_training(epochs, train_data_loader, teacher_model, device)
    torch.save(teacher_model, "model/teacher_model.pt")
else:
    torch.load("model/teacher_model.pt")

  0%|          | 0/391 [00:00<?, ?it/s]

Epoch [1/5], Average Loss: 0.5112


  0%|          | 0/391 [00:00<?, ?it/s]

Epoch [2/5], Average Loss: 0.2797


  0%|          | 0/391 [00:00<?, ?it/s]

Epoch [3/5], Average Loss: 0.2261


  0%|          | 0/391 [00:00<?, ?it/s]

python(3046) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(3064) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


## Student Model

In [None]:
class ResNet50Smaller(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet50Smaller, self).__init__()
        # Load pretrained ResNet50
        resnet = resnet50()
        
        # Method 1: Modify first convolution layer
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        
        
        # Remove the original first maxpool layer as it's too aggressive for small images
        self.bn1 = resnet.bn1
        self.relu = resnet.relu
        
        # Keep the rest of the architecture
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        # Cut layer 4 from original resnet50
        # self.layer4 = resnet.layer4
        
        # Adjust the final layers
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, num_classes)

        # Layers to add quantization layer (does not affect model until the model is converted and quantized)
        self.quant = torch.ao.quantization.QuantStub()
        self.dequant = torch.ao.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)        # Quantize input

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        
        # No maxpool
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        # x = self.layer4(x)
        
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        x = self.dequant(x)      # Dequantize output
        
        return x

In [None]:
# Load Student Models
# One instance without Knowledge Distillation
# One instance for Knowledge Distillation
# For comparison of effectiveness of KD
student_model_noKD = ResNet50Smaller().to(device)
student_model_KD = ResNet50Smaller().to(device)

In [None]:
student_model_stats = summary(student_model_noKD, input_size=(batch_size, 3, 32, 32), device=device)
student_model_stats

Layer (type:depth-idx)                   Output Shape              Param #
ResNet50Smaller                          [128, 10]                 --
├─QuantStub: 1-1                         [128, 3, 32, 32]          --
├─Conv2d: 1-2                            [128, 64, 32, 32]         1,728
├─BatchNorm2d: 1-3                       [128, 64, 32, 32]         128
├─ReLU: 1-4                              [128, 64, 32, 32]         --
├─Sequential: 1-5                        [128, 256, 32, 32]        --
│    └─Bottleneck: 2-1                   [128, 256, 32, 32]        --
│    │    └─Conv2d: 3-1                  [128, 64, 32, 32]         4,096
│    │    └─BatchNorm2d: 3-2             [128, 64, 32, 32]         128
│    │    └─ReLU: 3-3                    [128, 64, 32, 32]         --
│    │    └─Conv2d: 3-4                  [128, 64, 32, 32]         36,864
│    │    └─BatchNorm2d: 3-5             [128, 64, 32, 32]         128
│    │    └─ReLU: 3-6                    [128, 64, 32, 32]         --
│ 

Regular Student Training (on Dataset)

In [None]:
if mode == 'train':
    student_model_noKD = standard_training(epochs, train_data_loader, student_model_noKD, device)
    torch.save(student_model_noKD, "model/student_model_noKD.pt")
else:
    student_model_noKD = torch.load("model/student_model_noKD.pt")

  student_model_noKD = torch.load("model/student_model_noKD.pt")


Student Training from Teacher

Some code taken from: https://pytorch.org/tutorials/beginner/knowledge_distillation_tutorial.html

In [None]:
def kd_training(epochs, data_loader, teacher_model, student_model, device, soft_target_loss_weight = 0.25, ce_loss_weight = 0.75, temperature = 2, optimizer='adam', criterion='ce'):
    # Apply ``train_knowledge_distillation`` with a temperature of 2. Arbitrarily set the weights to 0.75 for CE and 0.25 for distillation loss.

    if optimizer == 'adam':
        optimizer = optim.Adam(student_model.parameters())
    else:
        raise NotImplementedError("optimizer string matcher is not implemented yet other than adam")
    if criterion == 'ce':
        criterion = nn.CrossEntropyLoss()
    else:
        raise NotImplementedError("optimizer string matcher is not implemented yet other than CrossEntropy")
    
    student_model.to(device)
    teacher_model.to(device)

    # Set teacher model to evaluation mode to not mess with gradients of teacher model
    teacher = teacher_model.eval()

    student_model.train() # Student to train mode

    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(tqdm(data_loader), 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device) # send to cuda

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            student_logits = student_model(inputs)

            with torch.no_grad():
                    teacher_logits = teacher_model(inputs)
            
            #Soften the student logits by applying softmax first and log() second
            soft_targets = nn.functional.softmax(teacher_logits / temperature, dim=-1)
            soft_prob = nn.functional.log_softmax(student_logits / temperature, dim=-1)

            # Calculate the soft targets loss. Scaled by temperature**2 as suggested by the authors of the paper "Distilling the knowledge in a neural network"
            soft_targets_loss = torch.sum(soft_targets * (soft_targets.log() - soft_prob)) / soft_prob.size()[0] * (temperature**2)

            # Calculate the true label loss
            label_loss = criterion(student_logits, labels)

            # Weighted sum of the two losses
            loss = (soft_target_loss_weight * soft_targets_loss) + (ce_loss_weight * label_loss)

            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
        
        # Calculate average loss for the epoch
        avg_loss = running_loss / len(train_data_loader)
        # Print average loss for the epoch
        print(f'Epoch [{epoch + 1}/{epochs}], Average Loss: {avg_loss:.4f}')
    print('Finished Training')
    return student_model

In [None]:
if mode == 'train':
    student_model_KD = kd_training(epochs, train_data_loader, teacher_model, student_model_KD, device)
    torch.save(student_model_KD, "model/student_model_KD.pt")
else:
    student_model_KD = torch.load("model/student_model_KD.pt")

  student_model_KD = torch.load("model/student_model_KD.pt")


## Metrics

In [None]:
def evaluate_model(model, model_name, data_loader, testing_mode=False):
    correct = 0
    total = 0
    model.eval()
    model_device = next(model.parameters()).device
    print(f'The model is stored on: {model_device}')
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in data_loader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if not testing_mode:
        return correct / total
        # print(f'Accuracy of {model_name} on the 10000 test images: {100 * correct / total:.3f} %')

Accuracy Testing

In [None]:
teacher_acc = evaluate_model(teacher_model, "teacher model", test_data_loader)

The model is stored on: mps:0


In [None]:
student_noKD_acc = evaluate_model(student_model_noKD, "student model with no Knowledge Distillation", test_data_loader)

The model is stored on: mps:0


AttributeError: 'ResNet50Smaller' object has no attribute 'quant'

In [None]:
student_KD_acc = evaluate_model(student_model_KD, "student model with Knowledge Distillation", test_data_loader)

The model is stored on: mps:0


AttributeError: 'ResNet50Smaller' object has no attribute 'quant'

In [None]:
teacher_acc_percent = teacher_acc * 100
student_noKD_acc_percent = student_noKD_acc * 100
student_KD_acc_percent = student_KD_acc * 100

teacher_to_student_noKD_acc = ((teacher_acc - student_noKD_acc) / teacher_acc) * 100
teacher_to_student_KD_acc = ((teacher_acc - student_KD_acc) / teacher_acc) * 100

# Create a PrettyTable object
acc_table = PrettyTable()
acc_table.set_style(SINGLE_BORDER)

# Define the columns
acc_table.field_names = ["Model", "Accuracy", "% Decrease from Teacher"]
acc_table.add_row(["Teacher Model", f"{teacher_acc_percent:.2f} %", "-"])
acc_table.add_row(["Student Model (No KD)", f"{student_noKD_acc_percent:.2f} %", f"{teacher_to_student_noKD_acc:.2f}%"])
acc_table.add_row(["Student Model (KD)", f"{student_KD_acc_percent:.2f} %", f"{teacher_to_student_KD_acc:.2f}%"])

# Print the table
print(acc_table)

┌───────────────────────┬──────────┬─────────────────────────┐
│         Model         │ Accuracy │ % Decrease from Teacher │
├───────────────────────┼──────────┼─────────────────────────┤
│     Teacher Model     │  7.08 %  │            -            │
│ Student Model (No KD) │ 10.00 %  │         -41.24%         │
│   Student Model (KD)  │ 81.00 %  │        -1044.07%        │
└───────────────────────┴──────────┴─────────────────────────┘


Evaluation Speed Testing

In [None]:
num_runs = 5

In [None]:
time_teacher = timeit.timeit(lambda: evaluate_model(teacher_model, "teacher model", test_data_loader, testing_mode=True), number=num_runs)

In [None]:
time_student_noKD = timeit.timeit(lambda: evaluate_model(student_model_noKD, "student model with no Knowledge Distillation", test_data_loader, testing_mode=True), number=num_runs)

In [None]:
time_student_KD = timeit.timeit(lambda: evaluate_model(student_model_KD, "student model with Knowledge Distillation", test_data_loader, testing_mode=True), number=num_runs)

In [None]:
teacher_to_student_noKD_time = ((time_teacher - time_student_noKD) / time_teacher) * 100
teacher_to_student_KD_time = ((time_teacher - time_student_KD) / time_teacher) * 100

# Create a PrettyTable object
speed_table = PrettyTable()
speed_table.set_style(SINGLE_BORDER)

# Define the columns
speed_table.field_names = ["Model", f"Time Averaged over {num_runs} runs (seconds)", "% Decrease from Teacher"]
speed_table.add_row(["Teacher Model", f"{time_teacher:.2f}", "-"])
speed_table.add_row(["Student Model (No KD)", f"{time_student_noKD:.2f}", f"{teacher_to_student_noKD_time:.2f}%"])
speed_table.add_row(["Student Model (KD)", f"{time_student_KD:.2f}", f"{teacher_to_student_KD_time:.2f}%"])

# Print the table
print(speed_table)

┌───────────────────────┬─────────────────────────────────────┬─────────────────────────┐
│         Model         │ Time Averaged over 5 runs (seconds) │ % Decrease from Teacher │
├───────────────────────┼─────────────────────────────────────┼─────────────────────────┤
│     Teacher Model     │                168.35               │            -            │
│ Student Model (No KD) │                121.58               │          27.78%         │
│   Student Model (KD)  │                122.96               │          26.96%         │
└───────────────────────┴─────────────────────────────────────┴─────────────────────────┘


Model Size Comparison

In [None]:
teacher_model_size = os.path.getsize("model/teacher_model.pt") / 1e6
student_model_size = os.path.getsize("model/student_model_KD.pt") / 1e6
teacher_to_student_model_size = (teacher_model_size - student_model_size) * 100 / teacher_model_size

# Create a PrettyTable object
size_table = PrettyTable()
size_table.set_style(SINGLE_BORDER)

# Define the columns
size_table.field_names = ["Model", f"Model Size (MB)", "% Decrease from Teacher"]
size_table.add_row(["Teacher Model", f"{teacher_model_size:.2f}", "-"])
size_table.add_row(["Student Model", f"{student_model_size:.2f}", f"{teacher_to_student_model_size:.2f}%"])

# Print the table
print(size_table)

┌───────────────┬─────────────────────┬─────────────────────────┐
│     Model     │ Parameter Size (MB) │ % Decrease from Teacher │
├───────────────┼─────────────────────┼─────────────────────────┤
│ Teacher Model │        94.42        │            -            │
│ Student Model │        34.41        │          63.55%         │
└───────────────┴─────────────────────┴─────────────────────────┘


The knowledge distilled smaller model is faster than all the models and more accurate than the non knowledge distilled smaller model that was trained regularly using the training dataset

## Quantizing and Exporting KD Model into CoreML model for iOS

Following Pytorch docs for setup from: https://pytorch.org/executorch/stable/getting-started-setup.html

## Quantization

Quantization Docs: https://pytorch.org/docs/stable/quantization.html#introduction-to-quantization

We use post training static quantization. There more information of different types of quantization are provided in the quantization docs above.

In [None]:
from torch.quantization import prepare, convert, fuse_modules, get_default_qconfig
import copy

In [None]:
from executorch.exir import to_edge, EdgeCompileConfig

In [None]:
# Quantization isn't implemented on mps so move model back to cpu
student_model_KD.to('cpu')

ResNet50Smaller(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=T

In [None]:
torch.backends.quantized.engine = 'qnnpack'

In [None]:
student_model_KD.eval()

ResNet50Smaller(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=T

In [None]:
# attach a global qconfig, which contains information about what kind
# of observers to attach. Use 'x86' for server inference and 'qnnpack'
# for mobile inference. Other quantization configurations such as selecting
# symmetric or asymmetric quantization and MinMax or L2Norm calibration techniques
# can be specified here.
# Note: the old 'fbgemm' is still available but 'x86' is the recommended default
# for server inference.
# model_fp32.qconfig = torch.ao.quantization.get_default_qconfig('fbgemm')

student_model_KD.qconfig = torch.ao.quantization.get_default_qconfig('qnnpack')

In [None]:
# Fuse the activations to preceding layers, where applicable.
# This needs to be done manually depending on the model architecture.
# Common fusions include `conv + relu` and `conv + batchnorm + relu`

def fuse_resnet_layers(model):
    # Create a deep copy of the model to avoid in-place modifications
    model_copy = copy.deepcopy(model)

    # Fuse the first conv, bn, and relu layers if applicable
    fuse_modules(model_copy, [['conv1', 'bn1', 'relu']], inplace=True)

    # Go through each residual layer and fuse conv, bn, and relu layers where possible
    for layer_name in ['layer1', 'layer2', 'layer3']:
        layer = getattr(model_copy, layer_name)
        for block_name, block in layer.named_children():
            # Check for Identity and fuse accordingly
            if not isinstance(block.relu, nn.Identity):
                # Fuse conv1 + bn1 + relu if relu is not Identity
                fuse_modules(block, [['conv1', 'bn1', 'relu']])
            else:
                # Only fuse conv1 + bn1 if relu is Identity
                fuse_modules(block, [['conv1', 'bn1']])
            
            # Similarly, check and fuse conv2 + bn2 + relu or conv2 + bn2
            if not isinstance(block.relu, nn.Identity):
                fuse_modules(block, [['conv2', 'bn2', 'relu']])
            else:
                fuse_modules(block, [['conv2', 'bn2']])

            # If it exists, fuse conv3 + bn3 in the bottleneck (no relu following conv3)
            if hasattr(block, 'conv3') and hasattr(block, 'bn3'):
                fuse_modules(block, [['conv3', 'bn3']])

    return model_copy

student_model_KD_fused = fuse_resnet_layers(student_model_KD)

In [None]:
# Prepare the model for static quantization. This inserts observers in
# the model that will observe activation tensors during calibration.
student_model_KD_prepared = prepare(student_model_KD_fused)

In [None]:
# Get a batch of images for calibration
input, _ = next(iter(train_data_loader))

# Run the calibration with real data
student_model_KD_prepared(input)

tensor([[ 1.0080,  0.6651, -0.1002,  ..., -3.3716,  5.1894, -1.8235],
        [-0.8066, 15.0375, -4.5134,  ..., -4.0447, -1.8945,  3.3840],
        [ 1.3016, -7.7857, -0.9425,  ..., 12.0999, -6.1075, -6.3104],
        ...,
        [ 3.7731,  4.5978, -4.8692,  ..., -2.3028,  0.6037, 11.9250],
        [-1.2609,  9.2601, -5.0596,  ..., -3.1786,  1.8563,  1.6664],
        [ 5.0686,  7.3095, -8.0201,  ..., -3.7463, -1.2402, 12.6266]],
       grad_fn=<AddmmBackward0>)

In [None]:
# Convert the observed model to a quantized model. This does several things:
# quantizes the weights, computes and stores the scale and bias value to be
# used with each activation tensor, and replaces key operators with quantized
# implementations.
student_model_KD_int8 = convert(student_model_KD_prepared)

In [None]:
torch.save(student_model_KD_int8, "model/student_model_KD_int8.pt")
student_model_KD_int8 = torch.load("model/student_model_KD_int8.pt")

Let's see how much size we saved from our previous models

In [None]:
teacher_model_size = os.path.getsize("model/teacher_model.pt") / 1e6
student_model_size = os.path.getsize("model/student_model_KD.pt") / 1e6
student_model_quantized_size = os.path.getsize("model/student_model_KD_int8.pt") / 1e6

teacher_to_student_model_size = (teacher_model_size - student_model_size) * 100 / teacher_model_size
teacher_to_quantized_student_model_size = (teacher_model_size - student_model_quantized_size) * 100 / teacher_model_size

# Create a PrettyTable object
size_table = PrettyTable()
size_table.set_style(SINGLE_BORDER)

# Define the columns
size_table.field_names = ["Model", f"Model Size (MB)", "% Decrease from Teacher"]
size_table.add_row(["Teacher Model", f"{teacher_model_size:.2f}", "-"])
size_table.add_row(["Student Model", f"{student_model_size:.2f}", f"{teacher_to_student_model_size:.2f}%"])
size_table.add_row(["Quantized Student Model", f"{student_model_quantized_size:.2f}", f"{teacher_to_quantized_student_model_size:.2f}%"])


# Print the table
print(size_table)

┌─────────────────────────┬─────────────────┬─────────────────────────┐
│          Model          │ Model Size (MB) │ % Decrease from Teacher │
├─────────────────────────┼─────────────────┼─────────────────────────┤
│      Teacher Model      │      94.42      │            -            │
│      Student Model      │      34.41      │          63.55%         │
│ Quantized Student Model │       8.95      │          90.52%         │
└─────────────────────────┴─────────────────┴─────────────────────────┘


We see almost a 90% reduction in model size

In [None]:
student_model_KD_int8.to(device)
print(device)

mps


In [None]:
student_KD_quant_acc = evaluate_model(student_model_KD_int8, "quantized student model", test_data_loader, testing_mode=False)

The model is stored on: mps:0


NotImplementedError: Could not run 'quantized::conv2d_relu.new' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv2d_relu.new' is only available for these backends: [MPS, Meta, QuantizedCPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

MPS: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:75 [backend fallback]
Meta: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/MetaFallbackKernel.cpp:23 [backend fallback]
QuantizedCPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/quantized/cpu/qconv.cpp:1972 [kernel]
BackendSelect: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:153 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/FunctionalizeFallbackKernel.cpp:349 [backend fallback]
Named: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:96 [backend fallback]
AutogradOther: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:63 [backend fallback]
AutogradCPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:67 [backend fallback]
AutogradCUDA: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:75 [backend fallback]
AutogradXLA: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:79 [backend fallback]
AutogradMPS: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:87 [backend fallback]
AutogradXPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:71 [backend fallback]
AutogradHPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:100 [backend fallback]
AutogradLazy: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:83 [backend fallback]
AutogradMeta: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:91 [backend fallback]
Tracer: registered at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/TraceTypeManual.cpp:294 [backend fallback]
AutocastCPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:321 [backend fallback]
AutocastXPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:463 [backend fallback]
AutocastMPS: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:209 [backend fallback]
AutocastCUDA: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:165 [backend fallback]
FuncTorchBatched: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/TensorWrapper.cpp:207 [backend fallback]
PythonTLSSnapshot: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:161 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:165 [backend fallback]
PythonDispatcher: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:157 [backend fallback]


In [None]:
time_student_quantized = timeit.timeit(lambda: evaluate_model(student_model_KD_int8, "quantized student model", test_data_loader, testing_mode=True), number=num_runs)

NotImplementedError: Could not run 'quantized::conv2d_relu.new' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv2d_relu.new' is only available for these backends: [MPS, Meta, QuantizedCPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

MPS: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:75 [backend fallback]
Meta: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/MetaFallbackKernel.cpp:23 [backend fallback]
QuantizedCPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/quantized/cpu/qconv.cpp:1972 [kernel]
BackendSelect: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:153 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/FunctionalizeFallbackKernel.cpp:349 [backend fallback]
Named: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:96 [backend fallback]
AutogradOther: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:63 [backend fallback]
AutogradCPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:67 [backend fallback]
AutogradCUDA: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:75 [backend fallback]
AutogradXLA: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:79 [backend fallback]
AutogradMPS: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:87 [backend fallback]
AutogradXPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:71 [backend fallback]
AutogradHPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:100 [backend fallback]
AutogradLazy: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:83 [backend fallback]
AutogradMeta: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:91 [backend fallback]
Tracer: registered at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/TraceTypeManual.cpp:294 [backend fallback]
AutocastCPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:321 [backend fallback]
AutocastXPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:463 [backend fallback]
AutocastMPS: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:209 [backend fallback]
AutocastCUDA: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:165 [backend fallback]
FuncTorchBatched: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/TensorWrapper.cpp:207 [backend fallback]
PythonTLSSnapshot: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:161 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:165 [backend fallback]
PythonDispatcher: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:157 [backend fallback]


In [None]:
teacher_to_student_noKD_time = ((time_teacher - time_student_noKD) / time_teacher) * 100
teacher_to_student_KD_time = ((time_teacher - time_student_KD) / time_teacher) * 100
teacher_to_student_quantized_time = ((time_teacher - time_student_quantized) / time_teacher) * 100

# Create a PrettyTable object
speed_table = PrettyTable()
speed_table.set_style(SINGLE_BORDER)

# Define the columns
speed_table.field_names = ["Model", f"Time Averaged over {num_runs} runs (seconds)", "% Decrease from Teacher"]
speed_table.add_row(["Teacher Model", f"{time_teacher:.2f}", "-"])
speed_table.add_row(["Student Model (No KD)", f"{time_student_noKD:.2f}", f"{teacher_to_student_noKD_time:.2f}%"])
speed_table.add_row(["Student Model (KD)", f"{time_student_KD:.2f}", f"{teacher_to_student_KD_time:.2f}%"])
speed_table.add_row(["Student Model Quantized (KD)", f"{time_student_quantized:.2f}", f"{teacher_to_student_quantized_time:.2f}%"])


# Print the table
print(speed_table)

## Exporting as CoreML model

In [None]:
import coremltools as ct

In [None]:
# Trace the model with random data.
example_input = torch.rand(1, 3, 32, 32) 
traced_model = torch.jit.trace(student_model_KD_int8, example_input)
out = traced_model(example_input)

NotImplementedError: Could not run 'quantized::conv2d_relu.new' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv2d_relu.new' is only available for these backends: [MPS, Meta, QuantizedCPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

MPS: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:75 [backend fallback]
Meta: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/MetaFallbackKernel.cpp:23 [backend fallback]
QuantizedCPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/quantized/cpu/qconv.cpp:1972 [kernel]
BackendSelect: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:153 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/FunctionalizeFallbackKernel.cpp:349 [backend fallback]
Named: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:96 [backend fallback]
AutogradOther: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:63 [backend fallback]
AutogradCPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:67 [backend fallback]
AutogradCUDA: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:75 [backend fallback]
AutogradXLA: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:79 [backend fallback]
AutogradMPS: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:87 [backend fallback]
AutogradXPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:71 [backend fallback]
AutogradHPU: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:100 [backend fallback]
AutogradLazy: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:83 [backend fallback]
AutogradMeta: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:91 [backend fallback]
Tracer: registered at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/TraceTypeManual.cpp:294 [backend fallback]
AutocastCPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:321 [backend fallback]
AutocastXPU: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:463 [backend fallback]
AutocastMPS: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:209 [backend fallback]
AutocastCUDA: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/autocast_mode.cpp:165 [backend fallback]
FuncTorchBatched: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/TensorWrapper.cpp:207 [backend fallback]
PythonTLSSnapshot: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:161 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:165 [backend fallback]
PythonDispatcher: registered at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:157 [backend fallback]


In [None]:
# Create a dummy input tensor that matches the input shape expected by your model
dummy_input = torch.randn(1, 3, 32, 32)  # Adjust based on your input size (e.g., CIFAR-10 images)

# Convert the quantized PyTorch model to Core ML directly
model = ct.convert(
    traced_model,
    convert_to="mlprogram",
    inputs=[ct.TensorType(shape=example_input.shape)]
 )