In [1]:
import onnx
import onnxruntime
import numpy as np
import torch
from torchvision import models, transforms

# Load the ONNX model graph (topology + weights) for inspection
model_path = "vgg16-7.onnx"
onnx_model = onnx.load(model_path)

# Create an ONNX Runtime session.
# The execution providers are named explicitly: ONNX Runtime builds that ship
# more than one provider (e.g. onnxruntime-gpu, 1.9 and later) may raise a
# ValueError when `providers` is omitted.
available_providers = onnxruntime.get_available_providers()
providers = [
    candidate
    for candidate in ("CUDAExecutionProvider", "CPUExecutionProvider")
    if candidate in available_providers
]
ort_session = onnxruntime.InferenceSession(model_path, providers=providers)

In [2]:
pip install onnx

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
!pip install opencv-python

Defaulting to user installation because normal site-packages is not writeable


In [4]:
pip install numba

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [5]:
%pip install numba

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [6]:
from numba import cuda
# cuda.detect() writes the per-device report itself; the surrounding print()
# only adds its return value (the final "True" line of the cell output).
print(cuda.detect())

Found 1 CUDA devices
id 0      b'NVIDIA T400 4GB'                              [SUPPORTED]
                      Compute Capability: 7.5
                           PCI Device ID: 0
                              PCI Bus ID: 1
                                    UUID: GPU-f748cbed-5ce7-bff1-bd01-ea1a21a5bf6f
                                Watchdog: Enabled
                            Compute Mode: WDDM
             FP32/FP64 Performance Ratio: 32
Summary:
	1/1 devices are supported
True


In [7]:

!pip install onnxruntime-gpu

Defaulting to user installation because normal site-packages is not writeable


In [8]:
import onnx
import onnxruntime
import numpy as np
import torch
from torchvision import transforms
from PIL import Image

# Read the serialized VGG16 graph from disk
model_path = "vgg16-7.onnx"
onnx_model = onnx.load(model_path)

# Choose the execution provider: CUDA when this ONNX Runtime build exposes it,
# otherwise the CPU provider.
if 'CUDAExecutionProvider' not in onnxruntime.get_available_providers():
    print("CUDA is not available, falling back to CPU.")
    providers = ['CPUExecutionProvider']
else:
    print("Using CUDA execution provider for GPU acceleration.")
    providers = ['CUDAExecutionProvider']

# Build the inference session on the selected provider
ort_session = onnxruntime.InferenceSession(
    model_path,
    providers=providers,
)

Using CUDA execution provider for GPU acceleration.


In [9]:
!pip install onnx onnxruntime numpy Pillow

Defaulting to user installation because normal site-packages is not writeable


In [10]:
import onnx
import onnxruntime
import numpy as np
from PIL import Image
from onnxruntime.quantization import quantize_static, CalibrationDataReader


In [11]:
# Calibration data reader that feeds preprocessed images to the quantizer
class ImageDataReader(CalibrationDataReader):
    """Supplies preprocessed calibration images for ONNX Runtime static quantization.

    Each call to ``get_next`` loads one image, converts it to RGB, resizes it
    to ``size``, scales the pixels to [0, 1], optionally normalizes each
    channel, and returns a float32 array laid out as (1, 3, H, W).

    Args:
        image_paths: Sequence of image file paths used for calibration.
        input_name: Name of the model input the array is fed to. The default
            'input.1' is only a placeholder; check the actual input name of
            your model (e.g. ``session.get_inputs()[0].name``).
        size: ``(width, height)`` the image is resized to.
        mean: Optional per-channel mean subtracted after scaling to [0, 1].
        std: Optional per-channel divisor applied after the mean subtraction.
            Pass the same mean/std as the inference pipeline so calibration
            sees the value range the model sees at inference time.
    """

    def __init__(self, image_paths, input_name='input.1', size=(224, 224),
                 mean=None, std=None):
        self.image_paths = image_paths
        self.data_count = len(image_paths)
        self.input_name = input_name
        self.size = size
        # Stored as float32 so normalization does not promote the data to float64
        self.mean = None if mean is None else np.asarray(mean, dtype='float32')
        self.std = None if std is None else np.asarray(std, dtype='float32')

    def get_next(self):
        """Return the next ``{input_name: array}`` feed, or None when exhausted.

        Images are consumed from the end of ``image_paths`` towards its start.
        """
        if self.data_count <= 0:
            return None

        self.data_count -= 1
        image_path = self.image_paths[self.data_count]

        # The context manager releases the file handle once decoding is done
        with Image.open(image_path) as raw_img:
            img = raw_img.convert('RGB').resize(self.size)

        data = np.asarray(img, dtype='float32') / 255.0  # HWC, values in [0, 1]
        if self.mean is not None:
            data = data - self.mean
        if self.std is not None:
            data = data / self.std
        data = np.transpose(data, (2, 0, 1))  # HWC -> CHW
        data = np.expand_dims(data, axis=0)   # add the batch dimension
        return {self.input_name: data}

# --- Dummy Calibration Data ---
# Create a list of paths to your calibration images
# Replace this with your actual image paths
calibration_images = [f'dummy_image_{i}.jpg' for i in range(100)]

# You need to create some dummy images to run this code
# (Not part of the core quantization logic)
# A seeded generator keeps the generated images -- and therefore every result
# computed from them in later cells -- identical across kernel restarts.
dummy_rng = np.random.default_rng(0)
for img_path in calibration_images:
    # The upper bound is exclusive, so 256 is needed to cover the uint8 range
    dummy_pixels = dummy_rng.integers(0, 256, size=(224, 224, 3), dtype=np.uint8)
    dummy_img = Image.fromarray(dummy_pixels)
    dummy_img.save(img_path)

calib_reader = ImageDataReader(calibration_images)

In [12]:
import onnx
import onnxruntime
import numpy as np
import torch
from torchvision import transforms
from PIL import Image

# --- Helper Functions ---

def absmax_quantize(weights):
    """Quantize a weight tensor to int8 with symmetric absmax scaling.

    The scale maps the largest absolute weight to 127, so that
    ``quantized_weights / scale`` approximately reconstructs ``weights``.

    Args:
        weights: Array-like of floating-point weights.

    Returns:
        Tuple ``(quantized_weights, scale)``: an int8 array with the shape of
        ``weights`` and the multiplier that was applied before rounding.
        For empty or all-zero input the scale is 1.0 and the result is zeros.
    """
    weights = np.asarray(weights)
    # An empty or all-zero tensor has no usable maximum: dividing by it would
    # produce an infinite scale and NaNs. A unit scale keeps round-trips exact.
    max_val = np.max(np.abs(weights)) if weights.size else 0.0
    if max_val == 0:
        return np.zeros(weights.shape, dtype=np.int8), 1.0

    scale = 127.0 / max_val
    # Clip guards against floating-point overshoot just past +/-127
    quantized_weights = np.clip(np.round(weights * scale), -127, 127).astype(np.int8)
    return quantized_weights, scale

def perform_conv(input_tensor, kernel, bias=None, stride=1, padding=0):
    """Compute a 2D convolution (cross-correlation) with zero padding.

    Args:
        input_tensor: Array of shape (batch, in_channels, height, width).
        kernel: Array of shape (out_channels, in_channels, kernel_h, kernel_w).
        bias: Optional array of shape (out_channels,), added to every output
            position of the matching output channel.
        stride: Step between windows; an int or a ``(stride_h, stride_w)`` pair.
        padding: Zeros added on each side; an int or a ``(pad_h, pad_w)`` pair.

    Returns:
        float64 array of shape (batch, out_channels, output_h, output_w).

    Raises:
        ValueError: If the channel count of ``input_tensor`` differs from the
            kernel's input-channel count.
    """
    output_channels, kernel_input_channels, kernel_h, kernel_w = kernel.shape
    batch_size, current_input_channels, input_h, input_w = input_tensor.shape

    if kernel_input_channels != current_input_channels:
        raise ValueError(f"Input channels ({current_input_channels}) do not match kernel input channels ({kernel_input_channels}).")

    stride_h, stride_w = (stride, stride) if np.isscalar(stride) else stride
    pad_h, pad_w = (padding, padding) if np.isscalar(padding) else padding

    # Calculate output dimensions
    output_h = (input_h + 2 * pad_h - kernel_h) // stride_h + 1
    output_w = (input_w + 2 * pad_w - kernel_w) // stride_w + 1

    output = np.zeros((batch_size, output_channels, output_h, output_w))
    if output.size == 0:
        return output

    # Pad the spatial axes of the input with zeros
    padded_input = np.pad(
        input_tensor,
        ((0, 0), (0, 0), (pad_h, pad_h), (pad_w, pad_w)),
        'constant',
        constant_values=0,
    )

    # Accumulate one kernel tap at a time. For tap (dh, dw) the strided slice
    # selects, for every output position at once, the input element that tap
    # touches; a single channel contraction then replaces the per-pixel and
    # per-output-channel Python loops.
    for dh in range(kernel_h):
        h_stop = dh + (output_h - 1) * stride_h + 1
        for dw in range(kernel_w):
            w_stop = dw + (output_w - 1) * stride_w + 1
            window = padded_input[:, :, dh:h_stop:stride_h, dw:w_stop:stride_w]
            # (out, in) x (batch, in, h, w) -> (out, batch, h, w)
            tap_result = np.tensordot(kernel[:, :, dh, dw], window, axes=([1], [1]))
            output += tap_result.transpose(1, 0, 2, 3)

    if bias is not None:
        output += np.asarray(bias).reshape(1, -1, 1, 1)

    return output

def perform_maxpool(input_tensor, kernel_shape, strides, pads):
    """Compute 2D max pooling over the two trailing axes.

    Args:
        input_tensor: Array of shape (batch, channels, height, width).
        kernel_shape: ``(kernel_h, kernel_w)`` window size.
        strides: ``(stride_h, stride_w)`` step between windows.
        pads: Four paddings ordered ``[top, left, bottom, right]`` (the ONNX
            begin/end layout for two spatial axes).

    Returns:
        float64 array of shape (batch, channels, output_h, output_w).
    """
    batch_size, channels, input_h, input_w = input_tensor.shape
    kernel_h, kernel_w = kernel_shape
    stride_h, stride_w = strides
    pad_top, pad_left, pad_bottom, pad_right = pads

    # Use the begin and end padding of each axis, matching how the input is
    # actually padded below (they may differ from each other).
    output_h = (input_h + pad_top + pad_bottom - kernel_h) // stride_h + 1
    output_w = (input_w + pad_left + pad_right - kernel_w) // stride_w + 1

    output = np.full((batch_size, channels, output_h, output_w), -np.inf)
    if output.size == 0:
        return output

    # Padding must never win the max, so pad with -inf rather than 0; with
    # zero padding an all-negative window at the border would wrongly yield 0.
    padded_input = np.pad(
        np.asarray(input_tensor, dtype=np.float64),
        ((0, 0), (0, 0), (pad_top, pad_bottom), (pad_left, pad_right)),
        'constant',
        constant_values=-np.inf,
    )

    # Fold in one window offset at a time: each strided slice holds, for every
    # output position, the element at offset (dh, dw) inside its window.
    for dh in range(kernel_h):
        h_stop = dh + (output_h - 1) * stride_h + 1
        for dw in range(kernel_w):
            w_stop = dw + (output_w - 1) * stride_w + 1
            window = padded_input[:, :, dh:h_stop:stride_h, dw:w_stop:stride_w]
            np.maximum(output, window, out=output)

    return output

def perform_gemm(input_tensor, weight, bias):
    """Apply a fully connected layer to a batch.

    Everything after the first (batch) axis of ``input_tensor`` is flattened,
    the result is multiplied by the transpose of ``weight``, and ``bias`` is
    added.
    """
    batch = input_tensor.shape[0]
    flattened = input_tensor.reshape(batch, -1)
    projected = flattened @ weight.T
    return projected + bias

In [13]:
# --- File Paths ---
model_path = "vgg16-7.onnx"
image_path = "dummy_image_96.jpg" # Make sure this image exists in your directory

# --- Load the ONNX model and prepare a session ---
onnx_model = onnx.load(model_path)
# Name the execution providers explicitly: ONNX Runtime builds that ship more
# than one provider (e.g. onnxruntime-gpu, 1.9 and later) may raise a
# ValueError when `providers` is omitted.
available_providers = onnxruntime.get_available_providers()
session_providers = [
    candidate
    for candidate in ("CUDAExecutionProvider", "CPUExecutionProvider")
    if candidate in available_providers
]
ort_session = onnxruntime.InferenceSession(model_path, providers=session_providers)

# --- Prepare Input Image ---
# Resize / center-crop to 224x224, convert to a CHW float tensor in [0, 1],
# then normalize each channel with the mean/std listed below.
img = Image.open(image_path).convert('RGB')
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# unsqueeze(0) adds the batch axis; ONNX Runtime and the NumPy helpers take arrays
input_tensor = transform(img).unsqueeze(0).numpy()

# Get model initializers (weights and biases), keyed by initializer name
graph = onnx_model.graph
initializers = {init.name: onnx.numpy_helper.to_array(init) for init in graph.initializer}

In [4]:
def _node_ints(node, attr_name, default):
    """Return the integer-list attribute `attr_name` of an ONNX node.

    Falls back to `default` when the node does not carry the attribute, which
    is legal for attributes the ONNX operator spec assigns a default value.
    """
    for attr in node.attribute:
        if attr.name == attr_name:
            return list(attr.ints)
    return list(default)


# Operators that hand their input through unchanged at inference time
PASS_THROUGH_OPS = ("Dropout", "Identity")

# NOTE(review): each node's result is fed straight into the next node, i.e.
# graph.node is assumed to be a single linear chain -- confirm for the model.
current_input = input_tensor

for node in graph.node:
    print(f"Processing layer: {node.name} with op_type: {node.op_type}")

    if node.op_type == "Conv":
        weight_name = node.input[1]
        bias_name = node.input[2] if len(node.input) > 2 else None

        weights = initializers[weight_name]
        bias = initializers[bias_name] if bias_name else None

        # Quantize weights using absmax, then dequantize so the float
        # convolution below runs with int8-rounded weights
        quantized_weights, scale = absmax_quantize(weights)
        dequantized_weights = quantized_weights / scale

        # Extract layer attributes, using the ONNX defaults when absent.
        # Only the first entry is used, so the same stride / padding is
        # applied along height and width.
        strides = _node_ints(node, "strides", [1, 1])[0]
        padding = _node_ints(node, "pads", [0, 0, 0, 0])[0]

        current_input = perform_conv(current_input, dequantized_weights, bias, stride=strides, padding=padding)

    elif node.op_type == "Relu":
        current_input = np.maximum(0, current_input)

    elif node.op_type == "MaxPool":
        kernel_shape = _node_ints(node, "kernel_shape", [])
        if not kernel_shape:
            raise ValueError(f"MaxPool node {node.name!r} has no kernel_shape attribute")
        strides = _node_ints(node, "strides", [1, 1])
        pads = _node_ints(node, "pads", [0, 0, 0, 0])
        current_input = perform_maxpool(current_input, kernel_shape, strides, pads)

    elif node.op_type == "Gemm":
        weight_name = node.input[1]
        bias_name = node.input[2]

        weights = initializers[weight_name]
        bias = initializers[bias_name]

        # Quantize weights
        quantized_weights, scale = absmax_quantize(weights)
        dequantized_weights = quantized_weights / scale

        # NOTE(review): perform_gemm multiplies by the transposed weight, which
        # corresponds to a Gemm with transB=1 and alpha=beta=1 -- confirm.
        current_input = perform_gemm(current_input, dequantized_weights, bias)

    elif node.op_type == "Flatten":
        current_input = current_input.reshape(current_input.shape[0], -1)

    elif node.op_type in PASS_THROUGH_OPS:
        # Nothing to compute; keep the activations as they are
        pass

    else:
        print(f"Warning: Unhandled ONNX operator type: {node.op_type}")

NameError: name 'input_tensor' is not defined

In [5]:
# Baseline: feed the same input to the unmodified FP32 model via ONNX Runtime
ort_inputs = {}
ort_inputs[ort_session.get_inputs()[0].name] = input_tensor
original_output = ort_session.run(None, ort_inputs)[0]

# Top-1 class index from each pipeline
original_prediction = original_output.argmax()
quantized_prediction = current_input.argmax()

print("\n--- Accuracy Check ---")
print(f"Original model prediction: {original_prediction}")
print(f"Quantized model prediction: {quantized_prediction}")

# 1 when both pipelines pick the same class, 0 otherwise
is_accurate = int(original_prediction == quantized_prediction)
print(f"Prediction accuracy for this image: {is_accurate}")

NameError: name 'ort_session' is not defined

In [6]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import numpy as np

# A module to perform absmax quantization and dequantization
class AbsMaxQuantizer(nn.Module):
    """Runs a wrapped layer with int8 absmax-quantized weights (simulated).

    The wrapped layer's weights are quantized once at construction. On every
    forward pass they are dequantized back to floating point and used in
    place of the original weights, so the output reflects the rounding error
    of int8 weight storage while the arithmetic itself stays in float.

    Args:
        layer: Module with a ``weight`` tensor. ``nn.Conv2d`` and ``nn.Linear``
            are run with the dequantized weights; any other layer is called
            unchanged.

    Attributes:
        scale: 0-dim tensor; multiplier applied to the weights before rounding.
        quantized_weights: int8 tensor with the shape of ``layer.weight``.
    """

    def __init__(self, layer):
        super().__init__()
        self.layer = layer

        # Get weights and quantize them on initialization
        weights = layer.weight.data
        max_val = torch.max(torch.abs(weights))
        # All-zero weights would give an infinite scale (and NaNs after the
        # multiplication); fall back to a unit scale in that case.
        scale = 127.0 / max_val if max_val > 0 else torch.ones_like(max_val)
        quantized = torch.round(weights * scale).clamp_(-127, 127).to(torch.int8)

        # Registered as buffers so they follow .to(device) and are included in
        # state_dict(); both remain reachable as plain attributes.
        self.register_buffer("scale", scale)
        self.register_buffer("quantized_weights", quantized)

    def forward(self, x):
        """Apply the wrapped layer to ``x`` using the dequantized weights."""
        # De-quantize the weights for computation
        dequantized_weights = self.quantized_weights.to(self.scale.dtype) / self.scale

        # Perform the original layer's forward pass with the de-quantized weights
        # We need to handle Conv2d and Linear layers differently
        if isinstance(self.layer, nn.Conv2d):
            # dilation and groups are forwarded too, so dilated / grouped
            # convolutions behave like the wrapped layer.
            # NOTE(review): padding_mode other than 'zeros' is not reproduced here.
            return nn.functional.conv2d(
                x,
                dequantized_weights,
                self.layer.bias,
                self.layer.stride,
                self.layer.padding,
                self.layer.dilation,
                self.layer.groups,
            )
        elif isinstance(self.layer, nn.Linear):
            return nn.functional.linear(x, dequantized_weights, self.layer.bias)
        else:
            return self.layer(x)

In [16]:
# Load a pre-trained VGG16 model (one reference copy, one copy to quantize)
model_original = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
model_quantized = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

# Move models to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model_original.to(device)
model_quantized.to(device)

# --- Replace Layers with Quantized Versions ---
# Snapshot the layers first: the loop below rewrites the module tree, and
# named_modules() is a lazy generator walking that same tree.
layers_to_quantize = [
    (name, module)
    for name, module in model_quantized.named_modules()
    if isinstance(module, (nn.Conv2d, nn.Linear))
]

for name, module in layers_to_quantize:
    # "features.0" -> parent "features", attribute "0"; a top-level layer has
    # an empty parent path, for which get_submodule returns the model itself.
    parent_path, _, layer_name = name.rpartition('.')
    parent = model_quantized.get_submodule(parent_path)

    # Replace the layer with our AbsMaxQuantizer
    quantized_layer = AbsMaxQuantizer(module)
    setattr(parent, layer_name, quantized_layer)

Using device: cuda


In [17]:
# --- Prepare Input Image ---
image_path = "dummy_image_96.jpg" # Replace with your image file path
img = Image.open(image_path).convert('RGB')

# Resize / center-crop to 224x224, convert to a tensor, normalize per channel
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])
# Add the batch axis and place the tensor on the models' device
input_tensor = transform(img).unsqueeze(0).to(device)

# --- Perform Inference on Both Models ---
model_original.eval()
model_quantized.eval()

# Gradients are not needed for evaluation
with torch.no_grad():
    output_original = model_original(input_tensor)    # FP32 reference
    output_quantized = model_quantized(input_tensor)  # absmax-quantized weights

# Index of the largest logit along the class axis
pred_original = output_original.max(dim=1).indices
pred_quantized = output_quantized.max(dim=1).indices

print("\n--- Accuracy Check ---")
print(f"Original model prediction: {pred_original.item()}")
print(f"Quantized model prediction: {pred_quantized.item()}")

# 1 when both models agree on the top class, 0 otherwise
is_accurate = int(pred_original.item() == pred_quantized.item())
print(f"Prediction accuracy for this image: {is_accurate}")

Layer: Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 64, 224, 224])
Layer: Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 64, 224, 224])
Layer: Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 128, 112, 112])
Layer: Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 128, 112, 112])
Layer: Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 512, 28, 28])
Layer: Conv2d(512, 512, kernel_size=(3, 3), stri

In [13]:
# A module to perform absmax quantization and dequantization
class AbsMaxQuantizer(nn.Module):
    """Runs a wrapped layer with int8 absmax-quantized weights and logs shapes.

    The wrapped layer's weights are quantized once at construction. On every
    forward pass they are dequantized back to floating point and used in
    place of the original weights. When ``verbose`` is true, the layer and
    the shape of its output are printed on each forward pass.

    Args:
        layer: Module with a ``weight`` tensor. ``nn.Conv2d`` and ``nn.Linear``
            are run with the dequantized weights; any other layer is called
            unchanged.
        verbose: Print one line per forward pass. Defaults to True; pass
            False for evaluation loops to avoid flooding the cell output.

    Attributes:
        scale: 0-dim tensor; multiplier applied to the weights before rounding.
        quantized_weights: int8 tensor with the shape of ``layer.weight``.
    """

    def __init__(self, layer, verbose=True):
        super().__init__()
        self.layer = layer
        self.verbose = verbose

        # Get weights and quantize them on initialization
        weights = layer.weight.data
        max_val = torch.max(torch.abs(weights))
        # All-zero weights would give an infinite scale (and NaNs after the
        # multiplication); fall back to a unit scale in that case.
        scale = 127.0 / max_val if max_val > 0 else torch.ones_like(max_val)
        quantized = torch.round(weights * scale).clamp_(-127, 127).to(torch.int8)

        # Registered as buffers so they follow .to(device) and are included in
        # state_dict(); both remain reachable as plain attributes.
        self.register_buffer("scale", scale)
        self.register_buffer("quantized_weights", quantized)

    def forward(self, x):
        """Apply the wrapped layer to ``x`` using the dequantized weights."""
        # De-quantize the weights for computation
        dequantized_weights = self.quantized_weights.to(self.scale.dtype) / self.scale

        # Perform the original layer's forward pass with the de-quantized weights
        if isinstance(self.layer, nn.Conv2d):
            # dilation and groups are forwarded too, so dilated / grouped
            # convolutions behave like the wrapped layer.
            # NOTE(review): padding_mode other than 'zeros' is not reproduced here.
            output = nn.functional.conv2d(
                x,
                dequantized_weights,
                self.layer.bias,
                self.layer.stride,
                self.layer.padding,
                self.layer.dilation,
                self.layer.groups,
            )
        elif isinstance(self.layer, nn.Linear):
            output = nn.functional.linear(x, dequantized_weights, self.layer.bias)
        else:
            output = self.layer(x)

        # One shared logging statement for all three layer kinds
        if self.verbose:
            print(f"Layer: {self.layer}, Output Shape: {output.shape}")
        return output

# ... (The rest of the code remains the same) ...

In [19]:
# --- Perform Inference on Both Models ---
model_original.eval()
model_quantized.eval()

# Gradients are not needed for evaluation
with torch.no_grad():
    output_original = model_original(input_tensor)    # FP32 reference
    output_quantized = model_quantized(input_tensor)  # absmax-quantized weights

# Turn the logits into probabilities over the class axis
probabilities_original = output_original.softmax(dim=1)
probabilities_quantized = output_quantized.softmax(dim=1)

# Five most likely classes per model; topk yields (values, indices)
top5_prob_orig, top5_idx_orig = probabilities_original.topk(5)
top5_prob_quant, top5_idx_quant = probabilities_quantized.topk(5)

Layer: Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 64, 224, 224])
Layer: Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 64, 224, 224])
Layer: Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 128, 112, 112])
Layer: Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 128, 112, 112])
Layer: Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 512, 28, 28])
Layer: Conv2d(512, 512, kernel_size=(3, 3), stri

In [25]:
# --- Load ImageNet Class Labels ---
# Requires 'imagenet_classes.txt' in the working directory, one label per line.
# Source: https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
with open("imagenet_classes.txt") as f:
    imagenet_labels = list(map(str.strip, f))

# --- Prepare Input Image ---
image_path = "dummy_image_96.jpg" # Replace with your image file path
img = Image.open(image_path).convert('RGB')

# Resize / center-crop to 224x224, convert to a tensor, normalize per channel
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])
# Add the batch axis and place the tensor on the models' device
input_tensor = transform(img).unsqueeze(0).to(device)

In [26]:
# --- Perform Inference on Both Models ---
model_original.eval()
model_quantized.eval()

# Gradients are not needed for evaluation
with torch.no_grad():
    output_original = model_original(input_tensor)
    output_quantized = model_quantized(input_tensor)

# Turn the logits into probabilities over the class axis
probabilities_original = output_original.softmax(dim=1)
probabilities_quantized = output_quantized.softmax(dim=1)

# Five most likely classes per model; topk yields (values, indices)
top5_prob_orig, top5_idx_orig = probabilities_original.topk(5)
top5_prob_quant, top5_idx_quant = probabilities_quantized.topk(5)

print("\n--- Original Model Top 5 Predictions ---")
# Row 0 is the only image in the batch; walk ids and probabilities together
for i, (class_id, probability) in enumerate(
    zip(top5_idx_orig[0].tolist(), top5_prob_orig[0].tolist())
):
    print(f"  {i+1}: {imagenet_labels[class_id]} -> {probability * 100:.2f}%")

print("\n--- Quantized Model Top 5 Predictions ---")
for i, (class_id, probability) in enumerate(
    zip(top5_idx_quant[0].tolist(), top5_prob_quant[0].tolist())
):
    print(f"  {i+1}: {imagenet_labels[class_id]} -> {probability * 100:.2f}%")

Layer: Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 64, 224, 224])
Layer: Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 64, 224, 224])
Layer: Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 128, 112, 112])
Layer: Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 128, 112, 112])
Layer: Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 512, 28, 28])
Layer: Conv2d(512, 512, kernel_size=(3, 3), stri

In [27]:
def print_top5(title, top_probs, top_indices, labels):
    """Print a titled, ranked list of class names with their probabilities.

    Args:
        title: Heading text placed between the ``---`` markers.
        top_probs: Tensor of probabilities, indexed as ``[0][rank]``.
        top_indices: Tensor of class ids, indexed as ``[0][rank]``.
        labels: Sequence mapping a class id to its display name.
    """
    print(f"\n--- {title} ---")
    ranked = zip(top_indices[0].tolist(), top_probs[0].tolist())
    for rank, (class_id, probability) in enumerate(ranked, start=1):
        print(f"  {rank}: {labels[class_id]} -> {probability * 100:.2f}%")


# --- Perform Inference on Both Models ---
model_original.eval()
model_quantized.eval()

with torch.no_grad():
    output_original = model_original(input_tensor)
    output_quantized = model_quantized(input_tensor)

# Apply Softmax to get probabilities
probabilities_original = torch.nn.functional.softmax(output_original, dim=1)
probabilities_quantized = torch.nn.functional.softmax(output_quantized, dim=1)

# Get the top 5 predictions and their probabilities
top5_prob_orig, top5_idx_orig = torch.topk(probabilities_original, 5)
top5_prob_quant, top5_idx_quant = torch.topk(probabilities_quantized, 5)

# One helper replaces the two copy-pasted print loops
print_top5("Original Model Top 5 Predictions", top5_prob_orig, top5_idx_orig, imagenet_labels)
print_top5("Quantized Model Top 5 Predictions", top5_prob_quant, top5_idx_quant, imagenet_labels)

# --- Calculate Accuracy ---
# "Accuracy" here means agreement with the original model on this one image,
# not agreement with a ground-truth label.
original_top1_pred = top5_idx_orig[0][0].item()
quantized_top1_pred = top5_idx_quant[0][0].item()

accuracy = 100.0 if original_top1_pred == quantized_top1_pred else 0.0

print("\n--- Accuracy Check ---")
print(f"Top-1 Prediction Accuracy of Quantized Model: {accuracy:.2f}%")

Layer: Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 64, 224, 224])
Layer: Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 64, 224, 224])
Layer: Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 128, 112, 112])
Layer: Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 128, 112, 112])
Layer: Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 512, 28, 28])
Layer: Conv2d(512, 512, kernel_size=(3, 3), stri

In [28]:
import torch
import torch.nn.functional as F

# --- Prerequisites from earlier cells ---
# model_original / model_quantized (already switched to eval mode) and
# input_tensor must exist before this cell runs.

# Gradients are not needed for evaluation
with torch.no_grad():
    output_original, output_quantized = (
        net(input_tensor) for net in (model_original, model_quantized)
    )

# Turn the logits into probabilities over the class axis
probabilities_original = torch.softmax(output_original, dim=1)
probabilities_quantized = torch.softmax(output_quantized, dim=1)

# Five most likely classes per model; topk yields (values, indices)
top5_prob_orig, top5_idx_orig = probabilities_original.topk(k=5)
top5_prob_quant, top5_idx_quant = probabilities_quantized.topk(k=5)

Layer: Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 64, 224, 224])
Layer: Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 64, 224, 224])
Layer: Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 128, 112, 112])
Layer: Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 128, 112, 112])
Layer: Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 256, 56, 56])
Layer: Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), Output Shape: torch.Size([1, 512, 28, 28])
Layer: Conv2d(512, 512, kernel_size=(3, 3), stri

In [29]:
# --- Calculate Probability Differences ---
print("\n--- Probability Difference for Top 5 Predictions ---")

# Walk the original model's top-5 classes together with their probabilities
for i, (orig_class_id, orig_prob) in enumerate(
    zip(top5_idx_orig[0].tolist(), top5_prob_orig[0].tolist())
):
    # Probability the quantized model assigns to that same class
    quant_prob_for_orig_class = probabilities_quantized[0, orig_class_id].item()

    # Absolute gap between the two probabilities, in percentage points
    prob_difference = 100 * abs(orig_prob - quant_prob_for_orig_class)

    class_name = imagenet_labels[orig_class_id]

    print(f"Original Top {i+1}: {class_name} ({orig_prob * 100:.2f}%)")
    print(f"Quantized Prob: {quant_prob_for_orig_class * 100:.2f}%")
    print(f"  --> Difference: {prob_difference:.2f}%\n")


--- Probability Difference for Top 5 Predictions ---
Original Top 1: poncho (9.61%)
Quantized Prob: 20.95%
  --> Difference: 11.34%

Original Top 2: stole (8.06%)
Quantized Prob: 11.70%
  --> Difference: 3.63%

Original Top 3: wool (3.85%)
Quantized Prob: 5.13%
  --> Difference: 1.28%

Original Top 4: coral reef (3.30%)
Quantized Prob: 2.23%
  --> Difference: 1.07%

Original Top 5: barn (2.16%)
Quantized Prob: 1.00%
  --> Difference: 1.16%



In [33]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import onnxruntime

def calculate_top1_consistency(original_model_path, quantized_model_path, validation_dir,
                               batch_size=32, providers=None):
    """
    Calculates the Top-1 prediction consistency between an original and quantized model.

    Both ONNX models are run on every image found under ``validation_dir`` and
    the share of images on which their top-1 classes agree is reported. The
    folder labels of the dataset are not used.

    Args:
        original_model_path (str): Path to the original ONNX model.
        quantized_model_path (str): Path to the quantized ONNX model.
        validation_dir (str): Path to the root directory of the validation images
            (one sub-directory per class, as expected by ``ImageFolder``).
        batch_size (int): Number of images per inference batch.
        providers (list | None): ONNX Runtime execution providers for both
            sessions. When None, CUDA is used if this build offers it, with
            the CPU provider as fallback.

    Returns:
        float: The Top-1 prediction consistency as a percentage.

    Raises:
        ValueError: If the validation loader yields no images.
    """
    # Define transformations for validation images
    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Create a dataset and DataLoader
    validation_dataset = datasets.ImageFolder(root=validation_dir, transform=val_transform)
    validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)

    # Providers are passed explicitly: ONNX Runtime builds that ship more than
    # one provider may raise a ValueError when the argument is omitted.
    if providers is None:
        available = onnxruntime.get_available_providers()
        providers = [
            candidate
            for candidate in ("CUDAExecutionProvider", "CPUExecutionProvider")
            if candidate in available
        ]

    # Load both models for inference
    ort_session_original = onnxruntime.InferenceSession(original_model_path, providers=providers)
    ort_session_quantized = onnxruntime.InferenceSession(quantized_model_path, providers=providers)

    # The input names do not change between batches; look them up once
    input_name_original = ort_session_original.get_inputs()[0].name
    input_name_quantized = ort_session_quantized.get_inputs()[0].name

    correct_matches = 0
    total_images = 0

    # Iterate through the validation dataset
    for images, _ in validation_loader:
        # Convert PyTorch tensors to NumPy arrays for ONNX Runtime
        images_np = images.numpy()

        # Get top-1 prediction from the original model
        output_original = ort_session_original.run(None, {input_name_original: images_np})[0]
        preds_original = np.argmax(output_original, axis=1)

        # Get top-1 prediction from the quantized model
        output_quantized = ort_session_quantized.run(None, {input_name_quantized: images_np})[0]
        preds_quantized = np.argmax(output_quantized, axis=1)

        # Count images on which both models agree (kept as a plain int)
        correct_matches += int((preds_original == preds_quantized).sum())
        total_images += images.size(0)

    if total_images == 0:
        raise ValueError(f"No images were loaded from {validation_dir!r}")

    # Calculate and return the final percentage as a built-in float
    return float(correct_matches) / total_images * 100

In [44]:
import os
import zipfile
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# --- Unzip the file first ---
zip_file_path = "testABC.zip"
extracted_folder_name = "test_data"

# Extract only when the target folder is missing, so re-runs skip the work
if os.path.exists(extracted_folder_name):
    print(f"Directory '{extracted_folder_name}' already exists. Skipping extraction.")
else:
    print(f"Extracting {zip_file_path} to {extracted_folder_name}...")
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(path=extracted_folder_name)
    print("Extraction complete.")

# --- Dataset and loader ---
# Resize / center-crop to 224x224, convert to a tensor, normalize per channel
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# ImageFolder reads from the extracted directory, not from the zip archive
test_dataset = datasets.ImageFolder(root=extracted_folder_name, transform=test_transform)

# Fixed order, 32 images per batch
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Directory 'test_data' already exists. Skipping extraction.


In [46]:
import torch

# Both models are expected to be loaded already and to live on `device`
model_original.eval()
model_quantized.eval()

# Running totals over the whole test loader
consistency_matches = 0
original_correct = 0
quantized_correct = 0
total_images = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Top-1 class per image from each model
        preds_original = model_original(images).max(dim=1).indices
        preds_quantized = model_quantized(images).max(dim=1).indices

        # Images on which the two models agree
        consistency_matches += preds_original.eq(preds_quantized).sum().item()

        # Images on which each model matches the dataset label
        original_correct += preds_original.eq(labels).sum().item()
        quantized_correct += preds_quantized.eq(labels).sum().item()

        total_images += images.shape[0]

# Share of images with identical top-1 predictions, in percent
consistency_percentage = 100 * (consistency_matches / total_images)


print(f"Top-1 Prediction Consistency: {consistency_percentage:.2f}%")


Top-1 Prediction Consistency: 95.24%
