In [None]:
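## Quantization test code - ENDG 511: dynamic int8 quantization of the pruned model, then an accuracy / inference-time / file-size comparison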

In [2]:
# quantization (dynamic)

import torch
import torch.nn as nn
import torch.quantization
import os

# Checkpoint of the pruned model. Defined once so that loading and the size
# report below always refer to the same file.
pruned_model_path = r"/Users/SeleemB/Desktop/ENDG511_Final_Project/models/model_language_mobilenet_20_epoch_new_normalize_0p6_dropout.pth"

# Load the pruned model onto the CPU. map_location='cpu' remaps tensors that
# were saved from another device, which the dynamic quantization step below
# relies on (it is applied to a CPU model here).
# NOTE(review): torch.load unpickles the file, so only load trusted checkpoints.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects a fully pickled nn.Module; pass weights_only=False there - confirm
# against the installed version.
model_pruned = torch.load(pruned_model_path, map_location='cpu')
model_pruned.eval()

# Apply dynamic quantization to Linear layers only. Every other layer type
# (e.g. Conv2d) stays in floating point, so the size reduction is limited to
# the weights held by nn.Linear modules.
quantized_model = torch.quantization.quantize_dynamic(
    model_pruned,                   # the model to quantize
    {nn.Linear},                    # layers to quantize (Conv2d not supported here)
    dtype=torch.qint8               # quantization data type to 8-bit int type
)

# Save the quantized model. The target is relative to the current working
# directory, so create the folder first instead of failing when it is missing.
quantized_model_path = "models/quantized_iterative_pruned_model.pth"
os.makedirs(os.path.dirname(quantized_model_path), exist_ok=True)
torch.save(quantized_model, quantized_model_path)

# Report on-disk model sizes in megabytes (10^6 bytes).
original_size = os.path.getsize(pruned_model_path) / 1e6
quantized_size = os.path.getsize(quantized_model_path) / 1e6

print(f"Original pruned model size: {original_size:.2f} MB")
print(f"Quantized model size: {quantized_size:.2f} MB")

Original pruned model size: 9.85 MB
Quantized model size: 9.35 MB


In [5]:
import torch
import torch.nn as nn
import os
import time
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# --------- Load Validation Data ---------
# Preprocessing pipeline: every image is resized to 224x224 and converted to a
# tensor. No normalization is applied here.
# NOTE(review): the checkpoint name mentions "new_normalize" - confirm whether
# the training pipeline normalized its inputs and whether that should be
# mirrored in this transform.
transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

# ImageFolder expects one sub-directory per class directly under the root.
val_dataset = datasets.ImageFolder(
    root='/Users/SeleemB/Desktop/ENDG511_Final_Project/languages',
    transform=transform,
)
# shuffle=False keeps the batch order fixed between evaluation runs.
data_loader_val = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

# --------- Load Models ---------
# Checkpoint locations: the pruned model is addressed by an absolute path, the
# quantized model by a path relative to the current working directory.
pruned_model_path = "/Users/SeleemB/Desktop/ENDG511_Final_Project/models/model_language_mobilenet_20_epoch_new_normalize_0p6_dropout.pth"
quantized_model_path = "models/quantized_iterative_pruned_model.pth"

# Deserialize both models onto the CPU and switch them to inference mode.
model_pruned = torch.load(pruned_model_path, map_location='cpu')
quantized_model = torch.load(quantized_model_path, map_location='cpu')
model_pruned.eval()
quantized_model.eval()

# --------- Evaluation Function ---------
def evaluate_model(model, dataloader, max_batches=None):
    """Measure top-1 accuracy and mean per-sample forward-pass time on CPU.

    The model is switched to eval mode and run under ``torch.no_grad()``.
    Only the ``model(inputs)`` call is timed; moving tensors and computing the
    accuracy are excluded from the measurement.

    Args:
        model: Object exposing ``eval()`` and callable as ``model(inputs)``.
            Its output is reduced with ``torch.max(outputs, 1)``, so it must
            have a class dimension at index 1.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        max_batches: Optional cap on the number of batches to evaluate.
            ``None`` (or any falsy value such as ``0``) means no cap.

    Returns:
        Tuple ``(accuracy, avg_inference_time)`` where ``accuracy`` is a
        percentage in [0, 100] and ``avg_inference_time`` is seconds per
        sample. ``(0.0, 0.0)`` is returned when the dataloader yields no
        samples, instead of raising ``ZeroDivisionError``.
    """
    model.eval()
    correct = 0
    total = 0
    total_time = 0.0

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloader):
            inputs, labels = inputs.to('cpu'), labels.to('cpu')

            # perf_counter is monotonic and high-resolution, unlike the
            # wall-clock time.time(), so short forward passes are timed reliably.
            start = time.perf_counter()
            outputs = model(inputs)
            total_time += time.perf_counter() - start

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            # Stop once max_batches batches have been processed.
            if max_batches and i >= max_batches - 1:
                break

    # Nothing was evaluated (empty dataloader): avoid dividing by zero.
    if total == 0:
        return 0.0, 0.0

    accuracy = 100 * correct / total
    avg_inference_time = total_time / total
    return accuracy, avg_inference_time

# --------- Evaluate Models ---------
# Both models are evaluated through the same loader with the same batch cap.
print("Evaluating pruned model...")
acc_pruned, time_pruned = evaluate_model(model_pruned, data_loader_val, max_batches=50)

print("Evaluating quantized model...")
acc_quant, time_quant = evaluate_model(quantized_model, data_loader_val, max_batches=50)

# --------- Get Model Sizes ---------
# On-disk checkpoint sizes in megabytes (10^6 bytes).
size_pruned = os.path.getsize(pruned_model_path) / 1e6
size_quant = os.path.getsize(quantized_model_path) / 1e6

# --------- Plot Comparison ---------
labels = ['Accuracy (%)', 'Avg Inference Time (s)', 'Model Size (MB)']
pruned_stats = [acc_pruned, time_pruned, size_pruned]
quant_stats = [acc_quant, time_quant, size_quant]

# The three metrics live on very different scales (percent, seconds per
# sample, megabytes). Sharing one y-axis would flatten the inference-time bars
# to nothing, so each metric gets its own subplot and its own axis.
fig, axes = plt.subplots(1, len(labels), figsize=(12, 4))
for ax, label, pruned_value, quant_value in zip(axes, labels, pruned_stats, quant_stats):
    ax.bar(['Pruned Model', 'Quantized Model'], [pruned_value, quant_value], color=['C0', 'C1'])
    ax.set_title(label)
    ax.set_ylabel(label)
    ax.grid(True, axis='y', linestyle='--', alpha=0.6)

fig.suptitle('Model Comparison: Pruned vs Quantized')
plt.tight_layout()
plt.show()

# --------- Print Final Stats ---------
print("\n--- Evaluation Summary ---")
print(f"Pruned Model     | Accuracy: {acc_pruned:.2f}% | Time: {time_pruned:.4f}s | Size: {size_pruned:.2f} MB")
print(f"Quantized Model  | Accuracy: {acc_quant:.2f}% | Time: {time_quant:.4f}s | Size: {size_quant:.2f} MB")


FileNotFoundError: Couldn't find any class folder in /Users/SeleemB/Desktop/ENDG511_Final_Project/languages.