In [1]:
# Standard library imports
import sys  # For system-specific parameters and functions
import os   # For interacting with the operating system
import time # For time-related functions
import copy # For deep copy operations

# Third-party library imports
import numpy as np  # For numerical operations
import psutil       # For retrieving information on system utilization
import onnx         # For Open Neural Network Exchange format support
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score # For model evaluation metrics
from tqdm import tqdm  # For progress bars

# PyTorch library imports
import torch               # Main library for tensors and deep learning in PyTorch
from torch import nn       # Neural network module in PyTorch
import torch.nn.utils.prune as prune # For network pruning utilities in PyTorch
import torchvision         # Computer vision extension for PyTorch
import torchvision.models as models # Pre-trained models in PyTorch
from torchvision.datasets import ImageFolder  # For loading image data from a directory
import torchvision.transforms as transforms  # For image transformations in PyTorch
from torch.utils.data import DataLoader, random_split, Dataset # For handling datasets in PyTorch


In [2]:
# Load the model architecture
trained_vgg_model = models.vgg11()

num_classes = 10

trained_vgg_model.classifier[-1] = nn.Linear(trained_vgg_model.classifier[-1].in_features, num_classes)


# Load the saved weights into the model. This was our better VGG 11 model
trained_vgg_model.load_state_dict(torch.load("../data/trained_vgg_model.pt"))

# Set to eval mode for faster inference
trained_vgg_model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [4]:
def prune_model(model, pruning_rate):
    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            prune.l1_unstructured(module, name='weight', amount=pruning_rate)
            prune.remove(module, 'weight')  # Make the pruning permanent
    return model


In [35]:
list(trained_vgg_model.parameters())

[Parameter containing:
 tensor([ 0.1599,  0.2963,  0.1462, -1.0774,  0.0581, -0.0480,  0.1275, -0.5771,
          0.2057, -0.0019, -0.3728,  0.1032,  0.2302,  0.2247, -0.7634, -1.0575,
          0.2493, -0.1630,  0.1165, -1.1046,  0.0036, -0.8408, -0.4137, -0.3944,
          0.5427,  0.2332,  0.0801,  0.1471, -0.1209, -0.4747, -0.2396, -1.1038,
         -0.5626,  0.2906,  0.1752,  0.3620, -0.0132,  0.1501, -0.5135,  0.2279,
          0.1374, -1.7165, -0.0161,  0.0337,  0.1304,  0.1580,  0.1544,  0.4259,
         -0.0191, -0.0603, -1.4738, -0.7610, -1.2169,  0.1578,  0.1741, -0.0514,
          0.1251,  0.0310, -1.1896,  0.1491, -0.8466, -0.0158,  0.2089, -1.3837],
        requires_grad=True),
 Parameter containing:
 tensor([[[[ 0.2704,  0.0000, -0.3916],
           [ 0.1592,  0.3472, -0.5112],
           [-0.1203,  0.3422, -0.2426]],
 
          [[ 0.3847, -0.0000, -0.6880],
           [ 0.4248,  0.4330, -0.8018],
           [ 0.0000,  0.4729, -0.3809]],
 
          [[ 0.2062, -0.0000, 

In [34]:
list(pruned_vgg_model.parameters())

[Parameter containing:
 tensor([ 0.1599,  0.2963,  0.1462, -1.0774,  0.0581, -0.0480,  0.1275, -0.5771,
          0.2057, -0.0019, -0.3728,  0.1032,  0.2302,  0.2247, -0.7634, -1.0575,
          0.2493, -0.1630,  0.1165, -1.1046,  0.0036, -0.8408, -0.4137, -0.3944,
          0.5427,  0.2332,  0.0801,  0.1471, -0.1209, -0.4747, -0.2396, -1.1038,
         -0.5626,  0.2906,  0.1752,  0.3620, -0.0132,  0.1501, -0.5135,  0.2279,
          0.1374, -1.7165, -0.0161,  0.0337,  0.1304,  0.1580,  0.1544,  0.4259,
         -0.0191, -0.0603, -1.4738, -0.7610, -1.2169,  0.1578,  0.1741, -0.0514,
          0.1251,  0.0310, -1.1896,  0.1491, -0.8466, -0.0158,  0.2089, -1.3837],
        requires_grad=True),
 Parameter containing:
 tensor([[[[0., 0., -0.],
           [0., 0., -0.],
           [-0., 0., -0.]],
 
          [[0., -0., -0.],
           [0., 0., -0.],
           [0., 0., -0.]],
 
          [[0., -0., -0.],
           [0., 0., -0.],
           [-0., 0., -0.]]],
 
 
         [[[-0., -0., 0.],

In [5]:
pruning_rate = 1.  # Define the pruning rate (e.g., 0.3 means pruning 30% of the weights)
pruned_vgg_model = prune_model(trained_vgg_model, pruning_rate)
pruned_vgg_model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [6]:
torch.save(pruned_vgg_model.state_dict(), "../data/trained_vgg_model_pruned.pt")


In [None]:
# Define the input data shape (batch size, channels, height, width)
input_shape = (1, 3, 224, 224)

# Generate some random input data
input_data = torch.randn(input_shape)


# Compare results from pruned and original model

In [27]:
# Run inference on the original model
with torch.no_grad():
    output = trained_vgg_model(input_data)

# Run inference on the pruned model
with torch.no_grad():
    pruned_output = pruned_vgg_model(input_data)

# Compare the results. The quantized model has the exact same output
print("Original output shape:", output.shape)
print("Pruned output shape:", quantized_output.shape)
print("Max absolute error:", torch.max(torch.abs(output - pruned_output)))


Original output shape: torch.Size([1, 10])
Pruned output shape: torch.Size([1, 10])
Max absolute error: tensor(17.7294)


# Quantize the model

In [None]:
quantized_vgg_model = torch.quantization.quantize_dynamic(
    trained_vgg_model, {torch.nn.Conv2d}, dtype=torch.qint8
)
quantized_vgg_model.eval()

# Compare results from quantized and original model

In [28]:
# Define the input data shape (batch size, channels, height, width)
input_shape = (1, 3, 224, 224)

# Generate some random input data
input_data = torch.randn(input_shape)


# Run inference on the original model
with torch.no_grad():
    output = trained_vgg_model(input_data)

# Run inference on the quantized model
with torch.no_grad():
    quantized_output = quantized_vgg_model(input_data)

# Compare the results. The quantized model has the exact same output
print("Original output shape:", output.shape)
print("Quantized output shape:", quantized_output.shape)
print("Max absolute error:", torch.max(torch.abs(output - quantized_output)))


Original output shape: torch.Size([1, 10])
Quantized output shape: torch.Size([1, 10])
Max absolute error: tensor(0.)


In [41]:
list(trained_vgg_model.parameters())

[Parameter containing:
 tensor([ 0.1599,  0.2963,  0.1462, -1.0774,  0.0581, -0.0480,  0.1275, -0.5771,
          0.2057, -0.0019, -0.3728,  0.1032,  0.2302,  0.2247, -0.7634, -1.0575,
          0.2493, -0.1630,  0.1165, -1.1046,  0.0036, -0.8408, -0.4137, -0.3944,
          0.5427,  0.2332,  0.0801,  0.1471, -0.1209, -0.4747, -0.2396, -1.1038,
         -0.5626,  0.2906,  0.1752,  0.3620, -0.0132,  0.1501, -0.5135,  0.2279,
          0.1374, -1.7165, -0.0161,  0.0337,  0.1304,  0.1580,  0.1544,  0.4259,
         -0.0191, -0.0603, -1.4738, -0.7610, -1.2169,  0.1578,  0.1741, -0.0514,
          0.1251,  0.0310, -1.1896,  0.1491, -0.8466, -0.0158,  0.2089, -1.3837],
        requires_grad=True),
 Parameter containing:
 tensor([[[[ 0.2704,  0.0000, -0.3916],
           [ 0.1592,  0.3472, -0.5112],
           [-0.1203,  0.3422, -0.2426]],
 
          [[ 0.3847, -0.0000, -0.6880],
           [ 0.4248,  0.4330, -0.8018],
           [ 0.0000,  0.4729, -0.3809]],
 
          [[ 0.2062, -0.0000, 

In [42]:
list(quantized_vgg_model.parameters())

[Parameter containing:
 tensor([[[[ 2.7043e-01,  2.0123e-02, -3.9159e-01],
           [ 1.5917e-01,  3.4721e-01, -5.1117e-01],
           [-1.2029e-01,  3.4223e-01, -2.4261e-01]],
 
          [[ 3.8470e-01, -5.7236e-02, -6.8796e-01],
           [ 4.2480e-01,  4.3295e-01, -8.0183e-01],
           [ 9.0467e-02,  4.7294e-01, -3.8086e-01]],
 
          [[ 2.0624e-01, -3.2893e-02, -3.9607e-01],
           [ 1.3709e-01,  2.9011e-01, -4.3236e-01],
           [-2.3931e-02,  3.3789e-01, -1.6167e-01]]],
 
 
         [[[-3.5906e-01, -2.6959e-01,  6.5864e-01],
           [-6.1003e-01, -3.6071e-01,  7.8990e-01],
           [-4.2977e-01, -1.5093e-01,  6.4548e-01]],
 
          [[-5.6736e-01, -4.0352e-01,  7.6397e-01],
           [-8.6998e-01, -3.7118e-01,  1.1020e+00],
           [-5.1612e-01, -8.5621e-02,  9.1326e-01]],
 
          [[-9.6227e-03, -2.8532e-01,  2.2197e-01],
           [-1.6902e-01, -2.8094e-01,  3.8599e-01],
           [-7.8710e-02, -6.8964e-02,  3.7558e-01]]],
 
 
         [[[ 5.12

In [10]:
torch.save(quantized_vgg_model.state_dict(), "../data/trained_vgg_model_quantized.pt")


In [11]:
file_path = "../data/trained_vgg_model_quantized.pt"
file_size_in_bytes = os.path.getsize(file_path)
file_size_in_megabytes = file_size_in_bytes / (1024 * 1024)  # convert from bytes to megabytes

print(f"The file size of the quantize VGG model is {file_size_in_megabytes:.2f} MB")


The file size of the quantize VGG model is 491.37 MB


In [12]:
input_data = torch.randn((100, 3, 224, 224))  # a random batch data in the shape of 100 images


In [13]:
%timeit trained_vgg_model(input_data)

3.21 s ± 82.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%timeit pruned_vgg_model(input_data)

3.17 s ± 58.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
%timeit quantized_vgg_model(input_data)

3.19 s ± 77.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:

def get_model_memory_usage(model):
    total_memory = 0
    for param in model.parameters():
        param_memory = param.data.numel() * param.data.element_size()
        total_memory += param_memory
    return total_memory/1048576

trained_vgg_memory = get_model_memory_usage(trained_vgg_model)
pruned_vgg_memory = get_model_memory_usage(pruned_vgg_model)

print(f"Original VGG model memory usage: {trained_vgg_memory:.2f} MB")
print(f"Pruned VGG model memory usage: {pruned_vgg_memory:.2f} MB")


Original VGG model memory usage: 491.36 MB
Pruned VGG model memory usage: 491.36 MB


In [17]:
def get_quantized_model_memory_usage(model):
    total_memory = 0
    for name, param in model.named_parameters():
        param_memory = 0
        if "weight" in name and hasattr(param, "q_per_channel_scales"):
            # Quantized parameter
            quantized_bits = 8  # Assuming qint8 quantization
            param_memory = param.data.numel() * (quantized_bits // 8)
        else:
            # Non-quantized parameter
            param_memory = param.data.numel() * param.data.element_size()
        total_memory += param_memory
    return total_memory/1048576

quantized_vgg_memory = get_quantized_model_memory_usage(quantized_vgg_model)

print(f"Quantized VGG model memory usage: {quantized_vgg_memory:.2f} MB")


Quantized VGG model memory usage: 122.87 MB


In [18]:
def check_sparsity(model):
    total_elements = 0
    zero_elements = 0
    for param in model.parameters():
        total_elements += param.numel()
        zero_elements += (param == 0).sum().item()
    sparsity = zero_elements / total_elements * 100
    return sparsity

trained_vgg_sparsity = check_sparsity(trained_vgg_model)
pruned_vgg_sparsity = check_sparsity(pruned_vgg_model)

print(f"Original VGG model sparsity: {trained_vgg_sparsity:.2f}%")
print(f"Pruned VGG model sparsity: {pruned_vgg_sparsity:.2f}%")


Original VGG model sparsity: 0.00%
Pruned VGG model sparsity: 7.16%


In [19]:
# !pip install psutil


In [20]:

def memory_usage():
    process = psutil.Process(os.getpid())
    return process.memory_info().rss

# Create input tensor
input_data = torch.randn((100, 3, 224, 224))

# Measure memory usage before inference
before_memory_usage = memory_usage()

# Run inference on the original model
with torch.no_grad():
    output = trained_vgg_model(input_data)

# Measure memory usage after inference
after_memory_usage = memory_usage()

print(f"Original VGG model memory usage during inference: {(after_memory_usage - before_memory_usage)/1048576} MB")

# Measure memory usage before inference
before_memory_usage_pruned = memory_usage()

# Run inference on the pruned model
with torch.no_grad():
    pruned_output = pruned_vgg_model(input_data)

# Measure memory usage after inference
after_memory_usage_pruned = memory_usage()

print(f"Pruned VGG model memory usage during inference: {(after_memory_usage_pruned - before_memory_usage_pruned)/1048576} MB")


# Measure memory usage before inference
before_memory_usage_pruned = memory_usage()

# Run inference on the pruned model
with torch.no_grad():
    pruned_output = quantized_vgg_model(input_data)

# Measure memory usage after inference
after_memory_usage_pruned = memory_usage()

print(f"Quantized VGG model memory usage during inference: {(after_memory_usage_pruned - before_memory_usage_pruned)/1048576} MB")


Original VGG model memory usage during inference: 7.609375 MB
Pruned VGG model memory usage during inference: 5.71875 MB
Quantized VGG model memory usage during inference: 0.046875 MB


In [22]:

# Set the model to evaluation mode
trained_vgg_model.eval()

# Define an example input tensor
dummy_input = torch.randn(1, 3, 224, 224)

# Convert the model to ONNX format
torch.onnx.export(trained_vgg_model, dummy_input, "../data/vgg.onnx", export_params=True)

verbose: False, log level: Level.ERROR



In [23]:

# https://pytorch.org/vision/main/models/generated/torchvision.models.vgg11.html
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
dataset = ImageFolder(root='../data/art-styles', transform=transform)

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)


In [24]:
# Check if GPU is available and use it, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device

device(type='cpu')

In [25]:
# Function to evaluate the model
def evaluate_model(model, test_loader, criterion):
    model.eval()
    y_true, y_pred = [], []
    total_loss = 0.0
    with torch.no_grad():
        for inputs, labels, *extra in tqdm(test_loader, total=len(test_loader)):
            inputs = inputs.to(device)
            labels = labels.to(device)
            for e in extra:
                e = e.to(device)   
            outputs = model(inputs, *extra)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    
    avg_loss = total_loss / len(test_loader)
    accuracy, f1, precision, recall = evaluate_model_metrics(np.array(y_true), np.array(y_pred))
    print(f'Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}, F1-score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}\n')
    return avg_loss, accuracy, f1, precision, recall

# Function to calculate evaluation metrics
def evaluate_model_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    return accuracy, f1, precision, recall


In [29]:
criterion = nn.CrossEntropyLoss()
evaluate_model(trained_vgg_model, test_loader, criterion)

100%|███████████████████████████████████████████| 32/32 [01:33<00:00,  2.92s/it]

Loss: 1.8108, Accuracy: 0.3500, F1-score: 0.3433, Precision: 0.3860, Recall: 0.3490






(1.8107835911214352,
 0.35,
 0.3433157367077415,
 0.3860420336155393,
 0.3490318741389112)

In [30]:
# slightly faster than original model but accuracy is way down!
evaluate_model(pruned_vgg_model, test_loader, criterion)

100%|███████████████████████████████████████████| 32/32 [01:34<00:00,  2.94s/it]

Loss: 13.7277, Accuracy: 0.1035, F1-score: 0.0188, Precision: 0.0103, Recall: 0.1000






(13.727665781974792, 0.1035, 0.018758495695514275, 0.01035, 0.1)

In [31]:
less_pruned_vgg_model = prune_model(trained_vgg_model, 0.3)
less_pruned_vgg_model.eval()

# That's a better compromise

evaluate_model(less_pruned_vgg_model, test_loader, criterion)

100%|███████████████████████████████████████████| 32/32 [01:32<00:00,  2.91s/it]

Loss: 1.8391, Accuracy: 0.3320, F1-score: 0.3246, Precision: 0.3718, Recall: 0.3307






(1.8390845768153667,
 0.332,
 0.324594687237511,
 0.3718084214903913,
 0.33067123523739994)

In [32]:
# exactly the same metrics but remember, less memory usage during inference!
evaluate_model(quantized_vgg_model, test_loader, criterion)

100%|███████████████████████████████████████████| 32/32 [01:31<00:00,  2.87s/it]

Loss: 1.8108, Accuracy: 0.3500, F1-score: 0.3433, Precision: 0.3860, Recall: 0.3490






(1.8107835911214352,
 0.35,
 0.3433157367077415,
 0.3860420336155393,
 0.3490318741389112)