In [1]:
# Imports here
import torch
import PIL
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from torchvision import datasets, transforms, models
from torch import nn
from torch import optim
from collections import OrderedDict
from time import time
from sklearn.metrics import confusion_matrix,classification_report
import numpy as np
import skimage.io as io
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
data_dir = '/kaggle/input/flower-classification'
train_dir = data_dir + '/train'

# Define your transformations
train_transforms = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load all the images from the train folder
all_data = datasets.ImageFolder(train_dir, transform=train_transforms)

# Calculate the sizes for train, validation, and test sets
total_size = len(all_data)
train_size = int(0.7 * total_size)
test_size = int(0.2 * total_size)
valid_size = total_size - train_size - test_size

# Use random_split to split the dataset
train_data, valid_data, test_data = torch.utils.data.random_split(all_data, [train_size, valid_size, test_size])

# Create data loaders
trainloader = torch.utils.data.DataLoader(train_data, batch_size=50, shuffle=False)
validloader = torch.utils.data.DataLoader(valid_data, batch_size=50)
testloader = torch.utils.data.DataLoader(test_data, batch_size=50)

In [3]:
# Load a pre-trained network 
model = models.vgg16(pretrained=True)
model.name = "vgg16"
model

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:03<00:00, 162MB/s]  


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [4]:
for param in model.parameters():
    param.requires_grad = False

In [5]:
classifier = nn.Sequential(OrderedDict([
                          ('fc1', nn.Linear(25088, 4096, bias=True)),
                          ('relu1', nn.ReLU()),
                          ('dropout1', nn.Dropout(p=0.5)),
                          ('fc2', nn.Linear(4096, 14, bias=True)),
                          ('output', nn.LogSoftmax(dim=1))
                          ]))
    
model.classifier = classifier

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [7]:
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [8]:
# Define loss and optimizer
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

epochs = 5
print_every = 30 # Prints every 30 images out of batch of 50 images
steps = 0

In [9]:
def validation(model, testloader, criterion):
    test_loss = 0
    accuracy = 0
    
    for ii, (inputs, labels) in enumerate(testloader):
        
        inputs, labels = inputs.to(device), labels.to(device)
        
        output = model.forward(inputs)
        test_loss += criterion(output, labels).item()
        
        ps = torch.exp(output)
        equality = (labels.data == ps.max(dim=1)[1])
        accuracy += equality.type(torch.FloatTensor).mean()
    
    return test_loss, accuracy

In [10]:
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [11]:
def train_1(model,steps,print_every,epochs):
    print("Training process initializing .....\n")

    for e in range(epochs):
        running_loss = 0
        model.train() # Technically not necessary, setting this for good measure

        for ii, (inputs, labels) in enumerate(trainloader):
            steps += 1

            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            # Forward and backward passes
            outputs = model.forward(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:
                model.eval()

                with torch.no_grad():
                    valid_loss, accuracy = validation(model, validloader, criterion)

                print("Epoch: {}/{} | ".format(e+1, epochs),
                      "Training Loss: {:.4f} | ".format(running_loss/print_every),
                      "Validation Loss: {:.4f} | ".format(valid_loss/len(validloader)),
                      "Validation Accuracy: {:.4f}".format(accuracy/len(validloader)))

                running_loss = 0
                model.train()

    print("\nTraining process is now complete!!")
    return model

In [12]:
# original_dataset = train_data.dataset
# # Create a class_to_idx mapping
# model.class_to_idx = {class_name: idx for idx, class_name in enumerate(original_dataset.classes)}
# checkpoint = {'architecture': model.name,
#              'classifier': model.classifier,
#              'class_to_idx': model.class_to_idx,
#              'state_dict': model.state_dict()}

# torch.save(checkpoint, 'model.pth')

In [13]:
import torch.nn.utils.prune as prune
#     for name, module in model.named_modules():
#         if isinstance(module, torch.nn.Conv2d) or isinstance(module, torch.nn.Linear):
#             prune.l1_unstructured(module, name='weight', amount=0.5) 
#     #         prune.remove(module, name='weight')
#     # Define a dictionary to store the model information
#     # checkpoint = {
#     #     'architecture': model.name,
#     #     'classifier': model.classifier,
#     #     'class_to_idx': {class_name: idx for idx, class_name in enumerate(train_data.dataset.classes)},
#     #     'state_dict': model.state_dict()
#     # }

#     # # Save the checkpoint dictionary to a file
#     # torch.save(checkpoint, 'pruned_model_1.pth')
#     for name, module in model.named_modules():
#         if isinstance(module, torch.nn.Conv2d):
#             prune.l1_unstructured(module, name='weight', amount=0.2) 
#     #         prune.remove(module, name='weight')
#         elif isinstance(module, torch.nn.Linear):
#             prune.l1_unstructured(module, name='weight', amount=0.4) 
        

In [14]:
def global_prune(model):
    classifier_size = len(model.classifier)
    parameters_to_prune = [
        (model.features[0], 'weight'),  # First convolutional layer
        (model.features[2], 'weight'),  # Second convolutional layer
        (model.features[5], 'weight'),  # Third convolutional layer
        (model.features[7], 'weight'),  # Fourth convolutional layer
        (model.features[10], 'weight'),  # Fifth convolutional layer
        (model.features[12], 'weight'),  # Sixth convolutional layer
        (model.features[14], 'weight'),  # Seventh convolutional layer
        (model.features[17], 'weight'),  # Eighth convolutional layer
        (model.features[19], 'weight'),  # Ninth convolutional layer
        (model.features[21], 'weight'),  # Tenth convolutional layer
        (model.features[24], 'weight'),  # Eleventh convolutional layer
        (model.features[26], 'weight'),  # Twelfth convolutional layer
        (model.features[28], 'weight'),  # Thirteenth convolutional layer
    ]
    for module in model.features:
        if isinstance(module, torch.nn.Conv2d):
            parameters_to_prune.append((module, 'weight'))

    # Add fully connected layers to parameters_to_prune
    for module in model.classifier:
        if isinstance(module, torch.nn.Linear):
            parameters_to_prune.append((module, 'weight'))
    # Apply global unstructured pruning
    prune.global_unstructured(
        parameters_to_prune,
        pruning_method=prune.L1Unstructured,
        amount=0.4,  
    )
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            prune.remove(module, name='weight')
        elif isinstance(module, torch.nn.Linear):
            prune.remove(module, name='weight') 
    return model

In [15]:
def prune_1(model):
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            prune.l1_unstructured(module, name='weight', amount=0.5) 
        elif isinstance(module, torch.nn.Linear):
            prune.l1_unstructured(module, name='weight', amount=0.5) 
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            prune.remove(module, name='weight')
        elif isinstance(module, torch.nn.Linear):
             prune.remove(module, name='weight') 
    return model


In [16]:
def print_sparsity(model):
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
                print("Sparsity in conv .weight: {:.2f}%".format(100. * float(torch.sum(module.weight == 0))/ float(module.weight.nelement())))

        elif isinstance(module, torch.nn.Linear):
            print("Sparsity in conv .weight: {:.2f}%".format(100. * float(torch.sum(module.weight == 0))/ float(module.weight.nelement())))

In [17]:
for i in range(2):
    model=train_1(model,steps,print_every,epochs)
    model=prune_1(model)
    print_sparsity(model)

Training process initializing .....

Epoch: 1/5 |  Training Loss: 3.1476 |  Validation Loss: 1.1028 |  Validation Accuracy: 0.6567
Epoch: 1/5 |  Training Loss: 1.0908 |  Validation Loss: 0.7928 |  Validation Accuracy: 0.7486
Epoch: 1/5 |  Training Loss: 0.9501 |  Validation Loss: 0.6993 |  Validation Accuracy: 0.7655
Epoch: 1/5 |  Training Loss: 0.8544 |  Validation Loss: 0.7963 |  Validation Accuracy: 0.7440
Epoch: 1/5 |  Training Loss: 0.8626 |  Validation Loss: 0.7377 |  Validation Accuracy: 0.7793
Epoch: 1/5 |  Training Loss: 0.8680 |  Validation Loss: 0.6895 |  Validation Accuracy: 0.7736
Epoch: 2/5 |  Training Loss: 0.5085 |  Validation Loss: 0.7167 |  Validation Accuracy: 0.7833
Epoch: 2/5 |  Training Loss: 0.8759 |  Validation Loss: 0.6619 |  Validation Accuracy: 0.8083
Epoch: 2/5 |  Training Loss: 0.8254 |  Validation Loss: 0.6734 |  Validation Accuracy: 0.7840
Epoch: 2/5 |  Training Loss: 0.8188 |  Validation Loss: 0.7524 |  Validation Accuracy: 0.7714
Epoch: 2/5 |  Training 

In [18]:
import os
def print_model_size(mdl):
    torch.save(mdl.state_dict(), "tmp.pt")
    print("%.2f MB" %(os.path.getsize("tmp.pt")/1e6))
    os.remove('tmp.pt')

print_model_size(model)

470.16 MB


In [None]:
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        prune.remove(module, name='weight')
    elif isinstance(module, torch.nn.Linear):
        prune.remove(module, name='weight') 

In [None]:

print("Sparsity in conv .weight: {:.2f}%".format(100. * float(torch.sum(module.weight == 0))/ float(module.weight.nelement())))

In [None]:
import os
def print_model_size(mdl):
    torch.save(mdl.state_dict(), "tmp.pt")
    print("%.2f MB" %(os.path.getsize("tmp.pt")/1e6))
    os.remove('tmp.pt')

print_model_size(model)

In [None]:
def pred(Model,Testloader):
    all_labels = []
    all_predictions = []
    correct = 0
    total = 0
    start_time = time()
    with torch.no_grad():
        Model.eval()
        for images, labels in Testloader:
            all_labels.extend(labels.numpy())
            images, labels = images.to(device), labels.to(device)
            outputs = Model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            predicted_tensor_cpu = predicted.to('cpu')
            all_predictions.extend(predicted_tensor_cpu.numpy())
    end_time = time()
    print("Time: ",end_time - start_time)
    print('Accuracy achieved by the network on test images is: %d%%' % (100 * correct / total))
    
    return all_labels,all_predictions

In [None]:
labels_fp32,predictions_fp32 = pred(model,testloader)

In [None]:
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
            print("Sparsity in conv{:.2f} .weight: {:.2f}%".format(n,100. * float(torch.sum(module.weight == 0))/ float(module.weight.nelement())))
            
    elif isinstance(module, torch.nn.Linear):
        print("Sparsity in conv{:.2f} .weight: {:.2f}%".format(n,100. * float(torch.sum(module.weight == 0))/ float(module.weight.nelement())))

In [None]:

all_labels = np.array(labels_fp32)
all_predictions = np.array(predictions_fp32)
# Calculate the confusion matrix
cm = confusion_matrix(labels_fp32, predictions_fp32)
print("Confusion Matrix:")
print(cm)
print('----------------------------------------------------------------')
print("Classification Report:")
report = classification_report(all_labels, all_predictions)
print(report)

In [None]:
original_dataset = train_data.dataset
# Create a class_to_idx mapping
model.class_to_idx = {class_name: idx for idx, class_name in enumerate(original_dataset.classes)}
checkpoint = {'architecture': model.name,
             'classifier': model.classifier,
             'class_to_idx': model.class_to_idx,
             'state_dict': model.state_dict()}

torch.save(checkpoint, 'model.pth')

In [None]:
def load_checkpoint():
    """
    Loads deep learning model checkpoint.
    """
    
    # Load the saved file
    checkpoint = torch.load("/kaggle/input/models/model_vgg16.pth") # path to model
    
    # Download pretrained model
    model = models.vgg16(pretrained=True);
    
    # Freeze parameters so we don't backprop through them
    for param in model.parameters(): param.requires_grad = False
    
    # Load stuff from checkpoint
    model.class_to_idx = checkpoint['class_to_idx']
    model.classifier = checkpoint['classifier']
    model.load_state_dict(checkpoint['state_dict'])

    
    return model

In [None]:
def load_checkpoint():
    """
    Loads deep learning model checkpoint.
    """
    
    # Load the saved file
    checkpoint = torch.load("/kaggle/working/pruned_model.pth")  # Path to model
    
    # Download pretrained model
    model = models.vgg16(pretrained=True)
    
    # Freeze parameters so we don't backprop through them
    for param in model.parameters():
        param.requires_grad = False
    
    # Load stuff from checkpoint
    model.class_to_idx = checkpoint['class_to_idx']
    model.classifier = checkpoint['classifier']
    
    # Load state_dict, handling additional keys related to pruning
    state_dict = checkpoint['state_dict']
    new_state_dict = {}
    for key, value in state_dict.items():
        if 'mask' not in key:  # Exclude keys related to pruning
            new_state_dict[key] = value
    model.load_state_dict(new_state_dict)
    
    return model


In [None]:
pruned_model=torch.load("/kaggle/working/pruned_model.pth")

In [None]:
def pred(Model,Testloader):
    all_labels = []
    all_predictions = []
    correct = 0
    total = 0
    start_time = time()
    with torch.no_grad():
        Model.eval()
        for images, labels in Testloader:
            all_labels.extend(labels.numpy())
            images, labels = images.to(device), labels.to(device)
            outputs = Model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            predicted_tensor_cpu = predicted.to('cpu')
            all_predictions.extend(predicted_tensor_cpu.numpy())
    end_time = time()
    print("Time: ",end_time - start_time)
    print('Accuracy achieved by the network on test images is: %d%%' % (100 * correct / total))
    
    return all_labels,all_predictions

In [None]:
labels_fp32,predictions_fp32 = pred(model,testloader)

In [None]:
pip install torchsummary

In [None]:
import torch
import torchvision.models as models
from torchsummary import summary

summary(model, (3, 224, 224))


In [None]:
import torch
import torchvision.models as models
import torch.nn.utils.prune as prune

# Step 1: Load the pre-trained VGG16 model from the checkpoint file
checkpoint = torch.load("/kaggle/working/model.pth")
model = models.vgg16(pretrained=True)  # Load an untrained VGG16 model
model.classifier = checkpoint['classifier']  # Replace the classifier with the one from the checkpoint
model.load_state_dict(checkpoint['state_dict'])  # Load the pre-trained weights

# Step 2: Prune the model (example using L1 unstructured pruning)
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d) or isinstance(module, torch.nn.Linear):
        prune.random_unstructured(module, name='weight', amount=0.5)  # Prune 50% of weights


In [None]:
# Save the pruned model
torch.save({
    'classifier': model.classifier,
    'state_dict': model.state_dict()
}, 'pruned_model0.pth')


# QUANTIZATION

## FP-16

In [None]:
model_fp16 = load_checkpoint()

In [None]:
model.state_dict()['features.0.weight'].dtype

In [None]:
model.half()

In [None]:
model_fp16.half()
model_fp16.to(device)

In [None]:
# weights_32 = model.state_dict()['features.0.weight']
# weights_16 = model_fp16.state_dict()['features.0.weight']
# weights_32_to_16 = torch.tensor(weights_32 ,dtype = torch.float16)
# weights_16 == weights_32_to_16

In [None]:
def pred_16(Model,Testloader):
    all_labels = []
    all_predictions = []

    correct = 0
    total = 0
    start_time = time()
    with torch.no_grad():
        Model.eval()
        for images, labels in Testloader:
            all_labels.extend(labels.numpy())
            images, labels = images.to(device), labels.to(device)
            outputs = Model(images.half())
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            predicted_tensor_cpu = predicted.to('cpu')
            all_predictions.extend(predicted_tensor_cpu.numpy())
    end_time = time()
    print("Time: ",end_time - start_time)
    print('Accuracy achieved by the network on test images is: %d%%' % (100 * correct / total))
    
    return all_labels,all_predictions

In [None]:
labels_fp16,predictions_fp16 = pred_16(model,testloader)

## FP-64

In [None]:
model_64 = load_checkpoint()
model_64.double()

In [None]:
model_64.to(device)
all_labels_fp64 = []
all_predictions_fp64 = []
correct_fp64 = 0
total_fp64 = 0
start_time_fp64 = time()
with torch.no_grad():
    model_64.eval()
    for images, labels in testloader:
        all_labels_fp64.extend(labels.numpy())
        images, labels = images.to(device), labels.to(device)
        outputs = model_64(images.double())
        _, predicted = torch.max(outputs.data, 1)
        total_fp64 += labels.size(0)
        correct_fp64 += (predicted == labels).sum().item()
        predicted_tensor_cpu = predicted.to('cpu')
        all_predictions_fp64.extend(predicted_tensor_cpu.numpy())
end_time_fp64 = time()
print("Time: ",end_time_fp64 - start_time_fp64)

print('Accuracy achieved by the network on test images is: %d%%' % (100 * correct_fp64 / total_fp64))

## INT-8

In [None]:
from torch.ao.quantization import QuantStub, DeQuantStub
import torch
from torch.ao.quantization import (
  get_default_qconfig_mapping,
  get_default_qat_qconfig_mapping,
  QConfigMapping,
)
import torch.ao.quantization.quantize_fx as quantize_fx
import copy

In [None]:
model_fp32 = load_checkpoint()

In [None]:
model_fp32.to('cpu')

In [None]:
input_data = next(iter(trainloader))[0][:1]  
calibrate_data = input_data.to("cpu")

In [None]:
model_int8 = copy.deepcopy(model_fp32)
#model_int8.to(device)
qconfig_mapping = get_default_qconfig_mapping("qnnpack")
model_int8.eval()
# prepare
model_prepared = quantize_fx.prepare_fx(model_int8, qconfig_mapping, calibrate_data)
# calibrate

In [None]:
with torch.no_grad():
    for i in range(20):
        batch = next(iter(trainloader))[0]
        output = model_prepared(batch.to('cpu'))

In [None]:
model_quantized_static = quantize_fx.convert_fx(model_prepared)
model_quantized_static.state_dict()

In [None]:
model_quantized_static

In [None]:
model_quantized_static.to('cpu')

In [None]:
all_predictions_int8 = []
all_labels_int8 = []
correct_pred = 0
total_pred = 0
start_time_int8 = time()
with torch.no_grad():
    model_quantized_static.eval()
    for data in testloader:
        images, labels = data
        all_labels_int8.extend(labels.numpy())
        #images, labels = images.to(device), labels.to(device)
        outputs = model_quantized_static(images.to('cpu'))
        _, predicted = torch.max(outputs.data, 1)
        total_pred += labels.size(0)
        correct_pred += (predicted == labels).sum().item()
        predicted_tensor_cpu = predicted.to('cpu')
        all_predictions_int8.extend(predicted_tensor_cpu.numpy())
end_time_int8 = time()
print("Time: ",end_time_int8 - start_time_int8)
print('Accuracy achieved by the network on test images is: %d%%' % (100 * correct_pred / total_pred))

In [None]:
model_quantized_static.state_dict().keys()

In [None]:
weight_8 = model_quantized_static.state_dict()['features.0.weight']

## Extracting outputs and weights from fp32 model

In [None]:
model_fp32 = load_checkpoint()

In [None]:
model_fp32

In [None]:
activation = {}
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

In [None]:
model_children = list(model_fp32.children())
print(model_children)

In [None]:
model_children[0]

In [None]:
for i in range(len(model_children[0])):
    model_children[0][i].register_forward_hook(get_activation('conv'+str(i)))

In [None]:
model_fp32.to(device)
correct = 0
total = 0
count = 1
new = []
all_labels = []
all_predictions = []
with torch.no_grad():
    model_fp32.eval()
    for images, labels in testloader:
        new.extend(images)
        all_labels.extend(labels.numpy())
        images, labels = images.to(device), labels.to(device)
        outputs = model_fp32(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        predicted_tensor_cpu = predicted.to('cpu')
        all_predictions.extend(predicted_tensor_cpu.numpy())
        if count == 1:
            break

print('Accuracy achieved by the network on test images is: %d%%' % (100 * correct / total))

In [None]:
for i in range(len(activation)):
    print(i,':',activation['conv'+str(i)].shape)

In [None]:
activation.keys()

In [None]:
model_fp32.state_dict().keys()

### Extracting weights for testing 

In [None]:
# Extracting weights and bias to check only the shape and number of filters
weights_0 = model_fp32.state_dict()['features.0.weight']
bias_0 = model_fp32.state_dict()['features.0.bias']

In [None]:
weights_0.shape # --> torch.Size([64, 3, 3, 3]) ==> 64 filters of 3*3*3

In [None]:
weights_10.shape # --> torch.Size([256, 128, 3, 3]) 256 filters of 128*3*3

## CONV FUNCTION

In [None]:
def ReLU(z):
    return np.maximum(0, z)

In [None]:
def conv(image, input_shape, n_filters, filters, bias, kernel_size, stride, padding=False):
    if(padding):
        image = np.pad(image, ((0, 0), (1, 1),(1, 1)), mode='constant', constant_values=0)
    img_h, img_w = image.shape[1:]
    filter_h, filter_w = kernel_size[:]

    #filters = np.random.rand(n_filters, kernel_size[0], kernel_size[1], input_shape[2])

    output_fmaps = []
    fmap_h = (img_h - filter_h)//stride + 1
    fmap_w =  (img_w - filter_w)//stride + 1

    for n, filter in enumerate(filters[: , : , :, ]):
        fmap = np.zeros((fmap_h, fmap_w))

        sum = 0
        for i in range(0, stride*fmap_h, stride):
            for j in range(0, stride*fmap_w, stride):
                input_patch = image[:, i:i+filter_h, j:j+filter_w]
                sum = np.sum(input_patch * filter) + bias[n]
                fmap[i//stride, j//stride] = ReLU(sum)
        output_fmaps.append(fmap)

    return output_fmaps

## MAX POOLING 

In [None]:
# import numpy as np

# def maxPool2D(fmaps, kernel_size, stride, padding=0):

#     # Add padding to the input image
#     padded_image = np.pad(image, pad_width=padding, mode='constant', constant_values=0)

#     # Initialize output image
#     output_image_maps = []
#     output_height = (padded_image.shape[0] - kernel_size) // stride + 1
#     output_width = (padded_image.shape[1] - kernel_size) // stride + 1
#     output_image = np.zeros((output_height, output_width))

#     # Perform max pooling
#     for i in range(0, padded_image.shape[0] - kernel_size + 1, stride):
#         for j in range(0, padded_image.shape[1] - kernel_size + 1, stride):
#             output_image[i // stride, j // stride] = np.max(padded_image[i:i + kernel_size, j:j + kernel_size])

#     return output_image


### Comparing Model outputs with custom conv function output

In [None]:
img_index = 3 # range from 0 to 49 -> 50 different images

In [None]:
test_img = new[img_index].numpy()
io.imshow(test_img[0]) # test image has 3 channels,displaying only only 1 channel
io.show()

In [None]:
weights = model_fp32.state_dict()['features.0.weight']
bias = model_fp32.state_dict()['features.0.bias']
weights = weights.to('cpu').numpy()
bias = bias.to('cpu').numpy()

In [None]:
# Testing custom conv function for 1 image
start = time()
layer_1_fmaps = conv(test_img, test_img.shape, weights.shape[0], weights, bias, weights.shape[2:], stride=1,padding=True)
end = time()
print(f"Time taken : {end-start}")

In [None]:
layer_1_fmaps[0].dtype

In [None]:
layer_1_fmaps = [arr.astype(np.float32) for arr in layer_1_fmaps]

In [None]:
# getting the output of required layer for 50 images from model
layer1 = activation['conv0'].cpu().numpy() 

In [None]:
io.imshow(layer_1_fmaps[1]) # from custom function
io.show()

In [None]:
io.imshow(layer1[3][1] )
io.show()

In [None]:
io.imshow(layer1[3][1] - layer_1_fmaps[1]) # difference in fp32
io.show()

In [None]:
layer1[3][1] # one ofmap out of 64 maps for an image out of 50 images

In [None]:
layer_1_fmaps[1] # one ofmap out of 64 maps

In [None]:
layer_1_fmaps[0].dtype == layer1[3][0].dtype

In [None]:
layer_1_fmaps[0].shape == layer1[3][0].shape

In [None]:
layer_1_fmaps = [arr.astype(np.int8) for arr in layer_1_fmaps]
layer1[3] = [arr.astype(np.int8) for arr in layer1[3]]

In [None]:
layer_1_fmaps == layer1[3]

### Storing the weights in a pt file

In [None]:
weights = model_fp32.state_dict()['features.0.weight']

# Create a simple script module to hold the weights
class MyScriptModule(torch.jit.ScriptModule):
    def __init__(self, weights):
        super(MyScriptModule, self).__init__()
        self.weights = torch.nn.Parameter(weights)

    @torch.jit.script_method
    def forward(self):
        return self.weights

# Instantiate the script module and save it
script_module = MyScriptModule(weights)
script_module.save("features.0.weight.pt")