In [13]:
# Imports here
import torch
import PIL
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from torchvision import datasets, transforms, models
from torch import nn
from torch import optim
from collections import OrderedDict
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [14]:
from time import time

In [15]:

data_dir = '/kaggle/input/'
train_dir = data_dir + '/train'

# Define your transformations
train_transforms = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load all the images from the train folder
all_data = datasets.ImageFolder(train_dir, transform=train_transforms)

# Calculate the sizes for train, validation, and test sets
total_size = len(all_data)
train_size = int(0.7 * total_size)
test_size = int(0.2 * total_size)
valid_size = total_size - train_size - test_size

# Use random_split to split the dataset
train_data, valid_data, test_data = torch.utils.data.random_split(all_data, [train_size, valid_size, test_size])

# Create data loaders
trainloader = torch.utils.data.DataLoader(train_data, batch_size=50, shuffle=False)
validloader = torch.utils.data.DataLoader(valid_data, batch_size=50)
testloader = torch.utils.data.DataLoader(test_data, batch_size=50)


In [16]:
# Load a pre-trained network 
model = models.alexnet(pretrained=True)
model.name = "alexnet"
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [17]:
# Freeze parameters so we don't backprop through them
for param in model.parameters():
    param.requires_grad = False

In [18]:
# Define a new, untrainted feed-forward network as a classifier, using ReLU activations and dropout
classifier = nn.Sequential(OrderedDict([
                          ('fc1', nn.Linear(9216, 4096, bias=True)),
                          ('relu1', nn.ReLU()),
                          ('dropout1', nn.Dropout(p=0.5)),
                          ('fc2', nn.Linear(4096, 14, bias=True)),
                          ('output', nn.LogSoftmax(dim=1))
                          ]))
    
model.classifier = classifier

In [19]:
# Device agnostic code, automatically uses CUDA if it's enabled
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [20]:
# change to device
model.to(device)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (fc1): Linear(in_features=9216, out_features=4096, bias=True)
    (relu1): ReLU()
    (dropout1): Dr

In [21]:
# Define loss and optimizer
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

# Define deep learning method
epochs = 5
print_every = 30 # Prints every 30 images out of batch of 50 images
steps = 0

In [22]:
# Implement a function for the validation pass
def validation(model, testloader, criterion):
    test_loss = 0
    accuracy = 0
    
    for ii, (inputs, labels) in enumerate(testloader):
        
        # Uncomment below line if gpu is available
        #inputs, labels = inputs.to(device), labels.to(device)
        
        output = model.forward(inputs)
        test_loss += criterion(output, labels).item()
        
        ps = torch.exp(output)
        equality = (labels.data == ps.max(dim=1)[1])
        accuracy += equality.type(torch.FloatTensor).mean()
    
    return test_loss, accuracy

In [23]:
# Train the classifier layers using backpropogation using the pre-trained network to get features

print("Training process initializing .....\n")

for e in range(epochs):
    running_loss = 0
    model.train() # Technically not necessary, setting this for good measure
    
    for ii, (inputs, labels) in enumerate(trainloader):
        steps += 1
        
        # Uncomment below line if gpu is available
        #inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        # Forward and backward passes
        outputs = model.forward(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if steps % print_every == 0:
            model.eval()

            with torch.no_grad():
                valid_loss, accuracy = validation(model, validloader, criterion)
            
            print("Epoch: {}/{} | ".format(e+1, epochs),
                  "Training Loss: {:.4f} | ".format(running_loss/print_every),
                  "Validation Loss: {:.4f} | ".format(valid_loss/len(validloader)),
                  "Validation Accuracy: {:.4f}".format(accuracy/len(validloader)))
            
            running_loss = 0
            model.train()

print("\nTraining process is now complete!!")

Training process initializing .....

Epoch: 1/5 |  Training Loss: 4.6187 |  Validation Loss: 1.1444 |  Validation Accuracy: 0.7164
Epoch: 1/5 |  Training Loss: 1.1984 |  Validation Loss: 0.8339 |  Validation Accuracy: 0.7245
Epoch: 1/5 |  Training Loss: 0.9206 |  Validation Loss: 0.7471 |  Validation Accuracy: 0.7600
Epoch: 1/5 |  Training Loss: 0.9151 |  Validation Loss: 0.7173 |  Validation Accuracy: 0.7629
Epoch: 1/5 |  Training Loss: 0.9804 |  Validation Loss: 0.7193 |  Validation Accuracy: 0.7710
Epoch: 1/5 |  Training Loss: 0.8576 |  Validation Loss: 0.6768 |  Validation Accuracy: 0.7833
Epoch: 2/5 |  Training Loss: 0.4843 |  Validation Loss: 0.6756 |  Validation Accuracy: 0.8014
Epoch: 2/5 |  Training Loss: 0.8015 |  Validation Loss: 0.6858 |  Validation Accuracy: 0.7836
Epoch: 2/5 |  Training Loss: 0.8842 |  Validation Loss: 0.6580 |  Validation Accuracy: 0.7969
Epoch: 2/5 |  Training Loss: 0.7931 |  Validation Loss: 0.6500 |  Validation Accuracy: 0.7917
Epoch: 2/5 |  Training 

## Testing your network

It's good practice to test your trained network on test data, images the network has never seen either in training or validation. This will give you a good estimate for the model's performance on completely new images. Run the test images through the network and measure the accuracy, the same way you did validation. You should be able to reach around 70% accuracy on the test set if the model has been trained well.

In [35]:
all_labels = []
all_predictions = []

Try to load all image tensors to gpu before the loop

In [25]:
# TODO: Do validation on the test set
# Do validation on the test set
from time import time
correct = 0
total = 0
start_time = time()
with torch.no_grad():
    model.eval()
    for images, labels in testloader:
        all_labels.extend(labels.numpy())
        # Uncomment below line if gpu is available
        #images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        #predicted_tensor_cpu = predicted.to('cpu')
        all_predictions.extend(predicted.numpy())
end_time = time()
print("Time: ",end_time - start_time)

print('Accuracy achieved by the network on test images is: %d%%' % (100 * correct / total))

Time:  71.00774002075195
Accuracy achieved by the network on test images is: 82%


In [26]:
print(len(all_labels))

2728


In [27]:
print(len(all_predictions))

2728


In [28]:
from sklearn.metrics import confusion_matrix
import numpy as np
all_labels = np.array(all_labels)
all_predictions = np.array(all_predictions)

# Calculate the confusion matrix
cm = confusion_matrix(all_labels, all_predictions)

print("Confusion Matrix:")
print(cm)

Confusion Matrix:
[[130   4   0   0   1   9   1   1   2   1   1   0   1   1]
 [  6 131   0   0   2   4   4   0   2  18   3   0   1   2]
 [  0   1 196   3   1   0   1   6   0   0   0   3   0   0]
 [  1   4   2 112   7   8   5  31   4   3   2   7   0   4]
 [  1   8   1  23 147   5   2   8   0   0   1   1   4   3]
 [  1   9   0   2   0 147   5   0   0   4   7   1   8   1]
 [  4   9   0   2   0   8 169   1   3   0   0   0   6   2]
 [  2   1   9  12   3   0   3 160   0   0   1   2   0   0]
 [  2   1   0   2   0   1   6   1 180   0   0   0   0   0]
 [  0  28   0   0   0   3   1   0   0 195   3   0   0   1]
 [  0   2   0   0   2  28   3   0   0   4 137   0  15   0]
 [  1   0   6   4   0   0   3   8   0   0   0 171   0   0]
 [  0   2   0   0   2  10   0   1   0   1   2   0 193   0]
 [  0  12   0   1   0   6   5   0   0   1   0   0   2 170]]


In [29]:
from sklearn.metrics import classification_report
report = classification_report(all_labels, all_predictions)
print(report)

              precision    recall  f1-score   support

           0       0.88      0.86      0.87       152
           1       0.62      0.76      0.68       173
           2       0.92      0.93      0.92       211
           3       0.70      0.59      0.64       190
           4       0.89      0.72      0.80       204
           5       0.64      0.79      0.71       185
           6       0.81      0.83      0.82       204
           7       0.74      0.83      0.78       193
           8       0.94      0.93      0.94       193
           9       0.86      0.84      0.85       231
          10       0.87      0.72      0.79       191
          11       0.92      0.89      0.90       193
          12       0.84      0.91      0.88       211
          13       0.92      0.86      0.89       197

    accuracy                           0.82      2728
   macro avg       0.83      0.82      0.82      2728
weighted avg       0.83      0.82      0.82      2728



In [30]:
# Assuming your train_data is a Subset object
original_dataset = train_data.dataset

# Create a class_to_idx mapping
model.class_to_idx = {class_name: idx for idx, class_name in enumerate(original_dataset.classes)}


In [31]:
print(model.class_to_idx)

{'astilbe': 0, 'bellflower': 1, 'black_eyed_susan': 2, 'calendula': 3, 'california_poppy': 4, 'carnation': 5, 'common_daisy': 6, 'coreopsis': 7, 'dandelion': 8, 'iris': 9, 'rose': 10, 'sunflower': 11, 'tulip': 12, 'water_lily': 13}


In [32]:
checkpoint = {'architecture': model.name,
             'classifier': model.classifier,
             'class_to_idx': model.class_to_idx,
             'state_dict': model.state_dict()}

torch.save(checkpoint, 'model.pth')

In [33]:
# TODO: Write a function that loads a checkpoint and rebuilds the model
# Write a function that loads a checkpoint and rebuilds the model
def load_checkpoint():
    """
    Loads deep learning model checkpoint.
    """
    
    # Load the saved file
    checkpoint = torch.load("/kaggle/working/model.pth")
    
    # Download pretrained model
    model = models.alexnet(pretrained=True);
    
    # Freeze parameters so we don't backprop through them
    for param in model.parameters(): param.requires_grad = False
    
    # Load stuff from checkpoint
    model.class_to_idx = checkpoint['class_to_idx']
    model.classifier = checkpoint['classifier']
    model.load_state_dict(checkpoint['state_dict'])

    
    return model

# QUANTIZATION

In [36]:
model_fp16 = load_checkpoint()



In [None]:
weights = model.state_dict()
print(weights)

In [None]:
model_fp16.half()

In [39]:
weights_half = model_fp16.state_dict()
print(weights_half['features.0.weight'].dtype)

torch.float16


In [None]:
model_fp16.to(device)

In [46]:
all_labels_new = []
all_predictions_new = []

In [None]:
correct_new = 0
total_new = 0
start_time_fp16 = time()
with torch.no_grad():
    model_fp16.eval()
    for images, labels in testloader:
        all_labels_new.extend(labels.numpy())
        #images, labels = images.to(device), labels.to(device)
        outputs = model_fp16(images.half())
        _, predicted = torch.max(outputs.data, 1)
        total_new += labels.size(0)
        correct_new += (predicted == labels).sum().item()
        #predicted_tensor_cpu = predicted.to('cpu')
        all_predictions_new.extend(predicted.numpy())
end_time_fp16 = time()
print("Time: ",end_time_fp16 - start_time_fp16)

print('Accuracy achieved by the network on test images is: %d%%' % (100 * correct_new / total_new))

In [None]:
all_labels_new = np.array(all_labels_new)
all_predictions_new = np.array(all_predictions_new)

# Calculate the confusion matrix
cm_fp16 = confusion_matrix(all_labels_new, all_predictions_new)

print("Confusion Matrix:")
print(cm_fp16)

In [None]:
report_new = classification_report(all_labels_new, all_predictions_new)
print(report_new)

# ===============================================================

In [None]:
model_64 = load_checkpoint()
model_64.double()

In [50]:
weights_64 = model_64.state_dict()
print(weights_64['features.0.weight'].dtype)

torch.float64


In [51]:
all_labels_fp64 = []
all_predictions_fp64 = []

In [None]:
model_64.to(device)

In [53]:
correct_fp64 = 0
total_fp64 = 0
start_time_fp64 = time()
with torch.no_grad():
    model_64.eval()
    for images, labels in testloader:
        all_labels_fp64.extend(labels.numpy())
        #images, labels = images.to(device), labels.to(device)
        outputs = model_64(images.double())
        _, predicted = torch.max(outputs.data, 1)
        total_fp64 += labels.size(0)
        correct_fp64 += (predicted == labels).sum().item()
        #predicted_tensor_cpu = predicted.to('cpu')
        all_predictions_fp64.extend(predicted.numpy())
end_time_fp64 = time()
print("Time: ",end_time_fp64 - start_time_fp64)

print('Accuracy achieved by the network on test images is: %d%%' % (100 * correct_fp64 / total_fp64))

Time:  137.28399443626404
Accuracy achieved by the network on test images is: 82%


In [54]:
all_labels_fp64 = np.array(all_labels_fp64)
all_predictions_fp64 = np.array(all_predictions_fp64)
# Calculate the confusion matrix
cm_fp64 = confusion_matrix(all_labels_fp64, all_predictions_fp64)
print("Confusion Matrix:")
print(cm_fp64)

Confusion Matrix:
[[129   6   0   1   1   9   4   0   0   1   0   0   1   0]
 [  4 134   0   0   0   6   4   0   2  18   2   1   0   2]
 [  0   0 193   2   1   0   0  11   0   0   0   4   0   0]
 [  3   4   6 121   5   9   4  27   3   2   1   3   0   2]
 [  3   8   2  22 143   2   2   9   0   0   5   0   6   2]
 [  2   9   0   2   1 145   5   0   1   5   4   1   9   1]
 [  2   9   0   0   2   9 172   2   2   2   0   0   4   0]
 [  0   2   6   7   2   2   1 170   0   0   0   2   1   0]
 [  0   1   1   2   0   2   6   2 178   0   0   0   1   0]
 [  0  29   0   0   0   3   2   0   0 196   1   0   0   0]
 [  0   3   0   0   0  38   1   0   0   2 129   0  17   1]
 [  0   2   7   3   0   0   2   7   0   1   0 171   0   0]
 [  2   2   0   2   5   4   1   0   0   1   2   0 192   0]
 [  1  10   1   2   0   3   7   0   0   2   1   0   4 166]]


In [55]:
report_64 = classification_report(all_labels_fp64, all_predictions_fp64)
print(report_64)

              precision    recall  f1-score   support

           0       0.88      0.85      0.87       152
           1       0.61      0.77      0.68       173
           2       0.89      0.91      0.90       211
           3       0.74      0.64      0.68       190
           4       0.89      0.70      0.79       204
           5       0.62      0.78      0.70       185
           6       0.82      0.84      0.83       204
           7       0.75      0.88      0.81       193
           8       0.96      0.92      0.94       193
           9       0.85      0.85      0.85       231
          10       0.89      0.68      0.77       191
          11       0.94      0.89      0.91       193
          12       0.82      0.91      0.86       211
          13       0.95      0.84      0.89       197

    accuracy                           0.82      2728
   macro avg       0.83      0.82      0.82      2728
weighted avg       0.83      0.82      0.82      2728



# ===============================================================

In [56]:
from torch.ao.quantization import QuantStub, DeQuantStub
import torch
from torch.ao.quantization import (
  get_default_qconfig_mapping,
  get_default_qat_qconfig_mapping,
  QConfigMapping,
)
import torch.ao.quantization.quantize_fx as quantize_fx
import copy

In [57]:
model_fp32 = load_checkpoint()



In [58]:
model_fp32.to('cpu')

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (fc1): Linear(in_features=9216, out_features=4096, bias=True)
    (relu1): ReLU()
    (dropout1): Dr

In [59]:
input_data = next(iter(trainloader))[0][:1]  
calibrate_data = input_data.to("cpu")

In [60]:
model_int8 = copy.deepcopy(model_fp32)
#model_int8.to(device)
qconfig_mapping = get_default_qconfig_mapping("qnnpack")
model_int8.eval()
# prepare
model_prepared = quantize_fx.prepare_fx(model_int8, qconfig_mapping, calibrate_data)
# calibrate 

In [61]:
with torch.no_grad():
    for i in range(20):
        batch = next(iter(trainloader))[0]
        output = model_prepared(batch.to('cpu'))

In [62]:
model_quantized_static = quantize_fx.convert_fx(model_prepared)

In [None]:
model_quantized_static.state_dict()

In [64]:
model_quantized_static

GraphModule(
  (features): Module(
    (0): QuantizedConvReLU2d(3, 64, kernel_size=(11, 11), stride=(4, 4), scale=0.09054233878850937, zero_point=0, padding=(2, 2))
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): QuantizedConvReLU2d(64, 192, kernel_size=(5, 5), stride=(1, 1), scale=0.19043809175491333, zero_point=0, padding=(2, 2))
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): QuantizedConvReLU2d(192, 384, kernel_size=(3, 3), stride=(1, 1), scale=0.13451653718948364, zero_point=0, padding=(1, 1))
    (8): QuantizedConvReLU2d(384, 256, kernel_size=(3, 3), stride=(1, 1), scale=0.1333240419626236, zero_point=0, padding=(1, 1))
    (10): QuantizedConvReLU2d(256, 256, kernel_size=(3, 3), stride=(1, 1), scale=0.08818456530570984, zero_point=0, padding=(1, 1))
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (c

In [65]:
all_labels_int8 = []
all_predictions_int8 = []

In [66]:
model_quantized_static.to('cpu')

GraphModule(
  (features): Module(
    (0): QuantizedConvReLU2d(3, 64, kernel_size=(11, 11), stride=(4, 4), scale=0.09054233878850937, zero_point=0, padding=(2, 2))
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): QuantizedConvReLU2d(64, 192, kernel_size=(5, 5), stride=(1, 1), scale=0.19043809175491333, zero_point=0, padding=(2, 2))
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): QuantizedConvReLU2d(192, 384, kernel_size=(3, 3), stride=(1, 1), scale=0.13451653718948364, zero_point=0, padding=(1, 1))
    (8): QuantizedConvReLU2d(384, 256, kernel_size=(3, 3), stride=(1, 1), scale=0.1333240419626236, zero_point=0, padding=(1, 1))
    (10): QuantizedConvReLU2d(256, 256, kernel_size=(3, 3), stride=(1, 1), scale=0.08818456530570984, zero_point=0, padding=(1, 1))
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (c

In [67]:
correct_pred = 0
total_pred = 0
start_time_int8 = time()
with torch.no_grad():
    model_quantized_static.eval()
    for data in testloader:
        images, labels = data
        all_labels_int8.extend(labels.numpy())
        #images, labels = images.to(device), labels.to(device)
        outputs = model_quantized_static(images.to('cpu'))
        _, predicted = torch.max(outputs.data, 1)
        total_pred += labels.size(0)
        correct_pred += (predicted == labels).sum().item()
        predicted_tensor_cpu = predicted.to('cpu')
        all_predictions_int8.extend(predicted_tensor_cpu.numpy())
end_time_int8 = time()
print("Time: ",end_time_int8 - start_time_int8)

print('Accuracy achieved by the network on test images is: %d%%' % (100 * correct_pred / total_pred))

Time:  34.13240170478821
Accuracy achieved by the network on test images is: 81%


In [74]:
import os

In [70]:
def print_model_size(mdl):
    torch.save(mdl.state_dict(), "tmp.pt")
    print("%.2f MB" %(os.path.getsize("tmp.pt")/1e6))
    os.remove('tmp.pt')

In [76]:
print("FP64 model size:",end='')
print_model_size(model_64)
print("FP32 model size:",end='')
print_model_size(model)
print("FP16 model size:",end='')
print_model_size(model_fp16)
print("INT8 model size:",end='')
print_model_size(model_quantized_static)

FP64 model size:322.24 MB
FP32 model size:161.12 MB
FP16 model size:80.56 MB
INT8 model size:40.30 MB


In [None]:
all_labels_int8 = np.array(all_labels_int8)
all_predictions_int8 = np.array(all_predictions_int8)

# Calculate the confusion matrix
cm_int8 = confusion_matrix(all_labels_int8, all_predictions_int8)

print("Confusion Matrix:")
print(cm_int8)

In [None]:
report_int8 = classification_report(all_labels_int8, all_predictions_int8)
print(report_int8)

In [None]:
for name, child in model_int8.named_children():
        for x, y in child.named_children():
            print(name,x)

In [None]:
for module in model_int8.modules():
    classname = module.__class__.__name__
    print(classname)

In [None]:
for module in model_int8.modules():
    classname = module.__class__.__name__   
    if 'LogSoftmax' == classname:
        module = nn.Sequential(OrderedDict([
                          ('dequant',torch.quantization.DeQuantStub()),
                          ('output', nn.LogSoftmax(dim=1))
                          ]))

In [None]:
model_int8