### **IMPORT LIBRARY**


In [5]:
# Import library
import numpy as np
import torch.nn as nn, torch
from scipy.signal import savgol_filter
from scipy import sparse
import matplotlib.pyplot as plt
import pickle
import sys

# Adding path to py files
sys.path.append('../src')

# Importing the custom quantization module
from CustomQuantization import CustomQuantization as quant
from convertQuantizeModel import convertDenseLayer
from QuantLinearLayer import QuantLinear

### **LOADING WEIGHTS**


In [6]:
# Weights file names
# Base name of the checkpoint; it is interpolated into the pickle path below.
bert_weight_name = 'bert-base-uncased'

# Loading bert model
# Reads the pickled weights object from ../saved/weights_only/ (relative to the
# notebook's working directory).
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so this
# file must come from a trusted source.
with open(f'../saved/weights_only/{bert_weight_name}_weights.pkl','rb') as file:
    bert_weights_dict = pickle.load(file)

# Select one entry (keyed by parameter name) to use as the sample weight for the
# quantization cell.
# presumably a NumPy array, since it is later duplicated with .copy() — TODO confirm
weight = bert_weights_dict['encoder.layer.9.attention.self.value.weight']

### **QUANTIZATION MODULE**


In [7]:
# Instantiate the project's CustomQuantization class (imported as `quant`) with
# its default arguments.
QuantizationObject = quant()

# Finding suitable weight range
# A copy is passed so that `weight` itself is not modified by the call.
# save_plot=False: presumably no figure is written to plot_path — TODO confirm
# against CustomQuantization.extractRange.
QuantizationObject.extractRange(weight.copy(), save_plot=False, plot_path='./plots/Sample.png')

# Applying quantization
# Again operates on a fresh copy; the return value (if any) is discarded here.
# NOTE(review): presumably relies on state stored by extractRange above — confirm.
QuantizationObject.proceedQuantization(weight.copy())

Local minimum:  -0.19814117  Local max:  0.20933148 Points:  407
Ratio of first region range coverage: 0.3308641975308642 Region of selection index:  1
Ratio of second region range coverage: 0.10864197530864196 Region of selection index:  2
(-0.14107497757717197, -0.006919361376352323) (0.03412825253583876, 0.07817935039282431)


### **DUMMY MODEL CONVERSION TESTING**


In [8]:
class NNModel(nn.Module):
    """Three stacked fully connected layers with no activations in between.

    Feature sizes go 1000 -> 120 -> 84 -> 10. The model exists only as a small
    fixture for exercising the dense-layer conversion.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in forward order: fc1, fc2, fc3.
        self.fc1 = nn.Linear(in_features=1000, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Apply fc1, fc2 and fc3 in sequence and return the final output."""
        hidden_first = self.fc1(x)
        hidden_second = self.fc2(hidden_first)
        return self.fc3(hidden_second)


# Dummy Model
SimpleNN = NNModel()
SimpleNN

NNModel(
  (fc1): Linear(in_features=1000, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [9]:
# Run the project's dense-layer conversion on the dummy model. In the recorded
# output every Linear layer (fc1, fc2, fc3) is shown as QuantLinear.
# NOTE(review): requires_plot=True presumably makes the conversion produce range
# plots; whether SimpleNN is modified in place or copied is not visible here —
# confirm in convertQuantizeModel.
QuantNN = convertDenseLayer(SimpleNN, requires_plot=True)
# Bare expression so the notebook displays the converted model's repr.
QuantNN

Local minimum:  -0.031622294  Local max:  0.031622723 Points:  63
Ratio of first region range coverage: 0.34426229508196715 Region of selection index:  1
Ratio of second region range coverage: 0.2786885245901639 Region of selection index:  0
(0.008533272064394422, 0.029614944424894117) (-0.03162229433655739, -0.014556178616152871)
Local minimum:  -0.09127197  Local max:  0.09128484 Points:  182
Ratio of first region range coverage: 0.27222222222222214 Region of selection index:  2
Ratio of second region range coverage: 0.23333333333333336 Region of selection index:  0
(0.040128812685117615, 0.08927872252988289) (-0.09127196669578552, -0.049143472543129556)
Local minimum:  -0.10876174  Local max:  0.10891559 Points:  217
Ratio of first region range coverage: 0.18139534883720929 Region of selection index:  5
Ratio of second region range coverage: 0.14418604651162792 Region of selection index:  3
(0.014622183061689817, 0.0537439156237835) (-0.06462440341024356, -0.0335276416301178)


NNModel(
  (fc1): QuantLinear()
  (fc2): QuantLinear()
  (fc3): QuantLinear()
)

In [10]:
class SimpleCNN(nn.Module):
    """Small CNN: three conv + ReLU + max-pool stages, then two dense layers.

    Each convolution uses kernel_size=3 with padding=1, so it keeps the spatial
    size; each pooling stage halves it. The first dense layer expects
    128 * 4 * 4 features, so inputs must be 3-channel 32x32 images.

    Args:
        num_classes: Number of output features of the final dense layer.
    """

    # (channels, height, width) of the feature map that fc1 was sized for.
    _FEATURE_SHAPE = (128, 4, 4)

    def __init__(self, num_classes=10):
        super().__init__()

        # Define convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Define max pooling layers
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Define fully connected layers
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, num_classes)

        # Define activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class scores for a batch of 3x32x32 images.

        Args:
            x: Input tensor whose trailing dimensions are (3, 32, 32).

        Returns:
            Tensor with one row of ``num_classes`` scores per input sample.

        Raises:
            RuntimeError: If the pooled feature map is not 128x4x4, i.e. the
                input resolution is not 32x32.
        """
        # Convolutional layers with ReLU activation and max pooling
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))

        # Guard the flatten below: view(-1, ...) infers the batch dimension, so
        # a larger feature map (e.g. from 64x64 inputs) would otherwise be
        # silently folded into extra batch rows instead of failing.
        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise RuntimeError(
                "SimpleCNN expects 32x32 inputs (feature map "
                f"{self._FEATURE_SHAPE} after pooling), got feature map "
                f"{tuple(x.shape[-3:])}"
            )

        # Flatten the tensor for fully connected layers
        x = x.view(-1, 128 * 4 * 4)

        # Fully connected layers with ReLU activation
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Create an instance of the SimpleCNN model
CNNmodel = SimpleCNN()
CNNmodel


SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2048, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
  (relu): ReLU()
)

In [11]:
# Run the project's dense-layer conversion on the CNN. In the recorded output
# only fc1 and fc2 are shown as QuantLinear; the Conv2d, MaxPool2d and ReLU
# modules are unchanged.
# NOTE(review): "Qunat_model" looks like a typo for "Quant_model"; the name is
# kept because other cells may reference it — rename everywhere at once.
Qunat_model = convertDenseLayer(CNNmodel)
# Bare expression so the notebook displays the converted model's repr.
Qunat_model

Local minimum:  -0.022097087  Local max:  0.022097042 Points:  44


Ratio of first region range coverage: 0.38095238095238093 Region of selection index:  0
Ratio of second region range coverage: 0.38095238095238093 Region of selection index:  1
(-0.022097086533904076, -0.006026494401422413) (0.0040176256813786275, 0.02008821781386029)
Local minimum:  -0.04413873  Local max:  0.0441814 Points:  88
Ratio of first region range coverage: 0.37209302325581395 Region of selection index:  4
Ratio of second region range coverage: 0.22093023255813954 Region of selection index:  0
(0.01005771298977462, 0.042174123396927665) (-0.04413872957229614, -0.025069610893049023)


SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): QuantLinear()
  (fc2): QuantLinear()
  (relu): ReLU()
)