### **IMPORT LIBRARIES**


In [1]:
# Import library
import numpy as np
import torch.nn as nn, torch
from scipy.signal import savgol_filter
from scipy import sparse
import matplotlib.pyplot as plt
import pickle
import sys

# Adding path to py files
sys.path.append('../src')

# Importing the custom quantization module
from CustomQuantization import CustomQuantization as quant
from convertQuantizeModel import convertDenseLayer
from QuantLinearLayer import QuantLinear

### **LOADING WEIGHTS**


In [2]:
# Base name of the saved checkpoint; it is interpolated into the pickle path below.
bert_weight_name = 'bert-base-uncased'

# Read the pickled weights object from disk (path is relative to the notebook's
# working directory).
# SECURITY: pickle.load can execute arbitrary code while deserializing, so only
# open weight files that were produced locally or come from a trusted source.
with open(f'../saved/weights_only/{bert_weight_name}_weights.pkl','rb') as file:
    bert_weights_dict = pickle.load(file)

# Select a single entry by its string key; this is the sample passed to the
# quantization calls in the next section.
# NOTE(review): presumably a 2-D weight matrix keyed by parameter name — confirm
# against the script that produced the pickle.
weight = bert_weights_dict['encoder.layer.9.attention.self.value.weight']

### **QUANTIZATION MODULE**


In [3]:
# Instantiate the project-local CustomQuantization class (imported as `quant`).
QuantizationObject = quant()

# Find a suitable weight range for the sample. A copy is passed so the call
# receives its own object rather than `weight` itself.
# NOTE(review): assumes `weight` exposes .copy() (e.g. a numpy array) — confirm.
# NOTE(review): plot_path is supplied although save_plot=False; presumably it is
# ignored in that case — verify in CustomQuantization.extractRange.
QuantizationObject.extractRange(weight.copy(), save_plot=False, plot_path='./plots/Sample.png')

# Apply quantization to a fresh copy of the same sample. The return value is not
# captured here.
QuantizationObject.proceedQuantization(weight.copy())

Local minimum:  -0.19814117  Local max:  0.20933148 Points:  407
Ratio of first region range coverage: 0.31358024691358033 Region of selection index:  2
Ratio of second region range coverage: 0.19506172839506172 Region of selection index:  3
First Region Range:  (-0.14708194546676093, -0.019934458470461686)
Second Region Range (0.006095735717757039, 0.08518747959734474)


### **DUMMY MODEL CONVERSION TESTING**


In [4]:
class NNModel(nn.Module):
    """Dummy fully connected network used to exercise layer conversion.

    Three ``nn.Linear`` layers are applied back to back with no activation
    in between: 1000 -> 120 -> 84 -> 10 features.
    """

    def __init__(self):
        super().__init__()
        # Keep the fc1, fc2, fc3 creation order: it fixes both the child-module
        # order and the order in which random initial weights are drawn.
        self.fc1 = nn.Linear(in_features=1000, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Pass ``x`` through fc1, fc2 and fc3 in sequence and return the result."""
        hidden = self.fc2(self.fc1(x))
        return self.fc3(hidden)
    
# Instantiate the dummy model. Note that `SimpleNN` is an instance, not a class.
SimpleNN = NNModel()
# Bare expression on the last line so the notebook displays the module repr.
SimpleNN

NNModel(
  (fc1): Linear(in_features=1000, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [5]:
# Run the project-local converter on the dummy model, asking it to produce plots.
# NOTE(review): it is not visible from here whether convertDenseLayer modifies
# `SimpleNN` in place or returns a separate model — confirm before reusing
# `SimpleNN` as an unquantized baseline.
QuantNN = convertDenseLayer(SimpleNN, requires_plot=True)
# Bare expression on the last line so the notebook displays the converted model.
QuantNN

Folder 'plots/' already exists.
Layer Name: fc1
Local minimum:  -0.03162242  Local max:  0.031621963 Points:  63
Ratio of first region range coverage: 0.4098360655737705 Region of selection index:  0
Ratio of second region range coverage: 0.06557377049180323 Region of selection index:  1
First Region Range:  (-0.031622420996427536, -0.00652544330509882)
Second Region Range (0.017567655278576746, 0.021583171709189337)
Layer Name: fc2
Local minimum:  -0.091281354  Local max:  0.091267414 Points:  182
Ratio of first region range coverage: 0.3333333333333333 Region of selection index:  2
Ratio of second region range coverage: 0.18888888888888888 Region of selection index:  0
First Region Range:  (-0.09128135442733765, -0.057178837175552666)
Second Region Range (0.029080471167197586, 0.0892613839644652)
Layer Name: fc3
Local minimum:  -0.1090786  Local max:  0.108907275 Points:  217
Ratio of first region range coverage: 0.1953488372093023 Region of selection index:  10
Ratio of second regio

NNModel(
  (fc1): QuantLinear()
  (fc2): QuantLinear()
  (fc3): QuantLinear()
)

In [6]:
class SimpleCNN(nn.Module):
    """Small CNN: three conv/ReLU/max-pool stages followed by two linear layers.

    Every convolution uses a 3x3 kernel with padding 1, and every 2x2 pooling
    stage halves the spatial size. ``fc1`` expects ``128 * 4 * 4`` features per
    sample, so the network is built for 3-channel 32x32 inputs.

    Args:
        num_classes: Number of output features of the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Define convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Define max pooling layers (one module, reused after every convolution)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Define fully connected layers
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, num_classes)

        # Define activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute the class scores for ``x``.

        Args:
            x: Input tensor whose last three dimensions are ``(3, 32, 32)``.

        Returns:
            Tensor with one row of ``num_classes`` scores per input sample.

        Raises:
            RuntimeError: If the feature map after the last pooling stage is
                not ``(128, 4, 4)``, i.e. the input spatial size is not 32x32.
        """
        # Convolutional layers with ReLU activation and max pooling
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))

        # Without this check, view(-1, 2048) would silently regroup elements
        # across samples for other input sizes (e.g. four 16x16 images would
        # collapse into a single row) instead of failing.
        if tuple(x.shape[-3:]) != (128, 4, 4):
            raise RuntimeError(
                "SimpleCNN expects 3x32x32 inputs; got a feature map of shape "
                f"{tuple(x.shape)} after the convolutional stages"
            )

        # Flatten the tensor for fully connected layers
        x = x.view(-1, 128 * 4 * 4)

        # Fully connected layers with ReLU activation
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Create an instance of the SimpleCNN model with the default num_classes=10
CNNmodel = SimpleCNN()
# Bare expression on the last line so the notebook displays the module repr.
CNNmodel


SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2048, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
  (relu): ReLU()
)

In [7]:
# Run the project-local converter on the CNN with its default options.
# NOTE(review): `Qunat_model` looks like a typo for `Quant_model`; the name is
# left unchanged here because other cells may refer to it.
# NOTE(review): it is not visible from here whether convertDenseLayer modifies
# `CNNmodel` in place or returns a separate model — confirm.
Qunat_model = convertDenseLayer(CNNmodel)
# Bare expression on the last line so the notebook displays the converted model.
Qunat_model

Layer Name: fc1
Local minimum:  -0.022097081  Local max:  0.022096813 Points:  44
Ratio of first region range coverage: 0.4523809523809524 Region of selection index:  1
Ratio of second region range coverage: 0.3333333333333333 Region of selection index:  0
First Region Range:  (-0.022097080945968628, -0.00803538750518452)
Second Region Range (0.0010042725638909795, 0.020087999376383697)
Layer Name: fc2
Local minimum:  -0.04419101  Local max:  0.04417848 Points:  88
Ratio of first region range coverage: 0.2558139534883721 Region of selection index:  3
Ratio of second region range coverage: 0.22093023255813954 Region of selection index:  0
First Region Range:  (-0.04419101029634476, -0.025111234323544934)
Second Region Range (0.02007770876992833, 0.0421700809489597)


SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): QuantLinear()
  (fc2): QuantLinear()
  (relu): ReLU()
)