In [1]:
# Import library
import numpy as np
import torch.nn as nn, torch
import matplotlib.pyplot as plt
import sys
from tqdm import tqdm
from sklearn.metrics import classification_report

# Adding path to py files
sys.path.append('..')

# **QUANTIZATION ONLY**


In [None]:
# Importing the custom quantization module
from CustomQuantization_tensor import CustomQuantization as quant
from convertQuantizeModel_tensor import convertDenseLayer
from utils_tensor import *

## **T5 SMALL PRETRAINED MODEL**


### **DOWNLOADING AND SAVING T5 SMALL MODEL**


In [3]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Define the model name
t5_model_name = 'google-t5/t5-small'

# T5 small Model
t5_tokenizer = T5Tokenizer.from_pretrained(t5_model_name)
t5_model = T5ForConditionalGeneration.from_pretrained(t5_model_name)

# Saving the orginal pretrained model
torch.save(t5_model, f"../../saved/{t5_model_name}.pt")
del t5_model

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### **LOADING A COPY OF PRETRAINED MODEL AND QUANTIZING**


In [4]:
t5_small_model_copy = torch.load(f"../../saved/{t5_model_name}.pt")
Quant_t5_small_model = convertDenseLayer(t5_small_model_copy)

Layer Name: q
Local minimum:  tensor(-0.3770)  Local max:  tensor(0.4395) Points:  500
Ratio of first region range coverage: tensor(0.3373) Region of selection index:  2
Ratio of second region range coverage: tensor(0.2149) Region of selection index:  3
First Region Range:  (tensor(-0.2951), tensor(-0.0203))
Second Region Range (tensor(0.0124), tensor(0.1875))
Layer Name: k
Local minimum:  tensor(-3.5000)  Local max:  tensor(4.) Points:  500
Ratio of first region range coverage: tensor(0.2771) Region of selection index:  0
Ratio of second region range coverage: tensor(0.1727) Region of selection index:  1
First Region Range:  (tensor(-2.4329), tensor(-0.3587))
Second Region Range (tensor(0.0170), tensor(1.3096))
Layer Name: v
Local minimum:  tensor(-2.8750)  Local max:  tensor(2.3281) Points:  500
Ratio of first region range coverage: tensor(0.2771) Region of selection index:  0
Ratio of second region range coverage: tensor(0.1968) Region of selection index:  1
First Region Range:  (te

: 

# **OUTLIER TRAINING**


In [2]:
from outlier_training.convertQuantizeModel_OutlierTraining import convertDenseLayer

from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.nn import CrossEntropyLoss

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
path = 'ClassificationReport.txt'

In [3]:
class TextDataset(Dataset):
    def __init__(self, data):
        texts, labels = data
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        return text, label

In [5]:
class SimpleClassifier(nn.Module):
    def __init__(self, input_size, output_classes):
        super(SimpleClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, output_classes)
        self.sig = nn.Sigmoid()
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        out = self.sig(out)

        return out

## **BERT BASE MODEL TESTING**


### **DATASET LOADING**


In [5]:
dataset = load_dataset("glue", "sst2")
train_sentences, train_labels = dataset["train"]["sentence"], dataset["train"]["label"]
test_sentences, test_labels = dataset["validation"]["sentence"], dataset["validation"]["label"]
unique_class_count = len(set(train_labels))

train_dataset = TextDataset(data=(train_sentences, train_labels))
test_dataset = TextDataset(data=(test_sentences, test_labels))

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32)

### **SAVING MODEL**


In [3]:
from transformers import BertTokenizer, BertModel

# Load pre-trained BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased').to(device)

# Saving the model
# torch.save(model, f"../../saved/bert-base-uncased/bert-base-uncased.pt")

# del model

In [4]:
sentences = ["This is good","this is too bad","this is fine","How can That go wrong"]

Tokenised_Sentence = tokenizer(sentences, max_length=512, padding='max_length', return_tensors='pt')

Tokenised_Sentence.to(device)
Output = model(**Tokenised_Sentence)
Embeddings_Output = torch.sum(Output.last_hidden_state, dim= 1)

del model

In [21]:
Embeddings_Output

torch.Size([4, 768])

In [5]:
model = torch.load('../../saved/bert-base-uncased/bert-base-uncased-quant.pt')
sentences = ["This is good","this is too bad","this is fine","How can That go wrong"]

Tokenised_Sentence = tokenizer(sentences, max_length=512, padding='max_length', return_tensors='pt')

Tokenised_Sentence
Output = model(**Tokenised_Sentence)
Embeddings_Output_quant = torch.sum(Output.last_hidden_state, dim= 1)

del model

In [6]:
print(Embeddings_Output[0,:])
print(Embeddings_Output_quant[0,:])


tensor([-8.2479e+01, -1.1345e+02,  1.1523e+02,  9.6232e+00,  4.6548e+01,
         6.5042e+01,  5.3600e+01,  1.6288e+02,  9.1292e+01, -3.0552e+02,
        -4.1840e+01,  9.1445e+01,  1.3136e+02,  1.8263e+02,  7.2633e+01,
         1.0237e+02,  1.5599e+02,  2.2578e+02,  2.4568e+01,  7.8412e+01,
        -7.1214e+01, -1.4519e+02,  1.3365e+01, -8.7482e+01,  2.1157e+02,
         9.9785e+01, -1.5848e+02, -2.3635e+01, -4.6831e+01,  2.2749e+01,
         3.8513e+01, -1.5347e+02,  1.1719e+02,  4.2451e+01, -8.9812e+00,
        -6.2050e+01, -1.8140e+02, -7.9907e+01, -2.2877e+02, -5.0162e+01,
         5.4086e+01, -1.1202e+02,  1.0069e+02, -1.9310e+02, -1.3141e+02,
        -2.1523e+01, -1.9445e+02,  6.3946e+01, -2.1452e+02, -1.8621e+02,
         1.0916e+01,  9.1653e+00, -6.5905e+01,  3.4745e+01, -3.9024e+01,
        -1.9638e+01, -5.2824e+01, -2.3210e+01, -2.0746e+02, -1.6731e+02,
         1.1897e+02, -1.3113e+02,  1.8684e+02, -4.5105e+01, -2.4873e+01,
        -3.1907e+01,  2.7283e+02, -8.4907e+01, -6.9

### **FINE TUNING MODEL**


In [7]:
Token_max_length = 512

# Pretrained model weights
Pretrained_model = torch.load(f"../../saved/bert-base-uncased/bert-base-uncased.pt").to(device)

# Classifier Model
Classifier_model = SimpleClassifier(input_size=768, output_classes=unique_class_count).to(device)

optimizer = Adam(Classifier_model.parameters(), lr=5e-3)
loss_fn = CrossEntropyLoss()

EpochCycle = 5

#### **TRAINING THE CLASSIFIER WITH THE ORIGINAL MODEL**


In [None]:
for epoch in range(EpochCycle): 
    
    running_loss = 0.0
    correct = 0
    total = 0

    for text, labels in train_dataloader:
        optimizer.zero_grad()

        Tokenised_Sentence = tokenizer(text, max_length=512, padding='max_length', return_tensors='pt')

        Tokenised_Sentence.to(device)
        Output = Pretrained_model(**Tokenised_Sentence)
        Embeddings_Output = torch.sum(Output.last_hidden_state, dim= 1)

        Classifier_output = Classifier_model(Embeddings_Output)
        Prediction = torch.argmax(Classifier_output, dim = 1 )
        Loss = loss_fn(Prediction.to(torch.float64).to(device), labels.to(torch.float64).to(device))

        Loss.backward()
        optimizer.step()

        running_loss += Loss.item()
        total += labels.size(0)
        correct += (Prediction == labels).sum().item()

    print('Epoch:', epoch+1)
    print('Training Loss:', running_loss / len(train_dataloader))
    print('Training Accuracy:', correct / total)

#### **INFERENCE OF ORIGINAL MODEL**


In [None]:
y_pred_test = []
y_actual_test = []

with torch.no_grad():

    for text, labels in tqdm(train_dataloader):
        
        Tokenised_Sentence = tokenizer(text, max_length=512, padding='max_length', return_tensors='pt')

        Tokenised_Sentence.to(device)
        Output = Pretrained_model(**Tokenised_Sentence)
        Embeddings_Output = torch.sum(Output.last_hidden_state, dim= 1)

        Classifier_output = Classifier_model(Embeddings_Output)
        Prediction = torch.argmax(Classifier_output, dim = 1 )

        y_pred_test.extend(Prediction.tolist())
        y_actual_test.extend(labels.tolist())


report = classification_report(y_actual_test, y_pred_test)

# Save the classification report to a text file
with open(path, 'w') as f:
    f.write(report)

print("Classification report saved to 'classification_report.txt'")

#### **QUANTISING PRETRAINED MODEL**


In [9]:
QuantOutlierModel = convertDenseLayer(Classifier_model.to('cpu'))
Outliers_optimizer = Adam(QuantOutlierModel.parameters(), lr=5e-3)


#### **TRAINING THE OUTLIERS**


In [10]:
for epoch in range(EpochCycle): 
    
    running_loss = 0.0
    correct = 0
    total = 0

    for text, labels in train_dataloader:
        Outliers_optimizer.zero_grad()

        Tokenised_Sentence = tokenizer(text, max_length=512, padding='max_length', return_tensors='pt')

        Tokenised_Sentence.to(device)
        Output = QuantOutlierModel(**Tokenised_Sentence)
        Embeddings_Output = torch.sum(Output.last_hidden_state, dim= 1)

        Classifier_output = Classifier_model(Embeddings_Output)
        Prediction = torch.argmax(Classifier_output, dim = 1 )
        Loss = loss_fn(Prediction.to(torch.float64).to(device), labels.to(torch.float64).to(device))

        Loss.backward()
        Outliers_optimizer.step()

        running_loss += Loss.item()
        total += labels.size(0)
        correct += (Prediction == labels).sum().item()

    print('Epoch:', epoch+1)
    print('Training Loss:', running_loss / len(train_dataloader))
    print('Training Accuracy:', correct / total)

fc1.bias Parameter containing:
tensor([-0.0184,  0.0301,  0.0318, -0.0023,  0.0095,  0.0294, -0.0123,  0.0087,
        -0.0068, -0.0308,  0.0152,  0.0276,  0.0290, -0.0298, -0.0064,  0.0207,
         0.0358,  0.0079, -0.0260,  0.0202,  0.0170, -0.0168,  0.0208, -0.0097,
        -0.0097, -0.0307, -0.0105, -0.0129,  0.0314, -0.0333, -0.0006, -0.0231,
         0.0106, -0.0028, -0.0287,  0.0246,  0.0290,  0.0107, -0.0234,  0.0271,
         0.0093, -0.0074,  0.0165,  0.0230, -0.0081, -0.0196, -0.0060,  0.0273,
         0.0019, -0.0320,  0.0112,  0.0030,  0.0291,  0.0021, -0.0207, -0.0196,
         0.0215, -0.0155, -0.0175, -0.0025, -0.0048,  0.0341,  0.0038,  0.0083,
        -0.0222,  0.0306,  0.0338, -0.0004, -0.0006,  0.0338,  0.0021, -0.0079,
         0.0160,  0.0148, -0.0337, -0.0118, -0.0175,  0.0138, -0.0206,  0.0233,
        -0.0015, -0.0262, -0.0018, -0.0139, -0.0102,  0.0233,  0.0070,  0.0087,
         0.0009, -0.0143, -0.0325, -0.0066, -0.0136,  0.0192, -0.0071, -0.0076,
        -

#### **INFERENCE OF QUANTISED MODEL**


In [None]:
y_pred_test = []
y_actual_test = []

with torch.no_grad():

    for text, labels in tqdm(train_dataloader):
        
        Tokenised_Sentence = tokenizer(text, max_length=512, padding='max_length', return_tensors='pt')

        Tokenised_Sentence.to(device)
        Output = QuantOutlierModel(**Tokenised_Sentence)
        Embeddings_Output = torch.sum(Output.last_hidden_state, dim= 1)

        Classifier_output = Classifier_model(Embeddings_Output)
        Prediction = torch.argmax(Classifier_output, dim = 1 )

        y_pred_test.extend(Prediction.tolist())
        y_actual_test.extend(labels.tolist())


report = classification_report(y_actual_test, y_pred_test)

# Save the classification report to a text file
with open(path, 'w') as f:
    f.write(report)

print("Classification report saved to 'classification_report.txt'")

In [None]:
models = ["bert-base-uncased-quant.pt","bert-base-uncased-quant"]
for model in models:
    if "pt" in model:
        TestModel = torch.load(model)
        modelName = model.split("/")[-1].split(".")[0]
    else:
        TestModel = BertModel.from_pretrained(model)
        modelName = model
    tokenizer = BertTokenizer.from_pretrained(f'bert-{"base" if "base" in model else "large"}-uncased')
    tokenized_sentences = tokenizer(sentences, pad_to_max_length = True,max_length=512, return_tensors="pt")
    with torch.no_grad():
        outputs = TestModel(**tokenized_sentences)
        embeddings = outputs.last_hidden_state[:, 0, :]
    embeddings = embeddings.numpy()
    print(embeddings)