# Quantizing Student Model

Code authored by: Shaw Talebi

[Video](https://youtu.be/FLkUOkeMd5M) <br>
[Blog](https://towardsdatascience.com/compressing-large-language-models-llms-9f406eea5b5e) <br>
[Colab](https://colab.research.google.com/drive/1Tp9FdMNJRMcqWkGqIGiSkQoLXA0HStT8?usp=sharing)

In [None]:
!pip install datasets
!pip install transformers
!pip install bitsandbytes



### imports

In [None]:
from datasets import load_dataset

from transformers import AutoTokenizer, AutoModelForSequenceClassification

import torch
from torch.utils.data import DataLoader

from sklearn.metrics import accuracy_score, precision_recall_fscore_support

from transformers import BitsAndBytesConfig

### load data

In [None]:
# Fetch the phishing-site classification dataset by its Hugging Face Hub id
data = load_dataset("shawhin/phishing-site-classification")
# Bare expression: as the last line of a notebook cell it displays the loaded object
data

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


DatasetDict({
    train: Dataset({
        features: ['text', 'labels'],
        num_rows: 2100
    })
    validation: Dataset({
        features: ['text', 'labels'],
        num_rows: 450
    })
    test: Dataset({
        features: ['text', 'labels'],
        num_rows: 450
    })
})

### load student model

In [None]:
# Use the GPU when one is present; otherwise fall back to CPU so the
# unquantized load/evaluation cells still run on a machine without CUDA.
# NOTE: the 4-bit bitsandbytes load further down is expected to need a CUDA
# device, so the quantization cells may still fail on a CPU-only runtime.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Load student model and tokenizer
model_id = "shawhin/bert-phishing-classifier_student"
# NOTE(review): the tokenizer is loaded from the *teacher* repo rather than
# model_id — presumably the student shares the teacher's vocabulary; confirm.
tokenizer = AutoTokenizer.from_pretrained("shawhin/bert-phishing-classifier_teacher")
# Unquantized student (no quantization_config passed), moved onto `device`
model = AutoModelForSequenceClassification.from_pretrained(model_id).to(device)

### tokenize data

In [None]:
# text preprocessing applied to each batch of examples
def preprocess_function(examples):
    """Tokenize the "text" field of `examples` with the module-level tokenizer.

    Sequences are padded to the tokenizer's maximum length and truncated when
    they exceed it. Returns whatever the tokenizer call returns.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, padding='max_length', truncation=True)
    return encoded

# tokenize all dataset splits (batched=True passes batches of examples to the function)
tokenized_data = data.map(preprocess_function, batched=True)
# expose only the model inputs and the labels, formatted as torch tensors
tokenized_data.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

### Evaluation

In [None]:
# Function to evaluate model performance
def evaluate_model(model, dataloader, device):
    """Compute binary-classification metrics for `model` over `dataloader`.

    Args:
        model: Callable model accepting `input_ids` and `attention_mask` and
            returning an object with a `.logits` tensor.
        dataloader: Iterable of batches; each batch is a mapping with
            'input_ids', 'attention_mask' and 'labels' tensors.
        device: Device the model inputs are moved to before the forward pass.

    Returns:
        Tuple `(accuracy, precision, recall, f1)`. Precision, recall and F1
        are computed with `average='binary'`.
    """
    model.eval()  # Set model to evaluation mode
    all_preds = []
    all_labels = []

    # Disable gradient calculations
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # Forward pass to get logits
            outputs = model(input_ids, attention_mask=attention_mask)

            # Predicted class = index of the largest logit per example
            preds = torch.argmax(outputs.logits, dim=1)
            all_preds.extend(preds.cpu().numpy())

            # Labels are only consumed on the host by the metric functions,
            # so they are not copied to `device` and back.
            all_labels.extend(batch['labels'].cpu().numpy())

    # Calculate evaluation metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='binary')

    return accuracy, precision, recall, f1

### Evaluate model (pre-quantization)

In [None]:
# create data loader over the validation split, 128 examples per batch
# (no shuffle argument is passed)
validation_dataloader = DataLoader(tokenized_data['validation'], batch_size=128)

In [None]:
# Evaluate the unquantized student model on the validation split;
# these metrics are the baseline for the post-quantization comparison
base_accuracy, base_precision, base_recall, base_f1 = evaluate_model(model, validation_dataloader, device)
print("Pre-quantization Performance")
print(f"Accuracy: {base_accuracy:.4f}, Precision: {base_precision:.4f}, Recall: {base_recall:.4f}, F1 Score: {base_f1:.4f}")

Pre-quantization Performance
Accuracy: 0.9289, Precision: 0.9707, Recall: 0.8844, F1 Score: 0.9256


### Quantize model

In [None]:
# load model as 4-bit
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,  # request 4-bit loading via bitsandbytes
    bnb_4bit_quant_type="nf4",  # 4-bit data type: NF4 (the alternative is "fp4")
    bnb_4bit_compute_dtype = torch.bfloat16,  # dtype used for computation
    bnb_4bit_use_double_quant=True  # nested quantization: quantization constants are quantized again
)

# Reload the same student checkpoint with the 4-bit config, placed on `device`
model_nf4 = AutoModelForSequenceClassification.from_pretrained(model_id, device_map=device, quantization_config=nf4_config)

### Evaluate model (post-quantization)

In [None]:
# Evaluate the 4-bit quantized student model on the same validation dataloader
quantized_accuracy, quantized_precision, quantized_recall, quantized_f1 = evaluate_model(model_nf4, validation_dataloader, device)

print("Post-quantization Performance")
print(f"Accuracy: {quantized_accuracy:.4f}, Precision: {quantized_precision:.4f}, Recall: {quantized_recall:.4f}, F1 Score: {quantized_f1:.4f}")

Post-quantization Performance
Accuracy: 0.9356, Precision: 0.9757, Recall: 0.8933, F1 Score: 0.9327


### Push to Hub

In [None]:
from huggingface_hub import notebook_login

# Authenticate with the Hugging Face Hub from inside the notebook
# (presumably needed before the upload below — the push requires write access)
notebook_login()

In [None]:
# Upload the 4-bit quantized model to the Hub under this repo id
model_nf4.push_to_hub("shawhin/bert-phishing-classifier_student_4bit")