In [1]:
import torch
import transformers
from transformers import pipeline
from transformers import DistilBertModel, DistilBertForMaskedLM, DistilBertTokenizer
from optimum.quanto import freeze, quantize, qint8
import datasets
from transformers import TrainingArguments
import numpy as np
import evaluate

  from .autonotebook import tqdm as notebook_tqdm
2025-02-26 15:14:29.924687: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740579269.940957   18627 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740579269.945544   18627 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-26 15:14:29.960692: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
model_id = "distilbert-base-uncased"
#model = DistilBertForMaskedLM.from_pretrained("distilbert-base-uncased", torch_dtype=torch.float16, attn_implementation="sdpa")
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

In [3]:
dataset = datasets.load_dataset("imdb")
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})


In [4]:
def preprocess(data):
    tokens = tokenizer(data["text"], truncation=True, padding = 'max_length',  max_length=512)
    tokens["label"] = data["label"]
    return tokens

In [5]:
tokens = dataset.map(preprocess, batched = True)

In [6]:
labels = tokens['train'].features['label'].names
num_labels = len(labels)
label2id, id2label = {}, {}

for idx, lbl in enumerate(labels):
    label2id[lbl] = idx
    id2label[idx] = lbl

In [7]:
small_train_dataset = tokens["train"].shuffle(seed=11).select(range(2000))
small_eval_dataset = tokens["train"].shuffle(seed=11).select(range(2000))

In [8]:
from transformers import DistilBertForSequenceClassification, AutoModelForSequenceClassification, DistilBertConfig, DataCollatorWithPadding

model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    num_labels = num_labels,
    id2label = id2label,
    label2id = label2id,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
import evaluate
import numpy as np

accuracy = evaluate.load("accuracy")

def preprocess_logits_for_metrics(logits, labels):
    """
    Preprocess the logits to ensure they are in the correct format for metric computation.
    This function will be called during the evaluation process.
    """
    if isinstance(logits, tuple):  
        logits = logits[0]  # get logit tensors

    pred_ids = torch.argmax(logits, dim=-1)
    
    return pred_ids, labels
    
def compute_metrics(eval_pred):
    
    predictions, labels = eval_pred

    return accuracy.compute(predictions=predictions[0], references=labels)


In [10]:
from transformers import TrainingArguments, Trainer

EPOCHS = 1
BATCH_SIZE = 8
LEARNING_RATE = 0.00005

training_args = TrainingArguments(
    output_dir = './quantized_activations_imdb',
    num_train_epochs = EPOCHS,
    per_device_train_batch_size = BATCH_SIZE,
    per_device_eval_batch_size = BATCH_SIZE,
    learning_rate = LEARNING_RATE,
    logging_dir = './logs',
    load_best_model_at_end= True,
    metric_for_best_model="accuracy",
    eval_strategy="epoch",
    eval_steps = 500,
    save_strategy="epoch",
    save_total_limit=2,
    report_to=['tensorboard'],
)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
trainer = Trainer(
    model=model,                         
    args=training_args,                  
    train_dataset=small_train_dataset,         
    eval_dataset=small_eval_dataset.shuffle(seed=72).select(range(600)),
    compute_metrics = compute_metrics,
    preprocess_logits_for_metrics = preprocess_logits_for_metrics,
    tokenizer = tokenizer,
    data_collator = data_collator,
)


  trainer = Trainer(


In [11]:
from optimum.quanto import freeze, quantize, qint8
quantize(model, weights=qint8, activations=qint8)
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.693147,0.515


TrainOutput(global_step=250, training_loss=0.6937662353515625, metrics={'train_runtime': 10804.6829, 'train_samples_per_second': 0.185, 'train_steps_per_second': 0.023, 'total_flos': 264934797312000.0, 'train_loss': 0.6937662353515625, 'epoch': 1.0})

In [12]:
#quantize(model, weights=qint8, activations=None)
freeze(model)
#print("accuracy trained after quant of weights to int8 ", trainer.evaluate())

In [13]:
from safetensors.torch import save_file
import os

save_file(model.state_dict(), 'qmodel_act.safetensors')
os.path.getsize("./qmodel_act.safetensors")

138842952

In [14]:
fp_model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    num_labels = num_labels,
    id2label = id2label,
    label2id = label2id,
)

save_file(fp_model.state_dict(), 'fp_model_act.safetensors')
os.path.getsize("./fp_model_act.safetensors")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


267832528

quantize(model)
trainer.train()
freeze(model)
trainer.evaluate()