In [2]:
import torch
import transformers
from transformers import pipeline
from transformers import DistilBertModel, DistilBertForMaskedLM, DistilBertTokenizer
from optimum.quanto import freeze, quantize, qint8
import datasets
from transformers import TrainingArguments
import numpy as np
import evaluate

  from .autonotebook import tqdm as notebook_tqdm
2025-02-24 22:48:32.777178: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740433712.831926    3929 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740433712.846548    3929 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-24 22:48:32.970126: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
model_id = "distilbert-base-uncased"
#model = DistilBertForMaskedLM.from_pretrained("distilbert-base-uncased", torch_dtype=torch.float16, attn_implementation="sdpa")
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

In [4]:
dataset = datasets.load_dataset("imdb")
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})


In [5]:
def preprocess(data):
    tokens = tokenizer(data["text"], truncation=True, padding = 'max_length',  max_length=512)
    tokens["label"] = data["label"]
    return tokens

In [6]:
tokens = dataset.map(preprocess, batched = True)

In [7]:
labels = tokens['train'].features['label'].names
num_labels = len(labels)
label2id, id2label = {}, {}

for idx, lbl in enumerate(labels):
    label2id[lbl] = idx
    id2label[idx] = lbl

In [8]:
small_train_dataset = tokens["train"].shuffle(seed=11).select(range(2000))
small_eval_dataset = tokens["train"].shuffle(seed=11).select(range(2000))

In [9]:
from transformers import DistilBertForSequenceClassification, AutoModelForSequenceClassification, DistilBertConfig, DataCollatorWithPadding

model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    num_labels = num_labels,
    id2label = id2label,
    label2id = label2id,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
import evaluate
import numpy as np

accuracy = evaluate.load("accuracy")

def preprocess_logits_for_metrics(logits, labels):
    """
    Preprocess the logits to ensure they are in the correct format for metric computation.
    This function will be called during the evaluation process.
    """
    if isinstance(logits, tuple):  
        logits = logits[0]  # get logit tensors

    pred_ids = torch.argmax(logits, dim=-1)
    
    return pred_ids, labels
    
def compute_metrics(eval_pred):
    
    predictions, labels = eval_pred

    return accuracy.compute(predictions=predictions[0], references=labels)


In [11]:
from transformers import TrainingArguments, Trainer

EPOCHS = 1
BATCH_SIZE = 16
LEARNING_RATE = 0.00005

training_args = TrainingArguments(
    output_dir = './imdb_tune_distilbert_quant',
    num_train_epochs = EPOCHS,
    per_device_train_batch_size = BATCH_SIZE,
    per_device_eval_batch_size = BATCH_SIZE,
    learning_rate = LEARNING_RATE,
    logging_dir = './logs',
    load_best_model_at_end= True,
    metric_for_best_model="accuracy",
    eval_strategy="epoch",
    eval_steps = 500,
    save_strategy="epoch",
    save_total_limit=2,
    report_to=['tensorboard'],
)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
trainer = Trainer(
    model=model,                         
    args=training_args,                  
    train_dataset=small_train_dataset,         
    eval_dataset=small_eval_dataset,
    compute_metrics = compute_metrics,
    preprocess_logits_for_metrics = preprocess_logits_for_metrics,
    tokenizer = tokenizer,
    data_collator = data_collator,
)


  trainer = Trainer(


In [12]:
#evaluate PTQ

In [13]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.182091,0.9405


TrainOutput(global_step=125, training_loss=0.3952823181152344, metrics={'train_runtime': 1909.6057, 'train_samples_per_second': 1.047, 'train_steps_per_second': 0.065, 'total_flos': 264934797312000.0, 'train_loss': 0.3952823181152344, 'epoch': 1.0})

In [14]:
from optimum.quanto import freeze, quantize, qint8
quantize(model, weights=qint8, activations=None)
freeze(model)

In [15]:
print("acc after freeze:", trainer.evaluate())

acc after freeze: {'eval_loss': 0.18257860839366913, 'eval_accuracy': 0.9405, 'eval_runtime': 422.0492, 'eval_samples_per_second': 4.739, 'eval_steps_per_second': 0.296, 'epoch': 1.0}


In [18]:
training_args2 = TrainingArguments(
    output_dir = './imdb_tune_distilbert_quant2',
    num_train_epochs = EPOCHS,
    per_device_train_batch_size = BATCH_SIZE,
    per_device_eval_batch_size = BATCH_SIZE,
    learning_rate = LEARNING_RATE,
    logging_dir = './logs',
    load_best_model_at_end= True,
    metric_for_best_model="accuracy",
    eval_strategy="epoch",
    eval_steps = 500,
    save_strategy="epoch",
    save_total_limit=2,
    report_to=['tensorboard'],
)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
trainer2 = Trainer(
    model=model,                         
    args=training_args2,                  
    train_dataset=small_train_dataset,         
    eval_dataset=small_eval_dataset,
    compute_metrics = compute_metrics,
    preprocess_logits_for_metrics = preprocess_logits_for_metrics,
    tokenizer = tokenizer,
    data_collator = data_collator,
)

print("acc after freeze:", trainer2.evaluate())


  trainer2 = Trainer(


acc after freeze: {'eval_loss': 0.18257860839366913, 'eval_model_preparation_time': 0.0147, 'eval_accuracy': 0.9405, 'eval_runtime': 445.6493, 'eval_samples_per_second': 4.488, 'eval_steps_per_second': 0.28}


In [19]:
print(model)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): QLinear(in_features=768, out_features=768, bias=True)
            (k_lin): QLinear(in_features=768, out_features=768, bias=True)
            (v_lin): QLinear(in_features=768, out_features=768, bias=True)
            (out_lin): QLinear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=Fal

In [16]:
from trl import SFTConfig, SFTTrainer
def qat():
    training_args = SFTConfig(
    output_dir = './imdb_tune_distilbert_quant',
    num_train_epochs = EPOCHS,
    per_device_train_batch_size = BATCH_SIZE,
    per_device_eval_batch_size = BATCH_SIZE,
    learning_rate = LEARNING_RATE,
    logging_dir = './logs',
    load_best_model_at_end= True,
    metric_for_best_model="accuracy",
    eval_strategy="epoch",
    eval_steps = 500,
    save_strategy="epoch",
    save_total_limit=2,
    report_to=['tensorboard'],
)

quantize(model)
trainer.train()
freeze(model)
trainer.evaluate()