Install Libraries

In [None]:
# Make sure you've run this and are on a GPU runtime
!pip install transformers datasets bitsandbytes accelerate evaluate

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6


Load FP32 (Baseline) and 4-bit Quantized Models, Tokenizer, and IMDB Test Subset

In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig
from datasets import load_dataset

# --- Configuration ---
# Hub checkpoint to evaluate. Per its name this is a DistilBERT classifier that
# has already been fine-tuned on IMDB, so it can be scored without any training
# and should not trigger the "weights not initialized" warning.
model_name = "textattack/distilbert-base-uncased-imdb"

# Evaluation-subset settings, named so they can be tuned in one place.
EVAL_SEED = 42            # fixed shuffle seed: the same subset is drawn on every run
NUM_EVAL_EXAMPLES = 1000  # how many test reviews to keep for a quick evaluation

# 1. Load the unquantized model; it serves as the baseline for the comparison.
#    No device_map is passed here, unlike the quantized load below.
model_fp32 = AutoModelForSequenceClassification.from_pretrained(model_name)

print("--- Original FP32 Model (Fine-tuned on IMDB) ---")
print(model_fp32.config)

# 2. Describe the 4-bit quantization: 4-bit weight loading, the "nf4"
#    quantization type, and float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# 3. Load the same checkpoint again, this time quantized with the config above.
model_int4 = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # automatic device placement (the GPU on a GPU runtime)
)

print("\n--- Quantized INT4 Model ---")
# The printed module tree shows the linear layers replaced by 'Linear4bit'.
print(model_int4)

# 4. One tokenizer is enough: both models come from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 5. Load IMDB and draw a small, reproducible evaluation subset.
imdb = load_dataset("imdb")
# Evaluate on the 'test' split only. NOTE(review): the original author states the
# checkpoint was trained on the 'train' split -- confirm against the model card.
_test_split = imdb["test"]
# Clamp the requested size so a larger NUM_EVAL_EXAMPLES cannot index past the split.
test_dataset = _test_split.shuffle(seed=EVAL_SEED).select(
    range(min(NUM_EVAL_EXAMPLES, len(_test_split)))
)

print(f"\nLoaded {len(test_dataset)} testing examples.")

config.json:   0%|          | 0.00/485 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

--- Original FP32 Model (Fine-tuned on IMDB) ---
DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "dtype": "float32",
  "finetuning_task": "imdb",
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.57.1",
  "vocab_size": 30522
}



model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]


--- Quantized INT4 Model ---
DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear4bit(in_features=768, out_features=768, bias=True)
            (k_lin): Linear4bit(in_features=768, out_features=768, bias=True)
            (v_lin): Linear4bit(in_features=768, out_features=768, bias=True)
            (out_lin): Linear4bit(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
       

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

plain_text/test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

plain_text/unsupervised-00000-of-00001.p(…):   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]


Loaded 1000 testing examples.
