In [2]:
!pip install --upgrade transformers datasets accelerate peft bitsandbytes trl

Collecting transformers
  Downloading transformers-4.56.1-py3-none-any.whl.metadata (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Collecting trl
  Downloading trl-0.22.2-py3-none-any.whl.metadata (11 kB)
Downloading transformers-4.56.1-py3-none-any.whl (11.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m122.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.22.2-py3-none-any.whl (544 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [3]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from peft import get_peft_model, LoraConfig, TaskType
from trl import SFTTrainer

In [4]:
# bitsandbytes quantization settings used when loading the base model:
# 4-bit loading with the "nf4" quantization type and bfloat16 as compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

In [5]:
# Identifiers of the base checkpoint and of the training corpus.
model_name, dataset_name = "google/gemma-2b", "imdb"

# LoRA adapter settings for causal-LM fine-tuning; adapters are attached to the
# q/k/v/o projection modules.
lora_config = LoraConfig(
    r=8,  # adapter rank: a lower rank is smaller and faster but less expressive
    lora_alpha=16,  # LoRA scaling factor, set here to 2 * r
    lora_dropout=0.1,  # dropout applied in the LoRA layers
    bias="none",  # no bias parameters are trained
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type=TaskType.CAUSAL_LM,
)

In [6]:
# Fetch the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

# When the tokenizer ships without a pad token, reuse the EOS token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    print("Tokenizer pad token set to EOS token.")

tokenizer_config.json:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

In [7]:
# Fetch the IMDB reviews dataset (all of its splits) by name.
dataset = load_dataset(path=dataset_name)

def formatting_func(example, max_length=512):
    """Render one labelled review as a single training string.

    The result has the form "Review: <text>", a newline, then
    "Sentiment: <label>" immediately followed by the tokenizer's EOS token.
    The review text is truncated at the token level before it is inserted, so
    that long reviews fit the token budget.

    Args:
        example: Mapping with a 'text' review string and a 'label' value;
            a label equal to 1 is rendered as "positive", anything else as
            "negative".
        max_length: Token budget the review truncation is computed against.
            Defaults to 512, the value that used to be hard-coded.

    Returns:
        The formatted prompt string.

    Raises:
        ValueError: If ``max_length`` leaves no room for any review tokens.
    """
    sentiment = 'positive' if example['label'] == 1 else 'negative'
    suffix = f"\nSentiment: {sentiment}{tokenizer.eos_token}"

    # Budget for the review = total budget - tokens of the suffix - a fixed
    # 10-token margin (presumably headroom for the "Review: " prefix and
    # special tokens -- confirm before changing it).
    margin_tokens = 10
    max_review_tokens = max_length - len(tokenizer(suffix)['input_ids']) - margin_tokens
    if max_review_tokens < 1:
        raise ValueError(
            f"max_length={max_length} leaves no room for review tokens"
        )

    inputs = tokenizer(example['text'], truncation=True, max_length=max_review_tokens)
    truncated_text = tokenizer.decode(inputs['input_ids'], skip_special_tokens=True)

    # Assemble the prompt in a single step. Building a template with an f-string
    # and then calling .format() on it would re-interpret any braces that happen
    # to be in the label or EOS token.
    return f"Review: {truncated_text}{suffix}"

# Small subsets for the run: 500 training and 100 test reviews, each taken
# from a seeded (seed=42) shuffle of its split.
small_train_dataset, small_eval_dataset = (
    dataset[split_name].shuffle(seed=42).select(range(subset_size))
    for split_name, subset_size in (("train", 500), ("test", 100))
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)
base_model.config.pad_token_id = tokenizer.pad_token_id

print("\n--- Model has been loaded. Now checking GPU memory usage. ---")
!nvidia-smi

lora_model = get_peft_model(base_model, lora_config)

lora_model.print_trainable_parameters()

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Settings for the fine-tuning run.
training_args = TrainingArguments(
    # Output locations for checkpoints and logs.
    output_dir="./lora_imdb_classifier_results",
    logging_dir="./logs",
    # Training schedule: 3 epochs, per-device batch of 2 with 2 accumulation steps.
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    weight_decay=0.01,
    # Evaluate and save once per epoch, and load the best model when training ends.
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Log every 10 steps; reporting integrations are disabled.
    logging_steps=10,
    report_to="none",
)

# `lora_model` was already wrapped with get_peft_model, so it is handed to the
# trainer as-is. `peft_config` is intentionally NOT passed as well: that argument
# is for letting the trainer wrap a plain base model, and supplying it together
# with an existing PeftModel asks the trainer to apply an adapter config on top of
# a model that already has one. Training `lora_model` directly also means the
# object used for generation afterwards is exactly the one that was trained.
trainer = SFTTrainer(
    model=lora_model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    formatting_func=formatting_func,
)

# Run fine-tuning with the evaluation/saving schedule from `training_args`.
trainer.train()

def predict_sentiment(review, max_new_tokens=10):
    """Return the sentiment word the fine-tuned model generates for ``review``.

    The review is placed in the same "Review: ... / Sentiment:" prompt layout
    used for training, and the model continues it with greedy decoding.

    Args:
        review: Review text to classify.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The first whitespace-delimited word of the generated continuation, or
        an empty string when the model generated no text.
    """
    prompt = f"Review: {review}\nSentiment:"

    # Put the inputs on whatever device the model's weights are on instead of
    # assuming a device name.
    device = next(lora_model.parameters()).device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        output = lora_model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)

    # Decode only the tokens generated after the prompt. Splitting the full text
    # on "Sentiment:" would pick the wrong segment if the review itself contained
    # that marker, and indexing the first word would fail on an empty generation.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    words = completion.split()
    return words[0] if words else ""


# Switch to evaluation mode for inference. The model is not moved with .to("cuda"):
# it was loaded with device_map="auto", and inputs follow the model's device.
lora_model.eval()

# Test with a positive review
positive_review = "This movie was absolutely incredible! The performances were brilliant and the story was captivating."
predicted_sentiment = predict_sentiment(positive_review)

print(f"\nReview: '{positive_review}'")
print(f"Predicted Sentiment: {predicted_sentiment}")

# Test with a negative review
negative_review = "I was really let down by this film. The plot was predictable and the acting was wooden."
predicted_sentiment = predict_sentiment(negative_review)

print(f"\nReview: '{negative_review}'")
print(f"Predicted Sentiment: {predicted_sentiment}")

README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

plain_text/test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

plain_text/unsupervised-00000-of-00001.p(…):   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]


--- Model has been loaded. Now checking GPU memory usage. ---
Sat Sep  6 21:29:52 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   52C    P0             28W /   70W |    2530MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+---------



Applying formatting function to train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Applying formatting function to eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
1,2.8815,2.858362,2.834415,132373.0,0.43146
2,2.8324,2.848953,2.799427,264746.0,0.431342
3,2.7537,2.84791,2.801224,397119.0,0.431081



Review: 'This movie was absolutely incredible! The performances were brilliant and the story was captivating.'
Predicted Sentiment: positive

Review: 'I was really let down by this film. The plot was predictable and the acting was wooden.'
Predicted Sentiment: negative
