In [1]:

!pip install --upgrade pip
!pip install torch torchvision torchaudio --quiet
!pip install transformers accelerate datasets peft bitsandbytes sentencepiece --quiet
!pip install huggingface_hub --quiet


Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.2


In [2]:
import torch

In [3]:

from huggingface_hub import login
login()  # paste your HF token (must have access to LLaMA-2)


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
# ===============================================================
# 📂 Step 3: Paths and config (Colab environment)
# ===============================================================
from pathlib import Path

BASE_DIR = Path("/content")
DATA_JSONL = BASE_DIR / "cbc_finetune_dataset.jsonl"
MODELS_DIR = BASE_DIR / "models"
MODELS_DIR.mkdir(parents=True, exist_ok=True)

LLAMA_MODEL_NAME = "meta-llama/Llama-2-7b-hf"   # requires HF approval
OUTPUT_NAME = "cbc_llama2_qlora_finetuned"

print("✅ Data path:", DATA_JSONL)
print("✅ Model output path:", MODELS_DIR)


✅ Data path: /content/cbc_finetune_dataset.jsonl
✅ Model output path: /content/models


In [6]:
# ===============================================================
# 🧠 Step 4: Load dataset
# ===============================================================
from datasets import load_dataset
assert DATA_JSONL.exists(), f"Dataset not found at {DATA_JSONL}"

raw_ds = load_dataset("json", data_files=str(DATA_JSONL), split="train")
raw_ds = raw_ds.shuffle(seed=42)
print("✅ Dataset loaded. Example:", raw_ds[0])

SEP = "\n--\n"
def to_text(example):
    return (example.get("prompt", "").strip() +
            SEP +
            example.get("completion", "").strip())

text_ds = raw_ds.map(lambda x: {"text": to_text(x)}, remove_columns=raw_ds.column_names)
text_ds = text_ds.train_test_split(test_size=0.05, seed=42)
print(text_ds)


Generating train split: 0 examples [00:00, ? examples/s]

✅ Dataset loaded. Example: {'prompt': "Generate a CBC-aligned question for Grade 4 Mathematics based on code M4.1.1 at the 'Remember' Bloom level.", 'completion': "Question: What is 5 + 7? Options: ['10',  '11',  '12',  '13']. Answer: 12"}


Map:   0%|          | 0/13 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 12
    })
    test: Dataset({
        features: ['text'],
        num_rows: 1
    })
})


In [7]:
# ===============================================================
# ⚙️ Step 5: Load tokenizer
# ===============================================================
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
print("✅ Tokenizer ready")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

✅ Tokenizer ready


In [None]:
!pip uninstall -y bitsandbytes
!pip uninstall -y transformers accelerate peft

Found existing installation: bitsandbytes 0.48.1
Uninstalling bitsandbytes-0.48.1:
  Successfully uninstalled bitsandbytes-0.48.1
Found existing installation: transformers 4.57.1
Uninstalling transformers-4.57.1:
  Successfully uninstalled transformers-4.57.1
Found existing installation: accelerate 1.11.0
Uninstalling accelerate-1.11.0:
  Successfully uninstalled accelerate-1.11.0
Found existing installation: peft 0.17.1
Uninstalling peft-0.17.1:
  Successfully uninstalled peft-0.17.1


In [None]:
!pip install -U bitsandbytes==0.43.3
!pip install -U transformers==4.44.2
!pip install -U accelerate==0.33.0
!pip install -U peft==0.11.1

Collecting bitsandbytes==0.43.3
  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl (137.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m62.7 MB/s[0m  [33m0:00:02[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.43.3
Collecting transformers==4.44.2
  Downloading transformers-4.44.2-py3-none-any.whl.metadata (43 kB)
Collecting tokenizers<0.20,>=0.19 (from transformers==4.44.2)
  Downloading tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.44.2-py3-none-any.whl (9.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m152.7 MB/s[0m  [33m0:00:00[0m
[?25hDownloading tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Collecting peft==0.11.1
  Downloading peft-0.11.1-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.11.1-py3-none-any.whl (251 kB)
Installing collected packages: peft
Successfully installed peft-0.11.1


In [8]:
# ===============================================================
# 🧮 Step 6: Load model with 4-bit quantization (QLoRA-ready)
# ===============================================================
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",  # NormalFloat4
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    LLAMA_MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)

# resize embeddings if tokenizer added new tokens
model.resize_token_embeddings(len(tokenizer))
print("✅ 4-bit model loaded successfully!")


config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

✅ 4-bit model loaded successfully!


In [9]:
# ===============================================================
# 🧪 Step 7: Prepare PEFT (QLoRA) adapter
# ===============================================================
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # efficient QLoRA config for LLaMA
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 4,194,304 || all params: 6,742,609,920 || trainable%: 0.0622


In [10]:
# ===============================================================
# 🧰 Step 8: Tokenize dataset
# ===============================================================
def tokenize_fn(example):
    return tokenizer(
        example["text"],
        truncation=True,
        max_length=512,
        padding="max_length"
    )

tokenized_ds = text_ds.map(tokenize_fn, batched=True, remove_columns=["text"])
print("✅ Tokenized dataset ready")


Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

✅ Tokenized dataset ready


In [11]:
# ===============================================================
# 🚀 Step 9: Training configuration
# ===============================================================
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments

data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir=str(MODELS_DIR / OUTPUT_NAME),
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=10,
    learning_rate=2e-4,
    fp16=True,
    num_train_epochs=1,  # start with 1 for testing
    logging_steps=5,
    save_steps=50,
    save_total_limit=2,
    optim="paged_adamw_32bit",
    report_to="none",
)

trainer = Trainer(
    model=model,
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    args=training_args,
)


  trainer = Trainer(


In [12]:
# ===============================================================
# 🏋️ Step 10: Train model
# ===============================================================
trainer.train()


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.


Step,Training Loss


TrainOutput(global_step=2, training_loss=3.029029369354248, metrics={'train_runtime': 13.5038, 'train_samples_per_second': 0.889, 'train_steps_per_second': 0.148, 'total_flos': 243727733882880.0, 'train_loss': 3.029029369354248, 'epoch': 1.0})

In [13]:

model.save_pretrained(MODELS_DIR / OUTPUT_NAME)
tokenizer.save_pretrained(MODELS_DIR / OUTPUT_NAME)
print("✅ Model saved at:", MODELS_DIR / OUTPUT_NAME)


✅ Model saved at: /content/models/cbc_llama2_qlora_finetuned


In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Paths
BASE_DIR = Path("/content")
MODEL_BASE = "meta-llama/Llama-2-7b-hf"
MODEL_FINETUNED = BASE_DIR / "models" / "cbc_llama2_qlora_finetuned"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

# Load 4-bit quantized base model
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_BASE,
    quantization_config=bnb_config,
    device_map="auto",
)

# Load LoRA adapter weights
model = PeftModel.from_pretrained(base_model, MODEL_FINETUNED)

print("✅ Fine-tuned model loaded successfully!")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Fine-tuned model loaded successfully!


In [15]:
def generate_cbc_question(prompt, max_new_tokens=200):
    """
    Generate a CBC-aligned question using your fine-tuned model.
    Example prompt:
    'Generate a CBC-aligned question for Grade 5 Science based on code S5.2.3 at the "Understand" Bloom level.'
    """
    formatted_prompt = prompt.strip()  # keep consistent with your dataset format
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.7,   # controls creativity
            top_p=0.9,         # controls randomness
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Trim to remove repeating the prompt
    if generated_text.startswith(formatted_prompt):
        generated_text = generated_text[len(formatted_prompt):].strip()

    return generated_text


In [17]:
prompt = """
Generate a CBC-aligned question for Grade 5 Mathematics based on code M5.3.2 at the 'Apply' Bloom level.
Follow this format exactly:
Question: ...
Options: ['...', '...', '...', '...']
Answer: ...
"""
print(generate_cbc_question(prompt))


Hint: ...

Question: The ratio of the lengths of two sides of a rectangle is 3:4. What is the ratio of the areas of the two rectangles?

Options: ['3:4', '1:1', '1:2', '4:3']
Answer: '1:2'
Hint: ...

Question: What is the ratio of the areas of the two rectangles?

Options: ['3:4', '1:1', '1:2', '4:3']
Answer: '1:2'
Hint: ...

Question: What is the ratio of the areas of the two rectangles?

Options: ['3:4', '1:1', '1:2', '4:3']
Answer: '1:2'
Hint: ...

Question: What is the ratio of the areas of the two rectangles?

Options


In [None]:
# ===============================================================
# 💾 Step 12: Save model to Google Drive
# ===============================================================
from google.colab import drive
drive.mount('/content/drive')

# Path inside your Google Drive where the model will be saved
drive_path = "/content/drive/MyDrive/cbc_llama2_qlora_finetuned"

# Create the directory if it doesn’t exist
import os
os.makedirs(drive_path, exist_ok=True)

# Save model + tokenizer
model.save_pretrained(drive_path)
tokenizer.save_pretrained(drive_path)

print("✅ Model successfully saved to Google Drive at:", drive_path)


Mounted at /content/drive
✅ Model successfully saved to Google Drive at: /content/drive/MyDrive/cbc_llama2_qlora_finetuned


In [18]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
!mkdir -p /content/drive/MyDrive/cbc_llama2_model


In [20]:
!cp -r /content/models/cbc_llama2_qlora_finetuned /content/drive/MyDrive/cbc_llama2_model/
print("✅ Model safely saved to Google Drive at: /content/drive/MyDrive/cbc_llama2_model/cbc_llama2_qlora_finetuned")


✅ Model safely saved to Google Drive at: /content/drive/MyDrive/cbc_llama2_model/cbc_llama2_qlora_finetuned


In [21]:
!ls /content/drive/MyDrive/cbc_llama2_model/cbc_llama2_qlora_finetuned


adapter_config.json	   README.md		    tokenizer.model
adapter_model.safetensors  special_tokens_map.json
checkpoint-2		   tokenizer_config.json
