# Fine-tune Llama 3.2 1B on Syllabus Data

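This notebook fine-tunes the Llama 3.2 1B base model (`meta-llama/Llama-3.2-1B`) on syllabus data using LoRA adapters on top of a 4-bit quantized base model (QLoRA), then saves both the LoRA adapter and a merged checkpoint.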

## 1. Install Required Packages

In [8]:
!pip install -q transformers datasets peft trl bitsandbytes accelerate

## 2. Log in to Hugging Face

In [9]:
import os
from getpass import getpass

from huggingface_hub import login

# SECURITY: never hard-code an access token in a notebook -- it ends up in
# version control and in every shared copy. Any token that was previously
# committed in this cell must be treated as leaked and revoked in the
# Hugging Face account settings.
# The token is read from the HF_TOKEN environment variable when present;
# otherwise it is requested interactively without echoing it to the output.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token, add_to_git_credential=False)

## 3. Import Libraries and Setup Configuration

In [10]:
import os
import torch
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    LlamaTokenizerFast,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import logging

# Setup logging
# force=True removes handlers that the notebook runtime may already have
# attached to the root logger. Without it, basicConfig() does nothing when a
# handler exists and the INFO messages emitted by main() are never shown.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Configuration
MODEL_NAME = "meta-llama/Llama-3.2-1B"  # Hub id of the base model to fine-tune
FINETUNE_DATA_FILE = "./syllabus_finetune_data_fixed.jsonl"  # JSONL training data
OUTPUT_DIR = "./llama3.2-1b-syllabus-finetuned"  # Where adapter/merged model are saved

# LoRA configuration (forwarded to peft.LoraConfig in main())
LORA_R = 16  # r
LORA_ALPHA = 32  # lora_alpha
LORA_DROPOUT = 0.05  # lora_dropout
# Names of the modules that receive LoRA adapters.
LORA_TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]

# Training arguments (read by main() when building the trainer configuration)
NUM_TRAIN_EPOCHS = 1
PER_DEVICE_TRAIN_BATCH_SIZE = 2
GRADIENT_ACCUMULATION_STEPS = 2
LEARNING_RATE = 2e-4
WEIGHT_DECAY = 0.001
MAX_GRAD_NORM = 0.3
WARMUP_RATIO = 0.03
LR_SCHEDULER_TYPE = "cosine"
MAX_SEQ_LENGTH = 1024

# Quantization config (forwarded to BitsAndBytesConfig in main())
USE_4BIT_QUANTIZATION = True
BNB_4BIT_COMPUTE_DTYPE = "bfloat16"  # Resolved with getattr(torch, ...) in main()
BNB_4BIT_QUANT_TYPE = "nf4"

## 4. Upload Training Data

In [11]:
from google.colab import files

uploaded = files.upload()  # Upload syllabus_finetune_data_fixed.jsonl

# When a file with the same name already exists, Colab stores the new upload
# under a different name (e.g. "name (1).jsonl"), so FINETUNE_DATA_FILE would
# keep pointing at the stale copy. Write the freshly uploaded bytes to the
# configured path so training always uses the latest upload.
# NOTE(review): assumes `uploaded` maps file names to their byte contents, as
# in the Colab file-upload snippet -- confirm against the installed runtime.
expected_name = os.path.basename(FINETUNE_DATA_FILE)
payload = uploaded.get(expected_name)
if payload is None and len(uploaded) == 1:
    # The key may be the renamed local file; a single upload is unambiguous.
    payload = next(iter(uploaded.values()))
if payload is not None:
    with open(FINETUNE_DATA_FILE, "wb") as data_file:
        data_file.write(payload)

Saving syllabus_finetune_data_fixed.jsonl to syllabus_finetune_data_fixed (1).jsonl


## 5. Fine-tuning Function

In [12]:
def main():
    """Fine-tune ``MODEL_NAME`` on ``FINETUNE_DATA_FILE`` with LoRA and save the result.

    The function reads the module-level configuration constants and performs,
    in order: tokenizer loading, dataset loading, optional 4-bit quantized
    model loading, LoRA wrapping, supervised fine-tuning with ``SFTTrainer``,
    and saving of both the LoRA adapter (``OUTPUT_DIR``) and a merged model
    (``OUTPUT_DIR/final_merged_checkpoint``).

    Raises:
        ValueError: If the dataset file loads with zero examples.
    """
    logging.info(f"Starting fine-tuning for model: {MODEL_NAME}")

    # 1. Load Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

    if tokenizer.pad_token is None:
        logging.info("Tokenizer does not have a pad_token. Setting pad_token to eos_token.")
        tokenizer.pad_token = tokenizer.eos_token

    logging.info(f"Tokenizer loaded. Pad token ID: {tokenizer.pad_token_id}")

    # 2. Load Dataset
    logging.info(f"Loading dataset from {FINETUNE_DATA_FILE}")
    dataset = load_dataset("json", data_files=FINETUNE_DATA_FILE, split="train")
    logging.info(f"Dataset loaded. Number of examples: {len(dataset)}")

    if len(dataset) == 0:
        raise ValueError(f"No training examples found in {FINETUNE_DATA_FILE}")

    # A JSONL file that parses into fewer rows than one optimizer step consumes
    # usually means the records were not written one-per-line.
    examples_per_step = PER_DEVICE_TRAIN_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS
    if len(dataset) < examples_per_step:
        logging.warning(
            f"Dataset has only {len(dataset)} example(s), fewer than one effective "
            f"batch ({examples_per_step}). Check that {FINETUNE_DATA_FILE} contains "
            "one JSON record per line."
        )

    # 3. Configure BitsAndBytes for 4-bit quantization (QLoRA)
    bnb_config = None
    model_dtype = torch.bfloat16  # dtype used when quantization is disabled
    if USE_4BIT_QUANTIZATION:
        model_dtype = getattr(torch, BNB_4BIT_COMPUTE_DTYPE)
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type=BNB_4BIT_QUANT_TYPE,
            bnb_4bit_compute_dtype=model_dtype,
            bnb_4bit_use_double_quant=False,
        )
        logging.info("Using 4-bit quantization (QLoRA).")

    # 4. Load Pre-trained Model
    logging.info(f"Loading base model: {MODEL_NAME}")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=model_dtype,
    )

    if USE_4BIT_QUANTIZATION:
        model = prepare_model_for_kbit_training(model)
        logging.info("Model prepared for k-bit training.")

    # 5. Configure LoRA
    peft_config = LoraConfig(
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        target_modules=LORA_TARGET_MODULES,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, peft_config)
    logging.info("LoRA configured and PEFT model created.")
    model.print_trainable_parameters()

    # 6. Set up Training Arguments
    # A single SFTConfig carries every setting. Previously a TrainingArguments
    # object was built but never handed to the trainer, so output_dir,
    # num_train_epochs, optim and the fp16/bf16 flags were silently ignored.
    training_args = SFTConfig(
        output_dir=OUTPUT_DIR,
        num_train_epochs=NUM_TRAIN_EPOCHS,
        per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        learning_rate=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY,
        optim="paged_adamw_8bit" if USE_4BIT_QUANTIZATION else "adamw_torch",
        fp16=False,
        bf16=not USE_4BIT_QUANTIZATION and torch.cuda.is_bf16_supported(),
        max_grad_norm=MAX_GRAD_NORM,
        warmup_ratio=WARMUP_RATIO,
        lr_scheduler_type=LR_SCHEDULER_TYPE,
        logging_steps=25,
        save_strategy="epoch",
        report_to="tensorboard",
        # NOTE(review): some TRL releases name this option `max_length` --
        # confirm against the installed version.
        max_seq_length=MAX_SEQ_LENGTH,
        packing=False,
    )

    # 7. Initialize SFTTrainer
    # Pass the tokenizer prepared above so the pad-token setup is the one
    # actually used for training and matches the tokenizer saved below.
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        args=training_args,
        processing_class=tokenizer,
    )

    # 8. Start Fine-tuning
    logging.info("Starting training...")
    trainer.train()
    logging.info("Training finished.")

    # 9. Save the Fine-tuned Model Adapter
    logging.info(f"Saving LoRA adapter to {OUTPUT_DIR}")
    trainer.model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    logging.info("Model adapter and tokenizer saved.")

    # 10. Save the full model (merged)
    # NOTE(review): with USE_4BIT_QUANTIZATION the adapter is merged into a
    # quantized base model; reloading the base model in higher precision before
    # merging may give a more faithful checkpoint -- confirm whether it matters.
    merged_dir = os.path.join(OUTPUT_DIR, "final_merged_checkpoint")
    merged_model = model.merge_and_unload()
    merged_model.save_pretrained(merged_dir)
    tokenizer.save_pretrained(merged_dir)
    logging.info("Full merged model saved.")

## 6. Run Fine-tuning

In [13]:
# Run the fine-tuning pipeline defined in main(): train, then save the LoRA
# adapter and the merged model.
main()

trainable params: 11,272,192 || all params: 1,247,086,592 || trainable%: 0.9039


Converting train dataset to ChatML:   0%|          | 0/1 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/1 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1893087 > 131072). Running this sequence through the model will result in indexing errors


Truncating train dataset:   0%|          | 0/1 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


## 7. Download the Fine-tuned Model

In [14]:
!zip -r llama3.2-1b-syllabus-finetuned.zip llama3.2-1b-syllabus-finetuned/
files.download('llama3.2-1b-syllabus-finetuned.zip')

  adding: llama3.2-1b-syllabus-finetuned/ (stored 0%)
  adding: llama3.2-1b-syllabus-finetuned/adapter_model.safetensors (deflated 11%)
  adding: llama3.2-1b-syllabus-finetuned/tokenizer_config.json (deflated 96%)
  adding: llama3.2-1b-syllabus-finetuned/adapter_config.json (deflated 56%)
  adding: llama3.2-1b-syllabus-finetuned/tokenizer.json (deflated 85%)
  adding: llama3.2-1b-syllabus-finetuned/README.md (deflated 66%)
  adding: llama3.2-1b-syllabus-finetuned/final_merged_checkpoint/ (stored 0%)
  adding: llama3.2-1b-syllabus-finetuned/final_merged_checkpoint/model.safetensors (deflated 36%)
  adding: llama3.2-1b-syllabus-finetuned/final_merged_checkpoint/generation_config.json (deflated 32%)
  adding: llama3.2-1b-syllabus-finetuned/final_merged_checkpoint/tokenizer_config.json (deflated 96%)
  adding: llama3.2-1b-syllabus-finetuned/final_merged_checkpoint/config.json (deflated 57%)
  adding: llama3.2-1b-syllabus-finetuned/final_merged_checkpoint/tokenizer.json (deflated 85%)
  add

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>