In [None]:
!pip install datasets peft trl bitsandbytes -q

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from trl import SFTTrainer
from huggingface_hub import login, HfApi

In [None]:
# Hugging Face authentication.
# The token is read from the HF_TOKEN environment variable instead of being pasted
# into the notebook, so the secret never ends up in a saved or shared copy.
# When the variable is unset or empty, HF_TOKEN is None: login() then asks for the
# token interactively, and the later `token=HF_TOKEN` arguments fall back to the
# credentials stored by that login.
HF_TOKEN = os.environ.get("HF_TOKEN") or None
login(token=HF_TOKEN)

# Configuration parameters
MODEL_NAME = "google/gemma-2-2b-it"  # base checkpoint to fine-tune
OUTPUT_DIR = "./results"  # root folder for checkpoints and exported models
LORA_OUTPUT_DIR = f"{OUTPUT_DIR}/lora-adapters"
MERGED_OUTPUT_DIR = f"{OUTPUT_DIR}/merged-model"
HF_LORA_REPO = "CharanSaiVaddi/gemma-2-2b-it-gsm8k-lora"  # Hub repo for the adapters
HF_MERGED_REPO = "CharanSaiVaddi/gemma-2-2b-it-gsm8k-merged"  # Hub repo for the merged model

# Training parameters
MICRO_BATCH_SIZE = 1  # examples per device per forward/backward pass
BATCH_SIZE = 2  # effective batch size per optimizer step
# Number of micro-batches accumulated to reach BATCH_SIZE
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
LEARNING_RATE = 2e-4
TRAIN_STEPS = 1000  # total optimizer steps
SAVE_STEPS = 200  # checkpoint interval, in steps
EVAL_STEPS = 200  # evaluation interval; only referenced by the disabled eval settings
MAX_SEQ_LENGTH = 2048

In [None]:
# Load GSM8K (the "main" configuration) and keep its training split
dataset = load_dataset(path="gsm8k", name="main")
train_dataset = dataset["train"]

In [None]:
# Format the GSM8K dataset
def format_instruction(example):
    """Render one GSM8K record as a single Gemma-2 chat-formatted training string.

    Args:
        example: Mapping with a "question" string and an "answer" string.

    Returns:
        A dict with one key, "text", holding the question wrapped in a user
        turn followed by the complete answer wrapped in a model turn.
    """
    question = example["question"]
    # The whole answer text is used as the training target, including anything
    # after a "####" marker; nothing is extracted or stripped from it.
    answer = example["answer"]

    # Gemma-2 instruction format: one user turn, one model turn
    formatted_prompt = f"<start_of_turn>user\nSolve this math problem step by step: {question}<end_of_turn>\n<start_of_turn>model\n{answer}<end_of_turn>"

    return {"text": formatted_prompt}

# Run the formatter over every training example to produce the "text" field
processed_dataset = train_dataset.map(function=format_instruction)

In [None]:
# Print the installed bitsandbytes version so an outdated install is easy to spot
# before the 4-bit model is loaded.
import bitsandbytes as bnb
print(bnb.__version__)

In [None]:
# If bitsandbytes is outdated, reinstall it first:
# !pip install -U bitsandbytes --force-reinstall
import bitsandbytes as bnb  # fails here, before any download, if bitsandbytes is unusable

# Quantization settings: 4-bit NF4 weights with double quantization.
# The compute dtype is bfloat16 so the de-quantized matmuls use the same precision
# as the bf16 mixed-precision training enabled by `bf16=True` in the training
# arguments; float16 here would mix two different half-precision formats.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Load the tokenizer and the quantized base model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate place the weights on the available device(s)
    token=HF_TOKEN,
)

In [None]:
# Fall back to the EOS token for padding when the tokenizer defines no pad token
if not tokenizer.pad_token:
    tokenizer.pad_token, tokenizer.pad_token_id = (
        tokenizer.eos_token,
        tokenizer.eos_token_id,
    )

# LoRA adapter settings for causal-LM fine-tuning
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    # Module names that receive adapters
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

# Ready the quantized model for k-bit training, then wrap it with the LoRA adapters
model = get_peft_model(prepare_model_for_kbit_training(model), peft_config)

In [None]:
# Define training arguments.
# The run length is controlled by `max_steps` alone: a positive `max_steps`
# overrides `num_train_epochs`, so the epoch count is not set here.
# Evaluation is disabled because no eval dataset is passed to the trainer; to enable
# it, provide one and set eval_strategy="steps" with eval_steps=EVAL_STEPS.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    max_steps=TRAIN_STEPS,
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    learning_rate=LEARNING_RATE,
    bf16=True,
    save_strategy="steps",
    save_steps=SAVE_STEPS,
    logging_steps=50,
    remove_unused_columns=False,
    push_to_hub=False,  # uploads are done explicitly after training
    report_to="tensorboard",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    weight_decay=0.01,
)

# Set up the trainer.
# `model` has already been wrapped with the LoRA adapters via get_peft_model, so
# `peft_config` is not passed again: handing SFTTrainer both a PeftModel and a
# peft_config is redundant and, depending on the TRL version, is either ignored
# or rejected.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=processed_dataset,
)

# Train the model
trainer.train()

In [None]:
# Write the trained LoRA adapter and the tokenizer to the local adapter folder
for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained(LORA_OUTPUT_DIR)

# Publish the adapter and the tokenizer to the LoRA repo on the HF Hub
for artifact in (model, tokenizer):
    artifact.push_to_hub(HF_LORA_REPO)

# Create the merged model.
# The base checkpoint is reloaded without quantization so the adapter weights can
# be folded into it. It is loaded in bfloat16 to match the bf16 precision the
# adapters were trained with (`bf16=True`), rather than converting to float16.
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    token=HF_TOKEN,
)
# Attach the adapters saved in LORA_OUTPUT_DIR, then merge them into the base weights
adapter_model = PeftModel.from_pretrained(base_model, LORA_OUTPUT_DIR)
merged_model = adapter_model.merge_and_unload()

In [None]:
# Write the merged model and the tokenizer to the local merged-model folder
for artifact in (merged_model, tokenizer):
    artifact.save_pretrained(MERGED_OUTPUT_DIR)

# Publish both to the merged-model repo on the HF Hub
for artifact in (merged_model, tokenizer):
    artifact.push_to_hub(HF_MERGED_REPO)

# Summarize where the artifacts were published
print(f"Training complete! Models saved to:")
print(f"LoRA adapters: {HF_LORA_REPO}")
print(f"Merged model: {HF_MERGED_REPO}")

In [None]:
# Example inference code to test the model
def test_model(model_id, question):
    """Test the model with a sample question"""
    test_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
    test_tokenizer = AutoTokenizer.from_pretrained(model_id)
    
    prompt = f"<start_of_turn>user\nSolve this math problem step by step: {question}<end_of_turn>\n<start_of_turn>model\n"
    inputs = test_tokenizer(prompt, return_tensors="pt").to(model.device)
    
    outputs = test_model.generate(
        inputs.input_ids,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
    )
    
    return test_tokenizer.decode(outputs[0], skip_special_tokens=True)

# Smoke test: load the model from HF_MERGED_REPO and print its answer to one sample question
sample_question = "John has 5 apples. He buys 2 more apples and then gives 3 apples to his friend. How many apples does John have now?"
result = test_model(HF_MERGED_REPO, sample_question)
print(result)