# Qwen2.5-Coder-7B Fine-tuning with LoRA

This notebook fine-tunes Qwen2.5-Coder-7B-Instruct on your custom coding dataset using LoRA.

**Before running:**
1. Go to Runtime > Change runtime type > Select **T4 GPU**
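2. Have a HuggingFace token with **write** access ready (create one at https://huggingface.co/settings/tokens). Write access is required because this notebook pushes the fine-tuned model to the Hub.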

**Dataset:** `goodknightleo/qwen-coder-training-data`

**Output:** `goodknightleo/qwen-coder-7b-finetuned`

## 1. Install Dependencies

In [None]:
!pip install -q transformers>=4.36.0 datasets accelerate peft trl bitsandbytes huggingface_hub

## 2. Login to HuggingFace

Enter your HuggingFace token when prompted. Get one at: https://huggingface.co/settings/tokens

In [None]:
from huggingface_hub import login
# Authenticate this session with the HuggingFace Hub. Called without
# arguments, so the token is entered at the prompt instead of being
# hard-coded in the notebook.
login()

## 3. Check GPU

In [None]:
import torch

# Report whether CUDA is usable and, if it is, the name and total memory of
# device 0 (memory is shown in decimal gigabytes, i.e. bytes / 1e9).
cuda_ready = torch.cuda.is_available()
print(f"GPU Available: {cuda_ready}")
if cuda_ready:
    device_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {device_props.total_memory / 1e9:.1f} GB")

## 4. Load Dataset from HuggingFace Hub

In [None]:
from datasets import load_dataset

# Training and validation data are separate JSONL files in one Hub repository.
DATASET_REPO = "goodknightleo/qwen-coder-training-data"


def _load_jsonl(filename):
    """Load a single JSONL file from DATASET_REPO and return it as one dataset.

    split="train" is requested for every file, including the validation file,
    because each call loads exactly one file via `data_files`.
    """
    return load_dataset(DATASET_REPO, data_files=filename, split="train")


train_dataset = _load_jsonl("train.jsonl")
eval_dataset = _load_jsonl("valid.jsonl")

# Quick sanity check: split sizes and the column names of the first record.
print(f"Training examples: {len(train_dataset)}")
print(f"Validation examples: {len(eval_dataset)}")
print("\nSample format:")
print(train_dataset[0].keys())

## 5. Load Model with 4-bit Quantization

In [None]:
# Imported here as well so this cell does not depend on the GPU-check cell
# having been run first (torch.float16 is needed for the quantization config).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"

# 4-bit quantization config for T4 GPU (16GB VRAM):
# NF4 weights with double quantization, float16 for compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Fall back to EOS for padding only when the tokenizer has no pad token of its
# own. Aliasing pad to EOS unconditionally means any collator that excludes
# pad positions from the loss would also exclude real end-of-sequence tokens,
# so the fine-tuned model may not learn when to stop generating.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print("Loading model with 4-bit quantization...")
# NOTE(review): trust_remote_code=True allows code from the model repo to run;
# confirm it is still required for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
# The KV cache is only useful for generation; it is switched off for training
# (gradient checkpointing is enabled in the training configuration).
model.config.use_cache = False

print("Model loaded successfully!")

## 6. Configure LoRA

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# LoRA adapters are attached to the four attention projection layers only.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]

# Adapter hyperparameters: rank 16, alpha 32, light dropout, no bias training.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=attention_projections,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Ready the quantized base model for training, then wrap it with the adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), peft_config)

# Show how many parameters will actually be updated.
model.print_trainable_parameters()

## 7. Configure Training

In [None]:
from trl import SFTTrainer, SFTConfig

# Settings are grouped by concern and merged into a single SFTConfig below.
# Values are tuned for a T4 GPU.

# Where checkpoints are published: pushed to the Hub on every save.
hub_settings = {
    "push_to_hub": True,
    "hub_model_id": "goodknightleo/qwen-coder-7b-finetuned",
    "hub_strategy": "every_save",
}

# Core optimisation schedule: batch size 1 with 8 accumulation steps.
optimisation_settings = {
    "num_train_epochs": 3,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "learning_rate": 1e-5,
    "max_length": 1024,  # Reduced for T4 memory
    "warmup_ratio": 0.1,
    "lr_scheduler_type": "cosine",
}

# Memory savers: checkpointing, half precision, paged 8-bit optimizer.
memory_settings = {
    "gradient_checkpointing": True,
    "fp16": True,
    "optim": "paged_adamw_8bit",
}

# Log every 5 steps; save and evaluate every 25 steps, keeping 2 checkpoints.
checkpoint_settings = {
    "logging_steps": 5,
    "save_strategy": "steps",
    "save_steps": 25,
    "save_total_limit": 2,
    "eval_strategy": "steps",
    "eval_steps": 25,
    "report_to": "none",  # Disable wandb
}

training_args = SFTConfig(
    output_dir="./qwen-coder-finetuned",
    **hub_settings,
    **optimisation_settings,
    **memory_settings,
    **checkpoint_settings,
)

print("Training configuration ready!")

## 8. Train!

In [None]:
# Build the supervised fine-tuning trainer.
#
# * The tokenizer is passed as `processing_class`: the older `tokenizer=`
#   keyword is deprecated in TRL and is not accepted by the releases that
#   provide the `max_length` config field used in the training configuration.
# * No `peft_config` is passed: `model` has already been wrapped with the LoRA
#   adapters via `get_peft_model(...)` in the LoRA cell, so supplying the
#   config again is redundant (and rejected by recent TRL releases).
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
)

print("Starting training...")
print("This will take approximately 20-40 minutes on T4 GPU")
print("-" * 50)

# Runs the full training loop; checkpoints, evaluation and Hub uploads follow
# the schedule defined in `training_args`.
trainer.train()

## 9. Save & Push to Hub

In [None]:
# Persist the final trained weights locally via the trainer.
print("Saving final model...")
trainer.save_model()

# Upload the result to the HuggingFace Hub repository configured on the trainer.
print("Pushing to HuggingFace Hub...")
trainer.push_to_hub()

# Completion banner followed by the model's Hub URL.
banner = "=" * 50
for line in (
    "\n" + banner,
    "TRAINING COMPLETE!",
    banner,
    "\nYour model is available at:",
    "https://huggingface.co/goodknightleo/qwen-coder-7b-finetuned",
):
    print(line)

## 10. Test the Model (Optional)

In [None]:
# Quick test of the fine-tuned model
from transformers import pipeline

# Merge LoRA weights for inference
# NOTE(review): the base model was loaded in 4-bit; merging adapters into
# quantized weights may introduce small rounding differences. Confirm this is
# acceptable, or reload the base model in half precision before merging.
model = model.merge_and_unload()

# Undo the training-time setup before generating: switch to eval mode and
# re-enable the KV cache that was turned off when the model was loaded.
model.eval()
model.config.use_cache = True

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    # Return only the newly generated text so the prompt does not have to be
    # stripped off by slicing the output string.
    return_full_text=False,
)

# Test prompt
messages = [
    {"role": "system", "content": "You are an expert software engineer."},
    {"role": "user", "content": "Write a Python function to check if a string is a palindrome."}
]

# Render the chat messages with the model's chat template and append the
# assistant-turn marker so generation starts at the assistant's reply.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
output = pipe(prompt)

print("Test Output:")
print(output[0]["generated_text"])