In [None]:
from unsloth import FastLanguageModel
import torch
import pandas as pd
import json
from datasets import Dataset
from tqdm.auto import tqdm

# --- Loader settings -------------------------------------------------------
# max_seq_length is also handed to the trainer further down, so it stays a
# module-level name.
max_seq_length = 2048
dtype = None          # None lets the loader auto-detect the dtype
load_in_4bit = True   # load 4-bit quantized weights (QLoRA setup)

print("Loading Qwen2.5-Coder-14B-Instruct for fine-tuning...")

# Fetch the pre-quantized checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen2.5-Coder-14B-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

print("Model loaded successfully!")

In [None]:
# Wrap the base model with LoRA adapters so only the adapter weights train.
# The adapted layers are listed in two groups purely for readability; the
# combined list is identical to passing all seven names at once.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model,
    r=128,                                   # LoRA rank
    target_modules=attention_projections + mlp_projections,
    lora_alpha=128,                          # same value as the rank
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

## Data Preparation

In [None]:
# Read the MBPP export. 'PATH_TO_MBPP_CSV' is a placeholder: point it at the
# real CSV before running. Later cells read the 'description', 'code' and
# 'tests' columns from this frame.
mbpp_df = pd.read_csv("PATH_TO_MBPP_CSV")

# Quick sanity summary: row count, column names, and the first two rows.
print(f"Dataset loaded: {len(mbpp_df)} samples")
print(f"Columns: {list(mbpp_df.columns)}")
print("\nSample data:", mbpp_df.head(2), sep="\n")

In [None]:
from unsloth.chat_templates import get_chat_template

# Rebind the tokenizer to the one returned by get_chat_template for the
# "qwen-2.5" template; every later apply_chat_template call goes through it.
tokenizer = get_chat_template(tokenizer, chat_template="qwen-2.5")

def format_mbpp_sample(row):
    """Turn one MBPP record into a system/user/assistant conversation.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) that can be indexed
            with 'description', 'code' and 'tests'.

    Returns:
        A list of three ``{"role": ..., "content": ...}`` dicts, in order:
        the fixed system instructions, a user turn carrying the problem text
        and its test cases, and an assistant turn holding the reference code.
    """
    description, code, tests = row['description'], row['code'], row['tests']

    # Instructions shared by every sample; the text is identical for all rows.
    system_message = """You are an expert Python programmer specializing in algorithmic problem solving. Your task is to generate clean, efficient, and correct Python code that passes all given test cases.

CRITICAL REQUIREMENTS:
1. Analyze the problem description carefully
2. Study the test cases to understand input/output patterns and edge cases
3. Write clean, readable Python code with proper error handling
4. Ensure your solution passes ALL test cases exactly
5. Use appropriate algorithms and data structures for efficiency
6. Handle edge cases like empty inputs, None values, boundary conditions
7. Follow Python best practices and coding standards

RESPONSE FORMAT:
- Provide ONLY the Python code implementation
- Do NOT include explanations, comments about the approach, or markdown formatting
- Do NOT wrap code in backticks or code blocks
- Write complete, executable functions that solve the problem

Your code will be tested against the provided test cases, so accuracy is paramount."""

    # User turn: problem statement, then the tests, then the request itself.
    user_prompt = (
        "Problem: {}\n"
        "\n"
        "Test Cases:\n"
        "{}\n"
        "\n"
        "Generate the Python code solution that passes all test cases."
    ).format(description, tests)

    # Pair each role with its content, keeping the system -> user -> assistant order.
    roles = ("system", "user", "assistant")
    contents = (system_message, user_prompt, code)
    return [
        {"role": role, "content": content}
        for role, content in zip(roles, contents)
    ]

print("Converting MBPP dataset to conversational format...")

# Build one conversation per DataFrame row, with a progress bar over the rows.
row_iter = tqdm(mbpp_df.iterrows(), total=len(mbpp_df), desc="Processing samples")
conversations = [format_mbpp_sample(row) for _, row in row_iter]

# Single-column dataset: each entry of "conversations" is one list of chat turns.
dataset_dict = {"conversations": conversations}
dataset = Dataset.from_dict(dataset_dict)

print(f"Dataset converted: {len(dataset)} conversations ready for training")

In [None]:
def formatting_prompts_func(examples):
    """Render every conversation in a batch to one chat-formatted string.

    Args:
        examples: Batch mapping whose "conversations" entry is a list of
            conversations (each a list of role/content turns).

    Returns:
        ``{"text": [...]}`` with one rendered string per conversation, in the
        same order as the input batch.
    """
    rendered = []
    for convo in examples["conversations"]:
        # Return text rather than token ids, and do not append a trailing
        # generation prompt: the conversation already ends with the answer.
        rendered.append(
            tokenizer.apply_chat_template(
                convo,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

# Add a "text" column holding the rendered chat string for each conversation;
# batched mode passes lists of conversations to formatting_prompts_func.
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
)

print("Chat templates applied to dataset")

## Training Configuration

In [None]:
from trl import SFTConfig, SFTTrainer
from transformers import DataCollatorForSeq2Seq

# Hyper-parameters for the run. There is no validation split, so evaluation
# and best-checkpoint reloading are both switched off.
training_args = SFTConfig(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,   # 1 x 4 sequences per optimizer step, per device
    warmup_steps=10,
    # NOTE(review): the previous comment here said "2 epochs as requested",
    # but the configured value is 3 - confirm which one is intended.
    num_train_epochs=3,
    learning_rate=2e-4,
    logging_steps=50,
    optim="paged_adamw_8bit",        # memory-efficient optimizer
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",
    save_strategy="epoch",           # checkpoint at each epoch boundary...
    save_total_limit=2,              # ...keeping at most two on disk
    load_best_model_at_end=False,
    eval_strategy="no",              # no validation set for now
    dataloader_pin_memory=False,
)

# Supervised fine-tuning over the pre-rendered "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    packing=False,                   # one sample per sequence; better for code generation tasks
    args=training_args,
)

In [None]:
# Set up training on responses only (ignore system/user parts in loss)
from unsloth.chat_templates import train_on_responses_only

# Chat-template markers that open a user turn and an assistant turn; they
# tell train_on_responses_only where instructions end and responses begin.
user_turn_marker = "<|im_start|>user\n"
assistant_turn_marker = "<|im_start|>assistant\n"

trainer = train_on_responses_only(
    trainer,
    instruction_part=user_turn_marker,
    response_part=assistant_turn_marker,
)

print("Configured to train only on assistant responses (code generation)")

## Training Execution

In [None]:
# Baseline snapshot taken before training: device properties, total device
# memory, and the peak reserved memory so far (all sizes in GB, 3 decimals).
# start_gpu_memory and max_memory are reused by the post-training report.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024 ** 3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)

print(f"GPU: {gpu_stats.name}")
print(f"Max memory: {max_memory} GB")
print(f"Memory reserved before training: {start_gpu_memory} GB")

In [None]:
# Run the fine-tuning loop. The returned object is kept because the summary
# cell below reads its `metrics` dict (runtime, loss).
trainer_stats = trainer.train()

In [None]:
# Post-training memory report. All sizes are in GB, rounded to 3 decimals.
# `start_gpu_memory` and `max_memory` come from the pre-training snapshot cell.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
# Growth of the peak reserved memory since the pre-training snapshot.
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

# `train_samples` is not among the metrics Trainer.train() reports by default
# (the Hugging Face example scripts add it by hand as the training-set size),
# so the old lookup always printed 'N/A'. Use the metric when present and
# otherwise fall back to the size of the dataset the trainer was given.
train_samples = trainer_stats.metrics.get('train_samples', len(trainer.train_dataset))

print("TRAINING COMPLETED!")
print(f"Training time: {round(trainer_stats.metrics['train_runtime']/60, 2)} minutes")
print(f"Peak memory: {used_memory} GB ({used_percentage}% of total)")
print(f"Memory for training: {used_memory_for_lora} GB ({lora_percentage}% of total)")
print(f"Final training loss: {trainer_stats.metrics.get('train_loss', 'N/A')}")
print(f"Training samples processed: {train_samples}")

## Model Saving

In [None]:
# Output directory for the fine-tuned artifacts (relative to the working dir).
model_save_path = "qwen25_mbpp_finetuned"

print(f"Saving fine-tuned model to: {model_save_path}")

# Save the LoRA adapters first, then the tokenizer, into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(model_save_path)

print("Model saved successfully!")
print(f"Saved files in: {model_save_path}/")