# Phase 2 - Training with Unsloth + QLoRA

**Objective**: Fine-tune Qwen2.5-Coder-0.5B-Instruct with QLoRA on the FIM (fill-in-the-middle) dataset.


## Step 1: Check GPU Availability

In [None]:
import torch

# Report the installed PyTorch build and whether a CUDA device is visible.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if not torch.cuda.is_available():
    # No CUDA device: tell the user how to switch the Colab runtime to a GPU.
    print("\nWARNING: No GPU detected!")
    print("Please enable GPU: Runtime -> Change runtime type -> T4 GPU")
else:
    # Describe device 0: name, CUDA version, total memory (bytes / 1e9, shown
    # as GB) and whether bfloat16 is supported.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"BFloat16 supported: {torch.cuda.is_bf16_supported()}")

## Step 2: Install Unsloth and Dependencies

This will take ~5 minutes.

In [None]:
%%capture
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

print("Installation complete!")

## Step 3: Upload Training Data

Mount Google Drive and make sure `train.jsonl` and `val.jsonl` from the previous notebook are in the root of `MyDrive`; the cell below reads them from there.

In [None]:
import os
from google.colab import drive

# Mount Google Drive so the JSONL splits stored there are readable from the runtime.
drive.mount('/content/drive')

# Locations of the dataset splits inside the mounted Drive.
TRAIN_PATH = '/content/drive/MyDrive/train.jsonl'
VAL_PATH = '/content/drive/MyDrive/val.jsonl'
TEST_PATH = '/content/drive/MyDrive/test.jsonl'

# Fail early and name the missing file(s) instead of raising a bare AssertionError.
# Only the train and validation splits are required here; TEST_PATH is defined
# but not checked in this cell.
missing_paths = [path for path in (TRAIN_PATH, VAL_PATH) if not os.path.exists(path)]
assert not missing_paths, f"Missing dataset file(s): {missing_paths}"

## Step 4: Load Base Model with QLoRA

In [None]:
from unsloth import FastLanguageModel

# Loading options. max_seq_length is reused later when the trainer is built.
load_in_4bit = True
dtype = None  # no explicit dtype requested; presumably Unsloth picks one - confirm
max_seq_length = 2048 # Updated for Phase 1 Data

print("Loading Qwen2.5-Coder-0.5B-Instruct...")
# from_pretrained returns the model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Qwen/Qwen2.5-Coder-0.5B-Instruct",
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)
print("Model loaded!")

## Step 5: Configure LoRA Adapters

In [None]:
# Projection layers that receive LoRA adapters, grouped by where they sit in
# each transformer block.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# Wrap the loaded base model with LoRA adapters. `model` is rebound to the
# wrapped model, so this cell should be run once per freshly loaded base model.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=attention_projections + mlp_projections,
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)
print("LoRA configured!")


## Step 6: Load and Prepare Datasets





In [None]:
from datasets import load_dataset

print("\nLoading data...")
# Each JSONL file is loaded as its own dataset. Both calls ask for split='train'
# because each call is given a single data file (validation included).
train_full, val_ds = (
    load_dataset('json', data_files=jsonl_path, split='train')
    for jsonl_path in (TRAIN_PATH, VAL_PATH)
)


## Train on 50% Dataset (Sampled)


In [None]:
import torch

# Fraction of the training set to keep. The section heading and the summary line
# both describe a 50% sample, but the previous code kept every row; the fraction
# is now explicit. Set it to 1.0 to train on the full dataset.
TRAIN_FRACTION = 0.5
# Same value as the random_state / seed used elsewhere in this notebook.
SAMPLE_SEED = 42

n_train = int(len(train_full) * TRAIN_FRACTION)

# A seeded generator makes the sampled subset identical across kernel restarts.
# .tolist() hands plain Python ints to Dataset.select instead of tensor elements.
sampler = torch.Generator().manual_seed(SAMPLE_SEED)
indices = torch.randperm(len(train_full), generator=sampler)[:n_train].tolist()
train_ds = train_full.select(indices)

# The percentage is derived from TRAIN_FRACTION so the message cannot drift
# from what was actually sampled.
print(f"Train: {len(train_ds):,} / {len(train_full):,} ({TRAIN_FRACTION:.0%})")

## Step 7: Configure Training Arguments

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer

# Query bf16 support once: bf16 is used when the GPU supports it, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    # --- Bookkeeping ---
    output_dir="./outputs",
    report_to="none",
    logging_steps=50,
    seed=42,

    # --- Schedule ---
    num_train_epochs=3,
    learning_rate=5e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,

    # --- Batching: 4 samples x 2 accumulation steps per device per optimizer step ---
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    group_by_length=True,
    dataloader_num_workers=4,

    # --- Optimizer / regularization ---
    optim="adamw_8bit",
    weight_decay=0.01,
    max_grad_norm=1.0,
    neftune_noise_alpha=5,

    # --- Mixed precision (exactly one of the two flags is True) ---
    bf16=use_bf16,
    fp16=not use_bf16,

    # --- Checkpointing and evaluation are both turned off during training ---
    save_strategy="no",
    eval_strategy="no",
    load_best_model_at_end=False,
)


## Step 8: Initialize Trainer

In [None]:
# Build the SFT trainer from the LoRA-wrapped model, its tokenizer, the dataset
# splits and the training arguments configured above.
trainer = SFTTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    # Data: training text is read from each record's "text" field.
    train_dataset=train_ds,
    eval_dataset=val_ds,
    dataset_text_field="text",
    # Sequence handling: same length limit that was used to load the model.
    max_seq_length=max_seq_length,
    packing=True,
)

## Step 9: Start Training




In [None]:
# Run fine-tuning; whatever train() returns is kept in trainer_stats for later inspection.
trainer_stats = trainer.train()

## Step 10: Save Model

In [None]:
# Write the model first, then the tokenizer, into the same "final_model" directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("final_model")
print("Model saved to final_model")

In [None]:
# Zip for download: recursively (-r) archive the final_model directory
# into final_model.zip in the current working directory.
!zip -r final_model.zip final_model