In [1]:
# --- 1. Mount Google Drive ---
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# First uninstall the current version
!pip uninstall unsloth -y

# Install the latest version
!pip install --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# --- 2. Install Unsloth + dependencies ---
!pip install bitsandbytes -q

Found existing installation: unsloth 2025.9.3
Uninstalling unsloth-2025.9.3:
  Successfully uninstalled unsloth-2025.9.3
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-v81jxrq3/unsloth_eb2f6aba64a247dbb4e429729bf6f656
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-v81jxrq3/unsloth_eb2f6aba64a247dbb4e429729bf6f656
  Resolved https://github.com/unslothai/unsloth.git to commit 5b5e0348057038bab165e5aa7745998b073a38e0
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone
  Created wheel for unsloth: filename=unsloth-2025.9.3-py3-none-any.w

In [3]:
!pip install -U transformers accelerate trl peft



In [4]:
# --- 3. Import libraries ---
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [5]:
import transformers
print(transformers.__version__)

4.56.1


In [6]:

# --- 4. Dataset paths (stored in Google Drive) ---
TRAIN_PATH = "/content/drive/MyDrive/colab/train_dataset.jsonl"
EVAL_PATH  = "/content/drive/MyDrive/colab/eval_dataset.jsonl"

In [7]:
# --- 5. Load dataset ---
train_dataset = load_dataset("json", data_files=TRAIN_PATH, split="train")
eval_dataset  = load_dataset("json", data_files=EVAL_PATH, split="train")

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [8]:
# --- 6. Load LLaMA-3 base model ---
# Use the smallest LLaMA-3 model available to fit Colab GPU
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "meta-llama/Llama-3.2-3B",
    load_in_4bit = True,
    max_seq_length = 2048,
    dtype = None,
    device_map = "auto",
)

==((====))==  Unsloth 2025.9.3: Fast Llama patching. Transformers: 4.56.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

In [9]:

# --- 7. Apply LoRA fine-tuning ---
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,                 # rank
    target_modules = ["q_proj", "v_proj"],  # common for LLaMA
    lora_alpha = 32,
    lora_dropout = 0.05,
    bias = "none",
    use_gradient_checkpointing = True,
    random_state = 42,
)

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.9.3 patched 28 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [10]:
# --- 8. Pre-process datasets with formatting function ---
def formatting_func(example):
    text = f"### Input:\n{example['prompt']}\n\n### Response:\n{example['completion']}"
    return {"text": text}

# Apply formatting to both datasets
train_dataset = train_dataset.map(formatting_func)
eval_dataset = eval_dataset.map(formatting_func)

# --- 9. Create trainer WITHOUT formatting_func ---
trainer = SFTTrainer(
    model = model,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    tokenizer = tokenizer,
    # Remove formatting_func since we pre-processed
    dataset_text_field = "text",  # Add this field
    max_seq_length = 2048,
    packing = False,
    args = TrainingArguments(
        output_dir = "/content/llama-adapter",
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 2,
        learning_rate = 2e-4,
        fp16 = True,
        logging_steps = 10,
        eval_strategy = "steps",
        eval_steps = 50,
        save_strategy = "steps",
        save_steps = 200,
        save_total_limit = 2,
        report_to = "none",
        optim = "paged_adamw_32bit",
    ),
)

Map:   0%|          | 0/908 [00:00<?, ? examples/s]

Map:   0%|          | 0/199 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/908 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/199 [00:00<?, ? examples/s]

In [11]:
# --- 9. Start training ---
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 908 | Num Epochs = 2 | Total steps = 228
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 4,587,520 of 3,217,337,344 (0.14% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss
50,0.665,0.880503
100,0.5784,0.6462
150,0.5196,0.529048
200,0.4737,0.503908


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


TrainOutput(global_step=228, training_loss=0.6379763452630294, metrics={'train_runtime': 859.505, 'train_samples_per_second': 2.113, 'train_steps_per_second': 0.265, 'total_flos': 8001410193985536.0, 'train_loss': 0.6379763452630294, 'epoch': 2.0})

In [12]:
# --- 10. Save adapter to Google Drive ---
SAVE_PATH = "/content/drive/MyDrive/colab/llama-adapter"
model.save_pretrained(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)

('/content/drive/MyDrive/colab/llama-adapter/tokenizer_config.json',
 '/content/drive/MyDrive/colab/llama-adapter/special_tokens_map.json',
 '/content/drive/MyDrive/colab/llama-adapter/tokenizer.json')