<a href="https://colab.research.google.com/github/Deepak-desk/Fine_tune-llama/blob/main/fine_tunellama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth.chat_templates import get_chat_template, standardize_sharegpt

# Load model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Add LoRA adapters
model = FastLanguageModel.get_peft_model(
    model, r=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],
)

# Set up chat template and prepare dataset
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")
dataset = load_dataset("mlabonne/FineTome-100k", split="train[:1%]")  # Smaller slice for Colab
dataset = standardize_sharegpt(dataset)
dataset = dataset.map(
    lambda examples: {
        "text": [
            tokenizer.apply_chat_template(convo, tokenize=False)
            for convo in examples["conversations"]
        ]
    },
    batched=True
)

# Set up trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,  # You can increase this
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        output_dir="outputs",
        report_to="none",
    ),
)

# Train the model
trainer.train()

# Save model
model.save_pretrained("finetuned_model")
tokenizer.save_pretrained("finetuned_model")

==((====))==  Unsloth 2025.7.11: Fast Llama patching. Transformers: 4.54.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"]:   0%|          | 0/1000 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,000 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856 of 3,237,063,680 (0.75% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,1.5692
2,1.3134
3,1.2857
4,1.2321
5,1.0423
6,1.3379
7,1.2665
8,1.2385
9,0.9375
10,1.012


('finetuned_model/tokenizer_config.json',
 'finetuned_model/special_tokens_map.json',
 'finetuned_model/chat_template.jinja',
 'finetuned_model/tokenizer.json')

In [6]:
!zip -r finetuned_model.zip finetuned_model
from google.colab import files
files.download("finetuned_model.zip")


  adding: finetuned_model/ (stored 0%)
  adding: finetuned_model/adapter_config.json (deflated 57%)
  adding: finetuned_model/special_tokens_map.json (deflated 71%)
  adding: finetuned_model/tokenizer.json (deflated 85%)
  adding: finetuned_model/adapter_model.safetensors (deflated 8%)
  adding: finetuned_model/tokenizer_config.json (deflated 96%)
  adding: finetuned_model/chat_template.jinja (deflated 72%)
  adding: finetuned_model/README.md (deflated 65%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [1]:
!pip install --upgrade bitsandbytes
!pip install -q accelerate datasets peft trl transformers
!pip install git+https://github.com/unslothai/unsloth.git


Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-c

In [7]:
from unsloth import FastLanguageModel
from peft import PeftModel
from transformers import AutoTokenizer

base_model, _ = FastLanguageModel.from_pretrained(
    "unsloth/Llama-3.2-3B-Instruct",
    load_in_4bit=True,
    max_seq_length=2048,
)

model = PeftModel.from_pretrained(base_model, "finetuned_model")
tokenizer = AutoTokenizer.from_pretrained("finetuned_model")

prompt = "### Human: Explain black holes in simple words.\n### Assistant:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=150)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


==((====))==  Unsloth 2025.7.11: Fast Llama patching. Transformers: 4.54.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
### Human: Explain black holes in simple words.
### Assistant: Black holes are regions in space where gravity is so strong that nothing, including light, can escape. They form when massive stars collapse in on themselves. Imagine a super-powerful vacuum cleaner that sucks up everything, including stars, planets, and even light. That's basically what a black hole is. They're invisible because they suck up all light, but their gravity is so strong that it warps the fabric of space and time around them. Scientists are still learning about black hol

In [8]:
from unsloth import FastLanguageModel
from peft import PeftModel
from transformers import AutoTokenizer

base_model, _ = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    load_in_4bit=True,
    max_seq_length=2048,
)

model = PeftModel.from_pretrained(base_model, "finetuned_model")
tokenizer = AutoTokenizer.from_pretrained("finetuned_model")

prompt = "### Human: Who won the 2022 FIFA World Cup?\n### Assistant:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))



==((====))==  Unsloth 2025.7.11: Fast Llama patching. Transformers: 4.54.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
### Human: Who won the 2022 FIFA World Cup?
### Assistant: The 2022 FIFA World Cup was won by Argentina. They defeated France 4-2 in a penalty shootout after the match ended 3-3 after extra time. The match was held on December 18, 2022, at the Lusail Iconic Stadium in Lusail, Qatar.


In [2]:
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --upgrade bitsandbytes
!pip install -q accelerate datasets peft trl transformers

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-yw0xx1fc/unsloth_61a7ceba76df45fb80e3999c03cdf6f2
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-yw0xx1fc/unsloth_61a7ceba76df45fb80e3999c03cdf6f2
  Resolved https://github.com/unslothai/unsloth.git to commit 5266ead104938c4908c7f2d2a60526555faf7e85
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2025.7.11 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2025.7.11-py3-none-any.whl.metadata (8.1 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.g

In [5]:
print(dataset)

Dataset({
    features: ['conversations', 'source', 'score', 'text'],
    num_rows: 1000
})
