# Fine Tuning LLM using Unsloth

In [3]:
import os
import json
import torch

print(f"torch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")

torch Version: 2.8.0+cu126
CUDA Available: True


In [17]:
dataset = [
    # General knowledge
    {
        "instruction": "What is the capital of France?",
        "input": "",
        "output": "The capital of France is Paris."
    },
    {
        "instruction": "Who is known as the Father of Computers?",
        "input": "",
        "output": "Charles Babbage is known as the Father of Computers."
    },

    # Translation
    {
        "instruction": "Translate English to Tamil",
        "input": "Good morning!",
        "output": "காலை வணக்கம்!"
    },
    {
        "instruction": "Translate Tamil to English",
        "input": "நீங்கள் எப்படி இருக்கிறீர்கள்?",
        "output": "How are you?"
    },

    # Summarization
    {
        "instruction": "Summarize the text in one sentence.",
        "input": "Deep learning enables computers to learn complex patterns from large amounts of data.",
        "output": "Deep learning helps computers understand complex patterns using big data."
    },
    {
        "instruction": "Summarize in simple words.",
        "input": "The Earth revolves around the Sun once every 365 days, creating the cycle of seasons.",
        "output": "The Earth goes around the Sun in a year, which causes seasons."
    },

    # Math
    {
        "instruction": "Solve the math problem.",
        "input": "If a pen costs 12 rupees, how much do 5 pens cost?",
        "output": "5 pens cost 60 rupees."
    },
    {
        "instruction": "Find the next number in the sequence.",
        "input": "2, 4, 8, 16, ?",
        "output": "The next number is 32."
    },

    # Reasoning
    {
        "instruction": "Answer logically.",
        "input": "If it is raining, what should I carry with me?",
        "output": "You should carry an umbrella or a raincoat."
    },
    {
        "instruction": "Explain simply.",
        "input": "Why do plants need sunlight?",
        "output": "Plants need sunlight to make food through photosynthesis."
    }
]


In [18]:
os.makedirs("data",exist_ok=True)
with open("data/dataset.json","w") as f:
    for ex in dataset:
      f.write(json.dumps(ex)+"\n")

print("Sample dataset saved.")

Sample dataset saved.


In [19]:
# install unsloth
!pip install unsloth[colab-new] torch trl peft transformers datasets



# Using Unsloth for fine-tune

In [20]:
from unsloth import FastLanguageModel
from transformers import TrainingArguments, Trainer,DataCollatorForLanguageModeling
from datasets import load_dataset

In [8]:
!pip install -U bitsandbytes
!pip install -U accelerate transformers



In [21]:
model_name = "unsloth/tinyllama"

max_seq_length = 512
dtype = None

# This call returns (model, tokenizer)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=True,
    device_map="auto",
)

==((====))==  Unsloth 2025.8.10: Fast Llama patching. Transformers: 4.56.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [22]:
# Convert model to a PEFT/LoRA-wrapped model if you intend to fine-tune with LoRA
model = FastLanguageModel.get_peft_model(
    model,
    r=8,
    target_modules=["q_proj", "v_proj", "k_proj"],
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing=True,
)
print("Model and tokenizer loaded:", model_name)
print("Model keys / sample config:", getattr(model, "config", None))

Model and tokenizer loaded: unsloth/tinyllama
Model keys / sample config: LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "dtype": "float16",
  "eos_token_id": 2,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 22,
  "num_key_value_heads": 4,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "quantization_config": {
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  },
  "rms_norm_eps": 1e-05,
 

In [23]:
from datasets import load_dataset

# load the newline-json dataset we previously wrote to data/dataset.json
dataset = load_dataset("json", data_files="data/dataset.json", split="train")

# tokenization function
def tokenize_fn(example):
    prompt = (
        f"### Instruction:\n{example.get('instruction','')}\n\n"
        f"### Input:\n{example.get('input','')}\n\n"
        f"### Response:\n{example.get('output','')}"
    )
    tokenized = tokenizer(
        prompt,
        truncation=True,
        max_length=max_seq_length,
        padding="max_length",
    )
    # for LM fine-tuning, labels are usually the same as input_ids
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

# Map
tokenized_dataset = dataset.map(tokenize_fn, batched=False)

print("tokenized_dataset created, sample:", tokenized_dataset[0])

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

tokenized_dataset created, sample: {'instruction': 'What is the capital of France?', 'input': '', 'output': 'The capital of France is Paris.', 'input_ids': [1, 835, 2799, 4080, 29901, 13, 5618, 338, 278, 7483, 310, 3444, 29973, 13, 13, 2277, 29937, 10567, 29901, 13, 13, 13, 2277, 29937, 13291, 29901, 13, 1576, 7483, 310, 3444, 338, 3681, 29889, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [25]:
training_args = TrainingArguments(
    output_dir ="finetuned_model",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate = 2e-4,
    logging_steps=1,
    num_train_epochs=1,
    bf16 = torch.cuda.is_bf16_supported(), #when we use gpu
    fp16 = not torch.cuda.is_bf16_supported(), #when we use cpu
    lr_scheduler_type ="cosine",
    report_to="none",
)

In [26]:
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [27]:
trainer = Trainer(
    model =model,
    args = training_args,
    train_dataset = tokenized_dataset,
    data_collator = data_collator,
)

trainer.train()
print("Training Complete")

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 10 | Num Epochs = 1 | Total steps = 3
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 1,531,904 of 1,101,580,288 (0.14% trained)


Step,Training Loss
1,2.8615
2,2.7219
3,2.0957


Training Complete


# 📌 Fine-Tuned Model by Roger_SJR

# Model: rogersam/tinyllama-instruct-lite-v1
Hugging Face Profile: https://huggingface.co/rogersam

This model is a fine-tuned TinyLlama designed for instruction-following tasks
like Q&A, translation (Tamil ↔ English), summarization, math, and reasoning.

👉 Try the model here: https://huggingface.co/rogersam/tinyllama-instruct-lite-v1

