<a href="https://colab.research.google.com/github/SwanandP10/TinyLlama_FineTunning/blob/main/FineTune_TinyLlama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!pip install torch transformers peft datasets accelerate bitsandbytes trl
!pip install -U bitsandbytes transformers accelerate




In [None]:
import pandas as pd
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Pick the 4-bit compute dtype from the GPU generation: bfloat16 is only
# natively supported from compute capability 8.0 (Ampere) upwards, so older
# cards such as the T4 fall back to float16.
has_native_bf16 = (
    torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
)
compute_dtype = torch.bfloat16 if has_native_bf16 else torch.float16

# 4-bit Quantization Config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# Load TinyLlama in 4-bit Mode
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Later cells tokenize with padding="max_length", which needs a pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# Make the quantized base model trainable-friendly before attaching adapters
# (standard PEFT step for k-bit models).
model = prepare_model_for_kbit_training(model)

# LoRA Configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)


In [None]:
import json
from datasets import Dataset

# Load JSONL file
def load_jsonl(file_path):
    """Read a JSON Lines file into a list of parsed records.

    Blank or whitespace-only lines (for example a trailing newline at the end
    of the file) are skipped instead of being handed to the JSON parser.

    Args:
        file_path: Path to a UTF-8 encoded file holding one JSON document
            per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message names the file and the 1-based line number.
    """
    records = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            payload = line.strip()
            if not payload:
                continue
            try:
                records.append(json.loads(payload))
            except json.JSONDecodeError as err:
                # Same exception type as before, but say where the bad record is.
                raise json.JSONDecodeError(
                    f"{err.msg} (line {line_number} of {file_path})",
                    err.doc,
                    err.pos,
                ) from err
    return records

# Training file, given as a relative path, with one JSON record per line.
dataset_path = "training_data.jsonl"
data = load_jsonl(dataset_path)

# Wrap the parsed records in a Hugging Face Dataset (one row per record).
# format_data below reads a "messages" field from every row.
dataset = Dataset.from_list(data)

# Function to format messages into TinyLlama prompt
def format_data(example):
    """Turn one chat example into padded token ids.

    The example must carry a "messages" list whose items have "role" and
    "content" keys. System, user and assistant turns are rendered in order
    into a single prompt string; turns with any other role contribute nothing.

    Returns:
        A dict with a single "input_ids" entry: the prompt tokenized,
        truncated/padded to 512 tokens.
    """

    def render_turn(turn):
        # One rendered fragment per turn, keyed on the speaker's role.
        speaker = turn["role"]
        text = turn["content"]
        if speaker == "system":
            return f"<s> {text.strip()} \n"
        if speaker == "user":
            return f"[INST] {text.strip()} [/INST] \n"
        if speaker == "assistant":
            return f"{text.strip()} </s> \n"
        return ""

    prompt = "".join(render_turn(turn) for turn in example["messages"])

    encoded = tokenizer(
        prompt.strip(),
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    # return_tensors="pt" yields a batch of one; keep just that row.
    return {"input_ids": encoded["input_ids"][0]}

# Run format_data over every row of the dataset to tokenize it.
formatted_dataset = dataset.map(format_data)

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

In [None]:
from datasets import Dataset

def format_data(example):
    """Render one chat example as a prompt and tokenize it for causal-LM training.

    Args:
        example: A dataset row with a "messages" list; each message has
            "role" ("system", "user" or "assistant") and "content" keys.
            Messages with any other role are ignored.

    Returns:
        A dict with "input_ids", "attention_mask" and "labels", each padded
        or truncated to 512 tokens. Labels equal the input ids except at
        padding positions, which are set to -100 so they are excluded from
        the loss.
    """
    messages = example["messages"]
    parts = []

    for msg in messages:
        role = msg["role"]
        content = msg["content"]

        if role == "system":
            parts.append(f"<s> {content.strip()} \n")  # System instructions
        elif role == "user":
            parts.append(f"[INST] {content.strip()} [/INST] \n")  # User input
        elif role == "assistant":
            parts.append(f"{content.strip()} </s> \n")  # Assistant response

    chat_prompt = "".join(parts)

    # NOTE(review): this is a Llama-2 style [INST] layout; the TinyLlama chat
    # checkpoint presumably ships its own chat template — confirm whether
    # tokenizer.apply_chat_template should be used instead.
    tokenized = tokenizer(
        chat_prompt.strip(),
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    input_ids = tokenized["input_ids"][0]
    attention_mask = tokenized["attention_mask"][0]

    # Mask by attention mask rather than by token id: the pad token may be
    # the same id as </s>, and genuine end-of-turn tokens must stay in the loss.
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }

# Re-tokenize every row with the format_data defined in this cell, which also
# returns attention_mask and labels.
formatted_dataset = dataset.map(format_data)

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq

# Define data collator
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Training arguments optimized for T4 GPU
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,  # Lower batch size for T4 VRAM
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,  # Larger effective batch without extra VRAM
    eval_strategy="no",  # No eval dataset is passed to the Trainer
    eval_steps=100,
    save_steps=500,
    save_total_limit=2,
    learning_rate=3e-5,  # Lower LR for better stability
    weight_decay=0.01,
    fp16=True,  # Mixed precision for T4
    logging_steps=1,  # The run has only a handful of optimizer steps; log each one
    report_to="none",  # Avoid the interactive wandb API-key prompt
    label_names=["labels"],  # PeftModel hides the base signature, so name the labels explicitly
    push_to_hub=False,
)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=formatted_dataset,
    data_collator=data_collator,
)

# Start training (kept as the last expression so the notebook shows TrainOutput)
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Step,Training Loss


TrainOutput(global_step=3, training_loss=1.8771664301554363, metrics={'train_runtime': 118.4549, 'train_samples_per_second': 0.304, 'train_steps_per_second': 0.025, 'total_flos': 63698852904960.0, 'train_loss': 1.8771664301554363, 'epoch': 1.6666666666666665})

In [None]:
# Write the trained model to ./TinyLlama; the inference cell loads it from this path.
# NOTE(review): `model` is PEFT-wrapped, so this presumably stores only the
# LoRA adapter rather than full weights — confirm.
trainer.save_model("./TinyLlama")

In [None]:
from transformers import pipeline

# Wrap the question the same way format_data wrapped user turns during
# fine-tuning; a bare prompt does not match what the adapter was trained on.
prompt = "[INST] Tell me about Metmorphin: [/INST] \n"

generator = pipeline("text-generation", model="./TinyLlama", tokenizer=tokenizer)
# max_new_tokens bounds only the generated continuation (max_length also
# counts the prompt and triggers a tokenizer truncation warning).
output = generator(prompt, max_new_tokens=100)
print(output)

Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


[{'generated_text': 'Tell me about Metmorphin:\n\nMetmorphin is a powerful muscle-building supplement that is designed to help you build muscle mass quickly and efficiently. It contains a blend of amino acids, vitamins, and minerals that work together to promote muscle growth and recovery. Metmorphin is also known for its ability to increase strength and endurance, making it an excellent choice for athletes and bodybuilders. It is'}]
