In [None]:
# Install necessary libraries
!pip install torch transformers datasets accelerate peft bitsandbytes

Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from 

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType

In [None]:
# ✅ Tokenizer that ships with the TinyLlama chat checkpoint
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
)

In [None]:
# ✅ Fetch the "train" split of the Alpaca instruction dataset
dataset = load_dataset(
    path="tatsu-lab/alpaca",
    split="train",
)

# ✅ Function to format dataset correctly
def format_data(sample, max_length=512, tok=None):
    """Turn one Alpaca record into a fixed-length causal-LM training example.

    The prompt and the expected response are placed in ONE token sequence
    (BOS + prompt + response + EOS). A causal LM is trained to predict each
    token from the tokens before it, so ``labels`` must line up position by
    position with ``input_ids``; only the response (and the closing EOS) is
    scored, while the prompt and the padding are masked with -100.

    Args:
        sample: Mapping with the keys "instruction" and "output", and
            optionally "input" (extra context for the instruction).
        max_length: Length every returned list is truncated / right-padded to.
        tok: Tokenizer to use. Defaults to the notebook-level ``tokenizer``.

    Returns:
        dict with "input_ids", "attention_mask" and "labels", each a list of
        exactly ``max_length`` ints.
    """
    tok = tokenizer if tok is None else tok

    # Prepare prompt; the optional Alpaca "input" context is included when present
    context = sample.get("input") or ""
    if context:
        prompt = f"Instruction: {sample['instruction']}\nInput: {context}\nResponse:"
    else:
        prompt = f"Instruction: {sample['instruction']}\nResponse:"

    # Tokenize both parts without special tokens so BOS/EOS can be placed explicitly
    prompt_ids = tok(prompt, add_special_tokens=False)["input_ids"]
    response_ids = tok(sample["output"], add_special_tokens=False)["input_ids"]

    bos = [] if tok.bos_token_id is None else [tok.bos_token_id]
    eos = [] if tok.eos_token_id is None else [tok.eos_token_id]

    context_ids = bos + prompt_ids
    # The closing EOS stays in the labels so the model learns where to stop
    target_ids = response_ids + eos

    input_ids = (context_ids + target_ids)[:max_length]
    # Mask by POSITION, not by token id: pad and EOS may share the same id
    labels = ([-100] * len(context_ids) + target_ids)[:max_length]
    attention_mask = [1] * len(input_ids)

    # Right-pad to a fixed length; padded positions are ignored by attention and loss
    pad_id = tok.pad_token_id
    if pad_id is None:
        pad_id = tok.eos_token_id if tok.eos_token_id is not None else 0
    n_pad = max_length - len(input_ids)

    return {
        "input_ids": input_ids + [pad_id] * n_pad,
        "attention_mask": attention_mask + [0] * n_pad,
        "labels": labels + [-100] * n_pad,
    }

# ✅ Tokenize every record and drop the raw text columns afterwards
dataset = dataset.map(
    function=format_data,
    remove_columns=["instruction", "output", "input", "text"],
)

Map:   0%|          | 0/52002 [00:00<?, ? examples/s]

In [None]:
# ✅ Split into 90% train & 10% eval
# A fixed seed makes the shuffle deterministic, so the same records land in
# each split on every Restart & Run All.
split_dataset = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = split_dataset["train"]
eval_dataset = split_dataset["test"]

In [None]:
# ✅ Base TinyLlama weights in half precision (FP16) to reduce memory use;
# device placement is delegated to device_map="auto"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

In [None]:
# ✅ LoRA adapter settings for causal language modelling
# NOTE(review): r=1 is an unusually small rank — confirm this is intentional.
lora_config = LoraConfig(
    target_modules=["q_proj", "v_proj"],  # modules that receive adapters
    r=1,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type=TaskType.CAUSAL_LM,
)

# ✅ Wrap the base model with the LoRA adapter (rebinds `model`)
model = get_peft_model(model=model, peft_config=lora_config)

In [None]:
from transformers import TrainingArguments

# One epoch of training; evaluate, checkpoint and log once at the end of each epoch.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    logging_strategy="epoch",
    # The PEFT wrapper hides the base model's signature, so Trainer cannot infer
    # the label column by itself; without this no validation loss is computed.
    label_names=["labels"],
    report_to="none",  # ✅ Disables W&B completely
)



In [None]:
# Bundle the LoRA-wrapped model, the training arguments and both splits
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=eval_dataset,
    train_dataset=train_dataset,
)

# Run the fine-tuning loop
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss


Epoch,Training Loss,Validation Loss


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

# Locations of the base checkpoint (Hugging Face Hub id) and of the LoRA
# adapter written during training.
# NOTE(review): the step number in the checkpoint directory presumably comes
# from the training run above (dataset size / batch size / epochs) — update it
# if any of those change.
base_model_path = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
fine_tuned_model_path = "./results/checkpoint-5851"

# Tokenizer belonging to the base checkpoint
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Base weights in FP16, placed automatically on the available device(s)
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Attach the fine-tuned LoRA adapter on top of the base model
model = PeftModel.from_pretrained(model=model, model_id=fine_tuned_model_path)




In [None]:
# Function to chat with the model
def chat(max_new_tokens=200):
    """Run an interactive prompt/response loop against the fine-tuned model.

    Reads lines from standard input until the user types 'exit' (or input
    ends / is interrupted), wraps each line in the same
    "Instruction: ... / Response:" template that was used to build the
    training prompts, and prints only the newly generated text.

    Args:
        max_new_tokens: Upper bound on the number of tokens generated per
            reply (the prompt length is not counted against it).
    """
    print("🤖 Fine-Tuned TinyLlama Chatbot (type 'exit' to stop)")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the session cleanly
            print("Goodbye! 👋")
            break

        if user_input.lower() == "exit":
            print("Goodbye! 👋")
            break
        if not user_input:
            # Nothing to answer; ask again
            continue

        # Use the prompt template the model saw during fine-tuning
        prompt = f"Instruction: {user_input}\nResponse:"

        # Tokenize input and move it to wherever the model actually lives
        # (the model is loaded with device_map="auto", so "cuda" is not guaranteed)
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # Generate response; max_new_tokens bounds the reply only, whereas
        # max_length would also count the prompt tokens
        output = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
        )

        # generate() returns prompt + continuation; keep just the continuation
        prompt_len = inputs["input_ids"].shape[1]
        response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()

        print(f"Fine-Tuned TinyLlama: {response}")

# Start the interactive chat session; the loop runs until the user types 'exit'
chat()

🤖 Fine-Tuned TinyLlama Chatbot (type 'exit' to stop)
You: hello
Fine-Tuned TinyLlama: hello, and then I'll be back.

JASON: (smiling) Hey, I'm glad you're here.

JASON'S MOTHER: (smiling) You're welcome.

INT. JASON'S HOUSE - DAY

Jason is sitting on the couch, watching TV. His mother enters the room.

JASON'S MOTHER: (smiling) Hey, Jason. How was your day?

JASON: (smiling) It was good.

JASON'S MOTHER: (nodding) I'm glad to hear that.

JASON: (looking at his phone) Hey, I'm going to be late for work.

JASON'S MOTHER: (smiling) That's okay. I'll be here.


You: what is the capital of india?
Fine-Tuned TinyLlama: what is the capital of india?
You: hi


In [None]:
# Write the model and its tokenizer into the same output directory
model.save_pretrained(save_directory="./fine_tuned_tinyllama")
tokenizer.save_pretrained(save_directory="./fine_tuned_tinyllama")

('./fine_tuned_tinyllama/tokenizer_config.json',
 './fine_tuned_tinyllama/special_tokens_map.json',
 './fine_tuned_tinyllama/tokenizer.model',
 './fine_tuned_tinyllama/added_tokens.json',
 './fine_tuned_tinyllama/tokenizer.json')