<a href="https://colab.research.google.com/github/Ramanarayanan/LLM-Finetuning/blob/main/Tiny_llam_custom_FineTune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install transformers datasets peft huggingface_hub

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.13.0->peft)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.13.0->peft)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.13.0->peft)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting

In [4]:
from datasets import Dataset

# Hardcoded list of QA pairs as dictionaries
data = [
    {"question": "Who is Murugan selvaraj?", "answer": "Murugan is Manager in world largest IT company ACC"},
    {"question": "Who wrote the play 'Romeo and Juliet'?", "answer": "William Shakespeare"},
    {"question": "Who is ram?", "answer": "Ram is developer in HCL"},
    {"question": "How much of experence Murugan in .net ?", "answer": "He having 15 years of experience"},
    {"question": "How much of experence Ram in .net?", "answer": "He is having 18 years in .NET"},
    {"question": "What year did the first man land on the moon?", "answer": "1969"},
    {"question": "What language is primarily spoken in Brazil?", "answer": "Portuguese"},
    {"question": "Which element has the atomic number 6?", "answer": "Carbon"},
    {"question": "Who is known as the father of computers?", "answer": "Charles Babbage"},
    {"question": "What is the process by which plants make food?", "answer": "Photosynthesis"}
]

# Create a Hugging Face Dataset object from the list
dataset = Dataset.from_list(data)

# Example: Inspect first few entries
print(dataset[:3])

{'question': ['Who is Murugan selvaraj?', "Who wrote the play 'Romeo and Juliet'?", 'Who is ram?'], 'answer': ['Murugan is Manager in world largest IT company ACC', 'William Shakespeare', 'Ram is developer in HCL']}


In [5]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model

# PARAMETERS – change to your liking
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
HF_REPO = "RamAi24/tinyllama-lora-cpu-demo"  # change!
MAX_LEN = 128

In [6]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [7]:
from transformers import AutoTokenizer

# Suppose you already have this dataset (from your hardcoded list)
# dataset = Dataset.from_list(data)  # from previous example

# Load tokenizer (example with TinyLlama)
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

MAX_LEN = 128  # max length for padding/truncation

def preprocess_fn(example):
    # Format the prompt and expected completion together as a single string,
    # often helpful for causal LMs fine-tuning:
    # You can format prompts differently based on your training style.
    text = f"Question: {example['question']}\nAnswer: {example['answer']}"

    # Tokenize input text with truncation and padding
    tokenized = tokenizer(text, truncation=True, padding='max_length', max_length=MAX_LEN)

    # Create labels for causal LM training: for causal LM, labels = input_ids,
    # but pad tokens in labels are set to -100 to ignore in loss calculation.
    labels = [
        token_id if token_id != tokenizer.pad_token_id else -100
        for token_id in tokenized["input_ids"]
    ]
    tokenized["labels"] = labels
    return tokenized

# Process entire dataset:
tokenized_dataset = dataset.map(preprocess_fn)

# Now tokenized_dataset contains input_ids, attention_mask, labels fields
# ready to be used with Trainer or any custom training loop.


Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [8]:
# --- 3. Load model and apply LoRA ---
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

lora_config = LoraConfig(
    r=4,
    lora_alpha=2,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

In [9]:
# --- 4. Data collator (standard for language modeling) ---
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)


In [10]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mramanarayanank[0m ([33mramanarayanank-personal[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [11]:
# --- 5. Training arguments (CPU ONLY) ---
training_args = TrainingArguments(
    output_dir="./lora-tinyllama-finetuned",
    per_device_train_batch_size=1,
    num_train_epochs=3,
    learning_rate=2e-4,
    save_strategy="epoch",
    logging_steps=10,
    push_to_hub=False,
    no_cuda=True,      # ensure CPU training
    eval_steps=None,
    report_to="wandb",
    run_name="lora_tinyllama_finetuned_run"
)




In [12]:
# --- 6. Trainer and training ---
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,3.1281
20,3.0391
30,2.9778


TrainOutput(global_step=30, training_loss=3.0483474731445312, metrics={'train_runtime': 396.6478, 'train_samples_per_second': 0.076, 'train_steps_per_second': 0.076, 'total_flos': 23848141455360.0, 'train_loss': 3.0483474731445312, 'epoch': 3.0})

In [None]:
model.save_pretrained("./lora-tinyllama-finetuned")
tokenizer.save_pretrained("./lora-tinyllama-finetuned")

In [13]:
model = model.merge_and_unload()
model.save_pretrained("./my-merged-finetuned-model")
tokenizer.save_pretrained("./my-merged-finetuned-model")

KeyboardInterrupt: 

In [15]:

# 1. Load model and tokenizer from local directory
save_dir = "./my-merged-finetuned-model"
tokenizer = AutoTokenizer.from_pretrained(save_dir)
model = AutoModelForCausalLM.from_pretrained(save_dir)

# 2. Prepare your question (use the same prompt template as during fine-tuning)
input_question = "Who is Murugan selvaraj?"
prompt = f"Question: {question}\nAnswer:"

# 3. Tokenize input and generate answer
inputs = tokenizer(prompt, return_tensors="pt")
model.eval()
with torch.no_grad():
    output_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=32,
        do_sample=False
    )

# 4. Decode and print the result (removing the prompt from the output if desired)
decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)
answer = decoded[len(prompt):].strip()  # Remove prompt text
print("Answer:", answer)


TypeError: not a string

In [16]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: write).
The token `alltoken` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authentic

('./lora-tinyllama-finetuned/tokenizer_config.json',
 './lora-tinyllama-finetuned/special_tokens_map.json',
 './lora-tinyllama-finetuned/chat_template.jinja',
 './lora-tinyllama-finetuned/tokenizer.model',
 './lora-tinyllama-finetuned/added_tokens.json',
 './lora-tinyllama-finetuned/tokenizer.json')

In [None]:
model.push_to_hub("RamAi24/tinyllama-lora-cpu-demo")
tokenizer.push_to_hub("RamAi24/tinyllama-lora-cpu-demo")

README.md:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

NameError: name 'AutoTokenizer' is not defined

NameError: name 'model' is not defined

In [None]:
model_hf_id = "RamAi24/tinyllama-lora-cpu-demo"
tokenizer = AutoTokenizer.from_pretrained(model_hf_id)
model = AutoModelForCausalLM.from_pretrained(model_hf_id)


In [None]:
input_question = "Who is Murugan selvaraj?"
prompt = f"Question: {input_question}\nAnswer:"

In [None]:
import torch

# Tokenize the prompt
inputs = tokenizer(prompt, return_tensors="pt")

# Generate the answer
model.eval()
with torch.no_grad():
    output_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=32,         # Limit answer length
        do_sample=False            # Greedy output, or True for sampling
    )


In [None]:
# Remove the prompt from the result to get only the answer
decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)
# Remove the prompt if desired:
answer = decoded[len(prompt):].strip()
print("Answer:", answer)
