In [None]:
# What this notebook does:
# Builds a small, randomly initialised LLaMA-style model
# Adds LoRA adapters only to the attention projection layers (q_proj, v_proj)
# Trains only those adapters
# Saves the lightweight, tuned LoRA adapter together with the tokenizer

In [None]:
# 📦 Install required libraries
!pip install -q transformers datasets accelerate peft bitsandbytes

In [None]:
# 🧠 Imports
from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM, TrainingArguments, Trainer
from datasets import Dataset
from peft import LoraConfig, get_peft_model, TaskType
from transformers import DataCollatorForLanguageModeling

In [None]:
# 🔐 Log in to the Hugging Face Hub (interactive prompt rendered in the notebook).
# NOTE(review): presumably required so the tokenizer repo used below can be
# downloaded — confirm whether that repo is gated.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
from transformers import AutoTokenizer

# 🔤 Borrow the tokenizer published with Mistral-7B; only the tokenizer is
# fetched from this repo here.
tokenizer_repo = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_repo)

In [None]:
# 🛠 Define a lightweight LLaMA-style model config
from transformers import set_seed

SEED = 42
BASE_MODEL_DIR = "./llama-base-demo"

# Vocabulary size and special-token ids are taken from the tokenizer so the
# embedding table and the tokenizer agree.
# NOTE(review): pad_token_id is None here if the tokenizer has no pad token
# defined at this point.
config = LlamaConfig(
    vocab_size=tokenizer.vocab_size,
    hidden_size=512,
    intermediate_size=2048,
    num_attention_heads=8,
    num_hidden_layers=4,
    max_position_embeddings=512,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id
)

# 🔧 Build the model
# The weights are randomly initialised (no checkpoint is loaded), so seed the
# RNGs first to get the same starting point after a kernel restart.
set_seed(SEED)
model = LlamaForCausalLM(config)

# Persist the untrained base weights. A LoRA adapter stores only low-rank
# updates, so it is meaningful only on top of the exact base weights it was
# trained against; keeping them on disk makes that base recoverable later.
model.save_pretrained(BASE_MODEL_DIR)

In [None]:
# 🔁 Apply LoRA with PEFT
# Adapter hyper-parameters are gathered in one mapping so they are easy to scan.
lora_settings = {
    "task_type": TaskType.CAUSAL_LM,
    "r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "bias": "none",
    "target_modules": ["q_proj", "v_proj"],  # attention layers
}
lora_config = LoraConfig(**lora_settings)

# Wrap the model with the adapters and report the parameter counts;
# only LoRA params will be trained.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# 📘 Toy dataset: three short sentences stored under a single "text" column
texts = [
    "AI is transforming healthcare and education.",
    "Yoga therapy helps relieve back pain and anxiety.",
    "Robots use sensors to navigate environments.",
]
dataset = Dataset.from_dict(dict(text=texts))

In [None]:
# 4. Tokenize Dataset
MAX_LENGTH = 128  # every example is truncated/padded to this many tokens

# Set the padding token to the EOS token if it's not already set.
# Done once here instead of inside the mapped function, so the tokenizer's
# state does not depend on whether (or where) the map function has run.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def tokenize(example):
    """Tokenize the ``"text"`` field of an example or of a batch of examples.

    Args:
        example: mapping with a ``"text"`` entry — a single string, or a list
            of strings when called through a batched ``Dataset.map``.

    Returns:
        The tokenizer output, truncated and padded to ``MAX_LENGTH`` tokens.
    """
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=MAX_LENGTH)


# Tokenize in batches and drop the raw string column(s) so that only the
# tokenizer outputs are left for the data collator.
tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)

In [None]:
# 5. Data Collator
# mlm=False: batches are collated for causal language modelling rather than
# masked language modelling.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

# 6. Training Arguments
# Kept in a plain mapping so the run settings read as a small table.
training_options = {
    "output_dir": "./llama-pretrain-demo",
    "num_train_epochs": 3,
    "per_device_train_batch_size": 2,
    "logging_steps": 5,
    "save_steps": 10,
    "save_total_limit": 1,
    "report_to": "none",
}
training_args = TrainingArguments(**training_options)

In [None]:
# 🚀 Train with LoRA
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)
trainer = Trainer(**trainer_kwargs)
trainer.train()

# 💾 Save model
# Model and tokenizer are written to the same directory. For a PEFT-wrapped
# model, save_pretrained stores the adapter weights and config.
output_dir = "./lora-llama-demo"
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel, PeftConfig

In [None]:
# 🔄 Load tokenizer
# Read it back from the directory the training run saved it to.
saved_dir = "./lora-llama-demo"
tokenizer = AutoTokenizer.from_pretrained(saved_dir)

# 🧠 Load base model config
from transformers import LlamaConfig, LlamaForCausalLM

In [None]:
# 🔧 Load base model and inject trained LoRA weights
import os
import warnings

BASE_MODEL_DIR = "./llama-base-demo"  # where the base weights used for training are expected
ADAPTER_DIR = "./lora-llama-demo"     # adapter + tokenizer written after training

# A LoRA adapter stores only low-rank updates, so it must be attached to the
# SAME base weights it was trained on. The base here is a randomly initialised
# model, so re-creating it from a config yields different weights.
if os.path.isdir(BASE_MODEL_DIR):
    # Preferred path: restore the exact base weights (and their config) from disk.
    base_model = LlamaForCausalLM.from_pretrained(BASE_MODEL_DIR)
    config = base_model.config
else:
    warnings.warn(
        f"No saved base model found at {BASE_MODEL_DIR!r}; building a freshly "
        "initialised base instead. The LoRA adapter was not trained against "
        "these weights, so generations will not reflect the training run."
    )
    # This should match your original model's config
    config = LlamaConfig(
        vocab_size=tokenizer.vocab_size,
        hidden_size=512,
        intermediate_size=2048,
        num_attention_heads=8,
        num_hidden_layers=4,
        max_position_embeddings=512,
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id
    )
    base_model = LlamaForCausalLM(config)

model = PeftModel.from_pretrained(base_model, ADAPTER_DIR)
model.eval()  # set to inference mode

In [None]:
# Import torch
import torch

GENERATION_SEED = 42

# 🧾 Define a prompt
prompt = "Yoga is helpful for managing stress and"

# 🔢 Tokenize input
inputs = tokenizer(prompt, return_tensors="pt")

# 🔮 Generate prediction
# do_sample=True draws tokens at random, so seed torch's RNG first; this makes
# the printed text reproducible when the cell is re-run.
torch.manual_seed(GENERATION_SEED)
with torch.no_grad():  # inference only — no gradients needed
    output = model.generate(
        **inputs,
        max_new_tokens=30,
        do_sample=True,
        temperature=0.8,
        top_k=50,
        top_p=0.95
    )

# 📢 Decode and print
# output[0] is the first (and only) generated sequence for the single prompt.
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)