In [15]:
# What This Script Does:
# Builds a LLaMA-style model
# Adds LoRA adapters only to attention projection layers
# Trains only those adapters
# Saves a lightweight, tuned model

In [2]:
# 📦 Install required libraries
!pip install -q transformers datasets accelerate peft bitsandbytes

In [3]:
# 🧠 Imports
from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM, TrainingArguments, Trainer
from datasets import Dataset
from peft import LoraConfig, get_peft_model, TaskType
from transformers import DataCollatorForLanguageModeling

In [4]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [6]:
# 🛠 Define a lightweight LLaMA-style model config
config = LlamaConfig(
    vocab_size=tokenizer.vocab_size,
    hidden_size=512,
    intermediate_size=2048,
    num_attention_heads=8,
    num_hidden_layers=4,
    max_position_embeddings=512,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id
)

# 🔧 Build the model
model = LlamaForCausalLM(config)

In [8]:
# 🔁 Apply LoRA with PEFT
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # attention layers
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Only LoRA params will be trained

# 📘 Toy dataset
texts = [
    "AI is transforming healthcare and education.",
    "Yoga therapy helps relieve back pain and anxiety.",
    "Robots use sensors to navigate environments."
]
dataset = Dataset.from_dict({"text": texts})

trainable params: 65,536 || all params: 49,615,360 || trainable%: 0.1321


In [10]:
# 4. Tokenize Dataset
def tokenize(example):
    # Set the padding token to the EOS token if it's not already set
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=128)

tokenized_dataset = dataset.map(tokenize)

Map:   0%|          | 0/3 [00:00<?, ? examples/s]

In [13]:
# 5. Data Collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# 6. Training Arguments
training_args = TrainingArguments(
    output_dir="./llama-pretrain-demo",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_steps=5,
    save_steps=10,
    save_total_limit=1,
    report_to="none"
)

In [14]:
# 🚀 Train with LoRA
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)

trainer.train()

# 💾 Save model
model.save_pretrained("./lora-llama-demo")
tokenizer.save_pretrained("./lora-llama-demo")


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,10.4838


('./lora-llama-demo/tokenizer_config.json',
 './lora-llama-demo/special_tokens_map.json',
 './lora-llama-demo/tokenizer.model',
 './lora-llama-demo/added_tokens.json',
 './lora-llama-demo/tokenizer.json')

In [16]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel, PeftConfig

In [17]:
# 🔄 Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("./lora-llama-demo")

# 🧠 Load base model config
from transformers import LlamaConfig, LlamaForCausalLM

In [18]:
# This should match your original model's config
config = LlamaConfig(
    vocab_size=tokenizer.vocab_size,
    hidden_size=512,
    intermediate_size=2048,
    num_attention_heads=8,
    num_hidden_layers=4,
    max_position_embeddings=512,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id
)

# 🔧 Load base model and inject trained LoRA weights
base_model = LlamaForCausalLM(config)
model = PeftModel.from_pretrained(base_model, "./lora-llama-demo")
model.eval()  # set to inference mode



PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 512, padding_idx=2)
        (layers): ModuleList(
          (0-3): 4 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=512, out_features=512, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=512, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=512, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear

In [20]:
# 🧾 Define a prompt
prompt = "Yoga is helpful for managing stress and"

# 🔢 Tokenize input
inputs = tokenizer(prompt, return_tensors="pt")

# Import torch
import torch

# 🔮 Generate prediction
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=30,
        do_sample=True,
        temperature=0.8,
        top_k=50,
        top_p=0.95
    )

# 📢 Decode and print
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

Yoga is helpful for managing stress and geleFramerific StructMODECTOR BanglЧ Race息�MassMODE fuckcpuffixfd depend act inflation Path fullfd WilsonҐzҐ actCPU


In [19]:
# 🧾 Define a prompt
prompt = "Yoga is helpful for managing stress and"

# 🔢 Tokenize input
inputs = tokenizer(prompt, return_tensors="pt")

# 🔮 Generate prediction
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=30,
        do_sample=True,
        temperature=0.8,
        top_k=50,
        top_p=0.95
    )

# 📢 Decode and print
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

NameError: name 'torch' is not defined