# 📓 LoRA Evaluation Notebook for Seq2Seq Models (Apple M1/M2/M3 Friendly)

In [9]:

import time
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModelForSeq2SeqLM
from pathlib import Path



# ✅ Choose your model and LoRA adapter

In [None]:

# Checkpoint identifier handed to `from_pretrained` for both tokenizer and model.
model_path = "google/flan-t5-small"

# Directory holding the LoRA adapter, relative to the notebook's working
# directory. Set to None to evaluate the base model without an adapter.
lora_path = "../lora-output"

# Absolute form of `lora_path`; None when no adapter is configured.
adapter_path = Path(lora_path).resolve() if lora_path else None

# Single prompt used for the smoke-test generation below.
prompt = "What is the chemical formula for water?"


# Load tokenizer and model

In [4]:

# Announce the load, then fetch the tokenizer and the seq2seq model from the
# same checkpoint so the vocabulary and the weights match.
print("🔄 Loading tokenizer and model...")
checkpoint = model_path
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint,
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    checkpoint,
)

🔄 Loading tokenizer and model...


In [8]:
# Show the kernel's working directory: relative paths in this notebook are
# resolved against it. Plain Python instead of the IPython `pwd` automagic so
# the cell also works with automagic disabled or when exported to a script.
str(Path.cwd())

'/Users/suraj/Downloads/LLM/model-compression/notebooks'

In [11]:
# Apply LoRA if provided.
# Gate on `adapter_path` -- the very value passed to PEFT below -- so the log
# line always names the directory that is really loaded.
if adapter_path:
    print(f" Applying LoRA adapter from: {adapter_path}")
    # NOTE: this rebinds `model`. Re-run the model-loading cell before
    # re-running this one so the adapter is not applied to an already
    # wrapped model.
    model = PeftModelForSeq2SeqLM.from_pretrained(model, str(adapter_path))

# Tokenize prompt (return_tensors="pt" asks for PyTorch tensors)
inputs = tokenizer(prompt, return_tensors="pt")



 Applying LoRA adapter from: /lora-output


In [12]:
# Generate output
print("⚡ Generating response...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted. Only the difference `end - start` is meaningful.
start = time.perf_counter()
# Cap the answer at 50 newly generated tokens.
outputs = model.generate(input_ids=inputs.input_ids, max_new_tokens=50)
end = time.perf_counter()

⚡ Generating response...


In [13]:
# Decode: turn the first generated sequence back into text, skipping special
# tokens, then report the answer together with the measured generation time.
first_sequence = outputs[0]
response = tokenizer.decode(
    first_sequence,
    skip_special_tokens=True,
)

print("\n🗨️  Response:", response)
print(f"⏱️  Latency: {end - start:.3f} sec")



🗨️  Response: a sulphur
⏱️  Latency: 0.551 sec


In [14]:
# Estimate model size: for every tensor yielded by model.parameters(), multiply
# its element count by the bytes per element, add them up, and express the
# total in decimal megabytes (1 MB = 1e6 bytes) rounded to two places.
total_param_bytes = sum(
    weight.numel() * weight.element_size()
    for weight in model.parameters()
)
size_mb = round(total_param_bytes / 1e6, 2)
print(f"📦 Approx model size: {size_mb} MB")

📦 Approx model size: 309.22 MB
