In [None]:
# Mount Google Drive at /content/drive (Colab only); the project folder used
# by the following cells lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

# Define project path
PROJECT_PATH = "/content/drive/MyDrive/LoRA-from-scratch"
os.makedirs(PROJECT_PATH, exist_ok=True)
os.makedirs(f"{PROJECT_PATH}/src", exist_ok=True)
os.makedirs(f"{PROJECT_PATH}/checkpoints", exist_ok=True)

# Change directory to project root
%cd {PROJECT_PATH}

In [None]:
import sys
# This allows 'import src.lora_layers' to work
sys.path.append(PROJECT_PATH)

# Verify the files are there
!ls src/

In [None]:
!pip install -q transformers datasets accelerate

In [None]:
import torch
import os
import requests
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from datasets import load_dataset
from src.injection import inject_adapter
from src.utils import print_trainable_parameters

# --- Configuration ---
model_id = "distilgpt2"
# Same value as the project path under which the directories were created in
# the setup cell; keep the two in sync.
PROJECT_PATH = "/content/drive/MyDrive/LoRA-from-scratch"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# --- Load & Tokenize Data ---
dataset_url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
dataset_path = os.path.join(PROJECT_PATH, "input.txt")

# Download the dataset only when no local copy exists yet.
if not os.path.exists(dataset_path):
    print(f"Downloading dataset from {dataset_url}...")
    # A timeout keeps the cell from hanging indefinitely on a stalled connection.
    response = requests.get(dataset_url, timeout=30)
    response.raise_for_status()  # Raise an exception for HTTP errors
    # Write under a temporary name and rename afterwards, so an interrupted
    # write never leaves a truncated input.txt that later runs would accept
    # as a complete download.
    partial_path = dataset_path + ".part"
    with open(partial_path, "wb") as f:
        f.write(response.content)
    os.replace(partial_path, dataset_path)
    print(f"Dataset downloaded to {dataset_path}")

# Load the local text file as the "train" split.
dataset = load_dataset("text", data_files={"train": dataset_path})
def tokenize(batch):
    """Tokenize the "text" column of a batch, truncating to at most 128 tokens."""
    texts = batch["text"]
    return tokenizer(texts, max_length=128, truncation=True)
tokenized_data = dataset.map(tokenize, batched=True, remove_columns=["text"])
# Examples that tokenize to nothing (e.g. blank lines in the text file) add no
# training tokens; drop them so a batch can never consist solely of
# zero-length sequences.
tokenized_data = tokenized_data.filter(lambda example: len(example["input_ids"]) > 0)
# mlm=False selects causal-LM style collation (labels derived from the inputs).
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# --- Training Helper Function ---
def train_variant(use_dora=False, name="lora", seed=42):
    """Fine-tune a fresh base model with injected adapters and save the adapter weights.

    Args:
        use_dora: Forwarded to ``inject_adapter`` to select the DoRA variant.
        name: Label used in log messages, in the Trainer output directory
            (``./results_<name>``) and in the saved weights filename.
        seed: Seed applied before adapter injection and passed to the Trainer,
            so that separate runs start from the same random state.

    Returns:
        Path of the ``.pt`` file holding the trainable parameters.
    """
    print(f"\n🚀 Starting Training for: {name.upper()}")

    # Fall back to CPU when no GPU is present instead of failing on .to("cuda").
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Seed before injecting the adapter; presumably inject_adapter initialises
    # the new parameters, which happens before the Trainer does its own seeding.
    torch.manual_seed(seed)

    # Reload fresh base model for each run
    model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

    # Inject our custom layers (Day 1 logic)
    model = inject_adapter(model, rank=8, alpha=16, use_dora=use_dora)
    print_trainable_parameters(model)

    args = TrainingArguments(
        output_dir=f"./results_{name}",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        learning_rate=8e-4,
        logging_steps=100,
        # Only the adapter weights are persisted (see below), so the Trainer's
        # periodic full-model checkpoints are switched off.
        save_strategy="no",
        seed=seed,
        report_to="none"
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=tokenized_data["train"],
        data_collator=data_collator
    )

    trainer.train()

    # Save ONLY the updated weights (the adapter). detach() drops the autograd
    # link so plain tensors are serialised.
    weights = {k: v.detach().cpu() for k, v in model.named_parameters() if v.requires_grad}
    save_path = f"{PROJECT_PATH}/{name}_shakespeare_weights.pt"
    torch.save(weights, save_path)
    print(f"✅ Saved {name.upper()} weights to {save_path}")
    return save_path

# --- Run both variants back to back (LoRA first, then DoRA) ---
for variant_name, dora_flag in (("lora", False), ("dora", True)):
    train_variant(use_dora=dora_flag, name=variant_name)

In [None]:
import torch
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
from src.injection import inject_adapter

# 1. Setup
model_id = "distilgpt2"
# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

def get_model_with_adapter(adapter_path, use_dora=False, rank=8, alpha=16):
    """Load a fresh base model, inject adapter layers and load saved adapter weights.

    Args:
        adapter_path: Path to a ``.pt`` file holding a dict that maps parameter
            names to tensors, as written by the training cell.
        use_dora: Forwarded to ``inject_adapter``; must match the training run.
        rank: Adapter rank; the default matches the training cell (8).
        alpha: Adapter scaling; the default matches the training cell (16), so
            inference does not silently depend on ``inject_adapter``'s default.

    Returns:
        The model on ``device``, in eval mode.

    Raises:
        RuntimeError: If the file holds no tensors, or holds keys that match
            no parameter of the adapted model.
    """
    # Load model initially on CPU
    model = AutoModelForCausalLM.from_pretrained(model_id)
    # Inject adapter layers with the same hyper-parameters used for training
    model = inject_adapter(model, rank=rank, alpha=alpha, use_dora=use_dora)
    # Move entire model (base + adapter) to the target device
    model.to(device)

    # The file only needs to contain tensors, so restrict unpickling to them.
    state_dict = torch.load(adapter_path, map_location=device, weights_only=True)
    if not state_dict:
        raise RuntimeError(f"No adapter weights found in {adapter_path}")

    # strict=False because the file holds only the adapter tensors; the base
    # weights are intentionally absent. Keys that match nothing would be
    # skipped silently, leaving the adapter untrained, so they are checked here.
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.unexpected_keys:
        raise RuntimeError(
            f"Adapter file {adapter_path} has keys that match no model parameter: "
            f"{load_result.unexpected_keys}"
        )

    model.eval()
    return model

# 2. Load Both Versions
# Ensure these filenames match what the training cell saved
lora_model = get_model_with_adapter(f"{PROJECT_PATH}/lora_shakespeare_weights.pt", use_dora=False)
dora_model = get_model_with_adapter(f"{PROJECT_PATH}/dora_shakespeare_weights.pt", use_dora=True)

# 3. The Comparison Logic
prompts = [
    "To be, or not to be:",
    "All the world's a stage,",
    "Shall I compare thee to a summer's day?"
]

# Generation settings for creativity. Built once: they do not depend on the prompt.
gen_config = {
    "max_new_tokens": 40,
    "do_sample": True,
    "temperature": 0.8,
    "top_k": 50,
    "top_p": 0.9,
    # Passed explicitly; the tokenizer's pad token was set to its EOS token.
    "pad_token_id": tokenizer.eos_token_id,
}

# Sampling is random; re-seeding before every generate() call makes the table
# reproducible and lets both models sample from the same random state.
GENERATION_SEED = 42

results = []

print("✨ Generating Comparisons...")
for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        torch.manual_seed(GENERATION_SEED)
        lora_out = lora_model.generate(**inputs, **gen_config)
        torch.manual_seed(GENERATION_SEED)
        dora_out = dora_model.generate(**inputs, **gen_config)

    results.append({
        "Prompt": prompt,
        "LoRA Output": tokenizer.decode(lora_out[0], skip_special_tokens=True),
        "DoRA Output": tokenizer.decode(dora_out[0], skip_special_tokens=True)
    })

# 4. Display as a nice Table
df = pd.DataFrame(results)
# Show the full generated text instead of truncating wide columns.
pd.set_option('display.max_colwidth', None)
display(df)

In [None]:
import torch
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
from src.injection import inject_adapter

# 1. Setup
model_id = "distilgpt2"
# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

def get_model_with_adapter(adapter_path, use_dora=False, rank=8, alpha=16):
    """Load a fresh base model, inject adapter layers and load saved adapter weights.

    Args:
        adapter_path: Path to a ``.pt`` file holding a dict that maps parameter
            names to tensors, as written by the training cell.
        use_dora: Forwarded to ``inject_adapter``; must match the training run.
        rank: Adapter rank; the default matches the training cell (8).
        alpha: Adapter scaling; the default matches the training cell (16), so
            inference does not silently depend on ``inject_adapter``'s default.

    Returns:
        The model on ``device``, in eval mode.

    Raises:
        RuntimeError: If the file holds no tensors, or holds keys that match
            no parameter of the adapted model.
    """
    # Load model initially on CPU
    model = AutoModelForCausalLM.from_pretrained(model_id)
    # Inject adapter layers with the same hyper-parameters used for training
    model = inject_adapter(model, rank=rank, alpha=alpha, use_dora=use_dora)
    # Move entire model (base + adapter) to the target device
    model.to(device)

    # The file only needs to contain tensors, so restrict unpickling to them.
    state_dict = torch.load(adapter_path, map_location=device, weights_only=True)
    if not state_dict:
        raise RuntimeError(f"No adapter weights found in {adapter_path}")

    # strict=False because the file holds only the adapter tensors; the base
    # weights are intentionally absent. Keys that match nothing would be
    # skipped silently, leaving the adapter untrained, so they are checked here.
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.unexpected_keys:
        raise RuntimeError(
            f"Adapter file {adapter_path} has keys that match no model parameter: "
            f"{load_result.unexpected_keys}"
        )

    model.eval()
    return model

# 2. Load Both Versions
# Ensure these filenames match what the training cell saved
# NOTE(review): apart from "repetition_penalty" this cell repeats the previous
# comparison cell; consider turning the comparison into a function that takes
# the generation settings as a parameter.
lora_model = get_model_with_adapter(f"{PROJECT_PATH}/lora_shakespeare_weights.pt", use_dora=False)
dora_model = get_model_with_adapter(f"{PROJECT_PATH}/dora_shakespeare_weights.pt", use_dora=True)

# 3. The Comparison Logic
prompts = [
    "To be, or not to be:",
    "All the world's a stage,",
    "Shall I compare thee to a summer's day?"
]

# Generation settings for creativity. Built once: they do not depend on the prompt.
gen_config = {
    "max_new_tokens": 40,
    "do_sample": True,
    "temperature": 0.8,
    "top_k": 50,
    "top_p": 0.9,
    "repetition_penalty": 1.2,
    # Passed explicitly; the tokenizer's pad token was set to its EOS token.
    "pad_token_id": tokenizer.eos_token_id,
}

# Sampling is random; re-seeding before every generate() call makes the table
# reproducible and lets both models sample from the same random state.
GENERATION_SEED = 42

results = []

print("✨ Generating Comparisons...")
for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        torch.manual_seed(GENERATION_SEED)
        lora_out = lora_model.generate(**inputs, **gen_config)
        torch.manual_seed(GENERATION_SEED)
        dora_out = dora_model.generate(**inputs, **gen_config)

    results.append({
        "Prompt": prompt,
        "LoRA Output": tokenizer.decode(lora_out[0], skip_special_tokens=True),
        "DoRA Output": tokenizer.decode(dora_out[0], skip_special_tokens=True)
    })

# 4. Display as a nice Table
df = pd.DataFrame(results)
# Show the full generated text instead of truncating wide columns.
pd.set_option('display.max_colwidth', None)
display(df)