In [1]:
import sys
sys.path.append('..')

import torch
import tiktoken
from src.model import GPTModel, replace_linear_with_lora
from src.finetune import train_model_simple, generate_text_simple, text_to_token_ids, token_ids_to_text, generate
from src.formatter import format_input_advanced
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from src.loader import load_weights_into_gpt, download_and_load_gpt2
from src.finetune import calc_loss_loader, train_model_simple,train_model_with_checkpoints


2025-10-28 10:15:23.747035: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Use the GPU when PyTorch can see a CUDA device; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device:", device)

Device: cuda


In [3]:
CHOOSE_MODEL = "gpt2-large (774M)"
INPUT_PROMPT = "Every effort moves"

# Per-size settings; the key is the label selected via CHOOSE_MODEL.
model_configs = {
    "gpt2-small (124M)": dict(emb_dim=768, n_layers=12, n_heads=12),
    "gpt2-medium (355M)": dict(emb_dim=1024, n_layers=24, n_heads=16),
    "gpt2-large (774M)": dict(emb_dim=1280, n_layers=36, n_heads=20),
    "gpt2-xl (1558M)": dict(emb_dim=1600, n_layers=48, n_heads=25),
}

# Settings shared by every size, followed by the entries of the chosen size.
BASE_CONFIG = {
    "vocab_size": 50257,
    "context_length": 1024,
    "drop_rate": 0.0,
    "qkv_bias": True,
    **model_configs[CHOOSE_MODEL],
}

In [5]:
# IMPORTANT: The checkpoint was saved from a LoRA-enabled model,
# so the model must be given the same LoRA structure before loading it.
CHECKPOINT_PATH = "gpt2-large774M-sft1.pth"
LORA_RANK = 16   # MUST match the rank used during training
LORA_ALPHA = 16  # MUST match the alpha used during training

# Step 1: Create a base model
model = GPTModel(BASE_CONFIG)

# Step 2: Freeze base model parameters (optional, but good practice)
for param in model.parameters():
    param.requires_grad = False

# Step 3: Apply LoRA with the training-time rank/alpha
replace_linear_with_lora(model, rank=LORA_RANK, alpha=LORA_ALPHA)

# Step 4: NOW load the checkpoint (which includes LoRA weights).
# The file is only used as a state dict, so weights_only=True is requested:
# it restricts unpickling to tensors and plain containers instead of
# executing arbitrary pickled code from the file.
checkpoint = torch.load(CHECKPOINT_PATH, map_location="cpu", weights_only=True)
model.load_state_dict(checkpoint)

# Step 5: Move to device and set to eval mode
model = model.to(device)
model.eval()

print("Model loaded successfully!")
print(f"Total trainable LoRA parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

Model loaded successfully!
Total trainable LoRA parameters: 14,095,632


## Understanding the LoRA Checkpoint

**Key Points:**
1. The checkpoint `gpt2-large774M-sft1.pth` was saved from a **LoRA-enabled model**
2. It contains both the frozen base weights AND the trainable LoRA weights (A and B matrices)
3. To load it, you MUST create a model with the **same LoRA structure** (same rank and alpha)

**Model Structure in Checkpoint:**
- Base weights: `*.linear.weight` and `*.linear.bias` 
- LoRA weights: `*.lora.A` and `*.lora.B`

**If you want to use the model without LoRA structure:**
You would need to merge the LoRA weights into the base weights. This is typically done before saving the checkpoint.


In [6]:
# Optional: Function to merge LoRA weights into base model
# (Useful if you want to deploy without LoRA structure)

def merge_lora_weights(model):
    """
    Fold every LoRA update into its wrapped linear layer, in place.

    A submodule is treated as a LoRA wrapper when it exposes both a
    ``linear`` and a ``lora`` attribute. For each wrapper the delta
    ``alpha * (A @ B)`` is transposed and added to ``linear.weight``, then
    ``lora.B`` is zeroed.

    Zeroing ``B`` matters because the wrapper modules are left in the model:
    without it the LoRA branch would keep contributing on top of the merged
    weight, applying the update twice, and a second call would merge the
    same delta again. With ``B`` at zero the delta is zero, so repeated
    calls leave the weights unchanged.
    NOTE(review): this assumes the wrapper's forward adds a LoRA term that
    is linear in ``B`` (e.g. ``linear(x) + alpha * (x @ A @ B)``) - confirm
    against the wrapper implementation.

    Args:
        model: A ``torch.nn.Module`` whose submodules are scanned via
            ``named_modules()``.

    Returns:
        The same ``model`` object, modified in place.
    """
    for name, module in model.named_modules():
        if not (hasattr(module, 'linear') and hasattr(module, 'lora')):
            continue

        linear = module.linear
        lora = module.lora

        # Pure weight surgery: keep it out of the autograd graph.
        with torch.no_grad():
            # LoRA delta: alpha * (A @ B); transposed to match linear.weight.
            lora_weight = lora.alpha * (lora.A @ lora.B)
            linear.weight.add_(lora_weight.T)
            # Neutralize the LoRA branch so the update is not applied twice.
            lora.B.zero_()

        print(f"Merged LoRA weights into {name}")

    return model

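# Note: Don't run this unless you specifically want to fold the LoRA update into the base weights.
# The function leaves the LoRA wrapper modules in place; to get a plain model without them,
# you would still need to construct a new GPTModel and copy the merged `linear` weights into it.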


In [7]:

# Sanity check: the first block's query projection should print as a
# LinearWithLoRA module, and the parameters should sit on the chosen device.
first_query_proj = model.trf_blocks[0].att.W_query
param_device = next(model.parameters()).device

print("Model structure sample:")
print(first_query_proj)
print("\nDevice:", param_device)

Model structure sample:
LinearWithLoRA(
  (linear): Linear(in_features=1280, out_features=1280, bias=True)
  (lora): LoRALayer()
)

Device: cuda:0


In [8]:
# Prompt template name handed to the formatter by format_input().
PROMPT_STYLE = "enhanced"

# "gpt2" encoding from tiktoken, used for text generation.
tokenizer = tiktoken.get_encoding("gpt2")

def format_input(entry):
    """
    Build the prompt text for one entry via the advanced formatter module.

    The template is selected by the module-level PROMPT_STYLE, which is
    read at call time. Styles listed for the formatter:
    - 'enhanced': Improved Alpaca format (recommended)
    - 'chatml': ChatML-style format (ChatGPT/GPT-4 style)
    - 'task_aware': Adapts based on task type
    - 'cot': Chain-of-thought for reasoning tasks
    - 'structured': Encourages structured outputs

    Returns whatever format_input_advanced returns for the entry.
    """
    prompt = format_input_advanced(entry, style=PROMPT_STYLE)
    return prompt


In [9]:
# Smoke test: generate a reply to one hand-written instruction
test_entry = {"instruction": "What is the capital of France?", "input": ""}
input_text = format_input(test_entry)

prompt_ids = text_to_token_ids(input_text, tokenizer).to(device)
token_ids = generate(
    model=model,
    idx=prompt_ids,
    max_new_tokens=100,
    context_size=BASE_CONFIG["context_length"],
    eos_id=50256,
)
generated_text = token_ids_to_text(token_ids, tokenizer)

# Skip the first len(input_text) characters (assumes the decoded text starts
# with the prompt), then strip the response header and surrounding whitespace.
completion = generated_text[len(input_text):]
response_text = completion.replace("### Response:", "").strip()

print("Input:", input_text, "\nModel Response:", response_text, sep="\n")


Input:
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
What is the capital of France?
### Response:


Model Response:
The capital of France is Paris.


Loaded 500 evaluation samples

Sample entry structure:
Keys: ['instruction', 'input', 'output', 'model_response']

First sample:
Instruction: Given a sentence, add in 4 adjectives that describe the sentence.
Input: He was an astrophysicist.
Expected Output: He was a brilliant, inquisitive, ambitious, and passionate astrophysicist....


SAMPLE EVALUATION ENTRIES

--- Sample 1 ---
Instruction: Given a sentence, add in 4 adjectives that describe the sentence.
Input: He was an astrophysicist.
Expected Output: He was a brilliant, inquisitive, ambitious, and passionate astrophysicist.
Previous Model Response: He was an astrophyscientist, a scientist, and an astronomer.


--- Sample 2 ---
Instruction: Given a sentence, explain why this statement could be problematic.
Input: Women should stay at home and take care of the children.
Expected Output: This statement could be problematic because it implies that women are not capable of pursuing careers or other interests outside of the home. In addition, it implies that men are not responsible for taking care of the children, which is not fair or equitable.
Previous Model Response: This statement could be difficult to interpret, as it implies that women should stay at home and take care of the children. This could lead to a lack of respect for the role of women in society, as the

Testing on sample 0:
Instruction: Given a sentence, add in 4 adjectives that describe the sentence.
Input: He was an astrophysicist.

Expected Output: He was a brilliant, inquisitive, ambitious, and passionate astrophysicist.

Your Model's Response: He was an astrophyscientist, a scientist, and an astronomer.


## Evaluate on Full Dataset

Now you can evaluate your model on the full evaluation dataset. You have several options:

1. **Generate new responses** for all samples and save them
2. **Compare** your model's responses with expected outputs
3. **Calculate metrics** (BLEU, ROUGE, etc.)
4. **Use an LLM-as-a-Judge** approach for qualitative evaluation

The cells below will help you generate responses for the entire dataset.


Hello! Nice to meet you. How can I help today? 
- Ask me questions or explanations (any topic)
- Get help with writing, coding, or math
- Brainstorm ideas or plan a project
- Get summaries or explanations of articles or papers

If you’d like, tell me a bit about what you’re up to and I’ll tailor my response.
