In [None]:
# Emit a confirmation message when this cell is executed.
print('Setup complete.')

# Lab 01: Introduction to Fine-Tuning

## Learning Objectives
- Understand what fine-tuning is and why it's important
- Prepare a dataset for a fine-tuning task
- Implement a basic fine-tuning loop from scratch
- Evaluate a fine-tuned model's performance

## Setup

In [None]:
import numpy as np
from typing import List, Dict, Tuple
from dataclasses import dataclass, field
import random
import math

## Part 1: What is Fine-Tuning?

Fine-tuning is the process of taking a pre-trained Large Language Model (LLM) and further training it on a smaller, task-specific dataset. This adapts the model's general knowledge to excel at a particular task, such as classification, summarization, or, in our case, question answering.

**Why Fine-Tune?**
- **Improved Performance**: Typically yields better results on the target task than the general-purpose base model.
- **Domain Adaptation**: Tailors the model to specific jargon, styles, or knowledge domains.
- **Efficiency**: Much cheaper and faster than training a model from scratch.

## Part 2: Preparing a Dataset

In [None]:
@dataclass
class TrainingExample:
    """One supervised fine-tuning pair: an input prompt and its target completion."""
    # Text fed to the model as input (a question in this lab's dataset).
    prompt: str
    # Text the model is expected to produce in response to the prompt.
    completion: str

# Toy training set for a chatbot that answers capital-city questions.
# Each (country, capital) pair below becomes one prompt/completion example.
dataset = [
    TrainingExample(prompt=f'What is the capital of {country}?', completion=capital)
    for country, capital in (
        ('France', 'Paris'),
        ('Japan', 'Tokyo'),
        ('Canada', 'Ottawa'),
        ('Australia', 'Canberra'),
        ('Brazil', 'Brasília'),
    )
]

# A real pipeline would tokenize text into integer IDs with a proper tokenizer.
# This lab uses a character-level mock instead.
class MockTokenizer:
    """Character-level stand-in for a tokenizer, based on Unicode code points."""

    def encode(self, text: str) -> List[int]:
        """Return the code point of every character in ``text``, in order."""
        return list(map(ord, text))

    def decode(self, tokens: List[int]) -> str:
        """Rebuild a string by mapping each token back through ``chr``."""
        return "".join(map(chr, tokens))

# Module-level tokenizer instance; later cells (evaluation and fine-tuning) use it.
tokenizer = MockTokenizer()

# Example of tokenization: encode the first training prompt, then decode the
# token list again to show that the round trip reproduces the original text.
encoded_prompt = tokenizer.encode(dataset[0].prompt)
print(f'Original: {dataset[0].prompt}')
print(f'Encoded: {encoded_prompt}')
print(f'Decoded: {tokenizer.decode(encoded_prompt)}')

## Part 3: The Fine-Tuning Loop

In [None]:
class MockLLM:
    """A simplified mock LLM whose only parameter is a single weight matrix.

    ``weights`` has shape ``(dim, vocab_size)`` and plays two roles: column
    ``t`` is the embedding of token ``t``, and the whole matrix projects the
    averaged embedding back to one logit per vocabulary entry.
    """

    def __init__(self, vocab_size=256, dim=32):
        """Create a model with small random weights drawn from NumPy's global RNG."""
        self.vocab_size = vocab_size
        self.dim = dim
        # A single linear layer represents the model's knowledge
        self.weights = np.random.randn(dim, vocab_size) * 0.1

    def forward(self, input_tokens: List[int]) -> np.ndarray:
        """Return next-token logits for a sequence of token ids.

        The embeddings of all input tokens are averaged and the result is
        projected through ``weights``. In reality, this would be a massive
        transformer architecture.

        Args:
            input_tokens: Token ids, each in ``[0, weights.shape[1])``.

        Returns:
            A 1-D array with one logit per vocabulary entry.

        Raises:
            ValueError: If ``input_tokens`` is empty. Averaging zero
                embeddings would otherwise yield NaN logits of the wrong shape.
            IndexError: If any token id falls outside the vocabulary.
                Negative ids are rejected too, instead of silently wrapping
                around via NumPy's negative indexing.
        """
        # len() rather than truthiness so NumPy arrays are accepted as well.
        if len(input_tokens) == 0:
            raise ValueError("input_tokens must contain at least one token")

        token_ids = np.asarray(input_tokens, dtype=np.intp)
        vocab = self.weights.shape[1]
        out_of_range = (token_ids < 0) | (token_ids >= vocab)
        if out_of_range.any():
            bad = int(token_ids[out_of_range][0])
            raise IndexError(
                f"token id {bad} is outside the model vocabulary [0, {vocab})"
            )

        # Gather every token's embedding column at once, then average them.
        avg_vector = self.weights[:, token_ids].mean(axis=1)
        # Output logits for the next token
        return np.dot(avg_vector, self.weights)

    def generate(self, prompt: str, tokenizer: "MockTokenizer") -> str:
        """Return the decoded text of the single most likely next token.

        In a real model, we'd generate token by token. Here, we simplify and
        pretend the model's top prediction is a single character.
        """
        tokens = tokenizer.encode(prompt)
        logits = self.forward(tokens)
        # Convert the NumPy integer to a plain int before handing it to the tokenizer.
        next_token = int(np.argmax(logits))
        return tokenizer.decode([next_token])

def loss_fn(logits: np.ndarray, target_tokens: List[int]) -> Tuple[float, np.ndarray]:
    """Mean-squared-error loss against a one-hot target, plus its gradient.

    This stands in for cross-entropy in the lab: MSE is used for simplicity,
    as implementing softmax + cross-entropy is verbose. Only the first token
    of the completion is used as the target.

    Args:
        logits: 1-D array of model outputs, one entry per vocabulary item.
        target_tokens: Token ids of the completion; only ``target_tokens[0]``
            is read.

    Returns:
        ``(loss, grad)`` where ``loss`` is the mean of the squared differences
        between ``logits`` and the one-hot target, and ``grad`` is the
        derivative of that loss with respect to ``logits`` (same shape).

    Raises:
        IndexError: If ``target_tokens`` is empty, or if its first token is
            not a valid index into ``logits``.
    """
    # Same exception type as the bare lookup would raise, with a clearer message.
    if len(target_tokens) == 0:
        raise IndexError("target_tokens must contain at least one token")

    target_one_hot = np.zeros_like(logits)
    target_one_hot[target_tokens[0]] = 1  # Only the first completion token is predicted

    residual = logits - target_one_hot
    loss = np.mean(residual ** 2)
    # d/dlogits of mean(residual**2) = 2 * residual / N
    grad = 2 * residual / logits.size
    return loss, grad

def fine_tune(model: MockLLM, dataset: List[TrainingExample], tokenizer: MockTokenizer, epochs: int, lr: float) -> List[float]:
    """A basic fine-tuning loop that updates ``model.weights`` in place.

    For every example the prompt is encoded and run through the model, the
    loss against the completion is computed, and one gradient step is applied
    immediately (one update per example, not per batch).

    Args:
        model: Model to train; its ``weights`` array is modified in place.
        dataset: Training examples; must not be empty.
        tokenizer: Encodes prompts and completions into token ids.
        epochs: Number of full passes over ``dataset``.
        lr: Learning rate applied to each gradient step.

    Returns:
        The average loss of each epoch, in order (empty when ``epochs`` is 0).

    Raises:
        ValueError: If ``dataset`` is empty, since the per-epoch average loss
            would otherwise divide by zero.
    """
    if not dataset:
        raise ValueError("dataset must contain at least one training example")

    loss_history = []
    for epoch in range(epochs):
        total_loss = 0.0
        for example in dataset:
            # Forward pass
            input_tokens = tokenizer.encode(example.prompt)
            logits = model.forward(input_tokens)

            # Calculate loss
            target_tokens = tokenizer.encode(example.completion)
            loss, grad = loss_fn(logits, target_tokens)
            total_loss += loss

            # Backward pass (simplified gradient update).
            # This is a huge simplification of backpropagation: only the
            # output-projection term outer(avg_embedding, dLoss/dLogits) is
            # applied to the weights.
            avg_vector = model.weights[:, input_tokens].mean(axis=1)
            model.weights -= lr * np.outer(avg_vector, grad)

        average_loss = total_loss / len(dataset)
        loss_history.append(float(average_loss))
        print(f'Epoch {epoch+1}/{epochs}, Average Loss: {average_loss:.4f}')

    return loss_history

## Part 4: Evaluation

In [None]:
def evaluate_model(model: MockLLM, test_dataset: List[TrainingExample]) -> float:
    """Return the fraction of examples whose first completion character is predicted.

    Our mock generation is very basic (it emits a single character), so a
    prediction counts as correct when it equals the first character of the
    expected completion. Examples with an empty completion never count as
    correct.

    Note: prompts are encoded with the module-level ``tokenizer``, which must
    be defined before this function is called.

    Args:
        model: Model whose ``generate`` method produces the prediction.
        test_dataset: Examples to score; must not be empty.

    Returns:
        Accuracy in the range ``[0.0, 1.0]``.

    Raises:
        ValueError: If ``test_dataset`` is empty, since accuracy would
            otherwise divide by zero.
    """
    if not test_dataset:
        raise ValueError("test_dataset must contain at least one example")

    correct = 0
    for example in test_dataset:
        predicted_char = model.generate(example.prompt, tokenizer)
        if example.completion and predicted_char == example.completion[0]:
            correct += 1
    return correct / len(test_dataset)

# Create a test set of capital-city questions that do not appear in `dataset`
test_dataset = [
    TrainingExample(prompt="What is the capital of Germany?", completion="Berlin"),
    TrainingExample(prompt="What is the capital of Italy?", completion="Rome"),
]

# Initialize a fresh model (its weights start out random)
model = MockLLM()

# Baseline: evaluate before fine-tuning.
# NOTE: the evaluation set is the training examples plus the test examples
# combined, so this score is not a pure held-out measurement.
accuracy_before = evaluate_model(model, dataset + test_dataset)
print(f'Accuracy before fine-tuning: {accuracy_before:.2%}')

# Fine-tune the model on the training examples only (test_dataset is not used here)
fine_tune(model, dataset, tokenizer, epochs=100, lr=0.01)

# Evaluate after fine-tuning on the same combined set, so both scores are comparable
accuracy_after = evaluate_model(model, dataset + test_dataset)
print(f'Accuracy after fine-tuning: {accuracy_after:.2%}')

## Exercises

1. **Experiment with Hyperparameters**: Change the learning rate (`lr`) and number of `epochs`. How does it affect the final accuracy? What happens if the learning rate is too high or too low?
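2. **Expand the Dataset**: Add more country-capital pairs to the `dataset`. Does the model's accuracy on the test set improve? (The evaluation cell scores `dataset + test_dataset` together; call `evaluate_model(model, test_dataset)` to measure held-out accuracy on its own.)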
3. **Implement a Better Evaluation Metric**: The current evaluation only checks the first character. Modify `evaluate_model` to check for the full word completion (you may need to adjust the `generate` function to produce more tokens).

## Summary

In this lab, you learned:
- The core concept of fine-tuning a pre-trained model.
- How to structure a dataset for a question-answering task.
- The components of a fine-tuning loop: forward pass, loss calculation, and backward pass (gradient update).
- The importance of evaluating a model before and after fine-tuning to measure improvement.