In [None]:
# Confirm that the notebook's setup cell ran.
print("Setup complete.")

# Lab 05: Sample End-to-End Fine-Tuning Project

## Learning Objectives
- Integrate all concepts from the previous labs into a single project
- Prepare a real-world style dataset for fine-tuning
- Apply LoRA for parameter-efficient fine-tuning
- Run, monitor, and evaluate the fine-tuning job from start to finish
- Gain a holistic understanding of a complete fine-tuning workflow

## Setup

In [None]:
import json
import math
import random
import re
from collections import Counter
from dataclasses import dataclass
from typing import List, Dict, Tuple, Any

import matplotlib.pyplot as plt
import numpy as np
# We will reuse the mock classes from previous labs
# In a real project, these would be imports from libraries like transformers, peft, etc.

### --- Mock Implementations from Previous Labs --- ###
# (These would normally be imported from other files or libraries)

In [None]:
# From Lab 01 & 02: Data Handling
@dataclass
class TrainingExample:
    """A single supervised fine-tuning example.

    The ``dataclass`` decorator must be imported from the standard-library
    ``dataclasses`` module for this definition to run.
    """

    # Text shown to the model (in this lab: the formatted instruction block).
    prompt: str
    # Text the model is expected to produce for ``prompt``.
    completion: str

def clean_text(text: str) -> str:
    """Return *text* with surrounding whitespace removed and lowercased."""
    trimmed = text.strip()
    return trimmed.lower()

# From Lab 03: LoRA and PEFT
class LoRALayer:
    """Low-rank adapter stored alongside a frozen weight matrix.

    Keeps the supplied weights plus two factors: ``A`` with shape
    ``(rows, rank)`` and ``B`` with shape ``(rank, cols)``. The effective
    weights are ``original_weights + A . B``.
    """

    def __init__(self, original_weights: np.ndarray, rank: int):
        n_rows = original_weights.shape[0]
        n_cols = original_weights.shape[1]

        self.original_weights = original_weights
        # A starts as small random noise and B as zeros, so the low-rank
        # product contributes nothing until the factors are updated.
        self.A = np.random.randn(n_rows, rank) * 0.01
        self.B = np.zeros((rank, n_cols))
        # Freeze the base weights. No copy is made, so the array passed in by
        # the caller becomes read-only as well.
        self.original_weights.flags.writeable = False

    @property
    def combined_weights(self):
        """Return the frozen weights plus the current low-rank update."""
        low_rank_delta = self.A @ self.B
        return self.original_weights + low_rank_delta

class MockLoRAModel:
    """Stand-in model holding one LoRA-adapted weight matrix."""

    def __init__(self, vocab_size=256, dim=32, lora_rank=4):
        # Random (dim, vocab_size) base weights play the role of a pretrained layer.
        base_weights = np.random.randn(dim, vocab_size) * 0.1
        self.lora_layer = LoRALayer(base_weights, rank=lora_rank)

    def generate(self, prompt: str) -> str:
        """Return a fixed placeholder completion; *prompt* is ignored."""
        return 'mock completion'

# From Lab 04: Evaluation
def simple_bleu(reference: str, candidate: str) -> float:
    """Compute a simplified, unigram-only BLEU score.

    Both strings are split on whitespace. The score is the clipped unigram
    precision of *candidate* against *reference*, multiplied by a brevity
    penalty when the candidate is shorter than the reference.

    Args:
        reference: The expected text.
        candidate: The generated text to score.

    Returns:
        A score in ``[0.0, 1.0]``; ``0.0`` if either string has no tokens.
    """
    ref_tokens = reference.split()
    cand_tokens = candidate.split()
    if not ref_tokens or not cand_tokens:
        return 0.0

    # Counter intersection keeps the minimum count per token, which is exactly
    # the clipped match count, without rescanning the token lists per token.
    clipped_matches = Counter(cand_tokens) & Counter(ref_tokens)
    precision = sum(clipped_matches.values()) / len(cand_tokens)

    # Penalize candidates that are shorter than the reference.
    if len(cand_tokens) < len(ref_tokens):
        brevity_penalty = math.exp(1 - len(ref_tokens) / len(cand_tokens))
    else:
        brevity_penalty = 1.0
    return brevity_penalty * precision

## Step 1: Load and Prepare the Dataset

Our task is to fine-tune a model to be a helpful assistant that answers questions about historical figures.

In [None]:
# Raw question/answer records, deliberately messy (stray whitespace,
# inconsistent casing) so the preparation step has something to clean.
raw_data = [
    {"question": question, "answer": answer}
    for question, answer in (
        ("  Who was the first emperor of Rome? ", "Augustus Caesar."),
        (
            "Tell me about Cleopatra",
            "Cleopatra was the last active ruler of the Ptolemaic Kingdom of Egypt.",
        ),
        ("When did Leonardo da Vinci live?  ", "1452-1519"),
        ("what is einstein known for", "The theory of relativity."),
        ("", "This should be removed."),  # Bad data: empty question
    )
]

# Data Preparation
def prepare_dataset(raw_data: List[Dict[str, str]]) -> List[TrainingExample]:
    """Clean raw question/answer records and format them as training examples.

    Each record's ``'question'`` and ``'answer'`` values are passed through
    ``clean_text``. Records whose cleaned question or answer is empty are
    dropped. The remaining questions are wrapped in an instruction-following
    prompt template.

    Args:
        raw_data: Dicts with ``'question'`` and ``'answer'`` keys. A missing
            or ``None`` value is treated as an empty string.

    Returns:
        One ``TrainingExample`` per usable record, in input order.
    """
    prepared = []
    for item in raw_data:
        # ``or ''`` covers both a missing key and an explicit None value.
        prompt = clean_text(item.get('question') or '')
        completion = clean_text(item.get('answer') or '')
        if not (prompt and completion):
            continue  # Skip records with nothing to learn from.

        # Formatting for instruction-following: sections are separated by a
        # blank line and the prompt ends right where the answer should begin.
        formatted_prompt = (
            "### Instruction:\nAnswer the following question.\n\n"
            f"### Question:\n{prompt}\n\n"
            "### Answer:\n"
        )
        prepared.append(TrainingExample(prompt=formatted_prompt, completion=completion))
    return prepared

# Clean the raw records, then keep the first three examples for training and
# hold out the rest for evaluation.
dataset = prepare_dataset(raw_data)
train_dataset, eval_dataset = dataset[:3], dataset[3:]

# Show what one fully formatted example (prompt followed by target) looks like.
print("--- Prepared Training Example ---")
print(f"{train_dataset[0].prompt}{train_dataset[0].completion}")

## Step 2: Set Up the Model and Training Pipeline

In [None]:
class EndToEndPipeline:
    """Mock fine-tuning job: train, evaluate, log and plot.

    The pipeline does not learn anything real. Training applies random
    updates to the model's LoRA factors, and evaluation simulates a model
    that becomes more likely to answer correctly as epochs accumulate.

    Args:
        model: Object exposing ``lora_layer`` with NumPy arrays ``A`` and ``B``.
        train_data: Sized collection of training examples.
        eval_data: Iterable, sized collection of examples with a
            ``completion`` attribute.
        lr: Step size applied to the mock gradients.
    """

    def __init__(self, model, train_data, eval_data, lr=0.01):
        self.model = model
        self.train_data = train_data
        self.eval_data = eval_data
        self.lr = lr
        # One entry per completed epoch for each metric.
        self.history = {'loss': [], 'bleu_score': []}

    def run(self, epochs: int):
        """Run *epochs* rounds of training, evaluation and logging."""
        print("--- Starting End-to-End Fine-Tuning Job ---")
        num_train = len(self.train_data)
        for epoch in range(epochs):
            # 1. Training Step
            total_loss = self._train_one_epoch()
            # An empty training set yields a loss of 0.0 instead of dividing by zero.
            avg_loss = total_loss / num_train if num_train else 0.0

            # 2. Evaluation Step (runs before this epoch is appended to the
            # history, so the first epoch is evaluated with zero improvement).
            avg_bleu = self._evaluate()

            # 3. Logging
            self.history['loss'].append(avg_loss)
            self.history['bleu_score'].append(avg_bleu)
            print(f'Epoch {epoch+1}/{epochs} | Loss: {avg_loss:.4f} | Avg BLEU: {avg_bleu:.4f}')

    def _train_one_epoch(self) -> float:
        """Apply one random update to the LoRA factors and return a mock total loss.

        Returns:
            The mock per-example loss multiplied by the number of training
            examples, so that ``run`` can average it back.
        """
        lora = self.model.lora_layer
        # Mock training: random "gradients" shaped like each factor.
        grad_A = np.random.randn(*lora.A.shape) * 0.01
        grad_B = np.random.randn(*lora.B.shape) * 0.01
        lora.A -= self.lr * grad_A
        lora.B -= self.lr * grad_B
        # A and B have different shapes ((rows, rank) vs (rank, cols)), so their
        # squared gradients cannot be added elementwise; average each separately.
        mock_loss = np.mean(grad_A ** 2) + np.mean(grad_B ** 2)
        return float(mock_loss * len(self.train_data))

    def _evaluate(self) -> float:
        """Return the average ``simple_bleu`` score over the evaluation set.

        Returns ``0.0`` when there is no evaluation data.
        """
        if len(self.eval_data) == 0:
            return 0.0

        # Simulate the model getting better: the chance of reproducing the
        # reference grows by 10% for every epoch already recorded.
        improvement_factor = len(self.history['loss']) / 10
        total_bleu = 0.0
        for item in self.eval_data:
            if random.random() < improvement_factor:
                mock_completion = item.completion
            else:
                mock_completion = 'i do not know'
            total_bleu += simple_bleu(item.completion, mock_completion)
        return total_bleu / len(self.eval_data)

    def plot_results(self):
        """Plot the recorded loss and BLEU histories side by side."""
        _, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
        ax1.plot(self.history['loss'], label='Loss')
        ax1.set_title('Training Loss')
        ax2.plot(self.history['bleu_score'], label='BLEU Score', color='green')
        ax2.set_title('Evaluation BLEU Score')
        plt.show()

## Step 3: Run the Fine-Tuning Job and Evaluate

In [None]:
# Initialize everything
lora_model = MockLoRAModel(lora_rank=8)
pipeline = EndToEndPipeline(lora_model, train_dataset, eval_dataset, lr=0.01)

# Run the job
pipeline.run(epochs=10)

# Visualize the results (the leading "\n" leaves a blank line after the epoch log)
print("\n--- Training and Evaluation Metrics ---")
pipeline.plot_results()

## Final Project Summary

Congratulations! You have completed an end-to-end fine-tuning project. In this lab, you:
1. **Prepared Data**: Loaded raw data, cleaned it, and formatted it for an instruction-following task.
2. **Set Up a PEFT Model**: Configured a mock model to use LoRA, ensuring only a small fraction of parameters would be updated.
3. **Ran a Training Pipeline**: Executed a training loop that included both training steps and evaluation on a hold-out set.
4. **Monitored and Evaluated**: Logged key metrics (loss and BLEU score) and visualized them to assess the model's learning progress.

This workflow is a blueprint for real-world fine-tuning. While the model and data were simplified, the steps—data prep, model setup, training, and evaluation—are universal.

## Exercises for Further Exploration

1. **Integrate Real Components**: Try to replace one of the mock components with a real one. For example, use a real tokenizer from the `transformers` library.
2. **Add More Augmentation**: In the data preparation step, add the data augmentation techniques from Lab 02 to increase the size and diversity of your training set.
3. **Experiment with LoRA Rank**: Rerun the pipeline with different LoRA ranks (e.g., 2 vs. 32). Does a higher rank lead to a better BLEU score in this mock setup? What are the trade-offs?