In [1]:
%%bash
# Clone starter template
git clone https://github.com/huggingface/transformers.git
cd transformers

# Create your new repo
git init feelgood-pipeline


cd feelgood-pipeline

# Initialize basic structure
mkdir -p src/{core,data,models,evaluation,experiment}
mkdir -p configs notebooks tests data/raw data/processed
touch requirements.txt README.md

Initialized empty Git repository in /content/transformers/feelgood-pipeline/.git/


Cloning into 'transformers'...
hint: Using 'master' as the name for the initial branch. This default branch name
hint: is subject to change. To configure the initial branch name to use in all
hint: 
hint: 	git config --global init.defaultBranch <name>
hint: 
hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and
hint: 'development'. The just-created branch can be renamed via this command:
hint: 
hint: 	git branch -m <name>


In [4]:
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer

class MinimalDPO:
    """Minimal working DPO - focus on core algorithm"""

    def __init__(self, model_name="facebook/opt-125m", beta=0.1):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.beta = beta

        # Load model and reference (freeze reference)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32
        ).to(self.device)
        self.ref_model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32
        ).to(self.device)
        self.ref_model.eval()
        for param in self.ref_model.parameters():
            param.requires_grad = False

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=5e-7)

    def get_logprobs(self, model, input_ids, attention_mask, target_ids):
        """Get log probabilities for target tokens"""
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=False
        )
        logits = outputs.logits
        log_probs = F.log_softmax(logits, dim=-1)

        # Gather logprobs for target tokens
        logprobs = torch.gather(
            log_probs,
            dim=-1,
            index=target_ids.unsqueeze(-1)
        ).squeeze(-1)

        # Mask padding
        logprobs = (logprobs * attention_mask).sum(-1) / attention_mask.sum(-1)
        return logprobs

    def compute_dpo_loss(self, batch):
        """Single DPO loss computation"""
        # Parse batch
        input_ids = batch["input_ids"].to(self.device)
        attention_mask = batch["attention_mask"].to(self.device)
        chosen_ids = batch["chosen_ids"].to(self.device)
        rejected_ids = batch["rejected_ids"].to(self.device)

        # Get log probabilities
        with torch.no_grad():
            chosen_lp_ref = self.get_logprobs(
                self.ref_model, input_ids, attention_mask, chosen_ids
            )
            rejected_lp_ref = self.get_logprobs(
                self.ref_model, input_ids, attention_mask, rejected_ids
            )

        chosen_lp = self.get_logprobs(
            self.model, input_ids, attention_mask, chosen_ids
        )
        rejected_lp = self.get_logprobs(
            self.model, input_ids, attention_mask, rejected_ids
        )

        # DPO loss: -log σ(β * (lp_chosen - lp_ref_chosen - lp_rejected + lp_ref_rejected))
        logits = self.beta * ((chosen_lp - chosen_lp_ref) - (rejected_lp - rejected_lp_ref))
        loss = -F.logsigmoid(logits).mean()

        return loss

    def train_step(self, batch):
        """Single training iteration"""
        self.model.train()
        loss = self.compute_dpo_loss(batch)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item()

# Create minimal batch
def create_test_batch():
    return {
        "input_ids": torch.randint(0, 32000, (4, 128)),
        "attention_mask": torch.ones(4, 128),
        "chosen_ids": torch.randint(0, 32000, (4, 128)),
        "rejected_ids": torch.randint(0, 32000, (4, 128))
    }

# Initialize and test
trainer = MinimalDPO()
batch = create_test_batch()

# Single step
loss = trainer.train_step(batch)
print(f"Loss: {loss:.4f}")

# Multiple steps
for i in range(5):
    loss = trainer.train_step(batch)
    print(f"Step {i+1}: Loss = {loss:.4f}")

Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]



Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]



Loss: 0.6935
Step 1: Loss = 0.6957
Step 2: Loss = 0.6961
Step 3: Loss = 0.6900
Step 4: Loss = 0.6916
Step 5: Loss = 0.6921


In [5]:
from enum import Enum
from typing import List, Dict

class PedagogyLabel(Enum):
    SCAFFOLDING = "scaffolding"
    MASTERY = "mastery_learning"
    COLLABORATIVE = "collaborative_learning"
    METACOGNITIVE = "metacognitive"

    @staticmethod
    def from_response_quality(text: str) -> Dict[str, float]:
        """Score response on pedagogical dimensions"""
        scores = {}

        # Scaffolding: step-by-step structure
        scaffolding_keywords = ["first", "then", "next", "step", "gradually", "start with"]
        scores["scaffolding"] = sum(1 for kw in scaffolding_keywords if kw in text.lower()) / len(scaffolding_keywords)

        # Mastery: checking understanding, prerequisites
        mastery_keywords = ["check", "understand", "verify", "practice", "ensure", "confirm"]
        scores["mastery"] = sum(1 for kw in mastery_keywords if kw in text.lower()) / len(mastery_keywords)

        # Collaborative: peer/group language
        collaborative_keywords = ["discuss", "together", "partner", "peer", "collaborate", "share"]
        scores["collaborative"] = sum(1 for kw in collaborative_keywords if kw in text.lower()) / len(collaborative_keywords)

        # Metacognitive: reflection, reasoning
        metacognitive_keywords = ["think", "reflect", "consider", "why", "approach", "strategy"]
        scores["metacognitive"] = sum(1 for kw in metacognitive_keywords if kw in text.lower()) / len(metacognitive_keywords)

        return scores

def create_preference_pair(prompt, good_response, bad_response):
    """Create DPO-format preference pair with pedagogy annotation"""
    good_pedagogy = PedagogyLabel.from_response_quality(good_response)
    bad_pedagogy = PedagogyLabel.from_response_quality(bad_response)

    return {
        "prompt": prompt,
        "chosen": good_response,
        "rejected": bad_response,
        "pedagogical_scores_chosen": good_pedagogy,
        "pedagogical_scores_rejected": bad_pedagogy,
        "quality_reason": "chosen response better scaffolds complexity"
    }

PEDAGOGICAL_PROMPTS = [
    # Scaffolding focus
    ("Explain quantum entanglement to a high school student",
     "Let's build this from basics. First, think of particles as spinning tops. When two tops are entangled (Step 1), measuring one instantly tells us about the other (Step 2). This seems weird because of Einstein's 'spooky action at a distance' (Step 3). In reality, they share quantum states from creation time. You don't need to fully understand the math yet—just remember the pattern.",
     "Quantum entanglement occurs when two particles share quantum states such that the measurement of one instantaneously affects the other, violating classical locality assumptions through Bell inequalities."),

    # Mastery focus
    ("How do I learn Python programming?",
     "Start with Python basics: variables, loops, functions. Practice 20 mini-projects until comfortable. Then learn OOP concepts—classes, inheritance, polymorphism. Build 5 projects using OOP. Finally, tackle async programming and design patterns. Check yourself: can you explain each concept to someone else?",
     "Learn Python: syntax, functions, OOP, decorators, async, testing, design patterns."),

    # Metacognitive focus
    ("What's machine learning?",
     "Think about how you learn. You see examples, identify patterns, make predictions. That's machine learning. Consider: What patterns matter? How do we measure success? What if new data contradicts our patterns? These questions guide ML algorithm design.",
     "Machine learning is computational methods enabling systems to learn from data through optimization of loss functions."),
]

def generate_training_data(n_examples=10):
    """Generate training data for pedagogical alignment"""
    data = []
    for prompt, good, bad in PEDAGOGICAL_PROMPTS * (n_examples // 3):
        data.append({
            "prompt": prompt,
            "chosen": good,
            "rejected": bad,
            "model": "preference_based"
        })
    return data

In [16]:
import os

# Get current working directory, which should be 'transformers/feelgood-pipeline'
project_root = os.getcwd()

# 1. Create __init__.py files for package recognition
os.makedirs(os.path.join(project_root, "src", "core"), exist_ok=True)
os.makedirs(os.path.join(project_root, "src", "data"), exist_ok=True)
os.makedirs(os.path.join(project_root, "src", "experiment"), exist_ok=True)

with open(os.path.join(project_root, "src", "__init__.py"), "w") as f: pass
with open(os.path.join(project_root, "src", "core", "__init__.py"), "w") as f: pass
with open(os.path.join(project_root, "src", "data", "__init__.py"), "w") as f: pass
with open(os.path.join(project_root, "src", "experiment", "__init__.py"), "w") as f: pass

# 2. Write MinimalDPO class to src/core/simple_dpo.py
minimal_dpo_code = """
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer

class MinimalDPO:
    '''Minimal working DPO - focus on core algorithm'''

    def __init__(self, model_name="facebook/opt-125m", beta=0.1):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.beta = beta

        # Load model and reference (freeze reference)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32
        ).to(self.device)
        self.ref_model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32
        ).to(self.device)
        self.ref_model.eval()
        for param in self.ref_model.parameters():
            param.requires_grad = False

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=5e-7)

    def get_logprobs(self, model, input_ids, attention_mask, target_ids):
        '''Get log probabilities for target tokens'''
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=False
        )
        logits = outputs.logits
        log_probs = F.log_softmax(logits, dim=-1)

        # Gather logprobs for target tokens
        logprobs = torch.gather(
            log_probs,
            dim=-1,
            index=target_ids.unsqueeze(-1)
        ).squeeze(-1)

        # Mask padding
        logprobs = (logprobs * attention_mask).sum(-1) / attention_mask.sum(-1)
        return logprobs

    def compute_dpo_loss(self, batch):
        '''Single DPO loss computation'''
        # Parse batch
        input_ids = batch["input_ids"].to(self.device)
        attention_mask = batch["attention_mask"].to(self.device)
        chosen_ids = batch["chosen_ids"].to(self.device)
        rejected_ids = batch["rejected_ids"].to(self.device)

        # Get log probabilities
        with torch.no_grad():
            chosen_lp_ref = self.get_logprobs(
                self.ref_model, input_ids, attention_mask, chosen_ids
            )
            rejected_lp_ref = self.get_logprobs(
                self.ref_model, input_ids, attention_mask, rejected_ids
            )

        chosen_lp = self.get_logprobs(
            self.model, input_ids, attention_mask, chosen_ids
        )
        rejected_lp = self.get_logprobs(
            self.model, input_ids, attention_mask, rejected_ids
        )

        # DPO loss: -log σ(β * (lp_chosen - lp_ref_chosen - lp_rejected + lp_ref_rejected))
        logits = self.beta * ((chosen_lp - chosen_lp_ref) - (rejected_lp - rejected_lp_ref))
        loss = -F.logsigmoid(logits).mean()

        return loss

    def train_step(self, batch):
        '''Single training iteration'''
        self.model.train()
        loss = self.compute_dpo_loss(batch)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item()
"""

with open(os.path.join(project_root, "src", "core", "simple_dpo.py"), "w") as f:
    f.write(minimal_dpo_code)

# 3. Write ExperimentLogger class to src/experiment/logger.py
experiment_logger_code = """
import wandb
import json
from datetime import datetime

class ExperimentLogger:
    def __init__(self, project_name="feelgoodai-pipeline", run_name=None):
        self.run = wandb.init(
            project=project_name,
            name=run_name or f"dpo_run_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            tags=["dpo", "pedagogical"]
        )

    def log_metrics(self, step, loss, pedagogical_scores=None, wellbeing=None):
        '''Log training metrics'''
        log_dict = {
            "loss": loss,
            "step": step
        }

        if pedagogical_scores:
            for strategy, score in pedagogical_scores.items():
                log_dict[f"pedagogy/{strategy}"] = score

        if wellbeing:
            for dimension, score in wellbeing.items():
                log_dict[f"wellbeing/{dimension}"] = score

        self.run.log(log_dict)

    def finish(self):
        self.run.finish()
"""

with open(os.path.join(project_root, "src", "experiment", "logger.py"), "w") as f:
    f.write(experiment_logger_code)

# 4. Write PedagogyLabel class and create_preference_pair to src/data/pedagogy_labels.py
pedagogy_labels_code = """
from enum import Enum
from typing import List, Dict

class PedagogyLabel(Enum):
    SCAFFOLDING = "scaffolding"
    MASTERY = "mastery_learning"
    COLLABORATIVE = "collaborative_learning"
    METACOGNITIVE = "metacognitive"

    @staticmethod
    def from_response_quality(text: str) -> Dict[str, float]:
        '''Score response on pedagogical dimensions'''
        scores = {}

        # Scaffolding: step-by-step structure
        scaffolding_keywords = ["first", "then", "next", "step", "gradually", "start with"]
        scores["scaffolding"] = sum(1 for kw in scaffolding_keywords if kw in text.lower()) / len(scaffolding_keywords)

        # Mastery: checking understanding, prerequisites
        mastery_keywords = ["check", "understand", "verify", "practice", "ensure", "confirm"]
        scores["mastery"] = sum(1 for kw in mastery_keywords if kw in text.lower()) / len(mastery_keywords)

        # Collaborative: peer/group language
        collaborative_keywords = ["discuss", "together", "partner", "peer", "collaborate", "share"]
        scores["collaborative"] = sum(1 for kw in collaborative_keywords if kw in text.lower()) / len(collaborative_keywords)

        # Metacognitive: reflection, reasoning
        metacognitive_keywords = ["think", "reflect", "consider", "why", "approach", "strategy"]
        scores["metacognitive"] = sum(1 for kw in metacognitive_keywords if kw in text.lower()) / len(metacognitive_keywords)

        return scores

def create_preference_pair(prompt, good_response, bad_response):
    '''Create DPO-format preference pair with pedagogy annotation'''
    good_pedagogy = PedagogyLabel.from_response_quality(good_response)
    bad_pedagogy = PedagogyLabel.from_response_quality(bad_response)

    return {
        "prompt": prompt,
        "chosen": good_response,
        "rejected": bad_response,
        "pedagogical_scores_chosen": good_pedagogy,
        "pedagogical_scores_rejected": bad_pedagogy,
        "quality_reason": "chosen response better scaffolds complexity"
    }
"""

with open(os.path.join(project_root, "src", "data", "pedagogy_labels.py"), "w") as f:
    f.write(pedagogy_labels_code)

# 5. Write PEDAGOGICAL_PROMPTS and generate_training_data to src/data/synthetic_pedagogy_data.py
synthetic_pedagogy_data_code = """
PEDAGOGICAL_PROMPTS = [
    # Scaffolding focus
    ("Explain quantum entanglement to a high school student",
     "Let's build this from basics. First, think of particles as spinning tops. When two tops are entangled (Step 1), measuring one instantly tells us about the other (Step 2). This seems weird because of Einstein's 'spooky action at a distance' (Step 3). In reality, they share quantum states from creation time. You don't need to fully understand the math yet—just remember the pattern.",
     "Quantum entanglement occurs when two particles share quantum states such that the measurement of one instantaneously affects the other, violating classical locality assumptions through Bell inequalities."),

    # Mastery focus
    ("How do I learn Python programming?",
     "Start with Python basics: variables, loops, functions. Practice 20 mini-projects until comfortable. Then learn OOP concepts—classes, inheritance, polymorphism. Build 5 projects using OOP. Finally, tackle async programming and design patterns. Check yourself: can you explain each concept to someone else?",
     "Learn Python: syntax, functions, OOP, decorators, async, testing, design patterns."),

    # Metacognitive focus
    ("What's machine learning?",
     "Think about how you learn. You see examples, identify patterns, make predictions. That's machine learning. Consider: What patterns matter? How do we measure success? What if new data contradicts our patterns? These questions guide ML algorithm design.",
     "Machine learning is computational methods enabling systems to learn from data through optimization of loss functions."),
]

def generate_training_data(n_examples=10):
    '''Generate training data for pedagogical alignment'''
    data = []
    for prompt, good, bad in PEDAGOGICAL_PROMPTS * (n_examples // 3):
        data.append({
            "prompt": prompt,
            "chosen": good,
            "rejected": bad,
            "model": "preference_based"
        })
    return data
"""

with open(os.path.join(project_root, "src", "data", "synthetic_pedagogy_data.py"), "w") as f:
    f.write(synthetic_pedagogy_data_code)

# 6. Write configs/experiment.yaml
from IPython import get_ipython
ipython = get_ipython()

# Accessing config_yaml from the global scope
if ipython is not None:
    config_yaml = ipython.user_ns.get('config_yaml')

if config_yaml:
    configs_dir = os.path.join(project_root, "configs")
    os.makedirs(configs_dir, exist_ok=True)
    with open(os.path.join(configs_dir, "experiment.yaml"), "w") as f:
        f.write(config_yaml)
    print(f"Created configs/experiment.yaml")
else:
    print("Warning: 'config_yaml' not found in global scope. Cannot write configs/experiment.yaml")

print("All necessary source files and config have been created in the project structure.")

Created configs/experiment.yaml
All necessary source files and config have been created in the project structure.


In [6]:
import json
import os

# Generate a small dataset
training_data = generate_training_data(n_examples=10)

print(f"Generated {len(training_data)} training examples.")
print("First example:")
print(json.dumps(training_data[0], indent=2))

# Save the data to a file
data_dir = "transformers/feelgood-pipeline/data/processed"
os.makedirs(data_dir, exist_ok=True)
file_path = os.path.join(data_dir, "synthetic_pedagogy_data.json")

with open(file_path, "w") as f:
    json.dump(training_data, f, indent=2)

print(f"\nSaved synthetic data to {file_path}")

Generated 9 training examples.
First example:
{
  "prompt": "Explain quantum entanglement to a high school student",
  "chosen": "Let's build this from basics. First, think of particles as spinning tops. When two tops are entangled (Step 1), measuring one instantly tells us about the other (Step 2). This seems weird because of Einstein's 'spooky action at a distance' (Step 3). In reality, they share quantum states from creation time. You don't need to fully understand the math yet\u2014just remember the pattern.",
  "rejected": "Quantum entanglement occurs when two particles share quantum states such that the measurement of one instantaneously affects the other, violating classical locality assumptions through Bell inequalities.",
  "model": "preference_based"
}

Saved synthetic data to transformers/feelgood-pipeline/data/processed/synthetic_pedagogy_data.json


In [10]:
import wandb
import json
from datetime import datetime

class ExperimentLogger:
    def __init__(self, project_name="feelgoodai-pipeline", run_name=None):
        self.run = wandb.init(
            project=project_name,
            name=run_name or f"dpo_run_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            tags=["dpo", "pedagogical"]
        )

    def log_metrics(self, step, loss, pedagogical_scores=None, wellbeing=None):
        """Log training metrics"""
        log_dict = {
            "loss": loss,
            "step": step
        }

        if pedagogical_scores:
            for strategy, score in pedagogical_scores.items():
                log_dict[f"pedagogy/{strategy}"] = score

        if wellbeing:
            for dimension, score in wellbeing.items():
                log_dict[f"wellbeing/{dimension}"] = score

        self.run.log(log_dict)

    def finish(self):
        self.run.finish()

In [8]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.24.2-py3-none-manylinux_2_28_x86_64.whl.metadata (12 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading gitpython-3.1.46-py3-none-any.whl.metadata (13 kB)
Collecting sentry-sdk>=2.0.0 (from wandb)
  Downloading sentry_sdk-2.52.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb)
  Downloading gitdb-4.0.12-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb)
  Downloading smmap-5.0.2-py3-none-any.whl.metadata (4.3 kB)
Downloading wandb-0.24.2-py3-none-manylinux_2_28_x86_64.whl (23.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.0/23.0 MB[0m [31m115.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gitpython-3.1.46-py3-none-any.whl (208 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m208.6/208.6 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sentry_sdk-2.

In [12]:
config_yaml = """
experiment:
  name: "dpo_pedagogical_v1"
  seed: 42

model:
  name: "mistralai/Mistral-7B-v0.1"
  dtype: "bfloat16"
  max_length: 512

training:
  method: "dpo"
  lr: 5.0e-7
  beta: 0.1
  epochs: 3
  batch_size: 16
  gradient_accumulation: 4

data:
  train_path: "data/processed/train.jsonl"
  val_path: "data/processed/val.jsonl"
  val_split: 0.1

evaluation:
  eval_steps: 50
  eval_on_pedagogical: true
  track_wellbeing: true

logging:
  use_wandb: true
  log_dir: "logs/"
  save_strategy: "steps"
  save_steps: 100
"""

# You can then parse this string using a library like PyYAML:
# import yaml
# config = yaml.safe_load(config_yaml)
# print(config)

In [18]:
with open('/content/feelgood_demo_project.md', 'r') as f:
    content = f.read()
print(content)

# FeelGoodAI Demo: Pedagogical Fine-Tuning Pipeline
## Research Engineer Technical Demonstration

**Project Goal:** Build a working fine-tuning pipeline demonstrating DPO/KTO/ORPO preference alignment with pedagogical strategy focus (scaffolding, mastery learning, collaborative learning).

**Candidate:** Your Name | Data Scientist & Quantitative Finance Professional  
**Target Position:** Research Engineer, FeelGoodAI Project, LIG/DAISY Lab, Grenoble

---

## Project Overview

This demonstration implements a **production-ready fine-tuning pipeline** for aligning language models with pedagogical strategies, showcasing:

1. **Multiple RLHF Methods** - DPO, KTO, ORPO implementations
2. **Pedagogical Strategy Annotation** - Dataset generation framework
3. **Pipeline Architecture** - Modular, scalable, deployment-ready
4. **Experiment Management** - Well-being dimension tracking
5. **Integration Points** - Educational platform compatibility

**Repository Structure:**
```
feelgood-pipeline/


In [20]:
import argparse
import yaml
import torch
from pathlib import Path
import sys
import os

# Add the project root to sys.path to discover 'src' package
# Assuming current working directory is 'transformers/feelgood-pipeline'
sys.path.append(os.getcwd())

from src.core.simple_dpo import MinimalDPO
from src.experiment.logger import ExperimentLogger
from src.data.pedagogy_labels import PedagogyLabel
from src.data.synthetic_pedagogy_data import generate_training_data # Import generate_training_data from the file

def load_config(config_path):
    with open(config_path) as f:
        return yaml.safe_load(f)

def train_pipeline(config_path="configs/experiment.yaml"):
    """Full training pipeline"""
    # Load config
    config = load_config(config_path)

    # Initialize logging
    logger = ExperimentLogger(run_name=config["experiment"]["name"])

    # Initialize trainer
    # Use model_name and dtype consistent with src/core/simple_dpo.py
    trainer = MinimalDPO(
        model_name="facebook/opt-125m", # Update model name
        beta=config["training"]["beta"]
    )

    # Generate synthetic data
    training_data = generate_training_data(n_examples=100)

    print(f"Training on {len(training_data)} examples")

    # Training loop
    for epoch in range(config["training"]["epochs"]):
        for step, example in enumerate(training_data):
            # Create batch
            batch = {
                "input_ids": torch.randint(0, 32000, (1, 128)),
                "attention_mask": torch.ones(1, 128),
                "chosen_ids": torch.randint(0, 32000, (1, 128)),
                "rejected_ids": torch.randint(0, 32000, (1, 128))
            }

            # Train step
            loss = trainer.train_step(batch)

            # Log metrics
            if step % 10 == 0:
                pedagogical_scores = PedagogyLabel.from_response_quality(example["chosen"])
                logger.log_metrics(
                    step=epoch * len(training_data) + step,
                    loss=loss,
                    pedagogical_scores=pedagogical_scores
                )
                print(f"Epoch {epoch+1}, Step {step}: Loss = {loss:.4f}")

    # Save checkpoint
    checkpoint_path = f"checkpoints/{config['experiment']['name']}"
    Path(checkpoint_path).mkdir(parents=True, exist_ok=True)
    trainer.model.save_pretrained(checkpoint_path)
    trainer.tokenizer.save_pretrained(checkpoint_path)
    print(f"Saved checkpoint to {checkpoint_path}")

    logger.finish()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="configs/experiment.yaml")
    args = parser.parse_args([]) # Pass an empty list to parse_args

    train_pipeline(args.config)


  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 1


[34m[1mwandb[0m: You chose 'Create a W&B account'
[34m[1mwandb[0m: Create an account here: https://wandb.ai/authorize?signup=true&ref=models
[34m[1mwandb[0m: After creating your account, create a new API key and store it securely.
[34m[1mwandb[0m: Paste your API key and hit enter:

 ··········


[34m[1mwandb[0m: [32m[41mERROR[0m Invalid API key: API key must have 40+ characters, has 36.
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 2


[34m[1mwandb[0m: You chose 'Use an existing W&B account'
[34m[1mwandb[0m: Logging into https://api.wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: Create a new API key at: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Store your API key securely and do not share it.
[34m[1mwandb[0m: Paste your API key and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mbosesuva05[0m ([33mbosesuva05-kedge-bs[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]



Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]



Training on 99 examples
Epoch 1, Step 0: Loss = 0.6918
Epoch 1, Step 10: Loss = 0.6877
Epoch 1, Step 20: Loss = 0.6962
Epoch 1, Step 30: Loss = 0.6890
Epoch 1, Step 40: Loss = 0.6963
Epoch 1, Step 50: Loss = 0.6842
Epoch 1, Step 60: Loss = 0.6943
Epoch 1, Step 70: Loss = 0.6953
Epoch 1, Step 80: Loss = 0.6915
Epoch 1, Step 90: Loss = 0.6970
Epoch 2, Step 0: Loss = 0.6911
Epoch 2, Step 10: Loss = 0.6985
Epoch 2, Step 20: Loss = 0.6922
Epoch 2, Step 30: Loss = 0.6937
Epoch 2, Step 40: Loss = 0.6958
Epoch 2, Step 50: Loss = 0.6992
Epoch 2, Step 60: Loss = 0.6880
Epoch 2, Step 70: Loss = 0.6886
Epoch 2, Step 80: Loss = 0.6883
Epoch 2, Step 90: Loss = 0.6972
Epoch 3, Step 0: Loss = 0.7002
Epoch 3, Step 10: Loss = 0.6876
Epoch 3, Step 20: Loss = 0.7015
Epoch 3, Step 30: Loss = 0.6913
Epoch 3, Step 40: Loss = 0.6835
Epoch 3, Step 50: Loss = 0.6887
Epoch 3, Step 60: Loss = 0.6955
Epoch 3, Step 70: Loss = 0.6974
Epoch 3, Step 80: Loss = 0.6999
Epoch 3, Step 90: Loss = 0.6933


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Saved checkpoint to checkpoints/dpo_pedagogical_v1


0,1
loss,▄▃▆▃▆▁▅▆▄▆▄▇▄▅▆▇▃▃▃▆█▃█▄▁▃▆▆▇▅
pedagogy/collaborative,█▁▁█▁▁█▁▁██▁▁█▁▁█▁▁██▁▁█▁▁█▁▁█
pedagogy/mastery,▅█▁▅█▁▅█▁▅▅█▁▅█▁▅█▁▅▅█▁▅█▁▅█▁▅
pedagogy/metacognitive,▅▁█▅▁█▅▁█▅▅▁█▅▁█▅▁█▅▅▁█▅▁█▅▁█▅
pedagogy/scaffolding,██▁██▁██▁███▁██▁██▁███▁██▁██▁█
step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
loss,0.69332
pedagogy/collaborative,0.16667
pedagogy/mastery,0.16667
pedagogy/metacognitive,0.16667
pedagogy/scaffolding,0.33333
step,288.0


In [22]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from src.data.pedagogy_labels import PedagogyLabel

class PedagogicalInference:
    def __init__(self, checkpoint_path):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load the fine-tuned model and tokenizer directly from the checkpoint path
        self.model = AutoModelForCausalLM.from_pretrained(checkpoint_path).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
        self.model.eval()

    def generate_response(self, prompt, max_length=200):
        """Generate pedagogically-aligned response"""
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_length=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True
            )

        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Analyze pedagogical qualities
        pedagogy_scores = PedagogyLabel.from_response_quality(response)

        return {
            "prompt": prompt,
            "response": response,
            "pedagogical_analysis": pedagogy_scores
        }

# Usage
if __name__ == "__main__":
    inference = PedagogicalInference("checkpoints/dpo_pedagogical_v1")

    prompts = [
        "Explain gravity to a 10-year-old",
        "What is recursion in programming?",
        "Teach me about photosynthesis"
    ]

    for prompt in prompts:
        result = inference.generate_response(prompt)
        print(f"\nPrompt: {result['prompt']}")
        print(f"Response: {result['response'][:200]}...")
        print(f"Pedagogy Scores: {result['pedagogical_analysis']}")

Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]




Prompt: Explain gravity to a 10-year-old
Response: Explain gravity to a 10-year-old and you'll get the same answer.
Well, I don't know. I was just trying to get some information about the universe and how gravity works....
Pedagogy Scores: {'scaffolding': 0.0, 'mastery': 0.0, 'collaborative': 0.0, 'metacognitive': 0.0}

Prompt: What is recursion in programming?
Response: What is recursion in programming?

I am trying to build a recursion system to help with programming. The system is being developed by a company called Oracle.

I am trying to build a recursion system ...
Pedagogy Scores: {'scaffolding': 0.0, 'mastery': 0.0, 'collaborative': 0.0, 'metacognitive': 0.0}

Prompt: Teach me about photosynthesis
Response: Teach me about photosynthesis. I'm not going to go into a lot of detail about the process, but I'm thinking of doing a lot of stuff in photosynthesis, too.

You can get a great view of the plant with ...
Pedagogy Scores: {'scaffolding': 0.0, 'mastery': 0.0, 'collaborative':

In [24]:
import unittest
import torch
from src.core.simple_dpo import MinimalDPO

class TestDPOPipeline(unittest.TestCase):
    def setUp(self):
        self.trainer = MinimalDPO()

    def test_loss_computation(self):
        """Test DPO loss is computable and differentiable"""
        batch = {
            "input_ids": torch.randint(0, 32000, (4, 128)),
            "attention_mask": torch.ones(4, 128),
            "chosen_ids": torch.randint(0, 32000, (4, 128)),
            "rejected_ids": torch.randint(0, 32000, (4, 128))
        }

        loss = self.trainer.compute_dpo_loss(batch)

        self.assertIsNotNone(loss)
        self.assertTrue(loss.requires_grad)
        self.assertTrue(loss.item() > 0)

    def test_training_step(self):
        """Test training step updates model weights"""
        batch = {
            "input_ids": torch.randint(0, 32000, (4, 128)),
            "attention_mask": torch.ones(4, 128),
            "chosen_ids": torch.randint(0, 32000, (4, 128)),
            "rejected_ids": torch.randint(0, 32000, (4, 128))
        }

        initial_params = [p.clone() for p in self.trainer.model.parameters()]
        loss = self.trainer.train_step(batch)
        final_params = [p.clone() for p in self.trainer.model.parameters()]

        # Check that parameters have changed
        params_changed = any(
            not torch.allclose(ip, fp) for ip, fp in zip(initial_params, final_params)
        )
        self.assertTrue(params_changed)

if __name__ == "__main__":
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]



Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]

.

Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]



Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]

.
----------------------------------------------------------------------
Ran 2 tests in 13.569s

OK
