# Soft Prompting Pipeline Demo

This notebook demonstrates how to use the soft prompting pipeline to discover behavioral differences between models.

In [1]:
import os
import sys
import yaml
import torch
from pathlib import Path

# Make the repository importable: the project root is taken to be the parent
# of the current working directory, and it is prepended to sys.path only when
# it is not listed there already (so re-running this cell adds nothing new).
project_root = str(Path().absolute().parent)
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

from src.soft_prompting.core.pipeline import DivergencePipeline
from src.soft_prompting.core.experiment import ExperimentConfig
from src.soft_prompting.models.model_manager import ModelPairManager
from src.soft_prompting.models.registry import ModelRegistry

  from .autonotebook import tqdm as notebook_tqdm


## Setup Configuration

In [2]:
# Load configuration
# NOTE: the path is relative to the notebook's current working directory.
config_path = Path(
    "../src/soft_prompting/config/experiments/intervention_comparison.yaml"
)
# Read with an explicit encoding so parsing does not depend on the platform's
# locale default.
with open(config_path, encoding="utf-8") as f:
    # yaml.safe_load returns None for an empty document; fall back to an empty
    # mapping so the ** expansion below cannot fail with a TypeError.
    config_dict = yaml.safe_load(f) or {}

# Setup experiment directory (also relative to the working directory)
output_dir = Path("outputs/demo")
output_dir.mkdir(parents=True, exist_ok=True)

# Create experiment config
# The YAML entries are unpacked last, so any key that also appears in the demo
# defaults below (name, output_dir, model names) is overridden by the file.
config = ExperimentConfig.from_dict(
    {
        "name": "demo_experiment",
        "output_dir": str(output_dir),
        "model_1_name": "HuggingFaceTB/SmolLM-135M-Instruct",  # Small model for testing
        "model_2_name": "HuggingFaceTB/SmolLM-135M",
        **config_dict,
    }
)

## Initialize Models

In [3]:
# Target device for both models: "mps" for Apple Silicon (M1/M2), "cuda" for
# an NVIDIA GPU, or "cpu" to run without an accelerator.
DEVICE = "mps"  # Change this as needed

# Create the registry first, then the manager for the two models being compared.
registry = ModelRegistry()

# Collect the manager options in one place so they are easy to inspect/tweak.
# NOTE(review): per the original note, ModelPairManager is expected to check
# whether DEVICE is actually available - confirm in its implementation.
manager_kwargs = dict(
    model_1_name=config.model_1_name,
    model_2_name=config.model_2_name,
    device=DEVICE,
    torch_dtype=torch.float16,
    load_in_8bit=False,
    use_cache=True,
)
model_manager = ModelPairManager(**manager_kwargs)

# Load both models together with the tokenizer returned by the manager.
model_1, model_2, tokenizer = model_manager.load_model_pair()

## Run Pipeline

In [4]:
# Initialize pipeline
pipeline = DivergencePipeline(config=config, use_wandb=True, test_mode=True)

# Bind `results` up front. Errors from run() are reported but not re-raised,
# so without this a failed run would leave `results` either undefined or
# holding a stale value from an earlier execution of this cell.
results = None

# Run pipeline
try:
    results = pipeline.run(validate_only=True)
except Exception as e:
    # Best-effort demo: report the failure and let the notebook continue.
    # Later cells must check `results is None` before using it.
    print(f"Error running pipeline: {e}")
else:
    print("Pipeline completed successfully!")
finally:
    # Always release pipeline resources, whether or not the run succeeded.
    pipeline.cleanup()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


Initializing DivergencePipeline...
Setting up ModelPairManager...
Setting up experiment tracking...


[34m[1mwandb[0m: Currently logged in as: [33mjacquesthibs[0m. Use [1m`wandb login --relogin`[0m to force relogin


Pipeline initialization complete.

=== Starting pipeline run ===

Step 1: Loading models...
Successfully loaded models: HuggingFaceTB/SmolLM-135M-Instruct and HuggingFaceTB/SmolLM-135M

Step 2: Creating dataloaders...
Error running pipeline: No processor found for category: persona/ethics
Cleaning up pipeline resources...


Cleanup complete.


## Analyze Results

In [None]:
from src.soft_prompting.analysis.divergence_analyzer import DivergenceAnalyzer

# The pipeline cell catches and prints exceptions instead of re-raising them,
# so `results` may be unbound or None when this cell runs. Look it up
# defensively rather than failing with a NameError/TypeError.
if globals().get("results") is None:
    print(
        "No pipeline results available; resolve the pipeline error and "
        "re-run the pipeline cell before analyzing."
    )
else:
    # Initialize analyzer
    # NOTE(review): pipeline.cleanup() has already been called by the time this
    # cell runs - confirm pipeline.trainer.metrics is still usable afterwards.
    analyzer = DivergenceAnalyzer(
        metrics=pipeline.trainer.metrics, output_dir=output_dir
    )

    # Generate analysis report
    # NOTE(review): the pipeline is run with validate_only=True - confirm that
    # such a run returns a "dataset" entry.
    report = analyzer.generate_report(
        dataset=results["dataset"], output_file="analysis_report.json"
    )

    # Display key findings
    print("\nKey Findings:")
    print(
        f"Number of high-divergence examples: {report['divergence_patterns']['num_high_divergence']}"
    )
    print(
        f"Mean semantic difference: {report['divergence_patterns']['mean_semantic_diff']:.3f}"
    )

    # One summary per behavior cluster, followed by its example prompts.
    print("\nBehavior Clusters:")
    for cluster_id, stats in report["behavior_clusters"].items():
        print(f"\nCluster {cluster_id}:")
        print(f"Size: {stats['size']}")
        print(f"Mean divergence: {stats['mean_divergence']:.3f}")
        print("Example prompts:")
        for prompt in stats["example_prompts"]:
            print(f"- {prompt}")