# Ablation Studies: Test Impact of Each Component

Studies:
1. Hippocampus ON vs OFF
2. Interleaved vs Sequential training
3. QA-only vs Multi-turn vs Hybrid
4. LoRA rank: 8 vs 16 vs 32 vs 64
5. Semantic vs Keyword vs Hybrid scoring

In [None]:
# Cell 1: Install Dependencies
!pip install unsloth transformers datasets trl google-generativeai sentence-transformers scikit-learn wandb pandas matplotlib -q
print("‚úÖ Dependencies installed")

In [None]:
# Cell 2: Imports
import sys
from pathlib import Path

# Make the project code importable.
# The imports below are qualified with the `scripts.` package prefix, so the
# directory that CONTAINS `scripts/` (the current working directory) has to be
# on sys.path. `scripts/` itself is still added, and still ends up first, so
# any un-prefixed imports keep resolving exactly as before.
# Entries are only added when missing, so re-running this cell does not keep
# growing sys.path.
project_root = Path.cwd()
scripts_dir = project_root / "scripts"
if scripts_dir.exists():
    if str(project_root) not in sys.path:
        sys.path.insert(0, str(project_root))
    if str(scripts_dir) not in sys.path:
        sys.path.insert(0, str(scripts_dir))

from scripts.training.hippocampus import create_hippocampus
from scripts.training.replay_buffer import PrioritizedReplayBuffer
from scripts.evaluation.scoring import SemanticScorer, HybridScorer
from scripts.utilities.data_loader import load_people_data
import pandas as pd
import matplotlib.pyplot as plt

print("✅ Modules imported")

## Study 1: Hippocampus ON vs OFF

In [None]:
# Study 1: Hippocampus ON vs OFF
#
# Template cell. Both runs are placeholders: until the commented-out
# evaluation calls are filled in, `results_ablation1` stays empty and only the
# "add code" hint at the bottom is printed.
print("="*70)
print("ABLATION STUDY 1: Hippocampus ON vs OFF")
print("="*70)

# One dict per configuration: {"Config": <label>, <metric name>: <score>, ...}
results_ablation1 = []

# Note: This is a template - actual training code would go here
# Run 1: With hippocampus
print("\n🧠 Run 1: Hippocampus ENABLED")
print("   (Training with hippocampus verification enabled)")
# scores_with = evaluate_all()
# results_ablation1.append({"Config": "With Hippocampus", **scores_with})

# Run 2: Without hippocampus (store everything)
print("\n🧠 Run 2: Hippocampus DISABLED")
print("   (Training without hippocampus - store all facts)")
# scores_without = evaluate_all()
# results_ablation1.append({"Config": "Without Hippocampus", **scores_without})

# Compare
if results_ablation1:
    df1 = pd.DataFrame(results_ablation1)
    print("\n📊 Results:")
    print(df1)

    # Plot through the Axes returned by pandas instead of pyplot's implicit
    # "current figure", so the title/labels/savefig always hit this chart even
    # if another figure is open in the notebook.
    # NOTE(review): assumes the score dicts contain exactly these four metric
    # keys - confirm against evaluate_all().
    ax1 = df1.plot(
        x="Config",
        y=["Single Q", "Conversation", "Correction", "Extended"],
        kind="bar",
        rot=0,  # keep the config labels horizontal
    )
    ax1.set_title("Ablation Study 1: Hippocampus Impact")
    ax1.set_ylabel("Score")
    ax1.figure.tight_layout()
    ax1.figure.savefig("ablation_hippocampus.png")
    print("✅ Chart saved: ablation_hippocampus.png")
else:
    print("⚠️ Add training and evaluation code to run this study")

## Study 2: Interleaved vs Sequential Training

In [None]:
# Study 2: Interleaved vs Sequential
#
# Template cell. Compares training on shuffled data against training on data
# grouped person-by-person. The training calls are commented out, so
# `results_ablation2` stays empty until they are filled in.
print("\n" + "="*70)
print("ABLATION STUDY 2: Interleaved vs Sequential Training")
print("="*70)

# One dict per configuration: {"Config": <label>, <metric name>: <score>, ...}
results_ablation2 = []

# Run 1: Interleaved (current approach)
print("\n🔀 Run 1: INTERLEAVED")
print("   (Shuffled training data - prevents catastrophic forgetting)")
# training_data_interleaved = shuffle(training_data)
# scores_interleaved = train_and_evaluate(training_data_interleaved)
# results_ablation2.append({"Config": "Interleaved", **scores_interleaved})

# Run 2: Sequential (all Obama, then all Musk, then all Curie)
print("\n➡️ Run 2: SEQUENTIAL")
print("   (All examples for one person, then next person)")
# training_data_sequential = sort_by_person(training_data)
# scores_sequential = train_and_evaluate(training_data_sequential)
# results_ablation2.append({"Config": "Sequential", **scores_sequential})

# Compare
if results_ablation2:
    df2 = pd.DataFrame(results_ablation2)
    print("\n📊 Results:")
    print(df2)
    # This is the hypothesis under test, printed as a reading aid; it is not
    # derived from the numbers above.
    print("\n💡 Expected: Sequential shows catastrophic forgetting (earlier people forgotten)")
    print("   Interleaved should maintain all people equally")
else:
    print("⚠️ Add training and evaluation code to run this study")

## Study 3: QA-only vs Multi-turn vs Hybrid

In [None]:
# Study 3: QA vs Multi-turn vs Hybrid
#
# Template cell. Compares three training-data formats. The loader and
# training calls are commented out, so `results_ablation3` stays empty until
# they are filled in.
print("\n" + "="*70)
print("ABLATION STUDY 3: Training Data Format")
print("="*70)

# One dict per configuration: {"Config": <label>, <metric name>: <score>, ...}
results_ablation3 = []

# Run 1: QA only
print("\n📝 Run 1: QA-ONLY")
print("   (Single question-answer pairs)")
# qa_data = load_qa_only()
# scores_qa = train_and_evaluate(qa_data)
# results_ablation3.append({"Config": "QA Only", **scores_qa})

# Run 2: Multi-turn only
print("\n💬 Run 2: MULTI-TURN ONLY")
print("   (Conversation-style training data)")
# multiturn_data = load_multiturn_only()
# scores_multiturn = train_and_evaluate(multiturn_data)
# results_ablation3.append({"Config": "Multi-turn Only", **scores_multiturn})

# Run 3: Hybrid (50/50 mix)
print("\n🔀 Run 3: HYBRID")
print("   (50% QA, 50% multi-turn)")
# hybrid_data = load_hybrid()
# scores_hybrid = train_and_evaluate(hybrid_data)
# results_ablation3.append({"Config": "Hybrid", **scores_hybrid})

# Compare
if results_ablation3:
    df3 = pd.DataFrame(results_ablation3)
    print("\n📊 Results:")
    print(df3)
else:
    print("⚠️ Add training and evaluation code to run this study")

## Study 4: LoRA Rank

In [None]:
# Study 4: LoRA Rank
#
# Template cell. Sweeps the LoRA rank; the model reload and training calls are
# commented out, so the loop currently only announces each rank and
# `results_ablation4` stays empty.
print("\n" + "="*70)
print("ABLATION STUDY 4: LoRA Rank")
print("="*70)

# One dict per rank: {"Rank": r, "Alpha": 2 * r, <metric name>: <score>, ...}
results_ablation4 = []

for rank in [8, 16, 32, 64]:
    print(f"\n🔧 Testing rank={rank}")

    # Note: Would reload model with new rank
    # model = load_model_with_rank(rank, alpha=rank*2)
    # scores = train_and_evaluate(training_data)

    # results_ablation4.append({
    #     "Rank": rank,
    #     "Alpha": rank*2,
    #     **scores
    # })

# Compare
if results_ablation4:
    df4 = pd.DataFrame(results_ablation4)
    print("\n📊 Results:")
    print(df4)

    # Plot through the Axes returned by pandas instead of pyplot's implicit
    # "current figure", so labels and savefig always apply to this chart.
    # NOTE(review): assumes the score dicts contain an "Overall" key - confirm
    # against train_and_evaluate().
    ax4 = df4.plot(x="Rank", y="Overall", kind="line", marker="o")
    ax4.set_title("Ablation Study 4: LoRA Rank Impact")
    ax4.set_xlabel("LoRA Rank")
    ax4.set_ylabel("Overall Score")
    ax4.grid(True)
    ax4.figure.tight_layout()
    ax4.figure.savefig("ablation_lora_rank.png")
    print("✅ Chart saved: ablation_lora_rank.png")
else:
    print("⚠️ Add training and evaluation code to run this study")

## Study 5: Scoring Method

In [None]:
# Study 5: Scoring Method
#
# Template cell. Compares keyword, semantic and hybrid scoring on the SAME
# model outputs. The recall and scoring calls are commented out, so
# `results_ablation5` stays empty until they are filled in.
print("\n" + "="*70)
print("ABLATION STUDY 5: Scoring Method")
print("="*70)

# Load people data
PEOPLE = load_people_data("configs/people_data.yaml")

# Initialize semantic scorer
# NOTE(review): this setup runs even while the study below is still a
# template; the scorer is only used by the commented-out Method 2 / Method 3
# code.
scorer = SemanticScorer()
scorer.precompute_embeddings(PEOPLE)

# One dict per scoring method: {"Method": <label>, "Average Score": <float>}
results_ablation5 = []

# Note: This study compares scoring methods on the same model outputs
# Get sample recalls (would need trained model)
# recalls = {p["id"]: recall_person(p) for p in PEOPLE}

# Method 1: Keyword
print("\n🔑 Method 1: KEYWORD")
print("   (Simple keyword matching)")
# scores_keyword = {}
# for person in PEOPLE:
#     scores_keyword[person["id"]] = score_recall_keyword(person, recalls[person["id"]])
# avg_keyword = sum(s["overall"] for s in scores_keyword.values()) / len(scores_keyword)
# results_ablation5.append({"Method": "Keyword", "Average Score": avg_keyword})

# Method 2: Semantic
print("\n🧠 Method 2: SEMANTIC")
print("   (Semantic similarity using embeddings)")
# scores_semantic = {}
# for person in PEOPLE:
#     scores_semantic[person["id"]] = scorer.score(person, recalls[person["id"]])
# avg_semantic = sum(s["overall"] for s in scores_semantic.values()) / len(scores_semantic)
# results_ablation5.append({"Method": "Semantic", "Average Score": avg_semantic})

# Method 3: Hybrid
print("\n🔀 Method 3: HYBRID (70% semantic)")
print("   (Combination of semantic and keyword)")
# hybrid_scorer = HybridScorer(scorer, semantic_weight=0.7)
# scores_hybrid = {}
# for person in PEOPLE:
#     scores_hybrid[person["id"]] = hybrid_scorer.score(person, recalls[person["id"]])
# avg_hybrid = sum(s["overall"] for s in scores_hybrid.values()) / len(scores_hybrid)
# results_ablation5.append({"Method": "Hybrid", "Average Score": avg_hybrid})

# Compare
if results_ablation5:
    df5 = pd.DataFrame(results_ablation5)
    print("\n📊 Results:")
    print(df5)
else:
    print("⚠️ Add model recall and evaluation code to run this study")

## Final Report

In [None]:
# Final Report
#
# Prints every result table that the study cells above actually produced.
# A study's DataFrame (df1..df5) only exists if that study collected results,
# so each one is looked up by name and skipped when absent.
print("\n" + "="*70)
print("📊 ABLATION STUDIES SUMMARY")
print("="*70)

# (heading, name of the DataFrame variable created by the matching study cell)
report_sections = [
    ("\n1. Hippocampus Impact:", "df1"),
    ("\n2. Interleaving Impact:", "df2"),
    ("\n3. Data Format Impact:", "df3"),
    ("\n4. LoRA Rank Impact:", "df4"),
    ("\n5. Scoring Method Impact:", "df5"),
]

# At the top level of a notebook cell, globals() is the same namespace the
# study cells assigned into.
for section_heading, frame_name in report_sections:
    if frame_name in globals():
        print(section_heading)
        print(globals()[frame_name])

print("\n🏁 Ablation studies template complete!")
print("   Fill in the training and evaluation code to run actual studies.")