# Analysis Visualizer

Comprehensive visualization tools for analyzing experimental results using `analyze_experiments.py`.

## Features

- **Priority 1**: Scaling Law analysis (Model Size vs Structural Damage)
- **Priority 2**: Placebo Test visualization (Treatment specificity)
- **Priority 3**: Mechanism Test comparison (Optimizer effects)
- **Priority 4**: Shield Matrix visualization (Quantization defense)

## Usage

This notebook demonstrates how to:
1. Run analysis scripts programmatically
2. Load and visualize analysis results
3. Create custom visualizations from analysis outputs

In [None]:
import subprocess
import sys
import json
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Image, display, Markdown

# Locate the project relative to the directory the notebook is running in.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent
experiment_dir = project_root.joinpath("experiments", "01_pythia_160m")
analyze_script = experiment_dir.joinpath("analyze_experiments.py")

# Plot defaults applied to every figure drawn afterwards.
sns.set_style("whitegrid")
plt.rcParams["figure.figsize"] = (12, 8)

# Echo the resolved locations so a wrong working directory is easy to spot.
print(
    "üìä Analysis Visualizer Setup Complete",
    f"Project root: {project_root}",
    f"Analysis script: {analyze_script}",
    sep="\n",
)

In [None]:
# Priority 1: Scaling Law Analysis
# ===================================

def run_priority1_analysis(results_dir, models, output_dir):
    """
    Run Priority 1 analysis: Scaling Law (Model Size vs Structural Damage)

    Launches ``analyze_script`` with the current interpreter
    (``sys.executable``) and sub-command ``"1"``, using ``project_root`` as
    the working directory, then prints the captured output.

    Args:
        results_dir: Value passed to ``--results-dir`` (converted with ``str``).
        models: Values passed after ``--models``. Any iterable is accepted and
            each item is converted with ``str``; a single bare string is
            treated as one model.
        output_dir: Value passed to ``--output-dir`` (converted with ``str``).

    Returns:
        True if the child process exited with status 0, otherwise False.
    """
    # Without this guard a bare string would be expanded character by character.
    if isinstance(models, str):
        models = [models]

    cmd = [
        sys.executable,
        str(analyze_script),
        "1",
        "--results-dir", str(results_dir),
        # Unpacking (instead of list concatenation) accepts tuples and other
        # iterables; str() keeps the join below working for Path-like items.
        "--models", *(str(item) for item in models),
        "--output-dir", str(output_dir),
    ]

    print("Running Priority 1 analysis...")
    print(f"Command: {' '.join(cmd)}\n")

    result = subprocess.run(cmd, cwd=str(project_root), capture_output=True, text=True)

    if result.returncode == 0:
        print("‚úÖ Analysis completed successfully!")
        print(result.stdout)
        return True

    print("‚ùå Analysis failed:")
    print(result.stderr)
    return False

def visualize_priority1_results(output_dir):
    """
    Show the Priority 1 (Scaling Law) artefacts found in ``output_dir``.

    Prints the CSV summary table and the number of entries in the JSON summary
    when those files exist, then either displays the scaling-law plot or
    prints a notice that the plot is missing.
    """
    base = Path(output_dir)
    csv_file = base / "scaling_law_summary.csv"
    json_file = base / "scaling_law_summary.json"
    curve_png = base / "scaling_law_curve.png"

    # Tabular summary (skipped when the file is absent).
    if csv_file.exists():
        table = pd.read_csv(csv_file)
        print("\nüìä Summary Statistics:")
        print(table.to_string(index=False))

    # JSON summary (skipped when the file is absent); only its length is reported.
    if json_file.exists():
        with open(json_file, 'r') as handle:
            entries = json.load(handle)
        print(f"\nüìà Data points: {len(entries)} models")

    # Guard clause: nothing to display without the rendered figure.
    if not curve_png.exists():
        print("‚ö†Ô∏è Plot not found. Run analysis first.")
        return

    print("\nüìâ Scaling Law Plot:")
    display(Image(str(curve_png)))

print("‚úÖ Analysis functions ready!")

In [None]:
# Priority 2: Placebo Test Analysis
# ===================================

def run_priority2_analysis(results_dir, model, treatments, output_dir):
    """
    Run Priority 2 analysis: Placebo Test (Treatment Specificity)

    Launches ``analyze_script`` with the current interpreter
    (``sys.executable``) and sub-command ``"2"``, using ``project_root`` as
    the working directory, then prints the captured output.

    Args:
        results_dir: Value passed to ``--results-dir`` (converted with ``str``).
        model: Value passed to ``--model`` (converted with ``str``).
        treatments: Values passed after ``--treatments``. Any iterable is
            accepted and each item is converted with ``str``; a single bare
            string is treated as one treatment.
        output_dir: Value passed to ``--output-dir`` (converted with ``str``).

    Returns:
        True if the child process exited with status 0, otherwise False.
    """
    # Without this guard a bare string would be expanded character by character.
    if isinstance(treatments, str):
        treatments = [treatments]

    cmd = [
        sys.executable,
        str(analyze_script),
        "2",
        "--results-dir", str(results_dir),
        "--model", str(model),
        # Unpacking (instead of list concatenation) accepts tuples and other
        # iterables; str() keeps the join below working for non-string items.
        "--treatments", *(str(item) for item in treatments),
        "--output-dir", str(output_dir),
    ]

    print("Running Priority 2 analysis...")
    print(f"Command: {' '.join(cmd)}\n")

    result = subprocess.run(cmd, cwd=str(project_root), capture_output=True, text=True)

    if result.returncode == 0:
        print("‚úÖ Analysis completed successfully!")
        print(result.stdout)
        return True

    print("‚ùå Analysis failed:")
    print(result.stderr)
    return False

def visualize_priority2_results(output_dir):
    """
    Load and display Priority 2 (Placebo Test) analysis results

    Looks in ``output_dir`` for ``placebo_summary.csv``,
    ``placebo_summary.json`` and ``placebo_comparison.png``; each artefact
    that exists is printed/displayed, missing ones are skipped silently.

    Args:
        output_dir: Directory (str or Path) holding the analysis outputs.

    Returns:
        None. Results are written to stdout / the notebook display.
    """
    def fmt(value):
        # Missing keys and JSON nulls cannot take the ".3f" format spec
        # (ValueError / TypeError), so anything non-numeric is shown as "N/A".
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return f"{value:.3f}"
        return "N/A"

    output_path = Path(output_dir)

    summary_csv = output_path / "placebo_summary.csv"
    summary_json = output_path / "placebo_summary.json"
    plot_png = output_path / "placebo_comparison.png"

    if summary_csv.exists():
        df = pd.read_csv(summary_csv)
        print("\nüìä Treatment Comparison:")
        print(df.to_string(index=False))

    if summary_json.exists():
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(summary_json, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if 'statistical_tests' in data:
            print("\nüìà Statistical Tests:")
            # Tolerate a summary without an 'anova' entry (or with a null one).
            anova = (data['statistical_tests'] or {}).get('anova') or {}
            print(f"  ANOVA: F={fmt(anova.get('f_statistic'))}, p={fmt(anova.get('p_value'))}")

    if plot_png.exists():
        print("\nüìâ Treatment Comparison Plot:")
        display(Image(str(plot_png)))

In [None]:
# Priority 3: Mechanism Test Analysis
# ===================================

def run_priority3_analysis(results_dir, model, optimizers, output_dir):
    """
    Run Priority 3 analysis: Mechanism Test (Optimizer Comparison)

    Launches ``analyze_script`` with the current interpreter
    (``sys.executable``) and sub-command ``"3"``, using ``project_root`` as
    the working directory, then prints the captured output.

    Args:
        results_dir: Value passed to ``--results-dir`` (converted with ``str``).
        model: Value passed to ``--model`` (converted with ``str``).
        optimizers: Values passed after ``--optimizers``. Any iterable is
            accepted and each item is converted with ``str``; a single bare
            string is treated as one optimizer.
        output_dir: Value passed to ``--output-dir`` (converted with ``str``).

    Returns:
        True if the child process exited with status 0, otherwise False.
    """
    # Without this guard a bare string would be expanded character by character.
    if isinstance(optimizers, str):
        optimizers = [optimizers]

    cmd = [
        sys.executable,
        str(analyze_script),
        "3",
        "--results-dir", str(results_dir),
        "--model", str(model),
        # Unpacking (instead of list concatenation) accepts tuples and other
        # iterables; str() keeps the join below working for non-string items.
        "--optimizers", *(str(item) for item in optimizers),
        "--output-dir", str(output_dir),
    ]

    print("Running Priority 3 analysis...")
    print(f"Command: {' '.join(cmd)}\n")

    result = subprocess.run(cmd, cwd=str(project_root), capture_output=True, text=True)

    if result.returncode == 0:
        print("‚úÖ Analysis completed successfully!")
        print(result.stdout)
        return True

    print("‚ùå Analysis failed:")
    print(result.stderr)
    return False

def visualize_priority3_results(output_dir):
    """
    Load and display Priority 3 (Mechanism Test) analysis results

    Looks in ``output_dir`` for ``mechanism_summary.csv``,
    ``mechanism_summary.json`` and ``mechanism_comparison.png``; each artefact
    that exists is printed/displayed, missing ones are skipped silently.

    Args:
        output_dir: Directory (str or Path) holding the analysis outputs.

    Returns:
        None. Results are written to stdout / the notebook display.
    """
    def fmt(value):
        # Missing keys and JSON nulls cannot take the ".3f" format spec
        # (ValueError / TypeError), so anything non-numeric is shown as "N/A".
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return f"{value:.3f}"
        return "N/A"

    output_path = Path(output_dir)

    summary_csv = output_path / "mechanism_summary.csv"
    summary_json = output_path / "mechanism_summary.json"
    plot_png = output_path / "mechanism_comparison.png"

    if summary_csv.exists():
        df = pd.read_csv(summary_csv)
        print("\nüìä Optimizer Comparison:")
        print(df.to_string(index=False))

    if summary_json.exists():
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(summary_json, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if 'statistical_test' in data:
            # A null entry is treated like an empty one.
            test = data['statistical_test'] or {}
            print("\nüìà Statistical Test:")
            print(f"  Type: {test.get('test_type', 'N/A')}")
            print(f"  p-value: {fmt(test.get('p_value'))}")

    if plot_png.exists():
        print("\nüìâ Optimizer Comparison Plot:")
        display(Image(str(plot_png)))

In [None]:
# Priority 4: Shield Matrix Analysis
# ===================================

def run_priority4_analysis(results_dir, model, precisions, output_dir):
    """
    Run Priority 4 analysis: Shield Matrix (Quantization Defense)

    Launches ``analyze_script`` with the current interpreter
    (``sys.executable``) and sub-command ``"4"``, using ``project_root`` as
    the working directory, then prints the captured output.

    Args:
        results_dir: Value passed to ``--results-dir`` (converted with ``str``).
        model: Value passed to ``--model`` (converted with ``str``).
        precisions: Values passed after ``--precisions``. Any iterable is
            accepted and each item is converted with ``str``; a single bare
            string is treated as one precision.
        output_dir: Value passed to ``--output-dir`` (converted with ``str``).

    Returns:
        True if the child process exited with status 0, otherwise False.
    """
    # Without this guard a bare string would be expanded character by character.
    if isinstance(precisions, str):
        precisions = [precisions]

    cmd = [
        sys.executable,
        str(analyze_script),
        "4",
        "--results-dir", str(results_dir),
        "--model", str(model),
        # Unpacking (instead of list concatenation) accepts tuples and other
        # iterables; str() keeps the join below working for non-string items.
        "--precisions", *(str(item) for item in precisions),
        "--output-dir", str(output_dir),
    ]

    print("Running Priority 4 analysis...")
    print(f"Command: {' '.join(cmd)}\n")

    result = subprocess.run(cmd, cwd=str(project_root), capture_output=True, text=True)

    if result.returncode == 0:
        print("‚úÖ Analysis completed successfully!")
        print(result.stdout)
        return True

    print("‚ùå Analysis failed:")
    print(result.stderr)
    return False

def visualize_priority4_results(output_dir):
    """
    Load and display Priority 4 (Shield Matrix) analysis results

    Looks in ``output_dir`` for ``shield_summary.csv``,
    ``shield_summary.json`` and ``shield_matrix.png``; each artefact that
    exists is printed/displayed, missing ones are skipped silently.

    Args:
        output_dir: Directory (str or Path) holding the analysis outputs.

    Returns:
        None. Results are written to stdout / the notebook display.
    """
    def fmt(value):
        # Missing keys and JSON nulls cannot take the ".3f" format spec
        # (ValueError / TypeError), so anything non-numeric is shown as "N/A".
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return f"{value:.3f}"
        return "N/A"

    output_path = Path(output_dir)

    summary_csv = output_path / "shield_summary.csv"
    summary_json = output_path / "shield_summary.json"
    plot_png = output_path / "shield_matrix.png"

    if summary_csv.exists():
        df = pd.read_csv(summary_csv)
        print("\nüìä Quantization Shield Comparison:")
        print(df.to_string(index=False))

    if summary_json.exists():
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(summary_json, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if 'statistical_tests' in data:
            print("\nüìà Statistical Tests:")
            # Tolerate a summary without an 'anova' entry (or with a null one).
            anova = (data['statistical_tests'] or {}).get('anova') or {}
            print(f"  ANOVA: F={fmt(anova.get('f_statistic'))}, p={fmt(anova.get('p_value'))}")

    if plot_png.exists():
        print("\nüìâ Shield Matrix Plot:")
        display(Image(str(plot_png)))

## Example Usage

Below are examples of how to use the analysis functions. Adjust paths based on your experiment results.

In [None]:
# Example 1: Priority 1 - Scaling Law Analysis
# ==============================================

# output_dir is the priority folder; results_dir is its "raw_data" subfolder.
output_dir = project_root.joinpath("experiments", "results", "priority1_scaling_law")
results_dir = output_dir / "raw_data"
models = ["70m", "160m", "410m", "1b"]

# Uncomment to run analysis:
# run_priority1_analysis(results_dir, models, output_dir)

# Show existing results, or explain why there is nothing to show.
if not output_dir.exists():
    print("‚ö†Ô∏è Results directory not found. Run analysis first or adjust paths.")
else:
    visualize_priority1_results(output_dir)

In [None]:
# Example 2: Priority 2 - Placebo Test Analysis
# ===============================================

# output_dir is the priority folder; results_dir is its "raw_data" subfolder.
output_dir = project_root.joinpath("experiments", "results", "priority2_placebo")
results_dir = output_dir / "raw_data"
model = "410m"
treatments = ["eigen_prion", "gaussian_noise", "random_text"]

# Uncomment to run analysis:
# run_priority2_analysis(results_dir, model, treatments, output_dir)

# Show existing results, or explain why there is nothing to show.
if not output_dir.exists():
    print("‚ö†Ô∏è Results directory not found. Run analysis first or adjust paths.")
else:
    visualize_priority2_results(output_dir)

In [None]:
# Example 3: Priority 3 - Mechanism Test Analysis
# ==================================================

# output_dir is the priority folder; results_dir is its "raw_data" subfolder.
output_dir = project_root.joinpath("experiments", "results", "priority3_mechanism")
results_dir = output_dir / "raw_data"
model = "410m"
optimizers = ["adamw", "sgd"]

# Uncomment to run analysis:
# run_priority3_analysis(results_dir, model, optimizers, output_dir)

# Show existing results, or explain why there is nothing to show.
if not output_dir.exists():
    print("‚ö†Ô∏è Results directory not found. Run analysis first or adjust paths.")
else:
    visualize_priority3_results(output_dir)

In [None]:
# Example 4: Priority 4 - Shield Matrix Analysis
# ================================================

# output_dir is the priority folder; results_dir is its "raw_data" subfolder.
output_dir = project_root.joinpath("experiments", "results", "priority4_shield")
results_dir = output_dir / "raw_data"
model = "1b"
precisions = ["fp16", "8bit", "4bit"]

# Uncomment to run analysis:
# run_priority4_analysis(results_dir, model, precisions, output_dir)

# Show existing results, or explain why there is nothing to show.
if not output_dir.exists():
    print("‚ö†Ô∏è Results directory not found. Run analysis first or adjust paths.")
else:
    visualize_priority4_results(output_dir)

## Custom Visualizations

You can also load the analysis results and create custom visualizations:

In [None]:
# Custom Visualization: Compare all Priorities
# =============================================

def load_all_summaries():
    """
    Collect the summary CSV of every priority analysis found on disk.

    Returns a dict mapping the priority number (1-4) to the DataFrame read
    from that priority's summary CSV under ``project_root/experiments/results``.
    A priority whose CSV does not exist is reported and left out of the dict.
    """
    results_root = project_root / "experiments" / "results"

    # (priority number, results sub-directory, summary file name)
    locations = [
        (1, "priority1_scaling_law", "scaling_law_summary.csv"),
        (2, "priority2_placebo", "placebo_summary.csv"),
        (3, "priority3_mechanism", "mechanism_summary.csv"),
        (4, "priority4_shield", "shield_summary.csv"),
    ]

    loaded = {}
    for number, folder, filename in locations:
        csv_path = results_root / folder / filename
        if not csv_path.exists():
            print(f"‚ö†Ô∏è Priority {number} summary not found: {csv_path}")
            continue
        frame = pd.read_csv(csv_path)
        loaded[number] = frame
        print(f"‚úÖ Loaded Priority {number} summary ({len(frame)} entries)")

    return loaded

# Read every summary CSV that is present on disk.
all_summaries = load_all_summaries()

# Starting point for a combined visualization: list what was loaded.
if not all_summaries:
    print("\n‚ö†Ô∏è No summaries found. Run analyses first.")
else:
    print(f"\nüìä Loaded {len(all_summaries)} priority summaries")
    print("\nAvailable summaries:")
    for priority, df in all_summaries.items():
        # One line with the table size, one with at most the first five column names.
        print(
            f"  Priority {priority}: {df.shape[0]} rows, {df.shape[1]} columns",
            f"    Columns: {', '.join(df.columns[:5])}...",
            sep="\n",
        )