# DRL vs EA Quantum Architecture Search Comparison

This notebook provides a scaffold for comparing the DRL approach from arXiv:2407.20147
with the coevolutionary (EA) agents in this repository.

## Prerequisites

1. Run experiments and generate logs using the configs in `comparison/experiments/configs/`
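2. Place log files under `comparison/logs/` following the naming convention below.
   Note that the loading cell in Section 2 globs `comparison/logs/drl/*.jsonl` and
   `comparison/logs/ea/*.jsonl` by default, so either put the files in those
   subdirectories or adjust the patterns in that cell:
   - `drl_run_{seed}.jsonl` for DRL results
   - `ea_run_{seed}.jsonl` for EA results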
3. Install dependencies: `pip install -r comparison/requirements.txt`

## 1. Setup and Imports

In [None]:
import sys
import json
from pathlib import Path

# Add repository root to path.
# The notebook reads its logs from "../logs", so its working directory is a
# sibling of `comparison/logs` and the direct parent is the `comparison`
# package itself, not the repository root. Walk upwards until a directory
# containing `comparison/analysis` is found so that `import comparison.…`
# resolves; fall back to the direct parent when no such directory exists.
_cwd = Path().resolve()
repo_root = next(
    (
        candidate
        for candidate in (_cwd, *_cwd.parents)
        if (candidate / "comparison" / "analysis").is_dir()
    ),
    _cwd.parent,
)
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

# Import comparison modules
from comparison.analysis.compute_metrics import (
    load_logs,
    validate_logs,
    aggregate_metrics,
    save_summary,
)
from comparison.diagnostics.diagnose_fidelity import (
    compute_fidelities,
    run_basic_sanity_checks,
)

# Optional: Import visualization libraries
# HAS_PLOTTING is set to True only after both imports succeed. The plotting
# helpers defined later in this notebook check it and return early (after
# printing a hint) when it is False, so the rest of the notebook still runs
# without matplotlib/numpy installed.
try:
    import matplotlib.pyplot as plt
    import numpy as np
    HAS_PLOTTING = True
except ImportError:
    HAS_PLOTTING = False
    print("matplotlib/numpy not available. Install with: pip install matplotlib numpy")

## 2. Load Experiment Logs

Load logs from the `comparison/logs/` directory. By default the cell below reads the `drl/` and `ea/` subdirectories; update the paths as needed for your experiments.

In [None]:
# Root directory of the experiment logs, relative to this notebook
LOGS_DIR = Path("../logs")

# Glob patterns selecting each method's log files
# (placeholders - point these at your actual runs)
drl_log_pattern = str(LOGS_DIR.joinpath("drl", "*.jsonl"))
ea_log_pattern = str(LOGS_DIR.joinpath("ea", "*.jsonl"))

# Read the DRL entries and report how many were found
drl_logs = load_logs(drl_log_pattern)
print(f"Loaded {len(drl_logs)} DRL log entries")

# Read the EA entries and report how many were found
ea_logs = load_logs(ea_log_pattern)
print(f"Loaded {len(ea_logs)} EA log entries")

## 3. Validate Logs Against Schema

Ensure log entries conform to the expected schema for fair comparison.

In [None]:
# Split each method's entries into the valid ones and a list of
# (entry index, error) pairs, printing a one-line tally per method.
valid_drl, drl_errors = validate_logs(drl_logs)
print(f"DRL: {len(valid_drl)} valid, {len(drl_errors)} errors")

valid_ea, ea_errors = validate_logs(ea_logs)
print(f"EA: {len(valid_ea)} valid, {len(ea_errors)} errors")

# For every method that produced errors, show at most the first three
for header, errors in (
    ("\nDRL validation errors (first 3):", drl_errors),
    ("\nEA validation errors (first 3):", ea_errors),
):
    if not errors:
        continue
    print(header)
    for idx, err in errors[:3]:
        print(f"  Entry {idx}: {err}")

## 4. Compute Aggregated Metrics

Compute per-run and cross-run statistics for both methods.

In [None]:
# Combine the validated entries of both methods into one list for aggregation
all_logs = valid_drl + valid_ea

# Compute aggregated metrics
metrics = aggregate_metrics(all_logs)

# Display overall summary
print(f"Total runs: {metrics['total_runs']}")
print(f"Total log entries: {metrics['total_logs']}")

# Display per-method statistics; a missing 'by_method' section prints nothing
print("\n--- By Method ---")
for method, stats in metrics.get('by_method', {}).items():
    print(f"\n{method}:")
    print(f"  Runs: {stats['n_runs']}")
    if stats['mean_max_fidelity'] is not None:
        # `.get(key, 0)` only substitutes 0 for a *missing* key. The None checks
        # around this line suggest undefined statistics may be reported as None,
        # and formatting None with ':.4f' raises TypeError, so treat None as 0 too.
        std_max = stats.get('std_max_fidelity')
        if std_max is None:
            std_max = 0
        print(f"  Mean max fidelity: {stats['mean_max_fidelity']:.4f} ± {std_max:.4f}")
    if stats['mean_final_fidelity'] is not None:
        print(f"  Mean final fidelity: {stats['mean_final_fidelity']:.4f}")

## 5. Fidelity vs Evaluations Plot

Plot learning curves showing fidelity improvement over training evaluations.

In [None]:
def plot_fidelity_vs_evals(logs_by_method, title="Fidelity vs Evaluations"):
    """
    Plot fidelity learning curves for each method.

    One translucent line is drawn per seed, coloured by method. The legend
    shows exactly one entry per method that has data.

    Args:
        logs_by_method: dict mapping method name to list of log entries.
            Each entry is read via ``.get`` for the keys 'seed',
            'cum_eval_count' and 'best_fidelity' (each defaulting to 0).
        title: Plot title

    Returns:
        The created matplotlib figure, or None when plotting is unavailable.
    """
    if not HAS_PLOTTING:
        print("Plotting not available. Install matplotlib.")
        return

    fig, ax = plt.subplots(figsize=(10, 6))

    colors = {'drl': 'blue', 'ea': 'orange'}

    for method, logs in logs_by_method.items():
        # Group by seed so every run becomes its own curve
        seeds = {}
        for log in logs:
            run = seeds.setdefault(log.get('seed', 0), {'evals': [], 'fidelities': []})
            run['evals'].append(log.get('cum_eval_count', 0))
            run['fidelities'].append(log.get('best_fidelity', 0))

        # Plot each seed with light color. Only the first curve of a method
        # carries the method label; the others are hidden from the legend.
        # Passing a bare list of names to ax.legend() would instead label the
        # first N lines drawn, i.e. several seeds of the same method.
        color = colors.get(method, 'gray')
        for position, data in enumerate(seeds.values()):
            ax.plot(data['evals'], data['fidelities'],
                    color=color, alpha=0.3, linewidth=1,
                    label=method if position == 0 else '_nolegend_')

        # TODO: Add mean line with standard deviation shading
        # This requires aligning evaluation counts across seeds

    ax.set_xlabel('Cumulative Evaluations')
    ax.set_ylabel('Best Fidelity')
    ax.set_title(title)

    # Skip the legend entirely when nothing was plotted
    handles, labels = ax.get_legend_handles_labels()
    if handles:
        legend = ax.legend(handles, labels)
        # The curves are drawn faintly; make the legend samples fully opaque
        for sample in legend.get_lines():
            sample.set_alpha(1.0)

    ax.grid(True, alpha=0.3)
    ax.set_ylim(0, 1.05)

    plt.tight_layout()
    return fig

# Example usage (uncomment when logs are available)
# `valid_drl` and `valid_ea` are the validated entries produced in Section 3.
# fig = plot_fidelity_vs_evals({'drl': valid_drl, 'ea': valid_ea})
# plt.show()
# Placeholder output so the cell prints a reminder until the example is enabled.
print("TODO: Uncomment plotting code when logs are available")

## 6. Empirical CDF (ECDF) Plot

Plot the empirical cumulative distribution function of final fidelities.

In [None]:
def plot_ecdf(fidelities_by_method, title="ECDF of Final Fidelities"):
    """
    Draw one empirical-CDF step curve of final fidelities per method.

    Methods with no fidelities are skipped. Dashed red vertical guides are
    drawn at fidelities 0.9, 0.95 and 0.99.

    Args:
        fidelities_by_method: dict mapping method name to list of final fidelities
        title: Plot title

    Returns:
        The created matplotlib figure, or None when plotting is unavailable.
    """
    if not HAS_PLOTTING:
        print("Plotting not available. Install matplotlib.")
        return

    fig, ax = plt.subplots(figsize=(8, 6))

    palette = {'drl': 'blue', 'ea': 'orange'}

    for method, fidelities in fidelities_by_method.items():
        if fidelities:
            ordered = np.sort(fidelities)
            n_points = len(ordered)
            # i-th smallest value maps to cumulative probability i / n
            cumulative = np.arange(1, n_points + 1) / n_points
            ax.step(
                ordered,
                cumulative,
                where='post',
                color=palette.get(method, 'gray'),
                linewidth=2,
                label=f"{method} (n={len(fidelities)})",
            )

    ax.set_xlabel('Final Fidelity')
    ax.set_ylabel('Cumulative Probability')
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_xlim(0, 1.05)
    ax.set_ylim(0, 1.05)

    # Vertical guides at the fidelity thresholds of interest
    for threshold in (0.9, 0.95, 0.99):
        ax.axvline(x=threshold, color='red', linestyle='--', alpha=0.3)

    plt.tight_layout()
    return fig

# Example usage (uncomment when metrics are computed)
# Extract final fidelities from the per-run section of `metrics` (Section 4),
# dropping runs whose final fidelity is None.
# drl_fids = [r['final_fidelity'] for r in metrics['per_run'].values()
#             if r['method'] == 'drl' and r['final_fidelity'] is not None]
# ea_fids = [r['final_fidelity'] for r in metrics['per_run'].values()
#            if r['method'] == 'ea' and r['final_fidelity'] is not None]
# fig = plot_ecdf({'drl': drl_fids, 'ea': ea_fids})
# plt.show()
# Placeholder output so the cell prints a reminder until the example is enabled.
print("TODO: Uncomment plotting code when metrics are available")

## 7. Pareto Plot (Fidelity vs Gate Count)

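Scatter-plot each run's maximum fidelity against its gate count to expose the trade-off between fidelity and circuit complexity. The runs are plotted as-is; no Pareto frontier is computed or drawn.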

In [None]:
def plot_pareto(runs_by_method, title="Pareto: Fidelity vs Gate Count"):
    """
    Scatter-plot max fidelity against gate count for each method.

    Args:
        runs_by_method: dict mapping method name to list of run dicts with
                       'max_fidelity' and 'min_gate_count' keys.
                       'final_gate_count' is used when 'min_gate_count' is
                       absent or None. Runs without a fidelity or without
                       any gate count are skipped.
        title: Plot title

    Returns:
        The created matplotlib figure, or None when plotting is unavailable.
    """
    if not HAS_PLOTTING:
        print("Plotting not available. Install matplotlib.")
        return

    fig, ax = plt.subplots(figsize=(8, 6))

    colors = {'drl': 'blue', 'ea': 'orange'}
    markers = {'drl': 'o', 'ea': 's'}

    for method, runs in runs_by_method.items():
        fidelities = []
        gate_counts = []

        for run in runs:
            fid = run.get('max_fidelity')
            # Test for None explicitly: `a or b` would discard a legitimate
            # gate count of 0 and silently substitute the final count.
            gc = run.get('min_gate_count')
            if gc is None:
                gc = run.get('final_gate_count')
            if fid is not None and gc is not None:
                fidelities.append(fid)
                gate_counts.append(gc)

        if fidelities:
            color = colors.get(method, 'gray')
            marker = markers.get(method, 'o')
            ax.scatter(gate_counts, fidelities,
                       c=color, marker=marker, s=100, alpha=0.7,
                       label=f"{method} (n={len(fidelities)})")

    # Add fidelity threshold line. It must exist before ax.legend() is called,
    # otherwise its label is never picked up by the legend.
    ax.axhline(y=0.99, color='green', linestyle='--', alpha=0.5, label='0.99 threshold')

    ax.set_xlabel('Gate Count')
    ax.set_ylabel('Max Fidelity')
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_ylim(0, 1.05)

    plt.tight_layout()
    return fig

# Example usage (uncomment when metrics are computed)
# Split the per-run section of `metrics` (Section 4) by method.
# drl_runs = [r for r in metrics['per_run'].values() if r['method'] == 'drl']
# ea_runs = [r for r in metrics['per_run'].values() if r['method'] == 'ea']
# fig = plot_pareto({'drl': drl_runs, 'ea': ea_runs})
# plt.show()
# Placeholder output so the cell prints a reminder until the example is enabled.
print("TODO: Uncomment plotting code when metrics are available")

## 8. Save Results

Save aggregated metrics and plots for reproducibility.

In [None]:
# Save metrics summary
# The output directory is created immediately, even while the save calls
# below are still commented out. `parents=True` creates missing parent
# directories and `exist_ok=True` makes re-running the cell harmless.
output_dir = Path("../logs/analysis_output")
output_dir.mkdir(parents=True, exist_ok=True)

# Uncomment when logs are available:
# (`save_summary` is unpacked here as a (JSON path, CSV path) pair.)
# json_path, csv_path = save_summary(metrics, output_dir)
# print(f"Saved JSON: {json_path}")
# print(f"Saved CSV: {csv_path}")

# Save plots
# The figure is closed after saving so it is not rendered a second time.
# if HAS_PLOTTING:
#     fig = plot_fidelity_vs_evals({'drl': valid_drl, 'ea': valid_ea})
#     fig.savefig(output_dir / 'fidelity_vs_evals.png', dpi=150)
#     plt.close(fig)
#     print(f"Saved: {output_dir / 'fidelity_vs_evals.png'}")

# Placeholder output so the cell prints a reminder until saving is enabled.
print("TODO: Uncomment save code when analysis is complete")

## 9. Run Diagnostics Sanity Check

Verify that the fidelity computation is working correctly.

In [None]:
# Run the built-in checks; the result is a dict keyed by test name whose
# values hold the three fidelity measures printed below.
sanity_results = run_basic_sanity_checks()

print("Fidelity Sanity Checks:")
print("=" * 50)

# Report every measure for every test case
for test_name, result in sanity_results.items():
    print(f"\n{test_name}:")
    print(f"  Raw trace fidelity: {result['raw_trace_f']:.6f}")
    print(f"  Phase-corrected trace fidelity: {result['phase_corrected_trace_f']:.6f}")
    print(f"  Average gate fidelity: {result['Favg']:.6f}")

# These (test case, measure) pairs are expected to equal 1 to within 1e-10
expected_unity = (
    ('identity_identity', 'raw_trace_f'),
    ('toffoli_toffoli', 'raw_trace_f'),
    ('toffoli_toffoli_phase', 'phase_corrected_trace_f'),
)
for case_name, measure in expected_unity:
    assert abs(sanity_results[case_name][measure] - 1.0) < 1e-10
print("\n✓ All sanity checks passed!")

## Next Steps

1. **Generate logs**: Run experiments using the configs in `comparison/experiments/configs/`
2. **Update paths**: Modify the log paths in this notebook to point to your results
3. **Analyze results**: Uncomment the plotting and analysis code
4. **Compare methods**: Look at the metrics summary to compare DRL vs EA

### Example Commands

```bash
# Run EA experiments (from repo root)
python run_experiments.py --preset quick --n-qubits 3 --seed 42

# Compute metrics from logs
python -m comparison.analysis.compute_metrics --input "comparison/logs/**/*.jsonl" --out comparison/logs/analysis_output
```