# Data/MC Comparison Validation

This notebook validates data/MC plots and compares yields across regions.

## Overview

- Load and inspect results
- Compare yields across regions
- Validate plot generation
- Check data/MC agreement


In [None]:
import sys
from pathlib import Path
import pickle
import numpy as np
import pandas as pd

# Make project modules importable: take the directory one level above the
# current working directory as the project root and give it top priority on
# sys.path. (Assumes the notebook is launched from a direct subdirectory of
# the project root -- confirm for your layout.)
project_root = Path().resolve().parents[0]
sys.path.insert(0, str(project_root))


In [None]:
# Load results
# Locate the pickled outputs under <project_root>/outputs/hists. The list is
# sorted so that "the first file" is the same file on every run; glob itself
# does not guarantee any ordering.
results_dir = project_root / "outputs" / "hists"
results_files = sorted(results_dir.glob("*.pkl")) if results_dir.exists() else []

# Defaults so that later cells can reference these names even when no file
# was loaded or the loaded file has no 'region_histograms' entry.
results = {}
regions = []

if results_files:
    print(f"Found {len(results_files)} result files")

    # Load first file
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only open result files produced by a trusted analysis run.
    with open(results_files[0], 'rb') as f:
        results = pickle.load(f)

    print("\n=== Results Structure ===")
    print(f"Keys: {list(results.keys())}")

    if 'region_histograms' in results:
        regions = list(results['region_histograms'].keys())
        print(f"\nRegions: {regions}")

        # Calculate yields per region (sum of the 'met' histogram contents).
        # Regions without a 'met' histogram are left out of the table.
        print("\n=== Yields per Region ===")
        yields = {}
        for region in regions:
            region_hists = results['region_histograms'][region]
            if 'met' in region_hists:
                met_hist = region_hists['met']
                # Test for dict first: every dict has a `.values` method, so
                # checking hasattr(..., 'values') first would send dict-style
                # histograms down the object branch and fail on
                # `dict_values.sum()`.
                if isinstance(met_hist, dict):
                    if 'values' in met_hist:
                        yield_val = np.sum(met_hist['values'])
                    else:
                        yield_val = 0
                elif hasattr(met_hist, 'values'):
                    # Histogram-like object exposing values() -> array.
                    yield_val = met_hist.values().sum()
                else:
                    # Unrecognised histogram container: report a zero yield.
                    yield_val = 0
                yields[region] = yield_val

        # Create DataFrame
        df = pd.DataFrame(list(yields.items()), columns=['Region', 'Yield'])
        print(df.to_string(index=False))
    else:
        print("\nNo 'region_histograms' entry in results; skipping yields.")
else:
    print("No result files found. Run analysis first.")


In [None]:
# Check that plot generation would work for the loaded results
if results_files:
    from darkbottomline.plotting import PlotManager

    plot_manager = PlotManager()

    print("=== Plot Generation Validation ===")

    # Read the region histograms straight from `results` rather than relying
    # on `regions`, which the loading cell only binds when the
    # 'region_histograms' key is present.
    region_histograms = results.get('region_histograms', {})
    if not region_histograms:
        print("No region histograms available; nothing to validate.")

    # Check which variables would be plotted for each region
    for region in list(region_histograms)[:3]:  # Check first 3 regions
        excluded = plot_manager._get_excluded_variables_for_region(region)
        region_hists = region_histograms[region]

        # A variable is plotted unless its name contains one of the
        # excluded patterns as a substring.
        variables_to_plot = [
            var_name
            for var_name in region_hists
            if not any(excluded_var in var_name for excluded_var in excluded)
        ]

        print(f"\n{region}:")
        print(f"  Total variables: {len(region_hists)}")
        print(f"  Excluded: {len(excluded)} patterns")
        print(f"  Would plot: {len(variables_to_plot)} variables")
        print(f"  Sample: {variables_to_plot[:5]}")
