# Histogram Structure Validation

This notebook validates that histograms are correctly defined and structured.

## Overview

- Check histogram definitions
- Validate histogram structure
- Inspect filled histograms from saved result files (requires a prior analysis run)
- Verify variable names match exclusion patterns


In [None]:
# Standard-library imports used throughout the notebook.
import sys
from pathlib import Path
import pickle

# Add project root to path
# Path() is the current working directory, so the root is taken to be its
# parent. This assumes the notebook is executed from a directory exactly one
# level below the project root -- TODO confirm against the repository layout.
project_root = Path().resolve().parent
# Index 0 puts this directory ahead of every other entry on sys.path.
sys.path.insert(0, str(project_root))

# Project imports are placed after the sys.path change; presumably they rely
# on it when the package is not installed in the environment -- verify.
from darkbottomline.histograms import HistogramManager
from darkbottomline.plotting import PlotManager


In [None]:
# Build the histogram catalogue and summarise it by variable family.
hist_manager = HistogramManager()
histograms = hist_manager.define_histograms()


def _contains_any(name, tokens):
    """Return True if any of `tokens` is a substring of the lower-cased `name`."""
    lowered = name.lower()
    return any(token in lowered for token in tokens)


# Family membership is decided purely by substring / prefix matching on the key.
jet_names = [name for name in histograms.keys() if _contains_any(name, ('jet',))]
lepton_names = [
    name for name in histograms.keys()
    if _contains_any(name, ('lep', 'muon', 'electron'))
]
met_names = [name for name in histograms.keys() if _contains_any(name, ('met', 'recoil'))]
mass_names = [
    name for name in histograms.keys()
    if _contains_any(name, ('mass', 'm_', 'z_', 'w_'))
]
multiplicity_names = [name for name in histograms.keys() if name.startswith('n_')]

print(f"Total histograms defined: {len(histograms)}")
print("\nHistogram categories:")
# Jet, lepton and mass listings are capped at ten entries; the others are shown in full.
print("  Jet variables:", jet_names[:10])
print("  Lepton variables:", lepton_names[:10])
print("  MET variables:", met_names)
print("  Mass variables:", mass_names[:10])
print("  Multiplicity variables:", multiplicity_names)


In [None]:
# Check if results file exists and validate structure
results_dir = project_root / "outputs" / "hists"
# Sort the matches: glob yields entries in filesystem-dependent order, so
# without sorting "the first file" could change between runs or machines.
results_files = sorted(results_dir.glob("*.pkl")) if results_dir.exists() else []

if results_files:
    print(f"Found {len(results_files)} result files:")
    # Only the first five names are listed to keep the output short.
    for result_path in results_files[:5]:
        print(f"  - {result_path.name}")

    # Load one file to check structure.
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising; only open result files produced by your own analysis runs.
    with open(results_files[0], 'rb') as handle:
        results = pickle.load(handle)

    print("\n=== Results Structure ===")
    print(f"Keys: {list(results.keys())}")

    if 'region_histograms' in results:
        regions = list(results['region_histograms'].keys())
        print(f"\nRegions in results: {regions}")

        if regions:
            # The first region is used as a representative sample.
            first_region = regions[0]
            region_hists = results['region_histograms'][first_region]
            print(f"\nVariables in {first_region}: {len(region_hists)}")
            print(f"  Sample: {list(region_hists.keys())[:10]}")
else:
    print("No result files found. Run analysis first to generate histograms.")


In [None]:
# Validate that exclusion patterns match histogram names
plot_manager = PlotManager()

# Get exclusion patterns.
# NOTE(review): this calls a leading-underscore (private by convention) method
# of PlotManager; it may change without notice.
test_region = "1b:SR"
excluded = plot_manager._get_excluded_variables_for_region(test_region)

print(f"=== Exclusion Pattern Matching for {test_region} ===")
print(f"Excluded patterns: {excluded[:10]}...")

# Check if excluded patterns would match any histogram names.
# `results_files` comes from the results-discovery cell earlier in the notebook.
if not results_files:
    # Say so explicitly: silence here would look the same as "nothing excluded".
    print("\nNo result files found; skipping exclusion check.")
else:
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising; only open result files produced by your own analysis runs.
    with open(results_files[0], 'rb') as handle:
        results = pickle.load(handle)

    if 'region_histograms' not in results or test_region not in results['region_histograms']:
        print(f"\nRegion {test_region} not found in {results_files[0].name}; skipping exclusion check.")
    else:
        region_hists = results['region_histograms'][test_region]
        hist_names = list(region_hists.keys())

        print(f"\nHistogram names in {test_region}: {len(hist_names)}")

        # Partition the names in one pass. A histogram is excluded when any
        # exclusion pattern occurs as a substring of its name.
        excluded_matches = []
        included_matches = []
        for hist_name in hist_names:
            if any(excluded_var in hist_name for excluded_var in excluded):
                excluded_matches.append(hist_name)
            else:
                included_matches.append(hist_name)

        print(f"\nHistograms that would be excluded ({len(excluded_matches)}):")
        for hist in excluded_matches[:10]:
            print(f"  - {hist}")

        # Everything that matched no pattern would be plotted.
        print(f"\nHistograms that would be plotted ({len(included_matches)}):")
        for hist in included_matches[:10]:
            print(f"  - {hist}")
