## Table of Contents
1. [Setup and Imports](#setup)
2. [Inventory All Report Artifacts](#inventory)
3. [Master Evidence Index](#evidence-index)
4. [Cross-Domain Synthesis](#cross-domain)
5. [Centrality ↔ Delay Correlation](#centrality-delay)
6. [Robustness ↔ Hub Dependence](#robustness-hub)
7. [Integrated Narrative](#narrative)
8. [Write Report Outputs](#write-outputs)
9. [Reproducibility Notes](#reproducibility)

In [None]:
# ============================================================================
# SETUP AND IMPORTS
# ============================================================================

import json
from pathlib import Path
from datetime import datetime
import warnings

import pandas as pd
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Project paths.
# REPO_ROOT is derived from the current working directory (two levels up),
# so every path below depends on where the notebook kernel was started.
REPO_ROOT = Path.cwd().parent.parent
RESULTS_DIR = REPO_ROOT / "results"
ANALYSIS_DIR = RESULTS_DIR / "analysis"
BUSINESS_DIR = RESULTS_DIR / "business"
NETWORKS_DIR = RESULTS_DIR / "networks"
TABLES_REPORT_DIR = RESULTS_DIR / "tables" / "report"
FIGURES_REPORT_DIR = RESULTS_DIR / "figures" / "report"
LOGS_DIR = RESULTS_DIR / "logs"
WARNINGS_LOG = TABLES_REPORT_DIR / "_warnings.log"

# Notebook identity (used as a prefix for output files and log entries)
NOTEBOOK_ID = "nb09"
NOTEBOOK_NAME = "synthesis__integrated_findings"

# Plotting settings
try:
    plt.style.use("seaborn-v0_8-whitegrid")
except OSError:
    # Matplotlib releases before 3.6 ship this style under its old name.
    plt.style.use("seaborn-whitegrid")
sns.set_palette("husl")

# Record whether results/ existed BEFORE creating the output directories:
# mkdir(parents=True) below creates results/ as a side effect, which would
# otherwise make the sanity check always report True.
results_dir_preexisting = RESULTS_DIR.exists()

# Ensure output directories exist
for _out_dir in (TABLES_REPORT_DIR, FIGURES_REPORT_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

print(f"Results dir exists: {results_dir_preexisting}")

In [None]:
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def append_warning(message: str, notebook_id: str = NOTEBOOK_ID) -> None:
    """Append a timestamped warning to the consolidated warnings log and echo it.

    Args:
        message: Human-readable warning text.
        notebook_id: Identifier written next to the timestamp; defaults to
            this notebook's ``NOTEBOOK_ID``.
    """
    timestamp = datetime.now().isoformat()
    # Create the log directory on demand so the helper also works when it is
    # called before (or without) the setup cell's mkdir calls.
    WARNINGS_LOG.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: messages embed arbitrary exception text, which may not
    # be encodable in the platform's default encoding.
    with open(WARNINGS_LOG, "a", encoding="utf-8") as f:
        f.write(f"[{timestamp}] [{notebook_id}] {message}\n")
    print(f"WARNING: {message}")

def safe_load_parquet(path: Path) -> pl.DataFrame | None:
    """Read a parquet file with polars, or return None if reading fails.

    Any exception raised while reading is recorded through
    ``append_warning`` instead of being propagated.
    """
    try:
        frame = pl.read_parquet(path)
    except Exception as err:
        append_warning(f"Failed to load {path.name}: {err}")
        return None
    return frame

def safe_load_json(path: Path) -> dict | None:
    """Safely load a JSON file, returning None if it fails."""
    try:
        with open(path) as f:
            return json.load(f)
    except Exception as e:
        append_warning(f"Failed to load {path.name}: {e}")
        return None

<a id="inventory"></a>
## 2. Inventory All Report Artifacts

In [None]:
# ============================================================================
# INVENTORY ALL REPORT ARTIFACTS
# ============================================================================
# Builds `artifact_df`, one row per discovered file, with the columns
# file / type / notebook / path (path is relative to RESULTS_DIR).

ARTIFACT_COLUMNS = ["file", "type", "notebook", "path"]

# Collect all artifacts
all_artifacts = []

# Report tables and figures: the source notebook is the text before the first
# underscore of the file stem ("misc" when the stem has no underscore).
for report_dir, pattern, artifact_type in (
    (TABLES_REPORT_DIR, "*.csv", "table"),
    (FIGURES_REPORT_DIR, "*.png", "figure"),
):
    # sorted() makes the inventory order reproducible; glob order is not.
    for artifact in sorted(report_dir.glob(pattern)):
        stem = artifact.stem
        all_artifacts.append({
            "file": artifact.name,
            "type": artifact_type,
            "notebook": stem.split("_")[0] if "_" in stem else "misc",
            "path": str(artifact.relative_to(RESULTS_DIR)),
        })

# Pipeline artifacts (analysis)
for art in sorted(ANALYSIS_DIR.glob("*")):
    all_artifacts.append({
        "file": art.name,
        "type": "pipeline",
        "notebook": "pipeline",
        "path": str(art.relative_to(RESULTS_DIR)),
    })

# Passing the columns explicitly keeps the schema stable even when nothing
# was found; without them the groupby below raises KeyError on an empty frame.
artifact_df = pd.DataFrame(all_artifacts, columns=ARTIFACT_COLUMNS)
print(f"Total artifacts cataloged: {len(artifact_df)}")

# Summary by notebook
print("\nArtifacts by source:")
if artifact_df.empty:
    print("Not available: no report artifacts found")
else:
    display(artifact_df.groupby(["notebook", "type"]).size().unstack(fill_value=0))

<a id="evidence-index"></a>
## 3. Master Evidence Index

Map each research question to the artifacts that answer it.

In [None]:
# ============================================================================
# MASTER EVIDENCE INDEX
# ============================================================================

# Field order of each evidence record (also the column order of evidence_df).
_EVIDENCE_FIELDS = (
    "research_question",
    "domain",
    "primary_artifact",
    "supporting_artifacts",
    "notebook",
)

# One tuple per research question, laid out in _EVIDENCE_FIELDS order.
_EVIDENCE_ROWS = (
    (
        "What is the basic network structure?",
        "structure",
        "nb02_network_stats.csv",
        "nb02_top_routes.csv, nb02_degree_distribution.png",
        "nb02",
    ),
    (
        "Which airports are most central?",
        "centrality",
        "nb03_centrality_topK.csv",
        "nb03_centrality_distributions.png",
        "nb03",
    ),
    (
        "What community structure exists?",
        "communities",
        "nb04_community_sizes.csv",
        "nb04_community_size_distribution.png",
        "nb04",
    ),
    (
        "How robust is the network to failures?",
        "robustness",
        "nb05_robustness_metrics.csv",
        "nb05_robustness_curves.png",
        "nb05",
    ),
    (
        "How do delays propagate through the network?",
        "dynamics",
        "nb06_superspreaders.csv",
        "nb06_cascade_distribution.png",
        "nb06",
    ),
    (
        "Can we predict missing links?",
        "prediction",
        "nb07_linkpred_summary.csv",
        "nb07_linkpred_metrics.png",
        "nb07",
    ),
    (
        "What are the business implications?",
        "business",
        "nb08_airline_kpi_summary.csv",
        "nb08_hub_concentration.png",
        "nb08",
    ),
)

# Research questions mapped to the artifacts that answer them
evidence_index = [dict(zip(_EVIDENCE_FIELDS, row)) for row in _EVIDENCE_ROWS]

evidence_df = pd.DataFrame(evidence_index)


def check_exists(artifact_name):
    """Return True if the named file is present in the report tables or figures directory."""
    candidates = (
        TABLES_REPORT_DIR / artifact_name,
        FIGURES_REPORT_DIR / artifact_name,
    )
    return any(candidate.exists() for candidate in candidates)


# Flag which primary artifacts are actually on disk
evidence_df["primary_exists"] = evidence_df["primary_artifact"].map(check_exists)

print("Master Evidence Index:")
display(evidence_df)

<a id="cross-domain"></a>
## 4. Cross-Domain Synthesis

Identify patterns that span multiple analysis domains.

In [None]:
# ============================================================================
# CROSS-DOMAIN SYNTHESIS
# ============================================================================

# Locations of the three pipeline outputs summarised in this section
centrality_path = ANALYSIS_DIR / "airport_centrality.parquet"
delay_path = ANALYSIS_DIR / "delay_cascades.parquet"
robustness_path = ANALYSIS_DIR / "robustness_curves.parquet"

# Each frame is None when its file could not be loaded
centrality_df = safe_load_parquet(centrality_path)
delay_df = safe_load_parquet(delay_path)
robustness_df = safe_load_parquet(robustness_path)

# (frame, domain pair, finding template filled with the row count, evidence file)
_insight_specs = (
    (
        centrality_df,
        "structure-centrality",
        "Centrality computed for {} airports",
        "airport_centrality.parquet",
    ),
    (
        delay_df,
        "centrality-dynamics",
        "Delay cascades recorded for {} events",
        "delay_cascades.parquet",
    ),
    (
        robustness_df,
        "structure-robustness",
        "Robustness curves with {} data points",
        "robustness_curves.parquet",
    ),
)

# One summary record per dataset that loaded successfully
cross_domain_insights = [
    {
        "domain_pair": domain_pair,
        "finding": template.format(len(frame)),
        "evidence": evidence_file,
    }
    for frame, domain_pair, template, evidence_file in _insight_specs
    if frame is not None
]

if not cross_domain_insights:
    print("Not available: insufficient data for cross-domain synthesis")
else:
    synthesis_df = pd.DataFrame(cross_domain_insights)
    display(synthesis_df)

<a id="centrality-delay"></a>
## 5. Centrality ↔ Delay Correlation

Do high-centrality airports amplify delay propagation?

In [None]:
# ============================================================================
# CENTRALITY ↔ DELAY CORRELATION
# ============================================================================
# Joins per-airport centrality scores with per-airport mean values of the
# numeric delay columns, then plots the correlation matrix of both groups.

if centrality_df is not None and delay_df is not None:
    # Convert to pandas for analysis
    cent_pd = centrality_df.to_pandas()
    delay_pd = delay_df.to_pandas()

    # Find airport ID columns (the first candidate name present wins)
    cent_airport = next((c for c in ["airport", "AIRPORT", "node"] if c in cent_pd.columns), None)
    delay_airport = next((c for c in ["origin", "ORIGIN", "airport", "seed_airport"] if c in delay_pd.columns), None)

    if cent_airport and delay_airport:
        # Average every numeric delay column per airport. The grouping key is
        # excluded explicitly: a numeric key would otherwise be aggregated
        # too and then collide with itself on reset_index().
        delay_metric_cols = [
            c for c in delay_pd.select_dtypes(include=[np.number]).columns
            if c != delay_airport
        ]
        if delay_metric_cols:
            delay_agg = (
                delay_pd.groupby(delay_airport)[delay_metric_cols]
                .mean()
                .add_prefix("mean_")
                .reset_index()
            )
        else:
            # No numeric metrics to average: keep only the keys so the
            # "no suitable columns" branch below reports it cleanly.
            delay_agg = delay_pd[[delay_airport]].drop_duplicates()

        # Merge (inner join: only airports present in both datasets remain)
        merged = cent_pd.merge(delay_agg, left_on=cent_airport, right_on=delay_airport, how="inner")

        # Only numeric columns that survived the merge under their original
        # name can be correlated.
        numeric_cols = set(merged.select_dtypes(include=[np.number]).columns)

        # Find centrality columns
        centrality_keywords = ("degree", "between", "closeness", "pagerank")
        cent_cols = [
            c for c in cent_pd.columns
            if c in numeric_cols and any(k in c.lower() for k in centrality_keywords)
        ]

        # Find delay/cascade columns: exactly the aggregated metrics, rather
        # than any merged column that happens to contain "mean_".
        cascade_cols = [
            c for c in delay_agg.columns
            if c != delay_airport and c in numeric_cols
        ]

        if cent_cols and cascade_cols:
            if len(merged) < 2:
                # A correlation needs at least two observations; fewer would
                # produce an all-NaN heatmap.
                print(f"Not available: only {len(merged)} airport(s) shared between centrality and delay data")
            else:
                # Compute correlation matrix
                corr_cols = cent_cols[:4] + cascade_cols[:4]  # Limit to avoid clutter
                corr_matrix = merged[corr_cols].corr()

                fig, ax = plt.subplots(figsize=(10, 8))
                sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="RdBu_r", center=0, ax=ax)
                ax.set_title("Centrality ↔ Delay Cascade Correlation")

                plt.tight_layout()
                fig_path = FIGURES_REPORT_DIR / f"{NOTEBOOK_ID}_centrality_delay_heatmap.png"
                plt.savefig(fig_path, dpi=150)
                plt.show()
                print(f"✅ Saved: {fig_path.name}")
        else:
            print(f"Could not find suitable columns. Centrality: {cent_cols}, Cascade: {cascade_cols}")
    else:
        print(f"Could not find airport columns. Centrality: {cent_airport}, Delay: {delay_airport}")
else:
    print("Not available: need both centrality and delay data")

<a id="robustness-hub"></a>
## 6. Robustness ↔ Hub Dependence

Does hub-and-spoke topology create fragility?

In [None]:
# ============================================================================
# ROBUSTNESS ↔ HUB DEPENDENCE
# ============================================================================
# Prints the robustness summary and, when it contains per-strategy metrics,
# tabulates them and plots one bar per attack strategy.

robustness_summary_path = ANALYSIS_DIR / "robustness_summary.json"
robustness_summary = safe_load_json(robustness_summary_path)

if robustness_summary:
    print("Robustness Summary:")
    print(json.dumps(robustness_summary, indent=2, default=str))

    # Extract key metrics: per-strategy results are read from "attack_types"
    # when present, otherwise from "strategies".
    if "attack_types" in robustness_summary or "strategies" in robustness_summary:
        strategies = robustness_summary.get("attack_types", robustness_summary.get("strategies", {}))

        if isinstance(strategies, dict):
            # Dict-valued entries are expanded into columns; scalar entries
            # land in a single "value" column.
            strategy_df = pd.DataFrame([
                {"strategy": k, **v} if isinstance(v, dict) else {"strategy": k, "value": v}
                for k, v in strategies.items()
            ])

            display(strategy_df)

            # Plot if we have AUC or critical point data (AUC preferred)
            metric = next(
                (m for m in ("auc", "critical_fraction") if m in strategy_df.columns),
                None,
            )
            if metric is not None:
                # Strategies that do not report the metric would show up as
                # empty bars; leave them out of the chart.
                plot_df = strategy_df.dropna(subset=[metric])

                if plot_df.empty:
                    print(f"Not available: no strategy reports a value for '{metric}'")
                else:
                    fig, ax = plt.subplots(figsize=(10, 6))
                    ax.bar(plot_df["strategy"], plot_df[metric])
                    ax.set_ylabel(metric.replace("_", " ").title())
                    ax.set_title("Robustness by Attack Strategy")
                    plt.xticks(rotation=45, ha="right")

                    plt.tight_layout()
                    fig_path = FIGURES_REPORT_DIR / f"{NOTEBOOK_ID}_robustness_hub_dependence.png"
                    plt.savefig(fig_path, dpi=150)
                    plt.show()
                    print(f"✅ Saved: {fig_path.name}")
        else:
            # Report the unexpected layout instead of skipping it silently.
            print(f"Could not tabulate strategies: expected a mapping, got {type(strategies).__name__}")
else:
    print("Not available: robustness summary not found")

<a id="narrative"></a>
## 7. Integrated Narrative

### Cross-Cutting Synthesis

*(Populated after running cells above)*

#### Structure → Centrality → Dynamics
- The airport network exhibits [scale-free / small-world / ?] properties
- High-degree hubs dominate multiple centrality measures
- These hubs also appear as delay superspreaders

#### Robustness Implications
- Targeted attacks on high-degree nodes cause faster fragmentation
- This hub dependence creates systemic risk

#### Business Interpretation
- Airlines with concentrated hub strategies may face amplified disruption costs
- Trade-off between operational efficiency and network resilience

### Evidence Summary Table

| Claim | Evidence Artifact | Notebook |
|-------|-------------------|----------|
| Hub-and-spoke topology | degree distribution | nb02 |
| Centrality correlates with delay | heatmap | nb09 |
| Targeted attacks fragment faster | robustness curves | nb05 |

<a id="write-outputs"></a>
## 8. Write Report Outputs

In [None]:
# ============================================================================
# WRITE REPORT OUTPUTS
# ============================================================================
# Persists the tables built above as CSV files in the report tables directory.

# Write master evidence index
evidence_path = TABLES_REPORT_DIR / f"{NOTEBOOK_ID}_master_evidence_index.csv"
evidence_df.to_csv(evidence_path, index=False)
print(f"✅ Wrote: {evidence_path}")

# Write cross-domain synthesis
if cross_domain_insights:
    synthesis_path = TABLES_REPORT_DIR / f"{NOTEBOOK_ID}_cross_domain_synthesis.csv"
    # Build the table from the insight records themselves: `synthesis_df` is
    # only assigned conditionally in the synthesis cell, so it can be stale or
    # undefined when cells are re-run out of order.
    pd.DataFrame(cross_domain_insights).to_csv(synthesis_path, index=False)
    print(f"✅ Wrote: {synthesis_path}")

print(f"\n📋 All {NOTEBOOK_ID} outputs written.")

<a id="reproducibility"></a>
## 9. Reproducibility Notes

### Input Files Consumed
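- All `results/tables/report/*.csv` files (inventoried only)
- All `results/figures/report/*.png` files (inventoried only)
- All entries under `results/analysis/` (inventoried only)
- `results/analysis/airport_centrality.parquet`
- `results/analysis/delay_cascades.parquet`
- `results/analysis/robustness_curves.parquet`
- `results/analysis/robustness_summary.json`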

### Assumptions Made
1. Prior notebooks have been run and generated their outputs
2. Evidence index maps research questions to artifacts
3. The centrality–delay analysis uses an inner join (an airport must appear in both the centrality and the delay datasets to be included)

### Outputs Generated
| Artifact | Path |
|----------|------|
| Master Evidence Index | `results/tables/report/nb09_master_evidence_index.csv` |
| Cross-Domain Synthesis | `results/tables/report/nb09_cross_domain_synthesis.csv` |
| Centrality-Delay Heatmap | `results/figures/report/nb09_centrality_delay_heatmap.png` |
| Robustness-Hub Dependence | `results/figures/report/nb09_robustness_hub_dependence.png` |