# Unified Pipeline

This notebook integrates both modules (Demographic-Climate and Hybridization) into a unified workflow.

## Steps:
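1. Execute Module 1 (Demographic-Climate) — loads the outputs saved by Notebooks 1-4, which must be run first
2. Execute Module 2 (Hybridization) — loads the outputs saved by Notebooks 5-6, which must be run first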
3. Align timelines
4. Create integrated timeline figure
5. Generate synthesis interpretation


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import yaml
from datetime import datetime

# Configuration
# Read the pipeline settings from config.yaml in the working directory.
# The file is opened explicitly as UTF-8 so that non-ASCII characters (e.g. in
# paths) decode the same way on every platform instead of depending on the
# locale's default encoding.
with open('config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Directory that holds the CSV outputs of the earlier notebooks and receives
# the figure written by this one.
OUTPUT_DIR = Path(config['output_dir'])

print("UNIFIED PIPELINE EXECUTION")
print("=" * 80)


## Step 1: Execute Module 1 (Demographic-Climate)


In [None]:
def execute_module1():
    """Load the saved outputs of the demographic-climate module.

    Looks in OUTPUT_DIR for the three CSV files written by the earlier
    notebooks. When all of them exist, returns a dict with the DataFrames
    under 'psmc', 'climate' and 'aligned' plus 'complete': True. When any
    file is missing, prints a warning and returns {'complete': False}.
    """
    banner = "=" * 60
    print(banner)
    print("MODULE 1: Demographic-Climate Integration")
    print(banner)

    # Result key -> CSV file expected in OUTPUT_DIR (insertion order is the
    # order in which files are checked and read).
    expected_outputs = {
        'psmc': "psmc_results.csv",
        'climate': "climate_time_series.csv",
        'aligned': "aligned_demographic_climate.csv",
    }

    # Bail out as soon as one required file is missing.
    for filename in expected_outputs.values():
        if not (OUTPUT_DIR / filename).exists():
            print("⚠ Module 1 outputs not found. Please run Notebooks 1-4 first.")
            return {'complete': False}

    print("✓ Module 1 outputs found")
    loaded = {
        key: pd.read_csv(OUTPUT_DIR / filename)
        for key, filename in expected_outputs.items()
    }
    loaded['complete'] = True
    return loaded


module1_data = execute_module1()


## Step 2: Execute Module 2 (Hybridization)


In [None]:
def execute_module2():
    """Load the saved outputs of the hybridization detection module.

    Requires the ABBA-BABA and sliding-window CSV files in OUTPUT_DIR. The
    introgressed-regions CSV is optional; an empty DataFrame stands in for it
    when the file is absent. Returns a dict with 'abba_baba', 'windows',
    'regions' and 'complete': True, or {'complete': False} (after printing a
    warning) when a required file is missing.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("MODULE 2: Hybridization Detection")
    print(banner)

    abba_path = OUTPUT_DIR / "abba_baba_results.csv"
    windows_path = OUTPUT_DIR / "sliding_window_results.csv"

    # Both required files must be present before anything is read.
    if not (abba_path.exists() and windows_path.exists()):
        print("⚠ Module 2 outputs not found. Please run Notebooks 5-6 first.")
        return {'complete': False}

    print("✓ Module 2 outputs found")
    abba_table = pd.read_csv(abba_path)
    windows_table = pd.read_csv(windows_path)

    # Optional output: fall back to an empty frame when it was not produced.
    regions_path = OUTPUT_DIR / "introgressed_regions.csv"
    if regions_path.exists():
        regions_table = pd.read_csv(regions_path)
    else:
        regions_table = pd.DataFrame()

    return {
        'abba_baba': abba_table,
        'windows': windows_table,
        'regions': regions_table,
        'complete': True,
    }


module2_data = execute_module2()


## Steps 3-4: Align timelines and create integrated timeline figure


In [None]:
# Build the four-panel integrated timeline (demography, climate, distribution
# placeholder, hybridization) and save it to OUTPUT_DIR. Runs only when both
# modules reported complete outputs.
if module1_data.get('complete') and module2_data.get('complete'):
    # Work on a copy so the derived column added below does not leak into
    # module1_data['aligned'] (and pile up state when the cell is re-run).
    aligned_df = module1_data['aligned'].copy()
    window_df = module2_data['windows']
    time_years = aligned_df['time_years']

    # Add introgression intensity (simplified - mean D-statistic).
    # This is a single mean over all windows, broadcast to every time point,
    # so panel 4 shows a flat line rather than a time-resolved signal.
    if len(window_df) > 0 and 'D' in window_df.columns:
        aligned_df['introgression_intensity'] = window_df['D'].mean()
    else:
        print("⚠ No sliding-window D values available - hybridization panel left empty")

    # Create integrated plot
    fig = plt.figure(figsize=(16, 10))
    gs = fig.add_gridspec(4, 1, hspace=0.3)

    # Panel 1: Demographic history (log-scaled time axis)
    ax1 = fig.add_subplot(gs[0, 0])
    ax1.semilogx(time_years, aligned_df['Ne'], linewidth=2.5, color='steelblue')
    ax1.set_ylabel('Effective\nPopulation Size', fontsize=11)
    ax1.set_title('Integrated Timeline: Demography, Climate, and Hybridization', fontsize=14, fontweight='bold', pad=20)
    ax1.grid(True, alpha=0.3)

    # Panels 2-4 share panel 1's x-axis so that every panel uses the same
    # (log) time scale and limits; otherwise the stacked panels would not line
    # up and could not be read as one timeline.

    # Panel 2: Climate (first two columns whose name ends in '_mean')
    ax2 = fig.add_subplot(gs[1, 0], sharex=ax1)
    climate_cols = [c for c in aligned_df.columns if c.endswith('_mean')]
    for col in climate_cols[:2]:
        ax2.plot(time_years, aligned_df[col], linewidth=2, linestyle='--', alpha=0.8, label=col)
    if climate_cols:
        # Label the lines so the plotted variables can be told apart.
        ax2.legend(loc='best', fontsize=9)
    ax2.set_ylabel('Climate\nVariables', fontsize=11)
    ax2.grid(True, alpha=0.3)

    # Panel 3: Distribution (placeholder text only - no data is plotted here)
    ax3 = fig.add_subplot(gs[2, 0], sharex=ax1)
    ax3.text(0.5, 0.5, 'Distribution data', ha='center', va='center', transform=ax3.transAxes)
    ax3.set_ylabel('Habitat\nSuitability', fontsize=11)

    # Panel 4: Hybridization
    ax4 = fig.add_subplot(gs[3, 0], sharex=ax1)
    if 'introgression_intensity' in aligned_df.columns:
        ax4.axhline(y=0, color='black', linestyle='-', linewidth=1)
        ax4.plot(time_years, aligned_df['introgression_intensity'], color='red', linewidth=2)
    ax4.set_xlabel('Years Before Present', fontsize=12)
    ax4.set_ylabel('Introgression\nIntensity (D)', fontsize=11)
    ax4.grid(True, alpha=0.3)

    # Invert exactly once, after all plotting: the x-axis is shared, so one
    # call flips every panel, and a second call would flip them all back.
    ax1.invert_xaxis()

    plt.tight_layout()
    plt.savefig(OUTPUT_DIR / "integrated_timeline.png", dpi=300, bbox_inches='tight')
    print(f"\nSaved integrated timeline: {OUTPUT_DIR / 'integrated_timeline.png'}")
    plt.show()

    print("\n" + "=" * 80)
    print("UNIFIED PIPELINE COMPLETE!")
    print("=" * 80)
else:
    print("\n⚠ Cannot complete integration - missing module outputs")
