# Unified Pipeline

This notebook integrates both modules (Demographic-Climate and Hybridization) into a unified workflow.

## Steps:
1. Execute Module 1 (Demographic-Climate)
2. Execute Module 2 (Hybridization)
3. Align timelines
4. Create integrated timeline figure
5. Generate synthesis interpretation


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import yaml
from datetime import datetime

# Configuration
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)

OUTPUT_DIR = Path(config['output_dir'])

print("UNIFIED PIPELINE EXECUTION")
print("=" * 80)


## Step 1: Execute Module 1 (Demographic-Climate)


In [None]:
def execute_module1():
    """Execute demographic-climate integration module"""
    print("=" * 60)
    print("MODULE 1: Demographic-Climate Integration")
    print("=" * 60)
    
    required_files = [
        "psmc_results.csv",
        "climate_time_series.csv",
        "aligned_demographic_climate.csv"
    ]
    
    module1_complete = all((OUTPUT_DIR / f).exists() for f in required_files)
    
    if module1_complete:
        print("✓ Module 1 outputs found")
        psmc_df = pd.read_csv(OUTPUT_DIR / "psmc_results.csv")
        climate_df = pd.read_csv(OUTPUT_DIR / "climate_time_series.csv")
        aligned_df = pd.read_csv(OUTPUT_DIR / "aligned_demographic_climate.csv")
        return {'psmc': psmc_df, 'climate': climate_df, 'aligned': aligned_df, 'complete': True}
    else:
        print("⚠ Module 1 outputs not found. Please run Notebooks 1-4 first.")
        return {'complete': False}

module1_data = execute_module1()


## Step 2: Execute Module 2 (Hybridization)


In [None]:
def execute_module2():
    """Execute hybridization detection module"""
    print("\n" + "=" * 60)
    print("MODULE 2: Hybridization Detection")
    print("=" * 60)
    
    required_files = ["abba_baba_results.csv", "sliding_window_results.csv"]
    module2_complete = all((OUTPUT_DIR / f).exists() for f in required_files)
    
    if module2_complete:
        print("✓ Module 2 outputs found")
        abba_df = pd.read_csv(OUTPUT_DIR / "abba_baba_results.csv")
        window_df = pd.read_csv(OUTPUT_DIR / "sliding_window_results.csv")
        introgressed_regions = pd.read_csv(OUTPUT_DIR / "introgressed_regions.csv") if (OUTPUT_DIR / "introgressed_regions.csv").exists() else pd.DataFrame()
        return {'abba_baba': abba_df, 'windows': window_df, 'regions': introgressed_regions, 'complete': True}
    else:
        print("⚠ Module 2 outputs not found. Please run Notebooks 5-6 first.")
        return {'complete': False}

module2_data = execute_module2()


## Step 3: Create integrated timeline visualization


In [None]:
if module1_data.get('complete') and module2_data.get('complete'):
    aligned_df = module1_data['aligned']
    
    # Add introgression intensity (simplified - mean D-statistic)
    if len(module2_data['windows']) > 0:
        mean_d = module2_data['windows']['D'].mean()
        aligned_df['introgression_intensity'] = mean_d
    
    # Create integrated plot
    fig = plt.figure(figsize=(16, 10))
    gs = fig.add_gridspec(4, 1, hspace=0.3)
    
    # Panel 1: Demographic history
    ax1 = fig.add_subplot(gs[0, 0])
    ax1.semilogx(aligned_df['time_years'], aligned_df['Ne'], linewidth=2.5, color='steelblue')
    ax1.set_ylabel('Effective\nPopulation Size', fontsize=11)
    ax1.set_title('Integrated Timeline: Demography, Climate, and Hybridization', fontsize=14, fontweight='bold', pad=20)
    ax1.grid(True, alpha=0.3)
    ax1.invert_xaxis()
    
    # Panel 2: Climate
    ax2 = fig.add_subplot(gs[1, 0])
    climate_cols = [c for c in aligned_df.columns if c.endswith('_mean')]
    for col in climate_cols[:2]:
        ax2.plot(aligned_df['time_years'], aligned_df[col], linewidth=2, linestyle='--', alpha=0.8)
    ax2.set_ylabel('Climate\nVariables', fontsize=11)
    ax2.grid(True, alpha=0.3)
    ax2.invert_xaxis()
    
    # Panel 3: Distribution (if available)
    ax3 = fig.add_subplot(gs[2, 0])
    ax3.text(0.5, 0.5, 'Distribution data', ha='center', va='center', transform=ax3.transAxes)
    ax3.set_ylabel('Habitat\nSuitability', fontsize=11)
    ax3.invert_xaxis()
    
    # Panel 4: Hybridization
    ax4 = fig.add_subplot(gs[3, 0])
    if 'introgression_intensity' in aligned_df.columns:
        ax4.axhline(y=0, color='black', linestyle='-', linewidth=1)
        ax4.plot(aligned_df['time_years'], aligned_df['introgression_intensity'], color='red', linewidth=2)
    ax4.set_xlabel('Years Before Present', fontsize=12)
    ax4.set_ylabel('Introgression\nIntensity (D)', fontsize=11)
    ax4.grid(True, alpha=0.3)
    ax4.invert_xaxis()
    
    plt.tight_layout()
    plt.savefig(OUTPUT_DIR / "integrated_timeline.png", dpi=300, bbox_inches='tight')
    print(f"\nSaved integrated timeline: {OUTPUT_DIR / 'integrated_timeline.png'}")
    plt.show()
    
    print("\n" + "=" * 80)
    print("UNIFIED PIPELINE COMPLETE!")
    print("=" * 80)
else:
    print("\n⚠ Cannot complete integration - missing module outputs")


In [2]:
import os
import sys
import time
import json
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from pathlib import Path
from datetime import datetime

# Configuration
NOTEBOOKS_DIR = Path.cwd()
OUTPUT_DIR = Path("../outputs")
OUTPUT_DIR.mkdir(exist_ok=True)

# List of notebooks to execute in order
NOTEBOOKS = [
    "01_PSMC_Demographic_Reconstruction.ipynb",
    "02_Paleoclimate_Data_Processing.ipynb",
    "03_MaxEnt_Distribution_Modeling.ipynb",
    "04_Demographic_Climate_Integration.ipynb",
    "05_VCF_Processing_ABBA_BABA.ipynb",
    "06_Sliding_Window_Introgression.ipynb",
    "08_Advanced_SDM_Analysis.ipynb"
]

# %%
def run_notebook(notebook_path):
    """Execute a notebook and return execution metadata."""
    with open(notebook_path) as f:
        nb = nbformat.read(f, as_version=4)
    
    ep = ExecutePreprocessor(timeout=3600, kernel_name='python3')
    start_time = time.time()
    
    try:
        ep.preprocess(nb, {'metadata': {'path': str(NOTEBOOKS_DIR)}})
        status = "completed"
        error = None
    except Exception as e:
        status = "failed"
        error = str(e)
    
    return {
        "notebook": notebook_path.name,
        "status": status,
        "duration": time.time() - start_time,
        "error": error,
        "timestamp": datetime.now().isoformat()
    }

# %%
def generate_summary(report):
    """Generate a markdown summary of the pipeline execution."""
    summary = "# Pipeline Execution Summary\n\n"
    summary += f"**Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
    
    # Summary table
    summary += "## Execution Status\n"
    summary += "| Notebook | Status | Duration (s) |\n"
    summary += "|----------|--------|-------------:|\n"
    for entry in report:
        status_emoji = "✅" if entry['status'] == 'completed' else "❌"
        summary += f"| {entry['notebook']} | {status_emoji} {entry['status'].upper()} | {entry['duration']:.1f} |\n"
    
    # Add any errors
    errors = [e for e in report if e['error']]
    if errors:
        summary += "\n## Errors\n"
        for error in errors:
            summary += f"### {error['notebook']}\n```\n{error['error']}\n```\n\n"
    
    # Save the report
    with open(OUTPUT_DIR / "pipeline_summary.md", "w") as f:
        f.write(summary)
    
    return summary

# %%
def main():
    """Main execution function for the pipeline."""
    print("=" * 80)
    print("STARTING UNIFIED PIPELINE EXECUTION")
    print("=" * 80)
    print(f"Working directory: {os.getcwd()}")
    print(f"Output directory: {OUTPUT_DIR.absolute()}")
    
    execution_report = []
    
    # Execute each notebook
    for nb_name in NOTEBOOKS:
        nb_path = NOTEBOOKS_DIR / nb_name
        if not nb_path.exists():
            print(f"\n⚠ Warning: {nb_name} not found, skipping...")
            continue
            
        print(f"\n{'=' * 40}")
        print(f"EXECUTING: {nb_name}")
        print(f"{'=' * 40}")
        
        result = run_notebook(nb_path)
        execution_report.append(result)
        
        if result['status'] == 'completed':
            print(f"✅ {nb_name} completed in {result['duration']:.1f} seconds")
        else:
            print(f"❌ {nb_name} failed after {result['duration']:.1f} seconds")
            print(f"Error: {result['error']}")
    
    # Generate and display summary
    summary = generate_summary(execution_report)
    print("\n" + "=" * 80)
    print("PIPELINE EXECUTION COMPLETE")
    print("=" * 80)
    print("\n" + summary)
    
    # Save final execution report
    with open(OUTPUT_DIR / "pipeline_execution_report.json", "w") as f:
        json.dump(execution_report, f, indent=2)
    
    return execution_report

# %%
def generate_final_visualizations():
    """Generate final visualizations for the poster."""
    try:
        import pandas as pd
        import matplotlib.pyplot as plt
        import seaborn as sns
        
        print("\nGenerating final visualizations...")
        
        # Create a figure with subplots for each analysis
        fig, axes = plt.subplots(3, 2, figsize=(15, 18))
        fig.suptitle('Integrated Analysis Results', fontsize=16, y=1.02)
        
        # Plot 1: Demographic History
        try:
            psmc_results = pd.read_csv(OUTPUT_DIR / "psmc_results.csv")
            axes[0, 0].semilogx(psmc_results['time_years'], psmc_results['Ne'], 'b-')
            axes[0, 0].set_title('Demographic History (PSMC)')
            axes[0, 0].set_xlabel('Years Before Present')
            axes[0, 0].set_ylabel('Effective Population Size')
            axes[0, 0].grid(True, alpha=0.3)
        except Exception as e:
            axes[0, 0].text(0.5, 0.5, 'Demographic data not available', 
                           ha='center', va='center')
        
        # Plot 2: Climate Data
        try:
            climate_data = pd.read_csv(OUTPUT_DIR / "climate_time_series.csv")
            for col in climate_data.columns[1:]:  # Skip the first column (time)
                axes[0, 1].plot(climate_data.iloc[:, 0], climate_data[col], label=col)
            axes[0, 1].set_title('Climate Time Series')
            axes[0, 1].set_xlabel('Time')
            axes[0, 1].set_ylabel('Value')
            axes[0, 1].legend()
            axes[0, 1].grid(True, alpha=0.3)
        except Exception as e:
            axes[0, 1].text(0.5, 0.5, 'Climate data not available', 
                           ha='center', va='center')
        
        # Add more plots as needed...
        
        plt.tight_layout()
        plt.savefig(OUTPUT_DIR / "final_combined_results.png", dpi=300, bbox_inches='tight')
        print(f"✅ Saved final visualizations to {OUTPUT_DIR / 'final_combined_results.png'}")
        
    except Exception as e:
        print(f"❌ Error generating visualizations: {str(e)}")

# %%
# Run the pipeline
if __name__ == "__main__":
    execution_report = main()
    
    # Uncomment to generate final visualizations
    # generate_final_visualizations()

STARTING UNIFIED PIPELINE EXECUTION
Working directory: /Users/divyadhole/Capstone-project/notebooks
Output directory: /Users/divyadhole/Capstone-project/notebooks/../outputs

EXECUTING: 01_PSMC_Demographic_Reconstruction.ipynb


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


❌ 01_PSMC_Demographic_Reconstruction.ipynb failed after 92.8 seconds
Error: An error occurred while executing the following cell:
------------------
def run_psmc(input_file, output_file, params=None):
    if params is None:
        params = config.get('psmc_params', {})

    cmd = ['psmc']
    for key, value in params.items():
        cmd.extend([key, str(value)])
    cmd.extend(['-o', str(output_file), str(input_file)])

    print(f"Running PSMC: {' '.join(cmd)}")
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"PSMC error: {result.stderr}")
        raise RuntimeError("PSMC failed")

    print(f"PSMC completed: {output_file}")
    return output_file

PSMC_OUTPUT = OUTPUT_DIR / "psmc_output.psmc"

if PSMC_INPUT.exists():
    run_psmc(PSMC_INPUT, PSMC_OUTPUT)
else:
    print("PSMC input file not found. Run Step 1 first.")
------------------


[0;31m---------------------------------------------------------------------------

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


❌ 02_Paleoclimate_Data_Processing.ipynb failed after 63.9 seconds
Error: An error occurred while executing the following cell:
------------------
# Clean the scientific names to standardize them
def clean_scientific_name(name):
    # Handle known synonyms
    synonyms = {
        'Presbytis phayrei': 'Trachypithecus phayrei',
        'Presbytis geei': 'Trachypithecus geei',
        'Semnopithecus dussumieri': 'Semnopithecus entellus'  # Subspecies of entellus
    }
    
    # Remove author and year from the name
    base_name = name.split('(')[0].strip()
    
    # Apply synonym mapping
    return synonyms.get(base_name, base_name)

# Apply cleaning to the scientific names
occurrences_gbif['scientific_name'] = occurrences_gbif['scientific_name'].apply(clean_scientific_name)

# Remove any remaining duplicates based on coordinates and species
occurrences_gbif = occurrences_gbif.drop_duplicates(
    subset=['scientific_name', 'longitude', 'latitude']
)

# Remove any records with missing c

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


❌ 03_MaxEnt_Distribution_Modeling.ipynb failed after 2.5 seconds
Error: An error occurred while executing the following cell:
------------------
# Step 1 – Prepare occurrence data for MaxEnt
def prepare_maxent_occurrences(occurrence_file, output_file, species_name=None):
    """Format occurrence data for MaxEnt (CSV: species, longitude, latitude)."""
    occurrence_file = Path(occurrence_file)
    output_file = Path(output_file)

    if not occurrence_file.exists():
        raise FileNotFoundError(f"Occurrence file not found: {occurrence_file}")

    df = pd.read_csv(occurrence_file)
    if df.empty:
        raise ValueError("Occurrence table is empty; rerun Notebook 02 or provide valid points.")

    lon_cols = [c for c in df.columns if c.lower() in {"lon", "longitude", "decimallongitude"}]
    lat_cols = [c for c in df.columns if c.lower() in {"lat", "latitude", "decimallatitude"}]
    if not lon_cols or not lat_cols:
        raise ValueError("Occurrence data must contain longitude/l

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


❌ 04_Demographic_Climate_Integration.ipynb failed after 11.8 seconds
Error: An error occurred while executing the following cell:
------------------
# 04 Demographic and Climate Integration Analysis
# For Rhinopithecus roxellana (Golden Snub-nosed Monkey)

# 1. Setup and Data Loading

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import rasterio
from rasterio.plot import show
import geopandas as gpd
from scipy import stats
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.seasonal import seasonal_decompose

# Set up directories
DATA_DIR = Path("data")
OUTPUT_DIR = Path("outputs")
DEMO_CLIMATE_DIR = OUTPUT_DIR / "demographic_climate"
DEMO_CLIMATE_DIR.mkdir(parents=True, exist_ok=True)

# Set plotting style
plt.style.use('seaborn')
sns.set_theme(style="whitegrid")
sns.set_palette("viridis")

print("Demographic and Climate Integration Analysis")
print("="*50)
print(f"Data directory: {DATA

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


KeyboardInterrupt: 

In [4]:
import os
import sys
import time
import json
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from pathlib import Path
from datetime import datetime
import logging
import traceback
import shutil
import importlib
from typing import List, Dict, Any, Optional

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(Path.cwd() / 'pipeline.log'),
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

class Config:
    """Configuration class for the pipeline."""
    def __init__(self):
        # Directory setup
        self.NOTEBOOKS_DIR = Path.cwd()
        self.OUTPUT_DIR = self.NOTEBOOKS_DIR.parent / "outputs"
        self.TEMP_DIR = self.OUTPUT_DIR / "temp"
        
        # Notebooks to execute in order
        self.NOTEBOOKS = [
            "01_PSMC_Demographic_Reconstruction.ipynb",
            "02_Paleoclimate_Data_Processing.ipynb",
            "03_MaxEnt_Distribution_Modeling.ipynb",
            "04_Demographic_Climate_Integration.ipynb",
            "05_VCF_Processing_ABBA_BABA.ipynb",
            "06_Sliding_Window_Introgression.ipynb",
            "08_Advanced_SDM_Analysis.ipynb"
        ]
        
        # Execution parameters
        self.KERNEL_NAME = 'python3'
        self.TIMEOUT = 7200  # 2 hours per notebook
        self.CLEAN_TEMP = True

def setup_environment() -> None:
    """Set up the execution environment."""
    try:
        # Create necessary directories
        config.OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
        config.TEMP_DIR.mkdir(exist_ok=True, parents=True)
        
        # Copy config files if they don't exist
        config_file = config.NOTEBOOKS_DIR / 'config.yaml'
        if not (config.OUTPUT_DIR / 'config.yaml').exists() and config_file.exists():
            shutil.copy2(config_file, config.OUTPUT_DIR / 'config.yaml')
            logger.info("Copied config.yaml to output directory")
            
    except Exception as e:
        logger.error(f"Error setting up environment: {str(e)}")
        raise

def check_dependencies() -> bool:
    """Check for required dependencies."""
    required = [
        'numpy', 'pandas', 'matplotlib', 'seaborn', 
        'scikit-learn', 'rasterio', 'cartopy', 'nbformat',
        'nbconvert', 'jupyter', 'ipykernel'
    ]
    missing = []
    
    for package in required:
        try:
            importlib.import_module(package.split('.')[0])
        except ImportError:
            missing.append(package)
    
    if missing:
        logger.warning(f"Missing packages: {', '.join(missing)}")
        logger.info("Consider installing them with: pip install " + " ".join(missing))
        return False
    return True

def run_notebook(notebook_path: Path) -> Dict[str, Any]:
    """Execute a notebook and return execution metadata."""
    notebook_name = notebook_path.name
    logger.info(f"Starting execution of {notebook_name}")
    
    # Create a copy of the notebook in the temp directory
    temp_nb_path = config.TEMP_DIR / f"temp_{notebook_name}"
    shutil.copy2(notebook_path, temp_nb_path)
    
    with open(temp_nb_path) as f:
        nb = nbformat.read(f, as_version=4)
    
    # Configure the executor
    ep = ExecutePreprocessor(
        timeout=config.TIMEOUT,
        kernel_name=config.KERNEL_NAME,
        allow_errors=True
    )
    
    start_time = time.time()
    status = "completed"
    error = None
    output_nb_path = None
    
    try:
        # Execute the notebook
        ep.preprocess(nb, {'metadata': {'path': str(config.NOTEBOOKS_DIR)}})
        
        # Save the executed notebook
        with open(temp_nb_path, 'w', encoding='utf-8') as f:
            nbformat.write(nb, f)
            
        # Copy outputs to the output directory
        output_nb_path = config.OUTPUT_DIR / f"executed_{notebook_name}"
        shutil.copy2(temp_nb_path, output_nb_path)
        
    except Exception as e:
        status = "failed"
        error = f"{type(e).__name__}: {str(e)}\n\n{traceback.format_exc()}"
        logger.error(f"Error executing {notebook_name}: {error}")
    finally:
        # Clean up temporary file
        if config.CLEAN_TEMP and temp_nb_path.exists():
            temp_nb_path.unlink()
    
    duration = time.time() - start_time
    logger.info(f"Finished {notebook_name} in {duration:.1f} seconds - Status: {status.upper()}")
    
    return {
        "notebook": notebook_name,
        "status": status,
        "duration": duration,
        "error": error,
        "timestamp": datetime.now().isoformat(),
        "output_path": str(output_nb_path) if output_nb_path else None
    }

def generate_summary(report: List[Dict[str, Any]]) -> str:
    """Generate a markdown summary of the pipeline execution."""
    summary = "# Pipeline Execution Summary\n\n"
    summary += f"**Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
    summary += f"**Working Directory:** {os.getcwd()}\n"
    summary += f"**Output Directory:** {config.OUTPUT_DIR.absolute()}\n\n"
    
    # Summary table
    summary += "## Execution Status\n"
    summary += "| Notebook | Status | Duration (s) | Output |\n"
    summary += "|----------|--------|-------------|--------|\n"
    
    for entry in report:
        status_emoji = "✅" if entry['status'] == 'completed' else "❌"
        output_link = f"[View]({entry.get('output_path', '')})" if entry.get('output_path') else "-"
        summary += f"| {entry['notebook']} | {status_emoji} {entry['status'].upper()} | {entry['duration']:.1f} | {output_link} |\n"
    
    # Add any errors
    errors = [e for e in report if e['error']]
    if errors:
        summary += "\n## Errors\n"
        for error in errors:
            summary += f"### {error['notebook']}\n```\n{error['error']}\n```\n\n"
    
    # Save the report
    summary_path = config.OUTPUT_DIR / "pipeline_summary.md"
    with open(summary_path, "w") as f:
        f.write(summary)
    
    logger.info(f"Saved execution summary to {summary_path}")
    return summary

def main() -> List[Dict[str, Any]]:
    """Main execution function for the pipeline."""
    logger.info("=" * 80)
    logger.info("STARTING UNIFIED PIPELINE EXECUTION")
    logger.info("=" * 80)
    
    # Setup environment
    setup_environment()
    
    # Check dependencies
    if not check_dependencies():
        logger.warning("Some dependencies are missing. The pipeline may fail.")
    
    execution_report = []
    
    # Execute each notebook
    for nb_name in config.NOTEBOOKS:
        nb_path = config.NOTEBOOKS_DIR / nb_name
        if not nb_path.exists():
            logger.warning(f"Notebook not found: {nb_name}, skipping...")
            execution_report.append({
                "notebook": nb_name,
                "status": "skipped",
                "duration": 0,
                "error": "Notebook not found",
                "timestamp": datetime.now().isoformat(),
                "output_path": None
            })
            continue
            
        result = run_notebook(nb_path)
        execution_report.append(result)
        
        if result['status'] == 'failed':
            logger.error(f"Pipeline failed at {nb_name}")
            break  # Stop on first failure
    
    # Generate summary
    summary = generate_summary(execution_report)
    logger.info("\n" + "=" * 80)
    logger.info("PIPELINE EXECUTION COMPLETE")
    logger.info("=" * 80)
    
    # Save final execution report
    report_path = config.OUTPUT_DIR / "pipeline_execution_report.json"
    with open(report_path, "w") as f:
        json.dump(execution_report, f, indent=2)
    
    logger.info(f"Saved detailed report to {report_path}")
    return execution_report

# Initialize config
config = Config()

if __name__ == "__main__":
    try:
        execution_report = main()
        logger.info("Pipeline execution completed. Check the logs and summary for details.")
    except Exception as e:
        logger.error(f"Fatal error in pipeline execution: {str(e)}")
        logger.error(traceback.format_exc())
        sys.exit(1)

2025-12-03 01:35:07,020 - INFO - STARTING UNIFIED PIPELINE EXECUTION
2025-12-03 01:35:12,132 - INFO - Consider installing them with: pip install scikit-learn
2025-12-03 01:35:12,133 - INFO - Starting execution of 01_PSMC_Demographic_Reconstruction.ipynb


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


2025-12-03 01:36:52,508 - INFO - Finished 01_PSMC_Demographic_Reconstruction.ipynb in 100.4 seconds - Status: COMPLETED
2025-12-03 01:36:52,510 - INFO - Starting execution of 02_Paleoclimate_Data_Processing.ipynb


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


2025-12-03 01:39:33,641 - INFO - Finished 02_Paleoclimate_Data_Processing.ipynb in 161.1 seconds - Status: COMPLETED
2025-12-03 01:39:33,644 - INFO - Starting execution of 03_MaxEnt_Distribution_Modeling.ipynb


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


2025-12-03 01:39:38,775 - INFO - Finished 03_MaxEnt_Distribution_Modeling.ipynb in 5.1 seconds - Status: COMPLETED
2025-12-03 01:39:38,776 - INFO - Starting execution of 04_Demographic_Climate_Integration.ipynb


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


2025-12-03 01:39:55,884 - INFO - Finished 04_Demographic_Climate_Integration.ipynb in 17.1 seconds - Status: COMPLETED
2025-12-03 01:39:55,886 - INFO - Starting execution of 05_VCF_Processing_ABBA_BABA.ipynb


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


git version 2.15.0
make: Nothing to be done for `all'.
Dsuite software Version 0.5 r58
Written by Milan Malinsky.

2025-12-03 02:10:12,598 - INFO - Finished 05_VCF_Processing_ABBA_BABA.ipynb in 1816.7 seconds - Status: COMPLETED
2025-12-03 02:10:12,600 - INFO - Starting execution of 06_Sliding_Window_Introgression.ipynb


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


2025-12-03 09:50:43,679 - INFO - Finished 06_Sliding_Window_Introgression.ipynb in 27631.0 seconds - Status: COMPLETED
2025-12-03 09:50:43,680 - INFO - Starting execution of 08_Advanced_SDM_Analysis.ipynb


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


2025-12-03 09:50:52,564 - INFO - Finished 08_Advanced_SDM_Analysis.ipynb in 8.9 seconds - Status: COMPLETED
2025-12-03 09:50:52,565 - INFO - Saved execution summary to /Users/divyadhole/Capstone-project/outputs/pipeline_summary.md
2025-12-03 09:50:52,565 - INFO - 
2025-12-03 09:50:52,565 - INFO - PIPELINE EXECUTION COMPLETE
2025-12-03 09:50:52,567 - INFO - Saved detailed report to /Users/divyadhole/Capstone-project/outputs/pipeline_execution_report.json
2025-12-03 09:50:52,567 - INFO - Pipeline execution completed. Check the logs and summary for details.
