In [None]:
import yaml

# Parse the project settings stored in the YAML file one directory up.
with open('../config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# Look up the configured entries for the two model-result files and the
# report directory so later cells can refer to them by name.
model_settings = config['models']
classification_results_path = model_settings['classification_results']
regression_results_path = model_settings['regression_results']
report_dir = config['misc']['report_dir']


In [None]:
# Cell 1: Import libraries and load results
import json
import warnings
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import roc_curve, auc
warnings.filterwarnings('ignore')

print("Starting comprehensive results analysis...")


def _read_json(path):
    """Return the parsed contents of the JSON file at ``path``."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Read the saved result dictionaries for both models from their JSON files.
classification_results = _read_json('../data/models/trained_models/classification_results.json')
regression_results = _read_json('../data/models/trained_models/regression_results.json')


In [None]:
# Cell 2: Load trained models and data
# Deserialize the two persisted estimators. joblib files are pickles, so only
# load artifacts that come from a trusted source.
model_files = (
    '../data/models/trained_models/classification_model.pkl',
    '../data/models/trained_models/regression_model.pkl',
)
classification_model, regression_model = map(joblib.load, model_files)

# Read the processed CSV table that belongs to each task.
data_files = (
    '../data/processed/classification_data.csv',
    '../data/processed/regression_data.csv',
)
classification_data, regression_data = map(pd.read_csv, data_files)

print("Models and data loaded successfully!")


In [None]:
# Cell 3: Comprehensive Model Performance Summary
print("=== COMPREHENSIVE MODEL PERFORMANCE SUMMARY ===")

# Shorthand for the 'random_forest' metric dictionaries of each results file.
clf_rf_metrics = classification_results['metrics']['random_forest']
reg_rf_metrics = regression_results['metrics']['random_forest']

not_applicable = 'N/A'

# Five classification scores (4 decimals), padded for the regression-only rows.
classification_column = [
    format(clf_rf_metrics[key], '.4f')
    for key in ('accuracy', 'precision', 'recall', 'f1', 'auc')
]
classification_column += [not_applicable] * 3

# Padding for the classification-only rows, then the three regression scores.
regression_column = [not_applicable] * 5 + [
    f"{reg_rf_metrics['mae']:.2f} min",
    f"{reg_rf_metrics['rmse']:.2f} min",
    format(reg_rf_metrics['r2'], '.4f'),
]

# One row per metric, one column per model.
performance_summary = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'AUC-ROC', 'MAE', 'RMSE', 'R²'],
    'Classification Model': classification_column,
    'Regression Model': regression_column,
})

print(performance_summary.to_string(index=False))


In [None]:
# Cell 4: Advanced Analytics Summary
# Builds a 2x3 dashboard: three text-only panels on the top row
# (classification, regression and OAI figures) and three bar charts on the
# bottom row (primary score, feature count, test-set size). The figure is
# written to disk as a PNG and then displayed.


def _draw_text_panel(ax, title, entries):
    """Render a text-only summary panel on ``ax``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to write on; its frame and ticks are switched off.
    title : str
        Title shown above the panel.
    entries : iterable of (float, str, dict)
        ``(y, text, text_kwargs)`` triples. Every text is placed at x = 0.1
        and ``text_kwargs`` is forwarded to ``ax.text``.
    """
    for y_pos, text, text_kwargs in entries:
        ax.text(0.1, y_pos, text, **text_kwargs)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title(title)
    ax.axis('off')


def _draw_labeled_bars(ax, labels, values, bar_colors, title, ylabel, label_offset, format_label):
    """Draw a bar chart on ``ax`` and write each bar's value above it.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    labels, values : sequence
        Bar labels and heights, in matching order.
    bar_colors : sequence
        One colour per bar.
    title, ylabel : str
        Axes title and y-axis label.
    label_offset : float
        Vertical gap added to the bar height when placing its label.
    format_label : callable
        Turns a bar value into the label text.
    """
    ax.bar(labels, values, color=bar_colors, alpha=0.7)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    for index, value in enumerate(values):
        ax.text(index, value + label_offset, format_label(value), ha='center', va='bottom')


fig, axes = plt.subplots(2, 3, figsize=(20, 12))

# Shorthand for the nested result dictionaries read by several panels.
clf_rf_metrics = classification_results['metrics']['random_forest']
reg_rf_metrics = regression_results['metrics']['random_forest']
clf_oai_metrics = classification_results['oai_metrics']
reg_oai_metrics = regression_results['oai_metrics']

# Text styles shared by the three summary panels.
heading_style = {'fontsize': 14, 'fontweight': 'bold'}
subheading_style = {'fontsize': 12, 'fontweight': 'bold'}
body_style = {'fontsize': 12}
detail_style = {'fontsize': 10}

# ROC-AUC Comparison
_draw_text_panel(axes[0, 0], 'Classification Summary', [
    (0.8, 'Classification Model Performance', heading_style),
    (0.7, f"Standard AUC: {clf_oai_metrics['standard_auc']:.4f}", body_style),
    (0.6, f"OAI-weighted AUC: {clf_oai_metrics['oai_auc']:.4f}", body_style),
    (0.5, f"Best Model: {classification_results['best_model']}", body_style),
    (0.4, f"Test Accuracy: {clf_rf_metrics['accuracy']:.4f}", body_style),
])

# Regression Performance
_draw_text_panel(axes[0, 1], 'Regression Summary', [
    (0.8, 'Regression Model Performance', heading_style),
    (0.7, f"MAE: {reg_rf_metrics['mae']:.2f} minutes", body_style),
    (0.6, f"RMSE: {reg_rf_metrics['rmse']:.2f} minutes", body_style),
    (0.5, f"R² Score: {reg_rf_metrics['r2']:.4f}", body_style),
    (0.4, f"Best Model: {regression_results['best_model']}", body_style),
])

# OAI Analysis Summary
_draw_text_panel(axes[0, 2], 'OAI Metrics Summary', [
    (0.8, 'OAI Analysis Results', heading_style),
    (0.7, 'Classification OAI:', subheading_style),
    (0.6, f"  Standard AUC: {clf_oai_metrics['standard_auc']:.4f}", detail_style),
    (0.55, f"  OAI AUC: {clf_oai_metrics['oai_auc']:.4f}", detail_style),
    (0.45, 'Regression OAI:', subheading_style),
    (0.35, f"  RF OAI MAE: {reg_oai_metrics['random_forest_oai_mae']:.2f} min", detail_style),
    (0.3, f"  LR OAI MAE: {reg_oai_metrics['linear_regression_oai_mae']:.2f} min", detail_style),
    (0.2, 'OAI prioritizes controllable delays', {'fontsize': 10, 'style': 'italic'}),
])

# Model Comparison Chart
models = ['Classification\n(Random Forest)', 'Regression\n(Random Forest)']
primary_metrics = [
    clf_rf_metrics['f1'],
    reg_rf_metrics['r2']
]
colors = ['skyblue', 'lightcoral']

_draw_labeled_bars(
    axes[1, 0], models, primary_metrics, colors,
    'Primary Model Performance', 'Score',
    0.01, lambda value: f'{value:.4f}',
)
# R² is negative when a model does worse than predicting the mean. A fixed
# (0, 1) range would hide such a bar, so the lower limit is extended only
# when a score is actually below zero.
lowest_score = min(primary_metrics)
axes[1, 0].set_ylim(lowest_score - 0.05 if lowest_score < 0 else 0, 1)

# Feature Count Comparison
feature_counts = [
    classification_results['feature_count'],
    regression_results['feature_count']
]

_draw_labeled_bars(
    axes[1, 1], models, feature_counts, colors,
    'Feature Count Used', 'Number of Features',
    0.5, str,
)

# Dataset Size Comparison
dataset_sizes = [
    classification_results['test_size'],
    regression_results['test_size']
]

_draw_labeled_bars(
    axes[1, 2], models, dataset_sizes, colors,
    'Test Set Size', 'Number of Records',
    max(dataset_sizes) * 0.01, lambda value: f'{value:,}',
)

plt.tight_layout()

# savefig does not create missing folders; make sure the target directory
# exists so a fresh checkout does not fail with FileNotFoundError.
figure_path = Path('../visualizations/model_results/comprehensive_model_analysis.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Cell 5: SHAP Analysis Summary
print("=== SHAP ANALYSIS SUMMARY ===")

# Try to deserialize the two saved explainers. A missing file is reported as a
# warning instead of stopping the notebook; any other error still propagates.
try:
    classification_explainer = joblib.load('../data/models/trained_models/classification_explainer.pkl')
    regression_explainer = joblib.load('../data/models/trained_models/regression_explainer.pkl')
except FileNotFoundError:
    print("⚠️  SHAP explainers not found - run individual model notebooks first")
else:
    print("\n".join([
        "✅ SHAP explainers loaded successfully",
        "📊 SHAP analysis provides:",
        "   - Feature importance rankings",
        "   - Individual prediction explanations",
        "   - Model interpretability insights",
        "   - Controllable vs uncontrollable factor identification",
    ]))

# Hand-written summary of the SHAP takeaways for each model type.
shap_insights = dict(
    classification=dict(
        purpose="Explains why a flight is predicted to be delayed",
        key_features=["carrier_ct", "late_aircraft_ct", "total_delay_incidents"],
        actionable_insights="Focus on carrier operations and aircraft turnaround",
    ),
    regression=dict(
        purpose="Explains predicted delay duration",
        key_features=["carrier_delay", "late_aircraft_delay", "total_delay_minutes"],
        actionable_insights="Minimize controllable delay sources",
    ),
)

print("\n📈 SHAP Insights:")
for task_name, details in shap_insights.items():
    feature_list = ', '.join(details['key_features'])
    print(f"\n{task_name.title()} Model:")
    print(f"  Purpose: {details['purpose']}")
    print(f"  Key Features: {feature_list}")
    print(f"  Actionable: {details['actionable_insights']}")


In [None]:
# Cell 6: Business Impact Analysis
print("=== BUSINESS IMPACT ANALYSIS ===")

# Baseline figures computed from the loaded datasets.
total_flights = classification_data['arr_flights'].sum()
current_delay_rate = classification_data['is_delayed'].mean()
# Average duration over rows with a strictly positive delay only.
positive_delays = regression_data[regression_data['delay_duration'] > 0]
current_avg_delay = positive_delays['delay_duration'].mean()

# Each scenario maps a label to the fractional reduction that is applied to
# both the delay rate and the average delay duration.
scenarios = {
    "Conservative (5% improvement)": 0.05,
    "Moderate (10% improvement)": 0.10,
    "Aggressive (15% improvement)": 0.15
}

print("Current Performance Baseline:")
print(f"  Total Flights Analyzed: {total_flights:,}")
print(f"  Current Delay Rate: {current_delay_rate:.2%}")
print(f"  Average Delay Duration: {current_avg_delay:.1f} minutes")

impact_analysis = []
for scenario_label, reduction in scenarios.items():
    new_delay_rate = current_delay_rate * (1 - reduction)
    new_avg_delay = current_avg_delay * (1 - reduction)

    avoided_flights = total_flights * (current_delay_rate - new_delay_rate)
    # NOTE(review): minutes saved is based on the current delay rate, so it
    # only counts shorter delays, not flights that stop being delayed.
    # Confirm this is the intended model.
    avoided_minutes = total_flights * current_delay_rate * (current_avg_delay - new_avg_delay)

    # Rough estimate: assumes an operational cost of $50 per delayed flight.
    estimated_savings = avoided_flights * 50

    scenario_row = {
        'Scenario': scenario_label,
        'New Delay Rate': f"{new_delay_rate:.2%}",
        'New Avg Delay': f"{new_avg_delay:.1f} min",
        'Flights Saved': f"{avoided_flights:,.0f}",
        'Minutes Saved': f"{avoided_minutes:,.0f}",
        'Est. Cost Savings': f"${estimated_savings:,.0f}"
    }
    impact_analysis.append(scenario_row)

impact_df = pd.DataFrame(impact_analysis)
print("\nPotential Business Impact:")
print(impact_df.to_string(index=False))


In [None]:
# Cell 7: Actionable Recommendations
print("=== ACTIONABLE RECOMMENDATIONS ===")

# Action lists, one per time horizon.
immediate_actions = [
    "Implement model predictions in operations dashboard",
    "Focus on controllable delays (carrier and late aircraft)",
    "Establish alert system for high-risk delay conditions",
    "Train operations team on model insights",
]
short_term_improvements = [
    "Optimize aircraft turnaround processes",
    "Improve carrier scheduling algorithms",
    "Enhance ground crew resource allocation",
    "Develop proactive passenger communication system",
]
long_term_strategy = [
    "Integrate weather forecasting with delay predictions",
    "Implement dynamic scheduling based on predictions",
    "Develop partnerships for shared delay mitigation",
    "Create data-driven crew scheduling optimization",
]

# Timeline heading -> ordered list of actions.
recommendations = {
    "Immediate Actions (0-3 months)": immediate_actions,
    "Short-term Improvements (3-6 months)": short_term_improvements,
    "Long-term Strategic (6+ months)": long_term_strategy,
}

# Print every timeline as a heading followed by its numbered actions.
for heading, action_list in recommendations.items():
    numbered_actions = [
        f"  {position}. {action}"
        for position, action in enumerate(action_list, start=1)
    ]
    print("\n".join([f"\n{heading}:", *numbered_actions]))

# Priority matrix based on OAI analysis
print(f"\n🎯 PRIORITY FOCUS AREAS (Based on OAI Analysis):")
print(f"  1. HIGH PRIORITY: Carrier delays (controllable)")
print(f"  2. HIGH PRIORITY: Late aircraft delays)
