In [None]:
# `files` is Colab's browser upload/download helper.
# NOTE(review): presumably only importable inside Google Colab — confirm before running elsewhere.
from google.colab import files


# Step 1: Upload the file from your device
# NOTE(review): `uploaded` is not read by any later cell visible in this notebook
# (main() below works on synthetic data) — confirm whether this upload is still needed.
uploaded = files.upload()

In [None]:
# Install required packages with new additions for advanced models
!pip install -q ortools lightgbm networkx sentence-transformers

import pandas as pd
import numpy as np
import warnings
import os
import json
from datetime import datetime, timedelta

# Scikit-learn and LightGBM for predictive models
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (mean_absolute_error,
                             mean_squared_error,
                             r2_score,
                             median_absolute_error,
                             explained_variance_score,
                             accuracy_score)

# Try importing optional packages with fallbacks.
# Each *_AVAILABLE flag records whether the corresponding import succeeded; the
# classes defined later check these flags to skip or degrade a feature instead
# of failing with ImportError.
try:
    import lightgbm as lgb
    LIGHTGBM_AVAILABLE = True
except ImportError:
    print("Warning: LightGBM not available. Using RandomForest only.")
    LIGHTGBM_AVAILABLE = False

try:
    import networkx as nx
    NETWORKX_AVAILABLE = True
except ImportError:
    print("Warning: NetworkX not available. Graph analysis will be skipped.")
    NETWORKX_AVAILABLE = False

try:
    from ortools.sat.python import cp_model
    ORTOOLS_AVAILABLE = True
except ImportError:
    print("Warning: OR-Tools not available. Schedule optimization will be skipped.")
    ORTOOLS_AVAILABLE = False

try:
    from sentence_transformers import SentenceTransformer, util
    SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
    print("Warning: SentenceTransformers not available. NLP features will be limited.")
    SENTENCE_TRANSFORMERS_AVAILABLE = False

# NOTE(review): this suppresses every warning category for the rest of the
# session, including deprecation and convergence warnings from the libraries
# above — consider narrowing the filter.
warnings.filterwarnings('ignore')


In [None]:
# ===============================================================================
# 1. ENHANCED DATA PROCESSOR
# ===============================================================================

class FlightDataProcessor:
    """Synthetic flight-data generator, feature engineer and airport-config holder.

    Attributes set by ``__init__``:
        encoders, scalers: empty dicts; nothing in this class populates them.
        runway_config: dict returned by ``_initialize_runway_config``.
        gate_config: dict returned by ``_initialize_gate_config``.
    """

    # Column order of the frame returned by ``generate_synthetic_data``. Passing it
    # explicitly keeps the schema stable even when zero flights are requested.
    _FLIGHT_COLUMNS = [
        'flight_id', 'flight_number', 'airline', 'std', 'sta', 'to', 'aircraft',
        'aircraft_type', 'dep_delay', 'arr_delay', 'flight_duration'
    ]

    # Base flight duration in minutes per destination code (used as the mean of
    # the sampled duration). Kept at class level so it is built once, not per flight.
    _BASE_DURATION_MIN = {
        'DEL': 120, 'BOM': 150, 'BLR': 90, 'MAA': 60, 'HYD': 75,
        'CCU': 180, 'AMD': 135, 'COK': 75, 'GOI': 105, 'PNQ': 120,
        'JAI': 105, 'IXC': 165
    }
    _DEFAULT_DURATION_MIN = 120

    def __init__(self):
        self.encoders = {}
        self.scalers = {}
        self.runway_config = self._initialize_runway_config()
        self.gate_config = self._initialize_gate_config()

    def _initialize_runway_config(self):
        """Return the static runway configuration.

        Keys:
            'runways': list of runway identifiers.
            'capacity_per_hour': per-runway hourly capacity.
            'separation_matrix': values keyed by a pair of aircraft types, with a
                ('default', 'default') entry. Units are not stated in this file.
        """
        return {
            'runways': ['RW09L', 'RW09R', 'RW27L', 'RW27R'],
            'capacity_per_hour': {'RW09L': 30, 'RW09R': 30, 'RW27L': 28, 'RW27R': 28},
            'separation_matrix': {
                ('A320', 'A320'): 90, ('A320', 'B777'): 120,
                ('B777', 'A320'): 150, ('B777', 'B777'): 120,
                ('default', 'default'): 90
            }
        }

    def _initialize_gate_config(self):
        """Return gate identifiers per terminal (20, 15 and 30 gates respectively)."""
        return {
            'terminal_1': [f'1A{i:02d}' for i in range(1, 21)],
            'terminal_2': [f'2B{i:02d}' for i in range(1, 16)],
            'terminal_3': [f'3C{i:02d}' for i in range(1, 31)]
        }

    def generate_synthetic_data(self, n_flights=1000, seed=42):
        """Generate ``n_flights`` synthetic flight records sorted by departure time.

        Args:
            n_flights: Number of records to create. ``0`` yields an empty frame
                that still carries the expected columns.
            seed: Value passed to ``np.random.seed``. The default (42) reproduces
                the previous hard-coded behaviour. Note that this re-seeds
                NumPy's *global* generator as a side effect.

        Returns:
            DataFrame with the columns listed in ``_FLIGHT_COLUMNS``, sorted by
            'std' with a fresh 0..n-1 index. 'std'/'sta' are datetimes on today's
            date; delays and durations are in minutes.
        """
        print("üîß Generating synthetic flight data...")
        np.random.seed(seed)

        # Flight parameters
        airlines = ['6E', 'AI', 'SG', 'UK', 'G8', 'I5', '9W', 'S2']
        destinations = ['DEL', 'BOM', 'BLR', 'MAA', 'HYD', 'CCU', 'AMD', 'COK', 'GOI', 'PNQ', 'JAI', 'IXC']
        aircraft_types = ['A320', 'A321', 'B737', 'B777', 'A330', 'ATR72', 'CRJ200']

        data = []
        base_date = datetime.now().replace(hour=6, minute=0, second=0, microsecond=0)

        # The order of the random draws below is kept exactly as before so that a
        # given seed keeps producing the same data set.
        for i in range(n_flights):
            # Flight details
            airline = np.random.choice(airlines)
            flight_num = f"{airline}{np.random.randint(100, 9999)}"
            destination = np.random.choice(destinations)
            aircraft_type = np.random.choice(aircraft_types)
            aircraft = f"VT-A{np.random.randint(100, 120)}"  # Fewer aircraft for more connections

            # Time scheduling (6 AM to 11 PM).
            # NOTE(review): exponential(2) * 8 has a mean of 16 hours, so roughly a
            # third of the draws exceed the 17-hour cap and land exactly on 23:00,
            # which is the opposite of "concentrate in early hours". Left unchanged
            # to keep the generated data stable; confirm the intended distribution.
            hours_offset = np.random.exponential(2) * 8
            std_offset = min(hours_offset * 60, 17 * 60)  # Cap at 11 PM
            std = base_date + timedelta(minutes=std_offset)

            # Realistic delay modeling: skewed, floored at zero
            dep_delay = max(0, np.random.gamma(2, 8) - 10)

            # Arrival delay correlated with departure delay
            propagation_factor = np.random.uniform(0.6, 1.2)
            weather_impact = np.random.normal(0, 5)
            arr_delay = max(0, dep_delay * propagation_factor + weather_impact)  # Ensure non-negative

            # Flight duration based on destination, never below 30 minutes
            base_duration = self._BASE_DURATION_MIN.get(destination, self._DEFAULT_DURATION_MIN)
            flight_duration = max(30, base_duration + np.random.normal(0, 15))

            # Scheduled arrival
            sta = std + timedelta(minutes=flight_duration)

            data.append({
                'flight_id': f"{flight_num}-{i}",
                'flight_number': flight_num,
                'airline': airline,
                'std': std,
                'sta': sta,
                'to': destination,
                'aircraft': aircraft,
                'aircraft_type': aircraft_type,
                'dep_delay': dep_delay,
                'arr_delay': arr_delay,
                'flight_duration': flight_duration
            })

        flights = pd.DataFrame(data, columns=self._FLIGHT_COLUMNS)
        return flights.sort_values('std').reset_index(drop=True)

    def engineer_features(self, df):
        """Return a copy of ``df`` with time and congestion features added.

        Added columns:
            scheduled_hour: hour of 'std'.
            day_of_week: ``dt.dayofweek`` of 'std'.
            flight_time_minutes: copy of 'flight_duration'.
            hourly_flight_count: number of rows sharing the same scheduled_hour
                (grouped by hour of day only, not by date).

        The method is safe to call on an already-engineered frame: a previous
        'hourly_flight_count' column is replaced rather than duplicated as
        '_x'/'_y' merge suffixes. The input frame is not modified; the result
        has a fresh RangeIndex because of the column merge.
        """
        print("‚öôÔ∏è Engineering features...")
        df = df.copy()
        df['scheduled_hour'] = df['std'].dt.hour
        df['day_of_week'] = df['std'].dt.dayofweek
        df['flight_time_minutes'] = df['flight_duration']

        # Drop a stale count from an earlier run so the merge below cannot collide.
        df = df.drop(columns=['hourly_flight_count'], errors='ignore')

        # Calculate hourly flight count
        hourly_counts = df.groupby('scheduled_hour').size().reset_index(name='hourly_flight_count')
        df = df.merge(hourly_counts, on='scheduled_hour', how='left')

        return df

In [None]:
# ===============================================================================
# 2. FLIGHT OPERATIONS SIMULATOR (INTEGRATED)
# ===============================================================================
class FlightSimulator:
    """Applies fixed what-if scenarios to a flight table and summarises the delays."""

    def __init__(self, runway_config, gate_config):
        # Stored for reference only; simulate_day does not read either of them.
        self.runway_config = runway_config
        self.gate_config = gate_config

    def simulate_day(self, flights_df):
        """Summarise simulated delays for three fixed scenarios.

        Args:
            flights_df: frame with 'arr_delay' and 'hourly_flight_count' columns.

        Returns:
            dict keyed by scenario name ('normal', 'bad_weather',
            'peak_congestion'); each value holds 'avg_delay',
            'max_queue_length', 'flights_processed' and 'total_delay'.
        """
        print("üé≠ Running flight simulation...")

        # (name, delay_factor, congestion_factor)
        scenario_table = (
            ("normal", 1.0, 1.0),
            ("bad_weather", 1.8, 1.3),
            ("peak_congestion", 1.4, 2.0),
        )

        arrival_delay = flights_df['arr_delay']
        hourly_load = flights_df['hourly_flight_count'].fillna(0)
        # Flights above 10 per hour contribute congestion delay; the queue
        # figure uses a threshold of 15 and does not depend on the scenario.
        congestion_excess = (hourly_load - 10).clip(lower=0)
        queue_peak = (hourly_load - 15).clip(lower=0).max()
        n_flights = len(flights_df)

        summary = {}
        for label, delay_factor, congestion_factor in scenario_table:
            delays = (arrival_delay * delay_factor) + congestion_excess * congestion_factor
            summary[label] = {
                'avg_delay': delays.mean(),
                'max_queue_length': queue_peak,
                'flights_processed': n_flights,
                'total_delay': delays.sum()
            }
        return summary


In [None]:
# ===============================================================================
# 3. MODEL EVALUATION FUNCTIONS (ENHANCED WITH NEW METRICS)
# ===============================================================================

def predict_with_model(model, X_test):
    """Step 1: Return ``model.predict(X_test)``, or None when that is not possible.

    None is returned (after printing a message) both when the object has no
    ``predict`` attribute and when the prediction call raises.
    """
    try:
        if not hasattr(model, 'predict'):
            print(f"Warning: Model {type(model).__name__} lacks a .predict() method.")
            return None
        return model.predict(X_test)
    except Exception as err:
        print(f"Error during prediction: {err}")
        return None

def calculate_regression_metrics(y_true, y_pred):
    """
    Step 2: Compute regression metrics plus two threshold-based accuracy figures.

    Pairs in which either value is NaN or infinite are dropped first. The result
    maps 'MAE', 'RMSE', 'R2_Score', 'MedAE', 'Explained_Variance',
    'On_Time_Accuracy' (agreement on value <= 15) and 'Severe_Delay_Accuracy'
    (agreement on value > 60) to their scores.

    An empty dict is returned when there are no predictions, when no finite
    pair remains, or when any step raises (the error is printed).
    """
    if y_pred is None or len(y_pred) == 0:
        return {}

    try:
        # Work on flat arrays
        actual = np.array(y_true).ravel()
        predicted = np.array(y_pred).ravel()

        # Keep only positions where both sides are finite
        usable = np.isfinite(actual) & np.isfinite(predicted)
        if not np.any(usable):
            return {}
        actual, predicted = actual[usable], predicted[usable]

        metrics = {
            'MAE': mean_absolute_error(actual, predicted),
            'RMSE': np.sqrt(mean_squared_error(actual, predicted)),
            'R2_Score': r2_score(actual, predicted),
            'MedAE': median_absolute_error(actual, predicted),
            'Explained_Variance': explained_variance_score(actual, predicted)
        }

        # --- Operational (Business) Metrics ---
        # Each rule turns delays into a yes/no label; accuracy compares the
        # labels derived from the actual and the predicted values.
        operational_rules = (
            ('On_Time_Accuracy', lambda delays: delays <= 15),
            ('Severe_Delay_Accuracy', lambda delays: delays > 60),
        )
        for metric_name, to_label in operational_rules:
            metrics[metric_name] = accuracy_score(to_label(actual), to_label(predicted))

        return metrics
    except Exception as err:
        print(f"Error calculating metrics: {err}")
        return {}

def generate_evaluation_report(model_name, metrics):
    """
    Step 3: Print a human-readable performance report for one model.

    ``metrics`` is the dict produced by calculate_regression_metrics; missing
    entries are shown as 0. When ``metrics`` is empty a single "no metrics"
    line is printed instead. Nothing is returned.
    """
    if not metrics:
        print(f"‚ùå {model_name}: No metrics available")
        return

    heading = model_name.replace('_', ' ').title()

    # Look every value up once, defaulting to 0 for absent keys.
    mae = metrics.get('MAE', 0)
    medae = metrics.get('MedAE', 0)
    rmse = metrics.get('RMSE', 0)
    r2 = metrics.get('R2_Score', 0)
    explained = metrics.get('Explained_Variance', 0)
    on_time_pct = metrics.get('On_Time_Accuracy', 0) * 100
    severe_pct = metrics.get('Severe_Delay_Accuracy', 0) * 100

    print(f"üìà {heading} Results:")
    print("-" * 30)
    print("Statistical Performance:")
    print(f"  - Mean Absolute Error (MAE):      {mae:.2f} minutes")
    print(f"  - Median Absolute Error (MedAE):    {medae:.2f} minutes  (More robust to outliers)")
    print(f"  - Root Mean Squared Error (RMSE):   {rmse:.2f} minutes  (Penalizes large errors)")
    print(f"  - R-squared (R¬≤):                 {r2:.3f}")
    print(f"  - Explained Variance:             {explained:.3f}")
    print("\nOperational Performance:")
    print(f"  - On-Time (<=15min) Accuracy:     {on_time_pct:.2f}%")
    print(f"  - Severe Delay (>60min) Accuracy: {severe_pct:.2f}%\n")

def evaluate_and_report_models(models, X_test, y_test):
    """
    Main Orchestrator: score every model in ``models`` and print the reports.

    For each (name, model) pair this predicts on X_test, computes the metrics
    against y_test and prints the per-model report. Returns a tuple
    ``(metrics_by_name, summary_frame)`` where the frame has one row per model;
    with no models the tuple is ``({}, empty DataFrame)``.
    """
    metrics_by_name = {}
    print("\nüìä Evaluating Model Performance:")
    print("=" * 40)

    for model_name, estimator in models.items():
        predictions = predict_with_model(estimator, X_test)
        model_metrics = calculate_regression_metrics(y_test, predictions)
        metrics_by_name[model_name] = model_metrics
        generate_evaluation_report(model_name, model_metrics)

    # Nothing was evaluated: hand back empty containers.
    if not metrics_by_name:
        return {}, pd.DataFrame()

    summary_frame = pd.DataFrame(metrics_by_name).T
    print("üìä Evaluation Summary DataFrame:")
    print(summary_frame.round(3))
    return metrics_by_name, summary_frame

In [None]:
# ===============================================================================
# 4. ADVANCED ML PREDICTOR (INTEGRATED WITH NEW EVALUATOR FUNCTIONS)
# ===============================================================================

class AdvancedFlightPredictor:
    """Trains arrival-delay regressors (Random Forest, plus LightGBM when installed).

    Attributes:
        models: dict of fitted models from the last successful training run,
            keyed 'random_forest' / 'lightgbm'.
        encoders: LabelEncoder per categorical feature, fitted during training.
        feature_columns: feature names selected by ``prepare_features``.
        evaluation_results: per-model metric dicts from the last training run
            (empty until a run completes its evaluation).
    """
    def __init__(self):
        self.models = {}
        self.encoders = {}
        self.feature_columns = []
        self.evaluation_results = {}

    def prepare_features(self, df):
        """Select, store and return the known feature columns present in ``df``."""
        potential_features = [
            'dep_delay', 'airline', 'to', 'aircraft_type', 'flight_time_minutes',
            'scheduled_hour', 'day_of_week', 'hourly_flight_count'
        ]
        # Ensure only columns present in the dataframe are used
        self.feature_columns = [col for col in potential_features if col in df.columns]
        return self.feature_columns

    def train_ensemble_models(self, df):
        """Train the regressors on ``df`` and print their evaluation.

        Rows with missing values in the features or in 'arr_delay' are dropped,
        the categorical columns are label-encoded, and an 80/20 train/test split
        is used. A failure while training the optional LightGBM model or while
        evaluating no longer discards the Random Forest that was already fitted.

        Returns:
            dict of fitted models (also stored in ``self.models``), or ``{}``
            when there are no usable features, no 'arr_delay' column, fewer
            than 10 complete rows, or an unexpected error occurs.
        """
        print("ü§ñ Training ensemble ML models...")

        try:
            features = self.prepare_features(df)
            if not features:
                print("‚ùå No valid features found for training")
                return {}

            # Check if target column exists
            if 'arr_delay' not in df.columns:
                print("‚ùå Target column 'arr_delay' not found")
                return {}

            model_df = df[features + ['arr_delay']].copy().dropna()

            if len(model_df) < 10:
                print("‚ùå Insufficient data for training")
                return {}

            # Encode categorical features; the fitted encoders are kept so the
            # same mapping can be applied to new data later.
            categorical_features = ['airline', 'to', 'aircraft_type']
            for col in categorical_features:
                if col in model_df.columns:
                    le = LabelEncoder()
                    model_df[col] = le.fit_transform(model_df[col].astype(str))
                    self.encoders[col] = le

            X = model_df[features]
            y = model_df['arr_delay']

            # Ensure we have valid data
            if X.empty or y.empty:
                print("‚ùå No valid data after preprocessing")
                return {}

            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # Collect into a fresh dict so models from an earlier run cannot
            # linger next to the ones trained now.
            trained = {}

            # Train Random Forest
            print("Training Random Forest...")
            rf_model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)
            rf_model.fit(X_train, y_train)
            trained['random_forest'] = rf_model

            # Train LightGBM if available. It is optional, so its failure must
            # not throw away the Random Forest trained above.
            if LIGHTGBM_AVAILABLE:
                print("Training LightGBM...")
                try:
                    lgbm_model = lgb.LGBMRegressor(random_state=42, verbose=-1)
                    lgbm_model.fit(X_train, y_train)
                    trained['lightgbm'] = lgbm_model
                except Exception as e:
                    print(f"Warning: LightGBM training failed, continuing without it: {e}")

            self.models = trained

            # Evaluate models; reporting problems do not invalidate the models.
            if self.models:
                try:
                    self.evaluation_results, _ = evaluate_and_report_models(self.models, X_test, y_test)
                except Exception as e:
                    print(f"Warning: Model evaluation failed: {e}")

            return self.models

        except Exception as e:
            print(f"‚ùå Error during model training: {e}")
            return {}


In [None]:
# ===============================================================================
# 5. GRAPH-BASED CASCADING IMPACT ANALYZER (NEW & ADVANCED)
# ===============================================================================
class GraphImpactAnalyzer:
    """
    Models aircraft rotations as a directed graph and ranks flights by how far
    a delay on them can cascade, using PageRank.

    Nodes are flight ids. An edge ``A -> B`` means the same aircraft flies B
    shortly after A, so a delay on A can propagate to B. ``self.df`` is a copy
    of the input frame; ``find_top_disruptors`` adds an 'impact_score' column
    to it.
    """
    def __init__(self, df):
        self.df = df.copy()
        # Without NetworkX there is no graph; the methods fall back to heuristics.
        self.graph = nx.DiGraph() if NETWORKX_AVAILABLE else None

    def _scheduled_hours(self):
        """Hour of departure per flight, derived from 'std' when the
        engineered 'scheduled_hour' column is absent."""
        if 'scheduled_hour' in self.df.columns:
            return self.df['scheduled_hour']
        return self.df['std'].dt.hour

    def _top_by_impact(self, top_n):
        """Rows of ``self.df`` with the ``top_n`` highest impact scores."""
        return self.df.sort_values('impact_score', ascending=False).head(top_n)

    def build_flight_network(self, min_turnaround_time=45):
        """Populate ``self.graph`` with flights and tight aircraft connections.

        Consecutive flights of the same aircraft are linked when the gap between
        the first flight's 'sta' and the next flight's 'std' is positive and
        shorter than ``2 * min_turnaround_time`` minutes. The edge weight is
        ``1 / gap`` so tighter turnarounds weigh more. Errors are printed, not
        raised. Does nothing (besides printing) when NetworkX is unavailable.
        """
        if not NETWORKX_AVAILABLE:
            print("‚ùå NetworkX not available. Skipping network analysis.")
            return

        print("üï∏Ô∏è Building flight network graph...")

        try:
            # Add nodes
            for flight_id, std in zip(self.df['flight_id'], self.df['std']):
                self.graph.add_node(flight_id, hour=std.hour)

            # Add edges based on aircraft turnaround times
            max_gap = min_turnaround_time * 2
            for _, group in self.df.groupby('aircraft'):
                legs = group.sort_values('std').to_dict('records')
                for current_flight, next_flight in zip(legs, legs[1:]):
                    # Turnaround time in minutes
                    turnaround = (next_flight['std'] - current_flight['sta']).total_seconds() / 60

                    # The lower bound guarantees turnaround > 0, so the division
                    # is always safe (the former "else 100" branch was unreachable).
                    if 0 < turnaround < max_gap:
                        self.graph.add_edge(current_flight['flight_id'],
                                            next_flight['flight_id'],
                                            weight=1 / turnaround)
        except Exception as e:
            print(f"‚ùå Error building flight network: {e}")

    def find_top_disruptors(self, top_n=10):
        """
        Score every flight and return the ``top_n`` rows with the highest score.

        With NetworkX: PageRank is run on the *reversed* connection graph and
        the scores are scaled so the maximum is 100. PageRank credits a node for
        the links pointing at it; on the original upstream -> downstream edges
        that would reward the last flight of each rotation, which cannot spread
        a delay to anything. Reversing the edges credits the upstream flights
        whose delay reaches the most (and tightest) later connections.

        Fallbacks: random scores when the graph has no edges or PageRank fails;
        an hour-based heuristic (earlier departures score higher, plus random
        jitter) when NetworkX is not installed.
        """
        if not NETWORKX_AVAILABLE:
            print("‚ùå NetworkX not available. Using simple heuristic for impact analysis.")
            # Simple fallback: earlier departures have more of the day left to disrupt
            self.df['impact_score'] = (24 - self._scheduled_hours()) * 2 + np.random.random(len(self.df)) * 10
            return self._top_by_impact(top_n)

        print("üí• Analyzing cascading impact with PageRank...")

        try:
            if not self.graph.nodes():
                self.build_flight_network()

            if self.graph.nodes() and self.graph.edges():
                pagerank_scores = nx.pagerank(self.graph.reverse(), weight='weight')
                self.df['impact_score'] = self.df['flight_id'].map(pagerank_scores).fillna(0)

                # Normalize scores to a 0-100 scale
                peak = self.df['impact_score'].max()
                if peak > 0:
                    self.df['impact_score'] = (self.df['impact_score'] / peak) * 100
            else:
                print("‚ö†Ô∏è No connections found in flight network. Using random impact scores.")
                self.df['impact_score'] = np.random.random(len(self.df)) * 100

            print("‚úÖ Top flights identified as potential 'super-spreaders' of delays.")
            return self._top_by_impact(top_n)

        except Exception as e:
            print(f"‚ùå Error in PageRank analysis: {e}")
            # Fallback to random scores
            self.df['impact_score'] = np.random.random(len(self.df)) * 100
            return self._top_by_impact(top_n)

In [None]:
# ===============================================================================
# 6. CP-SAT SCHEDULE OPTIMIZER (NEW & ADVANCED)
# ===============================================================================
class CPSATScheduleOptimizer:
    """
    Uses Google's CP-SAT solver to re-time departures so that runway usage stays
    within capacity while moving flights as little as possible.

    ``self.flights_df`` is a copy of the input. For a non-empty input it gains
    'original_start_min' (minutes after midnight of 'std'); a successful
    ``optimize`` adds 'optimized_start_min' and 'optimized_std'.
    """

    # Scheduling horizon: one day, in minutes.
    _HORIZON_MIN = 24 * 60

    def __init__(self, flights_df):
        self.flights_df = flights_df.copy()
        if len(self.flights_df) > 0:
            self.flights_df['original_start_min'] = (self.flights_df['std'].dt.hour * 60 +
                                                     self.flights_df['std'].dt.minute)

    def optimize(self, runway_capacity=3, runway_occupancy_min=2, time_limit_seconds=20.0):
        """Solve for new start times that minimise total shift from the schedule.

        Args:
            runway_capacity: maximum number of flights occupying the runway
                resource at the same minute.
            runway_occupancy_min: minutes each flight occupies the runway
                (default 2, the previously hard-coded value).
            time_limit_seconds: solver wall-clock limit (default 20.0, the
                previously hard-coded value).

        Returns:
            ``self.flights_df`` with the optimized columns added, or None when
            OR-Tools is missing, there are no flights, the solver finds no
            solution, or an error occurs (errors are printed, not raised).
        """
        if not ORTOOLS_AVAILABLE:
            print("‚ùå OR-Tools not available. Skipping schedule optimization.")
            return None

        print("üß† Optimizing schedule with CP-SAT Solver...")

        try:
            flights = self.flights_df.to_dict('records')

            if not flights:
                print("‚ùå No flights to optimize")
                return None

            model = cp_model.CpModel()
            horizon = self._HORIZON_MIN
            occupancy = int(runway_occupancy_min)

            intervals = []
            start_vars = []
            deviations = []

            for i, f in enumerate(flights):
                start_var = model.NewIntVar(0, horizon, f'start_{i}')
                end_var = model.NewIntVar(0, horizon, f'end_{i}')
                intervals.append(model.NewIntervalVar(start_var, occupancy, end_var, f'interval_{i}'))
                start_vars.append(start_var)

                # |new start - original start| for this flight
                deviation = model.NewIntVar(0, horizon, f'deviation_{i}')
                model.AddAbsEquality(deviation, start_var - int(f['original_start_min']))
                deviations.append(deviation)

            # Capacity constraint: each flight consumes one unit of runway capacity
            model.AddCumulative(intervals, [1] * len(flights), runway_capacity)

            # Minimize deviation from original schedule. The sum is minimised
            # directly, so no fixed upper bound caps it regardless of how many
            # flights are scheduled.
            model.Minimize(sum(deviations))

            solver = cp_model.CpSolver()
            solver.parameters.max_time_in_seconds = float(time_limit_seconds)
            status = solver.Solve(model)

            if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
                if status == cp_model.OPTIMAL:
                    print(f"‚úÖ Optimal schedule found! Minimized total deviation: {solver.ObjectiveValue():.0f} minutes.")
                else:
                    # FEASIBLE means the solver stopped before proving optimality.
                    print(f"‚ö†Ô∏è Feasible schedule found (optimality not proven within the time limit). Total deviation: {solver.ObjectiveValue():.0f} minutes.")

                optimized_starts = [solver.Value(s) for s in start_vars]
                self.flights_df['optimized_start_min'] = optimized_starts

                # Convert back to datetime on the date of the first flight
                base_date = self.flights_df['std'].iloc[0].date()
                midnight = datetime.combine(base_date, datetime.min.time())
                self.flights_df['optimized_std'] = [
                    midnight + timedelta(minutes=int(start_min))
                    for start_min in optimized_starts
                ]

                return self.flights_df
            else:
                print("‚ùå Could not find an optimal solution.")
                return None

        except Exception as e:
            print(f"‚ùå Error in schedule optimization: {e}")
            return None

In [None]:
# ===============================================================================
# 7. SEMANTIC NLP ASSISTANT (NEW & ADVANCED)
# ===============================================================================
class SemanticNLPAssistant:
    """
    Answers a small set of operational questions about the flight table.

    When sentence-transformers is usable, a query is matched to one of four
    known questions by embedding similarity; otherwise (or when that fails)
    plain keyword matching is used.
    """
    def __init__(self, df):
        self.df = df
        self.model = None

        if not SENTENCE_TRANSFORMERS_AVAILABLE:
            print("‚ùå SentenceTransformers not available. Using simple keyword matching.")
            return

        try:
            print("üß† Initializing Semantic NLP Assistant...")
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
            # The position of each question is the index understood by _get_answer.
            self.known_questions = [
                "What are the busiest hours?", "Show airline performance.",
                "Which routes have the most delays?", "How can we optimize the schedule?"
            ]
            self.known_embeddings = self.model.encode(self.known_questions)
            print("‚úÖ NLP Assistant is ready.")
        except Exception as err:
            print(f"‚ùå Error initializing NLP Assistant: {err}")
            self.model = None

    def process_query(self, query):
        """Return a text answer for ``query``.

        Uses the embedding model when one is loaded; a similarity of 0.5 or less
        yields a "please rephrase" reply. Any error in the semantic path falls
        back to keyword matching.
        """
        if self.model is None:
            # No model loaded: keyword-based fallback
            return self._simple_keyword_match(query)

        try:
            embedded_query = self.model.encode(query)

            # Cosine similarity of the query against every known question
            similarities = util.cos_sim(embedded_query, self.known_embeddings)[0]
            top_idx = np.argmax(similarities)
            top_score = similarities[top_idx]

            print(f"   (Match: '{self.known_questions[top_idx]}', Score: {top_score:.2f})")

            if top_score > 0.5:
                return self._get_answer(int(top_idx))
            return "I'm not sure how to answer that. Please try rephrasing."

        except Exception as err:
            print(f"‚ùå Error processing query: {err}")
            return self._simple_keyword_match(query)

    def _simple_keyword_match(self, query):
        """Route ``query`` to an answer by substring keywords; first match wins."""
        # (keywords, answer index) in priority order
        keyword_routes = (
            (('busy', 'hour', 'traffic', 'peak'), 0),
            (('airline', 'performance', 'best', 'worst'), 1),
            (('route', 'delay', 'destination'), 2),
            (('optimize', 'improve', 'schedule'), 3),
        )
        lowered = query.lower()

        for keywords, answer_idx in keyword_routes:
            if any(keyword in lowered for keyword in keywords):
                return self._get_answer(answer_idx)

        return "I can help with questions about busy hours, airline performance, route delays, or schedule optimization."

    def _get_answer(self, index):
        """Build the answer text for a known-question index.

        0: three busiest hours, 1: mean arrival delay per airline,
        2: five destinations with the highest mean arrival delay,
        anything else: pointer to the CP-SAT optimizer. A message is returned
        instead when the required columns are missing or the analysis raises.
        """
        def has_columns(*names):
            return all(name in self.df.columns for name in names)

        try:
            if index == 0:  # Busiest hours
                if not has_columns('scheduled_hour'):
                    return "Hour data not available in the dataset."
                busiest = self.df.groupby('scheduled_hour')['flight_id'].count().nlargest(3)
                return f"The busiest hours are:\n{busiest.to_string()}"

            if index == 1:  # Airline performance
                if not has_columns('airline', 'arr_delay'):
                    return "Airline performance data not available."
                by_airline = self.df.groupby('airline')['arr_delay'].mean().sort_values()
                return f"Airline performance (avg delay):\n{by_airline.round(2).to_string()}"

            if index == 2:  # Route delays
                if not has_columns('to', 'arr_delay'):
                    return "Route delay data not available."
                by_route = self.df.groupby('to')['arr_delay'].mean().sort_values(ascending=False).head(5)
                return f"Routes with most delays:\n{by_route.round(2).to_string()}"

            return "Schedule optimization analysis is available through the CP-SAT optimizer."

        except Exception as err:
            return f"Error analyzing data: {err}"


In [None]:
# ===============================================================================
# 8. MAIN EXECUTION
# ===============================================================================
def main():
    """Run the whole pipeline end to end and print a progress report.

    Steps, in order: generate and feature-engineer 1000 synthetic flights,
    simulate three delay scenarios, train the delay models, rank flights by
    cascading impact, optimize the first 50 flights with CP-SAT, answer two
    sample NLP queries, then save the results.

    Files written with relative paths: 'flight_data_with_impact.csv',
    'optimized_schedule.csv' (only when the optimizer returned a result),
    'model_info.json' and 'summary_stats.json'.

    Every exception is caught and printed rather than raised: errors while
    saving are reported and execution continues to the summary; any other
    error ends the run with a "Critical error" message. Returns None.
    """
    print("=" * 70)
    print("üöÄ ADVANCED FLIGHT SCHEDULING & OPTIMIZATION SYSTEM")
    print("=" * 70)

    try:
        # --- Step 1: Data Processing ---
        print("\nüìä Step 1: Data Processing")
        processor = FlightDataProcessor()
        df = processor.generate_synthetic_data(n_flights=1000)
        df = processor.engineer_features(df)
        print(f"‚úÖ Generated {len(df)} flight records")

        # --- Step 2: Flight Operations Simulation ---
        print("\nüé≠ Step 2: Flight Operations Simulation")
        simulator = FlightSimulator(processor.runway_config, processor.gate_config)
        simulation_results = simulator.simulate_day(df)

        print("\nSimulation Results:")
        for scenario, data in simulation_results.items():
            print(f"  - Scenario: {scenario.replace('_', ' ').title()}")
            print(f"    - Average Delay: {data['avg_delay']:.2f} mins")
            print(f"    - Max Queue Length: {data['max_queue_length']:.0f} flights")
            # total_delay is in minutes; shown in hours
            print(f"    - Total Delay: {data['total_delay'] / 60:.1f} hours")

        # --- Step 3: Predictive Analysis ---
        print("\nü§ñ Step 3: Predictive Analysis")
        predictor = AdvancedFlightPredictor()
        # Returns {} on failure, which the checks below treat as "no models"
        models = predictor.train_ensemble_models(df)

        if models:
            print(f"‚úÖ Trained {len(models)} models successfully")
        else:
            print("‚ö†Ô∏è Model training had issues, but continuing...")

        # --- Step 4: Cascading Impact Analysis ---
        print("\n" + "="*70)
        print("üí• Step 4: CASCADING IMPACT ANALYSIS (GRAPH-BASED)")
        print("="*70)
        impact_analyzer = GraphImpactAnalyzer(df)
        top_disruptors = impact_analyzer.find_top_disruptors()
        # The analyzer works on its own copy; take it back so df carries 'impact_score'
        df = impact_analyzer.df

        print("\nTop 10 Potential Delay 'Super-Spreaders':")
        display_cols = ['flight_id', 'std', 'aircraft']
        if 'impact_score' in df.columns:
            display_cols.append('impact_score')
        print(top_disruptors[display_cols].head(10).round(2))

        # --- Step 5: Schedule Optimization ---
        print("\n" + "="*70)
        print("üß† Step 5: SCHEDULE OPTIMIZATION (CP-SAT SOLVER)")
        print("="*70)

        flights_to_optimize = df.head(50)  # Reduced for faster processing
        optimizer = CPSATScheduleOptimizer(flights_to_optimize)
        # None when OR-Tools is missing, no solution was found, or the solver errored
        optimized_df = optimizer.optimize()

        if optimized_df is not None:
            print("\nSample of Optimized Schedule:")
            opt_display_cols = ['flight_id', 'std']
            if 'optimized_std' in optimized_df.columns:
                opt_display_cols.append('optimized_std')
            print(optimized_df[opt_display_cols].head())

        # --- Step 6: Semantic NLP Assistant ---
        print("\n" + "="*70)
        print("üí¨ Step 6: AI OPERATIONS ASSISTANT (SEMANTIC NLP)")
        print("="*70)

        nlp_assistant = SemanticNLPAssistant(df)
        queries = ["Show me airport traffic patterns", "Which airline is best?"]

        for q in queries:
            print(f"\nUser Query: '{q}'")
            response = nlp_assistant.process_query(q)
            print(f"Assistant: {response}")

        # --- Step 7: Saving Components for Dashboard ---
        print("\n" + "="*70)
        print("üíæ Step 7: SAVING COMPONENTS FOR DASHBOARD")
        print("="*70)

        # Saving has its own try/except so a write failure does not skip the final summary
        try:
            df.to_csv('flight_data_with_impact.csv', index=False)
            print("‚úÖ Saved flight data with impact analysis")

            if optimized_df is not None:
                optimized_df.to_csv('optimized_schedule.csv', index=False)
                print("‚úÖ Saved optimized schedule")

            # Save model information (metadata only; the fitted models themselves are not persisted)
            model_info = {
                'model_trained': len(models) > 0,
                'features': predictor.feature_columns,
                'models_available': list(models.keys()) if models else [],
                'total_flights': len(df),
                'lightgbm_available': LIGHTGBM_AVAILABLE,
                'networkx_available': NETWORKX_AVAILABLE,
                'ortools_available': ORTOOLS_AVAILABLE,
                'sentence_transformers_available': SENTENCE_TRANSFORMERS_AVAILABLE
            }

            with open('model_info.json', 'w') as f:
                json.dump(model_info, f, indent=2)
            print("‚úÖ Saved model information")

            # Generate summary statistics
            summary_stats = {
                'total_flights': len(df),
                'avg_departure_delay': df['dep_delay'].mean(),
                'avg_arrival_delay': df['arr_delay'].mean(),
                # Share of flights arriving at most 15 minutes late, as a percentage
                'on_time_performance': (df['arr_delay'] <= 15).mean() * 100,
                'airlines': df['airline'].unique().tolist(),
                'destinations': df['to'].unique().tolist(),
                'aircraft_types': df['aircraft_type'].unique().tolist()
            }

            # default=str stringifies any value json cannot encode natively
            with open('summary_stats.json', 'w') as f:
                json.dump(summary_stats, f, indent=2, default=str)
            print("‚úÖ Saved summary statistics")

            print("‚úÖ All components saved successfully!")

        except Exception as e:
            print(f"‚ùå Error saving files: {e}")

        # --- Final Summary ---
        print("\n" + "="*70)
        print("üìã EXECUTION SUMMARY")
        print("="*70)
        print(f"‚úÖ Data Processing: Generated {len(df)} flights")
        print(f"‚úÖ Flight Simulation: Ran {len(simulation_results)} scenarios")
        print(f"‚úÖ ML Models: Trained {len(models)} models" if models else "‚ö†Ô∏è ML Models: Training had issues")
        print(f"‚úÖ Impact Analysis: Analyzed {len(df)} flights")
        # NOTE(review): optimized_df is also None when the solver fails or finds no
        # solution, yet this line always attributes it to OR-Tools being unavailable.
        print(f"‚úÖ Schedule Optimization: {'Completed' if optimized_df is not None else 'Skipped (OR-Tools unavailable)'}")
        print(f"‚úÖ NLP Assistant: {'Advanced mode' if SENTENCE_TRANSFORMERS_AVAILABLE else 'Keyword mode'}")

        print("\nüîß Package Availability:")
        print(f"  - LightGBM: {'‚úÖ' if LIGHTGBM_AVAILABLE else '‚ùå'}")
        print(f"  - NetworkX: {'‚úÖ' if NETWORKX_AVAILABLE else '‚ùå'}")
        print(f"  - OR-Tools: {'‚úÖ' if ORTOOLS_AVAILABLE else '‚ùå'}")
        print(f"  - SentenceTransformers: {'‚úÖ' if SENTENCE_TRANSFORMERS_AVAILABLE else '‚ùå'}")

        print("\nüéâ System execution completed successfully!")

    except Exception as e:
        # Catch-all: the error is reported without a traceback and is not re-raised
        print(f"\n‚ùå Critical error in main execution: {e}")
        print("üîß Please check your data and try again.")

# ===============================================================================
# EXECUTION ENTRY POINT
# ===============================================================================

# Run the full pipeline only when this code is executed as the top-level
# module, not when it is imported from elsewhere.
if __name__ == "__main__":
    main()