In [3]:
import pandas as pd
import numpy as np
import tkinter as tk
from tkinter import ttk, messagebox
from PIL import Image, ImageTk

file_path = r"C:\Users\abhik\Downloads\Airline Delay.csv"
df = pd.read_csv(file_path)

with np.errstate(divide='ignore', invalid='ignore'):
    df['DELAY_PERCENTAGE'] = np.where(df['arr_flights'] > 0, (df['arr_del15'] / df['arr_flights']) * 100, 0)

def delay_state(percentage):
    if percentage <= 15:
        return 'Low Delay'
    elif 15 < percentage <= 25:
        return 'Moderate Delay'
    else:
        return 'High Delay'

df['DELAY_STATE'] = df['DELAY_PERCENTAGE'].apply(delay_state)

delay_causes = ['carrier_ct', 'nas_ct', 'late_aircraft_ct']

std_weights = df[delay_causes].std() / df[delay_causes].std().sum()
p_matrix = df[delay_causes].div(df[delay_causes].sum(axis=1), axis=0).replace(np.nan, 0)
entropy = -np.nansum(p_matrix * np.log(p_matrix + 1e-10), axis=0)
entropy_weights = (1 - entropy) / (1 - entropy).sum()
combined_weights = (std_weights + entropy_weights) / 2

df['WEIGHTED_DELAY'] = sum(df[cause] * combined_weights[cause] for cause in delay_causes)

df["TOTAL_DELAY_CAUSE"] = df[delay_causes].sum(axis=1)
for cause in delay_causes:
    df[f'{cause}_prob'] = np.where(df["TOTAL_DELAY_CAUSE"] > 0, df[cause] / df["TOTAL_DELAY_CAUSE"], 0)

airport_carrier_counts = df.groupby('airport')['carrier'].nunique()
valid_airports = airport_carrier_counts[airport_carrier_counts > 5].index
df_filtered = df[df['airport'].isin(valid_airports)]

carrier_transition_matrices = {}
initial_vectors = {}
carrier_cause_probabilities = {}

for carrier in df['carrier'].unique():
    carrier_data = df[df['carrier'] == carrier].copy()
    carrier_data['NEXT_DELAY_STATE'] = carrier_data['DELAY_STATE'].shift(-1)
    weighted_matrix = pd.crosstab(carrier_data['DELAY_STATE'], carrier_data['NEXT_DELAY_STATE'],
                                  values=carrier_data['WEIGHTED_DELAY'],
                                  aggfunc='sum', normalize='index').fillna(0)
    v0 = weighted_matrix.sum(axis=1).values / weighted_matrix.sum().sum()
    carrier_transition_matrices[carrier] = weighted_matrix
    initial_vectors[carrier] = v0
    
    carrier_cause_probabilities[carrier] = {
        cause: carrier_data[f'{cause}_prob'].mean()
        for cause in delay_causes
    }

def predict_future_distribution(v0, tpm, steps):
    v_future = v0
    for _ in range(steps):
        v_future = np.dot(v_future, tpm)
    return v_future

class DelayPredictionApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Airline Delay Prediction System")
        self.root.geometry("800x500")
        
        icon_path = r"C:\Users\abhik\OneDrive\Desktop\7893979.png"
        try:
            icon_image = Image.open(icon_path).resize((80, 80), Image.LANCZOS)
            self.icon_photo = ImageTk.PhotoImage(icon_image)
            icon_label = tk.Label(root, image=self.icon_photo)
        except:
            icon_label = tk.Label(root, text="✈", font=("Arial", 40))
        icon_label.pack(pady=5)

        title_label = ttk.Label(root, text="✈ Airline Delay Prediction", font=("Arial", 18, "bold"))
        title_label.pack(pady=5)
        
        frame = ttk.Frame(root)
        frame.pack(pady=5)

        ttk.Label(frame, text="Select Airport:").grid(row=0, column=0, padx=5, pady=5)
        self.airport_var = tk.StringVar()
        self.airport_dropdown = ttk.Combobox(frame, textvariable=self.airport_var, values=list(valid_airports))
        self.airport_dropdown.grid(row=0, column=1, padx=5, pady=5)
        self.airport_dropdown.bind("<<ComboboxSelected>>", self.update_carriers)

        ttk.Label(frame, text="Months (1-12):").grid(row=1, column=0, padx=5, pady=5)
        self.months_var = tk.IntVar()
        self.months_entry = ttk.Entry(frame, textvariable=self.months_var, width=5)
        self.months_entry.grid(row=1, column=1, padx=5, pady=5)

        self.predict_button = ttk.Button(root, text="Predict", command=self.predict_best_carrier)
        self.predict_button.pack(pady=10)
        
        self.output_label = ttk.Label(root, text="", font=("Arial", 12), justify="left")
        self.output_label.pack(pady=10)

    def update_carriers(self, event):
        airport = self.airport_var.get()
        self.carriers = df_filtered[df_filtered['airport'] == airport]['carrier'].unique()

    def predict_best_carrier(self):
        airport = self.airport_var.get()
        months = self.months_var.get()
        
        if airport not in valid_airports or not (1 <= months <= 12):
            messagebox.showerror("Error", "Select a valid airport and enter months (1-12).")
            return
        
        carriers = df_filtered[df_filtered['airport'] == airport]['carrier'].unique()
        results = {}
        
        for carrier in carriers:
            if carrier in carrier_transition_matrices:
                tpm = carrier_transition_matrices[carrier].values
                v0 = initial_vectors[carrier]
                future_distribution = predict_future_distribution(v0, tpm, months)
                
                weighted_score = (future_distribution[0] * 0.5 +
                                  future_distribution[1] * 0.3 +
                                  future_distribution[2] * 0.2)
                results[carrier] = weighted_score
        
        sorted_results = sorted(results.items(), key=lambda x: x[1], reverse=True)
        best_carrier = sorted_results[0][0]
        best_probs = predict_future_distribution(initial_vectors[best_carrier],
                                                 carrier_transition_matrices[best_carrier].values,
                                                 months)
        cause_probs = carrier_cause_probabilities[best_carrier]

        output_text = (f"Recommended Airline: {best_carrier}\n"
                       f"Low Delay: {best_probs[0]*100:.1f}%\n"
                       f"Moderate Delay: {best_probs[1]*100:.1f}%\n"
                       f"High Delay: {best_probs[2]*100:.1f}%\n\n"
                       f"Carrier Delay: {cause_probs['carrier_ct']*100:.1f}%\n"
                       f"NAS Delay: {cause_probs['nas_ct']*100:.1f}%\n"
                       f"Late Aircraft Delay: {cause_probs['late_aircraft_ct']*100:.1f}%")
        
        self.output_label.config(text=output_text)

if __name__ == "__main__":
    root = tk.Tk()
    app = DelayPredictionApp(root)
    root.mainloop()


In [4]:
import numpy as np

def compute_steady_state(tpm):
    """Computes the steady-state probabilities of a Markov chain."""
    eigvals, eigvecs = np.linalg.eig(tpm.T)
    steady_state = np.real(eigvecs[:, np.isclose(eigvals, 1)])
    steady_state = steady_state / steady_state.sum()
    return steady_state.flatten()

steady_states = {carrier: compute_steady_state(tpm) for carrier, tpm in carrier_transition_matrices.items()}

print("\n📊 Steady-State Analysis: Long-Term Delay Causes for Airlines\n")
for carrier, ss in steady_states.items():
    print(f"✈ Airline: {carrier}")
    print(f"   🔹 Carrier Delays: {ss[0]*100:.2f}%")
    print(f"   🔹 NAS Delays: {ss[1]*100:.2f}%")
    print(f"   🔹 Late Aircraft Delays: {ss[2]*100:.2f}%")

    # Interpretation of the dominant delay factor
    dominant_cause = ["Carrier Delays", "NAS Delays", "Late Aircraft Delays"][np.argmax(ss)]
    print(f"   ⚠️ Long-term dominant delay factor: **{dominant_cause}**\n")



📊 Steady-State Analysis: Long-Term Delay Causes for Airlines

✈ Airline: 9E
   🔹 Carrier Delays: 2.78%
   🔹 NAS Delays: 74.49%
   🔹 Late Aircraft Delays: 22.73%
   ⚠️ Long-term dominant delay factor: **NAS Delays**

✈ Airline: AA
   🔹 Carrier Delays: 2.00%
   🔹 NAS Delays: 23.28%
   🔹 Late Aircraft Delays: 74.73%
   ⚠️ Long-term dominant delay factor: **Late Aircraft Delays**

✈ Airline: AS
   🔹 Carrier Delays: 26.23%
   🔹 NAS Delays: 48.77%
   🔹 Late Aircraft Delays: 25.00%
   ⚠️ Long-term dominant delay factor: **NAS Delays**

✈ Airline: B6
   🔹 Carrier Delays: 55.87%
   🔹 NAS Delays: 6.90%
   🔹 Late Aircraft Delays: 37.23%
   ⚠️ Long-term dominant delay factor: **Carrier Delays**

✈ Airline: DL
   🔹 Carrier Delays: 3.31%
   🔹 NAS Delays: 55.31%
   🔹 Late Aircraft Delays: 41.38%
   ⚠️ Long-term dominant delay factor: **NAS Delays**

✈ Airline: F9
   🔹 Carrier Delays: 16.33%
   🔹 NAS Delays: 32.64%
   🔹 Late Aircraft Delays: 51.03%
   ⚠️ Long-term dominant delay factor: **Late Aircra

In [3]:
import numpy as np

def compute_steady_state(tpm):
    """Computes the steady-state probabilities of a Markov chain."""
    eigvals, eigvecs = np.linalg.eig(tpm.T)
    steady_state = np.real(eigvecs[:, np.isclose(eigvals, 1)])
    steady_state = steady_state / steady_state.sum()
    return steady_state.flatten()

steady_states = {carrier: compute_steady_state(tpm) for carrier, tpm in carrier_transition_matrices.items()}

def compute_reliability_score(steady_states):
    """Computes the reliability score for each airline based on late aircraft delay probability."""
    reliability_scores = {carrier: 100 - (ss[2] * 100) for carrier, ss in steady_states.items()}
    return reliability_scores

reliability_scores = compute_reliability_score(steady_states)

print("\n📊 Airline Reliability Scores: Which Airlines Are Least Affected by Cascading Delays?\n")
for carrier, score in sorted(reliability_scores.items(), key=lambda x: x[1], reverse=True):
    print(f"✈ {carrier}: ⭐ {score:.2f}/100 Reliability")

    if score >= 60:
        print(f"   ✅ Highly reliable! Minimal cascading delays.")
    elif 40 <= score < 60:
        print(f"   ⚠️ Moderate reliability. Some delays may persist.")
    else:
        print(f"   ❌ Unreliable! Frequent delay propagation.\n")



📊 Airline Reliability Scores: Which Airlines Are Least Affected by Cascading Delays?

✈ 9E: ⭐ 77.27/100 Reliability
   ✅ Highly reliable! Minimal cascading delays.
✈ AS: ⭐ 75.00/100 Reliability
   ✅ Highly reliable! Minimal cascading delays.
✈ EV: ⭐ 74.50/100 Reliability
   ✅ Highly reliable! Minimal cascading delays.
✈ HA: ⭐ 72.08/100 Reliability
   ✅ Highly reliable! Minimal cascading delays.
✈ OO: ⭐ 66.86/100 Reliability
   ✅ Highly reliable! Minimal cascading delays.
✈ G4: ⭐ 65.02/100 Reliability
   ✅ Highly reliable! Minimal cascading delays.
✈ OH: ⭐ 64.95/100 Reliability
   ✅ Highly reliable! Minimal cascading delays.
✈ MQ: ⭐ 63.43/100 Reliability
   ✅ Highly reliable! Minimal cascading delays.
✈ B6: ⭐ 62.77/100 Reliability
   ✅ Highly reliable! Minimal cascading delays.
✈ YX: ⭐ 59.56/100 Reliability
   ⚠️ Moderate reliability. Some delays may persist.
✈ DL: ⭐ 58.62/100 Reliability
   ⚠️ Moderate reliability. Some delays may persist.
✈ YV: ⭐ 57.37/100 Reliability
   ⚠️ Moderate 