# In [None]:
# exploration.ipynb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact

# Load enhanced data
# Columns handed to read_excel's parse_dates so they load as datetimes.
timestamp_columns = [
    'scheduled_departure',
    'actual_departure',
    'scheduled_arrival',
    'actual_arrival',
]
df = pd.read_excel("flight_data_enhanced.xlsx", parse_dates=timestamp_columns)

# 1. Flight Duration vs Delay
# One point per row of df, coloured by the route_weather label.
duration_fig, duration_ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(
    data=df,
    x='flight_duration',
    y='delay_minutes',
    hue='route_weather',
    ax=duration_ax,
)
duration_ax.set_title("Flight Duration vs Delay by Weather Condition")
plt.show()

# 2. Interactive Temporal Analysis
# Missing weather labels are dropped before sorting: sorted() raises TypeError
# when float NaN is mixed with string labels, and a NaN option could never
# match a row through `==` anyway.
@interact(hour=range(24),
          weather=sorted(df['route_weather'].dropna().unique()))
def plot_hourly_delays(hour, weather):
    """Draw per-airline delay box plots for one departure hour and weather label.

    Called by ipywidgets whenever one of the two generated controls changes.

    Args:
        hour: Value matched against the ``departure_hour`` column; the control
            offers the values of ``range(24)``.
        weather: Value matched against the ``route_weather`` column; the
            control offers the sorted, non-missing labels found in ``df``.
    """
    subset = df[(df['departure_hour'] == hour) &
                (df['route_weather'] == weather)]

    # Some hour/weather combinations have no rows; report that instead of
    # handing seaborn an empty frame.
    if subset.empty:
        print(f"No flights at {hour}:00 with {weather} weather")
        return

    plt.figure(figsize=(12, 6))
    sns.boxplot(x='airline', y='delay_minutes', data=subset)
    plt.title(f"Delays at {hour}:00 with {weather} weather")
    plt.xticks(rotation=45)  # tilt the airline tick labels
    plt.show()

# 3. Route Weather Impact
# Mean delay and number of non-null delay values per route_weather label,
# keeping only labels with more than 10 values (filters rare conditions).
weather_impact = (
    df.groupby('route_weather')['delay_minutes']
    .agg(['mean', 'count'])
    .loc[lambda stats: stats['count'] > 10]
)

plt.figure(figsize=(12, 6))
# Bars are ordered from the smallest to the largest mean delay.
impact_ax = weather_impact['mean'].sort_values().plot.bar()
impact_ax.set_title("Average Delay by En-Route Weather Condition")
impact_ax.set_ylabel("Average Delay (minutes)")
plt.show()

# 4. Temporal Patterns
# 2x2 grid of delay_minutes against four columns of df. Each entry is
# (seaborn plotting function, x column, panel title), listed in row-major
# panel order.
temporal_fig, temporal_axes = plt.subplots(2, 2, figsize=(16, 8))
temporal_panels = [
    (sns.boxplot, 'departure_hour', "Delays by Hour of Day"),
    (sns.boxplot, 'departure_day', "Delays by Day of Week"),
    (sns.scatterplot, 'route_temp', "Delays vs Temperature"),
    (sns.scatterplot, 'route_wind', "Delays vs Wind Speed"),
]
for panel_ax, (draw_panel, x_column, panel_title) in zip(temporal_axes.flat,
                                                         temporal_panels):
    draw_panel(x=x_column, y='delay_minutes', data=df, ax=panel_ax)
    panel_ax.set_title(panel_title)

plt.tight_layout()
plt.show()