# ✈️ Flight Delay & Weather Data EDA
This notebook explores the relationship between flight delays and weather conditions for JFK Airport in January 2023.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the merged flight/weather CSV, relative to the notebook's working directory
DATA_PATH = 'data/merged_flight_weather_Jan2023.csv'

# Read the whole file into the DataFrame used by every later cell
df = pd.read_csv(DATA_PATH)


## 📊 Distribution of Departure Delays

In [None]:
# Restrict the plot to delays between -20 and 200 minutes so the long tail
# does not squash the bulk of the distribution
delay_col = 'Departure delay (Minutes)'
in_plot_range = df[delay_col].between(-20, 200)
df_filtered = df[in_plot_range]

# Histogram of the retained delays, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df_filtered[delay_col], bins=30, edgecolor='black', color='orange')
ax.set_title('Distribution of Departure Delays')
ax.set_xlabel('Delay (Minutes)')
ax.set_ylabel('Number of Flights')
ax.grid(True)
plt.show()


## 📊 Boxplot of Departure Delays

In [None]:
# Box plot of the full (unfiltered) departure-delay column
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(x=df['Departure delay (Minutes)'], ax=ax)
ax.set_title('Boxplot of Departure Delays')
ax.set_xlabel('Delay (Minutes)')
plt.show()


## 🌡️ Average Daily Temperature

In [None]:
# Convert the 'Date' column to datetimes and keep the result in a new 'DATE' column
df['DATE'] = pd.to_datetime(df['Date'])

# Mean TAVG for each distinct date
daily_temp = df.groupby('DATE')['TAVG'].mean()

# Line plot of the daily means, one marker per date
fig, ax = plt.subplots(figsize=(12, 6))
daily_temp.plot(ax=ax, marker='o')
ax.set_title('Average Daily Temperature - January 2023')
ax.set_xlabel('Date')
ax.set_ylabel('Temperature (°F)')
ax.grid(True)
plt.show()


## 🌧️ Rainy and Snowy Days Count

In [None]:
def _count_flagged_days(frame, flag_col, date_col='Date'):
    """Return the number of distinct dates on which `flag_col` is set.

    The merged frame has several rows per date (the daily-temperature cell has
    to group by date to get one value per day), so summing the flag column
    counts rows, not days. Counting unique dates gives the figure the chart's
    "Days" axis actually promises.

    Parameters
    ----------
    frame : DataFrame holding `flag_col` and `date_col`.
    flag_col : name of the weather-type flag column; missing values are
        treated as "not set".
    date_col : name of the column identifying the calendar day.

    Returns
    -------
    int : count of distinct `date_col` values where the flag is greater than 0.
    """
    is_flagged = frame[flag_col].fillna(0) > 0
    return frame.loc[is_flagged, date_col].nunique()


# NOTE(review): if these columns come from NOAA GHCN-Daily, WT02 is "heavy fog"
# and WT03 is "thunder" (rain is WT16, snow is WT18) — confirm the column
# meanings against the data source before trusting the labels below.
rainy_days = _count_flagged_days(df, 'WT02')
snowy_days = _count_flagged_days(df, 'WT03')

plt.bar(['Rainy Days', 'Snowy Days'], [rainy_days, snowy_days], color=['blue', 'gray'])
plt.title('Number of Rainy and Snowy Days in January 2023')
plt.ylabel('Days')
plt.show()


## ☔ Impact of Rain on Departure Delays

In [None]:
# Build the 'Rain' indicator from WT02, treating missing values as 0
df['Rain'] = df['WT02'].fillna(0)

# One box per 'Rain' value, showing the spread of departure delays
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='Rain', y='Departure delay (Minutes)', ax=ax)
ax.set_title('Departure Delays on Rainy vs. Clear Days')
ax.set_xlabel('Rainy Day (1=Yes, 0=No)')
ax.set_ylabel('Departure Delay (Minutes)')
plt.show()


## 🌧️ Precipitation vs. Departure Delay

In [None]:
# Scatter of precipitation against departure delay; alpha makes overlapping points visible
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(df['PRCP'], df['Departure delay (Minutes)'], alpha=0.5)
ax.set_title('Precipitation vs. Departure Delay')
ax.set_xlabel('Precipitation (inches)')
ax.set_ylabel('Departure Delay (Minutes)')
plt.show()


## 🔥 Correlation Matrix

In [None]:
# Columns included in the correlation matrix: the delay plus four weather measurements
corr_columns = ['Departure delay (Minutes)', 'PRCP', 'TMAX', 'TMIN', 'AWND']
corr = df[corr_columns].corr()

# Annotated heatmap of the pairwise correlations
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix')
plt.show()
