In [13]:
import pandas as pd
from scipy.stats import ttest_ind, pearsonr, spearmanr

# Load merged dataset
df = pd.read_csv('data/merged_flight_weather_Jan2023.csv')

df.head()



Unnamed: 0,Carrier Code,Flight Number,Tail Number,Destination Airport,Scheduled departure time,Actual departure time,Departure delay (Minutes),Delay Carrier (Minutes),Delay Weather (Minutes),Delay National Aviation System (Minutes),...,WDF2,WDF5,WSF2,WSF5,WT01,WT02,WT03,WT04,WT06,WT08
0,DL,308.0,N507DZ,SEA,07:00,07:09,9.0,0.0,0.0,0.0,...,300.0,330.0,18.1,23.0,1.0,1.0,,,,
1,DL,311.0,N908DN,LAS,17:35,17:32,-3.0,0.0,0.0,0.0,...,300.0,330.0,18.1,23.0,1.0,1.0,,,,
2,DL,312.0,N176DN,HNL,09:00,09:49,49.0,0.0,0.0,0.0,...,300.0,330.0,18.1,23.0,1.0,1.0,,,,
3,DL,315.0,N811DZ,SLC,18:20,18:18,-2.0,0.0,0.0,19.0,...,300.0,330.0,18.1,23.0,1.0,1.0,,,,
4,DL,325.0,N704X,SFO,10:05,10:08,3.0,0.0,0.0,0.0,...,300.0,330.0,18.1,23.0,1.0,1.0,,,,


In [14]:
# Separate delays for rainy and clear days
rainy_days_delays = df[df['WT02'] == 1]['Departure delay (Minutes)'].dropna()
clear_days_delays = df[df['WT02'].isna()]['Departure delay (Minutes)'].dropna()

# Perform two-sample t-test
t_stat, p_value = ttest_ind(rainy_days_delays, clear_days_delays, equal_var=False)

print(f"T-Test Results:\nT-Statistic: {t_stat:.3f}, P-Value: {p_value:.3f}")

if p_value < 0.05:
    print("Conclusion: Significant difference in delays between rainy and clear days.")
else:
    print("Conclusion: No significant difference in delays between rainy and clear days.")


T-Test Results:
T-Statistic: 0.205, P-Value: 0.838
Conclusion: No significant difference in delays between rainy and clear days.


In [15]:
# Pearson Correlation (linear relationship)
pearson_corr, pearson_p = pearsonr(df['PRCP'].fillna(0), df['Departure delay (Minutes)'].fillna(0))
print(f"Pearson Correlation: {pearson_corr:.3f}, P-Value: {pearson_p:.3f}")

# Spearman Correlation (monotonic relationship)
spearman_corr, spearman_p = spearmanr(df['PRCP'].fillna(0), df['Departure delay (Minutes)'].fillna(0))
print(f"Spearman Correlation: {spearman_corr:.3f}, P-Value: {spearman_p:.3f}")



Pearson Correlation: 0.017, P-Value: 0.418
Spearman Correlation: 0.054, P-Value: 0.010
