In [19]:
import pandas as pd
from sqlalchemy import create_engine, types
from sqlalchemy import text 
from dotenv import dotenv_values
import plotly.express as px
import plotly.graph_objects as go

In [20]:
# Read the PostgreSQL connection settings via dotenv_values() (no explicit path is passed).
config = dotenv_values()

# Pull each setting out of the mapping; a missing key raises KeyError here.
pg_user, pg_host, pg_port, pg_db, pg_schema, pg_pass = (
    config[key]
    for key in (
        'POSTGRES_USER',
        'POSTGRES_HOST',
        'POSTGRES_PORT',
        'POSTGRES_DB',
        'POSTGRES_SCHEMA',
        'POSTGRES_PASS',
    )
)

# NOTE(review): the credentials are interpolated into the URL without escaping —
# presumably they contain no URL-reserved characters such as '@' or '/'; confirm.
url = f'postgresql://{pg_user}:{pg_pass}@{pg_host}:{pg_port}/{pg_db}'

In [21]:
# Build the shared engine. The schema search_path is passed as a connection
# option so that EVERY connection opened by the pool resolves unqualified table
# names in pg_schema. A one-off `SET search_path` only affects the single
# connection it ran on, so new or recycled pool connections would lose it.
engine = create_engine(
    url,
    echo=False,
    connect_args={'options': f'-csearch_path={pg_schema}'},
)

In [22]:
# Switch the session's search_path to the project schema inside a transaction.
set_schema_stmt = text(f'SET search_path TO {pg_schema};')
with engine.begin() as conn:
    result = conn.execute(set_schema_stmt)

# Load the prepared flights table into a DataFrame.
flights_stmt = text("SELECT * FROM debby_prep_flights;")
flights = pd.read_sql(sql=flights_stmt, con=engine)

# Preview the first three rows.
flights.head(n=3)

In [23]:
# Load the prepared daily weather table into a DataFrame.
weather_stmt = text("SELECT * FROM debby_prep_weather_daily;")
weather = pd.read_sql(sql=weather_stmt, con=engine)

In [24]:
# Preview the first three rows of the daily weather table.
weather.head(n=3)

Unnamed: 0,airport_code,station_id,date,avg_temp_c,min_temp_c,max_temp_c,precipitation_mm,max_snow_mm,avg_wind_direction,avg_wind_speed_kmh,wind_peakgust_kmh,avg_pressure_hpa,sun_minutes,date_day
0,MCO,72205,2024-07-01,27.6,23.3,33.3,0.0,0,174,10.1,,1015.5,,1.0
1,TPA,72211,2024-07-01,27.7,25.0,32.2,0.0,0,193,11.9,,1015.6,,1.0
2,JFK,74486,2024-07-01,23.3,17.8,27.8,0.0,0,352,24.8,,1016.2,,1.0


In [25]:
# Keep only the rows whose airport_code is TPA, then preview them.
is_tpa = weather['airport_code'] == "TPA"
weather_TPA = weather[is_tpa]
weather_TPA.head(n=3)

Unnamed: 0,airport_code,station_id,date,avg_temp_c,min_temp_c,max_temp_c,precipitation_mm,max_snow_mm,avg_wind_direction,avg_wind_speed_kmh,wind_peakgust_kmh,avg_pressure_hpa,sun_minutes,date_day
1,TPA,72211,2024-07-01,27.7,25.0,32.2,0.0,0,193,11.9,,1015.6,,1.0
4,TPA,72211,2024-07-02,29.4,26.7,33.3,0.0,0,145,9.4,,1016.7,,2.0
6,TPA,72211,2024-07-03,29.7,25.0,34.4,6.9,0,76,8.3,,1017.9,,3.0


In [26]:
# Per-airport weather subsets, selected on the airport_code column.
weather_JFK = weather[weather['airport_code'] == "JFK"]
weather_MCO = weather[weather['airport_code'] == "MCO"]

In [27]:
# Summary statistics (count, mean, std, quartiles, min/max) for the JFK weather subset.
weather_JFK.describe()

Unnamed: 0,station_id,avg_temp_c,min_temp_c,max_temp_c,precipitation_mm,max_snow_mm,avg_wind_direction,avg_wind_speed_kmh,avg_pressure_hpa,date_day
count,62.0,62.0,62.0,62.0,62.0,62.0,62.0,62.0,62.0,62.0
mean,74486.0,24.61129,20.983871,28.874194,2.670968,0.0,194.516129,15.869355,1016.209677,16.0
std,0.0,1.894003,2.356936,2.335958,6.184019,0.0,80.396098,4.269049,4.175169,9.017288
min,74486.0,19.4,14.4,22.8,0.0,0.0,3.0,9.4,1005.7,1.0
25%,74486.0,23.6,19.55,27.8,0.0,0.0,156.75,13.0,1013.375,8.25
50%,74486.0,24.8,21.7,28.9,0.0,0.0,194.0,15.1,1016.15,16.0
75%,74486.0,25.7,22.2,30.45,1.3,0.0,235.25,16.9,1019.075,23.75
max,74486.0,28.4,25.6,35.0,32.5,0.0,352.0,31.7,1024.9,31.0


In [28]:
# Load the flights_information_debby table (one row per flight_date in the preview below).
flights_info_stmt = text("SELECT * FROM flights_information_debby;")
flights_information_debby = pd.read_sql(sql=flights_info_stmt, con=engine)

In [29]:
def _run_query(query):
    """Execute a SQL string through the shared engine and return the rows as a DataFrame."""
    return pd.read_sql(sql=text(query), con=engine)


# One flights table per airport.
flights_MCO = _run_query("SELECT * FROM flights_MCO;")
flights_JFK = _run_query("SELECT * FROM flights_JFK;")
flights_TPA = _run_query("SELECT * FROM flights_TPA;")

In [30]:
# Preview the first three rows of the flights_information_debby table.
flights_information_debby.head(n=3)

Unnamed: 0,flight_date,total_flights,total_cancelled,avg_dep_delay,avg_arr_delay
0,2024-07-01,1930,43,21.46,13.07
1,2024-07-02,1907,6,14.85,4.3
2,2024-07-03,1922,2,8.94,-1.75


In [31]:
# Weather measurements that get attached to each airport's flights frame.
WEATHER_COLS = ['avg_temp_c', 'precipitation_mm', 'avg_wind_speed_kmh', 'avg_pressure_hpa']


def _attach_weather(flights_df, weather_df):
    """Left-join the selected weather columns onto a flights frame by date.

    Parameters
    ----------
    flights_df : pandas.DataFrame
        Flights frame with a ``flight_date`` column.
    weather_df : pandas.DataFrame
        Weather frame with a ``date`` column and the columns in ``WEATHER_COLS``.

    Returns
    -------
    pandas.DataFrame
        ``flights_df`` plus the weather columns; the helper ``date`` join key
        is dropped again after the merge.
    """
    # Drop weather columns left over from a previous run of this cell, so that
    # re-running it does not create `_x` / `_y` suffixed duplicates.
    base = flights_df.drop(columns=WEATHER_COLS, errors='ignore')
    merged = base.merge(
        weather_df[['date'] + WEATHER_COLS],
        how='left',
        left_on='flight_date',
        right_on='date',
    )
    return merged.drop(columns=['date'])


flights_MCO = _attach_weather(flights_MCO, weather_MCO)
flights_TPA = _attach_weather(flights_TPA, weather_TPA)
flights_JFK = _attach_weather(flights_JFK, weather_JFK)

# Calculate the Correlations #

In [32]:
# List the column labels of the MCO flights frame.
flights_MCO.columns

Index(['flight_date', 'total_flights', 'total_cancelled', 'avg_dep_delay',
       'avg_arr_delay', 'avg_temp_c', 'precipitation_mm', 'avg_wind_speed_kmh',
       'avg_pressure_hpa'],
      dtype='object')

In [33]:
# Pairwise correlation matrix over the listed flight and weather columns for MCO
# (flight_date is left out of the selection).
flights_MCO.loc[:, [
    'total_flights',
    'total_cancelled',
    'avg_dep_delay',
    'avg_arr_delay',
    'avg_temp_c',
    'precipitation_mm',
    'avg_wind_speed_kmh',
    'avg_pressure_hpa',
]].corr()

Unnamed: 0,total_flights,total_cancelled,avg_dep_delay,avg_arr_delay,avg_temp_c,precipitation_mm,avg_wind_speed_kmh,avg_pressure_hpa
total_flights,1.0,0.189695,0.458424,0.451838,0.049099,0.016036,-0.175949,0.30018
total_cancelled,0.189695,1.0,0.72584,0.679,-0.120837,0.173514,0.601542,-0.227933
avg_dep_delay,0.458424,0.72584,1.0,0.982294,-0.135655,0.262864,0.242488,0.002104
avg_arr_delay,0.451838,0.679,0.982294,1.0,-0.183676,0.330356,0.22755,0.011086
avg_temp_c,0.049099,-0.120837,-0.135655,-0.183676,1.0,-0.276452,-0.00894,-0.181994
precipitation_mm,0.016036,0.173514,0.262864,0.330356,-0.276452,1.0,0.075715,0.02694
avg_wind_speed_kmh,-0.175949,0.601542,0.242488,0.22755,-0.00894,0.075715,1.0,-0.6501
avg_pressure_hpa,0.30018,-0.227933,0.002104,0.011086,-0.181994,0.02694,-0.6501,1.0


# Visualisation #

## Airport MCO ##

In [34]:
# Dual-axis line chart for MCO: cancellations (first y-axis) against
# average wind speed (second y-axis) over flight_date.
x_mco, y_mco, y_wind = (
    flights_MCO[column]
    for column in ('flight_date', 'total_cancelled', 'avg_wind_speed_kmh')
)

cancellation_line = go.Scatter(
    x=x_mco, y=y_mco,
    mode='lines',
    name='Cancellation',
    yaxis='y1',  # first y-axis
)
wind_line = go.Scatter(
    x=x_mco, y=y_wind,
    mode='lines',
    name='Average Wind Speed (km/h)',
    yaxis='y2',  # second y-axis
)

fig = go.Figure(data=[cancellation_line, wind_line])
fig.update_layout(
    title="Flight Cancellations vs. Wind Speed in MCO",
    xaxis_title="Date",
    yaxis={'title': "Cancellations"},
    yaxis2={
        'title': "Wind Speed (km/h)",
        'overlaying': 'y',  # overlay on the first y-axis so both share the x-axis
        'side': 'right',
    },
    legend={'x': 0, 'y': 1},  # legend position
    font={'size': 14},  # overall font size
)

fig.show()

In [35]:
# Dual-axis line chart for MCO: average departure delay (first y-axis) against
# average wind speed (second y-axis) over flight_date.
x_mco, y_mco, y_wind = (
    flights_MCO[column]
    for column in ('flight_date', 'avg_dep_delay', 'avg_wind_speed_kmh')
)

delay_line = go.Scatter(
    x=x_mco, y=y_mco,
    mode='lines',
    name='Average Departure Delay',
    yaxis='y1',  # first y-axis
)
wind_line = go.Scatter(
    x=x_mco, y=y_wind,
    mode='lines',
    name='Average Wind Speed (km/h)',
    yaxis='y2',  # second y-axis
)

fig = go.Figure(data=[delay_line, wind_line])
fig.update_layout(
    title="Average Departure Delay vs. Wind Speed in MCO",
    xaxis_title="Date",
    yaxis={'title': "Average Departure Delay"},
    yaxis2={
        'title': "Wind Speed (km/h)",
        'overlaying': 'y',  # overlay on the first y-axis so both share the x-axis
        'side': 'right',
    },
    legend={'x': 0, 'y': 1},  # legend position
    font={'size': 14},  # overall font size
)

fig.show()

In [36]:
# Dual-axis line chart for MCO: average arrival delay (first y-axis) against
# average wind speed (second y-axis) over flight_date.
x_mco = flights_MCO['flight_date']
y_mco = flights_MCO['avg_arr_delay']
y_wind = flights_MCO['avg_wind_speed_kmh']

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=x_mco, y=y_mco,
    mode='lines',
    name='Average Arrival Delay',
    yaxis='y1'  # Assign to first y-axis
))

fig.add_trace(go.Scatter(
    x=x_mco, y=y_wind,
    mode='lines',
    name='Average Wind Speed (km/h)',
    yaxis='y2'  # Assign to second y-axis
))

fig.update_layout(
    # Title spacing fixed ("vs. Wind") to match the other charts in this notebook.
    title="Average Arrival Delay vs. Wind Speed in MCO",
    xaxis_title="Date",
    yaxis=dict(
        title="Average Arrival Delay",
    ),
    yaxis2=dict(
        title="Wind Speed (km/h)",
        overlaying='y',  # Overlay on the first y-axis so both share the x-axis
        side='right'
    ),
    legend=dict(x=0, y=1),  # Position legend
    font=dict(size=14)  # Sets overall font size
)

fig.show()

## Airport TPA ## 

In [37]:
# Dual-axis line chart for TPA: cancellations (first y-axis) against
# average wind speed (second y-axis) over flight_date.
x_tpa, y_tpa, y_wind = (
    flights_TPA[column]
    for column in ('flight_date', 'total_cancelled', 'avg_wind_speed_kmh')
)

cancellation_line = go.Scatter(
    x=x_tpa, y=y_tpa,
    mode='lines',
    name='Cancellation',
    yaxis='y1',  # first y-axis
)
wind_line = go.Scatter(
    x=x_tpa, y=y_wind,
    mode='lines',
    name='Average Wind Speed (km/h)',
    yaxis='y2',  # second y-axis
)

fig = go.Figure(data=[cancellation_line, wind_line])
fig.update_layout(
    title="Flight Cancellations vs. Wind Speed in TPA",
    xaxis_title="Date",
    yaxis={'title': "Cancellations"},
    yaxis2={
        'title': "Wind Speed (km/h)",
        'overlaying': 'y',  # overlay on the first y-axis so both share the x-axis
        'side': 'right',
    },
    legend={'x': 0, 'y': 1},  # legend position
    font={'size': 14},  # overall font size
)

fig.show()

In [38]:
# Dual-axis line chart for TPA: average departure delay (first y-axis) against
# average wind speed (second y-axis) over flight_date.
x_tpa, y_tpa, y_wind = (
    flights_TPA[column]
    for column in ('flight_date', 'avg_dep_delay', 'avg_wind_speed_kmh')
)

delay_line = go.Scatter(
    x=x_tpa, y=y_tpa,
    mode='lines',
    name='Average Departure Delay',
    yaxis='y1',  # first y-axis
)
wind_line = go.Scatter(
    x=x_tpa, y=y_wind,
    mode='lines',
    name='Average Wind Speed (km/h)',
    yaxis='y2',  # second y-axis
)

fig = go.Figure(data=[delay_line, wind_line])
fig.update_layout(
    title="Average Departure Delay vs. Wind Speed in TPA",
    xaxis_title="Date",
    yaxis={'title': "Average Departure Delay"},
    yaxis2={
        'title': "Wind Speed (km/h)",
        'overlaying': 'y',  # overlay on the first y-axis so both share the x-axis
        'side': 'right',
    },
    legend={'x': 0, 'y': 1},  # legend position
    font={'size': 14},  # overall font size
)

fig.show()

In [39]:
# Dual-axis line chart for TPA: average arrival delay (first y-axis) against
# average wind speed (second y-axis) over flight_date.
x_tpa, y_tpa, y_wind = (
    flights_TPA[column]
    for column in ('flight_date', 'avg_arr_delay', 'avg_wind_speed_kmh')
)

delay_line = go.Scatter(
    x=x_tpa, y=y_tpa,
    mode='lines',
    name='Average Arrival Delay',
    yaxis='y1',  # first y-axis
)
wind_line = go.Scatter(
    x=x_tpa, y=y_wind,
    mode='lines',
    name='Average Wind Speed (km/h)',
    yaxis='y2',  # second y-axis
)

fig = go.Figure(data=[delay_line, wind_line])
fig.update_layout(
    title="Average Arrival Delay vs. Wind Speed in TPA",
    xaxis_title="Date",
    yaxis={'title': "Average Arrival Delay"},
    yaxis2={
        'title': "Wind Speed (km/h)",
        'overlaying': 'y',  # overlay on the first y-axis so both share the x-axis
        'side': 'right',
    },
    legend={'x': 0, 'y': 1},  # legend position
    font={'size': 14},  # overall font size
)

fig.show()

## Airport JFK ##

In [40]:
# Dual-axis line chart for JFK: cancellations (first y-axis) against
# average wind speed (second y-axis) over flight_date.
x_jfk = flights_JFK['flight_date']
y_jfk = flights_JFK['total_cancelled']
y_wind = flights_JFK['avg_wind_speed_kmh']

fig = go.Figure()

# Both traces must use the JFK series defined above. The previous version
# plotted the leftover x_tpa / y_tpa variables from the TPA cells, so the
# chart titled "JFK" showed TPA data.
fig.add_trace(go.Scatter(
    x=x_jfk, y=y_jfk,
    mode='lines',
    name='Cancellation',
    yaxis='y1'  # Assign to first y-axis
))

fig.add_trace(go.Scatter(
    x=x_jfk, y=y_wind,
    mode='lines',
    name='Average Wind Speed (km/h)',
    yaxis='y2'  # Assign to second y-axis
))

fig.update_layout(
    title="Flight Cancellations vs. Wind Speed in JFK",
    xaxis_title="Date",
    yaxis=dict(
        title="Cancellations",
    ),
    yaxis2=dict(
        title="Wind Speed (km/h)",
        overlaying='y',  # Overlay on the first y-axis so both share the x-axis
        side='right'
    ),
    legend=dict(x=0, y=1),  # Position legend
    font=dict(size=14)  # Sets overall font size
)

fig.show()

In [42]:
# Line chart of total_cancelled per flight_date from flights_information_debby.
# Title typo fixed ("Toatal" -> "Total").
fig = px.line(
    flights_information_debby,
    x="flight_date",
    y="total_cancelled",
    title='Total Cancellation in the three Airports',
)
fig.show()

In [15]:
# Line chart of total_cancelled per flight_date for MCO.
# Title typos fixed ("Toatal" -> "Total", "Airports" -> "Airport").
fig = px.line(
    flights_MCO,
    x="flight_date",
    y="total_cancelled",
    title='Total Cancellation in Airport MCO',
)
fig.show()

In [16]:
# Line chart of total_cancelled per flight_date for TPA.
# Title typo fixed ("Toatal" -> "Total").
fig = px.line(
    flights_TPA,
    x="flight_date",
    y="total_cancelled",
    title='Total Cancellation in Airport TPA',
)
fig.show()

In [17]:
# Line chart of total_cancelled per flight_date for JFK.
# Title typo fixed ("Toatal" -> "Total").
fig = px.line(
    flights_JFK,
    x="flight_date",
    y="total_cancelled",
    title='Total Cancellation in Airport JFK',
)
fig.show()

In [None]:
# Line chart of the average wind speed over date for the MCO weather subset.
fig = px.line(
    data_frame=weather_MCO,
    x="date",
    y="avg_wind_speed_kmh",
    title='Average Wind Speed in MCO kmh',
)
fig.show()

In [20]:
# Line chart of the average wind speed over date for all airports.
# `weather` holds several airport_code values per date, so each airport gets
# its own coloured line; a single trace would zig-zag between airports.
fig = px.line(
    weather,
    x="date",
    y="avg_wind_speed_kmh",
    color="airport_code",
    title='Average Wind Speed in kmh',
)
fig.show()