In [None]:
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Read the two input tables from the local `data/` directory:
# the per-flight records and the airport reference table.
flights_csv_path = "data/flights.csv"
airports_csv_path = "data/airports.csv"

flights = pd.read_csv(flights_csv_path)
airports = pd.read_csv(airports_csv_path)

In [None]:
# Interactive map with one marker per row of `airports`; hovering a marker
# shows the airport name together with its city, state and country.
# NOTE(review): recent Plotly releases deprecate `scatter_mapbox` in favour of
# `scatter_map` -- confirm against the installed Plotly version before migrating.
airport_hover_columns = ['CITY', 'STATE', 'COUNTRY']

fig = px.scatter_mapbox(
    airports,
    lat='LATITUDE',
    lon='LONGITUDE',
    hover_name='AIRPORT',
    hover_data=airport_hover_columns,
    zoom=3,
    title='Map of Airports',
)
# Select the "open-street-map" base map style for the figure.
fig.update_layout(mapbox_style="open-street-map")
fig.show()

In [None]:
# Display the column labels of the flights table to see which fields are available.
flights.columns

For some of the plots below, a random sample of the data is used, because the full dataset is too large and plotting all of it fails.

In [None]:
# Flights whose ARRIVAL_DELAY is strictly positive, i.e. the delayed flights.
delayed_flights = flights[flights['ARRIVAL_DELAY'] > 0]

# Plot a small, reproducible sample instead of every delayed flight.
# DataFrame.sample raises ValueError when `n` exceeds the number of rows
# (sampling without replacement), so the requested size is capped at the
# number of delayed flights actually available.
SAMPLE_SIZE = 100
random_sample = delayed_flights.sample(
    n=min(SAMPLE_SIZE, len(delayed_flights)),
    random_state=42,  # fixed seed so the same rows are drawn on every run
)

In [None]:
# Scatter of departure delay (x) against arrival delay (y) for the sampled
# flights, coloured by airline; hovering a point shows its origin airport.
fig = px.scatter(
    random_sample,
    x='DEPARTURE_DELAY',
    y='ARRIVAL_DELAY',
    color='AIRLINE',
    hover_name='ORIGIN_AIRPORT',
    title='Departure Delay vs. Arrival Delay',
)
fig.show()

In [None]:
# Animated version of the delay scatter: one animation frame per YEAR value,
# points coloured by airline and sized by flight distance.
# Only the x-axis range is pinned (to -20..100); the y-axis is left to Plotly.
fig = px.scatter(
    data_frame=random_sample,
    x='DEPARTURE_DELAY',
    y='ARRIVAL_DELAY',
    color='AIRLINE',
    size='DISTANCE',
    animation_frame='YEAR',
    range_x=[-20, 100],
    title='Animated scatter Plot',
)
fig.show()

Some general histograms and bar plots

In [None]:
# Histogram of DEPARTURE_DELAY over the full flights table, with the x-axis
# limited to the -30..60 window.
fig = px.histogram(
    flights,
    x='DEPARTURE_DELAY',
    range_x=(-30, 60),
    title='Distribution of Departure Delays',
)
fig.show()

In [None]:
# Histogram of ARRIVAL_DELAY over the full flights table; the x-axis is limited
# to -70..100 and the y-axis (bin counts) to 0..180000.
fig = px.histogram(
    flights,
    x='ARRIVAL_DELAY',
    range_x=(-70, 100),
    range_y=(0, 180000),
    title='Distribution of Arrival Delays',
)
fig.show()

In [None]:
# Count the delayed flights for each airline and show the counts as a bar chart.
airline_delay_counts = delayed_flights['AIRLINE'].value_counts()

# Turn the counts Series into a two-column frame: the airline code (from the
# index) in 'AIRLINE' and the number of delayed flights in 'COUNT'.
delayed_flight_count_by_airline = (
    airline_delay_counts
    .rename_axis('AIRLINE')
    .reset_index(name='COUNT')
)

fig = px.bar(
    delayed_flight_count_by_airline,
    x='AIRLINE',
    y='COUNT',
    title='Number of Delayed Flights per Airline',
)
fig.show()

In [None]:
# Bar chart of how many flights were cancelled for each cancellation reason.

# Human-readable labels for the single-letter cancellation reason codes.
reason_mapping = {
    'A': 'Airline/Carrier',
    'B': 'Weather',
    'C': 'National Air System',
    'D': 'Security',
}

# Count each reason code (value_counts skips missing values by default), shape
# the result into a 'CANCELLATION_REASON' / 'COUNT' frame, then swap the codes
# for their labels.
cancellation_reason_count = (
    flights['CANCELLATION_REASON']
    .value_counts()
    .rename_axis('CANCELLATION_REASON')
    .reset_index(name='COUNT')
    .assign(
        CANCELLATION_REASON=lambda counts: counts['CANCELLATION_REASON'].map(reason_mapping)
    )
)

axis_labels = {'CANCELLATION_REASON': 'Cancellation Reason', 'COUNT': 'Count'}
fig = px.bar(
    cancellation_reason_count,
    x='CANCELLATION_REASON',
    y='COUNT',
    title='Flight Cancellations by Reason',
    labels=axis_labels,
)
fig.show()