In [2]:
import numpy as np
import pandas as pd

# Load the combined airline + weather table (path is relative to the notebook).
airline_df = pd.read_csv('../airline_data/airlines_and_weather.csv')

# Number of flights in each month, ordered by month number.
month_flights = airline_df['MONTH'].value_counts().sort_index()

# Rows flagged as cancelled, then the per-month cancellation tally in month
# order. The tally is plotted as a bar chart in a later cell.
is_cancelled = airline_df['CANCELLED'] == 1
cancellations = airline_df[is_cancelled]
month_cancellations = cancellations['MONTH'].value_counts().sort_index()

print(month_cancellations)

1     17712
2     18552
3     10496
4      8032
5      7117
6      9808
7     10212
8      9726
9      7177
10     7341
11     5436
12    11665
Name: MONTH, dtype: int64


In [3]:
from bokeh.io import output_notebook, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category20c, Category20c_12
from bokeh.plotting import figure

# Render Bokeh plots inline in the notebook.
output_notebook()

# Bar chart of cancellations per month.
# The x axis is categorical, so the month numbers are converted to strings
# and used as the axis factors.
month_ints = month_cancellations.index.to_list()
month_strings = [str(i) for i in month_ints]

# Every ColumnDataSource column must have the same length. Trim the 12-colour
# palette to the number of months actually present so the chart still builds
# when the data does not cover all twelve months.
bar_colors = list(Category20c_12[:len(month_strings)])

source = ColumnDataSource(data=dict(
    months=month_strings,
    counts=month_cancellations.to_list(),
    color=bar_colors,
))

# Fixed y range; presumably chosen to leave headroom above the tallest bar
# for the horizontal legend placed at the top of the plot.
p = figure(x_range=month_strings, y_range=(0,30000), plot_height=250, title="Cancellation Counts")

p.vbar(x='months', top='counts', width=0.9, color='color', legend_field="months", source=source)

p.xgrid.grid_line_color = None
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)

In [4]:
# Bar chart of total flights per month.
# The x axis is categorical, so the month numbers are converted to strings
# and used as the axis factors.
month_ints = month_flights.index.to_list()
month_strings = [str(i) for i in month_ints]

# Every ColumnDataSource column must have the same length. Trim the 12-colour
# palette to the number of months actually present so the chart still builds
# when the data does not cover all twelve months.
bar_colors = list(Category20c_12[:len(month_strings)])

source = ColumnDataSource(data=dict(
    months=month_strings,
    counts=month_flights.to_list(),
    color=bar_colors,
))

# Fixed y range; presumably chosen to leave headroom above the tallest bar
# for the horizontal legend placed at the top of the plot.
p = figure(x_range=month_strings, y_range=(0,1500000), plot_height=250, title="Flight Counts")

p.vbar(x='months', top='counts', width=0.9, color='color', legend_field="months", source=source)

p.xgrid.grid_line_color = None
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)

In [50]:
from math import pi
from bokeh.palettes import Category10
from bokeh.transform import cumsum

# Render Bokeh plots inline in the notebook.
output_notebook()

# Pie chart of cancellation reasons.
# Tally cancellations per reason code and swap the codes for readable labels
# (assumes the codes are stored as the integers 0-3 -- TODO confirm).
code_counts = cancellations['CANCELLATION_CODE'].value_counts()
code_counts = code_counts.rename(index={0: "Carrier", 1: "Weather", 2: "NAS", 3: "Security"})

# Name the index explicitly before resetting it. The column name produced by
# reset_index() after value_counts() differs between pandas versions
# ('index' before 2.0, the source column name from 2.0 on), so relying on
# 'index' would leave the 'type' column missing on newer pandas.
data = code_counts.rename_axis('type').reset_index(name='value')

# Each wedge spans its share of the full circle, in radians.
data['angle'] = data['value']/data['value'].sum() * 2*pi

# Category10 only has palettes for 3 to 10 colours; request at least 3 and
# trim to the number of wedges so fewer than 3 reasons does not raise KeyError.
n_wedges = len(data)
data['color'] = list(Category10[max(n_wedges, 3)][:n_wedges])

p = figure(plot_height=350, title="Cancellation Reasons", toolbar_location="right",
           tools="hover,pan,wheel_zoom,box_zoom,reset", tooltips="@type: @value", x_range=(-0.5, 1.0))

# cumsum() turns the per-row angles into consecutive start/end angles.
p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend_field='type', source=data)

# A pie chart has no meaningful axes or grid.
p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None

show(p)

In [80]:
# Pie chart of the weather descriptions recorded for weather cancellations.
# Load the weather lookup table and rename its key column so it can be joined
# on the airline table's ORIGIN_WEATHER column. A chained rename (instead of
# inplace=True) keeps the cell a single, re-runnable assignment.
weather_df = pd.read_csv('../airline_data/L_WEATHER.csv').rename(columns={"Code": "ORIGIN_WEATHER"})

# Keep flights with cancellation code 1 (labelled "Weather" in the
# cancellation-reasons chart) and attach the weather description.
weather_cancellations = airline_df[airline_df['CANCELLATION_CODE'] == 1]
weather_cancellations = weather_cancellations.merge(weather_df, on="ORIGIN_WEATHER")

# Tally descriptions, relabel the " 0" entry, and keep the 20 most frequent:
# Category20c has no palette with more than 20 colours.
weather_counts = weather_cancellations['Description'].value_counts()
weather_counts = weather_counts.rename(index={" 0": " no data"})
weather_counts = weather_counts.head(20)

# Name the index explicitly before resetting it. The column name produced by
# reset_index() after value_counts() differs between pandas versions
# ('index' before 2.0, the source column name from 2.0 on), so relying on
# 'index' would leave the 'weather' column missing on newer pandas.
data = weather_counts.rename_axis('weather').reset_index(name='value')

# Each wedge spans its share of the full circle, in radians.
data['angle'] = data['value']/data['value'].sum() * 2*pi

# Category20c only has palettes for 3 to 20 colours; request at least 3 and
# trim to the number of wedges so fewer than 3 categories does not raise
# KeyError.
n_wedges = len(data)
data['color'] = list(Category20c[max(n_wedges, 3)][:n_wedges])

p = figure(plot_height=350, title="Cancellation Weather Distr.", toolbar_location="right",
           tools="hover,pan,wheel_zoom,box_zoom,reset", tooltips="@weather: @value", x_range=(-0.5, 1.5))

# cumsum() turns the per-row angles into consecutive start/end angles.
p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend_field='weather', source=data)

# A pie chart has no meaningful axes or grid.
p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None
p.legend.label_text_font_size = "10pt"

show(p)