# Airline delays

In [None]:
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import RdYlGn8
from bokeh.plotting import figure, show
import numpy as np
import pandas as pd

# Configure Bokeh so that subsequent show() calls display inside this notebook.
output_notebook()

## Airport delays

The original data set contains the total number of arriving flights, separated by airline and airport, along with the number of late-arriving flights for each month from 2003 to late 2015.

In [None]:
# Load the per-carrier, per-airport monthly arrival counts.
# The month column is addressed as ' month' (with a leading space), exactly as
# the original load did -- presumably matching the CSV header; verify if the
# data file is ever regenerated.
flights_raw = pd.read_csv(
    'airline_delay_causes.csv.gz',
    usecols=['year', ' month', 'airport', 'arr_flights', 'arr_del15'],
    dtype={'arr_del15': float},
)

# Build a first-of-month timestamp from the year/month columns. This replaces
# the nested `parse_dates={'period': [0, 1]}` form of read_csv, which recent
# pandas releases deprecate.
period = pd.to_datetime(
    flights_raw[['year', ' month']]
    .rename(columns={' month': 'month'})
    .assign(day=1)
)

# Collapse carriers: total arrivals and late arrivals per airport and month.
# The numeric columns are selected explicitly so the result does not depend on
# how a given pandas version treats non-numeric columns in groupby().sum().
flights = (
    flights_raw
    .assign(period=period)
    .groupby(['airport', 'period'])[['arr_flights', 'arr_del15']]
    .sum()
)

In [None]:
# Spot-check the aggregated table: every 2008 row for the airport code 'PIT'.
idx = pd.IndexSlice
pit_2008 = idx['PIT', '2008':'2008']
flights.loc[pit_2008, :]

Compute percentage of delayed flights.

In [None]:
# Late arrivals as a percentage of all arrivals.
arrivals = flights['arr_flights']
flights['late_pct'] = flights['arr_del15'].div(arrivals).mul(100)

# Colour: split late_pct into eight quantile bins and label them with the
# palette entries in order.
flights['color'] = pd.qcut(flights['late_pct'], q=8, labels=RdYlGn8)

# Marker size by arrival volume: (0, 500] -> 5, (500, 5000] -> 10,
# (5000, 50000] -> 15.
size_bins = [0, 500, 5000, 50000]
size_labels = [5, 10, 15]
flights['size'] = pd.cut(arrivals, bins=size_bins, labels=size_labels)

## ColumnDataSource

### State borders

Only include airports in the lower 48 states.

In [None]:
from bokeh.sampledata.us_states import data as states

# Transpose so that each state code becomes a row label, then remove the
# Alaska ("AK") and Hawaii ("HI") rows.
states = pd.DataFrame(states).T.drop(["AK", "HI"])

In [None]:
# Longitude extent of the remaining states: reduce each state's `lons`
# sequence first, then reduce across states.
per_state_max = states.lons.apply(max)
per_state_min = states.lons.apply(min)
max_lon = max(per_state_max)
min_lon = min(per_state_min)

In [None]:
# Data source for the state outlines; the plot's patches glyph reads its
# 'lons' and 'lats' columns.
states_source = ColumnDataSource(states)

### Airport locations

Add latitude and longitude positions of airports.

In [None]:
airports = pd.read_csv(
    'airports.csv',
    usecols=['country', 'iata/faa', 'latitude', 'longitude', 'name'],
)

# Keep US airports whose longitude lies strictly inside the span covered by
# the retained state outlines.
in_lon_span = (airports.longitude < max_lon) & (airports.longitude > min_lon)
in_us = airports.country == 'United States'

# Index by airport code under the name 'airport', the same name used for the
# airport level of the flights table.
airports = (
    airports[in_lon_span & in_us]
    .rename(columns={'iata/faa': 'airport'})
    .set_index('airport')
)

<big><big><b><font color='green'>Choose a month to plot</font></b></big>
<br><br>
Between June 2003 and November 2015</big>

In [None]:
# Month to plot, given as the first day of that month ('YYYY-MM-01').
# Edit this value to choose another month between June 2003 and November 2015.
# A concrete default is used because the literal placeholder 'YYYY-MM-01' is
# not a valid date and stops a Restart & Run All with a ValueError.
month = pd.Timestamp('2015-06-01')

Filter by year and month.

In [None]:
# Cross-section of the flights table for the chosen month; the 'period' level
# is removed, leaving rows keyed by airport.
flights_in_month = flights.xs(month, level='period')

# Attach that month's figures to the airport table and discard any row that
# has a missing value in any column.
delays = airports.join(flights_in_month).dropna()

delays_source = ColumnDataSource(delays)

## Plot

* `size`: total number of flights
* `color`: Green-yellow-orange-red, increasing percentage of late flights.

In [None]:
# Tooltips are limited to the renderer named 'delays' (the airport circles),
# so the state outlines are not listed in the hover tool's names.
hover = HoverTool(
    names=['delays'],
    tooltips=[
        ("Name", "@name"),
        ("Airport Code", "@airport"),
        ("No. Arrivals", "@arr_flights"),
        ("Pct Delayed", "@late_pct"),
    ],
)

month_label = month.strftime("%B %Y")
title = 'US Airport Delays {}'.format(month_label)

p = figure(
    title=title,
    tools=[hover, 'wheel_zoom', 'pan', 'reset'],
    plot_width=875,
    plot_height=540,
)

# Axis and grid cosmetics.
p.grid.grid_line_alpha = 0.0
p.xaxis.axis_label = "Longitude (deg)"
p.yaxis.axis_label = "Latitude (deg)"

# State outlines: unfilled, with a faint border.
p.patches(
    xs='lons',
    ys='lats',
    fill_alpha=0,
    line_color="#884444",
    line_width=2,
    line_alpha=0.3,
    source=states_source,
)

# One circle per airport; size and colour come from the precomputed
# 'size' and 'color' columns of the delays source.
p.circle(
    x='longitude',
    y='latitude',
    name='delays',
    size='size',
    color='color',
    alpha=0.7,
    line_color='black',
    source=delays_source,
)

show(p)