In [41]:
# bokeh basics
from bokeh.plotting import figure
from bokeh.io import show, output_notebook

# Create a blank figure with labels.
# `width`/`height` are used rather than `plot_width`/`plot_height`: the
# `plot_*` spellings were removed in Bokeh 3.0.
p = figure(width=600, height=600,
           title='Example Glyphs',
           x_axis_label='X', y_axis_label='Y')

# Example data: one (x, y) list pair per marker type
squares_x = [1, 3, 4, 5, 8]
squares_y = [8, 7, 3, 1, 10]
circles_x = [9, 12, 4, 3, 15]
circles_y = [8, 4, 11, 6, 10]

# Add the square markers. scatter(marker=...) is used instead of the
# per-marker helpers (p.square / p.circle with size), which are deprecated
# since Bokeh 3.4.
p.scatter(squares_x, squares_y, marker='square',
          size=12, color='navy', alpha=0.6)
# Add the circle markers
p.scatter(circles_x, circles_y, marker='circle',
          size=12, color='red')

# Set to output the plot in the notebook
output_notebook()
# Show the plot
show(p)

In [42]:
import pandas as pd
# Load the flights sample straight from the hosted CSV file
read_data = pd.read_csv(
    'https://raw.githubusercontent.com/roberthryniewicz/datasets/master/airline-dataset/flights/flights.csv'
)
# Show the column labels to see which fields are available
read_data.columns

Index(['Year', 'Month', 'DayofMonth', 'DayOfWeek', 'DepTime', 'CRSDepTime',
       'ArrTime', 'CRSArrTime', 'UniqueCarrier', 'FlightNum', 'TailNum',
       'ActualElapsedTime', 'CRSElapsedTime', 'AirTime', 'ArrDelay',
       'DepDelay', 'Origin', 'Dest', 'Distance', 'TaxiIn', 'TaxiOut',
       'Cancelled', 'CancellationCode', 'Diverted', 'CarrierDelay',
       'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay'],
      dtype='object')

In [43]:
# Summary statistics for the arrival-delay column
read_data['ArrDelay'].describe()

count    98698.000000
mean         5.729954
std         30.966959
min        -57.000000
25%         -9.000000
50%         -2.000000
75%         10.000000
max        500.000000
Name: ArrDelay, dtype: float64

In [44]:
import numpy as np

# Histogram settings: delays from DELAY_MIN to DELAY_MAX minutes, grouped into
# BIN_WIDTH-minute bins. The bin count is derived from these values so the
# range and the number of bins cannot drift apart.
DELAY_MIN, DELAY_MAX, BIN_WIDTH = -60, 120, 5

# Drop missing delays explicitly instead of relying on how np.histogram
# treats NaN (assumes ArrDelay can contain NaN - TODO confirm).
arr_hist, edges = np.histogram(read_data['ArrDelay'].dropna(),
                               bins=(DELAY_MAX - DELAY_MIN) // BIN_WIDTH,
                               range=(DELAY_MIN, DELAY_MAX))

# One row per bin: the flight count plus the bin's left and right edge
delays = pd.DataFrame({'flights': arr_hist,
                       'left': edges[:-1],
                       'right': edges[1:]})

In [45]:
# Every edge except the last one: the values used as each bin's 'left' boundary
edges[:-1]

array([-60., -55., -50., -45., -40., -35., -30., -25., -20., -15., -10.,
        -5.,   0.,   5.,  10.,  15.,  20.,  25.,  30.,  35.,  40.,  45.,
        50.,  55.,  60.,  65.,  70.,  75.,  80.,  85.,  90.,  95., 100.,
       105., 110., 115.])

In [46]:
# Every edge except the first one: the values used as each bin's 'right' boundary
edges[1:]

array([-55., -50., -45., -40., -35., -30., -25., -20., -15., -10.,  -5.,
         0.,   5.,  10.,  15.,  20.,  25.,  30.,  35.,  40.,  45.,  50.,
        55.,  60.,  65.,  70.,  75.,  80.,  85.,  90.,  95., 100., 105.,
       110., 115., 120.])

In [47]:
# Preview the first four bins of the histogram table
delays.head(n=4)

Unnamed: 0,flights,left,right
0,2,-60.0,-55.0
1,4,-55.0,-50.0
2,26,-50.0,-45.0
3,63,-45.0,-40.0


In [48]:
# Create the blank plot.
# `width`/`height` are used rather than `plot_width`/`plot_height`, which were
# removed in Bokeh 3.0.
p = figure(height=600, width=600,
           title='Histogram of Arrival Delays',
           x_axis_label='Delay (min)',
           y_axis_label='Number of flights')

# Add a quad glyph: one rectangle per bin, rising from the baseline (0) to the
# bin's flight count and spanning the bin's left/right edges
p.quad(bottom=0, top=delays['flights'],
       left=delays['left'], right=delays['right'],
       fill_color='red', line_color='black')

# Show the plot
show(p)

In [49]:
# Import the ColumnDataSource class (and HoverTool for the tooltips below)
from bokeh.models import ColumnDataSource, HoverTool

# Convert dataframe to column data source
src = ColumnDataSource(data=delays)
# List the column names the source exposes to glyphs
src.data.keys()

dict_keys(['index', 'flights', 'left', 'right'])

In [50]:
# Draw the bars again, this time naming columns of `src` instead of
# passing the data arrays directly
p.quad(top='flights', bottom=0,
       left='left', right='right',
       source=src,
       line_color='black', fill_color='red')

In [51]:
# Each tooltip row is a (label, value) pair: "@name" refers to one of our own
# data fields, "$name" refers to a position on the graph.
h = HoverTool(
    tooltips=[
        ('Delay Interval Left ', '@left'),
        ('(x,y)', '($x, $y)'),
    ]
)

In [53]:
# Human-readable label for each bin, e.g. "-60 to -55 minutes".
# NOTE: `src` was built from `delays` before this column existed, so the
# source must be rebuilt before a glyph or tooltip can reference it.
delays['f_interval'] = [
    '%d to %d minutes' % bounds
    for bounds in zip(delays['left'], delays['right'])
]

In [59]:
# Rebuild the data source: `src` was created before the `f_interval` column was
# added to `delays`, so the earlier source does not contain it and the
# '@f_interval' tooltip could not be resolved.
src = ColumnDataSource(delays)

# Create the blank plot.
# `width`/`height` are used rather than `plot_width`/`plot_height`, which were
# removed in Bokeh 3.0.
p = figure(height=600, width=600,
           title='Histogram of Arrival Delays',
           x_axis_label='Delay (min)',
           y_axis_label='Number of Flights')

# Add a quad glyph with source this time; the hover_* properties restyle the
# bar currently under the cursor
p.quad(bottom=0, top='flights', left='left', right='right', source=src,
       fill_color='red', line_color='black', fill_alpha=0.75,
       hover_fill_alpha=1.0, hover_fill_color='navy')

# Add a hover tool. Every "@name" must match a column of `src`:
# 'f_interval' is the formatted bin label, 'flights' is the bin count.
hover = HoverTool(tooltips=[('Delay', '@f_interval'),
                            ('Num of Flights', '@flights')])

# Add the hover tool to the graph
p.add_tools(hover)

# Show the plot
show(p)