In [1]:
# Standard imports 

from bokeh.io import output_notebook, show
output_notebook()

In [45]:
# Plot a complex chart with intearctive hover in a few lines of code

from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap

df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)

group = df.groupby(('cyl', 'mfr'))
source = ColumnDataSource(group)

p = figure(plot_width=800, plot_height=300, title="Mean MPG by # Cylinders and Manufacturer",
           x_range=group, toolbar_location=None, tools="")

p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2

index_cmap = factor_cmap('cyl_mfr', palette=['#2b83ba', '#abdda4', '#ffffbf', '#fdae61', '#a7654c'], 
                         factors=sorted(df.cyl.unique()), end=1)

p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
       line_color="white", fill_color=index_cmap, 
       hover_line_color="black", hover_fill_color="red")

p.add_tools(HoverTool(tooltips=[("Weight", "@weight_mean"), ("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]))

show(p)

  # This is added back by InteractiveShellApp.init_path()


In [3]:
df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)

print(df.cyl.head())
print(df.yr.head())


0    8
1    8
2    8
3    8
4    8
Name: cyl, dtype: object
0    70
1    70
2    70
3    70
4    70
Name: yr, dtype: object


In [4]:
group = df.groupby(('cyl', 'mfr'))
# print(group.mean())
print(group)

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x112f96f60>


  """Entry point for launching an IPython kernel.


In [5]:
source = ColumnDataSource(group)
source

In [6]:
p = figure(plot_width=800, plot_height=300, title="Mean MPG by # Cylinders and Manufacturer",
           x_range=group, toolbar_location=None, tools="")

In [7]:
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2

In [8]:
index_cmap = factor_cmap('cyl_mfr', palette=['#2b83ba', '#abdda4', '#ffffbf', '#fdae61', '#d7191c'], 
                         factors=sorted(df.cyl.unique()), end=1)

In [9]:
p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
       line_color="white", fill_color=index_cmap, 
       hover_line_color="black", hover_fill_color=index_cmap)

In [10]:
p.add_tools(HoverTool(tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]))

In [11]:
show(p)

In [3]:
# Create and deploy interactive data applications

from IPython.display import IFrame
IFrame('https://demo.bokehplots.com/apps/sliders', width=900, height=500)

In [5]:
from IPython.core.display import Markdown
Markdown(open("README.md").read())

# Time-Series-Interactive-Visualization

## Purpose

This project will use Bokeh to create an interactive time series visualization.

## How to Use

Run this command:
    
    jupyter notebook
    
Then open the notebook

## Resources

How to use jupyter notebooks with a virtual environment:
https://anbasile.github.io/programming/2017/06/25/jupyter-venv/

In [6]:
from IPython import __version__ as ipython_version
from pandas import __version__ as pandas_version
from bokeh import __version__ as bokeh_version
print("IPython - %s" % ipython_version)
print("Pandas - %s" % pandas_version)
print("Bokeh - %s" % bokeh_version)

IPython - 6.4.0
Pandas - 0.23.1
Bokeh - 0.13.0


In [114]:
import pandas as pd
df = pd.read_csv("data/annual-sheep-population-1000s-in.csv")
df.rename(index=str, columns={"Year": "year", "Annual sheep population (1000s) in England & Wales 1867 ? 1939": "sheep"}, inplace=True)
df.iloc[0:4]

Unnamed: 0,year,sheep
0,1867,2203.0
1,1868,2360.0
2,1869,2254.0
3,1870,2165.0


##### Drop the Last Row

In [115]:
df = df[0:73]
df.shape

(73, 2)

In [120]:
from datetime import datetime
df['year'] = df[['year']].apply(lambda row: int(row['year']), axis=1)
df['sqldate'] = df[['year']].apply(lambda row: datetime(year=row['year'], month=1, day=1), axis=1)

In [121]:


df.describe()

Unnamed: 0,year,sheep
count,73.0,73.0
mean,1903.0,1856.671233
std,21.217131,222.798617
min,1867.0,1338.0
25%,1885.0,1717.0
50%,1903.0,1853.0
75%,1921.0,1991.0
max,1939.0,2360.0


In [122]:
df.iloc[0:4]

Unnamed: 0,year,sheep,sqldate
0,1867,2203.0,1867-01-01
1,1868,2360.0,1868-01-01
2,1869,2254.0,1869-01-01
3,1870,2165.0,1870-01-01


In [123]:
import numpy as np

from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file

# def datetime(x):
#     return np.array(x, dtype=np.datetime64)

In [132]:
p1 = figure(x_axis_type="datetime", title="Sheep in England and Wales")
p1.grid.grid_line_alpha=1.0
p1.xaxis.axis_label = 'Year'
p1.yaxis.axis_label = 'Sheep (1000s)'
p1.line(df['sqldate'], df['sheep'], color='#A6CEE3', legend='Sheep')
p1.legend.location = "top_left"
show(p1)  # open a browser

In [126]:
p1 = figure(x_axis_type="datetime", title="Stock Closing Prices")
p1.grid.grid_line_alpha=0.3
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Price'

p1.line(datetime(AAPL['date']), AAPL['adj_close'], color='#A6CEE3', legend='AAPL')
p1.line(datetime(GOOG['date']), GOOG['adj_close'], color='#B2DF8A', legend='GOOG')
p1.line(datetime(IBM['date']), IBM['adj_close'], color='#33A02C', legend='IBM')
p1.line(datetime(MSFT['date']), MSFT['adj_close'], color='#FB9A99', legend='MSFT')
p1.legend.location = "top_left"



NameError: name 'AAPL' is not defined

In [46]:
import numpy as np

from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file

def datetime(x):
    return np.array(x, dtype=np.datetime64)

p1 = figure(x_axis_type="datetime", title="Stock Closing Prices")
p1.grid.grid_line_alpha=0.3
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Price'

p1.line(datetime(AAPL['date']), AAPL['adj_close'], color='#A6CEE3', legend='AAPL')
p1.line(datetime(GOOG['date']), GOOG['adj_close'], color='#B2DF8A', legend='GOOG')
p1.line(datetime(IBM['date']), IBM['adj_close'], color='#33A02C', legend='IBM')
p1.line(datetime(MSFT['date']), MSFT['adj_close'], color='#FB9A99', legend='MSFT')
p1.legend.location = "top_left"

aapl = np.array(AAPL['adj_close'])
aapl_dates = np.array(AAPL['date'], dtype=np.datetime64)

window_size = 30
window = np.ones(window_size)/float(window_size)
aapl_avg = np.convolve(aapl, window, 'same')

p2 = figure(x_axis_type="datetime", title="AAPL One-Month Average")
p2.grid.grid_line_alpha = 0
p2.xaxis.axis_label = 'Date'
p2.yaxis.axis_label = 'Price'
p2.ygrid.band_fill_color = "olive"
p2.ygrid.band_fill_alpha = 0.1

p2.circle(aapl_dates, aapl, size=4, legend='close',
          color='darkgrey', alpha=0.2)

p2.line(aapl_dates, aapl_avg, legend='avg', color='navy')
p2.legend.location = "top_left"

output_file("stocks.html", title="stocks.py example")

show(gridplot([[p1,p2]], plot_width=400, plot_height=400))  # open a browser

RuntimeError: bokeh sample data directory does not exist, please execute bokeh.sampledata.download()

In [54]:
import bokeh.sampledata
bokeh.sampledata.download()

Creating /Users/peterjmyers/.bokeh directory
Creating /Users/peterjmyers/.bokeh/data directory
Using data directory: /Users/peterjmyers/.bokeh/data


URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:749)>