In [60]:
import pandas as pd
import math
from bokeh.models import ColumnDataSource, Whisker, Range1d, NumeralTickFormatter
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
# Direct Bokeh output to the notebook so that show() renders figures inline
# in the cell output.
output_notebook()

In [46]:
def whisker_plot(task, coef, error, jobs=None, title=None, y_padding=0.5):
    """Build a Bokeh figure of per-job point estimates with error whiskers.

    One marker is drawn per job label at its ``coef`` value, and a whisker is
    drawn from ``coef - error`` to ``coef + error``. The y-range is fixed to
    the whisker extremes plus ``y_padding`` on each side.

    Args:
        task: Text appended to "Percent of time spent " to form the default
            title. Ignored when ``title`` is given.
        coef: Sequence of y-values, one per entry of ``jobs`` and in the
            same order.
        error: Sequence of whisker half-lengths aligned index-by-index with
            ``coef``.
        jobs: Optional sequence of categorical x-axis labels. Defaults to the
            nine job titles listed below.
        title: Optional full plot title overriding the default built from
            ``task``.
        y_padding: Margin added below the lowest whisker end and above the
            highest one when fixing the y-range.

    Returns:
        The configured figure; pass it to ``show`` to render it.

    Raises:
        ValueError: If ``jobs`` is empty, or if ``coef`` and ``error`` do not
            each contain exactly one value per job.
    """
    if jobs is None:
        jobs = ["Data scientist", "Software engineer", "Data analyst", "Research scientist",
                "Research assistant", "Consultant", "Data Engineer", "Business Analyst", "Manager"]
    else:
        jobs = list(jobs)
    coef = list(coef)
    error = list(error)

    # Validate before touching Bokeh: mismatched inputs would otherwise raise
    # a bare IndexError or be silently truncated / misaligned on the plot.
    if not jobs:
        raise ValueError("whisker_plot needs at least one job label")
    if not (len(coef) == len(error) == len(jobs)):
        raise ValueError(
            "coef and error must each have one value per job: "
            "got %d coef, %d error, %d jobs" % (len(coef), len(error), len(jobs))
        )

    if title is None:
        title = "Percent of time spent " + task

    p = figure(x_range=jobs, plot_height=350, title=title,
               toolbar_location=None, tools="")
    p.scatter(x=jobs, y=coef, radius=0.07)

    # Whisker endpoints: each estimate minus / plus its own error value.
    lower = [c - e for c, e in zip(coef, error)]
    upper = [c + e for c, e in zip(coef, error)]
    source_error = ColumnDataSource(data=dict(base=jobs, lower=lower, upper=upper))
    p.add_layout(
        Whisker(source=source_error, base="base", upper="upper", lower="lower")
    )

    # Rotate the category labels 45 degrees.
    p.xaxis.major_label_orientation = math.pi/4
    p.y_range = Range1d(min(lower) - y_padding, max(upper) + y_padding)
    return p

In [47]:
# clean data plot
# Percent-of-time values for cleaning data, one per job title, in the order
# whisker_plot places the jobs on the x-axis.
data_clean_coef = [
    29.13, 24.79, 31.30, 25.13, 25.05, 29.10, 30.21, 29.95, 28.13,
]
# Whisker half-lengths, paired index-by-index with the values above.
data_clean_error = [
    0.6462, 0.6882, 0.6977, 0.9106, 1.105, 0.9459, 0.9279, 0.9118, 1.031,
]
clean_data_plot = whisker_plot(
    task="cleaning data",
    coef=data_clean_coef,
    error=data_clean_error,
)
show(clean_data_plot)

In [48]:
# data viz plot
# Percent-of-time values for visualizing data, one per job title, in the
# order whisker_plot places the jobs on the x-axis.
data_viz_coef = [
    14.69, 14.48, 17.15, 15.03, 14.71, 16.11, 14.11, 16.63, 15.93,
]
# Whisker half-lengths, paired index-by-index with the values above.
data_viz_error = [
    0.4157, 0.4427, 0.4488, 0.5858, 0.7111, 0.6084, 0.5969, 0.5865, 0.6635,
]
data_viz_plot = whisker_plot(
    task="visualizing data",
    coef=data_viz_coef,
    error=data_viz_error,
)
show(data_viz_plot)

In [50]:
# model build plot
# Percent-of-time values for building models, one per job title, in the
# order whisker_plot places the jobs on the x-axis.
model_build_coef = [
    16.73, 18.27, 12.40, 19.61, 19.41, 14.69, 15.43, 12.55, 13.57,
]
# Whisker half-lengths, paired index-by-index with the values above.
model_build_error = [
    0.5995, 0.6384, 0.6473, 0.8448, 1.026, 0.8775, 0.8608, 0.8459, 0.9568,
]
model_build_plot = whisker_plot(
    task="building models",
    coef=model_build_coef,
    error=model_build_error,
)
show(model_build_plot)

In [52]:
# model production plot
# Percent-of-time values for putting models into production, one per job
# title, in the order whisker_plot places the jobs on the x-axis.
model_prod_coef = [
    7.820, 7.052, 4.865, 5.453, 5.555, 4.196, 8.268, 4.378, 5.678,
]
# Whisker half-lengths, paired index-by-index with the values above.
model_prod_error = [
    0.3918, 0.4173, 0.4230, 0.5521, 0.6703, 0.5735, 0.5626, 0.5529, 0.6154,
]
model_prod_plot = whisker_plot(
    task="putting models into production",
    coef=model_prod_coef,
    error=model_prod_error,
)
show(model_prod_plot)

In [53]:
# communicating insights plot
# Percent-of-time values for communicating insights, one per job title, in
# the order whisker_plot places the jobs on the x-axis.
comm_in_coef = [
    12.71, 9.793, 14.42, 10.82, 9.43, 13.19, 10.06, 14.83, 13.65,
]
# Whisker half-lengths, paired index-by-index with the values above.
comm_in_error = [
    0.4554, 0.4850, 0.4917, 0.6418, 0.7791, 0.6667, 0.6540, 0.6427, 0.7269,
]
comm_in_plot = whisker_plot(
    task="communicating insights",
    coef=comm_in_coef,
    error=comm_in_error,
)
show(comm_in_plot)

In [73]:
# average salary plot
# Categorical x-axis labels; the two value lists below follow this order.
jobs = [
    "Data scientist", "Software engineer", "Data analyst", "Research scientist",
    "Research assistant", "Consultant", "Data Engineer", "Business Analyst", "Manager",
]

# Average salary per job and the half-length of its whisker.
# NOTE(review): the error values look like standard errors or CI half-widths;
# confirm against the model output they were copied from.
salary_coef = [
    105046.51, 93363.33, 71014.82, 76336.89, 45127.40,
    88684.49, 91159.04, 72973.68, 121576.86,
]
salary_error = [
    6317.60, 6456.82, 6718.99, 7820.62, 9339.76,
    8060.51, 8332.38, 7816.47, 8475.37,
]

# Whisker endpoints: each salary minus / plus its own error value.
lower = [salary - err for salary, err in zip(salary_coef, salary_error)]
upper = [salary + err for salary, err in zip(salary_coef, salary_error)]
source_error = ColumnDataSource(data=dict(base=jobs, lower=lower, upper=upper))

salary_plot = figure(
    x_range=jobs,
    plot_height=350,
    title="Average salary",
    toolbar_location=None,
    tools="",
)
salary_plot.scatter(x=jobs, y=salary_coef, radius=0.07)

salary_whisker = Whisker(source=source_error, base="base", upper="upper", lower="lower")
salary_plot.add_layout(salary_whisker)

# Rotate the category labels 45 degrees.
salary_plot.xaxis.major_label_orientation = math.pi/4
# Fix the y-range to the whisker extremes with a 1000-unit margin on each side.
salary_plot.y_range = Range1d(min(lower)-1000, max(upper)+1000)
# Format the y-axis ticks as dollars with thousands separators and two decimals.
salary_plot.yaxis[0].formatter = NumeralTickFormatter(format="$0,0.00")

show(salary_plot)