In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, FloatSlider
from bokeh.io import output_notebook, push_notebook, show, curdoc
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper, Slider
from bokeh.layouts import widgetbox, row
import numpy as np
from sklearn.metrics import mean_squared_error
# Route Bokeh output to inline notebook cells; must run before any show().
output_notebook()

In [2]:
# Read the tab-separated Galton data file into a DataFrame.
df_galton = pd.read_csv('Galton.txt', delimiter='\t')
# Wrap the frame so Bokeh glyphs can refer to its columns by name.
source = ColumnDataSource(data=df_galton)

In [3]:
# Preview the first rows to check the columns were parsed as expected.
df_galton.head()

Unnamed: 0,Family,Father,Mother,Gender,Height,Kids
0,1,78.5,67.0,M,73.2,4
1,1,78.5,67.0,F,69.2,4
2,1,78.5,67.0,F,69.0,4
3,1,78.5,67.0,F,69.0,4
4,2,75.5,66.5,M,73.5,4


In [4]:
# Tooltip showing the data-space coordinates under the cursor.
hover = HoverTool(tooltips=[("(x,y)", "($x, $y)")])

# Colour points by the Gender column: 'M' -> green, 'F' -> blue.
mapper = CategoricalColorMapper(palette=['green', 'blue'], factors=['M', 'F'])

# Scatter of the Father column (x) against the Height column (y).
plot1 = figure(
    x_axis_label='Father',
    y_axis_label='Child',
    plot_width=500,
    tools=[hover, 'crosshair', 'box_select, lasso_select', 'pan,box_zoom'],
)
c = plot1.circle(
    'Father',
    'Height',
    source=source,
    color=dict(field='Gender', transform=mapper),
    hover_color='red',
)

In [5]:
# Least-squares degree-1 polynomial of Height on Father. polyfit returns the
# coefficients highest power first, hence (slope, intercept).
slope, intercept = np.polyfit(x=df_galton['Father'], y=df_galton['Height'], deg=1)

In [6]:
# A straight line only needs two points: evaluate it at the smallest and the
# largest value of the Father column.
father_heights = df_galton['Father']
X = [father_heights.min(), father_heights.max()]
y = list(map(lambda x: x * slope + intercept, X))
line_source = ColumnDataSource(dict(x=X, y=y))
# Draw the line on top of the scatter plot.
l = plot1.line('x', 'y', source=line_source, line_color='orange')

In [7]:
# Grid of candidate slopes; `step` is the spacing between neighbouring values.
weights, step = np.linspace(-1, 1.8, 100, retstep=True)

# Mean squared error of the line for each candidate slope, using the
# module-level `intercept`.
loss = []
for candidate_slope in weights:
    predicted = df_galton['Father'] * candidate_slope + intercept
    loss.append(mean_squared_error(df_galton['Height'], predicted))

loss_source = ColumnDataSource(dict(x=weights, y=loss))
# One-point source for the red marker, starting at the first candidate slope.
point_source = ColumnDataSource(dict(x=[weights[0]], y=[loss[0]]))

# Loss as a function of slope, with the marker drawn on top of the curve.
plot2 = figure(
    x_axis_label='Weights (slopes)',
    y_axis_label='Loss (Cost/Error)',
    tools=[hover, 'crosshair', 'box_select, lasso_select', 'pan,box_zoom'],
)
l2 = plot2.line('x', 'y', source=loss_source)
c2 = plot2.circle('x', 'y', source=point_source, size=5, color='red')

In [8]:
def update(slope, intercept):
    """Redraw both figures for the candidate line ``y = slope * x + intercept``.

    Parameters
    ----------
    slope : float
        Slope of the candidate line.
    intercept : float
        Intercept of the candidate line.

    Side effects
    ------------
    * Moves the line glyph ``l`` to the candidate line.
    * Recomputes the loss curve ``l2`` over ``weights`` for the given
      intercept.
    * Moves the marker ``c2`` to ``(slope, MSE of the candidate line)``.
    * Calls ``push_notebook()`` to send the changes to the displayed plots.
    """
    father = df_galton['Father']
    height = df_galton['Height']

    # Move the line by re-evaluating it at its two end points.
    l.data_source.data['y'] = [x * slope + intercept for x in X]

    # The loss-vs-slope curve depends on the intercept. Rebuild it for the
    # current intercept, otherwise the marker below would not lie on the
    # curve whenever the intercept differs from the one the curve was drawn
    # with.
    l2.data_source.data['y'] = [
        mean_squared_error(height, father * w + intercept) for w in weights
    ]

    # Vectorised prediction for every row, then a single MSE for the marker.
    # Both coordinates are replaced in one assignment so x and y always
    # change together.
    point_loss = mean_squared_error(height, father * slope + intercept)
    c2.data_source.data = {'x': [slope], 'y': [point_loss]}

    push_notebook()

In [9]:
# Put the scatter/line figure and the loss figure side by side.
layout = row(children=[plot1, plot2])
# notebook_handle=True makes show() return a handle so that later
# push_notebook() calls can update this output in place.
show(obj=layout, notebook_handle=True)

In [10]:
# The slope slider covers the same range and spacing as the `weights` grid.
slope_slider = FloatSlider(value=-1, min=-1, max=1.8, step=step)
intercept_slider = FloatSlider(value=80, min=30, max=80, step=1)
# interact() calls update() once immediately and again on every slider change.
interact(update, slope=slope_slider, intercept=intercept_slider)

interactive(children=(FloatSlider(value=-1.0, description='slope', max=1.8, min=-1.0, step=0.02828282828282828…

<function __main__.update(slope, intercept)>

In [11]:
# Means of the Father and Height columns, read back from the Bokeh source.
tuple(source.data[column].mean() for column in ('Father', 'Height'))

(69.232850779510017, 66.760690423162586)

In [12]:
# Show the module-level line coefficients as a (slope, intercept) pair.
(slope, intercept)

(0.39938126589856471, 39.110386837075403)