In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, FloatSlider
from bokeh.io import output_notebook, push_notebook, show, curdoc
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper, Slider, CustomJS
from bokeh.layouts import widgetbox, row, column
import numpy as np
from sklearn.metrics import mean_squared_error
# Route Bokeh output to inline notebook display; must run before any show() call.
output_notebook()

In [3]:
# Read the tab-delimited Galton height table and wrap it in a Bokeh data
# source so that glyphs and JS callbacks can address its columns by name.
df_galton = pd.read_csv('Galton.txt', delimiter='\t')
source = ColumnDataSource(data=df_galton)
# Display the column names the source exposes (the DataFrame index is included).
source.data.keys()

dict_keys(['Family', 'Father', 'Mother', 'Gender', 'Height', 'Kids', 'index'])

In [4]:
# Preview the first five rows of the loaded table.
df_galton.head(n=5)

Unnamed: 0,Family,Father,Mother,Gender,Height,Kids
0,1,78.5,67.0,M,73.2,4
1,1,78.5,67.0,F,69.2,4
2,1,78.5,67.0,F,69.0,4
3,1,78.5,67.0,F,69.0,4
4,2,75.5,66.5,M,73.5,4


In [5]:
# Scatter of father height against child height, coloured by the child's gender.
# `hover` is reused by the loss plot defined further down.
hover = HoverTool(tooltips=[("(x,y)", "($x, $y)")])
mapper = CategoricalColorMapper(factors=['M', 'F'], palette=['green', 'blue'])

scatter_tools = [hover, 'crosshair', 'box_select, lasso_select', 'pan,box_zoom']
plot1 = figure(
    x_axis_label='Father',
    y_axis_label='Child',
    plot_width=300,
    plot_height=300,
    tools=scatter_tools,
)

# Colour is looked up per point from the 'Gender' column through the mapper.
gender_colour = {'field': 'Gender', 'transform': mapper}
c = plot1.circle(
    x='Father',
    y='Height',
    source=source,
    hover_color='red',
    color=gender_colour,
)

In [6]:
# Degree-1 least-squares fit of child height on father height.
# np.polyfit returns the highest-order coefficient first: (slope, intercept).
slope, intercept = np.polyfit(x=df_galton['Father'], y=df_galton['Height'], deg=1)

In [7]:
# Two endpoints are enough to draw the fitted straight line over the data range.
father_heights = df_galton['Father']
X = [father_heights.min(), father_heights.max()]
y = list(map(lambda x: x * slope + intercept, X))

# 'i' and 's' carry the current intercept and slope (one copy per endpoint) so
# the JS slider callbacks can read and update them alongside the coordinates.
n_points = len(X)
line_source = ColumnDataSource(data={
    'x': X,
    'y': y,
    'i': np.full(n_points, intercept),
    's': np.full(n_points, slope),
})
l = plot1.line(x='x', y='y', source=line_source, line_color='orange')

In [8]:
# Sweep candidate slopes while holding the intercept at its fitted value and
# record the mean squared error of each candidate line. `step` (the spacing of
# the sweep) is reused as the slope slider's step size.
weights, step = np.linspace(-1, 1.8, 100, retstep=True)
# The loop variable is named `w` so it cannot be confused with (or, on
# Python 2, overwrite) the fitted `slope`.
loss = [
    mean_squared_error(df_galton['Height'], df_galton['Father'] * w + intercept)
    for w in weights
]
loss_source = ColumnDataSource({'x': weights, 'y': loss})

# Start the red marker at the fitted slope so that it agrees with the line
# initially drawn on plot1 (previously it started at weights[0], i.e. slope -1).
fitted_loss = mean_squared_error(df_galton['Height'], df_galton['Father'] * slope + intercept)
point_source = ColumnDataSource({'x': [float(slope)], 'y': [float(fitted_loss)]})

plot2 = figure(x_axis_label='Weights (slopes)', y_axis_label='Loss (Cost/Error)', plot_width=300, plot_height=300, tools=[hover, 'crosshair', 'box_select, lasso_select', 'pan,box_zoom'])
l2 = plot2.line('x', 'y', source=loss_source)
c2 = plot2.circle('x', 'y', source=point_source, color='red', size=5)

In [13]:
# Browser-side callback for the slope slider: redraws the line on plot1 with
# the new slope (keeping the current intercept) and moves the red marker on
# plot2 to (slope, MSE of that line over the whole data set).
# Every JS variable is declared with `var`; undeclared assignments create
# implicit globals and raise ReferenceError when the code runs in strict mode.
callback_slope = CustomJS(args=dict(line=line_source, source=source, point=point_source), code="""
    var line_data = line.data;
    var source_data = source.data;
    var point_data = point.data;
    var new_s = cb_obj.value;

    var x = line_data['x'];
    var y = line_data['y'];
    var i = line_data['i'];
    var s = line_data['s'];
    var p1 = point_data['x'];
    var p2 = point_data['y'];

    // Store the new slope and recompute the line endpoints.
    for (var j = 0; j < x.length; j++) {
        s[j] = new_s;
        y[j] = x[j] * new_s + i[j];
    }

    // Mean squared error of the new line against every observation.
    var father = source_data['Father'];
    var child = source_data['Height'];
    var sum = 0;
    for (var k = 0; k < child.length; k++) {
        var y_pred = father[k] * new_s + i[0];
        sum = sum + Math.pow((child[k] - y_pred), 2);
    }

    // Move the marker on the loss plot.
    for (var m = 0; m < p1.length; m++) {
        p1[m] = s[0];
        p2[m] = (sum / child.length);
    }

    line.change.emit();
    point.change.emit();
""")

# Start the slider at the fitted slope (clipped to the slider range) so the
# control agrees with the line that is initially drawn on plot1.
initial_slope = float(np.clip(slope, -1, 1.8))
slider_slope = Slider(start=-1, end=1.8, value=initial_slope, step=step, title="slope")
slider_slope.js_on_change('value', callback_slope)

In [14]:
# Browser-side callback for the intercept slider: redraws the line on plot1
# with the new intercept (keeping the current slope) and updates the red marker
# on plot2 to (current slope, MSE of that line over the whole data set).
# Every JS variable is declared with `var`; undeclared assignments create
# implicit globals and raise ReferenceError when the code runs in strict mode.
callback_intercept = CustomJS(args=dict(line=line_source, source=source, point=point_source), code="""
    var line_data = line.data;
    var source_data = source.data;
    var point_data = point.data;
    var new_i = cb_obj.value;

    var x = line_data['x'];
    var y = line_data['y'];
    var i = line_data['i'];
    var s = line_data['s'];
    var p1 = point_data['x'];
    var p2 = point_data['y'];

    // Store the new intercept and recompute the line endpoints.
    for (var j = 0; j < x.length; j++) {
        i[j] = new_i;
        y[j] = x[j] * s[j] + new_i;
    }

    // Mean squared error of the new line against every observation.
    var father = source_data['Father'];
    var child = source_data['Height'];
    var sum = 0;
    for (var k = 0; k < child.length; k++) {
        var y_pred = father[k] * s[0] + new_i;
        sum = sum + Math.pow((child[k] - y_pred), 2);
    }

    // Move the marker on the loss plot.
    for (var m = 0; m < p1.length; m++) {
        p1[m] = s[0];
        p2[m] = (sum / child.length);
    }

    line.change.emit();
    point.change.emit();
""")

# Start the slider at the fitted intercept (clipped to the slider range) so the
# control agrees with the line that is initially drawn on plot1.
initial_intercept = float(np.clip(intercept, 0, 100))
slider_intercept = Slider(start=0, end=100, value=initial_intercept, step=1, title="intercept")
slider_intercept.js_on_change('value', callback_intercept)

In [15]:
# Sliders on the top row, the scatter plot and the loss plot side by side below.
controls = row(slider_slope, slider_intercept)
plots = row(plot1, plot2)
layout = column(controls, plots)
show(layout, notebook_handle=True)

In [12]:
# Mean father height and mean child height, read back from the Bokeh source.
father_mean = source.data['Father'].mean()
height_mean = source.data['Height'].mean()
father_mean, height_mean

(69.232850779510017, 66.760690423162586)

In [13]:
# Display the fitted least-squares coefficients.
(slope, intercept)

(0.39938126589856471, 39.110386837075403)