## Bias - Variance
### Explore the bias-variance tradeoff numerically

In [1]:
%matplotlib inline
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import numpy as np
import numpy.random as npr
import numpy.linalg as nlg
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel 

In [2]:
def generate_fit(n_data, degree):
    """Fit a polynomial to noisy samples and plot training vs. validation fit.

    Samples are drawn from ``log(x) + 0.5*sin(3x) + 0.5*x`` on [1, 4] with
    Gaussian noise on y.  A polynomial of the requested degree is fitted to
    ``n_data`` randomly chosen training points by (ridge-regularised) least
    squares, and the fitted curve is drawn over both the training points and
    an independently drawn validation set, with the R^2 score of each shown
    in the subplot title.

    The random generator is re-seeded with a fixed value on every call, so
    the same ``(n_data, degree)`` always produces the same data and figure.

    Args:
        n_data: Number of training points to sample (out of ``NVAL`` = 50
            candidate points).
        degree: Polynomial degree.  If it is not strictly smaller than
            ``n_data`` it is reduced to ``n_data - 1`` and a message is
            printed.

    Returns:
        A tuple ``(train_r2, validation_r2)``.
    """
    lbd = 0.00  # ridge penalty weight; 0 means plain least squares
    LB = 1
    UB = 4
    YNOISE = 0.3  # std. dev. of the noise added to y
    XNOISE = 0.0  # std. dev. of the noise added to x
    NVAL = 50  # size of the candidate pool and of the validation set

    if degree >= n_data:
        print("Reducing degree to equal one less than the number of points!")
        degree = n_data - 1

    def func(x):
        """Noise-free target function."""
        return np.log(x) + 0.5 * np.sin(3 * x) + x * 0.5

    def x_to_X(x):
        """Design matrix with columns x**0, x**1, ..., x**degree."""
        return np.vander(x, degree + 1, increasing=True)

    # Fixed seed: the draws below must stay in this order to keep the data
    # reproducible from call to call.
    npr.seed(13371387)

    x_raw = np.linspace(LB, UB, num=NVAL) + npr.normal(scale=XNOISE, size=NVAL)
    y_raw = func(x_raw) + npr.normal(scale=YNOISE, size=NVAL)

    # Random subset of the candidate pool serves as the training set.
    ixes = npr.permutation(NVAL)
    x = x_raw[ixes[:n_data]]
    y = y_raw[ixes[:n_data]]

    # Independent noise realisation on the same grid for validation.
    xv = np.linspace(LB, UB, num=NVAL) + npr.normal(scale=XNOISE, size=NVAL)
    yv = func(xv) + npr.normal(scale=YNOISE, size=NVAL)

    X = x_to_X(x)

    # Solve the (regularised) normal equations directly instead of forming
    # an explicit inverse, which is less accurate for ill-conditioned X^T X.
    gram = X.T @ X + lbd * n_data * np.eye(degree + 1)
    coef = nlg.solve(gram, X.T @ y)

    x_fit = np.linspace(np.min(xv), np.max(xv), num=NVAL)
    y_fit = x_to_X(x_fit) @ coef

    # Clear the whole figure; plt.cla() would only clear (or implicitly
    # create) a single axes rather than resetting the figure.
    plt.clf()

    # training plot
    plt.subplot(1, 2, 1)
    plt.plot(x_fit, y_fit)
    plt.scatter(x, y)
    plt.ylim((0, 4))

    ytfit = X @ coef
    st2 = np.sum((y - np.mean(y))**2)  # total sum of squares
    rt2 = np.sum((y - ytfit)**2)  # residual sum of squares
    train_r2 = 1 - rt2/st2
    plt.title(f"Train set: $R^2$ {train_r2:0.2f}")

    # validation plot
    plt.subplot(1, 2, 2)
    plt.plot(x_fit, y_fit)
    plt.scatter(xv, yv)
    plt.ylim((0, 4))

    yvfit = x_to_X(xv) @ coef
    sv2 = np.sum((yv - np.mean(yv))**2)
    rv2 = np.sum((yv - yvfit)**2)
    val_r2 = 1 - rv2/sv2
    plt.title(f"Test set: $R^2$ {val_r2:0.2f}")

    # epilogue
    plt.gcf().set_size_inches((14, 6))
    plt.show()

    return (train_r2, val_r2)

In [3]:
# Interactive explorer: one slider per generate_fit argument.  The plot is
# redrawn whenever a slider moves.
interact(
    generate_fit,
    n_data=widgets.IntSlider(value=10, min=2, max=30, step=1),
    degree=widgets.IntSlider(value=3, min=1, max=5, step=1),
)

interactive(children=(IntSlider(value=10, description='n_data', max=30, min=2), IntSlider(value=3, description…

<function __main__.generate_fit(n_data, degree)>