In [None]:
import ipywidgets as widgets
from ipywidgets import interact, fixed

from mpl_toolkits.mplot3d import Axes3D

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

## Univariate linear regression

We will start with an example of univariate linear regression: fitting a straight line $y = wx + b$ to data with a single input variable $x$.

In [None]:
def generate_data(n, w_true, b_true, sigma_true):
    """Draw a noisy sample from the line ``y = w_true*x + b_true``.

    Parameters
    ----------
    n : int
        Number of points to draw.
    w_true : float
        Slope of the data-generating line.
    b_true : float
        Intercept of the data-generating line.
    sigma_true : float
        Factor applied to the standard-normal noise that is added to ``y``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)``, two arrays of length ``n``; ``x`` comes from
        ``np.random.rand``.

    Notes
    -----
    Both draws use NumPy's global random state, so call ``np.random.seed``
    beforehand to get reproducible data.
    """
    inputs = np.random.rand(n)
    # The noise is drawn after the inputs, so the global random stream is
    # always consumed in the same order.
    noise = np.random.randn(n) * sigma_true
    targets = w_true * inputs + b_true + noise
    return inputs, targets

In [None]:
# Non-linear toy data: the curve 0.5*x + 2*x**2 sampled on a regular grid and
# stored as column vectors.
# NOTE: in the cells visible here, x and y are reassigned by the later
# "Generate data" cell before anything reads these values.
np.random.seed(1)  # nothing random is drawn in this cell; the seed only resets the global state
x = np.arange(-1, 1, 0.1)[:, np.newaxis]
y = 2*x**2 + 0.5*x

In [None]:
# Plotting linear regression lines
def plot_lr(x=None, y=None, w=1, b=0, show_errors=False, show_mae=False, show_mse=False, show_boxes=False, ax=None):
    """Scatter the data, draw the line ``y = w*x + b`` and optionally show its errors.

    Parameters
    ----------
    x, y : array-like
        Inputs and targets with the same number of elements. Both are
        flattened, so ``(n,)`` arrays and ``(n, 1)`` columns are treated alike.
    w : float
        Slope of the line to draw.
    b : float
        Intercept of the line to draw.
    show_errors : bool
        Draw a red vertical segment from every data point to the line.
    show_mae : bool
        Print the mean absolute error of the line on the data.
    show_mse : bool
        Print the mean squared error of the line on the data.
    show_boxes : bool
        Draw a red box for every data point whose width and height both equal
        the residual (in data units). The box already contains the vertical
        error segment, so this option takes precedence over ``show_errors``.
    ax : matplotlib Axes, optional
        Axes to draw on. When ``None`` the current pyplot axes are used.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``; the metrics are printed.

    Raises
    ------
    ValueError
        If ``x`` or ``y`` is missing or empty, or if their sizes differ.
    """
    if x is None or y is None:
        raise ValueError('plot_lr requires both x and y')

    # Flatten so that the residuals and the printed metrics are plain scalars
    # regardless of whether the data came in as (n,) or (n, 1).
    x = np.ravel(x)
    y = np.ravel(y)
    if x.size == 0:
        raise ValueError('plot_lr requires at least one data point')
    if x.size != y.size:
        raise ValueError('x and y must contain the same number of elements')

    # Explicit None check: do not depend on the truthiness of an Axes object.
    if ax is None:
        ax = plt.gca()
    ax.scatter(x, y)

    x_min, x_max = np.min(x), np.max(x)
    epsilon = 0.3  # margin added around the data on every side
    ax.set_xlim(x_min - epsilon, x_max + epsilon)
    ax.set_ylim(np.min(y) - epsilon, np.max(y) + epsilon)

    # Two end points are enough to draw a straight line.
    x_plot = np.asarray([x_min, x_max])
    ax.plot(x_plot, w*x_plot + b, c='black')

    pred = w*x + b
    residuals = y - pred

    if show_boxes:
        # One closed outline per point: top, left, bottom and right side.
        for xi, yi, pi, di in zip(x, y, pred, residuals):
            ax.plot([xi, xi - di, xi - di, xi, xi], [yi, yi, pi, pi, yi], c='r')
    elif show_errors:
        for xi, yi, pi in zip(x, y, pred):
            ax.plot([xi, xi], [yi, pi], c='r')

    if show_mae:
        print('Mean absolute error: %.3f' % np.mean(np.abs(residuals)))
    if show_mse:
        print('Mean squared error: %.3f' % np.mean(residuals**2))
    plt.show()

In [None]:
# --- Synthetic linear data ---
# Seed NumPy's global generator so the sampled points are the same on every run.
np.random.seed(1)

# Ground-truth parameters of the data-generating process.
w_true = 0.6                # slope of the data-generating line
b_true = 0.2                # intercept of the data-generating line
sigma_true = np.sqrt(0.02)  # noise standard deviation, i.e. the square root of a variance of 0.02
n_samples = 5               # sample size

x, y = generate_data(n=n_samples, w_true=w_true, b_true=b_true, sigma_true=sigma_true)

Below you can change the values of the parameters $w$ (slope of the regression line) and $b$ (intercept of the regression line) and see how they affect the mean absolute error and the mean squared error.

You can also change the data-generating parameters in the cell above and experiment with different data sets.

In [None]:
%matplotlib inline

from ipywidgets import interact, fixed

ww = widgets.FloatSlider(min=-5, max=5)
bb = widgets.FloatSlider(min=-1, max=1)
show_errors = widgets.Checkbox(description='Show error bars')
show_boxes = widgets.Checkbox(description='Show error boxes')
show_mae = widgets.Checkbox(description='Show mean absolute error')
show_mse = widgets.Checkbox(description='Show mean squared error')

interact(plot_lr, x=fixed(x), y=fixed(y), w=ww, b=bb, show_errors=show_errors, show_mae=show_mae, show_mse=show_mse, show_boxes=show_boxes, ax=fixed(None));

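Next, we will find the optimal values of $w$ and $b$ by minimizing the mean squared error. For a single input variable the minimizer has the closed form $\hat{w} = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2}$ and $\hat{b} = \bar{y} - \hat{w}\bar{x}$, where $\bar{x}$ and $\bar{y}$ are the sample means of $x$ and $y$.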

In [None]:
# Closed-form least-squares solution for a single input variable:
#   w_hat = sum((x_i - x_bar) * (y_i - y_bar)) / sum((x_i - x_bar)**2)
#   b_hat = y_bar - w_hat * x_bar
x_bar = np.mean(x)
y_bar = np.mean(y)

# Deviations from the mean. Flattening keeps the element-wise product aligned
# even if one array is (n,) and the other is (n, 1).
x_centered = np.ravel(x) - x_bar
y_centered = np.ravel(y) - y_bar

numerator = np.sum(x_centered*y_centered)
denominator = np.sum(x_centered**2)

# With a single sample, or with identical x values, the slope is undefined;
# fail with a clear message instead of printing nan/inf.
if denominator == 0:
    raise ValueError('Cannot fit a slope: all x values are identical')

w_hat = numerator/denominator
b_hat = y_bar - w_hat*x_bar

print('w=%f' % w_hat)
print('b=%f' % b_hat)

In [None]:
# Plot the data together with the fitted least-squares line on explicit axes,
# with labels so that the figure can be read on its own.
fig, ax = plt.subplots()
ax.scatter(x, y, label='data')

# Two end points are enough to draw a straight line.
x_plot = np.asarray([np.min(x), np.max(x)])
y_plot = w_hat*x_plot + b_hat

ax.plot(x_plot, y_plot, c='black', label='least-squares fit')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Least-squares regression line')
ax.legend()
plt.show()

In [None]:
# Evaluate the fitted line on the data it was fitted to.
pred = w_hat*x + b_hat
# scikit-learn's signature is mean_squared_error(y_true, y_pred): the observed
# targets come first, the predictions second.
mse = mean_squared_error(y, pred)

print('MSE: %.3f' % mse)