In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
# Only use this if running the notebook on your local machine
#plt.style.use('notebook.mplstyle')

In [None]:
# Parameters
n_points = 50
x_lim = [-5, 5]
# Ground-truth parameters of the line that generates the data: y = kx + b
k = 1
b = 1
# Fixed seed for the global NumPy RNG so that "Restart Kernel -> Run All"
# reproduces the same noise (and the same values for every later
# np.random.randn draw in the notebook)
seed = 0
np.random.seed(seed)

# Generate linearly spaced data points
x = np.linspace(x_lim[0], x_lim[1], n_points)
y = k*x + b
# Add normally distributed noise (standard normal, one sample per data point)
y = y + np.random.randn(n_points)

# Visualize what we have
fig, ax = plt.subplots(1, 1)
ax.plot(x, y, 'o', alpha=0.5)
ax.set(xlabel='x', ylabel='y');

In [None]:
# Random guess for k and b: the true values perturbed by standard normal noise.
# Both are 1-element arrays (np.random.randn(1)), hence the [0] indexing
# when they are formatted for the legend below.
k_guess = np.random.randn(1) + k
b_guess = np.random.randn(1) + b

# Make model predictions
y_hat = k_guess*x + b_guess
# Compute the mean squared error (MSE)
e = y - y_hat
e2 = e**2
# Average (rather than sum) the squared errors so that the number shown in the
# title really is an MSE and is on the same scale as the MSE loss surface,
# which is computed with .mean() as well
mse = e2.mean()

# Visualize what we have
fig, ax = plt.subplots(1, 1)
# Stacking gives 2 x n_points arrays; each column is drawn as its own red
# segment from a data point (x, y) to its prediction (x, y_hat), i.e. the residual
ax.plot(np.stack([x, x]), np.stack([y, y_hat]), 'r-')
ax.plot(x, y, 'o', alpha=0.5)
ax.plot(x, y_hat, 'k-', alpha=0.5, label='y={:.1f}x + {:.1f}'.format(k_guess[0], b_guess[0]))
ax.set(xlabel='x', ylabel='y', title='MSE: {:.1f}'.format(mse))
ax.legend();

In [None]:
# Get a mesh grid of k and b values
n_res = 51
k_values = np.linspace(k-3, k+3, n_res)
b_values = np.linspace(b-4, b+4, n_res)
K, B = np.meshgrid(k_values, b_values)

# Compute the MSE for every location on the grid in one broadcast operation
# instead of a Python double loop.
# A trailing axis on K and B lets them broadcast against the 1-D x, so
# Y_hat[i, j, :] holds the predictions of the model y = K[i, j]*x + B[i, j].
Y_hat = K[..., np.newaxis]*x + B[..., np.newaxis]
# Average the squared errors over the data-point axis; MSE has the shape of K and B
MSE = np.mean((y - Y_hat)**2, axis=-1)

# Plot the MSE surface with the guessed parameters (k_guess, b_guess) on top
fig, ax = plt.subplots(1, 1)
ch = ax.contourf(K, B, MSE, 50, cmap=cm.coolwarm)
ax.plot(k_guess, b_guess, 'ko')
ax.set(xlabel='k', ylabel='b')
# Trailing semicolon keeps the Colorbar repr out of the cell output
fig.colorbar(ch, label='MSE');

In [None]:
# Gradient descent hyper parameters
n_steps = 100
learning_rate = 0.1

# Create a matrix with colors (RGB values) ranging from light gray to black;
# row i is the color used for the parameters after i update steps
c_vals = np.linspace(0.85, 0., n_steps+1)
colors = np.stack([c_vals, c_vals, c_vals]).T

# Initialize vectors for storing parameter values at each update step
# (index 0 is the starting point, index i+1 the result of step i)
k_guess_gd = np.zeros(n_steps+1)
b_guess_gd = np.zeros(n_steps+1)

# Start from our initial random guess. k_guess and b_guess are 1-element
# arrays, so pick out element 0 explicitly: assigning a whole ndim > 0 array
# to a single slot relies on an implicit array-to-scalar conversion that
# NumPy has deprecated.
k_guess_gd[0] = k_guess[0]
b_guess_gd[0] = b_guess[0]

# Make model predictions
y_hat_gd = k_guess_gd[0]*x + b_guess_gd[0]
# Compute the error
e_gd = y - y_hat_gd

# Left panel: fitted line after every step; right panel: path on the MSE surface
fig, axs = plt.subplots(1, 2, figsize=[12, 5])
axs[0].plot(x, y_hat_gd, '-', color=colors[0, :])
axs[0].set(xlabel='x', ylabel='y')
axs[1].contourf(K, B, MSE, 50, cmap=cm.coolwarm)
axs[1].plot(k_guess_gd[0], b_guess_gd[0], 'o', color=colors[0, :])
axs[1].set(xlabel='k', ylabel='b')

for i in range(n_steps):

    # Compute the gradient of the loss 0.5*mean(e**2) with respect to k and b.
    # Compared with the gradient of the plain MSE the constant factor 2 is
    # left out, which only rescales the effective learning rate.
    k_der = - np.mean(e_gd * x)
    b_der = - np.mean(e_gd)

    # Take a gradient descent step
    k_guess_gd[i+1] = k_guess_gd[i] - learning_rate * k_der
    b_guess_gd[i+1] = b_guess_gd[i] - learning_rate * b_der

    # Make new model predictions
    y_hat_gd = k_guess_gd[i+1]*x + b_guess_gd[i+1]
    # Compute the error for the updated parameters
    e_gd = y - y_hat_gd

    # Visualize the progress
    axs[0].plot(x, y_hat_gd, '-', color=colors[i+1, :])
    axs[1].plot(k_guess_gd[i+1], b_guess_gd[i+1], 'o', color=colors[i+1, :])

# Plot final results (y_hat_gd now holds the predictions after the last step)
axs[0].plot(x, y, 'o', alpha=0.5)
axs[0].plot(x, y_hat_gd, 'k-', label='y={:.1f}x + {:.1f}'.format(k_guess_gd[-1], b_guess_gd[-1]))
axs[0].legend()
# Dotted line connects the successive parameter values;
# the trailing semicolon suppresses the Line2D repr in the cell output
axs[1].plot(k_guess_gd, b_guess_gd, 'k:');

In [None]:
# Reference fit with scikit-learn's ordinary least squares implementation:
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression
from sklearn.linear_model import LinearRegression

# Reshape x into a single-feature column of shape (x.size, 1) for the estimator
x2d = x.reshape(-1, 1)
model = LinearRegression(fit_intercept=True)
model.fit(x2d, y)

# Fitted line and its legend entry (slope = first coefficient, plus intercept)
y_fit = model.predict(x2d)
model_label = '{:.1f}x + {:.1f}'.format(model.coef_[0], model.intercept_)

# Show the data together with the fitted line
fig, ax = plt.subplots()
ax.plot(x, y, 'o', alpha=0.5)
ax.plot(x, y_fit, 'k-', alpha=0.5, label=model_label)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend();

### Presentation figures

In [None]:
# Larger font for every figure created from here on (global matplotlib setting)
plt.rcParams['font.size'] = 14

# Data plot: the noisy samples together with a hand-picked line
slope_guess = 3
bias_guess = 0.7
y_hat = slope_guess*x + bias_guess
model_label = 'y={:.1f}x + {:.1f}'.format(slope_guess, bias_guess)

# Small square figure with extra left/bottom margin for the axis labels
fig, ax = plt.subplots(figsize=[4, 4])
fig.subplots_adjust(left=0.2, right=0.95, bottom=0.2, top=0.95)

ax.plot(x, y, 'bo', alpha=0.5)
ax.plot(x, y_hat, 'k-', label=model_label)

# Fixed axis limits and a transparent axes background
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_xlim([-6, 6])
ax.set_ylim([-5, 8])
ax.set_facecolor('none')
ax.legend(facecolor='none', frameon=False, handlelength=1)

# Write the figure to the working directory
fig.savefig('LineRegData.png', dpi=300)

In [None]:
# Contour plot of the MSE grid, drawn with bias (B) on the horizontal axis
# and slope (K) on the vertical axis
fig, ax = plt.subplots(figsize=[4, 4])
fig.subplots_adjust(left=0.2, right=0.95, bottom=0.2, top=0.95)
ch = ax.contourf(B, K, MSE, 50, cmap=cm.coolwarm)
ax.set_xlabel('bias')
ax.set_ylabel('slope')
# Write the figure to the working directory
fig.savefig('LineRegContourPlot.png', dpi=300)

In [None]:
# 3D plot: the MSE grid rendered as a surface over the (bias, slope) plane
fig = plt.figure(figsize=[4, 4])
ax = fig.add_subplot(1, 1, 1, projection='3d')
fig.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
ch = ax.plot_surface(B, K, MSE, cmap=cm.coolwarm)

# Label the horizontal axes, hide the z ticks and make the background transparent
ax.set_xlabel('bias')
ax.set_ylabel('slope')
ax.set_zticks([])
ax.set_facecolor('none')
# Rotated z-axis label, pulled towards the axis with a negative pad
ax.set_zlabel('loss', rotation=90, labelpad=-10)
# Camera position: 30 degrees elevation, 45 degrees azimuth
ax.view_init(elev=30, azim=45)

# Write the figure to the working directory
fig.savefig('LineRegSurfacePlot.png', dpi=300)