In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV
from sklearn.model_selection import KFold
# Only use this if running the notebook on your local machine
#plt.style.use('notebook.mplstyle')

In [None]:
# Parameters
n_points = 11
x_lim = [-1.5, 1.5]
# Seed for the noise generator so that "Restart & Run All" reproduces the same data
noise_seed = 0
# True weights of the generating polynomial (note: no bias term is added):
#   y = w0*x + w1*x^2 + w2*x^3 + ... + w9*x^10 + noise
# Only the first three weights are non-zero, i.e. the underlying function is cubic.
w = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0]).reshape(10, 1)
powers = np.arange(1, w.size+1)

# Generate linearly spaced data points
x_data = np.linspace(x_lim[0], x_lim[1], n_points)
# Compute the powers of x: one column per entry of `powers`, one row per data point
X_train = np.stack([x_data**i for i in powers]).T
# @ denotes matrix multiplication; the result is a column vector of shape (n_points, 1)
y_train = X_train @ w
# Add standard-normal noise from a locally seeded generator (the global
# NumPy random state is left untouched)
rng = np.random.default_rng(noise_seed)
y_train = y_train + rng.standard_normal((n_points, 1))

# Visualize what we have
fig, ax = plt.subplots(1, 1)
ax.plot(x_data, y_train, 'o', alpha=0.5)
ax.set(xlabel='x', ylabel='y');

In [None]:
# Create an array of linearly spaced dense x values to show
# model prediction between the original data points (interpolation)
x_dense = np.linspace(x_lim[0], x_lim[1], 101)
X_dense = np.stack([x_dense**i for i in powers]).T

# Fit a polynomial with one term per entry of `powers` using ordinary
# (unregularized) linear regression
lin_reg = LinearRegression(fit_intercept=True)
lin_reg.fit(X_train, y_train)
y_hat = lin_reg.predict(X_dense)

# Visualize what we have
fig, ax = plt.subplots(1, 1)
ax.plot(x_data, y_train, 'o', alpha=0.5, label='Data')
ax.plot(x_dense, y_hat, 'k-', label='Model')
ax.set(xlabel='x', ylabel='y');
ax.legend()

# Print the fitted polynomial function.
# The terms are derived from `powers` instead of a hard-coded degree, and the
# coefficients are flattened so the printout works whether the model was fit
# on a 1-D or a 2-D (single column) target.
coefs = np.ravel(lin_reg.coef_)
intercept = np.ravel(lin_reg.intercept_)[0]
terms = ['+ {:1.3g}x^{:g}'.format(c, int(p)) for c, p in zip(coefs, powers)]
polynom = '{:1.3g} '.format(intercept) +  ' '.join(terms)
print(polynom)

In [None]:
# Define alpha (regularization) values to try: 8 log-spaced values from 1e-4 to 1e3
alpha_vals = np.logspace(-4, 3, 8)

# Create a figure window with enough subplot rows for all alpha values
n_cols = 4
n_rows = -(-alpha_vals.size // n_cols)  # ceiling division
fig = plt.figure(figsize=[17, 6])

# Loop over all alpha values
for i, alpha in enumerate(alpha_vals):
    # Create a subplot (subplot indices are 1-based)
    ax = fig.add_subplot(n_rows, n_cols, i+1)

    # Ridge regression.
    # max_iter must be an integer: scikit-learn validates this parameter and
    # newer releases reject a float such as 1e6 when fit() is called.
    ridge_reg = Ridge(fit_intercept=True, alpha=alpha, max_iter=1000000)
    ridge_reg.fit(X_train, y_train)
    y_hat = ridge_reg.predict(X_dense)

    # Plot the training data together with the fitted model
    ax.plot(x_data, y_train, 'o', alpha=0.5)
    ax.plot(x_dense, y_hat, 'k-', label='alpha={:1.5g}'.format(alpha))
    ax.set(xticks=[], yticks=[]);
    ax.legend(fontsize=12)

In [None]:
# K-fold cross-validation
n_folds = 5
# With shuffle=True and random_state=None, every call to kf.split() draws a
# new random partition. A fixed integer seed makes each call return the same
# folds, so the partition plotted here is the one any later kf.split() yields.
kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)

fig = plt.figure(figsize=[17, 4])

# Loop over all splits; subplot indices are 1-based
for split_id, (train_index, val_index) in enumerate(kf.split(X_train), start=1):
    # Create a subplot
    ax = fig.add_subplot(1, n_folds, split_id)
    # Plot training and validation data points with separate colors
    ax.plot(x_data[train_index], y_train[train_index], 'o', label='Train.')
    ax.plot(x_data[val_index], y_train[val_index], 'o', label='Val.')
    ax.set(xlabel='x', ylabel='y', xticks=[], yticks=[]);
    ax.legend()
    

In [None]:
# Materialize the folds once so that every alpha value is scored on exactly
# the same train/validation partitions; calling kf.split() anew per alpha can
# yield different shuffles when the splitter is not seeded.
cv_splits = list(kf.split(X_train))
n_samples = X_train.shape[0]

mse_per_alpha = []
# Loop over all alpha values
for alpha in alpha_vals:

    # max_iter must be an integer (scikit-learn validates this parameter)
    ridge_reg = Ridge(fit_intercept=True, alpha=alpha, max_iter=1000000)

    # Sum of squared validation errors accumulated over all folds
    sq_err_sum = 0.0
    # Loop over all division into training and validation sets
    for train_index, val_index in cv_splits:
        X_cv_train, X_cv_val = X_train[train_index, :], X_train[val_index, :]
        y_cv_train, y_cv_val = y_train[train_index, :], y_train[val_index, :]
        # Fit a model and accumulate the squared errors of the validation set predictions
        ridge_reg.fit(X_cv_train, y_cv_train)
        y_hat_cv_val = ridge_reg.predict(X_cv_val)
        sq_err_sum += np.sum((y_cv_val - y_hat_cv_val)**2)
    # Each sample is in the validation set of exactly one fold, so dividing the
    # pooled squared error by the sample count gives the mean squared error
    mse_per_alpha.append(sq_err_sum / n_samples)

# Plot the found MSE value for each alpha value
fig, ax = plt.subplots(1, 1)
ax.plot(alpha_vals, mse_per_alpha, 'ko-')
ax.set(xlabel='alpha', ylabel='MSE', xscale='log', yscale='log');

In [None]:
# Ridge regression that selects its own regularization strength from
# `alpha_vals` via built-in cross-validation (cv=None keeps the library default)
ridge_reg_cv = RidgeCV(alphas=alpha_vals, fit_intercept=True, cv=None)
ridge_reg_cv.fit(X_train, y_train)
# Evaluate the selected model on the dense grid for a smooth curve
y_hat = ridge_reg_cv.predict(X_dense)

# Show the data together with the model; the legend reports the chosen alpha
model_label = 'alpha={:1.4g}'.format(ridge_reg_cv.alpha_)
fig, ax = plt.subplots()
ax.plot(x_data, y_train, 'o', alpha=0.5, label='Data')
ax.plot(x_dense, y_hat, 'k-', label=model_label)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend()