In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt


The cell below plots the log of the true generalization error and the log of our predicted error as functions of the number of features $m$. Try different values of $n$ and $\sigma$ and report how close our prediction is to the true error.


In [None]:
# --- Parameters to experiment with -----------------------------------------
n = 70        # number of training rows
sigma = 1e1   # scale of the Gaussian noise added to every feature
max_m = 10 * n  # largest number of features (columns) to fit

# Seeded generator so Restart & Run All reproduces the same figure;
# change SEED to draw a different random sample.
SEED = 0
rng = np.random.default_rng(SEED)

# --- Experiment: fit least squares for m = 1..max_m features ----------------
y = np.ones(n)
reg = LinearRegression(fit_intercept=False)
feature_counts = np.arange(1, max_m + 1)

errs = []
for num_features in range(1, max_m + 1):
    # Each feature is the constant 1 corrupted by N(0, sigma^2) noise.
    X = np.ones((n, num_features)) + sigma * rng.standard_normal((n, num_features))
    reg.fit(X, y)
    w = reg.coef_
    # Closed-form expected squared error on a fresh point generated like the
    # training rows (features 1 + sigma * noise, target 1):
    # squared bias of sum(w) plus the noise-variance term.
    errs.append((np.sum(w) - 1.) ** 2 + sigma**2 * np.linalg.norm(w) ** 2)

# --- Theory: predicted error on each side of m = n --------------------------
m_upto_n = feature_counts[:n]   # m = 1, ..., n
m_from_n = feature_counts[n:]   # m = n + 1, ..., max_m
# The formula divides by (n - m), which is 0 at m = n; the resulting inf is
# intentional (the prediction diverges there), so silence the warning.
with np.errstate(divide='ignore'):
    pred_err_upto_n = n * sigma**2 / (m_upto_n + sigma**2) / (n - m_upto_n)
pred_err_from_n = (
    sigma**2 * (n**2 + m_from_n * sigma**2) / (n + sigma**2) ** 2 / (m_from_n - n)
)
pred_err = np.concatenate((pred_err_upto_n, pred_err_from_n))

# --- Plot --------------------------------------------------------------------
# Plot against the actual feature count m (1-based) so that the vertical
# marker at x = n really sits at m = n instead of one step to the right.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(feature_counts, np.log(errs), label='experiment')
ax.plot(feature_counts, np.log(pred_err), label='theory')
ax.axvline(x=n, label='m=n', c='black', linestyle='dashed')
# Limit of the m > n formula as m grows: (sigma^2 / (n + sigma^2))^2.
# Raw string: '\i' is not a valid escape sequence in a normal string literal.
ax.axhline(y=2 * np.log(sigma**2 / (n + sigma**2)), label=r'risk at m=$\infty$', c='green', linestyle='dashed')
ax.set_xlabel('number of features m')
ax.set_ylabel('log generalization error')
ax.legend()
plt.show()
