In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error as mse

In [2]:
# --- Experiment configuration ---------------------------------------------

# Number of independent noisy data sets the experiment is repeated over.
NUM_DATASETS = 50

# Amount of noise used when generating each response vector.
NOISE_VARIANCE = 0.5

# Highest polynomial order to fit, i.e. the most complex model considered.
MAX_POLY = 12

# Points per data set and how many of them are used for training.
# int() truncates 0.9 * 25 = 22.5 down to 22.
N = 25
Ntrain = int(N * 0.9)

# Seed the global NumPy random number generator so runs repeat while debugging.
np.random.seed(2)

In [19]:
""" Function to generate a polynomial up to order D based on input x"""
def make_poly(x, D):
    """Build a polynomial design matrix from 1-D input `x`, up to order `D`.

    Column d of the result holds ``x ** d``, so column 0 is the all-ones
    bias column and column 1 is `x` itself. Columns of order 2 and above
    are standardised (mean subtracted, divided by the standard deviation);
    the bias and linear columns are left unscaled.

    Parameters
    ----------
    x : array_like, 1-D
        Input samples. Plain Python sequences are accepted as well as arrays.
    D : int
        Highest polynomial order to generate.

    Returns
    -------
    numpy.ndarray
        Float matrix of shape ``(len(x), D + 1)``.
    """
    x = np.asarray(x)                       # accept lists/tuples as well as arrays
    n_samples = len(x)
    design = np.empty((n_samples, D + 1))   # D + 1 columns: orders 0..D

    for d in range(D + 1):
        design[:, d] = x ** d
        if d > 1:
            # Only orders >= 2 are rescaled; the bias (d = 0) and the raw
            # input (d = 1) columns are left untouched.
            centred = design[:, d] - design[:, d].mean()
            spread = design[:, d].std()
            # A column with zero spread (e.g. a single sample) cannot be
            # scaled: dividing would turn it into NaN, so keep it centred at 0.
            design[:, d] = centred / spread if spread > 0 else centred
    return design
"""Function that takes in a 1 dimensional X, and returns sin(X)"""
def f(X):
    """Return the element-wise sine of `X`.

    Parameters
    ----------
    X : array_like or scalar
        Input value(s), in radians.

    Returns
    -------
    numpy.ndarray or scalar
        ``np.sin(X)``, with the same shape as `X`.
    """
    return np.sin(X)

In [20]:
"""Define axis data for plotting purposes"""
x_axis = np.linspace(-np.pi, np.pi, num=100)   # dense, ordered grid for plotting
y_axis = f(x_axis)                             # noise-free curve on that grid

# Actual data set: N evenly spaced sample locations on [-pi, pi],
# then shuffled in place (draws from the global NumPy RNG).
X = np.linspace(-np.pi, np.pi, num=N)
np.random.shuffle(X)

# Ground-truth (noise-free) response at every sample location.
f_X = f(X)

In [21]:
"""Create a polynomial of order MAX_POLY from X"""
# One row per sample and one column per order 0..MAX_POLY, i.e. shape (25 x 13).
Xpoly = make_poly(X, D=MAX_POLY)

In [24]:
"""Create empty arrays to hold data as we loop through experiment"""

# Train and test score for every (dataset, polynomial order) pair.
# Both arrays have shape (50 x 12).
train_scores = np.zeros(shape=(NUM_DATASETS, MAX_POLY))
test_scores = np.zeros(shape=(NUM_DATASETS, MAX_POLY))

# Predictions for every (dataset, polynomial order) pair:
#   train_predictions - one row per training point
#   prediction_curves - 100 rows (presumably one per point of the 100-point
#                       plotting grid; confirm where the array is filled)
train_predictions = np.zeros(shape=(Ntrain, NUM_DATASETS, MAX_POLY))
prediction_curves = np.zeros(shape=(100, NUM_DATASETS, MAX_POLY))

**Experiment:**
> 1. Create 50 different Y responses, each built from our ground-truth function plus random noise

In [None]:
"""Create 50 different Y responses, based on ground truth function and random noise"""
# The polynomial design matrix is identical for every data set, so the
# train/test split of its rows is taken once, outside the loop.
Xtrain = Xpoly[:Ntrain]                              # Training data, 22 x 13
Xtest = Xpoly[Ntrain:]                               # Test data, 3 x 13

for k in range(NUM_DATASETS):
    # Ground truth plus zero-mean Gaussian noise. randn() has unit variance,
    # so it is scaled by the standard deviation sqrt(NOISE_VARIANCE) to give
    # the noise a variance of NOISE_VARIANCE. (Adding NOISE_VARIANCE instead
    # would only shift every response by a constant.)
    Y = f_X + np.sqrt(NOISE_VARIANCE) * np.random.randn(N)

    Ytrain = Y[:Ntrain]                              # Training response, 22 values
    Ytest = Y[Ntrain:]                               # Test response, 3 values

    # Create a linear regression model of order 1 to 12.
    for d in range(MAX_POLY):
        # TODO: model fitting and scoring are not written yet (leaving off here).
        # `pass` keeps the cell syntactically valid until the body is added.
        pass