In [1]:

# Numpy and pandas as usual
import numpy as np
import pandas as pd

# Scikit-Learn for fitting models
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error

# For plotting in the notebook
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# Plot defaults shared by every figure in this notebook
matplotlib.rcParams.update({
    'font.size': 12,
    'figure.titlesize': 16,
    'figure.figsize': [9, 7],
})

In [16]:

# Fix NumPy's global random state so every re-run draws the same numbers
np.random.seed(seed=42)

# "True" generating function representing a process in real life
def true_gen(x):
    """Evaluate the noise-free generating curve sin(1.2 * pi * x).

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Point(s) at which to evaluate the curve.

    Returns
    -------
    Same shape as ``x``
        The sine of ``1.2 * x * pi``, computed element-wise.
    """
    return np.sin(1.2 * x * np.pi)

# Draw 120 uniform random inputs and arrange them in ascending order
x = np.random.rand(120)
x.sort()

# Observed responses: the true curve plus Gaussian noise scaled by 0.1
noise = 0.1 * np.random.randn(x.size)
y = true_gen(x) + noise

In [17]:
# Shuffle the observations before splitting. The count is taken from the data
# itself so this cell stays correct if the number of generated samples changes.
assert len(x) == len(y), "x and y must contain the same number of observations"
n_obs = len(x)
random_ind = np.random.permutation(n_obs)
xt = x[random_ind]
yt = y[random_ind]

# The first 70% of the shuffled observations are used for training and the
# remainder for testing. The cut is computed once so that inputs and targets
# are always split at the same position.
n_train = int(0.7 * n_obs)

# Training and testing observations
train, test = xt[:n_train], xt[n_train:]
y_train, y_test = yt[:n_train], yt[n_train:]

# Evaluate the generating function on a dense, evenly spaced grid over [0, 1]
x_linspace = np.linspace(start=0, stop=1, num=1000)
y_true = true_gen(x_linspace)

In [20]:
# Display the sorted x values generated above
x

array([0.00552212, 0.02058449, 0.02541913, 0.03142919, 0.03438852,
       0.04522729, 0.04645041, 0.05808361, 0.06355835, 0.06505159,
       0.07404465, 0.07455064, 0.07697991, 0.0884925 , 0.09767211,
       0.10789143, 0.11586906, 0.11959425, 0.12203823, 0.13949386,
       0.14092422, 0.15599452, 0.15601864, 0.16122129, 0.17052412,
       0.18182497, 0.18340451, 0.18485446, 0.18657006, 0.19598286,
       0.19871568, 0.19967378, 0.21233911, 0.22879817, 0.24929223,
       0.25877998, 0.27134903, 0.28093451, 0.28975145, 0.29122914,
       0.29214465, 0.30424224, 0.30461377, 0.31098232, 0.31171108,
       0.31435598, 0.32518332, 0.32533033, 0.33089802, 0.35675333,
       0.35846573, 0.36636184, 0.37454012, 0.38867729, 0.41038292,
       0.42754102, 0.43194502, 0.44015249, 0.45606998, 0.47221493,
       0.4937956 , 0.49517691, 0.50857069, 0.51423444, 0.52006802,
       0.52273283, 0.52475643, 0.53934224, 0.54269608, 0.54671028,
       0.5612772 , 0.59241457, 0.59789998, 0.59865848, 0.60111