<font size=5 color='blue'>
Simple examples to illustrate the concepts of underfitting and overfitting

In [None]:
import numpy as np
import random

from sklearn import datasets, linear_model
from sklearn.preprocessing import PolynomialFeatures
from matplotlib import pyplot as plt
%matplotlib inline

import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Seed both global generators so the notebook is reproducible end to end:
# NumPy's generator produces the noise, while Python's `random` module is the
# one used by random.shuffle when the samples are split.
np.random.seed(1)
random.seed(1)

<font size=4>
The samples, described by the coordinates $(X, Y)$, are generated using the following equation:

$$ Y = g(X)+\eta$$

$g(X)$ is the function used to generate the samples, and $\eta$ is a noise term drawn from a normal distribution.

To model the correlation between the variables $X$ and $Y$, we will consider a family of functions $F_\alpha(W_\alpha,X)$ that depend on the parameters $W_\alpha=(w_{\alpha1}, w_{\alpha2}, w_{\alpha3}, ...)$. 


# Getting samples

## Modeling the samples

In [None]:
# How many (x, y) points to generate
N_samples = 16

# Evenly spaced abscissas covering [0.05, 0.95]
x = np.linspace(0.05, 0.95, num=N_samples)

# Uncorrelated Gaussian noise: one draw per sample, scaled by sigma_samples
# (the standard deviation of the noise)
sigma_samples = 0.3
s = sigma_samples * np.random.randn(N_samples)

# Noisy targets: a degree-10 polynomial in x plus the noise term
y = 2*x - 5*x**5 + 15*x**10 + s

# Pair every abscissa with its target as an (x, y) tuple
samples = list(zip(x, y))


### We define a function to split the samples into learning and validation sets

In [None]:
def split_samples(samples, val_ratio=0.2, shuffle=True):
    """Split (x, y) pairs into a learning set and a validation set.

    Parameters
    ----------
    samples : sequence of (x, y) pairs
        The data to split. The sequence itself is never modified.
    val_ratio : float
        Fraction of the samples reserved for validation. The learning set
        receives ``int((1 - val_ratio) * len(samples))`` samples and the
        validation set receives the rest.
    shuffle : bool
        If true, the samples are shuffled (with Python's ``random`` module)
        before being split; otherwise the original order is kept, so the
        validation set is the tail of ``samples``.

    Returns
    -------
    x_learn, y_learn, x_val, y_val : numpy.ndarray
        Coordinates of the learning and validation samples.
    """
    # Work on a copy: random.shuffle reorders its argument in place, and
    # shuffling the caller's list would make earlier cells give a different
    # split when re-run.
    ordered = list(samples)
    if shuffle:
        random.shuffle(ordered)

    n_learn = int((1.0 - val_ratio) * len(ordered))
    learn, val = ordered[:n_learn], ordered[n_learn:]

    x_learn = np.array([point[0] for point in learn])
    y_learn = np.array([point[1] for point in learn])
    x_val = np.array([point[0] for point in val])
    y_val = np.array([point[1] for point in val])

    return x_learn, y_learn, x_val, y_val


In [None]:
# Split without shuffling: the validation points are the last samples in x order.
val_ratio = 0.2
x_learn, y_learn, x_val, y_val = split_samples(samples, val_ratio=val_ratio, shuffle=False)

# Tick-label sizes are configured first so they are in place before the axes exist.
plt.rc('xtick', labelsize=18)
plt.rc('ytick', labelsize=18)

fig = plt.figure(figsize=(10, 8))
# x values are drawn on the horizontal axis, y values on the vertical one.
plt.xlabel('X', size=20)
plt.ylabel('Y', size=20)

# Each series carries its own label, so the legend cannot get out of sync with the data.
p1 = plt.plot(x_learn, y_learn, "o", ms=8, alpha=0.5, label='Learn', color='blue')
p2 = plt.plot(x_val, y_val, "o", ms=8, alpha=0.5, label='Validation', color='red')
plt.legend(loc='upper left', prop={'size': 16});


In [None]:
# Split after shuffling: the validation points are spread over the x range.
val_ratio = 0.2
x_learn, y_learn, x_val, y_val = split_samples(samples, val_ratio=val_ratio, shuffle=True)

# Tick-label sizes are configured first so they are in place before the axes exist.
plt.rc('xtick', labelsize=18)
plt.rc('ytick', labelsize=18)

fig = plt.figure(figsize=(10, 8))
# x values are drawn on the horizontal axis, y values on the vertical one.
plt.xlabel('X', size=20)
plt.ylabel('Y', size=20)

# Each series carries its own label, so the legend cannot get out of sync with the data.
p1 = plt.plot(x_learn, y_learn, "o", ms=8, alpha=0.5, label='Learn', color='blue')
p2 = plt.plot(x_val, y_val, "o", ms=8, alpha=0.5, label='Validation', color='red')

plt.legend(loc='upper left', prop={'size': 16});

In [None]:
# Sanity check: shapes of the learning inputs and targets, one per line
print(x_learn.shape, y_learn.shape, sep="\n")

In [None]:
def transform_samples(x_learn, x_val, degree = 2):
    """Expand 1-D x values into polynomial features.

    Only the x variables are transformed: each value becomes one row of
    powers of x up to ``degree``, with no constant (bias) column because
    ``include_bias=False``. The coefficients of the polynomial are then the
    weights of a linear model fitted on these features.

    Parameters
    ----------
    x_learn, x_val : array-like
        x values of the learning and validation samples; each gets a new
        axis inserted at position 1 before the transformation.
    degree : int
        Highest polynomial power to generate.

    Returns
    -------
    x_learn_transf, x_val_transf : numpy.ndarray
        Polynomial features of the learning and validation samples.
    """
    # PolynomialFeatures expects one row per sample, so add a feature axis.
    x_learn_col = np.expand_dims(x_learn, axis=1)
    x_val_col = np.expand_dims(x_val, axis=1)

    # No bias column, and all powers are kept (interaction_only=False).
    poly = PolynomialFeatures(degree=degree, include_bias=False, interaction_only=False)

    # Fit on the learning set only; the validation set reuses that fit
    # instead of re-fitting the transformer on held-out data.
    x_learn_transf = poly.fit_transform(x_learn_col)
    x_val_transf = poly.transform(x_val_col)

    return x_learn_transf, x_val_transf

In [None]:
def poly_fit(x_learn_pre, x_val_pre, x_learn, y_learn, x_val, y_val, epochs=500, lr=0.01):
    """Train a one-unit linear Keras model on polynomial features and plot the fit.

    Parameters
    ----------
    x_learn_pre, x_val_pre : array-like
        Untransformed x values of the learning and validation samples.
        They are used only for plotting.
    x_learn, x_val : array-like, expected shape (n_samples, n_features)
        Polynomial features fed to the model.
    y_learn, y_val : array-like
        Targets of the learning and validation samples.
    epochs : int
        Number of training epochs.
    lr : float
        Learning rate of the Adam optimizer.

    Returns
    -------
    history
        The object returned by ``model.fit``; its ``history`` dict holds the
        per-epoch 'loss' and 'val_loss' values.
    """
    # The input width is taken from the data itself rather than from a
    # notebook-level global, so the function works for any polynomial degree.
    n_features = np.shape(x_learn)[1]
    inp = Input(shape=(n_features,))
    # A single Dense unit is a weighted sum of the features plus its own bias.
    out = Dense(1)(inp)
    model = Model(inputs=inp, outputs=out)
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated.
    model.compile(optimizer=Adam(learning_rate=lr), loss="mean_squared_error")
    history = model.fit(x_learn, y_learn, epochs=epochs, validation_data=(x_val, y_val), verbose=0)

    # Predictions on the learning set, flattened to one value per sample.
    y_predicted = np.squeeze(model.predict(x_learn), axis=1)

    # Order the points by x so the fitted curve is drawn left to right.
    order = np.argsort(x_learn_pre, kind="stable")
    x_plot = np.asarray(x_learn_pre)[order]
    y_plot = y_predicted[order]

    plt.rc('xtick', labelsize=18)
    plt.rc('ytick', labelsize=18)
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.set_xlabel('X', size=20)
    ax.set_ylabel('Y', size=20)

    # Labels are attached to the artists themselves so the legend does not
    # depend on the order in which matplotlib enumerates lines and scatters.
    ax.scatter(x_learn_pre, y_learn, color='blue', label='Learn')
    ax.scatter(x_val_pre, y_val, color='red', label='Validation')
    ax.plot(x_plot, y_plot, color="green", label='Fitting function')

    ax.legend(loc='upper left', prop={'size': 16})

    return history

In [None]:
# Polynomial degree of the features: x, x^2, x^3
n = 3
x_learn_transf, x_val_transf = transform_samples(x_learn, x_val, degree=n)

# Training hyper-parameters
learning_rate = 0.04
epochs = 500

# The raw x values are passed only for plotting; the model is trained and
# validated on the transformed features.
history = poly_fit(
    x_learn_pre=x_learn,
    x_val_pre=x_val,
    x_learn=x_learn_transf,
    y_learn=y_learn,
    x_val=x_val_transf,
    y_val=y_val,
    epochs=epochs,
    lr=learning_rate,
)

In [None]:
# Learning curves: cost on the learning and validation sets at every epoch.
plt.rc('xtick', labelsize=18)
plt.rc('ytick', labelsize=18)

fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])

ax.set_title('Cost function', size=20)
ax.set_ylabel('Cost', size=20)
ax.set_xlabel('Epoch', size=20)
ax.legend(['Train', 'Validation'], loc='upper right', prop={'size': 16})
plt.show()


In [None]:
def fit_poly(xplot, x_learn, y_learn, x_val, y_val, poly_degree):
    """Fit a polynomial by least squares and plot it over the samples.

    Parameters
    ----------
    xplot : numpy.ndarray, 1-D
        x values at which the fitted polynomial is evaluated and drawn.
    x_learn, y_learn : numpy.ndarray, 1-D
        Learning samples; the regression is fitted on these only.
    x_val, y_val : numpy.ndarray, 1-D
        Validation samples; they are plotted but never used for fitting.
    poly_degree : int
        Degree of the polynomial features.

    Returns
    -------
    None
        The function only draws a figure.
    """
    # Tick-label sizes are configured first so they are in place before the axes exist.
    plt.rc('xtick', labelsize=18)
    plt.rc('ytick', labelsize=18)

    fig = plt.figure(figsize=(10, 8))
    plt.xlabel('X', size=20)
    plt.ylabel('Y', size=20)

    # Plot the samples; each series carries the label shown in the legend.
    plt.plot(x_learn, y_learn, "o", ms=8, alpha=0.5, label='Learn', color='blue')
    plt.plot(x_val, y_val, 'o', ms=10, alpha=0.5, label='Validation', color='red')

    # Polynomial regression: linear least squares on polynomial features of x.
    poly = PolynomialFeatures(degree=poly_degree)
    X = poly.fit_transform(x_learn[:, np.newaxis])
    clf = linear_model.LinearRegression()
    clf.fit(X, y_learn)

    # The plotting grid reuses the transformer fitted on the learning set;
    # it is only transformed, not fitted again.
    Xplot = poly.transform(xplot[:, np.newaxis])
    plt.plot(xplot, clf.predict(Xplot), label='Fitting function', color='green')

    plt.legend(loc='upper left', prop={'size': 16})

## Fit with a Linear Regression

In [None]:
# Dense grid over the sampled x range, used to draw the fitted curves
xplot = np.linspace(0.05, 0.95, num=200)

# Degree 1: a straight line
fit_poly(xplot=xplot, x_learn=x_learn, y_learn=y_learn, x_val=x_val, y_val=y_val, poly_degree=1)

##  Polynomial Regression: second order

In [None]:
# Degree 2 on the same learn/validation split
fit_poly(xplot=xplot, x_learn=x_learn, y_learn=y_learn, x_val=x_val, y_val=y_val, poly_degree=2)

##  Polynomial Regression: third order

In [None]:
# Degree 3 on the same learn/validation split
fit_poly(xplot=xplot, x_learn=x_learn, y_learn=y_learn, x_val=x_val, y_val=y_val, poly_degree=3)

##  Polynomial Regression: fifth order

In [None]:
# Degree 5 on the same learn/validation split
fit_poly(xplot=xplot, x_learn=x_learn, y_learn=y_learn, x_val=x_val, y_val=y_val, poly_degree=5)

##  Polynomial Regression: tenth order

In [None]:
# Degree 10: the same highest power as the polynomial used to generate the samples
fit_poly(xplot=xplot, x_learn=x_learn, y_learn=y_learn, x_val=x_val, y_val=y_val, poly_degree=10)

##  Polynomial Regression: eleventh order

In [None]:
# Degree 11: with the notebook's settings (16 samples, 20% held out) there are
# 12 learning points, i.e. as many as the coefficients of a degree-11 polynomial
fit_poly(xplot=xplot, x_learn=x_learn, y_learn=y_learn, x_val=x_val, y_val=y_val, poly_degree=11)