In [None]:
#Plain Lasso Regression

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split

def MSE(y_data,y_model):
    """Mean squared error between reference values and model predictions.

    Parameters
    ----------
    y_data : array_like
        Reference (observed) values.
    y_model : array_like
        Predicted values. A 2-D array with several columns (e.g. one column
        per resample) is broadcast against ``y_data`` by the usual NumPy rules.

    Returns
    -------
    scalar
        Sum of squared differences divided by the number of elements
        of ``y_model``.
    """
    y_data = np.asarray(y_data)
    y_model = np.asarray(y_model)

    # A column (n, 1) minus a flat vector (n,) would broadcast to an (n, n)
    # matrix of all pairwise differences and inflate the error. When the two
    # arguments are such a pair of equal length, flatten the column so the
    # subtraction is element-wise.
    if y_data.ndim == 2 and y_data.shape[1] == 1 and y_model.ndim == 1 \
            and y_data.shape[0] == y_model.shape[0]:
        y_data = y_data[:, 0]
    elif y_model.ndim == 2 and y_model.shape[1] == 1 and y_data.ndim == 1 \
            and y_model.shape[0] == y_data.shape[0]:
        y_model = y_model[:, 0]

    n = np.size(y_model)
    return np.sum((y_data-y_model)**2)/n

#grid size
n = 200

# Number of polynomial degrees to scan: the loop below fits degrees
# 0 .. maxdegree-1 (degree 0 is the intercept-only baseline).
maxdegree = 6

# Make data set: n points on [0, pi/2], shaped as a single feature column.
x = np.linspace(0,(np.pi)/2,n).reshape(-1, 1)

#Northern H. parameters
s0 = 1
s2 = -0.473
a0 = 0.675
a2 = -0.192
i2 = -0.165

#flux function (eqn. (14) from Stone_1978)
y = 0.5*(s0*a2+s2*a0+(2/7)*s2*a2-i2)*((np.sin(x))**3-np.sin(x))

#noisy flux function (Gaussian noise with a standard deviation of 5% of |y|)
y_noisy = np.random.normal(y, abs(y*0.05))

# Regularisation strengths to scan; they do not depend on the degree,
# so they are built once outside the loop.
nlambdas = 100
lambdas = np.logspace(-2, 4, nlambdas)

for degree in range(maxdegree):

    poly = PolynomialFeatures(degree=degree)
    X = poly.fit_transform(x)

    # Split the data. The targets are flattened to shape (n,) because
    # Lasso.predict returns a flat array; comparing it with a column target
    # would broadcast to an (n, n) matrix inside MSE.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_noisy.ravel(), test_size=0.25, random_state=42)

    MSEPredict = np.zeros(nlambdas)
    MSEfit = np.zeros(nlambdas)

    for i, lmb in enumerate(lambdas):
        model = linear_model.Lasso(alpha=lmb)
        model.fit(X_train, y_train)

        # error on held-out data
        ypredict = model.predict(X_test)
        MSEPredict[i] = MSE(y_test, ypredict)

        # error on the data the model was fitted to
        yfit = model.predict(X_train)
        MSEfit[i] = MSE(y_train, yfit)

    # Test and train error as a function of the regularisation strength
    plt.figure()
    plt.plot(np.log10(lambdas), MSEPredict, label = 'MSE Lasso test')
    plt.plot(np.log10(lambdas), MSEfit, label = 'MSE Lasso train')
    plt.xlabel('log10(lambda)')
    plt.ylabel('MSE')
    plt.title('Lasso, polynomial degree {}'.format(degree))
    plt.legend()
    plt.show()

In [None]:
#Bootstrap on LASSO

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.utils import resample
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split

def MSE(y_data,y_model):
    """Mean squared error between reference values and model predictions.

    Parameters
    ----------
    y_data : array_like
        Reference (observed) values.
    y_model : array_like
        Predicted values. A 2-D array with several columns (e.g. one column
        per bootstrap resample) is broadcast against ``y_data`` by the usual
        NumPy rules.

    Returns
    -------
    scalar
        Sum of squared differences divided by the number of elements
        of ``y_model``.
    """
    y_data = np.asarray(y_data)
    y_model = np.asarray(y_model)

    # A column (n, 1) minus a flat vector (n,) would broadcast to an (n, n)
    # matrix of all pairwise differences and inflate the error. When the two
    # arguments are such a pair of equal length, flatten the column so the
    # subtraction is element-wise.
    if y_data.ndim == 2 and y_data.shape[1] == 1 and y_model.ndim == 1 \
            and y_data.shape[0] == y_model.shape[0]:
        y_data = y_data[:, 0]
    elif y_model.ndim == 2 and y_model.shape[1] == 1 and y_data.ndim == 1 \
            and y_model.shape[0] == y_data.shape[0]:
        y_model = y_model[:, 0]

    n = np.size(y_model)
    return np.sum((y_data-y_model)**2)/n

# Number of grid points
n = 200

# Grid on [0, pi/2], stored as a single feature column
x = np.linspace(0, np.pi/2, n).reshape(-1, 1)

# Northern-hemisphere parameters
s0 = 1
s2 = -0.473
a0 = 0.675
a2 = -0.192
i2 = -0.165

# Flux function (eqn. (14) from Stone_1978)
y = 0.5*(s0*a2+s2*a0+(2/7)*s2*a2-i2)*((np.sin(x))**3-np.sin(x))

# Noisy flux: Gaussian noise whose standard deviation is 5% of |y|
y_noisy = np.random.normal(y, abs(y*0.05))

# Polynomial fit, now with the degree held fixed at 5
poly = PolynomialFeatures(degree=5)
X = poly.fit_transform(x)

# Hold out a quarter of the points for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y_noisy, test_size=0.25, random_state=42
)

# Regularisation grid and number of bootstrap resamples
nlambdas = 100
n_boostraps = 20
lambdas = np.logspace(0, 4, nlambdas)

# One entry per lambda for each of the three error measures
MSE_Bootstrap = np.zeros(nlambdas)
bias = np.zeros(nlambdas)
variance = np.zeros(nlambdas)

for i, lmb in enumerate(lambdas):
    model = linear_model.Lasso(lmb)

    # Column j holds the test-set prediction of the j-th resampled fit
    ypredict = np.empty((y_test.shape[0], n_boostraps))

    for j in range(n_boostraps):
        # Draw a bootstrap sample of the training data and refit the Lasso model
        x_, y_ = resample(X_train, y_train)
        model.fit(x_, y_)
        ypredict[:, j] = model.predict(X_test).ravel()

    # Prediction averaged over the resamples, kept as a column so that it
    # lines up with y_test
    mean_prediction = np.mean(ypredict, axis=1, keepdims=True)

    MSE_Bootstrap[i] = MSE(y_test, ypredict)
    bias[i] = np.mean((y_test - mean_prediction)**2)
    variance[i] = np.mean(np.var(ypredict, axis=1, keepdims=True))

# Plot the three curves against log10(lambda)
log_lambdas = np.log10(lambdas)
for curve, curve_label in ((MSE_Bootstrap, 'MSE'), (bias, 'bias'), (variance, 'Variance')):
    plt.plot(log_lambdas, curve, label=curve_label)
plt.xlabel('log10(lambda)')
plt.ylabel('Prediction Error')
plt.legend()
plt.show()

In [None]:
#Cross Validation

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import KFold
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split

def MSE(y_data,y_model):
    """Mean squared error between reference values and model predictions.

    Parameters
    ----------
    y_data : array_like
        Reference (observed) values.
    y_model : array_like
        Predicted values. A 2-D array with several columns is broadcast
        against ``y_data`` by the usual NumPy rules.

    Returns
    -------
    scalar
        Sum of squared differences divided by the number of elements
        of ``y_model``.
    """
    y_data = np.asarray(y_data)
    y_model = np.asarray(y_model)

    # A column (n, 1) minus a flat vector (n,) would broadcast to an (n, n)
    # matrix of all pairwise differences and inflate the error. When the two
    # arguments are such a pair of equal length, flatten the column so the
    # subtraction is element-wise.
    if y_data.ndim == 2 and y_data.shape[1] == 1 and y_model.ndim == 1 \
            and y_data.shape[0] == y_model.shape[0]:
        y_data = y_data[:, 0]
    elif y_model.ndim == 2 and y_model.shape[1] == 1 and y_data.ndim == 1 \
            and y_model.shape[0] == y_data.shape[0]:
        y_model = y_model[:, 0]

    n = np.size(y_model)
    return np.sum((y_data-y_model)**2)/n

#grid size
n = 200

# Make data set (kept 1-D; the feature axis is added when the design matrix is built).
x = np.linspace(0,(np.pi)/2,n)

#Northern H. parameters
s0 = 1
s2 = -0.473
a0 = 0.675
a2 = -0.192
i2 = -0.165

#flux function (eqn. (14) from Stone_1978)
y = 0.5*(s0*a2+s2*a0+(2/7)*s2*a2-i2)*((np.sin(x))**3-np.sin(x))

#noisy flux function (Gaussian noise with a standard deviation of 5% of |y|)
y_noisy = np.random.normal(y, abs(y*0.05))


#Polynomial fit, now with the degree held fixed at 5
poly = PolynomialFeatures(degree=5)

# The polynomial expansion is computed row by row, so the design matrix is
# built once and each fold simply selects its rows from it.
X = poly.fit_transform(x[:, np.newaxis])

# Decide which values of lambda to use
nlambdas = 100
lambdas = np.logspace(0, 4, nlambdas)

# k = n gives one sample per fold, i.e. leave-one-out cross-validation
for k in [5, 10, n]:
    kfold = KFold(n_splits = k)

    # Manual K-fold: one row per lambda, one column per fold
    MSE_KFold = np.zeros((nlambdas, k))

    # Same estimate obtained with cross_val_score on the same kfold splitter
    estimated_mse_sklearn = np.zeros(nlambdas)

    for i, lmb in enumerate(lambdas):
        model = linear_model.Lasso(alpha=lmb)

        for j, (train_inds, test_inds) in enumerate(kfold.split(X)):
            # Targets stay 1-D: Lasso.predict returns a flat array, and
            # comparing it with a column target would broadcast to an
            # (m, m) matrix inside MSE.
            model.fit(X[train_inds], y_noisy[train_inds])
            ypred = model.predict(X[test_inds])

            MSE_KFold[i,j] = MSE(y_noisy[test_inds], ypred)

        # cross_val_score returns the negative MSE of every fold; negate and
        # average to get the MSE estimate for this lambda.
        estimated_mse_folds = cross_val_score(model, X, y_noisy, scoring='neg_mean_squared_error', cv=kfold)
        estimated_mse_sklearn[i] = np.mean(-estimated_mse_folds)

    estimated_mse_KFold = np.mean(MSE_KFold, axis = 1)

    ## Plot

    kfold_label = 'MSE LOO' if k == n else 'MSE KFold, k={}'.format(k)

    plt.figure()
    plt.plot(np.log10(lambdas), estimated_mse_sklearn, 'b', label = 'cross_val_score')
    plt.plot(np.log10(lambdas), estimated_mse_KFold, 'r--',  label = kfold_label)
    # MSE_Bootstrap is produced by the bootstrap cell, which must have been run
    # first; it was evaluated on the same lambda grid, np.logspace(0, 4, 100).
    plt.plot(np.log10(lambdas), MSE_Bootstrap, label='MSE Bootstrap')
    plt.xlabel('log10(lambda)')
    plt.ylabel('MSE')
    plt.legend()
    plt.show()