In [61]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error as MSE
from sklearn.model_selection import cross_val_score

In [62]:
# Read the labelled training arrays and the unlabelled evaluation features from disk.
X = np.load("Xtrain_Regression_Part1.npy")
X_TESTE = np.load("Xtest_Regression_Part1.npy")
Y = np.load("Ytrain_Regression_Part1.npy")

# Hold-out split by position: the final 20 rows are reserved for evaluation,
# every row before them is used for fitting. No shuffling is applied.
fit_rows = slice(None, -20)
holdout_rows = slice(-20, None)

X_train, X_test = X[fit_rows], X[holdout_rows]
Y_train, Y_test = Y[fit_rows], Y[holdout_rows]

In [63]:
# Fit three linear estimators on the training split and score each one by
# mean squared error on the hold-out split.
LM_model = linear_model.LinearRegression()
R_model = linear_model.Ridge(alpha=0.001)
L_model = linear_model.Lasso(alpha=0.0035)

holdout_mse = {}
for label, estimator in (("LINEAR", LM_model), ("RIDGE", R_model), ("LASSO", L_model)):
    estimator.fit(X_train, Y_train)
    # Y_pred is overwritten on every pass, so after the loop it holds the Lasso predictions.
    Y_pred = estimator.predict(X_test)
    holdout_mse[label] = MSE(Y_test, Y_pred)
    print(f"MSE score ({label} REGRESSION) {holdout_mse[label]}")

linear_score = holdout_mse["LINEAR"]
ridge_score = holdout_mse["RIDGE"]
lasso_score = holdout_mse["LASSO"]

best = min(linear_score, ridge_score, lasso_score)

# Resolve the winner's display name; on ties the earlier entry (Linear, then Ridge) wins.
if best == linear_score:
    winner = "Linear "
elif best == ridge_score:
    winner = "Ridge "
elif best == lasso_score:
    winner = "Lasso "
else:
    winner = ""
print(f"Best is {winner}with MSE: {best}")

MSE score (LINEAR REGRESSION) 0.02308231170738404
MSE score (RIDGE REGRESSION) 0.02308821238419561
MSE score (LASSO REGRESSION) 0.022733795486092424
Best is Lasso with MSE: 0.022733795486092424


Ridge regression with cross-validation (experimental; the cell below is commented out)

In [64]:
# Disabled experiment (every line is commented out, nothing runs):
# grid-search the Ridge alpha with RidgeCV using 5-fold cross-validation and
# 'neg_mean_squared_error' scoring on the full (X, Y), then print the selected
# alpha and its best score. Several alternative alpha grids are kept below.
# # alphas = np.arange(0.0000001, 0.000001, 0.000000001)
# # alphas = (0.0001, 0.001, 0.035, 0.1, 1, 10)
# alphas = np.arange(0.0001, 0.5, 0.0001) 
# print(alphas.shape)
# print((0.035 in alphas))
# Ridge_cv = linear_model.RidgeCV(alphas=(alphas), scoring='neg_mean_squared_error', cv=5)
# model = Ridge_cv.fit(X, Y)
# print(f"Best alpha {model.alpha_} -- MSE: {model.best_score_}")

Lasso Regression with cross validation

In [65]:
# Disabled experiment (every line is commented out, nothing runs):
# grid-search the Lasso alpha with LassoCV using 5-fold cross-validation on the
# full X and a flattened Y (np.ravel), then print the selected alpha.
# Several alternative alpha grids are kept below.
#alphas = np.arange(0.0000001, 0.000001, 0.000000001)
# alphas = np.array([0.001, 0.002, 0.003, 0.004, 0.005])
# alphas = np.arange(0.0001, 0.5, 0.00001) 
# print(alphas)
# print((0.035 in alphas))
# Lasso_cv = linear_model.LassoCV(alphas=(alphas), cv=5)
# model = Lasso_cv.fit(X, np.ravel(Y))
# print(f"Best alpha {model.alpha_}")

Cross-validation: selecting alpha for Lasso and Ridge

In [66]:
def mean_cv_mse(estimator, X, Y, cv=5):
    """Return the mean cross-validated MSE of `estimator` on (X, Y).

    cross_val_score is called with scoring='neg_mean_squared_error', so the
    per-fold scores are negated before averaging to obtain a positive MSE.

    Parameters:
        estimator: an unfitted scikit-learn regressor.
        X, Y: training features and targets passed straight to cross_val_score.
        cv: number of folds (default 5).

    Returns:
        The average of the per-fold mean squared errors.
    """
    fold_scores = cross_val_score(estimator, X, Y, scoring='neg_mean_squared_error', cv=cv)
    return np.average(-1 * fold_scores)


def search_best_alpha(make_estimator, alphas, X, Y, cv=5):
    """Score one estimator per alpha and report the alpha with the lowest mean CV MSE.

    Parameters:
        make_estimator: callable accepting an `alpha` keyword and returning an
            unfitted regressor (e.g. linear_model.Lasso or linear_model.Ridge).
        alphas: iterable of regularisation strengths to try.
        X, Y: training features and targets.
        cv: number of folds (default 5).

    Returns:
        (best_key, best_error, all_scores) where all_scores maps str(alpha) to
        its mean CV MSE and best_key is the first key with the smallest error.

    Raises:
        ValueError: if `alphas` is empty (raised by min on an empty mapping).
    """
    # Keys are str(alpha), matching the dictionary layout this cell has always produced.
    all_scores = {
        str(alpha): mean_cv_mse(make_estimator(alpha=alpha), X, Y, cv=cv)
        for alpha in alphas
    }
    # min over the dict with key=get returns the first minimal entry directly,
    # avoiding a reverse lookup through list(values).index(...).
    best_key = min(all_scores, key=all_scores.get)
    return best_key, all_scores[best_key], all_scores


#FINDING THE BEST ALPHA FOR THE LASSO
alphas = np.arange(0.0001, 0.2, 0.0001)

# Each model gets its own result dict so one search can never pick up stale
# entries from the other (previously both sweeps wrote into the same dict).
lasso_alpha, lasso_best, lasso_scores = search_best_alpha(linear_model.Lasso, alphas, X, Y)
print(f"Lasso best error {lasso_best} from alpha={lasso_alpha}")

#And for Ridge
# NOTE(review): the recorded run picked alpha=0.0001 for Ridge, which is the
# smallest value in this grid, so the true optimum may lie below the grid.
ridge_alpha, ridge_best, ridge_scores = search_best_alpha(linear_model.Ridge, alphas, X, Y)
print(f"Ridge best error {ridge_best} from alpha={ridge_alpha}")

# Keep the names the original cell left behind: after it ran, `scores` and
# `best` held the Ridge results.
scores = ridge_scores
best = ridge_best

Lasso best error 0.016713602190598364 from alpha=0.0002
Ridge best error 0.016730181681383776 from alpha=0.0001


In [67]:
# Compare four fixed configurations by mean 5-fold cross-validated MSE on the full (X, Y).
# 'Ridge' and 'Lasso1' use the alphas reported by the grid search above
# (0.0001 and 0.0002); 'Lasso' keeps the earlier alpha of 0.0035.
candidates = {
    'Linear': linear_model.LinearRegression(),
    'Ridge': linear_model.Ridge(alpha=0.0001),
    'Lasso': linear_model.Lasso(alpha=0.0035),
    'Lasso1': linear_model.Lasso(alpha=0.0002),
}

scores = {}
for label, estimator in candidates.items():
    # The scorer returns negated MSE per fold; flip the sign before averaging.
    fold_mse = -1 * cross_val_score(estimator, X, Y, scoring='neg_mean_squared_error', cv=5)
    scores[label] = np.average(fold_mse)

print(f"LINEAR: {scores['Linear']} ")
print(f"RIDGE: {scores['Ridge']} ")
print(f"LASSO: {scores['Lasso']} ")
print(f"LASSO1: {scores['Lasso1']} ")

# First label (in insertion order) with the smallest mean error.
winner = min(scores, key=scores.get)
best = scores[winner]
print(f"best is {best} from {winner}")

LINEAR: 0.016729948323158324 
RIDGE: 0.016730181681383776 
LASSO: 0.01710049693686596 
LASSO1: 0.016713602190598364 
best is 0.016713602190598364 from Lasso1


CÓDIGO PARA ENTREGAR

In [70]:
# Refit Lasso on the complete training set with alpha=0.0002 (the value the
# cross-validated search reported for Lasso) and predict the evaluation features.
model = linear_model.Lasso(alpha=0.0002).fit(X, Y)
Y_pred = model.predict(X_TESTE)

# Persist the predictions in NumPy's .npy format for submission.
# NOTE(review): confirm the saved array has the shape the grader expects.
np.save('Ytest_Regression_Part1.npy', Y_pred)