In [32]:
# Following https://www.pluralsight.com/guides/linear-lasso-ridge-regression-scikit-learn

# Loading required libraries and modules
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.linear_model import ElasticNetCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
import pandas

In [20]:
# Creating train and test data, using preprocessed and already split up data

path_test_data = "results/Combined_All_Test_90.csv"
test_data = pandas.read_csv(path_test_data, header=None)
test_set = test_data.values
X_test = test_set[:,0:400].astype(float)
y_test = test_set[:,400]

path_train_data = "results/Combined_All_Train_90.csv"
train_data = pandas.read_csv(path_train_data, header=None)
train_set = train_data.values
X_train = train_set[:,0:400].astype(float)
y_train = train_set[:,400]


In [21]:
# Linear Regression

# Instantiation of model and fitting to training data
lr = LinearRegression()
lr.fit(X_train, y_train)

# Predigt, print RMSE and R-squared
pred_train_lr= lr.predict(X_train)
print("RMSE train: ")
print(np.sqrt(mean_squared_error(y_train,pred_train_lr)))
print("R2 score train: ")
print(r2_score(y_train, pred_train_lr))


pred_test_lr= lr.predict(X_test)
print("RMSE test: ")
print(np.sqrt(mean_squared_error(y_test,pred_test_lr))) 
print("R2 score test: ")
print(r2_score(y_test, pred_test_lr))


RMSE train: 
2.418761501997702
R2 score train: 
0.3596901678357556
RMSE test: 
4.060785282042404
R2 score test: 
-0.7888041542501503


In [42]:
# Ridge regression
rr = RidgeCV(alphas=np.linspace(17.0,19.0,100))
rr.fit(X_train, y_train) 
pred_train_rr= rr.predict(X_train)
print(np.sqrt(mean_squared_error(y_train,pred_train_rr)))
print(r2_score(y_train, pred_train_rr))

pred_test_rr= rr.predict(X_test)
print(np.sqrt(mean_squared_error(y_test,pred_test_rr))) 
print(r2_score(y_test, pred_test_rr))

print(rr.alpha_)

2.4224179970945716
0.3577527635488672
3.9361626094684268
-0.6806946019576354
18.171717171717173


In [68]:
# Lasso regression
model_lasso = LassoCV(alphas= np.linspace(0.13,0.135, 500))
model_lasso.fit(X_train, y_train) 
pred_train_lasso= model_lasso.predict(X_train)
print(np.sqrt(mean_squared_error(y_train,pred_train_lasso)))
print(r2_score(y_train, pred_train_lasso, multioutput='variance_weighted'))

pred_test_lasso= model_lasso.predict(X_test)
print(np.sqrt(mean_squared_error(y_test,pred_test_lasso))) 
print(r2_score(y_test, pred_test_lasso, multioutput='variance_weighted'))

print(model_lasso.alpha_)



2.9468324387748392
0.04958132930767123
3.0768756561979305
-0.026981531344533405
0.13


In [64]:
#Elastic Net Regression
model_enet = ElasticNetCV(alphas=np.linspace(0.2, 0.3,100), max_iter= 10000)
model_enet.fit(X_train, y_train) 
pred_train_enet= model_enet.predict(X_train)
print(np.sqrt(mean_squared_error(y_train,pred_train_enet)))
print(r2_score(y_train, pred_train_enet))

pred_test_enet= model_enet.predict(X_test)
print(np.sqrt(mean_squared_error(y_test,pred_test_enet)))
print(r2_score(y_test, pred_test_enet))

print(model_enet.alpha_)

2.94309959695059
0.051987652442136545
3.078441613955759
-0.02802714977070897
0.23333333333333334
