In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [72]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold, RandomizedSearchCV
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import GridSearchCV

In [95]:
data = pd.read_csv('Walmart_Store_sales.csv')

## Data Exploration

In [96]:
data

Unnamed: 0,Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment
0,6.0,18-02-2011,1572117.54,,59.61,3.045,214.777523,6.858
1,13.0,25-03-2011,1807545.43,0.0,42.38,3.435,128.616064,7.470
2,17.0,27-07-2012,,0.0,,,130.719581,5.936
3,11.0,,1244390.03,0.0,84.57,,214.556497,7.346
4,6.0,28-05-2010,1644470.66,0.0,78.89,2.759,212.412888,7.092
...,...,...,...,...,...,...,...,...
145,14.0,18-06-2010,2248645.59,0.0,72.62,2.780,182.442420,8.899
146,7.0,,716388.81,,20.74,2.778,,
147,17.0,11-06-2010,845252.21,0.0,57.14,2.841,126.111903,
148,8.0,12-08-2011,856796.10,0.0,86.05,3.638,219.007525,


In [97]:
data['Date'] = pd.to_datetime(data['Date'], dayfirst=False, errors='coerce')


data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data['Day'] = data['Date'].dt.day
data['DayOfWeek'] = data['Date'].dt.day_name()


data.drop(columns=['Date'], inplace=True)
data





Unnamed: 0,Store,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment,Year,Month,Day,DayOfWeek
0,6.0,1572117.54,,59.61,3.045,214.777523,6.858,2011.0,2.0,18.0,Friday
1,13.0,1807545.43,0.0,42.38,3.435,128.616064,7.470,2011.0,3.0,25.0,Friday
2,17.0,,0.0,,,130.719581,5.936,2012.0,7.0,27.0,Friday
3,11.0,1244390.03,0.0,84.57,,214.556497,7.346,,,,
4,6.0,1644470.66,0.0,78.89,2.759,212.412888,7.092,2010.0,5.0,28.0,Friday
...,...,...,...,...,...,...,...,...,...,...,...
145,14.0,2248645.59,0.0,72.62,2.780,182.442420,8.899,2010.0,6.0,18.0,Friday
146,7.0,716388.81,,20.74,2.778,,,,,,
147,17.0,845252.21,0.0,57.14,2.841,126.111903,,2010.0,6.0,11.0,Friday
148,8.0,856796.10,0.0,86.05,3.638,219.007525,,2011.0,8.0,12.0,Friday


In [98]:
data.dropna(subset=['Weekly_Sales'], inplace=True)
data.dropna(subset=['Holiday_Flag'], inplace=True)
data.dropna(subset=['Store'], inplace=True)

In [99]:
data.drop(columns=['DayOfWeek'], inplace=True)
data['Week_of_Month'] = ((data['Day'] - 1) // 7 + 1)
#data.drop(columns=['Day'], inplace=True)

In [100]:
data

Unnamed: 0,Store,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment,Year,Month,Day,Week_of_Month
1,13.0,1807545.43,0.0,42.38,3.435,128.616064,7.470,2011.0,3.0,25.0,4.0
3,11.0,1244390.03,0.0,84.57,,214.556497,7.346,,,,
4,6.0,1644470.66,0.0,78.89,2.759,212.412888,7.092,2010.0,5.0,28.0,4.0
5,4.0,1857533.70,0.0,,2.756,126.160226,7.896,2010.0,5.0,28.0,4.0
6,15.0,695396.19,0.0,69.80,4.069,134.855161,7.658,2011.0,6.0,3.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
144,3.0,424513.08,0.0,73.44,3.594,226.968844,6.034,2012.0,10.0,19.0,3.0
145,14.0,2248645.59,0.0,72.62,2.780,182.442420,8.899,2010.0,6.0,18.0,3.0
147,17.0,845252.21,0.0,57.14,2.841,126.111903,,2010.0,6.0,11.0,2.0
148,8.0,856796.10,0.0,86.05,3.638,219.007525,,2011.0,8.0,12.0,2.0


In [101]:
data_cor = data.copy()  
data_cor.dropna(subset=['CPI'], inplace=True)
data_cor['Weekly_Sales'] = (data_cor['Weekly_Sales'] / data_cor['CPI']) * 100

In [102]:
data_cor.drop(columns=['CPI'], inplace=True)
data_cor

Unnamed: 0,Store,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,Unemployment,Year,Month,Day,Week_of_Month
1,13.0,1.405381e+06,0.0,42.38,3.435,7.470,2011.0,3.0,25.0,4.0
3,11.0,5.799825e+05,0.0,84.57,,7.346,,,,
4,6.0,7.741859e+05,0.0,78.89,2.759,7.092,2010.0,5.0,28.0,4.0
5,4.0,1.472361e+06,0.0,,2.756,7.896,2010.0,5.0,28.0,4.0
6,15.0,5.156615e+05,0.0,69.80,4.069,7.658,2011.0,6.0,3.0,1.0
...,...,...,...,...,...,...,...,...,...,...
144,3.0,1.870358e+05,0.0,73.44,3.594,6.034,2012.0,10.0,19.0,3.0
145,14.0,1.232523e+06,0.0,72.62,2.780,8.899,2010.0,6.0,18.0,3.0
147,17.0,6.702398e+05,0.0,57.14,2.841,,2010.0,6.0,11.0,2.0
148,8.0,3.912177e+05,0.0,86.05,3.638,,2011.0,8.0,12.0,2.0


In [162]:
store_counts = data['Store'].value_counts()
print(store_counts)

Store
3     9
13    7
18    7
7     6
19    6
5     5
4     4
20    4
8     4
1     4
2     4
14    3
10    3
17    3
16    3
15    3
6     2
11    2
Name: count, dtype: int64


In [163]:
fig_day = px.histogram(data, x='Week_of_Month', y='Weekly_Sales', title="Weekly Sales by Week of the Month", nbins=5, histfunc='avg')
fig_day.update_traces(marker=dict(line=dict(width=0.5, color="black")))  
fig_day.update_layout(bargap=0.2)
fig_month = px.histogram(data, x='Month', y='Weekly_Sales', title="Weekly Sales by Month", nbins=12, histfunc='avg')
fig_month.update_traces(marker=dict(line=dict(width=0.5, color="black")))
fig_month.update_layout(bargap=0.2)
fig_year = px.histogram(data, x='Year', y='Weekly_Sales', title="Weekly Sales by Year", nbins=len(data['Year'].unique()), histfunc='avg')
fig_year.update_traces(marker=dict(line=dict(width=0.5, color="black")))
fig_year.update_layout(bargap=0.2)
fig_holiday = px.histogram(data, x='Holiday_Flag', y='Weekly_Sales', title="Weekly Sales by Holiday Flag", histfunc='avg')
fig_holiday.update_traces(marker=dict(line=dict(width=0.5, color="black")))
fig_holiday.update_layout(bargap=0.2)
fig_store = px.histogram(data, x='Store', y='Weekly_Sales', title="Weekly Sales by Store", nbins=20, histfunc='avg')
fig_store.update_traces(marker=dict(line=dict(width=0.5, color="black")))
fig_store.update_layout(bargap=0.2)
fig_scatter_temp = px.scatter(data, x='Temperature', y='Weekly_Sales', title="Weekly Sales by Temperature")
fig_scatter_fuel = px.scatter(data, x='Fuel_Price', y='Weekly_Sales', title="Weekly Sales by Fuel Price")
fig_scatter_unemployment = px.scatter(data, x='Unemployment', y='Weekly_Sales', title="Weekly Sales by Unemployment")


fig_day.show()
fig_month.show()
fig_year.show()
fig_holiday.show()
fig_store.show()
fig_scatter_temp.show()
fig_scatter_fuel.show()
fig_scatter_unemployment.show()

In [104]:
fig_day = px.histogram(data_cor, x='Week_of_Month', y='Weekly_Sales', title="Weekly Sales by Week of the Month", nbins=5, histfunc='avg')
fig_day.update_traces(marker=dict(line=dict(width=0.5, color="black")))  
fig_day.update_layout(bargap=0.2)
fig_month = px.histogram(data_cor, x='Month', y='Weekly_Sales', title="Weekly Sales by Month", nbins=12, histfunc='avg')
fig_month.update_traces(marker=dict(line=dict(width=0.5, color="black")))
fig_month.update_layout(bargap=0.2)
fig_year = px.histogram(data_cor, x='Year', y='Weekly_Sales', title="Weekly Sales by Year", nbins=len(data['Year'].unique()), histfunc='avg')
fig_year.update_traces(marker=dict(line=dict(width=0.5, color="black")))
fig_year.update_layout(bargap=0.2)
fig_holiday = px.histogram(data_cor, x='Holiday_Flag', y='Weekly_Sales', title="Weekly Sales by Holiday Flag", histfunc='avg')
fig_holiday.update_traces(marker=dict(line=dict(width=0.5, color="black")))
fig_holiday.update_layout(bargap=0.2)
fig_scatter_temp = px.scatter(data_cor, x='Temperature', y='Weekly_Sales', title="Weekly Sales by Temperature")
fig_scatter_fuel = px.scatter(data_cor, x='Fuel_Price', y='Weekly_Sales', title="Weekly Sales by Fuel Price")
fig_scatter_unemployment = px.scatter(data_cor, x='Unemployment', y='Weekly_Sales', title="Weekly Sales by Unemployment")

fig_day.show()
fig_month.show()
fig_year.show()
fig_holiday.show()
fig_scatter_temp.show()
fig_scatter_fuel.show()
fig_scatter_unemployment.show()

## Removing Outliers

In [105]:
cols_to_check = ['Temperature', 'CPI', 'Unemployment', 'Fuel_Price']

for col in cols_to_check:
    mean = data[col].mean()
    std = data[col].std()
    data = data[(data[col] >= mean - 3 * std) & (data[col] <= mean + 3 * std)]

In [106]:
data

Unnamed: 0,Store,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment,Year,Month,Day,Week_of_Month
1,13.0,1807545.43,0.0,42.38,3.435,128.616064,7.470,2011.0,3.0,25.0,4.0
4,6.0,1644470.66,0.0,78.89,2.759,212.412888,7.092,2010.0,5.0,28.0,4.0
6,15.0,695396.19,0.0,69.80,4.069,134.855161,7.658,2011.0,6.0,3.0,1.0
7,20.0,2203523.20,0.0,39.93,3.617,213.023622,6.961,2012.0,2.0,3.0,1.0
9,3.0,418925.47,0.0,60.12,3.555,224.132020,6.833,,,,
...,...,...,...,...,...,...,...,...,...,...,...
139,7.0,532739.77,0.0,50.60,3.804,197.588605,8.090,2012.0,5.0,25.0,4.0
143,3.0,396968.80,0.0,78.53,2.705,214.495838,7.343,2010.0,6.0,4.0,1.0
144,3.0,424513.08,0.0,73.44,3.594,226.968844,6.034,2012.0,10.0,19.0,3.0
145,14.0,2248645.59,0.0,72.62,2.780,182.442420,8.899,2010.0,6.0,18.0,3.0


## Linear Regressoin

In [131]:
data = data[data['Store'] != 9]


In [107]:
target = 'Weekly_Sales'

categorical_features = ['Store', 'Holiday_Flag']
numerical_features = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment', 'Year', 'Month', 'Day']

In [132]:
X = data[categorical_features + numerical_features]
y = data[target]


In [133]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=456)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler())]), numerical_features),
        ('cat', OneHotEncoder(drop='first'), categorical_features)
    ]
)



In [134]:
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', LinearRegression())])
pipeline.fit(X_train, y_train)

train_score = pipeline.score(X_train, y_train)
test_score = pipeline.score(X_test, y_test)

cv_scores = cross_val_score(pipeline, X, y, cv=5, scoring='r2')
cv_mean, cv_std = cv_scores.mean(), cv_scores.std()


y_train_pred = pipeline.predict(X_train)
y_test_pred = pipeline.predict(X_test)

train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

print("Model Evaluation Metrics:\n")
print(f"Training R² Score: {train_score:.4f}")
print(f"Testing R² Score: {test_score:.4f}")
print(f"Training Mean Squared Error (MSE): {train_mse:.2f}")
print(f"Testing Mean Squared Error (MSE): {test_mse:.2f}")
print(f"Mean R² Score: {cv_mean:.4f}")
print(f"Standard Deviation of R² Score: {cv_std:.4f}")


Model Evaluation Metrics:

Training R² Score: 0.9834
Testing R² Score: 0.9691
Training Mean Squared Error (MSE): 7629762839.96
Testing Mean Squared Error (MSE): 12693435494.39
Mean R² Score: 0.9509
Standard Deviation of R² Score: 0.0268


## Ridge and Lasso

In [148]:
from sklearn.model_selection import  StratifiedKFold, KFold

param_grid = {
    'model__alpha': [0.01, 0.1, 1, 10] 
}


ridge_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', Ridge(max_iter=100000))])
lasso_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', Lasso(max_iter=100000))])


ridge_grid_search = GridSearchCV(ridge_pipeline, param_grid, scoring='r2', n_jobs=-1)
ridge_grid_search.fit(X_train, y_train)

lasso_grid_search = GridSearchCV(lasso_pipeline, param_grid, scoring='r2', n_jobs=-1)
lasso_grid_search.fit(X_train, y_train)


ridge_best_params = ridge_grid_search.best_params_
ridge_best_score = ridge_grid_search.best_score_

lasso_best_params = lasso_grid_search.best_params_
lasso_best_score = lasso_grid_search.best_score_

ridge_train_score = ridge_grid_search.score(X_train, y_train)
ridge_test_score = ridge_grid_search.score(X_test, y_test)

lasso_train_score = lasso_grid_search.score(X_train, y_train)
lasso_test_score = lasso_grid_search.score(X_test, y_test)


print("Model Evaluation Metrics:\n")
print(f"Ridge best params: {ridge_best_params}")
print(f"Ridge best score: {ridge_best_score:.4f}")
print(f"Ridge train score: {ridge_train_score:.4f}")
print(f"Ridge test score: {ridge_test_score:.4f}")
print(f"Lasso best params: {lasso_best_params}")
print(f"Lasso best score: {lasso_best_score:.4f}")
print(f"Lasso train score: {lasso_train_score:.4f}")
print(f"Lasso test score: {lasso_test_score:.4f}")


Model Evaluation Metrics:

Ridge best params: {'model__alpha': 0.01}
Ridge best score: 0.9338
Ridge train score: 0.9831
Ridge test score: 0.9693
Lasso best params: {'model__alpha': 10}
Lasso best score: 0.9304
Lasso train score: 0.9834
Lasso test score: 0.9692


In [136]:
from sklearn.linear_model import ElasticNet

In [149]:

# Ensure preprocessing steps are set up (assuming data and preprocessing setup)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler())]), numerical_features),
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_features)
    ]
)

# Define ElasticNet pipeline
elasticnet_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', ElasticNet(max_iter=10000))])

# Define parameter grid for alpha and l1_ratio
param_grid_elasticnet = {
    'model__alpha': [0.01, 0.1, 1, 10],
    'model__l1_ratio': [0.1, 0.5, 0.7, 0.9, 1.0]
}

# Initialize GridSearchCV
elasticnet_grid_search = GridSearchCV(elasticnet_pipeline, param_grid_elasticnet, scoring='r2', n_jobs=-1)
elasticnet_grid_search.fit(X_train, y_train)

# Extract best parameters and scores
best_params = elasticnet_grid_search.best_params_
best_score = elasticnet_grid_search.best_score_

# Train and test scores
train_score = elasticnet_grid_search.score(X_train, y_train)
test_score = elasticnet_grid_search.score(X_test, y_test)

# Mean squared error on train and test sets
train_mse = mean_squared_error(y_train, elasticnet_grid_search.predict(X_train))
test_mse = mean_squared_error(y_test, elasticnet_grid_search.predict(X_test))

print("ElasticNet Model Evaluation Metrics:\n")
print(f"Best Parameters: {best_params}")
print(f"Cross-Validation R² Score: {best_score:.4f}")
print(f"Training R² Score: {train_score:.4f}")
print(f"Testing R² Score: {test_score:.4f}")
print(f"Training Mean Squared Error (MSE): {train_mse:.2f}")
print(f"Testing Mean Squared Error (MSE): {test_mse:.2f}")

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


ElasticNet Model Evaluation Metrics:

Best Parameters: {'model__alpha': 0.01, 'model__l1_ratio': 0.9}
Cross-Validation R² Score: 0.9316
Training R² Score: 0.9822
Testing R² Score: 0.9692
Training Mean Squared Error (MSE): 8193449482.30
Testing Mean Squared Error (MSE): 12654699442.81


## Test avec Data Corriger de l'inflation

In [150]:
data_cor = data_cor[data_cor['Store'] != 9]

In [151]:
data_cor

Unnamed: 0,Store,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,Unemployment,Year,Month,Day,Week_of_Month
1,13.0,1.405381e+06,0.0,42.38,3.435,7.470,2011.0,3.0,25.0,4.0
3,11.0,5.799825e+05,0.0,84.57,,7.346,,,,
4,6.0,7.741859e+05,0.0,78.89,2.759,7.092,2010.0,5.0,28.0,4.0
5,4.0,1.472361e+06,0.0,,2.756,7.896,2010.0,5.0,28.0,4.0
6,15.0,5.156615e+05,0.0,69.80,4.069,7.658,2011.0,6.0,3.0,1.0
...,...,...,...,...,...,...,...,...,...,...
144,3.0,1.870358e+05,0.0,73.44,3.594,6.034,2012.0,10.0,19.0,3.0
145,14.0,1.232523e+06,0.0,72.62,2.780,8.899,2010.0,6.0,18.0,3.0
147,17.0,6.702398e+05,0.0,57.14,2.841,,2010.0,6.0,11.0,2.0
148,8.0,3.912177e+05,0.0,86.05,3.638,,2011.0,8.0,12.0,2.0


In [155]:
target = 'Weekly_Sales'

categorical_features = ['Store', 'Holiday_Flag']
numerical_features = ['Temperature', 'Fuel_Price', 'Unemployment', 'Year', 'Month', 'Day']

In [156]:
X = data_cor[categorical_features + numerical_features]
y = data_cor[target]

In [157]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=456)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler())]), numerical_features),
        ('cat', OneHotEncoder(drop='first'), categorical_features)
    ]
)


In [158]:
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', LinearRegression())])
pipeline.fit(X_train, y_train)

train_score = pipeline.score(X_train, y_train)
test_score = pipeline.score(X_test, y_test)

cv_scores = cross_val_score(pipeline, X, y, cv=5, scoring='r2')
cv_mean, cv_std = cv_scores.mean(), cv_scores.std()


y_train_pred = pipeline.predict(X_train)
y_test_pred = pipeline.predict(X_test)

train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

print("Model Evaluation Metrics:\n")
print(f"Training R² Score: {train_score:.4f}")
print(f"Testing R² Score: {test_score:.4f}")
print(f"Training Mean Squared Error (MSE): {train_mse:.2f}")
print(f"Testing Mean Squared Error (MSE): {test_mse:.2f}")
print(f"Mean R² Score: {cv_mean:.4f}")
print(f"Standard Deviation of R² Score: {cv_std:.4f}")

Model Evaluation Metrics:

Training R² Score: 0.9761
Testing R² Score: 0.9691
Training Mean Squared Error (MSE): 5008301517.26
Testing Mean Squared Error (MSE): 6400807646.95
Mean R² Score: 0.9477
Standard Deviation of R² Score: 0.0252


In [159]:

param_grid = {
    'model__alpha': [0.01, 0.1, 1, 10] 
}


ridge_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', Ridge(max_iter=100000))])
lasso_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', Lasso(max_iter=100000))])


ridge_grid_search = GridSearchCV(ridge_pipeline, param_grid, scoring='r2', n_jobs=-1)
ridge_grid_search.fit(X_train, y_train)

lasso_grid_search = GridSearchCV(lasso_pipeline, param_grid, scoring='r2', n_jobs=-1)
lasso_grid_search.fit(X_train, y_train)


ridge_best_params = ridge_grid_search.best_params_
ridge_best_score = ridge_grid_search.best_score_

lasso_best_params = lasso_grid_search.best_params_
lasso_best_score = lasso_grid_search.best_score_

ridge_train_score = ridge_grid_search.score(X_train, y_train)
ridge_test_score = ridge_grid_search.score(X_test, y_test)

lasso_train_score = lasso_grid_search.score(X_train, y_train)
lasso_test_score = lasso_grid_search.score(X_test, y_test)


print("Model Evaluation Metrics:\n")
print(f"Ridge best params: {ridge_best_params}")
print(f"Ridge best score: {ridge_best_score:.4f}")
print(f"Ridge train score: {ridge_train_score:.4f}")
print(f"Ridge test score: {ridge_test_score:.4f}")
print(f"Lasso best params: {lasso_best_params}")
print(f"Lasso best score: {lasso_best_score:.4f}")
print(f"Lasso train score: {lasso_train_score:.4f}")
print(f"Lasso test score: {lasso_test_score:.4f}")


Model Evaluation Metrics:

Ridge best params: {'model__alpha': 0.1}
Ridge best score: 0.9297
Ridge train score: 0.9753
Ridge test score: 0.9703
Lasso best params: {'model__alpha': 0.01}
Lasso best score: 0.9202
Lasso train score: 0.9761
Lasso test score: 0.9691
