In [1]:
import pandas as pd
import numpy as np
import sqlite3

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, \
                            mean_absolute_error
from sklearn.metrics import make_scorer
from sklearn.linear_model import \
    ElasticNet, \
    Lasso

import time

In [2]:
clean = '../../datas/CLEAN/E2/'

con = sqlite3.connect(clean + 'E2.db')
X = pd.read_sql('select * from X', con, index_col='index')
y = pd.read_sql('select * from y', con, index_col='index')

con.close()

X.shape, y.shape

((99025, 20), (99025, 1))

In [3]:
model = ElasticNet(random_state=0)
# model = Lasso(random_state=0)

scorers = {
    'MSE': make_scorer(mean_squared_error, greater_is_better=False),
    'MAE': make_scorer(mean_absolute_error, greater_is_better=False)
}
params = {
    'alpha': [1e-5, 1e-2, 1e-1, 1],
    # 'alpha': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
    'l1_ratio': np.arange(0.1, 0.9, 0.25)
}
grid = GridSearchCV(
    estimator=model, 
    param_grid=params, 
    cv=3,
    scoring=scorers,
    refit='MSE',
    n_jobs=-1
)

In [4]:
start_time = time.time()

result = grid.fit(X, y)

print("--- %.3s seconds ---" % (time.time() - start_time))

--- 5.2 seconds ---


In [5]:
print(grid.best_params_, grid.best_score_, '\n')
stop_int = 10
for param, squared, absolute, stop in zip(grid.cv_results_['params'], grid.cv_results_['mean_test_MSE'], grid.cv_results_['mean_test_MAE'], range(stop_int)):
    if stop == stop_int: break
    print(param, '\t%.2f\t%.2f' %(squared, absolute))

{'alpha': 0.01, 'l1_ratio': 0.8499999999999999} -35.70650911989904 

{'alpha': 1e-05, 'l1_ratio': 0.1} 	-36.17	-4.40
{'alpha': 1e-05, 'l1_ratio': 0.35} 	-36.17	-4.40
{'alpha': 1e-05, 'l1_ratio': 0.6} 	-36.18	-4.40
{'alpha': 1e-05, 'l1_ratio': 0.8499999999999999} 	-36.18	-4.40
{'alpha': 0.01, 'l1_ratio': 0.1} 	-35.73	-4.40
{'alpha': 0.01, 'l1_ratio': 0.35} 	-35.71	-4.40
{'alpha': 0.01, 'l1_ratio': 0.6} 	-35.71	-4.40
{'alpha': 0.01, 'l1_ratio': 0.8499999999999999} 	-35.71	-4.41
{'alpha': 0.1, 'l1_ratio': 0.1} 	-36.48	-4.46
{'alpha': 0.1, 'l1_ratio': 0.35} 	-36.46	-4.46


# Sauvegarde du modèle

In [6]:
from joblib import dump, load
from pathlib import Path

dossier_pickle = 'pickle/'
Path(dossier_pickle).mkdir(parents=True, exist_ok=True)
dump(grid.best_estimator_, dossier_pickle + 'model' + '.pkl')

['pickle/model.pkl']