In [6]:
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
df = pd.read_csv('bus_trafic_clean.csv')
df.columns

In [None]:
df_pred = df[['ecart_horaire_en_secondes', 
            'etat_SAE_du_vehicule', 
            'nom_de_la_ligne',
            #'mne_de_l_arret',
            'latitude', 'longitude', 
            'Heure_estimee_de_passage_a_L_arret',
            #'year', 
            'month', 
            'day', 
            'hours', 
            #'minutes', 
            'jour_semaine', 
            'OPINION'
            ]]

df_pred = df_pred.astype({"Heure_estimee_de_passage_a_L_arret": "datetime64"})
#df_pred = df_pred.astype({"identifiant_SAE_de_ligne": "object"})       

#df_pred["year_passage_a_l_arret"] = df_pred["Heure_estimee_de_passage_a_L_arret"].dt.year
df_pred["month_passage_a_l_arret"] = df_pred["Heure_estimee_de_passage_a_L_arret"].dt.month
df_pred["day_passage_a_l_arret"] = df_pred["Heure_estimee_de_passage_a_L_arret"].dt.day
df_pred["hours_passage_a_l_arret"] = df_pred["Heure_estimee_de_passage_a_L_arret"].dt.hour
#df_pred["minutes_passage_a_l_arret"] = df_pred["Heure_estimee_de_passage_a_L_arret"].dt.minute
df_pred.drop(columns=['Heure_estimee_de_passage_a_L_arret'], inplace=True)

print(df_pred.shape)
df_pred.dtypes


In [None]:
# outlier quantile ecart_horaire_en_secondes
q1 = df_pred['ecart_horaire_en_secondes'].quantile(0.25)
q3 = df_pred['ecart_horaire_en_secondes'].quantile(0.75)
iqr = q3 - q1

print(df_pred.shape)
df_pred = df_pred[(df_pred['ecart_horaire_en_secondes'] >= q1 - 1.5 * iqr) & (df_pred['ecart_horaire_en_secondes'] <= q3 + 1.5 * iqr)]
print(df_pred.shape)


In [None]:
df_pred = pd.get_dummies(df_pred)
df_pred.shape

In [None]:
# corr df_pred
#corr = df_pred.corr()
#corr.style.background_gradient(cmap='coolwarm')

In [None]:
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X = df_pred.drop(columns=['ecart_horaire_en_secondes'])
scaler.fit(X)
X = scaler.transform(X)

y = df_pred['ecart_horaire_en_secondes'].apply(lambda x: 1 if x > 0 else 0)


# plot y distribution
sns.countplot(x=y)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
# arbre 
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


In [None]:
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

import numpy as np
from sklearn.preprocessing import StandardScaler

scalerx = StandardScaler()
scalery = StandardScaler()

X = df_pred.drop(columns=['ecart_horaire_en_secondes'])
y = df_pred['ecart_horaire_en_secondes']
y = np.array(y).reshape(-1,1)
print(y)

scalerx.fit(X)
X = scalerx.transform(X)
scalery.fit(y)
y = scalery.transform(y)

# plot y distribution
#sns.countplot(x=y)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import numpy as np
lr= LinearRegression()

lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred), np.sqrt(mean_squared_error(y_test, y_pred)), mean_absolute_percentage_error(y_test, y_pred)


In [None]:
print(y_pred)
y_pred_inverse = scalery.inverse_transform(y_pred)
print(y_pred_inverse)

In [None]:
# shap for lr with feature names
import shap
shap.initjs()
explainer = shap.LinearExplainer(lr, X_train, feature_dependence="independent")
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test, feature_names=df_pred.drop(columns=['ecart_horaire_en_secondes']).columns)

In [None]:
# decision tree regressor
from sklearn.tree import DecisionTreeRegressor

clf = DecisionTreeRegressor()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

In [None]:
print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

In [None]:
# decision tree regressor
from sklearn.ensemble import RandomForestRegressor

clf = RandomForestRegressor(n_estimators=100)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

In [None]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

In [None]:
print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

In [None]:
importances = list(clf.feature_importances_)
feature_importances = [(feature, round(importance, 2)) for feature, importance in zip(df_pred.drop(columns=['ecart_horaire_en_secondes']).columns, importances)]
feature_importances = sorted(feature_importances, key = lambda x: x[1], reverse = True)
[print('Variable: {:20} Importance: {}'.format(*pair)) for pair in feature_importances];


In [None]:
from sklearn.linear_model import SGDRegressor

sgdr = SGDRegressor().fit(X_train, y_train)

y_pred = sgdr.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))


In [7]:
import pickle
import time
from sklearn.model_selection import GridSearchCV

In [8]:
df = pd.read_csv('bus_trafic_clean.csv')

In [9]:
df_pred = df[['ecart_horaire_en_secondes', 
            'etat_SAE_du_vehicule', 
            'nom_de_la_ligne', 
            'identifiant_du_vehicule',
            'date',
            'month', 
            'day', 
            'hours', 
            'jour_semaine', 
            'OPINION'
            ]]

# outlier quantile ecart_horaire_en_secondes
#q1 = df_pred['ecart_horaire_en_secondes'].quantile(0.25)
#q3 = df_pred['ecart_horaire_en_secondes'].quantile(0.75)
#iqr = q3 - q1

#print(df_pred.shape)
#df_pred = df_pred[(df_pred['ecart_horaire_en_secondes'] >= q1 - 1.5 * iqr) & (df_pred['ecart_horaire_en_secondes'] <= q3 + 1.5 * iqr)]
print(df_pred.shape)



(739410, 10)


In [239]:
df_pred.columns

Index(['ecart_horaire_en_secondes', 'etat_SAE_du_vehicule', 'nom_de_la_ligne',
       'identifiant_du_vehicule', 'date', 'month', 'day', 'hours',
       'jour_semaine', 'OPINION'],
      dtype='object')

In [10]:
df_date = df_pred[['date', 'month', 'day', 'jour_semaine', 'OPINION']].drop_duplicates()

df_etat_SAE_du_vehicule = df_pred[['date', 'etat_SAE_du_vehicule', 'identifiant_du_vehicule']].drop_duplicates()
df_etat_SAE_du_vehicule["count"] = 1
df_etat_SAE_du_vehicule = df_etat_SAE_du_vehicule.groupby(['date', 'etat_SAE_du_vehicule']).sum().reset_index().drop(columns=['identifiant_du_vehicule'])
df_etat_SAE_du_vehicule = df_etat_SAE_du_vehicule.pivot(index='date', columns='etat_SAE_du_vehicule', values='count').reset_index().fillna(0)

df_nom_de_la_ligne = df_pred[['date', 'nom_de_la_ligne', 'identifiant_du_vehicule']].drop_duplicates()
df_nom_de_la_ligne["count"] = 1
df_nom_de_la_ligne = df_nom_de_la_ligne.groupby(['date', 'nom_de_la_ligne']).sum().reset_index().drop(columns=['identifiant_du_vehicule'])
df_nom_de_la_ligne = df_nom_de_la_ligne.pivot(index='date', columns='nom_de_la_ligne', values='count').reset_index().fillna(0)

df_ecart = df_pred[['date', 'ecart_horaire_en_secondes']]
df_ecart = df_ecart.groupby(['date']).sum().reset_index()
#display(df_ecart)

df_pred = df_date.merge(df_etat_SAE_du_vehicule, on='date', how='left').merge(df_nom_de_la_ligne, on='date', how='left').merge(df_ecart, on='date', how='left').drop(columns=['date'])

df_pred = pd.get_dummies(df_pred, columns=['jour_semaine', 'OPINION'], drop_first=True)

Unnamed: 0,month,day,jour_semaine,OPINION,DEV,DEVP,GARE,HC,HL,HLP,...,SOIR TRELAZE <> LORRAINE,SOUCELLES PELLOUAILLES <> GARE,SOULAINES <> GARE,ST CLEMENT St LAMBERT <> GARE,ST LEGER St LAMBERT <> GARE,ST LEZIN SORGES <> SCHWEITZER,ST MARTIN St JEAN <> GARE,ST MATHURIN <> GARE,ST SYLVAIN BANCHAIS <>TRELAZE,ecart_horaire_en_secondes
0,8,5,Lundi,météo défavorable,0.0,9.0,0.0,0.0,0.0,1.0,...,1.0,1.0,0.0,0.0,1.0,3.0,0.0,0.0,8.0,22344.0
1,8,6,Mardi,météo défavorable,7.0,39.0,0.0,6.0,22.0,19.0,...,1.0,8.0,0.0,3.0,5.0,5.0,4.0,1.0,10.0,467078.0
2,8,6,Mardi,météo favorable,7.0,39.0,0.0,6.0,22.0,19.0,...,1.0,8.0,0.0,3.0,5.0,5.0,4.0,1.0,10.0,467078.0
3,8,7,Mercredi,météo favorable,6.0,38.0,0.0,2.0,18.0,22.0,...,1.0,8.0,0.0,3.0,5.0,5.0,4.0,0.0,10.0,377981.0
4,8,8,Jeudi,météo favorable,4.0,37.0,0.0,4.0,11.0,19.0,...,1.0,9.0,0.0,3.0,6.0,5.0,5.0,0.0,9.0,368732.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,12,10,Mardi,météo défavorable,32.0,84.0,0.0,13.0,86.0,35.0,...,1.0,13.0,0.0,8.0,16.0,11.0,8.0,1.0,17.0,1238182.0
186,12,11,Mercredi,météo défavorable,29.0,84.0,0.0,8.0,39.0,29.0,...,1.0,13.0,1.0,6.0,16.0,12.0,9.0,0.0,19.0,1444810.0
187,12,11,Mercredi,météo très défavorable,29.0,84.0,0.0,8.0,39.0,29.0,...,1.0,13.0,1.0,6.0,16.0,12.0,9.0,0.0,19.0,1444810.0
188,12,12,Jeudi,météo très défavorable,13.0,57.0,0.0,5.0,18.0,16.0,...,0.0,8.0,0.0,4.0,10.0,11.0,5.0,0.0,17.0,545116.0


In [11]:
df_pred.columns

Index(['month', 'day', 'DEV', 'DEVP', 'GARE', 'HC', 'HL', 'HLP', 'HLPR',
       'HLPS', 'HS', 'INC', 'LIGN', 'TARR', 'TDEP',
       'A - Remplacement Tram par Bus', 'ARDENNE <> ROSERAIE',
       'BEAUCOUZE <> ST BARTHELEMY', 'BELLE BEILLE <> MONPLAISIR',
       'BELLE BEILLE EXPRESS <> GARES', 'BOUCHEMAINE <> Z I  EST',
       'BRIOLLAY <> GARE', 'CIRCULAIRE VERNEAU GARE EUROPE', 'CORNE <> GARE',
       'CORNE <> GARE TRELAZE', 'D NAVETTE MARCHE MONPLAISIR',
       'DJF  BELLE BEILLE <> MONPLAISIR', 'DJF  TRELAZE <>  ST SYLVAIN',
       'DJF LORRAINE <> ST BARTHELEMY', 'DJF MURS ERIGNE <> MONPLAISIR',
       'DJF VILLAGE SANTE <> LORRAINE', 'ECOUFLANT GRIMORELLE <> GARE',
       'ECUILLE SOULAIRE <> GARE', 'ESPACE ANJOU <> EVENTARD',
       'EXPRESS CHANTOURTEAU <> GARES', 'EXPRESS MONTREUIL <> GARES',
       'FENEU CANTENAY <> GARE', 'HOPITAL <> MONTREUIL JUIGNE',
       'LA MEMBROLLE <> GARE', 'LAC MAINE <> STE GEMMES CL ANJOU',
       'M-MARCILLE <> ST AUBIN LA SALLE', 'MURS ERIGNE 

In [245]:
df_result = pd.DataFrame(columns=["model", "CV", "R2", "MSE", "MAE", "MAPE", "Temps d'execution"])

In [246]:
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import StandardScaler

scalerx = StandardScaler()
scalery = StandardScaler()

X = df_pred.drop(columns=['ecart_horaire_en_secondes'])
y = df_pred['ecart_horaire_en_secondes']
y = np.array(y).reshape(-1,1)

scalerx.fit(X)
X = scalerx.transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42            )

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

time_start = time.time()

lr= LinearRegression()

lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

time_end = time.time()

tab = ["lr", "false", r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), mean_absolute_percentage_error(y_test, y_pred), time_end - time_start]
df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)

In [None]:
time_start = time.time()

param_grid = {'fit_intercept': [True, False], 'normalize': [True, False], 'copy_X': [True, False]}
grid = GridSearchCV(LinearRegression(), param_grid, refit = True, verbose = 3, cv=5)
grid.fit(X_train, y_train)
y_pred = grid.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

time_end = time.time()

tab = ["lr_grid", "true", r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), mean_absolute_percentage_error(y_test, y_pred), time_end - time_start]
df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)

In [None]:
from sklearn.linear_model import Ridge

time_start = time.time()

ridge = Ridge()
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

time_end = time.time()

tab = ["ridge", "false", r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), mean_absolute_percentage_error(y_test, y_pred), time_end - time_start]
df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)

In [None]:
time_start = time.time()

param_grid = {'alpha': [0.1, 1, 10, 100, 1000], 'fit_intercept': [True, False], 'normalize': [True, False], 'copy_X': [True, False]}
grid = GridSearchCV(Ridge(), param_grid, refit = True, verbose = 3, cv=5)
grid.fit(X_train, y_train)
y_pred = grid.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

time_end = time.time()

tab = ["ridge_grid", "true", r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), mean_absolute_percentage_error(y_test, y_pred), time_end - time_start]
df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)

In [247]:
from sklearn.ensemble import RandomForestRegressor

time_start = time.time()

clf = RandomForestRegressor(n_estimators=100)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

time_end = time.time()

tab = ["RandomForestRegressor", "false", r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), mean_absolute_percentage_error(y_test, y_pred), time_end - time_start]
df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)

Pkl_Filename = "Model_1_full_regressor.pkl"  
with open(Pkl_Filename, 'wb') as file:  
    pickle.dump(clf, file)

  clf.fit(X_train, y_train)


R2:  0.9827743434723129
MSE:  4355465908.57512
MAE:  48070.39078947369
MAPE:  0.0889353756499897


  df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)


In [None]:
time_start = time.time()

param_grid = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300, 1000]
}

rf = RandomForestRegressor()
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid,
                            cv = 3, n_jobs = -1, verbose = 2)

grid_search.fit(X_train, y_train)

print(grid_search.best_params_)

y_pred = grid_search.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

time_end = time.time()

tab = ["RandomForestRegressor", "true", r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), mean_absolute_percentage_error(y_test, y_pred), time_end - time_start]
df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)

In [None]:
from sklearn.linear_model import SGDRegressor

time_start = time.time()

sgdr = SGDRegressor().fit(X_train, y_train)

y_pred = sgdr.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

time_end = time.time()

tab = ["SGDRegressor", "false", r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), mean_absolute_percentage_error(y_test, y_pred), time_end - time_start]
df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)

In [None]:
# grid search SGDRegressor
from sklearn.model_selection import GridSearchCV

time_start = time.time()

param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'loss': ['huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'],
    'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'],
    'eta0': [0.01, 0.1, 1, 10, 100]
}

sgdr = SGDRegressor()
grid_search = GridSearchCV(estimator = sgdr, param_grid = param_grid,
                            cv = 3, n_jobs = -1, verbose = 2)
    
grid_search.fit(X_train, y_train)
y_pred = grid_search.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

time_end = time.time()

tab = ["SGDRegressor", "true", r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), mean_absolute_percentage_error(y_test, y_pred), time_end - time_start]
df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)

In [None]:
from sklearn.neural_network import MLPRegressor

time_start = time.time()

mlp = MLPRegressor(hidden_layer_sizes=(100, 100, 100), max_iter=1000, random_state=42)

mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

time_end = time.time()

tab = ["MLPRegressor", "false", r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), mean_absolute_percentage_error(y_test, y_pred), time_end - time_start]
df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)

In [None]:
time_start = time.time()

param_grid = {'hidden_layer_sizes': [(100, 100, 100), (100, 100, 100, 100), (100, 100, 100, 100, 100)],
                'max_iter': [1000, 2000, 3000],
                'random_state': [42]}
grid = GridSearchCV(MLPRegressor(), param_grid, refit=True, verbose=3, n_jobs=-1)

grid.fit(X_train, y_train)
y_pred = grid.predict(X_test)

print("R2: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))
print("MAPE: ", mean_absolute_percentage_error(y_test, y_pred))

time_end = time.time()

tab = ["MLPRegressor", "true", r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), mean_absolute_percentage_error(y_test, y_pred), time_end - time_start]
df_result = df_result.append(pd.Series(tab, index=df_result.columns), ignore_index=True)

In [None]:
df_result["Temps d'execution"] = df_result["Temps d'execution"].apply(lambda x: round(x, 2))

# export result to csv
df_result.to_csv("result.csv", index=False)


In [220]:
df_result

Unnamed: 0,model,CV,R2,MSE,MAE,MAPE,Temps d'execution
0,lr,False,0.929462,17835310000.0,96371.04,0.230179,0.01
1,lr_grid,True,-7.068861,2040192000000.0,1277254.0,3.365445,0.16
2,ridge,False,0.951871,12169410000.0,84553.54,0.214227,0.0
3,ridge_grid,True,0.966365,8504410000.0,70769.3,0.195885,0.39
4,RandomForestRegressor,False,0.981549,4665257000.0,50562.47,0.091107,0.23
5,RandomForestRegressor,True,0.961015,9857256000.0,75723.35,0.133046,161.94
6,SGDRegressor,False,0.961764,9667776000.0,80146.94,0.21049,0.01
7,SGDRegressor,True,0.969026,7831578000.0,67735.62,0.165246,37.68
8,MLPRegressor,False,0.920689,20053660000.0,101576.8,0.168595,3.62
9,MLPRegressor,True,0.952113,12108120000.0,78130.77,0.14254,121.62
