In [31]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline

In [32]:
# Définition des functions à utiliser 

# MAPE
def score_mape(predictions, observations):
    y_true, y_pred  = np.array(observations), np.array(predictions)
    MAPE = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return np.round(MAPE, 2)

# RMSE
def score_rmse(predictions, observations):
    return np.sqrt(((predictions - observations) ** 2).mean())

# MSE
def score_mse(predictions, observations):
    return mean_squared_error(predictions,observations)


In [33]:
# préparation des données.

df = pd.read_csv("./Data/Donnees_Quotidiennes.csv", index_col=0,parse_dates=['Date'],sep=";")

# BD filtrée par Ile de France
df = df[df['Code INSEE région'] == 11]

# Creer les variables Année, Mois, Jour
df['Annee'] = pd.to_datetime(df.Date).dt.year
df["Mois"] = pd.to_datetime(df.Date).dt.month
df['Jour']=pd.to_datetime(df['Date']).dt.weekday

# Dichotomisation des colonnes Jour, Mois, Annee
df = df.join(pd.get_dummies(df.Jour, prefix='J'))
df = df.join(pd.get_dummies(df.Mois, prefix='M'))
df = df.join(pd.get_dummies(df.Annee, prefix='A'))

# Suppresion des variables devenues no necessaires
df = df.drop(['Région','Jour', 'Mois','Annee', 'Code INSEE région'], axis=1)

## paramétrage de la date en tant qu'index.
df.set_index('Date',inplace=True)

# DF pour les séries temporelles, car traitement different
df_st = df[['Consommation (MW)', 'Température (°C)']]

In [34]:
# Spliter les données pour les séries temporelles
# Découpage des variables explicatives et la variable cible
target_st = df_st[['Consommation (MW)']]

train=target_st[target_st.index < '2020-11-02']
test=target_st[target_st.index > '2020-11-01']

In [35]:
# Spliter les données pour les séries temporelles avec variable externe
# Découpage des variables explicatives et la variable cible
target_st_x = df_st[['Consommation (MW)', 'Température (°C)']]

train_x=target_st_x[target_st_x.index < '2020-11-02']
test_x=target_st_x[target_st_x.index > '2020-11-01']

# Séparation des données meteo et conso
train_conso = train_x['Consommation (MW)']
train_meteo = train_x['Température (°C)']
test_conso = test_x['Consommation (MW)']
test_meteo = test_x['Température (°C)']

In [36]:
# normalisation des colonnes ['Consommation (MW)', 'Température (°C)']
scaller = preprocessing.StandardScaler().fit(df[['Consommation (MW)', 'Température (°C)']])
df[['Consommation (MW)','Température (°C)']] = scaller.fit_transform(df[['Consommation (MW)','Température (°C)']])

# Découpage des variables explicatives et la variable cible
target = df[['Consommation (MW)']]
data = df.drop(['Consommation (MW)'], axis=1)



## Modèle Ridge

In [37]:
# modèle de régression RidgeCV
from sklearn.linear_model import RidgeCV

# Spliter les données
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.01, random_state = 100, shuffle=False)

ridge_reg = RidgeCV(alphas= (0.001, 0.01, 0.1, 0.3, 0.7, 1, 10, 50, 100))
ridge_reg.fit(X_train, y_train) 

# Afficher l'intercept et les coefficients estimés pour chaque variable de data
coeffs = list(ridge_reg.coef_)
coeffs = np.insert(coeffs, 0, ridge_reg.intercept_[0])

feats = list(data.columns)
feats.insert(0, 'intercept')

valeurs_ridge = pd.DataFrame({'valeur estimée': coeffs}, index = feats)

ridge_pred_train = ridge_reg.predict(X_train)
ridge_pred_test = ridge_reg.predict(X_test)


#### Résultats Ridge

In [38]:
moyenne = scaller.mean_[0]
ecart = scaller.scale_[0]

ridge_values = []
for i in ridge_pred_test:
    for x in i :
        ridge_values.append(x)
    ridge_values

ridge_model = pd.DataFrame({'predits': ridge_values}, index = X_test.index)

ridge_model_1 = pd.DataFrame({'Reel': np.round((y_test['Consommation (MW)']*ecart)+moyenne),
    'predits' : np.round((ridge_model['predits']*ecart)+moyenne),
    'T' : (X_test['Température (°C)']*scaller.scale_[-1])+scaller.mean_[-1]}, index = X_test.index)

In [39]:
ridge_model_1.head()

Unnamed: 0_level_0,Reel,predits,T
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-11-02 00:00:00+00:00,7069.0,7698.0,16.525
2020-11-03 00:00:00+00:00,7971.0,8896.0,8.875
2020-11-04 00:00:00+00:00,8659.0,9252.0,6.5875
2020-11-05 00:00:00+00:00,8654.0,9090.0,7.5
2020-11-06 00:00:00+00:00,8597.0,8937.0,7.8375


#### Critères d'évaluation Ridge

##### Score R2

In [40]:
print("score train :", ridge_reg.score(X_train, y_train))
print("score test :", ridge_reg.score(X_test, y_test))

score_r_ridge_train = ridge_reg.score(X_train, y_train)
score_r_ridge_test = ridge_reg.score(X_test, y_test)

score train : 0.898392924430891
score test : 0.5348099110977844


##### MSE

In [41]:
# MSE échantillon Train avec normalisation
# pas prendre

score_mse_ridge_train = score_mse(ridge_pred_train, y_train)
score_mse_ridge_train

0.10243166316946213

In [42]:
# MSE échantillon Test

score_mse_ridge_test = score_mse(ridge_model_1.predits, ridge_model_1.Reel)
score_mse_ridge_test

317871.1379310345

##### RMSE

In [43]:
#Racine carrée de la Moyenne des résidus au carré.
# RMSE

score_rmse_ridge = score_rmse(ridge_model_1.predits, ridge_model_1.Reel)
score_rmse_ridge

563.8006189523336

##### MAPE

In [44]:
# MAPE

score_mape_ridge = score_mape(ridge_model_1.predits, ridge_model_1.Reel)
score_mape_ridge

5.75

## Modèle Lasso

In [45]:
# modèle de régression LassoCV
from sklearn.linear_model import LassoCV

# Spliter les données
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.01, random_state = 200, shuffle=False)

#alpha = model_lasso.alpha_ 0.0009265291644131591
lasso_model = LassoCV(alphas = [0.0009265291644131591], cv = 10).fit(X_train, y_train)

# Afficher l'intercept et les coefficients estimés pour chaque variable de data
coeffs = list(lasso_model.coef_)
coeffs = np.insert(coeffs, 0, lasso_model.intercept_)

feats = list(data.columns)
feats.insert(0, 'intercept')

valeurs_lasso = pd.DataFrame({'valeur estimée': coeffs}, index = feats)

lasso_pred_train = lasso_model.predict(X_train)
lasso_pred_test = lasso_model.predict(X_test)

  return f(*args, **kwargs)


#### Résultats Lasso

In [46]:
moyenne = scaller.mean_[0]
ecart = scaller.scale_[0]

lasso_values = []
for i in lasso_pred_test:
    lasso_values.append(i)
lasso_values

lassomodel = pd.DataFrame({'predits': lasso_values}, index = X_test.index)

lasso_model_1 = pd.DataFrame({'Reel': np.round((y_test['Consommation (MW)']*ecart)+moyenne),
    'predits' : np.round((lassomodel['predits']*ecart)+moyenne),
    'T' : (X_test['Température (°C)']*scaller.scale_[-1])+scaller.mean_[-1]}, index = X_test.index)

In [47]:
lasso_model_1.head()

Unnamed: 0_level_0,Reel,predits,T
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-11-02 00:00:00+00:00,7069.0,7662.0,16.525
2020-11-03 00:00:00+00:00,7971.0,8878.0,8.875
2020-11-04 00:00:00+00:00,8659.0,9248.0,6.5875
2020-11-05 00:00:00+00:00,8654.0,9082.0,7.5
2020-11-06 00:00:00+00:00,8597.0,8939.0,7.8375


#### Critères d'évaluation Lasso

##### Score R2

In [48]:
# score R2
print("score train:",lasso_model.score(X_train, y_train))
print("score test:", lasso_model.score(X_test, y_test))

score_r_lasso_train = lasso_model.score(X_train, y_train)
score_r_lasso_test = lasso_model.score(X_test, y_test)

score train: 0.8980750661200599
score test: 0.5626842341051221


##### MSE

In [49]:
# MSE échantillon Train avec normalisation
# pas prendre
score_mse_lasso_train = score_mse(lasso_pred_train, y_train)
score_mse_lasso_train

0.10275210104495751

In [50]:
# MSE échantillon Test
score_mse_lasso_test = score_mse(lasso_model_1.predits, lasso_model_1.Reel)
score_mse_lasso_test

298859.3103448276

##### RMSE

In [51]:
# Racine carrée de la Moyenne des résidus au carré.
# RMSE
score_rmse_lasso = score_rmse(lasso_model_1.predits, lasso_model_1.Reel)
score_rmse_lasso

546.6802633576848

##### MAPE

In [52]:
score_mape_lasso = score_mape(lasso_model_1.predits, lasso_model_1.Reel)
score_mape_lasso

5.57

## Modèle Elastic Net

In [53]:
# modèle de régression Elastic Net, validation croisée à 8 échantillons
from sklearn.linear_model import ElasticNetCV

# Spliter les données
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.01, random_state = 300, shuffle=False)

model_ElasticNet = ElasticNetCV(cv=8, l1_ratio = (0.1, 0.25, 0.5, 0.7, 0.75, 0.8, 0.85, 0.9, 0.99), 
                        alphas= (0.001,0.01,0.02,0.025,0.05,0.1,0.25,0.5,0.8,1.0))
model_ElasticNet.fit(X_train, y_train)

# Afficher l'intercept et les coefficients estimés pour chaque variable de data

coeffs = list(model_ElasticNet.coef_)
coeffs.insert(0, model_ElasticNet.intercept_)
feats = list(data.columns)
feats.insert(0, 'intercept')

values_EN = pd.DataFrame({'valeur estimée': coeffs}, index = feats)

pred_train_EN = model_ElasticNet.predict(X_train)
pred_test_EN = model_ElasticNet.predict(X_test)


  return f(*args, **kwargs)


#### Résultats Elastic Net

In [54]:
moyenne = scaller.mean_[0]
ecart = scaller.scale_[0]

EN_model = pd.DataFrame({'Reel': np.round((y_test['Consommation (MW)']*ecart)+moyenne), 
            'predits' : np.round((pred_test_EN*ecart)+moyenne), 
            'T' : (X_test['Température (°C)']*scaller.scale_[-1])+scaller.mean_[-1]}, index = X_test.index)



In [55]:
EN_model.head()

Unnamed: 0_level_0,Reel,predits,T
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-11-02 00:00:00+00:00,7069.0,7402.0,16.525
2020-11-03 00:00:00+00:00,7971.0,8851.0,8.875
2020-11-04 00:00:00+00:00,8659.0,9296.0,6.5875
2020-11-05 00:00:00+00:00,8654.0,9106.0,7.5
2020-11-06 00:00:00+00:00,8597.0,8946.0,7.8375


### Critères d'évaluation Elastic Net

#### Score R2

In [56]:
# score R2
print("score train:",model_ElasticNet.score(X_train, y_train))
print("score test:", model_ElasticNet.score(X_test, y_test))

score_r_en_train = model_ElasticNet.score(X_train, y_train)
score_r_en_test = model_ElasticNet.score(X_test, y_test)

score train: 0.8894895702404388
score test: 0.6584148017806617


#### MSE

In [57]:
# MSE échantillon Train avec normalisation
# Pas prendre

score_mse_en_train = score_mse(pred_train_EN, y_train)
score_mse_en_train

0.11140727212589278

In [58]:
#MSE échantillon Test
score_mse_en_test = score_mse(EN_model.predits, EN_model.Reel)
score_mse_en_test

233417.10344827586

#### RMSE

In [59]:
score_rmse_en = score_rmse(EN_model.predits, EN_model.Reel)
score_rmse_en

483.13259406531023

#### MAPE

In [60]:
score_mape_en = score_mape(EN_model.predits, EN_model.Reel)
score_mape_en

4.87

## Modèle SGDRegressor

In [61]:
# modèle de régression SGD
from sklearn.linear_model import SGDRegressor

# Spliter les données
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.01, random_state = 300, shuffle=False)

model_sgd = SGDRegressor(penalty='elasticnet', l1_ratio=0.85)
model_sgd.fit(X_train, y_train)

# Afficher l'intercept et les coefficients estimés pour chaque variable de data
coeffs = list(model_sgd.coef_)
coeffs.insert(0, model_sgd.intercept_)
feats = list(data.columns)
feats.insert(0, 'intercept')

values_sgd = pd.DataFrame({'valeur estimée': coeffs}, index = feats)

pred_train_sgd = model_sgd.predict(X_train)
pred_test_sgd = model_sgd.predict(X_test)


  return f(*args, **kwargs)


### Résultats SGDRegressor

In [62]:
moyenne = scaller.mean_[0]
ecart = scaller.scale_[0]

sgd_model = pd.DataFrame({'Reel': np.round((y_test['Consommation (MW)']*ecart)+moyenne), 
            'predits' : np.round((pred_test_sgd*ecart)+moyenne), 
            'T' : (X_test['Température (°C)']*scaller.scale_[-1])+scaller.mean_[-1]}, index = X_test.index)

In [63]:
sgd_model.head()

Unnamed: 0_level_0,Reel,predits,T
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-11-02 00:00:00+00:00,7069.0,7326.0,16.525
2020-11-03 00:00:00+00:00,7971.0,8864.0,8.875
2020-11-04 00:00:00+00:00,8659.0,9328.0,6.5875
2020-11-05 00:00:00+00:00,8654.0,9134.0,7.5
2020-11-06 00:00:00+00:00,8597.0,8965.0,7.8375


### Critères d'évaluation SGDRegressor

#### Score R2

In [64]:
# score R2
print("score train:",model_sgd.score(X_train, y_train))
print("score test:", model_sgd.score(X_test, y_test))

score_r_en_train = model_sgd.score(X_train, y_train)
score_r_en_test = model_sgd.score(X_test, y_test)

score train: 0.8890043636281671
score test: 0.66446198698107


#### MSE

In [65]:
# MSE échantillon Train avec normalisation
# Pas prendre

score_sgd_en_train = score_mse(pred_train_sgd, y_train)
score_sgd_en_train

0.11189641641035751

In [66]:
#MSE échantillon Test
score_mse_sgd_test = score_mse(sgd_model.predits, sgd_model.Reel)
score_mse_sgd_test

229272.1724137931

#### RMSE

In [67]:
score_rmse_sgd = score_rmse(sgd_model.predits, sgd_model.Reel)
score_rmse_sgd

478.82373835660354

#### MAPE

In [68]:
score_mape_sgd = score_mape(sgd_model.predits, sgd_model.Reel)
score_mape_sgd

4.64

## Modèle ARIMA

In [69]:
#### Avec un 
model_arima = SARIMAX(train, order=(2,1,3), enforce_stationarity=False, enforce_invertibility=False, freq='D', trend='n').fit()
# "Steps" concerne le nombre de predictions à efectuer
forecast_arima = model_arima.get_forecast(steps=29)

# Determiner nos intervalles de confiance
mean_forecast_arima = forecast_arima.predicted_mean
confidence_intervals_arima = forecast_arima.conf_int()
lower_limits_arima = confidence_intervals_arima['lower Consommation (MW)']
upper_limits_arima = confidence_intervals_arima['upper Consommation (MW)']




### Résultats ARIMA

In [70]:
model_arima_df = pd.DataFrame({'Reel': test['Consommation (MW)'], 
            'predits' : mean_forecast_arima.values,
            'lower_limits' : lower_limits_arima,
            'upper_limits' : upper_limits_arima }, index = test.index)

### Critères d'évaluation ARIMA

#### MSE

In [71]:
# MSE échantillon Test
score_mse_arima_test = score_mse(model_arima_df.predits, model_arima_df.Reel)
score_mse_arima_test

1955230.5064087296

#### RMSE

In [72]:
score_rmse_arima = score_rmse(model_arima_df.predits, model_arima_df.Reel)
score_rmse_arima

1398.2955719048564

#### MAPE

In [73]:
score_mape_arima = score_mape(model_arima_df.predits, model_arima_df.Reel)
score_mape_arima

13.27

## Modèle SARIMA

In [74]:
#### Avec un modèle SARIMA (3,1,0)(3,1,0)[7]

model_sarima= SARIMAX(train, order=(3,1,0), seasonal_order=(3,1,0,7)).fit()

# "Steps" concerne le nombre de predictions à efectuer
forecast_sarima = model_sarima.get_forecast(steps=29)

# Determiner nos intervalles de confiance
mean_forecast_sarima = forecast_sarima.predicted_mean
confidence_intervals_sarima = forecast_sarima.conf_int()
lower_limits_sarima = confidence_intervals_sarima['lower Consommation (MW)']
upper_limits_sarima = confidence_intervals_sarima['upper Consommation (MW)']



### Résultats SARIMA

In [75]:
model_sarima_df = pd.DataFrame({'Reel': test['Consommation (MW)'], 
            'predits' : mean_forecast_sarima.values,
            'lower_limits' : lower_limits_sarima,
            'upper_limits' : upper_limits_sarima }, index = test.index)

### Critères d'évaluation SARIMA

#### MSE

In [76]:
# MSE échantillon Test
score_mse_sarima_test = score_mse(model_sarima_df.predits, model_sarima_df.Reel)
score_mse_sarima_test

2110887.2569592926

#### RMSE

In [77]:
score_rmse_sarima = score_rmse(model_sarima_df.predits, model_sarima_df.Reel)
score_rmse_sarima

1452.889278974586

#### MAPE

In [78]:
score_mape_sarima = score_mape(model_sarima_df.predits, model_sarima_df.Reel)
score_mape_sarima

13.49

## Modèle SARIMAX

In [79]:
#### Avec un modèle SARIMAX (3,1,0)(3,1,0)[7]
model_sarimax = SARIMAX(train_conso, order=(3,1,0), seasonal_order=(3,1,0,7), exog=train_meteo, 
                enforce_stationarity=False, enforce_invertibility=False, freq='D', trend='n').fit()

# "Steps" concerne le nombre de predictions à efectuer
forecast_sarimax = model_sarimax.get_forecast(steps=29, exog = test_meteo)

# Determiner nos intervalles de confiance
mean_forecast_sarimax = forecast_sarimax.predicted_mean
confidence_intervals_sarimax = forecast_sarimax.conf_int()
lower_limits_sarimax = confidence_intervals_sarimax['lower Consommation (MW)']
upper_limits_sarimax = confidence_intervals_sarimax['upper Consommation (MW)']



### Résultats SARIMAX

In [80]:
model_sarimax_df = pd.DataFrame({'Reel': test_x['Consommation (MW)'], 
            'predits' : mean_forecast_sarimax.values,
            'lower_limits' : lower_limits_sarimax,
            'upper_limits' : upper_limits_sarimax,
            'T' : test_x['Température (°C)'] }, index = test_x.index)

### Critères d'évaluation SARIMAX

#### MSE

In [81]:
# MSE échantillon Test
score_mse_sarimax = score_mse(model_sarimax_df.predits, model_sarimax_df.Reel)
score_mse_sarimax

483046.08348483045

#### RMSE

In [82]:
score_rmse_sarimax = score_rmse(model_sarimax_df.predits, model_sarimax_df.Reel)
score_rmse_sarimax

695.0151678091856

#### MAPE

In [83]:
score_mape_sarimax = score_mape(model_sarimax_df.predits, model_sarimax_df.Reel)
score_mape_sarimax

6.39

## Scores Comparaison

In [92]:
lists_scores = [(score_mape_ridge,score_mse_ridge_test,score_rmse_ridge,'Ridge'),
    (score_mape_lasso,score_mse_lasso_test,score_rmse_lasso,'Lasso'),
    (score_mape_en,score_mse_en_test,score_rmse_en,'Elastic_Net'),
    (score_mape_sgd,score_mse_sgd_test,score_rmse_sgd,'SGDRegressor'),
    (score_mape_arima,score_mse_arima_test,score_rmse_arima,'Arima'),
    (score_mape_sarima,score_mse_sarima_test,score_rmse_sarima,'Sarima'),
    (score_mape_sarimax,score_mse_sarimax,score_rmse_sarimax,'Sarimax')
    ]

columns_comparaison = ['MAPE','MSE','RMSE','Modele']


comparaison_scores = pd.DataFrame( lists_scores, columns = columns_comparaison)
comparaison_scores.MSE = np.round(comparaison_scores.MSE,2)
comparaison_scores.RMSE = np.round(comparaison_scores.RMSE,2)
comparaison_scores

Unnamed: 0,MAPE,MSE,RMSE,Modele
0,5.75,317871.14,563.8,Ridge
1,5.57,298859.31,546.68,Lasso
2,4.87,233417.1,483.13,Elastic_Net
3,4.64,229272.17,478.82,SGDRegressor
4,13.27,1955230.51,1398.3,Arima
5,13.49,2110887.26,1452.89,Sarima
6,6.39,483046.08,695.02,Sarimax


## Graphique

In [85]:
from math import pi
from bokeh.plotting import figure, output_notebook, show, ColumnDataSource, output_file, save
from bokeh.models import Legend, DatetimeTickFormatter, formatters, HoverTool, LinearAxis, Range1d, LabelSet, Label, Arrow, NormalHead, OpenHead
from bokeh.palettes import Bokeh

output_notebook()

### Comparaison modèles

In [93]:
Bokeh[7]

('#EC1557', '#F05223', '#F6A91B', '#A5CD39', '#20B254', '#00AAAE', '#892889')

In [94]:
#Source
source1 = ColumnDataSource(ridge_model_1)
source2 = ColumnDataSource(lasso_model_1)
source3 = ColumnDataSource(EN_model)
source4 = ColumnDataSource(model_arima_df)
source5 = ColumnDataSource(model_sarima_df)
source6 = ColumnDataSource(model_sarimax_df)
source7 = ColumnDataSource(sgd_model)


# List de tools
TOOLS="crosshair,pan,wheel_zoom,box_zoom,reset"


y_overlimit = 0.05 
p = figure(plot_width = 900, plot_height = 550,     
           title = "Comparaison modèles",                    
           x_axis_label = 'Date', x_axis_type="datetime",
           y_axis_label = 'Consommation Moyenne',
           toolbar_location="below",
           tools=TOOLS)  


p.title.text_color = "darkblue"
p.title.text_font = "times"
p.title.text_font_size = "20px"
p.title.align = 'center'


p.xaxis.major_label_orientation = pi/4
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.xaxis.ticker.desired_num_ticks = 40

#p.varea(x = 'Date', y1 = 'lower_limits', y2='upper_limits', color='pink', alpha=0.5, source = source6)

p.line(x='Date', y = 'Reel', color = "navy", legend_label = "Valeurs réellees", source = source6)   
#p.circle(x='Date', y ='Reel', color = "navy",fill_color='white', size=8, source = source6)


#Ridge
p.line(x='Date', y ='predits', color = "#EC1557", legend_label = "Ridge", source = source1) 
#p.circle(x='Date', y='predits', color = "#EC1557", fill_color='white',size=8, source = source1)

#Lasso 
p.line(x='Date', y ='predits', color = "#892889",legend_label = "Lasso", source = source2) 
#p.circle(x='Date', y='predits', color = "#892889", fill_color='white',size=8, source = source2)

#Elastic Net
p.line(x='Date', y ='predits', color = "#F6A91B",legend_label = "Elastic", source = source3) 
#p.circle(x='Date', y='predits',  color = "#F6A91B", fill_color='white',size=8, source = source3)

#SGD
p.line(x='Date', y ='predits', color = "#892889",legend_label = "SGD", source = source7) 
#p.circle(x='Date', y='predits',  color = "#892889", fill_color='white',size=8, source = source7)

#Arima
p.line(x='Date', y ='predits', color = "#A5CD39", legend_label = "ARIMA", source = source4) 
#p.circle(x='Date', y='predits', color = "#A5CD39", fill_color='white',size=8, source = source4)

#Sarima
p.line(x='Date', y ='predits', color = "#20B254", legend_label = "SARIMA", source = source5) 
#p.circle(x='Date', y='predits', color = "#20B254", fill_color='white',size=8, source = source5)

#Sarimax
p.line(x='Date', y ='predits', color = "#00AAAE", legend_label = "SARIMAX", source = source6) 
#p.circle(x='Date', y='predits', color = "#00AAAE",fill_color='white',size=8, source = source6)



# Activation de l'interaction avec la légende
p.legend.location = "top_left"
p.legend.click_policy = 'hide'

# Style hover
p.add_tools(HoverTool(
    tooltips=[('Date', '@Date{%Y-%m-%d}'),
        ('Prédiction', '@predits{0.00}'),
        ('Valeur réelle', '@Reel{0.00}')],
    formatters={'@Date': 'datetime'}
))

show(p);

### Comparaison modèles, avec variable méteo

In [108]:
#Source
source1 = ColumnDataSource(ridge_model_1)
source2 = ColumnDataSource(lasso_model_1)
source3 = ColumnDataSource(EN_model)
source7 = ColumnDataSource(sgd_model)
source4 = ColumnDataSource(model_arima_df)
source5 = ColumnDataSource(model_sarima_df)
source6 = ColumnDataSource(model_sarimax_df)




# List de tools
TOOLS="crosshair,pan,wheel_zoom,box_zoom,reset"


y_overlimit = 0.05 
p = figure(plot_width = 900, plot_height = 550,     
           title = "Comparaison Ridge, Lasso, Elastic-Net, SGDR",                    
           x_axis_label = 'Date', x_axis_type="datetime",
           y_axis_label = 'Consommation Moyenne',
           toolbar_location="below",
           tools=TOOLS)  


p.title.text_color = "darkblue"
p.title.text_font = "times"
p.title.text_font_size = "20px"
p.title.align = 'center'

p.xaxis.major_label_orientation = pi/4
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.xaxis.ticker.desired_num_ticks = 29

#p.varea(x = 'Date', y1 = 'lower_limits', y2='upper_limits', color='pink', alpha=0.5, source = source6)

p.line(x='Date', y = 'Reel', color = "navy", legend_label = "Valeurs réellees", source = source6)   
#p.circle(x='Date', y ='Reel', color = "navy",fill_color='white', size=8, source = source6)


#Ridge
p.line(x='Date', y ='predits', color = "#EC1557", legend_label = "Ridge", source = source1) 
#p.circle(x='Date', y='predits', color = "#EC1557", fill_color='white',size=8, source = source1)

#Lasso 
p.line(x='Date', y ='predits', color = "#892889",legend_label = "Lasso", source = source2) 
#p.circle(x='Date', y='predits', color = "#892889", fill_color='white',size=8, source = source2)

#Elastic Net
p.line(x='Date', y ='predits', color = "#F6A91B",legend_label = "Elastic", source = source3) 
#p.circle(x='Date', y='predits',  color = "#F6A91B", fill_color='white',size=8, source = source3)

#SGD
p.line(x='Date', y ='predits', color = "#892889",legend_label = "SGD", source = source7) 
#p.circle(x='Date', y='predits',  color = "#892889", fill_color='white',size=8, source = source7)

#Arima
#p.line(x='Date', y ='predits', color = "#A5CD39", legend_label = "ARIMA", source = source4) 
#p.circle(x='Date', y='predits', color = "#A5CD39", fill_color='white',size=8, source = source4)

#Sarima
#p.line(x='Date', y ='predits', color = "#20B254", legend_label = "SARIMA", source = source5) 
#p.circle(x='Date', y='predits', color = "#20B254", fill_color='white',size=8, source = source5)

#Sarimax
#p.line(x='Date', y ='predits', color = "#00AAAE", legend_label = "SARIMAX", source = source6) 
#p.circle(x='Date', y='predits', color = "#00AAAE",fill_color='white',size=8, source = source6)

# axis y, gauche
p.y_range = Range1d(ridge_model_1.Reel.min() * (1 - y_overlimit), ridge_model_1.predits.max() * (1 + y_overlimit))


# Axis y, droite
y_column2_range = "T" + "_range"
p.extra_y_ranges = {
    y_column2_range: Range1d(
        start=ridge_model_1['T'].min() * (1 - y_overlimit),
        end=ridge_model_1['T'].max() * (1 + y_overlimit),
    )
}
p.add_layout(LinearAxis(y_range_name=y_column2_range), "right")

p.line( x='Date', y = 'T', color="grey", legend_label="T (C°)", y_range_name=y_column2_range, source = source1)
#p.circle(x='Date', y = 'T', color = "grey",fill_color='white', size=8, y_range_name=y_column2_range, source = source1)

# Activation de l'interaction avec la légende
p.legend.location = "top_left"
p.legend.click_policy = 'hide'

# Style hover
p.add_tools(HoverTool(
    tooltips=[('Date', '@Date{%Y-%m-%d}'),
        ('Prédiction', '@predits{0.00}'),
        ('Valeur réelle', '@Reel{0.00}'),
        ('C°', "@T{0.00}")],
    formatters={'@Date': 'datetime'}
))

show(p);

### Graphique MAPE

In [95]:
colors = Bokeh[7]
comparaison_scores['Colors'] = colors
comparaison_scores

Unnamed: 0,MAPE,MSE,RMSE,Modele,Colors
0,5.75,317871.14,563.8,Ridge,#EC1557
1,5.57,298859.31,546.68,Lasso,#F05223
2,4.87,233417.1,483.13,Elastic_Net,#F6A91B
3,4.64,229272.17,478.82,SGDRegressor,#A5CD39
4,13.27,1955230.51,1398.3,Arima,#20B254
5,13.49,2110887.26,1452.89,Sarima,#00AAAE
6,6.39,483046.08,695.02,Sarimax,#892889


In [96]:
source_scores = ColumnDataSource(comparaison_scores)
#ouput_file('source_scores.html')
# List de tools
TOOLS="pan,wheel_zoom,box_zoom,reset"

# Instanciation de la figure

p = figure(y_range = comparaison_scores.Modele,           
           plot_width = 900, plot_height = 400,
           title = 'Mean Absolute Prediction Error - MAPE',
           x_axis_label = "Probabilité Moyenne d'erreur %",
           y_axis_label = 'Modèle',
           toolbar_location="below",
           tools=TOOLS) 
# tooltips = "MAPE %', '@comparaison_scores.MAPE{0.00}"
p.title.text_color = "darkblue"
p.title.text_font = "times"
p.title.text_font_size = "20px"
p.title.align = 'center'

p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
#p.xaxis.ticker.desired_num_ticks = 10


# Instanciation d'un diagramme à barres horizontales
p.hbar(y = 'Modele',  right = 'MAPE', height = 0.5, color='Colors', source=source_scores)                  
# legend_group='Modele'
# Style hover
#p.add_tools(HoverTool(
#    tooltips=[('MAPE %', '@MAPE{0.00}')]))

labels = LabelSet(x='MAPE', y='Modele', text='MAPE',source=source_scores)

#citation = Label(x=400, y=70, x_units='screen', y_units='screen',
#                 text="Probabilité Moyenne d'erreur", render_mode='css',
#                 border_line_color='black', border_line_alpha=1.0,
#                 background_fill_color='white', background_fill_alpha=1.0)


# Iteraction legend
p.add_layout(labels)
#p.add_layout(citation)
# Activation de l'interaction avec la légende
#p.legend.location = "bottom_right"
#p.legend.click_policy = 'hide'

# Affichage de la figure
#save(p)
show(p);


MAPE, moyenne des écarts en valeur absolue par rapport aux valeurs observées

### Graphique MSE

In [103]:
source_scores = ColumnDataSource(comparaison_scores)
#ouput_file('source_scores.html')
# List de tools
TOOLS="pan,wheel_zoom,box_zoom,reset"

# Instanciation de la figure

p = figure(y_range = comparaison_scores.Modele,           
           plot_width = 900, plot_height = 400,
           title = 'Mean Square Error - MSE',
           x_axis_label = "Carré moyen des erreurs",
           y_axis_label = 'Modèle',
           toolbar_location="below",
           tools=TOOLS) 
# tooltips = "MAPE %', '@comparaison_scores.MAPE{0.00}"
p.title.text_color = "darkblue"
p.title.text_font = "times"
p.title.text_font_size = "20px"
p.title.align = 'center'

p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
#p.xaxis.ticker.desired_num_ticks = 10


# Instanciation d'un diagramme à barres horizontales
p.hbar(y = 'Modele',  right = 'MSE', height = 0.5, color='Colors', source=source_scores)                  
# legend_group='Modele'
# Style hover
#p.add_tools(HoverTool(
#    tooltips=[('MAPE %', '@MAPE{0.00}')]))

#labels = LabelSet(x='MSE', y='Modele', text='MSE',source=source_scores)

p.add_layout(Arrow(end=OpenHead(line_color="darkblue", line_width=4),
                   x_start=400000, y_start=3.5, x_end=250000, y_end=3.5))
citation = Label(x=190, y=135, x_units='screen', y_units='screen',
                 text="valeur MSE la plus bas", render_mode='css')


# Iteraction legend
#p.add_layout(labels)
p.add_layout(citation)
# Activation de l'interaction avec la légende
#p.legend.location = "bottom_right"
#p.legend.click_policy = 'hide'

# Affichage de la figure
#save(p)
show(p);


MSE, c’est la moyenne arithmétique des carrés des écarts entre prévisions du modèle et observations.
Cette moyenne n'est autre que la VARIANCE RÉSIDUELLE que l'on cherche à minimiser.
Si on compare les MSE, le meilleur est bien sûr celui qui présente la valeur MSE la plus faible, ici SGDRegressor


### Graphique RMSE

In [107]:
source_scores = ColumnDataSource(comparaison_scores)
#ouput_file('source_scores.html')
# List de tools
TOOLS="pan,wheel_zoom,box_zoom,reset"

# Instanciation de la figure

p = figure(y_range = comparaison_scores.Modele,           
           plot_width = 900, plot_height = 400,
           title = 'Root Mean Square Error - RMSE',
           x_axis_label = "Racine carrée de la moyenne des erreurs quadratiques",
           y_axis_label = 'Modèle',
           toolbar_location="below",
           tools=TOOLS) 
# tooltips = "MAPE %', '@comparaison_scores.MAPE{0.00}"
p.title.text_color = "darkblue"
p.title.text_font = "times"
p.title.text_font_size = "20px"
p.title.align = 'center'

p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
#p.xaxis.ticker.desired_num_ticks = 10


# Instanciation d'un diagramme à barres horizontales
p.hbar(y = 'Modele',  right = 'RMSE', height = 0.5, color='Colors', source=source_scores)                  
# legend_group='Modele'
# Style hover
#p.add_tools(HoverTool(
#    tooltips=[('MAPE %', '@MAPE{0.00}')]))

labels = LabelSet(x='RMSE', y='Modele', text='RMSE',source=source_scores)

p.add_layout(Arrow(end=OpenHead(line_color="darkblue", line_width=4),
                   x_start=700, y_start=3.5, x_end=600, y_end=3.5))
citation = Label(x=390, y=135, x_units='screen', y_units='screen',
                 text="valeur RMSE la plus bas", render_mode='css')


# Iteraction legend
p.add_layout(labels)
p.add_layout(citation)
# Activation de l'interaction avec la légende
#p.legend.location = "bottom_right"
#p.legend.click_policy = 'hide'

# Affichage de la figure
#save(p)
show(p);


RMSE, c'est la racine carrée des différences entre les valeurs prédites et les valeurs observées. Ces écarts sont appelés résidus.
Il s'agit d'une mesure de précision qui sert à comparer les erreurs de différents modèles prédictifs pour un ensemble de données particulier.
Une valeur de RMSE plus petite indique une meilleur précision qu'une valeur de RMSE plus élevée.