# Import the necessary libraries

In [46]:
import sqlite3
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from fbprophet.make_holidays import make_holidays_df
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from prophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import copy
import matplotlib.pyplot as plt
import plotly.offline as pyoff
import plotly.graph_objs as go
import optuna
from sklearn.model_selection import cross_val_score
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric
import itertools

# Connect to the database

In [47]:
# Open the SQLite file holding the prepared tables; `conn` is reused by every
# read below and by the final `to_sql` write.
conn = sqlite3.connect('clean_database.db')
# Cursor used for the raw table-listing query in the next cell.
cursor = conn.cursor()

# Check all the tables available within the database

In [48]:
# Ask SQLite's catalogue for every table name and print the raw result rows.
table_rows = cursor.execute(
    "SELECT name FROM sqlite_master WHERE type ='table';"
).fetchall()
print(table_rows)

[('Meteostat_Data',), ('Entsoe_Data',), ('Entsoe_Meteostat_Data',), ('Entsoe_real_data',), ('Entsoe_real_values_and_Meteostat_data',), ('Entsoe_forecasted_data',), ('Entsoe_forecasted_data_and_Meteostat_data',), ('Feature_selected_real_data',), ('Feature_selected_forecasted_data',), ('RandomForest_Train_Real_Test_Forecast',), ('LinearRegresion_Train_Real_Test_Forecast',), ('LinearRegresion_Train_Forecast_Test_Forecast',), ('LinearRegresion_Train_Real_Test_Real',), ('RandomForest_Train_Forecast_Test_Forecast',), ('RandomForest_Train_Real_Test_Real',), ('Prophet_Train_Real_Test_Forecast',), ('Prophet_Train_Forecast_Test_Forecast',), ('Optimized_Prophet_Train_Forecast_Test_Forecast',), ('Prophet_Train_Real_Test_Real',), ('Optimized_Prophet_Train_Real_Test_Forecast',), ('y_train',), ('y_test',), ('X_train_real',), ('X_test_real',), ('X_train_forecasted',), ('X_test_forecasted',), ('Optimized_Prophet_Train_Real_Test_Real',)]


# Retrieve the necessary tables from the database

In [49]:
# Training features: read the table, parse `timestamp`, and use it as the index.
X_train = (
    pd.read_sql("SELECT * FROM X_train_real;", conn)
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .set_index("timestamp")
)

In [50]:
# Test features: read the table, parse `timestamp`, and use it as the index.
X_test = (
    pd.read_sql("SELECT * FROM X_test_real;", conn)
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .set_index("timestamp")
)

In [51]:
# Training target: read the table, parse `timestamp`, and use it as the index.
y_train = (
    pd.read_sql("SELECT * FROM y_train;", conn)
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .set_index("timestamp")
)

In [52]:
# Test target: read the table, parse `timestamp`, and use it as the index.
y_test = (
    pd.read_sql("SELECT * FROM y_test;", conn)
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .set_index("timestamp")
)

# Prophet

### Train

In [53]:
# Flatten the target frames into 1-D arrays (row-major) for the sklearn metric calls.
y_train_r = y_train.to_numpy().ravel(order='C')
y_test_r = y_test.to_numpy().ravel(order='C')

In [54]:
# Names of every training column; each one is later registered as a Prophet regressor.
features = X_train.columns.tolist()

In [55]:
def holidays_features(df: pd.DataFrame, country='RO'):
    """Build the holiday table covering the years present in ``df``'s index.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose index exposes ``.year`` (a ``DatetimeIndex``); only the
        index is read.
    country : str, default 'RO'
        Country code forwarded to ``make_holidays_df``.

    Returns
    -------
    The frame produced by ``make_holidays_df`` for every year found in the
    index plus the year after the latest one.

    Raises
    ------
    ValueError
        If the index contains no dates, so no year range can be derived.
    """
    # Sort so that "the final year" really is the latest one even when the
    # index is not in chronological order.
    years = sorted(df.index.year.unique().tolist())
    if not years:
        raise ValueError("df has an empty index; cannot derive holiday years")

    # Extend the range by one year past the latest year in the data.
    years.append(years[-1] + 1)

    # `make_holidays_df` comes from the notebook's top-level import cell.
    return make_holidays_df(year_list=years, country=country)

In [56]:
# Build the holiday table from the years covered by the training target's index.
holidays_df = holidays_features(y_train)
# Show the table as the cell output.
holidays_df

Unnamed: 0,ds,holiday
0,2017-01-01,Anul Nou
1,2017-01-02,Anul Nou
2,2017-01-24,Unirea Principatelor Române
3,2017-04-14,Paștele
4,2017-04-16,Paștele
...,...,...
85,2022-08-15,Adormirea Maicii Domnului
86,2022-11-30,Sfântul Andrei
87,2022-12-01,Ziua Națională a României
88,2022-12-25,Crăciunul


In [57]:
# Prophet expects the timestamp in a `ds` column and the target in `y`;
# the remaining columns stay in place as extra regressors.
X_train_prophet = (
    X_train
    .assign(y=y_train["real_energy_load"])
    .reset_index()
    .rename(columns={'timestamp': 'ds'})
)
X_train_prophet

Unnamed: 0,ds,real_energy_produced,imported_real_energy,exported_real_energy,avg_air_temp (°C),avg_rel_humidity (%),avg_wind_speed (km/h),avg_sea-lvl_air_pres (hPa),hour,day_of_week,day_of_year,holidays_encoded,real_energy_load_lag24,real_energy_load_roll_min168,y
0,2017-10-03 01:00:00,-1.271889,0.476815,-0.229048,-0.774438,1.016265,-0.958030,1.224477,-1.517054,-0.999896,0.894458,-0.182989,-1.491324,-0.186323,5866.00
1,2017-10-03 02:00:00,-1.321034,0.304496,-0.255141,-0.809492,1.058201,-0.958030,1.171745,-1.372586,-0.999896,0.894458,-0.182989,-1.573375,-0.186323,5759.00
2,2017-10-03 03:00:00,-1.420270,0.387783,-0.249342,-0.855331,1.149061,-0.958030,1.166642,-1.228119,-0.999896,0.894458,-0.182989,-1.586732,-0.186323,5686.00
3,2017-10-03 04:00:00,-1.385301,0.361936,-0.420395,-0.880948,1.197986,-0.958030,1.149632,-1.083652,-0.999896,0.894458,-0.182989,-1.505635,-0.186323,5770.00
4,2017-10-03 05:00:00,-1.318199,0.459583,-0.620440,-0.899823,1.149061,-1.568678,1.142828,-0.939184,-0.999896,0.894458,-0.182989,-1.242311,-0.186323,5946.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33591,2021-08-02 20:00:00,0.294622,0.888943,-1.191583,1.367883,-0.731052,0.059716,-0.736799,1.227827,-1.499933,0.314136,-0.182989,0.148011,0.000206,7991.75
33592,2021-08-02 21:00:00,0.317068,0.694127,-1.264063,1.187221,-0.248793,-0.347382,-0.665356,1.372294,-1.499933,0.314136,-0.182989,0.345504,0.000206,8087.00
33593,2021-08-02 22:00:00,-0.088382,0.507210,-1.203180,1.075319,-0.109007,-0.245608,-0.605820,1.516761,-1.499933,0.314136,-0.182989,0.049026,0.000206,7604.50
33594,2021-08-02 23:00:00,-0.578891,0.692931,-1.165490,1.009256,-0.004168,-1.082421,-0.542883,1.661229,-1.499933,0.314136,-0.182989,-0.395333,0.000206,6991.25


In [None]:
# Baseline Prophet model (default Fourier orders for the built-in seasonalities).
# NOTE(review): `holidays_df` and `add_country_holidays('RO')` below both supply
# Romanian holidays - confirm that registering both is intentional.
model = Prophet(
    # trend
    growth='linear',
    changepoint_range=0.3,
    changepoint_prior_scale=0.01,
    # seasonality
    seasonality_mode='additive',
    seasonality_prior_scale=0.5,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=True,
    # holidays
    holidays=holidays_df,
    holidays_prior_scale=1.0,
    # uncertainty / backend (mcmc_samples is deliberately not set here)
    interval_width=0.90,
    uncertainty_samples=1000,
    stan_backend=None,
)

# Register every training column as an additional regressor.
for regressor_name in features:
    model.add_regressor(regressor_name)

# Built-in country holidays on top of the explicit holiday table.
model.add_country_holidays(country_name='RO')

# Fit on the training frame, then predict the same rows (in-sample fit).
model = model.fit(X_train_prophet)
training_demand_forecast = model.predict(X_train_prophet)

In [None]:
# Cross-validate the fitted model: 1120-day initial window, cutoffs spaced
# 7 days apart, 7-day forecast horizon.
df_cv = cross_validation(
    model,
    initial='1120 days',
    period='7 days',
    horizon='7 days',
)

# Distinct cutoff timestamps, then keep only the rows of the first cutoff.
cutoffs = df_cv.groupby('cutoff').mean().reset_index()['cutoff']
cutoff = df_cv['cutoff'].unique()[0]
first_cutoff_mask = df_cv['cutoff'].values == cutoff
df_cv = df_cv[first_cutoff_mask]

In [None]:
def getPerfomanceMetrics(model):
    """Cross-validate ``model`` and return Prophet's performance metrics for it.

    The cross-validation frame comes from ``getCrossValidationData`` and is
    passed unchanged to ``performance_metrics``.
    """
    cv_frame = getCrossValidationData(model)
    return performance_metrics(cv_frame)

def getCrossValidationData(model):
    """Run ``cross_validation`` on ``model`` with the notebook's fixed window.

    Uses a 1120-day initial window, a 7-day period and a 7-day horizon, and
    returns whatever ``cross_validation`` returns.
    """
    cv_window = {
        'initial': '1120 days',
        'period': '7 days',
        'horizon': '7 days',
    }
    return cross_validation(model, **cv_window)

In [None]:
def create_param_combinations(**param_dict):
    """Expand a hyper-parameter grid into one row per combination.

    Each keyword maps a parameter name to an iterable of candidate values.
    The result is a DataFrame with one column per parameter (in keyword
    order) and one row per element of the Cartesian product.
    """
    combinations = list(itertools.product(*param_dict.values()))
    return pd.DataFrame(combinations, columns=list(param_dict))

def single_cv_run(history_df, metrics, param_dict, parallel):
    """Fit one Prophet configuration and return its cross-validated metrics.

    Parameters
    ----------
    history_df : pd.DataFrame
        Training frame in Prophet layout (``ds``, ``y`` and one column per
        name in the notebook-level ``features`` list).
    metrics : list of str
        Columns to keep from the performance-metrics frame; may include
        ``'params'``, which this function adds.
    param_dict : dict
        Keyword arguments forwarded to ``Prophet`` on top of
        ``holidays=holidays_df``.
    parallel : str or None
        Parallelisation mode forwarded to ``cross_validation``. ``None``
        leaves the call unchanged (no ``parallel`` keyword is passed).

    Returns
    -------
    pd.DataFrame
        The ``performance_metrics`` frame (``rolling_window=1``) restricted
        to ``metrics``, with ``params`` holding ``str(param_dict)``.

    Notes
    -----
    Reads the notebook-level globals ``holidays_df`` and ``features``.
    """
    model = Prophet(holidays=holidays_df, **param_dict)

    # Register every feature column as an additional regressor.
    for feature in features:
        model.add_regressor(feature)

    # Built-in country holidays on top of the explicit holiday table.
    model.add_country_holidays(country_name='RO')
    model.fit(history_df)

    # Same window as getCrossValidationData. The call is made here directly so
    # that `parallel` is honoured: it was previously accepted but never used.
    cv_kwargs = {'initial': '1120 days', 'period': '7 days', 'horizon': '7 days'}
    if parallel is not None:
        cv_kwargs['parallel'] = parallel
    df_cv = cross_validation(model, **cv_kwargs)

    df_p = performance_metrics(df_cv, rolling_window=1)
    df_p['params'] = str(param_dict)
    return df_p.loc[:, metrics]

# Candidate values listed for a wider search (copy into `param_grid` to use them):
# 'changepoint_range': [0.6, 0.7, 0.75, 0.8, 0.9],
# 'changepoint_prior_scale': [0.01, 0.05, 0.1, 0.25, 0.5],
# 'seasonality_prior_scale':[0.5, 1.0, 2.5, 5],
# 'holidays_prior_scale':[1.0, 5.0, 10.0, 15.0],
# 'yearly_seasonality':[5, 10, 15, 20],
# 'weekly_seasonality':[5, 10, 15, 20],

# Show the full `params` string instead of a truncated one.
pd.set_option('display.max_colwidth', None)

# Grid actually evaluated in this run (a single combination).
param_grid = {
    'changepoint_prior_scale': [0.01],
    'changepoint_range': [0.3],
    'holidays_prior_scale': [1.0],
    'seasonality_prior_scale': [0.5],
    'yearly_seasonality': [20],
    'weekly_seasonality': [5],
}
metrics = ['horizon', 'rmse', 'mae', 'params']
results = []

params_df = create_param_combinations(**param_grid)
# Iterate row records rather than `params_df.values`: the 2-D array of a
# mixed int/float frame is upcast to float, which would turn the integer
# Fourier orders (20, 5) into 20.0 / 5.0.
for param_dict in params_df.to_dict('records'):
    cv_df = single_cv_run(X_train_prophet, metrics, param_dict, parallel="processes")
    results.append(cv_df)
results_df = pd.concat(results).reset_index(drop=True)

# Row(s) with the lowest RMSE; the first one is reported.
best_param = results_df.loc[results_df['rmse'] == results_df['rmse'].min(), ['params']]
print(f'\n The best param combination is {best_param.values[0][0]}')

# Average only the numeric error columns: `results_df` also holds the string
# `params` column, and DataFrame.mean() raises on it in pandas >= 2.
results_df[['rmse', 'mae']].mean()

In [None]:
# Prophet model refitted with the grid-search values (Fourier order 20 for the
# yearly and 5 for the weekly seasonality); other settings match the baseline.
# NOTE(review): `holidays_df` and `add_country_holidays('RO')` below both supply
# Romanian holidays - confirm that registering both is intentional.
model = Prophet(
    # trend
    growth='linear',
    changepoint_range=0.3,
    changepoint_prior_scale=0.01,
    # seasonality
    seasonality_mode='additive',
    seasonality_prior_scale=0.5,
    yearly_seasonality= 20,
    weekly_seasonality=5.0,
    daily_seasonality=True,
    # holidays
    holidays=holidays_df,
    holidays_prior_scale=1.0,
    # uncertainty / backend (mcmc_samples is deliberately not set here)
    interval_width=0.90,
    uncertainty_samples=1000,
    stan_backend=None,
)

# Register every training column as an additional regressor.
for regressor_name in features:
    model.add_regressor(regressor_name)

# Built-in country holidays on top of the explicit holiday table.
model.add_country_holidays(country_name='RO')

# Fit on the training frame, then predict the same rows (in-sample fit).
model = model.fit(X_train_prophet)
training_demand_forecast = model.predict(X_train_prophet)

In [None]:
# Display the prediction frame returned by `model.predict` for the training rows.
training_demand_forecast

In [None]:
# Point forecasts (`yhat`) for the training rows; input to the training metrics.
yp5 = training_demand_forecast["yhat"]

In [None]:
# Training-set error metrics for the refitted Prophet model.
# RMSE is computed as sqrt(MSE): the `squared=False` flag of
# `mean_squared_error` is deprecated in scikit-learn 1.4 and removed in 1.6,
# while this form gives the same value on every version.
rmse5 = np.sqrt(mean_squared_error(y_true=y_train_r, y_pred=yp5))
mae5 = mean_absolute_error(y_true=y_train_r, y_pred=yp5)
# MAPE is returned by scikit-learn as a fraction, not a percentage.
mape5 = mean_absolute_percentage_error(y_true=y_train_r, y_pred=yp5)
print(f"RMSE value: {rmse5}. MAE value: {mae5}. MAPE value: {mape5}")

In [None]:
# Training set: actual load vs. Prophet fit ('Actual' trace first, 'Predicted' second).
fig_2 = go.Figure(
    data=[
        go.Scatter(x=X_train_prophet['ds'], y=X_train_prophet['y'], name='Actual'),
        go.Scatter(
            x=training_demand_forecast['ds'],
            y=training_demand_forecast['yhat'],
            name='Predicted',
        ),
    ]
)
fig_2

In [None]:
# Same training-set comparison with the trace order swapped
# ('Predicted' first, 'Actual' second).
fig_2 = go.Figure(
    data=[
        go.Scatter(
            x=training_demand_forecast['ds'],
            y=training_demand_forecast['yhat'],
            name='Predicted',
        ),
        go.Scatter(x=X_train_prophet['ds'], y=X_train_prophet['y'], name='Actual'),
    ]
)
fig_2

### Test

In [None]:
# Test frame in Prophet layout: timestamp in `ds`, target in `y`,
# remaining columns kept as regressors.
X_test_prophet = (
    X_test
    .assign(y=y_test["real_energy_load"])
    .reset_index()
    .rename(columns={'timestamp': 'ds'})
)

In [None]:
# Predict on the test rows with the model fitted on the training frame.
# NOTE(review): despite the `training_` prefix, this variable holds the test-set forecast.
training_demand_forecast_2 = model.predict(X_test_prophet)

In [None]:
# Point forecasts (`yhat`) for the test rows; input to the test metrics.
yp6 = training_demand_forecast_2["yhat"]

In [None]:
# Test-set error metrics for the refitted Prophet model.
# RMSE is computed as sqrt(MSE): the `squared=False` flag of
# `mean_squared_error` is deprecated in scikit-learn 1.4 and removed in 1.6,
# while this form gives the same value on every version.
rmse6 = np.sqrt(mean_squared_error(y_true=y_test_r, y_pred=yp6))
mae6 = mean_absolute_error(y_true=y_test_r, y_pred=yp6)
# MAPE is returned by scikit-learn as a fraction, not a percentage.
mape6 = mean_absolute_percentage_error(y_true=y_test_r, y_pred=yp6)
print(f"RMSE value: {rmse6}. MAE value: {mae6}. MAPE value: {mape6}")

In [None]:
# Test set: actual load vs. Prophet forecast ('Actual' trace first, 'Predicted' second).
fig_3 = go.Figure(
    data=[
        go.Scatter(x=X_test_prophet['ds'], y=X_test_prophet['y'], name='Actual'),
        go.Scatter(
            x=training_demand_forecast_2['ds'],
            y=training_demand_forecast_2['yhat'],
            name='Predicted',
        ),
    ]
)
fig_3

In [None]:
# Same test-set comparison with the trace order swapped
# ('Predicted' first, 'Actual' second).
fig_3 = go.Figure(
    data=[
        go.Scatter(
            x=training_demand_forecast_2['ds'],
            y=training_demand_forecast_2['yhat'],
            name='Predicted',
        ),
        go.Scatter(x=X_test_prophet['ds'], y=X_test_prophet['y'], name='Actual'),
    ]
)
fig_3

In [None]:
# Pair each test prediction with the observed load, indexed by the test timestamps.
# Predictions are matched to `y_test` by position, not by label.
results6 = pd.DataFrame(
    {
        'yp_test_Prophet_Optimized': yp6.to_numpy(),
        'real_energy_load': y_test["real_energy_load"].to_numpy(),
    },
    index=y_test.index,
)
results6

In [None]:
# Persist the test predictions, replacing any previous version of the table.
# The timestamp index is written as a column; the other options stay at their defaults.
results6.to_sql(
    "Optimized_Prophet_Train_Real_Test_Real",
    conn,
    if_exists='replace',
    index=True,
)