In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from plotly.subplots import make_subplots
from statsmodels.tsa.stattools import adfuller
import pmdarima as pm
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from prophet import Prophet
from greykite.framework.templates.autogen.forecast_config import ForecastConfig
from greykite.framework.templates.autogen.forecast_config import MetadataParam
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
import warnings
warnings.filterwarnings('ignore')
plt.rcParams["figure.figsize"] = (20,7)

In [2]:
def plot_regions(var):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=var.index, y=var.loc[:, "Piemonte"], line={'color': 'red'}, name="Piemonte"))
    fig.add_trace(go.Scatter(x=var.index, y=var.loc[:, "Emilia-Romagna"], line={'color': 'blue'}, name="Emilia-Romagna"))
    fig.add_trace(go.Scatter(x=var.index, y=var.loc[:, "Umbria"], line={'color': 'green'}, name="Umbria"))
    fig.add_trace(go.Scatter(x=var.index, y=var.loc[:, "Puglia"], line={'color': 'orange'}, name="Puglia"))
    fig.add_trace(go.Scatter(x=var.index, y=var.loc[:, "Sicilia"], line={'color': 'cyan'}, name="Sicilia"))
    fig.add_trace(go.Scatter(x=var.index, y=var.loc[:, "Sardegna"], line={'color': 'black'}, name="Sardegna"))
    fig.update_layout(title_text="Evapotranspiration")
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="mm/mese")
    fig.show()

def split_train_test(data):
    perc = 0.9
    data_length = len(data)
    split_index = int(perc * data_length)
    if type(data) is pd.DataFrame:
        train = data.iloc[:split_index, :]
        test = data.iloc[split_index:, :]
    else:
        train = data.iloc[:split_index]
        test = data.iloc[split_index:]
    return train, test

def model_estimation(df, reg, flag_arima, flag_sarima, flag_prophet, flag_greykite):
    entire_ts = df[reg]
    train, test = split_train_test(entire_ts)
    plot_name = 'Evapotranspiration in ' + reg
    yaxes_name = 'Evapotraspiration'
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=test.index, y=test, line={'color': 'black'}, name='Real values'))
    print("Analysis for " + reg)
    if flag_arima == True:
        arima = pm.auto_arima(train, seasonal=False, error_action="ignore")
        arima_forecast = arima.predict(n_periods=len(test))
        fig.add_trace(go.Scatter(x=test.index, y=arima_forecast, line={'color': 'orange'}, name='Predicted points (ARIMA)'))
        error_arima = [mean_absolute_error(test, arima_forecast), mean_squared_error(test, arima_forecast)]
    else:
        error_arima = [np.nan, np.nan]

    if flag_sarima == True:
        sarima = pm.auto_arima(train, seasonal=True, m=12, error_action="ignore")
        sarima_forecast = sarima.predict(n_periods=len(test))
        fig.add_trace(go.Scatter(x=test.index, y=sarima_forecast, line={'color': 'red'}, name='Predicted points (SARIMA)'))
        error_sarima = [mean_absolute_error(test, sarima_forecast), mean_squared_error(test, sarima_forecast)]
    else:
        error_sarima = [np.nan, np.nan]

    if flag_prophet == True:
        m = Prophet()
        prophet_train = train.reset_index().rename(columns={'month': 'ds', train.name: 'y'})
        m.fit(prophet_train)
        future = m.make_future_dataframe(periods=len(test), freq=train.index.freq)
        prophet_forecast = m.predict(future).loc[:, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']].set_index('ds')  
        fig.add_trace(go.Scatter(x=prophet_forecast.loc[test.index[0]:, :].index, y=prophet_forecast.loc[test.index[0]:, 'yhat'], line={'color': 'green'}, name='Predicted points (Prophet)'))
        fig.add_trace(go.Scatter(x=prophet_forecast.loc[test.index[0]:, :].index, y=prophet_forecast.loc[test.index[0]:, 'yhat_upper'], line={'color':'lightgreen', 'dash':'dash'}, name='Predicted upper confidence (Prophet)', visible="legendonly"))
        fig.add_trace(go.Scatter(x=prophet_forecast.loc[test.index[0]:, :].index, y=prophet_forecast.loc[test.index[0]:, 'yhat_lower'], line={'color':'lightgreen', 'dash':'dash'}, name='Predicted lower confidence (Prophet)', visible="legendonly"))
        error_prophet = [mean_absolute_error(test, prophet_forecast.loc[test.index[0]:, 'yhat']), mean_squared_error(test, prophet_forecast.loc[test.index[0]:, 'yhat'])]
    else:
        error_prophet = [np.nan, np.nan]

    if flag_greykite == True:
        forecaster = Forecaster()
        greykite_train = train.reset_index().rename(columns={'month': 'ts', train.name: 'y'})
        metadata = MetadataParam(time_col="ts", value_col="y", freq="MS")
        result = forecaster.run_forecast_config(df=greykite_train, config=ForecastConfig(model_template=ModelTemplateEnum.SILVERKITE.name, forecast_horizon=len(test), coverage=0.95, metadata_param=metadata))
        greykite_forecast = result.forecast.df
        greykite_forecast = greykite_forecast[np.isnan(greykite_forecast.actual)]
        fig.add_trace(go.Scatter(x=greykite_forecast.ts, y=greykite_forecast.forecast, line={'color': 'blue'}, name="Predicted points (Greykite)"))
        fig.add_trace(go.Scatter(x=greykite_forecast.ts, y=greykite_forecast.forecast_lower, line={'color': 'cyan', 'dash':'dash'}, name="Predicted lower confidence (Greykite)", visible="legendonly"))
        fig.add_trace(go.Scatter(x=greykite_forecast.ts, y=greykite_forecast.forecast_upper, line={'color': 'cyan', 'dash':'dash'}, name="Predicted upper confidence (Greykite)", visible="legendonly"))
        error_greykite = [mean_absolute_error(test, greykite_forecast.forecast), mean_squared_error(test, greykite_forecast.forecast)]
    else:
        error_greykite = [np.nan, np.nan]

    fig.add_trace(go.Scatter(x=train.index, y=train, line={'color': 'grey'}, name='Train', visible="legendonly"))
    fig.update_layout(title_text = plot_name)
    fig.update_xaxes(title_text = 'Date')
    fig.update_yaxes(title_text = yaxes_name)
    fig.show()
    print("MAE/RMSE of ARIMA model: " + str(error_arima))
    print("MAE/RMSE of SARIMA model: " + str(error_sarima))
    print("MAE/RMSE of Prophet model: " + str(error_prophet))
    print("MAE/RMSE of Greykite model: " + str(error_greykite))
    return pd.DataFrame({"region": reg,"error_ARIMA": [error_arima], "error_SARIMA": [error_sarima], "error_Prophet": [error_prophet], "error_Greykite": [error_greykite]})

In [3]:
ae_all = pd.read_csv("output/ae.csv", parse_dates=["month"], index_col="month")
tp_all = pd.read_csv("output/tp.csv", parse_dates=["month"], index_col="month")
regions = ["Piemonte", "Emilia-Romagna", "Umbria", "Puglia", "Sicilia", "Sardegna"]
# regions = ["Piemonte", "Valle d\'Aosta", "Lombardia", "Trentino-Alto Adige", "Veneto", "Friuli Venezia Giulia", "Liguria", "Emilia-Romagna", "Toscana", "Umbria", "Marche", "Lazio", "Abruzzo", "Molise", "Campania", "Puglia", "Basilicata", "Calabria", "Sicilia", "Sardegna"]
# regions = ["Piemonte"]
ae = ae_all.filter(regions, axis=1)
tp = tp_all.filter(regions, axis=1)
ae.index.freq = ae.index.inferred_freq
tp.index.freq = tp.index.inferred_freq
# plot_regions(ae)
# plot_regions(tp)

### Evapotranspiration

In [4]:
errors_ae = pd.DataFrame(columns=["region","error_ARIMA", "error_SARIMA", "error_Prophet", "error_Greykite"])

for region in regions:
    temp = model_estimation(ae, region, True, True, True, True)
    errors_ae = errors_ae.append(temp)

Analysis for Piemonte


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [17.81602662934409, 575.7477284150868]
MAE/RMSE of SARIMA model: [8.503994555320125, 132.92696636303418]
MAE/RMSE of Prophet model: [6.843507211454198, 90.25159474636499]
MAE/RMSE of Greykite model: [19.984614573059847, 660.9700625936653]
Analysis for Emilia-Romagna


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [18.416909006795496, 507.7605413116343]
MAE/RMSE of SARIMA model: [8.693855001948883, 139.8894328103377]
MAE/RMSE of Prophet model: [9.9321479475413, 172.21566149219868]
MAE/RMSE of Greykite model: [17.230321724943547, 531.4425191319804]
Analysis for Umbria


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [19.468603207269755, 565.9061304957431]
MAE/RMSE of SARIMA model: [9.91500714147089, 197.78074621481215]
MAE/RMSE of Prophet model: [10.515743124782224, 189.4690829066596]
MAE/RMSE of Greykite model: [18.55511651697517, 627.1334659416111]
Analysis for Puglia


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [18.78057915652025, 542.6739351028394]
MAE/RMSE of SARIMA model: [9.860945936257263, 199.02654607093532]
MAE/RMSE of Prophet model: [9.596176586136295, 185.1998391803156]
MAE/RMSE of Greykite model: [14.054103207165838, 374.18258047492105]
Analysis for Sicilia


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [16.53160738758708, 389.49370879991585]
MAE/RMSE of SARIMA model: [7.527511445622006, 157.08609643152127]
MAE/RMSE of Prophet model: [8.108646250087828, 152.3883097669628]
MAE/RMSE of Greykite model: [12.180761969566817, 278.5185553142565]
Analysis for Sardegna


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [16.512985356887317, 434.47249476629105]
MAE/RMSE of SARIMA model: [10.932256592406063, 256.64613916269394]
MAE/RMSE of Prophet model: [10.572525621871486, 267.02002109270467]
MAE/RMSE of Greykite model: [14.518106115567768, 414.3979315486219]


In [5]:
errors_ae.reset_index(drop=True)
errors_ae["MSE Prophet [%]"] = errors_ae.error_SARIMA.str[0] / errors_ae.error_Prophet.str[0] * 100 - 100
errors_ae["RMSE Prophet [%]"] = errors_ae.error_SARIMA.str[1] / errors_ae.error_Prophet.str[1] * 100 - 100
errors_ae["MSE GreyKite [%]"] = errors_ae.error_SARIMA.str[0] / errors_ae.error_Greykite.str[0] * 100 - 100
errors_ae["RMSE GreyKite [%]"] = errors_ae.error_SARIMA.str[1] / errors_ae.error_Greykite.str[1] * 100 - 100
errors_ae = errors_ae.sort_values(by="RMSE Prophet [%]", ascending=False).reset_index(drop=True)
errors_ae

Unnamed: 0,region,error_ARIMA,error_SARIMA,error_Prophet,error_Greykite,MSE Prophet [%],RMSE Prophet [%],MSE GreyKite [%],RMSE GreyKite [%]
0,Piemonte,"[17.81602662934409, 575.7477284150868]","[8.503994555320125, 132.92696636303418]","[6.843507211454198, 90.25159474636499]","[19.984614573059847, 660.9700625936653]",24.26369,47.284895,-57.447293,-79.889109
1,Puglia,"[18.78057915652025, 542.6739351028394]","[9.860945936257263, 199.02654607093532]","[9.596176586136295, 185.1998391803156]","[14.054103207165838, 374.18258047492105]",2.759113,7.465831,-29.835822,-46.810312
2,Umbria,"[19.468603207269755, 565.9061304957431]","[9.91500714147089, 197.78074621481215]","[10.515743124782224, 189.4690829066596]","[18.55511651697517, 627.1334659416111]",-5.71273,4.386818,-46.564565,-68.462734
3,Sicilia,"[16.53160738758708, 389.49370879991585]","[7.527511445622006, 157.08609643152127]","[8.108646250087828, 152.3883097669628]","[12.180761969566817, 278.5185553142565]",-7.166854,3.082774,-38.201637,-43.599414
4,Sardegna,"[16.512985356887317, 434.47249476629105]","[10.932256592406063, 256.64613916269394]","[10.572525621871486, 267.02002109270467]","[14.518106115567768, 414.3979315486219]",3.402507,-3.885058,-24.699155,-38.067707
5,Emilia-Romagna,"[18.416909006795496, 507.7605413116343]","[8.693855001948883, 139.8894328103377]","[9.9321479475413, 172.21566149219868]","[17.230321724943547, 531.4425191319804]",-12.467524,-18.770783,-49.543281,-73.67741


### Precipitation

In [6]:
errors_pt = pd.DataFrame(columns=["region","error_ARIMA", "error_SARIMA", "error_Prophet", "error_Greykite"])

for region in regions:
    temp = model_estimation(tp, region, True, True, True, True) 
    errors_pt = errors_pt.append(temp)

Analysis for Piemonte


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [46.535967902563335, 4530.2358464056615]
MAE/RMSE of SARIMA model: [46.70609175743357, 4146.693330707717]
MAE/RMSE of Prophet model: [45.39904227881323, 3929.6962881489953]
MAE/RMSE of Greykite model: [43.37014406818052, 3807.2848435984833]
Analysis for Emilia-Romagna


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [39.93059548048085, 2631.9472427955766]
MAE/RMSE of SARIMA model: [38.06301334925333, 2446.553361951468]
MAE/RMSE of Prophet model: [36.36065285867931, 2448.08966991046]
MAE/RMSE of Greykite model: [36.763612264968145, 2426.9132789486603]
Analysis for Umbria


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [58.24439486884765, 4450.814823470659]
MAE/RMSE of SARIMA model: [64.81946620655927, 5487.168770407577]
MAE/RMSE of Prophet model: [33.69259466645942, 1996.5784944529587]
MAE/RMSE of Greykite model: [35.346243740060906, 2067.505578094261]
Analysis for Puglia


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [28.59908793009079, 1270.8159605390433]
MAE/RMSE of SARIMA model: [26.13109962084411, 1049.6515819499114]
MAE/RMSE of Prophet model: [26.83032329576939, 1061.3940640924966]
MAE/RMSE of Greykite model: [27.95649451398378, 1299.3569311890617]
Analysis for Sicilia


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [36.94339121662002, 2297.5403999450637]
MAE/RMSE of SARIMA model: [26.940984671084387, 1441.295074506331]
MAE/RMSE of Prophet model: [26.967307831547966, 1357.6995089283134]
MAE/RMSE of Greykite model: [32.32822499570599, 1949.9406137541052]
Analysis for Sardegna


INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


MAE/RMSE of ARIMA model: [38.06903356132877, 2254.503729544611]
MAE/RMSE of SARIMA model: [32.73617322169906, 1800.355575898981]
MAE/RMSE of Prophet model: [30.968601889118993, 1745.4106061064656]
MAE/RMSE of Greykite model: [35.59784254074778, 2224.0628466935286]


In [7]:
errors_pt.reset_index(drop=True)
errors_pt["MSE Prophet [%]"] = errors_pt.error_SARIMA.str[0] / errors_pt.error_Prophet.str[0] * 100 - 100
errors_pt["RMSE Prophet [%]"] = errors_pt.error_SARIMA.str[1] / errors_pt.error_Prophet.str[1] * 100 - 100
errors_pt["MSE GreyKite [%]"] = errors_pt.error_SARIMA.str[0] / errors_pt.error_Greykite.str[0] * 100 - 100
errors_pt["RMSE GreyKite [%]"] = errors_pt.error_SARIMA.str[1] / errors_pt.error_Greykite.str[1] * 100 - 100
errors_pt = errors_pt.sort_values(by="RMSE Prophet [%]", ascending=False).reset_index(drop=True)
errors_pt

Unnamed: 0,region,error_ARIMA,error_SARIMA,error_Prophet,error_Greykite,MSE Prophet [%],RMSE Prophet [%],MSE GreyKite [%],RMSE GreyKite [%]
0,Umbria,"[58.24439486884765, 4450.814823470659]","[64.81946620655927, 5487.168770407577]","[33.69259466645942, 1996.5784944529587]","[35.346243740060906, 2067.505578094261]",92.384905,174.828602,83.384313,165.400434
1,Sicilia,"[36.94339121662002, 2297.5403999450637]","[26.940984671084387, 1441.295074506331]","[26.967307831547966, 1357.6995089283134]","[32.32822499570599, 1949.9406137541052]",-0.097611,6.157148,-16.664201,-26.085181
2,Piemonte,"[46.535967902563335, 4530.2358464056615]","[46.70609175743357, 4146.693330707717]","[45.39904227881323, 3929.6962881489953]","[43.37014406818052, 3807.2848435984833]",2.879024,5.52198,7.691807,8.914712
3,Sardegna,"[38.06903356132877, 2254.503729544611]","[32.73617322169906, 1800.355575898981]","[30.968601889118993, 1745.4106061064656]","[35.59784254074778, 2224.0628466935286]",5.707624,3.147968,-8.038884,-19.051048
4,Emilia-Romagna,"[39.93059548048085, 2631.9472427955766]","[38.06301334925333, 2446.553361951468]","[36.36065285867931, 2448.08966991046]","[36.763612264968145, 2426.9132789486603]",4.681875,-0.062755,3.534476,0.809262
5,Puglia,"[28.59908793009079, 1270.8159605390433]","[26.13109962084411, 1049.6515819499114]","[26.83032329576939, 1061.3940640924966]","[27.95649451398378, 1299.3569311890617]",-2.606095,-1.106326,-6.529413,-19.21761
