In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import sys
import torch

sys.path.append("../../../")
from utils.data_processing import add_day_ahead_column
from utils.error_metrics import _calc_mae, _calc_mse, _calc_rmse, _calc_nrmse, _calc_mape, _calc_mase, _calc_msse, _seas_naive_fcst, _calc_metrics

### Plotting
import matplotlib.pyplot as plt

# Colour palette used for the default matplotlib line cycle.
stanford_colors = [
    '#8C1515', '#175E54', '#279989', '#8F993E', '#6FA287',
    '#4298B5', '#007C92', '#E98300', '#E04F39', '#FEDD5C',
    '#620059', '#651C32', '#5D4B3C', '#7F7776', '#DAD7CB',
]

# Apply all global matplotlib defaults in one place.
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 12,  # Font size
    'axes.prop_cycle': plt.cycler(color=stanford_colors),
})

### ML AZURE
# Connect to the Azure ML workspace that holds the forecast outputs loaded below.
from azureml.core import Workspace, Dataset, Datastore
import mlflow
# Workspace identifiers live in a local `config` module rather than in the notebook.
from config import subscription_id, resource_group, workspace_name
workspace = Workspace(subscription_id, resource_group, workspace_name)
# Datastore handle passed to every `Dataset.Tabular.from_delimited_files` call in later cells.
datastore = Datastore.get(workspace, "workspaceblobstore")

# Benchmark results

In [None]:
### Load original data

# Read the raw CSV, drop the columns not needed for benchmarking, parse the
# timestamp column and rename `country` to the `ID` key used by the forecasts.
df = (
    pd.read_csv('../00_load_country.csv')
    .drop(columns=['temp', 'humidity', 'precipitation', 'cloud', 'wind'])
    .assign(ds=lambda raw: pd.to_datetime(raw['ds']))
    .rename(columns={'country': 'ID'})
)

## 1. XGBoost

In [None]:
### Load xgboost results

# Azure ML job whose `forecasts_forecast_test.csv` output is read from the datastore.
name = 'funny_match_827'
file = 'forecasts_forecast_test.csv'
path = f'azureml/{name}/results/'

dataset = Dataset.Tabular.from_delimited_files(path=(datastore, path + file))
forecast_test = dataset.to_pandas_dataframe().assign(
    ds=lambda fc: pd.to_datetime(fc['ds'])
)

# Left-join onto the observed series so every (ds, ID) row of `df` is kept,
# label the prediction column by model, and keep rows from 2014-01-01 onwards.
forecast_xgboost = (
    df.merge(forecast_test, on=['ds', 'ID'], how='left')
    .rename(columns={'yhat': 'yhat_xgboost'})
    .loc[lambda merged: merged['ds'] >= '2014-01-01']
    .reset_index(drop=True)
)

In [None]:
### Prototype Plot

# Quick visual check of the XGBoost forecast against the observed `y` for one
# series. `px` is already imported in the first cell, so it is not re-imported
# here; change `plot_id` to inspect another series.
plot_id = 'ALB'
df_id = forecast_xgboost[forecast_xgboost['ID'] == plot_id]
fig = px.line(
    df_id,
    x='ds',
    y=['y', 'yhat_xgboost'],
    title=f'{plot_id}: y vs. yhat_xgboost',
)
fig.show()

### 1.1. Error metrics

In [None]:
### Error metrics

# Seasonal-naive reference: the value of `y` 48 rows earlier within each ID.
forecast_xgboost['snaive'] = forecast_xgboost.groupby('ID')['y'].shift(48)

# NOTE(review): the left merge upstream leaves `yhat_xgboost` empty on rows
# without a forecast — confirm the `_calc_*` helpers skip missing values, or
# restrict the frame to the test window before scoring.

# Collect one record per ID and build the frame once. Growing a DataFrame with
# pd.concat inside the loop is quadratic and gives every row index label 0.
metric_columns = ['ID', 'RMSE', 'MAE', 'MAPE', 'MASE', 'MSSE']
metric_rows = []
for ID, df_id in forecast_xgboost.groupby('ID', sort=False):  # sort=False keeps first-seen ID order
    scored = {'predictions': df_id['yhat_xgboost'], 'truth': df_id['y']}
    metric_rows.append({
        'ID': ID,
        'RMSE': _calc_rmse(**scored),
        'MAE': _calc_mae(**scored),
        'MAPE': _calc_mape(**scored),
        'MASE': _calc_mase(**scored, snaive_predictions=df_id['snaive']),
        'MSSE': _calc_msse(**scored, snaive_predictions=df_id['snaive']),
    })

# Explicit columns keep the CSV header even if there were no groups.
metrics = pd.DataFrame(metric_rows, columns=metric_columns)
metrics.to_csv('metrics_xgboost.csv', index=False)

## 2. ARIMA

In [None]:
### Load arima results

# One Azure ML job per ID. The first 8 IDs use the `_1625` job suffix and the
# remaining IDs use `_1626`.
IDs = df['ID'].unique()
print(IDs[:8])
print(IDs[8:])
job_names1 = [f'arima_{ID}_1625' for ID in IDs[:8]]
job_names2 = [f'arima_{ID}_1626' for ID in IDs[8:]]
job_names = job_names1 + job_names2

# Gather the per-job frames in a list and concatenate once. Repeated pd.concat
# onto a growing frame is quadratic, and concatenating with an empty frame is
# deprecated in recent pandas.
file = 'forecasts_forecast_test.csv'
arima_parts = []
for name in job_names:
    path = f'azureml/{name}/results/'
    dataset = Dataset.Tabular.from_delimited_files(path=(datastore, path + file))
    forecast_test = dataset.to_pandas_dataframe()
    forecast_test['ds'] = pd.to_datetime(forecast_test['ds'])
    arima_parts.append(forecast_test)

# Attach the observed `y` to every forecast row and label the prediction column by model.
forecast_arima = (
    pd.concat(arima_parts)
    .merge(df[['ID', 'ds', 'y']], on=['ID', 'ds'], how='left')
    .rename(columns={'yhat': 'yhat_arima'})
)

### 2.1. Error metrics

In [None]:
### Error metrics

# Seasonal-naive reference: the value of `y` 48 rows earlier within each ID.
# NOTE(review): the shift is positional and runs on `forecast_arima` itself, so
# it relies on rows being in chronological order per ID and leaves the first 48
# rows of each ID without a reference — confirm this is intended.
forecast_arima['snaive'] = forecast_arima.groupby('ID')['y'].shift(48)

# Collect one record per ID and build the frame once. Growing a DataFrame with
# pd.concat inside the loop is quadratic and gives every row index label 0.
metric_columns = ['ID', 'RMSE', 'MAE', 'MAPE', 'MASE', 'MSSE']
metric_rows = []
for ID, df_id in forecast_arima.groupby('ID', sort=False):  # sort=False keeps first-seen ID order
    scored = {'predictions': df_id['yhat_arima'], 'truth': df_id['y']}
    metric_rows.append({
        'ID': ID,
        'RMSE': _calc_rmse(**scored),
        'MAE': _calc_mae(**scored),
        'MAPE': _calc_mape(**scored),
        'MASE': _calc_mase(**scored, snaive_predictions=df_id['snaive']),
        'MSSE': _calc_msse(**scored, snaive_predictions=df_id['snaive']),
    })

# Explicit columns keep the CSV header even if there were no groups.
metrics_arima = pd.DataFrame(metric_rows, columns=metric_columns)
metrics_arima.to_csv('metrics_arima.csv', index=False)

## 3. kNN

In [None]:
# Load the locally stored kNN forecasts and label the prediction column by model.
forecast_knn = (
    pd.read_csv('result_knn.csv', parse_dates=['ds'])
    .rename(columns={'y_pred': 'yhat_knn'})
)

## 4. Seasonal Naive

In [None]:
### Error metrics

# Seasonal-naive forecast: the value of `y` 48 rows earlier within each ID.
forecast_snaive = df.copy()
forecast_snaive['yhat_snaive'] = forecast_snaive.groupby('ID')['y'].shift(48)

# Collect one record per ID and build the frame once. Growing a DataFrame with
# pd.concat inside the loop is quadratic and gives every row index label 0.
# The loop variable is `ID` (not `id`) so the builtin is not shadowed.
metric_columns = ['ID', 'RMSE', 'MAE', 'MAPE', 'MASE', 'MSSE']
metric_rows = []
for ID, df_id in forecast_snaive.groupby('ID', sort=False):  # sort=False keeps first-seen ID order
    # Score only rows from 2021-01-01 onwards.
    df_id = df_id[df_id['ds'] >= '2021-01-01']
    scored = {'predictions': df_id['yhat_snaive'], 'truth': df_id['y']}
    metric_rows.append({
        'ID': ID,
        'RMSE': _calc_rmse(**scored),
        'MAE': _calc_mae(**scored),
        'MAPE': _calc_mape(**scored),
        # The scaled metrics compare the seasonal-naive forecast with itself.
        'MASE': _calc_mase(**scored, snaive_predictions=df_id['yhat_snaive']),
        'MSSE': _calc_msse(**scored, snaive_predictions=df_id['yhat_snaive']),
    })

# Explicit columns keep the CSV header even if there were no groups.
metrics_snaive = pd.DataFrame(metric_rows, columns=metric_columns)

# Global pandas display option: affects how floats render in every later cell.
pd.options.display.float_format = '{:.2f}'.format
metrics_snaive.to_csv('metrics_snaive.csv', index=False)

## 5. NBeats

In [None]:
### Load results

# Azure ML job whose `forecasts_forecast_test.csv` output is read from the datastore.
name = 'amusing_monkey_wcg1zy2'
file = 'forecasts_forecast_test.csv'
path = f'azureml/{name}/results/'

dataset = Dataset.Tabular.from_delimited_files(path=(datastore, path + file))
forecast_test = dataset.to_pandas_dataframe().assign(
    ds=lambda fc: pd.to_datetime(fc['ds'])
)

# Left-join onto the observed series so every (ds, ID) row of `df` is kept,
# label the prediction column by model, and keep rows from 2014-01-02 onwards.
forecast_nbeats = (
    df.merge(forecast_test, on=['ds', 'ID'], how='left')
    .rename(columns={'yhat': 'yhat_nbeats'})
    .loc[lambda merged: merged['ds'] >= '2014-01-02']
    .reset_index(drop=True)
)

In [None]:
### Error metrics

# Seasonal-naive reference: the value of `y` 48 rows earlier within each ID.
forecast_nbeats['snaive'] = forecast_nbeats.groupby('ID')['y'].shift(48)

# NOTE(review): the left merge upstream leaves `yhat_nbeats` empty on rows
# without a forecast — confirm the `_calc_*` helpers skip missing values, or
# restrict the frame to the test window before scoring.

# Collect one record per ID and build the frame once. Growing a DataFrame with
# pd.concat inside the loop is quadratic and gives every row index label 0.
metric_columns = ['ID', 'RMSE', 'MAE', 'MAPE', 'MASE', 'MSSE']
metric_rows = []
for ID, df_id in forecast_nbeats.groupby('ID', sort=False):  # sort=False keeps first-seen ID order
    scored = {'predictions': df_id['yhat_nbeats'], 'truth': df_id['y']}
    metric_rows.append({
        'ID': ID,
        'RMSE': _calc_rmse(**scored),
        'MAE': _calc_mae(**scored),
        'MAPE': _calc_mape(**scored),
        'MASE': _calc_mase(**scored, snaive_predictions=df_id['snaive']),
        'MSSE': _calc_msse(**scored, snaive_predictions=df_id['snaive']),
    })

# Explicit columns keep the CSV header even if there were no groups.
metrics = pd.DataFrame(metric_rows, columns=metric_columns)
metrics.to_csv('metrics_nbeats.csv', index=False)

# Cell output: average of each metric across IDs.
metrics[['RMSE', 'MAE', 'MAPE', 'MASE', 'MSSE']].mean()

## 6. TFT

In [None]:
### Load results

# Azure ML job whose `forecasts_forecast_test.csv` output is read from the datastore.
name = 'tidy_atemoya_fpsfw8r'
file = 'forecasts_forecast_test.csv'
path = f'azureml/{name}/results/'

dataset = Dataset.Tabular.from_delimited_files(path=(datastore, path + file))
forecast_test = dataset.to_pandas_dataframe().assign(
    ds=lambda fc: pd.to_datetime(fc['ds'])
)

# Left-join onto the observed series so every (ds, ID) row of `df` is kept,
# label the prediction column by model, and keep rows from 2014-01-02 onwards.
forecast_tft = (
    df.merge(forecast_test, on=['ds', 'ID'], how='left')
    .rename(columns={'yhat': 'yhat_tft'})
    .loc[lambda merged: merged['ds'] >= '2014-01-02']
    .reset_index(drop=True)
)

In [None]:
### Error metrics

# Seasonal-naive reference: the value of `y` 48 rows earlier within each ID.
forecast_tft['snaive'] = forecast_tft.groupby('ID')['y'].shift(48)

# NOTE(review): the left merge upstream leaves `yhat_tft` empty on rows
# without a forecast — confirm the `_calc_*` helpers skip missing values, or
# restrict the frame to the test window before scoring.

# Collect one record per ID and build the frame once. Growing a DataFrame with
# pd.concat inside the loop is quadratic and gives every row index label 0.
metric_columns = ['ID', 'RMSE', 'MAE', 'MAPE', 'MASE', 'MSSE']
metric_rows = []
for ID, df_id in forecast_tft.groupby('ID', sort=False):  # sort=False keeps first-seen ID order
    scored = {'predictions': df_id['yhat_tft'], 'truth': df_id['y']}
    metric_rows.append({
        'ID': ID,
        'RMSE': _calc_rmse(**scored),
        'MAE': _calc_mae(**scored),
        'MAPE': _calc_mape(**scored),
        'MASE': _calc_mase(**scored, snaive_predictions=df_id['snaive']),
        'MSSE': _calc_msse(**scored, snaive_predictions=df_id['snaive']),
    })

# Explicit columns keep the CSV header even if there were no groups.
metrics = pd.DataFrame(metric_rows, columns=metric_columns)
metrics.to_csv('metrics_tft.csv', index=False)

# Cell output: average of each metric across IDs.
metrics[['RMSE', 'MAE', 'MAPE', 'MASE', 'MSSE']].mean()

## 99. Merge all together

In [None]:
# Start from the XGBoost frame and left-join each other model's prediction
# column on (ID, ds), in the order listed, then persist the combined table.
benchmark_sources = [
    (forecast_arima, 'yhat_arima'),
    (forecast_knn, 'yhat_knn'),
    (forecast_snaive, 'yhat_snaive'),
    (forecast_nbeats, 'yhat_nbeats'),
    (forecast_tft, 'yhat_tft'),
]

df_benchmarks = forecast_xgboost
for source_frame, yhat_col in benchmark_sources:
    df_benchmarks = df_benchmarks.merge(
        source_frame[['ID', 'ds', yhat_col]],
        on=['ID', 'ds'],
        how='left',
    )

df_benchmarks.to_csv('../../../07_data/result_benchmarks_v2.csv', index=False)