### Code Description

This code is responsible for calculating several evaluation metrics for the 60 predicted points from the models. The metrics calculated are:

1. **MAPE (Mean Absolute Percentage Error)**: Measures the mean absolute percentage error between the predicted and actual values. It is a common metric for assessing the accuracy of forecast models.

2. **POCID (Prediction of Change in Direction)**: Measures the percentage of consecutive time steps in which the predicted series moves in the same direction (up or down) as the actual series.

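3. **PBE (Percentage Bias Error)**: Measures the prediction bias as the sum of the differences between the actual and predicted values, expressed as a percentage of the sum of the actual values. Positive values indicate that the model under-forecasts on aggregate; negative values indicate over-forecasting.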

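4. **MASE (Mean Absolute Scaled Error)**: A scaled error metric that allows comparisons between different time series. Calculated for the years 2020, 2021, 2022, 2023, and 2024: one value per 12-month window of the 60-point horizon, each scaled by the error of a persistence baseline that repeats the preceding 12 months.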

5. **RRMSE (Relative Root Mean Squared Error)**: Measures the root mean squared error of the predictions divided by the mean of the complete actual series, evaluating the magnitude of the prediction error in relation to the typical level of the data.


In [10]:
###### Metrics Platform (5 years) #######

import pandas as pd
import numpy as np
import os
from sklearn.metrics import mean_absolute_percentage_error as mape

# Excel workbook holding the stored model forecasts; the evaluation loop below
# reads its PREDICTIONS column plus the cells at column positions 2, 3 and 4.
file_path = 'Time-MoE/results_model_local/results_time_moe_product_5_years.xlsx'
data = pd.read_excel(file_path)

In [11]:
# Number of trailing observations taken from each series (via `.tail(horizon)`)
# and compared against the stored forecasts.
horizon = 60

###################################################

def rrmse(y_true, y_pred, mean_y_true_serie_completa):
    """Return the RMSE of a forecast relative to the mean of the complete series.

    Args:
        y_true: Observed values (any array-like of numbers).
        y_pred: Predicted values, same length as ``y_true``.
        mean_y_true_serie_completa: Mean of the complete observed series,
            used as the normalising denominator.

    Returns:
        ``sqrt(mean((y_true - y_pred) ** 2)) / mean_y_true_serie_completa``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    # The denominator is the caller-supplied mean of the whole series, not the
    # mean of the evaluated window.
    return rmse / mean_y_true_serie_completa

def baseline_persistent_window(series, h):
    """Return the values of the ``h``-row window that ends ``h`` rows before the end.

    The slice taken is ``series[-2*h:-h]``; the result is whatever the
    ``.values`` attribute of that slice yields.
    """
    window_start = -2 * h
    window_end = -h
    window = series[window_start:window_end]
    return window.values

# Tiny constant added to a zero denominator in pbe() and mase() so the division
# does not fail. NOTE: the name is a misspelling of "epsilon"; it is referenced
# by both functions, so rename all uses together.
epislon = 1e-20

def pbe(y_true, y_pred):
    """Return the percentage bias error of a forecast.

    Computed as ``100 * sum(y_true - y_pred) / sum(y_true)``. Positive values
    mean the forecast is below the observations on aggregate.

    Inputs are converted to NumPy arrays, so plain lists are accepted and
    pandas objects are compared by position rather than aligned by index label.

    Args:
        y_true: Observed values (array-like of numbers).
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        The bias as a percentage of the sum of the observed values.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    total_true = np.sum(y_true)
    # When the observations sum to zero, nudge the denominator away from zero
    # instead of dividing by zero.
    denominator = total_true if total_true != 0 else total_true + epislon
    return 100 * (np.sum(y_true - y_pred) / denominator)
  
def mase(y_true, y_pred, y_baseline):
    """Return the mean absolute scaled error of a forecast.

    The forecast's mean absolute error is divided by the mean absolute error
    of a baseline forecast over the same observations.

    Inputs are converted to NumPy arrays, so plain lists are accepted and
    pandas objects are compared by position rather than aligned by index label.

    Args:
        y_true: Observed values (array-like of numbers).
        y_pred: Predicted values, same length as ``y_true``.
        y_baseline: Baseline forecast values, same length as ``y_true``.

    Returns:
        ``MAE(y_true, y_pred) / MAE(y_true, y_baseline)``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    y_baseline = np.asarray(y_baseline, dtype=float)

    mae_pred = np.mean(np.abs(y_true - y_pred))
    mae_naive = np.mean(np.abs(y_true - y_baseline))
    # A perfect baseline would make the denominator zero; nudge it away from
    # zero instead of dividing by zero.
    if mae_naive == 0:
        mae_naive = mae_naive + epislon
    return mae_pred / mae_naive

def pocid(y_true, y_pred):
    """Return the percentage of steps where forecast and actual move the same way.

    For every pair of consecutive points, the step counts as a hit when the
    change in ``y_pred`` and the change in ``y_true`` have the same sign (their
    product is strictly positive). Flat steps therefore never count as hits.

    Inputs are converted to NumPy arrays, so values are always read by
    position; a pandas Series whose index does not start at 0 works too.

    Args:
        y_true: Observed values (array-like of numbers).
        y_pred: Predicted values with at least ``len(y_true)`` points; only
            the first ``len(y_true)`` are used.

    Returns:
        ``100 * hits / (len(y_true) - 1)``, or ``nan`` when there are fewer
        than two observations (no step exists to compare).

    Raises:
        ValueError: If ``y_pred`` has fewer points than ``y_true``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    n = len(y_true)
    if len(y_pred) < n:
        raise ValueError("y_pred must have at least as many points as y_true")
    if n < 2:
        return float("nan")
    same_direction = np.diff(y_pred[:n]) * np.diff(y_true) > 0
    return 100 * np.sum(same_direction) / (n - 1)

def save_results(model, mape_result, pocid_result, pbe_result, mase_result, rrmse_result, state, produtct, pontos, filename="Plot_Metrics_5_years_Output.csv"):
    """Append one row of evaluation metrics to a CSV file.

    The header row is written only when ``filename`` does not exist yet;
    otherwise the row is appended without a header.

    Args:
        model: Value stored in the TYPE_PREDICTIONS column.
        mape_result: Value stored in the MAPE column.
        pocid_result: Value stored in the POCID column.
        pbe_result: Value stored in the PBE column.
        mase_result: Value stored in the MASE column.
        rrmse_result: Value stored in the RRMSE column.
        state: Value stored in the STATE column.
        produtct: Value stored in the PRODUCT column.
        pontos: Value stored (as a single cell) in the PONTOS column.
        filename: Path of the CSV file to create or append to.
    """
    row = pd.DataFrame(
        {
            "TYPE_PREDICTIONS": [model],
            "STATE": state,
            "PRODUCT": produtct,
            "MAPE": [mape_result],
            "POCID": [pocid_result],
            "PBE": [pbe_result],
            "MASE(2020,2021,2022,2023,2024)": [mase_result],
            "RRMSE": [rrmse_result],
            "PONTOS": [pontos],
        }
    )

    already_exists = os.path.isfile(filename)
    if already_exists:
        # File is present: append the row only, the header is already there.
        row.to_csv(filename, mode='a', index=False, header=False)
        return
    row.to_csv(filename, index=False)


#######################################

# Imported once here instead of on every loop iteration; pyplot is only needed
# by the (currently disabled) plotting code at the end of the loop body.
import matplotlib.pyplot as plt

# Every cell is handled as text below (the PREDICTIONS column is parsed from
# its string form), so convert once up front rather than on every iteration.
data = data.astype(str)

h = 12  # length of each MASE window and of its persistence baseline
n_mase_windows = horizon // h

# NOTE(review): iteration starts at row 1, so the first row of `data` is never
# evaluated. Kept as in the original; confirm whether row 0 should be skipped.
for i in range(1, data.shape[0]):
    row = data.iloc[i]

    # Identifying fields are read by column position.
    model = row.iloc[2]    # column position 2: model name
    state = row.iloc[3]    # column position 3: state
    product = row.iloc[4]  # column position 4: product

    # PREDICTIONS holds a bracketed, comma-separated list of numbers as text.
    predictions_float = [float(x) for x in row["PREDICTIONS"].strip('[]').split(',')]
    predictions_df = pd.DataFrame(predictions_float, columns=['Predictions'])

    ################ Metrics Platform ##############

    df = pd.read_csv(f"database/venda_process/mensal/uf/{product}/mensal_{state}_{product}.csv", header=0, sep=";")

    last_date = df['timestamp'].tail(horizon)
    last_m3 = df['m3'].tail(horizon)

    # Actual values for the evaluated horizon, re-indexed from 0 so they line
    # up with the predictions.
    new_df = pd.DataFrame({
        'timestamp': last_date,
        'product': product,
        'state': state,
        'm3': last_m3.values})
    new_df = new_df.reset_index(drop=True)

    actual = new_df['m3']
    predicted = predictions_df['Predictions']

    mape_result = mape(actual, predicted)
    pocid_result = pocid(actual, predicted)
    pbe_result = pbe(actual, predicted)
    rrmse_result = rrmse(actual, predicted, df['m3'].mean())

    print(actual, predicted, df['m3'].mean())

    ##################### NEW MASE #################################

    # One MASE value per h-point window of the horizon, oldest window first.
    # Each window is scaled by a persistence baseline: the h observations that
    # immediately precede it in the full series. The `m3` column is selected by
    # name so the baseline does not depend on the column order of the CSV.
    m3_full = df['m3']
    mase_values = []
    for back in range(n_mase_windows - 1, -1, -1):
        start = -h * (back + 1)
        stop = -h * back if back else None  # None: the window runs to the end
        history = m3_full if back == 0 else m3_full.iloc[:stop]
        y_baseline = baseline_persistent_window(history, h)
        mase_values.append(mase(actual.iloc[start:stop], predicted.iloc[start:stop], y_baseline))

    mase_result = ', '.join(f"{value:.4f}" for value in mase_values)

    ############### Metrics ##############

    save_results(model, mape_result, pocid_result, pbe_result, mase_result, rrmse_result, state, product, predicted.tolist())

    ############### Plot Predictions ##############

    print(f"MODELO: {model}, MAPE: {mape_result:.5f}, POCID: {pocid_result:.5f}, PBE: {pbe_result:.5f}, MASE: {mase_result}, RRMSE: {rrmse_result:.5f}")

    # Month-start dates used as the x-axis of the optional plot below.
    date_range = pd.date_range(start='2019-02-01', periods=len(predictions_df['Predictions']), freq='MS')

    # Plotting is disabled; uncomment to visualise each forecast.
    # plt.figure(figsize=(6, 3))
    # plt.plot(date_range, new_df['m3'].tail(horizon), label='Valores Reais', color='blue')
    # plt.plot(date_range, predictions_df['Predictions'], label='Forecast', color='red', linestyle='--')

    # plt.title(f'Product: {product}, Estado: {state}, Modelo: {model}')
    # plt.xlabel(f'{product}')
    # plt.ylabel('Valores em m3')
    # plt.legend()
    # plt.show()


0     106428.200
1     113170.535
2     114419.900
3     107548.372
4     116282.268
5     118671.210
6     115249.584
7     121956.502
8     118565.424
9     123581.000
10    112079.184
11    107729.500
12     94904.843
13     81030.070
14     84202.486
15     94996.050
16    105046.761
17    107417.010
18    113355.027
19    125905.921
20    117662.295
21    128744.867
22    113979.300
23     95564.336
24     99242.879
25     96530.465
26    104949.574
27    108032.380
28    114439.752
29    119538.636
30    116157.000
31    119687.998
32    106117.250
33    123087.400
34    102121.814
35    101711.350
36    101023.050
37    100827.842
38    105101.774
39     98627.086
40    110063.759
41    124984.550
42    127520.600
43    124463.710
44    117846.450
45    140134.650
46    121224.450
47    118101.950
48    120694.150
49    112617.600
50    131617.050
51    120166.500
52    117565.351
53    122843.350
54    118815.050
55    123491.550
56    121959.000
57    134999.651
58    125083.9