In [6]:
############# Libraries ##############

import pandas as pd
from bcb import sgs

from matplotlib import pyplot as plt
import numpy as np

# Evaluation metrics
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_percentage_error as mape

epislon = 1e-20  # Define a small epsilon value for division by zero cases

def rmse(y_true, y_pred):
  return np.sqrt(mse(y_true, y_pred))

def mase(y_true, y_pred, y_baseline):
    # Calcula o MAE do modelo
    mae_pred = np.mean(np.abs(y_true - y_pred))
    # Calcula o MAE do modelo baseline Persistent Window (i.e., últimas h observações antes do teste)
    mae_naive = np.mean(np.abs(y_true - y_baseline))
    result = mae_pred/mae_naive
    return result

def pbe(y_true, y_pred):
  if np.sum(y_true)!=0:
    return 100*(np.sum(y_true - y_pred)/np.sum(y_true))
  else:
    return 100*(np.sum(y_true - y_pred)/(np.sum(y_true) + epislon))

def pocid(y_true, y_pred):
  n = len(y_true)
  D = [1 if (y_pred[i] - y_pred[i-1]) * (y_true[i] - y_true[i-1]) > 0 else 0 for i in range(1, n)]
  POCID = 100 * np.sum(D) / (n-1)
  return POCID

def mcpm(rmse_result, mape_result, pocid_result):
  er_result = 100 - pocid_result

  A1 = (rmse_result * mape_result * np.sin((2*np.pi)/3))/2
  A2 = (mape_result * er_result * np.sin((2*np.pi)/3))/2
  A3 = (er_result * rmse_result * np.sin((2*np.pi)/3))/2
  total = A1 + A2 + A3
  return total

def znorm(x):
  if np.std(x) != 0: 
      x_znorm = (x - np.mean(x)) / np.std(x)
  else:
      x_znorm = (x - np.mean(x)) / (np.std(x) + epislon)
  return x_znorm

def znorm_reverse(x, mean_x, std_x):
  x_denormalized = (np.array(x) * std_x) + mean_x
  return x_denormalized

def get_stats_norm(series, horizon, window):
  last_subsequence = series[-(horizon+window):-horizon].values
  last_mean = np.mean(last_subsequence)
  last_std = np.std(last_subsequence)
  return last_mean, last_std



# Para predição de vendas por UF (mensal), será considerado horizon = 12
# Para predição de vendas por município (anual), será considerado horizon = 1
def train_test_split(data, horizon):
  X = data.iloc[:,:-1] # features
  y = data.iloc[:,-1] # target

  X_train = X[:-horizon] # features train
  X_test =  X[-horizon:] # features test

  y_train = y[:-horizon] # target train
  y_test = y[-horizon:] # target test
  return X_train, X_test, y_train, y_test

def recursive_multistep_forecasting(X_test, model, horizon):
  # example é composto pelas últimas observações vistas
  # na prática, é o pbeprimeiro exemplo do conjunto de teste
  example = X_test.iloc[0].values.reshape(1,-1)

  preds = []
  for i in range(horizon):
    pred = model.predict(example)[0]
    preds.append(pred)

    # Descartar o valor da primeira posição do vetor de características
    example = example[:,1:]

    # Adicionar o valor predito na última posição do vetor de características
    example = np.append(example, pred)
    example = example.reshape(1,-1)
  return preds

def baseline_mean(series, horizon):
  # como as séries são normalizadas, esse baseline irá retornar uma reta próxima de zero
  pred = np.repeat(np.mean(znorm(series[:-horizon])), horizon)
  return pred

def baseline_persistent(series, horizon):
  return np.repeat(znorm(series[-2*horizon:-horizon]).values[-1], horizon)

def baseline_persistent_window(series, horizon):
  subsequence = znorm(series[-horizon*2:-horizon]).values
  return subsequence

def baseline_persistent_window2(series, horizon):
  subsequence2 = series[-horizon*2:-horizon].values
  return subsequence2

# Em geral, considera-se um tamanho de janela capaz de capturar um ciclo dos dados
# Por exemplo, 12 observações no caso dos dados com frequência mensal
def rolling_window(series, window):
  data = []
  for i in range(len(series)-window):
    example = znorm(np.array(series[i:i+window+1]))
    data.append(example)
  df = pd.DataFrame(data)
  return df


In [7]:
############# DEFs ##############

import os
import csv

def extract_estado(file_name):
    parts = file_name.split('_')
    estado = parts[1]
    return estado

def read_csv_files(folder_path):
    estados = []
    files = os.listdir(folder_path)
    for file_name in files:
        if file_name.endswith('.csv'):
            file_path = os.path.join(folder_path, file_name)
            with open(file_path, 'r', newline='') as csvfile:
                reader = csv.reader(csvfile)
                headers = next(reader)
                estado = extract_estado(file_name)
                estados.append(estado)
                estados.sort()
    return estados


########### BCB codes #############

state_codes = {
    'ac': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 12569, 12597, 13144, 13151, 14002, 14029, 14056, 15861, 15893, 15925],
    'al': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 12745, 13343, 13348, 13353, 14003, 14030, 14057, 15862, 15894, 15926],
    'am': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 25411, 25412, 12572, 12598, 13146, 13153, 14005, 14032, 14059, 15864, 15896, 15928],
    'ap': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 12570, 12596, 13145, 13152, 14004, 14031, 14058, 15863, 15895, 15927],
    'ba': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 21944, 21945, 25415, 25416, 12991, 13137, 13138, 13139, 14006, 14033, 14060, 15865, 15897, 15929],
    'ce': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 21959, 21960, 25390, 25391, 13010, 13093, 13094, 13166, 14007, 14034, 14061, 15866, 15898, 15930],
    'df': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 13069, 13071, 13072, 14008, 14035, 14062, 15867, 15899, 15931, 21988],
    'es': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 21926, 21928, 25398, 25399, 12985, 13386, 13387, 13388, 14009, 14036, 14063, 15868, 15900, 15932],
    'go': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 21937, 21941, 25383, 25384, 13030, 13031, 13061, 14010, 14037, 14064, 15869, 15901, 15933, 21987],
    'ma': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 13011, 13244, 13245, 13247, 14011, 14038, 14065, 15870, 15902, 15934],
    'mg': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 21936, 21940, 25379, 25380, 10746, 13026, 13028, 13029, 14014, 14041, 14068, 15873, 15905, 15937, 21986],
    'ms': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 13065, 13067, 13068, 14013, 14040, 14067, 15872, 15904, 15936, 21990],
    'mt': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 21938, 13062, 13063, 13064, 14012, 14039, 14066, 15871, 15903, 15935, 21989],
    'pa': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 25409, 25410, 12573, 12599, 13147, 13154, 14015, 14042, 14069],
    'pb': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 12743, 13342, 13347, 13352, 14016, 14043, 14070, 15875, 15907, 15939],
    'pe': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 25417, 25418, 12742, 13341, 13346, 13351, 14018, 14045, 14072, 15877, 15909, 15941],
    'pi': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 13012, 13167, 13168, 13169, 14019, 14046, 14073, 15878, 15910, 15942],
    'pr': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 21979, 21980, 25408, 25413, 12619, 13073, 13074, 13075, 14017, 14044, 14071, 15876, 15908, 15940],
    'rj': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 25396, 25397, 21927, 21929, 14020, 14047, 14074, 15879, 15911, 15943],
    'rn': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 12744, 13344, 13349, 13354, 14021, 14048, 14075, 15880, 15912, 15944],
    'ro': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 12574, 12600, 13148, 13155, 14023, 14050, 14077, 15882, 15914, 15946],
    'rr': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 12576, 12601, 13149, 13156, 14024, 14051, 14078, 15883, 15915, 15947],
    'rs': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 21932, 21933, 25401, 25404, 10753, 12553, 13078, 13079, 13080, 14022, 14049, 14076, 15881, 15913, 15945],
    'sc': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 21981, 21982, 25402, 25405, 12625, 13081, 13082, 13083, 14025, 14052, 14079, 15884, 15916, 15948],
    'se': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 12746, 13345, 13350, 13356, 14027, 14054, 14081, 15886, 15918, 15950],
    'sp': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 21925, 25392, 25394, 12867, 13021, 13022, 13023, 13024, 13025, 14026, 14053, 14080, 15885, 15917, 15949, 7493, 10755, 193],
    'to': [21924, 21930, 21931, 21935, 21939, 21943, 21942, 21934, 1376, 1373, 1374, 1382, 1383, 1384, 1385, 1386, 1379, 1387, 1388, 28527, 28528, 28529, 28530, 1380, 1378, 1394, 1391, 1393, 1397, 1398, 1399, 1400, 1395, 1389, 1392, 1396, 1390, 1401, 20716, 25359, 20715, 20714, 20740, 20718, 20717, 20757, 20756, 22701, 12577, 12602, 13150, 13157, 14028, 14055, 14082, 15887, 15919, 15951]

}

In [8]:
########### Prophet AUTO ##################

from prophet import Prophet
import os

horizon = 12
window = 12

products = sorted([name for name in os.listdir('./uf/') if os.path.isdir(os.path.join('./uf/', name))])
# products = ['etanolhidratado', 'gasolinac', 'glp', 'oleodiesel']

############################# Coleta de dados externos do BCB ##########################################
# dfext = pd.read_csv(f"./01-EXT-PrecoANP.csv", header=0, sep=",")
# dfext = pd.DataFrame(dfext)
# dfext.iloc[:, 0] = pd.to_datetime(dfext.iloc[:, 0], format='%m/%d/%Y').dt.strftime('%Y-%m-%d')
########################################################################################################

# product = 'gasolinac'
# estado = 'sp'

# print(estados)

for product in products:
    folder_path = f'./uf/{product}/'
    estados = read_csv_files(folder_path)
    for estado in estados:

        df = pd.read_csv(f"./uf/{product}/mensal_{estado}_{product}.csv", header=0, sep=";")
        df = pd.DataFrame(df)

        df['timestamp'] = pd.to_datetime(df['timestamp'].astype(str), format='%Y%m')

        series = df["m3"]

        series.index = range(0, len(series))

        monthly_data = df.groupby(df['timestamp'].dt.to_period('M'))['m3'].sum().reset_index()
        monthly_data['timestamp'] = monthly_data['timestamp'].dt.to_timestamp()

        ##################################################################################

        monthly_data.rename(columns={'m3': 'y'}, inplace=True)

        ########## Dados Externos #####################3

        # filtered_df = dfext[(dfext['PRODUTO'] == product) & (dfext['ESTADO'] == estado)]
        # filtered_df.rename(columns={'DATA': 'ds'}, inplace=True)
        # filtered_df['ds'] = pd.to_datetime(filtered_df['ds'])

        # df_data = series.tail(len(filtered_df)).reset_index(drop=True)

        # filtered_df = filtered_df.reset_index(drop=True)
        # filtered_df['y']=df_data


        ############################# Coleta de dados externos do BCB ########################################## 
        all_extdata = []
        for code in state_codes[estado]:
            try:
                bcb_data = sgs.get({'series_id': code})
                if isinstance(bcb_data, pd.DataFrame):
                    all_extdata.append(bcb_data)
                else:
                    print(f"Received data for series ID {code} is not a DataFrame.")
            except Exception as e:
                print(f"An error occurred while fetching data for series ID {code}: {e}")

        prophet_data2=pd.DataFrame()
        prophet_data2['ds']=df['timestamp']

        numb_extdata = len(all_extdata)
        for i in range(numb_extdata):
            df = all_extdata[i]
            df_filtered = df[df.index <= '2024-02-01']
            df2 = pd.DataFrame(list(df_filtered['series_id'].items()), columns=['ds', f'{i}'])
            first_column = df2.iloc[:, 0]
            normalized_data = df2.iloc[:, 1:].apply(znorm, axis=0)
            df3 = pd.concat([first_column, normalized_data], axis=1)
            prophet_data2 = pd.merge(prophet_data2, df3[['ds', f'{i}']], left_on='ds', right_on='ds', how='left')
            
        # prophet_data2 = prophet_data2.fillna(0)
        prophet_data2.dropna(inplace=True)
        prophet_data2 = prophet_data2.reset_index(drop=True)

        target_train = rolling_window(series, window)
        target_train = target_train[window]
        target_train = target_train.head(len(target_train)-horizon)
        target_train = target_train.tail(len(prophet_data2)).reset_index(drop=True)

        prophet_data2['y']=target_train


        # prophet_data3 = pd.merge(prophet_data2, df3[['ds', f'{i}']], left_on='ds', right_on='ds', how='left')


        # temp_y = merged_df['y']
        # merged_df = merged_df.drop('y', axis=1)
        # merged_result = pd.merge(merged_df, prophet_data3, on='ds')
        # new_column_names = ['ds'] + [f'{i}' for i in range(1, len(merged_result.columns))]
        # merged_result.columns = new_column_names
        # merged_result['y'] = temp_y


        ###### Normalização ##########

        # filtered_df2 = filtered_df['ds']
        # filtered_df2 = filtered_df2.tail(len(filtered_df)-window).reset_index(drop=True)
        # filtered_df2 = pd.DataFrame(filtered_df2)


        # filtered_df_no_ds = filtered_df.loc[:, ~filtered_df.columns.isin(['ds', 'PRODUTO', 'ESTADO'])]

        # # filtered_df_no_ds['y'] = series.tail(len(filtered_df)).reset_index(drop=True)

        # for column in filtered_df_no_ds:
        #     temp = rolling_window(filtered_df[column], window)
        #     filtered_df2[column] = temp[window]

        X_train, X_test, y_train, y_test = train_test_split(prophet_data2, horizon)


        #### Correlation ################################

        # dataE = pd.DataFrame({f'{i+1}': df_corr[col] for i, col in enumerate(columns_ext)})
        dataE = prophet_data2.head(len(prophet_data2)-horizon)
        dataE_ds = dataE['ds']
        dataE.drop('ds', axis=1, inplace=True)

        pearson_corr = dataE.corr(method='pearson')
        spearman_corr = dataE.corr(method='spearman')
        kendall_corr = dataE.corr(method='kendall')

        correlation_with_y = pd.DataFrame({
            'Pearson': pearson_corr['y'].drop('y'),
            'Spearman': spearman_corr['y'].drop('y'),
            'Kendall': kendall_corr['y'].drop('y')
        })

        threshold = 0.0

        correlation_with_y['Pearson2'] = correlation_with_y['Pearson'].abs()
        correlation_with_y['Spearman2'] = correlation_with_y['Spearman'].abs()
        correlation_with_y['Kendall2'] = correlation_with_y['Kendall'].abs()
        correlation_with_y['total'] = correlation_with_y['Pearson2'] + correlation_with_y['Spearman2'] + correlation_with_y['Kendall2']
        DataTBI = correlation_with_y['total'][(correlation_with_y['Pearson2'] > threshold) | (correlation_with_y['Spearman2'] > threshold) | (correlation_with_y['Kendall2'] > threshold)]
        DataTBI_sorted = DataTBI.sort_values(ascending=False)
        DataTBI_SelCol = dataE.loc[:, DataTBI_sorted.index]
        DataTBI_SelCol.insert(0, 'ds', dataE_ds)


        columns_to_select = DataTBI_SelCol.columns
        new_df = X_train[columns_to_select]


        ## FOR PARA CADA DADO EXTERNO ######################

        selected_columns = new_df.drop(columns=['ds']).columns
        # data_forecast_ve = pd.DataFrame()
        df_forecasts = pd.DataFrame()
        df_forecasts['ds'] = X_test['ds']
        for col in selected_columns:
            model_ve = Prophet()
            X2_train = new_df[['ds', col]]
            X2_train.rename(columns={col: 'y'}, inplace=True)
            model_ve.fit(X2_train)
            future_dates = pd.DataFrame(X_test['ds']).reset_index(drop=True)
            data_forecast_ve = model_ve.predict(future_dates)
            df_forecasts[f'{col}'] = data_forecast_ve['yhat'].values
            
        # df_forecasts['y'] = y_test
        new_df['y'] = y_train.tail(len(new_df))

        new_df2 = pd.DataFrame()
        if len(X_train.columns)<3:
            temp2 = rolling_window(series, window)
            temp3 = temp2[window].tail(398)   
            temp3 = temp3.head(386)
            temp4 = series['timestamp'].tail(398) 
            new_df2['ds'] = temp4.head(386).reset_index(drop=True)
            new_df2['y'] = temp3
            new_df= new_df2

        ################################################################f
        model = Prophet()

        new_column_names = []
        for col in new_df.columns:
            if col == 'ds':
                new_column_names.append('ds')
            elif col == 'y':
                new_column_names.append('y')
            else:
                new_column_names.append(f'{col}')
                model.add_regressor(f'{col}', standardize=False)

        model.fit(new_df)

        # future_dates2 = df_forecasts.drop(columns=['y'])

        data_forecastf = model.predict(df_forecasts)

        # TEST DATA
        monthly_data_norm_last = y_test
        monthly_data_norm_last = monthly_data_norm_last.reset_index(drop=True)

        future_forecast = data_forecastf['yhat'].tail(12)
        future_forecast_12 = future_forecast.reset_index(drop=True)

        #### Save model as pkl
        # import pickle
        # with open(f'../00-MODELS_UF_MENSAL/{estado}_{product}_Prophet_RawData_ExtDataSP_model.pkl', 'wb') as fd: pickle.dump({model}, fd)

        Valores_Reais = monthly_data['y'].tail(12)
        Valores_Reais = Valores_Reais.reset_index(drop=True)

        ################# DeNormalized #######################
        # Recupera a média e desvio-padrão da última subsequência observada
        mean_norm, std_norm = get_stats_norm(series, horizon, window)

        # Reescala a predição
        predictions_rescaled = znorm_reverse(future_forecast_12, mean_norm, std_norm)
        predictions_df2 = pd.DataFrame(predictions_rescaled, columns=['Predictions'])
        predictions_df2.rename(columns={'yhat': 'Predictions'}, inplace=True)


        p1 = ', '.join(map(str, predictions_df2['Predictions'].values))

        rmse_result2 = rmse(Valores_Reais, predictions_df2['Predictions'])
        mape_result2 = mape(Valores_Reais, predictions_df2['Predictions'])
        pocid_result2 = pocid(Valores_Reais, predictions_df2['Predictions'])
        # mcpm_result2 = mcpm(rmse_result2, mape_result2, pocid_result2)
        pbe_result2 = pbe(Valores_Reais, predictions_df2['Predictions'])
        basepredictions = baseline_persistent_window2(series, horizon)
        mase_result2 = mase(Valores_Reais, predictions_df2['Predictions'], basepredictions)


        # CSV Output VALORES REAIS
        with open(f'Prophet_ExtData_{window}_Exp5_BCB5_{threshold}_output.csv', 'a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([
                # product, estado, 'Prophet', fd.name, mape_result2, pocid_result2, pbe_result2, mase_result2, *future_forecast_12.values
                product, estado, 'Prophet', mape_result2, pocid_result2, pbe_result2, mase_result2, p1
            ])






  from .autonotebook import tqdm as notebook_tqdm
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataE.drop('ds', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X2_train.rename(columns={col: 'y'}, inplace=True)
15:28:48 - cmdstanpy - INFO - Chain [1] start processing
15:28:49 - cmdstanpy - INFO - Chain [1] done processing
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X2_train.rename(columns={col: 'y'}, inplace=True)
15:28:49 - cmdstanpy - INFO - Chain [1] start processing
15:28:49 - cmdsta

KeyboardInterrupt: 

In [8]:
X2_train

Unnamed: 0,ds,y
0,2013-01-01,-0.998879
1,2013-02-01,0.262995


In [83]:
dataE

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,y
0,0.055053,-0.313364,-0.608993,-0.784859,-1.029153,-1.416239,-1.104750,1.251920,0.835044,1.287966,-0.665273
1,-0.074652,-0.388075,-0.660437,-0.779169,-1.027566,-1.414467,-1.103041,1.314016,0.906684,1.367879,-0.665273
2,0.437420,-0.355458,-0.624280,-0.753482,-1.025418,-1.410924,-1.100710,0.902632,0.158444,0.755212,-0.665273
3,0.846028,-0.382890,-0.644912,-0.754978,-1.023737,-1.398966,-1.097292,0.956966,0.572364,0.977192,-0.665273
4,0.425424,-0.358944,-0.638794,-0.778129,-1.021870,-1.393208,-1.094728,0.995776,0.898724,1.154777,-0.665273
...,...,...,...,...,...,...,...,...,...,...,...
225,0.235740,0.633002,0.989702,1.090847,2.171106,2.004655,2.158085,-1.107714,-1.059437,-1.198219,0.000000
226,-0.050661,0.447324,0.967790,1.322876,2.222739,2.051601,2.209359,-1.099953,-1.083317,-1.198219,0.000000
227,-1.238248,0.054027,0.690285,1.339069,2.276145,2.095448,2.261488,-1.068905,-1.003717,-1.144944,0.000000
228,-0.600970,0.192207,0.876142,1.515106,2.329364,2.111835,2.308645,-0.983523,-0.820637,-1.029514,0.000000


In [70]:
DataTBI_SelCol

Unnamed: 0,7,ds,4,6,5,2,9
0,1.251920,2004-01-01,-1.029153,-1.104750,-1.416239,-0.608993,1.287966
1,1.314016,2004-02-01,-1.027566,-1.103041,-1.414467,-0.660437,1.367879
2,0.902632,2004-03-01,-1.025418,-1.100710,-1.410924,-0.624280,0.755212
3,0.956966,2004-04-01,-1.023737,-1.097292,-1.398966,-0.644912,0.977192
4,0.995776,2004-05-01,-1.021870,-1.094728,-1.393208,-0.638794,1.154777
...,...,...,...,...,...,...,...
225,-1.107714,2022-10-01,2.171106,2.158085,2.004655,0.989702,-1.198219
226,-1.099953,2022-11-01,2.222739,2.209359,2.051601,0.967790,-1.198219
227,-1.068905,2022-12-01,2.276145,2.261488,2.095448,0.690285,-1.144944
228,-0.983523,2023-01-01,2.329364,2.308645,2.111835,0.876142,-1.029514


True

In [15]:
############### Plot Prophet

from prophet.plot import plot_plotly, plot_components_plotly

# Plot the forecast
fig1 = plot_plotly(model, data_forecastf)
fig1.show()

# Plot forecast components
fig2 = plot_components_plotly(model, data_forecastf)
fig2.show()