In [1]:
############# Libraries ##############

import pandas as pd
from bcb import sgs

from matplotlib import pyplot as plt
import numpy as np

# Evaluation metrics
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_percentage_error as mape

epislon = 1e-20  # Define a small epsilon value for division by zero cases

def rmse(y_true, y_pred):
  return np.sqrt(mse(y_true, y_pred))

def mase(y_true, y_pred, y_baseline):
    # Calcula o MAE do modelo
    mae_pred = np.mean(np.abs(y_true - y_pred))
    # Calcula o MAE do modelo baseline Persistent Window (i.e., últimas h observações antes do teste)
    mae_naive = np.mean(np.abs(y_true - y_baseline))
    result = mae_pred/mae_naive
    return result

def pbe(y_true, y_pred):
  if np.sum(y_true)!=0:
    return 100*(np.sum(y_true - y_pred)/np.sum(y_true))
  else:
    return 100*(np.sum(y_true - y_pred)/(np.sum(y_true) + epislon))

def pocid(y_true, y_pred):
  n = len(y_true)
  D = [1 if (y_pred[i] - y_pred[i-1]) * (y_true[i] - y_true[i-1]) > 0 else 0 for i in range(1, n)]
  POCID = 100 * np.sum(D) / (n-1)
  return POCID

def mcpm(rmse_result, mape_result, pocid_result):
  er_result = 100 - pocid_result

  A1 = (rmse_result * mape_result * np.sin((2*np.pi)/3))/2
  A2 = (mape_result * er_result * np.sin((2*np.pi)/3))/2
  A3 = (er_result * rmse_result * np.sin((2*np.pi)/3))/2
  total = A1 + A2 + A3
  return total

def znorm(x):
  if np.std(x) != 0: 
      x_znorm = (x - np.mean(x)) / np.std(x)
  else:
      x_znorm = (x - np.mean(x)) / (np.std(x) + epislon)
  return x_znorm

def znorm_reverse(x, mean_x, std_x):
  x_denormalized = (np.array(x) * std_x) + mean_x
  return x_denormalized

def get_stats_norm(series, horizon, window):
  last_subsequence = series[-(horizon+window):-horizon].values
  last_mean = np.mean(last_subsequence)
  last_std = np.std(last_subsequence)
  return last_mean, last_std



# Para predição de vendas por UF (mensal), será considerado horizon = 12
# Para predição de vendas por município (anual), será considerado horizon = 1
def train_test_split(data, horizon):
  X = data.iloc[:,:-1] # features
  y = data.iloc[:,-1] # target

  X_train = X[:-horizon] # features train
  X_test =  X[-horizon:] # features test

  y_train = y[:-horizon] # target train
  y_test = y[-horizon:] # target test
  return X_train, X_test, y_train, y_test

def recursive_multistep_forecasting(X_test, model, horizon):
  # example é composto pelas últimas observações vistas
  # na prática, é o pbeprimeiro exemplo do conjunto de teste
  example = X_test.iloc[0].values.reshape(1,-1)

  preds = []
  for i in range(horizon):
    pred = model.predict(example)[0]
    preds.append(pred)

    # Descartar o valor da primeira posição do vetor de características
    example = example[:,1:]

    # Adicionar o valor predito na última posição do vetor de características
    example = np.append(example, pred)
    example = example.reshape(1,-1)
  return preds

def baseline_mean(series, horizon):
  # como as séries são normalizadas, esse baseline irá retornar uma reta próxima de zero
  pred = np.repeat(np.mean(znorm(series[:-horizon])), horizon)
  return pred

def baseline_persistent(series, horizon):
  return np.repeat(znorm(series[-2*horizon:-horizon]).values[-1], horizon)

def baseline_persistent_window(series, horizon):
  subsequence = znorm(series[-horizon*2:-horizon]).values
  return subsequence

def baseline_persistent_window2(series, horizon):
  subsequence2 = series[-horizon*2:-horizon].values
  return subsequence2

# Em geral, considera-se um tamanho de janela capaz de capturar um ciclo dos dados
# Por exemplo, 12 observações no caso dos dados com frequência mensal
def rolling_window(series, window):
  data = []
  for i in range(len(series)-window):
    example = znorm(np.array(series[i:i+window+1]))
    data.append(example)
  df = pd.DataFrame(data)
  return df


In [2]:
############# DEFs ##############

import os
import csv

def extract_estado(file_name):
    parts = file_name.split('_')
    estado = parts[1]
    return estado

def read_csv_files(folder_path):
    estados = []
    files = os.listdir(folder_path)
    for file_name in files:
        if file_name.endswith('.csv'):
            file_path = os.path.join(folder_path, file_name)
            with open(file_path, 'r', newline='') as csvfile:
                reader = csv.reader(csvfile)
                headers = next(reader)
                estado = extract_estado(file_name)
                estados.append(estado)
                estados.sort()
    return estados



In [3]:
########### Prophet TEST ##################

from prophet import Prophet
import os

horizon = 12
window = 12

# products = sorted([name for name in os.listdir('../uf/') if os.path.isdir(os.path.join('../uf/', name))])
products = ['etanolhidratado', 'gasolinac', 'glp', 'oleodiesel']

############################# Coleta de dados externos do BCB ##########################################
dfext = pd.read_csv(f"./01-EXT-PrecoANP.csv", header=0, sep=",")
dfext = pd.DataFrame(dfext)
dfext.iloc[:, 0] = pd.to_datetime(dfext.iloc[:, 0], format='%m/%d/%Y').dt.strftime('%Y-%m-%d')
########################################################################################################

product = 'gasolinac'
estado = 'sp'

# print(estados)

# for product in products:
#     folder_path = f'./uf/{product}/'
#     estados = read_csv_files(folder_path)
#     for estado in estados:

df = pd.read_csv(f"./uf/{product}/mensal_{estado}_{product}.csv", header=0, sep=";")
df = pd.DataFrame(df)

df['timestamp'] = pd.to_datetime(df['timestamp'].astype(str), format='%Y%m')

series = df["m3"]

series.index = range(0, len(series))

monthly_data = df.groupby(df['timestamp'].dt.to_period('M'))['m3'].sum().reset_index()
monthly_data['timestamp'] = monthly_data['timestamp'].dt.to_timestamp()

##################################################################################

monthly_data.rename(columns={'m3': 'y'}, inplace=True)

########## Dados Externos #####################3

filtered_df = dfext[(dfext['PRODUTO'] == product) & (dfext['ESTADO'] == estado)]
filtered_df.rename(columns={'DATA': 'ds'}, inplace=True)
filtered_df['ds'] = pd.to_datetime(filtered_df['ds'])

df_data = series.tail(len(filtered_df)).reset_index(drop=True)

filtered_df = filtered_df.reset_index(drop=True)
filtered_df['y']=df_data

###### Normalização ##########



filtered_df2 = filtered_df['ds']
filtered_df2 = filtered_df2.tail(len(filtered_df)-window).reset_index(drop=True)
filtered_df2 = pd.DataFrame(filtered_df2)


filtered_df_no_ds = filtered_df.loc[:, ~filtered_df.columns.isin(['ds', 'PRODUTO', 'ESTADO'])]

# filtered_df_no_ds['y'] = series.tail(len(filtered_df)).reset_index(drop=True)

for column in filtered_df_no_ds:
    temp = rolling_window(filtered_df[column], window)
    filtered_df2[column] = temp[window]

X_train, X_test, y_train, y_test = train_test_split(filtered_df2, horizon)

  from .autonotebook import tqdm as notebook_tqdm
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'DATA': 'ds'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['ds'] = pd.to_datetime(filtered_df['ds'])


In [4]:
X_train

Unnamed: 0,ds,BP-WTI,DollarCotacaoCompra,DollarCotacaoVenda,ParidadeCompra,ParidadeVenda,EuroCotacaoCompra,EuroCotacaoVenda,SELIC-Taxa,SELIC-FatorDiario,...,SELIC-Media,SELIC-Mediana,SELIC-Moda,SELIC-DesvioPadrao,SELIC-IndiceCurtose,PRECO MEDIO REVENDA,DESVIO PADRAO REVENDA,PRECO MINIMO REVENDA,PRECO MAXIMO REVENDA,COEF DE VARIACAO REVENDA
0,2002-07-01,0.978674,2.298594,2.299498,3.209022,2.550043,2.535047,2.537189,-1.582909,0.981793,...,0.816953,0.818699,0.818218,2.777392,-0.274302,1.067693,1.150904,0.122810,2.127065,0.434293
1,2002-08-01,1.325312,2.698041,2.701441,0.889827,1.731428,2.184223,2.190358,-1.958720,0.949490,...,0.802376,0.803460,0.803072,1.562957,-0.397874,0.632606,1.186487,0.680744,1.850843,0.661878
2,2002-09-01,1.558645,2.069018,2.066718,0.818904,1.491474,2.138577,2.135634,-1.505574,0.873126,...,-0.922292,-0.922320,-0.922309,-0.712027,-0.761687,0.551300,0.752222,0.730730,1.579328,0.400892
3,2002-10-01,1.196150,2.261534,2.259034,0.731770,1.283018,2.285675,2.286593,1.837364,0.803141,...,1.246612,1.246543,1.246291,0.805262,0.699182,0.622797,0.030789,0.830156,-0.712220,-0.340693
4,2002-11-01,0.423040,1.474461,1.472993,2.646978,1.430831,1.627441,1.625374,2.811669,0.680026,...,0.469494,0.469542,0.469432,0.237970,0.488898,2.307850,-0.922689,1.770051,3.276505,-1.768750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,2022-10-01,-0.425761,-0.187408,-0.192531,-1.161666,-1.595988,-0.967703,-0.967558,1.063334,1.054769,...,1.063334,1.063338,1.063334,-0.777147,0.490065,-1.861744,1.125446,-1.585744,-0.676376,1.970006
243,2022-11-01,-0.702083,0.234988,0.232955,-0.144438,-0.831661,-0.412773,-0.412492,0.976999,0.971289,...,0.976999,0.977028,0.976999,-0.662677,0.214777,-1.445597,0.681712,-1.309173,-0.455592,1.296512
244,2022-12-01,-1.289272,-0.054166,-0.059699,1.143281,-0.029616,0.091616,0.091770,0.886888,0.882816,...,0.886888,0.887413,0.886888,-0.754876,-0.358415,-1.264610,0.715824,-1.237749,-0.236007,1.251808
245,2023-01-01,-1.267898,0.859207,0.861201,1.574722,0.443595,0.466176,0.466192,0.792963,0.789770,...,0.792963,0.792882,0.792963,-0.884545,0.544643,-0.995366,1.178979,-1.067011,0.504141,1.343174


In [5]:
##### TODO ##################################################################################


#### Correlation ################################

# dataE = pd.DataFrame({f'{i+1}': df_corr[col] for i, col in enumerate(columns_ext)})

dataE = filtered_df_no_ds.head(len(filtered_df_no_ds)-horizon)


pearson_corr = dataE.corr(method='pearson')
spearman_corr = dataE.corr(method='spearman')
kendall_corr = dataE.corr(method='kendall')

correlation_with_y = pd.DataFrame({
    'Pearson': pearson_corr['y'].drop('y'),
    'Spearman': spearman_corr['y'].drop('y'),
    'Kendall': kendall_corr['y'].drop('y')
})

threshold = 0.3

correlation_with_y['Pearson2'] = correlation_with_y['Pearson'].abs()
correlation_with_y['Spearman2'] = correlation_with_y['Spearman'].abs()
correlation_with_y['Kendall2'] = correlation_with_y['Kendall'].abs()
correlation_with_y['total'] = correlation_with_y['Pearson2'] + correlation_with_y['Spearman2'] + correlation_with_y['Kendall2']
DataTBI = correlation_with_y['total'][(correlation_with_y['Pearson2'] > threshold) | (correlation_with_y['Spearman2'] > threshold) | (correlation_with_y['Kendall2'] > threshold)]
DataTBI_sorted = DataTBI.sort_values(ascending=False)
DataTBI_SelCol = dataE.loc[:, DataTBI_sorted.index]
DataTBI_SelCol.insert(0, 'ds', pd.NA)


columns_to_select = DataTBI_SelCol.columns
new_df = X_train[columns_to_select]



In [7]:
## FOR PARA CADA DADO EXTERNO ######################

selected_columns = new_df.drop(columns=['ds']).columns
# data_forecast_ve = pd.DataFrame()
df_forecasts = pd.DataFrame()
df_forecasts['ds'] = X_test['ds']
for col in selected_columns:
    model_ve = Prophet()
    X2_train = new_df[['ds', col]]
    X2_train.rename(columns={col: 'y'}, inplace=True)
    model_ve.fit(X2_train)
    future_dates = pd.DataFrame(X_test['ds']).reset_index(drop=True)
    data_forecast_ve = model_ve.predict(future_dates)
    df_forecasts[f'{col}'] = data_forecast_ve['yhat'].values
    
# df_forecasts['y'] = y_test
new_df['y'] = y_train.tail(len(new_df))

new_df2 = pd.DataFrame()
if len(new_df.columns)<3:
    temp2 = rolling_window(series, window)
    temp3 = temp2[window].tail(398)   
    temp3 = temp3.head(386)
    temp4 = df['timestamp'].tail(398) 
    new_df2['ds'] = temp4.head(386).reset_index(drop=True)
    new_df2['y'] = temp3
    new_df= new_df2

################################################################f
model = Prophet()

new_column_names = []
for col in new_df.columns:
    if col == 'ds':
        new_column_names.append('ds')
    elif col == 'y':
        new_column_names.append('y')
    else:
        new_column_names.append(f'{col}')
        model.add_regressor(f'{col}', standardize=False)

model.fit(new_df)

# future_dates2 = df_forecasts.drop(columns=['y'])

data_forecastf = model.predict(df_forecasts)

# TEST DATA
monthly_data_norm_last = y_test
monthly_data_norm_last = monthly_data_norm_last.reset_index(drop=True)

future_forecast = data_forecastf['yhat'].tail(12)
future_forecast_12 = future_forecast.reset_index(drop=True)

#### Save model as pkl
# import pickle
# with open(f'../00-MODELS_UF_MENSAL/{estado}_{product}_Prophet_RawData_ExtDataSP_model.pkl', 'wb') as fd: pickle.dump({model}, fd)

Valores_Reais = monthly_data['y'].tail(12)
Valores_Reais = Valores_Reais.reset_index(drop=True)

################# DeNormalized #######################
# Recupera a média e desvio-padrão da última subsequência observada
mean_norm, std_norm = get_stats_norm(series, horizon, window)

# Reescala a predição
predictions_rescaled = znorm_reverse(future_forecast_12, mean_norm, std_norm)
predictions_df2 = pd.DataFrame(predictions_rescaled, columns=['Predictions'])
predictions_df2.rename(columns={'yhat': 'Predictions'}, inplace=True)


p1 = ', '.join(map(str, predictions_df2['Predictions'].values))

rmse_result2 = rmse(Valores_Reais, predictions_df2['Predictions'])
mape_result2 = mape(Valores_Reais, predictions_df2['Predictions'])
pocid_result2 = pocid(Valores_Reais, predictions_df2['Predictions'])
# mcpm_result2 = mcpm(rmse_result2, mape_result2, pocid_result2)
pbe_result2 = pbe(Valores_Reais, predictions_df2['Predictions'])
basepredictions = baseline_persistent_window2(series, horizon)
mase_result2 = mase(Valores_Reais, predictions_df2['Predictions'], basepredictions)


# CSV Output VALORES REAIS
with open(f'Prophet_ExtData_{window}_Exp4_output_TEST.csv', 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow([
        # product, estado, 'Prophet', fd.name, mape_result2, pocid_result2, pbe_result2, mase_result2, *future_forecast_12.values
        product, estado, 'Prophet', mape_result2, pocid_result2, pbe_result2, mase_result2, p1
    ])





A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X2_train.rename(columns={col: 'y'}, inplace=True)
10:43:56 - cmdstanpy - INFO - Chain [1] start processing
10:43:56 - cmdstanpy - INFO - Chain [1] done processing
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X2_train.rename(columns={col: 'y'}, inplace=True)
10:43:56 - cmdstanpy - INFO - Chain [1] start processing
10:43:56 - cmdstanpy - INFO - Chain [1] done processing
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X2_train.rename(columns={col: 'y'}, inplace=True)


In [4]:
########### Prophet TEST ##################

from prophet import Prophet
import os

horizon = 12
window = 12

# products = sorted([name for name in os.listdir('../uf/') if os.path.isdir(os.path.join('../uf/', name))])
products = ['etanolhidratado', 'gasolinac', 'glp', 'oleodiesel']

############################# Coleta de dados externos do BCB ##########################################
dfext = pd.read_csv(f"./01-EXT-PrecoANP.csv", header=0, sep=",")
dfext = pd.DataFrame(dfext)
dfext.iloc[:, 0] = pd.to_datetime(dfext.iloc[:, 0], format='%m/%d/%Y').dt.strftime('%Y-%m-%d')
########################################################################################################

# product = 'gasolinac'
# estado = 'sp'

# print(estados)

for product in products:
    folder_path = f'./uf/{product}/'
    estados = read_csv_files(folder_path)
    for estado in estados:

        df = pd.read_csv(f"./uf/{product}/mensal_{estado}_{product}.csv", header=0, sep=";")
        df = pd.DataFrame(df)

        df['timestamp'] = pd.to_datetime(df['timestamp'].astype(str), format='%Y%m')

        series = df["m3"]

        series.index = range(0, len(series))

        monthly_data = df.groupby(df['timestamp'].dt.to_period('M'))['m3'].sum().reset_index()
        monthly_data['timestamp'] = monthly_data['timestamp'].dt.to_timestamp()

        ##################################################################################

        monthly_data.rename(columns={'m3': 'y'}, inplace=True)

        ########## Dados Externos #####################3

        filtered_df = dfext[(dfext['PRODUTO'] == product) & (dfext['ESTADO'] == estado)]
        filtered_df.rename(columns={'DATA': 'ds'}, inplace=True)
        filtered_df['ds'] = pd.to_datetime(filtered_df['ds'])

        df_data = series.tail(len(filtered_df)).reset_index(drop=True)

        filtered_df = filtered_df.reset_index(drop=True)
        filtered_df['y']=df_data

        ###### Normalização ##########



        filtered_df2 = filtered_df['ds']
        filtered_df2 = filtered_df2.tail(len(filtered_df)-window).reset_index(drop=True)
        filtered_df2 = pd.DataFrame(filtered_df2)


        filtered_df_no_ds = filtered_df.loc[:, ~filtered_df.columns.isin(['ds', 'PRODUTO', 'ESTADO'])]

        # filtered_df_no_ds['y'] = series.tail(len(filtered_df)).reset_index(drop=True)

        for column in filtered_df_no_ds:
            temp = rolling_window(filtered_df[column], window)
            filtered_df2[column] = temp[window]

        X_train, X_test, y_train, y_test = train_test_split(filtered_df2, horizon)

        ##### TODO ##################################################################################


        #### Correlation ################################

        # dataE = pd.DataFrame({f'{i+1}': df_corr[col] for i, col in enumerate(columns_ext)})

        dataE = filtered_df_no_ds.head(len(filtered_df_no_ds)-horizon)


        pearson_corr = dataE.corr(method='pearson')
        spearman_corr = dataE.corr(method='spearman')
        kendall_corr = dataE.corr(method='kendall')

        correlation_with_y = pd.DataFrame({
            'Pearson': pearson_corr['y'].drop('y'),
            'Spearman': spearman_corr['y'].drop('y'),
            'Kendall': kendall_corr['y'].drop('y')
        })

        threshold = 0.7

        correlation_with_y['Pearson2'] = correlation_with_y['Pearson'].abs()
        correlation_with_y['Spearman2'] = correlation_with_y['Spearman'].abs()
        correlation_with_y['Kendall2'] = correlation_with_y['Kendall'].abs()
        correlation_with_y['total'] = correlation_with_y['Pearson2'] + correlation_with_y['Spearman2'] + correlation_with_y['Kendall2']
        DataTBI = correlation_with_y['total'][(correlation_with_y['Pearson2'] > threshold) | (correlation_with_y['Spearman2'] > threshold) | (correlation_with_y['Kendall2'] > threshold)]
        DataTBI_sorted = DataTBI.sort_values(ascending=False)
        DataTBI_SelCol = dataE.loc[:, DataTBI_sorted.index]
        DataTBI_SelCol.insert(0, 'ds', pd.NA)


        columns_to_select = DataTBI_SelCol.columns
        new_df = X_train[columns_to_select]


        ## FOR PARA CADA DADO EXTERNO ######################

        selected_columns = new_df.drop(columns=['ds']).columns
        # data_forecast_ve = pd.DataFrame()
        df_forecasts = pd.DataFrame()
        df_forecasts['ds'] = X_test['ds']
        for col in selected_columns:
            model_ve = Prophet()
            X2_train = new_df[['ds', col]]
            X2_train.rename(columns={col: 'y'}, inplace=True)
            model_ve.fit(X2_train)
            future_dates = pd.DataFrame(X_test['ds']).reset_index(drop=True)
            data_forecast_ve = model_ve.predict(future_dates)
            df_forecasts[f'{col}'] = data_forecast_ve['yhat'].values
            
        # df_forecasts['y'] = y_test
        new_df['y'] = y_train.tail(len(new_df))

        new_df2 = pd.DataFrame()
        if len(new_df.columns)<3:
            temp2 = rolling_window(series, window)
            temp3 = temp2[window].tail(398)   
            temp3 = temp3.head(386)
            temp4 = df['timestamp'].tail(398) 
            new_df2['ds'] = temp4.head(386).reset_index(drop=True)
            new_df2['y'] = temp3
            new_df= new_df2

        ################################################################f
        model = Prophet()

        new_column_names = []
        for col in new_df.columns:
            if col == 'ds':
                new_column_names.append('ds')
            elif col == 'y':
                new_column_names.append('y')
            else:
                new_column_names.append(f'{col}')
                model.add_regressor(f'{col}', standardize=False)

        model.fit(new_df)

        # future_dates2 = df_forecasts.drop(columns=['y'])

        data_forecastf = model.predict(df_forecasts)

        # TEST DATA
        monthly_data_norm_last = y_test
        monthly_data_norm_last = monthly_data_norm_last.reset_index(drop=True)

        future_forecast = data_forecastf['yhat'].tail(12)
        future_forecast_12 = future_forecast.reset_index(drop=True)

        #### Save model as pkl
        # import pickle
        # with open(f'../00-MODELS_UF_MENSAL/{estado}_{product}_Prophet_RawData_ExtDataSP_model.pkl', 'wb') as fd: pickle.dump({model}, fd)

        Valores_Reais = monthly_data['y'].tail(12)
        Valores_Reais = Valores_Reais.reset_index(drop=True)

        ################# DeNormalized #######################
        # Recupera a média e desvio-padrão da última subsequência observada
        mean_norm, std_norm = get_stats_norm(series, horizon, window)

        # Reescala a predição
        predictions_rescaled = znorm_reverse(future_forecast_12, mean_norm, std_norm)
        predictions_df2 = pd.DataFrame(predictions_rescaled, columns=['Predictions'])
        predictions_df2.rename(columns={'yhat': 'Predictions'}, inplace=True)


        p1 = ', '.join(map(str, predictions_df2['Predictions'].values))

        rmse_result2 = rmse(Valores_Reais, predictions_df2['Predictions'])
        mape_result2 = mape(Valores_Reais, predictions_df2['Predictions'])
        pocid_result2 = pocid(Valores_Reais, predictions_df2['Predictions'])
        # mcpm_result2 = mcpm(rmse_result2, mape_result2, pocid_result2)
        pbe_result2 = pbe(Valores_Reais, predictions_df2['Predictions'])
        basepredictions = baseline_persistent_window2(series, horizon)
        mase_result2 = mase(Valores_Reais, predictions_df2['Predictions'], basepredictions)


        # CSV Output VALORES REAIS
        with open(f'Prophet_ExtData_{window}_Exp4_t{threshold}_output.csv', 'a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([
                # product, estado, 'Prophet', fd.name, mape_result2, pocid_result2, pbe_result2, mase_result2, *future_forecast_12.values
                product, estado, 'Prophet', mape_result2, pocid_result2, pbe_result2, mase_result2, p1
            ])






A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.rename(columns={'DATA': 'ds'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['ds'] = pd.to_datetime(filtered_df['ds'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['y'] = y_train.tail(len(new_df))
14:15:12 - cmdstanpy - INFO - Chain [1] start processing
14:15:12 - cmdstanpy - INFO - Chain [1] done processin

In [62]:
############### Plot Prophet

from prophet.plot import plot_plotly, plot_components_plotly

# Plot the forecast
fig1 = plot_plotly(model, data_forecastf)
fig1.show()

# Plot forecast components
fig2 = plot_components_plotly(model, data_forecastf)
fig2.show()