In [1]:
############# Libraries ##############

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

# Evaluation metrics
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_percentage_error as mape

# Tiny constant added to a denominator that would otherwise be exactly zero
# (used by pbe and znorm below).
# NOTE(review): the identifier is misspelled ("epislon" instead of "epsilon");
# it is kept as-is because other code refers to it by this exact name.
epislon = 1e-20  # Define a small epsilon value for division by zero cases

def rmse(y_true, y_pred):
  """Return the root mean squared error between ``y_true`` and ``y_pred``.

  The squared error itself is computed by ``mse`` (the alias under which
  ``sklearn.metrics.mean_squared_error`` is imported in this cell); this
  function only takes its square root.
  """
  squared_error = mse(y_true, y_pred)
  return np.sqrt(squared_error)

def mase(y_true, y_pred, y_baseline):
    """Mean absolute scaled error of a forecast relative to a baseline forecast.

    Computes ``MAE(y_true, y_pred) / MAE(y_true, y_baseline)``.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Model forecasts, same length as ``y_true``.
    y_baseline : array-like
        Baseline forecasts (e.g. the persistent-window baseline, i.e. the last
        ``h`` observations before the test period), same length as ``y_true``.

    Returns
    -------
    float
        The ratio of the two mean absolute errors. When the baseline error is
        exactly zero the ratio is undefined: ``inf`` is returned if the model
        error is positive, ``nan`` otherwise.
    """
    # Convert everything to plain float arrays so the subtraction is positional.
    # With pandas inputs the arithmetic would otherwise align on index labels,
    # which silently produces NaN when the indexes differ, and plain lists
    # would not support subtraction at all.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    baseline = np.asarray(y_baseline, dtype=float)

    # MAE of the model
    mae_pred = np.mean(np.abs(actual - predicted))
    # MAE of the baseline model
    mae_naive = np.mean(np.abs(actual - baseline))

    if mae_naive == 0:
        # Explicit result instead of a RuntimeWarning from dividing by zero.
        return np.inf if mae_pred > 0 else np.nan
    return mae_pred / mae_naive

def pbe(y_true, y_pred):
  """Return the percentage bias error: 100 * sum(y_true - y_pred) / sum(y_true).

  When the sum of ``y_true`` is exactly zero, ``epislon`` is added to the
  denominator so the division does not hit zero.
  """
  denominator = np.sum(y_true)
  if denominator == 0:
    denominator = denominator + epislon
  return 100*(np.sum(y_true - y_pred)/denominator)

def pocid(y_true, y_pred):
  """Percentage of correct direction changes (POCID).

  For every consecutive pair of time steps, checks whether the forecast moved
  in the same direction as the observations (product of the two first
  differences strictly positive) and returns the share of such steps, in
  percent.

  Parameters
  ----------
  y_true : array-like
      Observed values.
  y_pred : array-like
      Forecasts. Only the first ``len(y_true)`` values are used.

  Returns
  -------
  float
      Value in [0, 100]; ``nan`` when fewer than two observations are given,
      because no direction change can be computed.

  Raises
  ------
  ValueError
      If ``y_pred`` has fewer elements than ``y_true`` (raised by numpy when
      the two difference arrays cannot be multiplied).
  """
  # Positional arrays: indexing a pandas Series with y_true[i] is a label
  # lookup and fails (or picks the wrong rows) when the index is not 0..n-1.
  actual = np.asarray(y_true, dtype=float).ravel()
  n = len(actual)
  predicted = np.asarray(y_pred, dtype=float).ravel()[:n]

  if n < 2:
    # Avoid dividing by n - 1 == 0.
    return np.nan

  same_direction = (np.diff(predicted) * np.diff(actual)) > 0
  return 100 * np.sum(same_direction) / (n-1)

def mcpm(rmse_result, mape_result, pocid_result):
  """Combine RMSE, MAPE and POCID into a single score.

  POCID is first turned into an error rate (100 - POCID). The score is the sum
  of three terms of the form ``a * b * sin(2*pi/3) / 2``, one for each pair of
  the three quantities (rmse, mape, error rate).
  """
  er_result = 100 - pocid_result

  def _pair_area(side_a, side_b):
    # a * b * sin(120 degrees) / 2 for one pair of measures
    return (side_a * side_b * np.sin((2*np.pi)/3))/2

  return (
    _pair_area(rmse_result, mape_result)
    + _pair_area(mape_result, er_result)
    + _pair_area(er_result, rmse_result)
  )

def znorm(x):
  """Z-normalize ``x``: subtract its mean and divide by its standard deviation.

  When the standard deviation is exactly zero, ``epislon`` is added to the
  divisor so the division does not hit zero.
  """
  spread = np.std(x)
  if spread == 0:
    spread = spread + epislon
  return (x - np.mean(x)) / spread

def znorm_reverse(x, mean_x, std_x):
  """Undo a z-normalization: return ``x * std_x + mean_x`` as a numpy array."""
  rescaled = np.array(x) * std_x
  return rescaled + mean_x

def get_stats_norm(series, horizon, window):
  """Return (mean, std) of the ``window`` values that precede the last ``horizon`` values.

  ``series`` must expose ``.values`` on a slice (e.g. a pandas Series).
  """
  start, stop = -(horizon+window), -horizon
  reference = series[start:stop].values
  return np.mean(reference), np.std(reference)

# For sales forecasting per state (UF, monthly data), horizon = 12 is used
# For sales forecasting per municipality (yearly data), horizon = 1 is used
def train_test_split(data, horizon):
  """Split a windowed frame into train/test features and targets.

  The last column of ``data`` is the target and all other columns are
  features. The last ``horizon`` rows form the test set; everything before
  them forms the training set.

  Returns ``(X_train, X_test, y_train, y_test)``.
  """
  features = data.iloc[:, :-1]
  target = data.iloc[:, -1]

  train_rows = slice(None, -horizon)
  test_rows = slice(-horizon, None)

  return features[train_rows], features[test_rows], target[train_rows], target[test_rows]

def recursive_multistep_forecasting(X_test, model, horizon):
  """Forecast ``horizon`` steps by feeding each prediction back as a feature.

  Starts from the first row of ``X_test`` (the most recent observations seen
  before the test period). At every step the model predicts one value, the
  oldest feature is dropped and the prediction is appended as the newest
  feature. Returns the list of predictions.
  """
  feature_row = X_test.iloc[0].values.reshape(1,-1)

  forecasts = []
  for _ in range(horizon):
    step_pred = model.predict(feature_row)[0]
    forecasts.append(step_pred)

    # Slide the window: drop the first feature, append the new prediction
    shifted = np.append(feature_row[:,1:], step_pred)
    feature_row = shifted.reshape(1,-1)
  return forecasts

def baseline_mean(series, horizon):
  """Baseline that repeats the mean of the z-normalized training part ``horizon`` times.

  Because the training part is z-normalized first, the repeated value is
  close to zero (a flat line).
  """
  train_mean = np.mean(znorm(series[:-horizon]))
  return np.repeat(train_mean, horizon)

def baseline_persistent(series, horizon):
  """Baseline that repeats the last z-normalized value before the test period.

  The ``horizon`` values preceding the last ``horizon`` values are
  z-normalized, and the final one is repeated ``horizon`` times.
  """
  normalized = znorm(series[-2*horizon:-horizon])
  last_value = normalized.values[-1]
  return np.repeat(last_value, horizon)

def baseline_persistent_window(series, horizon):
  """Baseline that returns the z-normalized ``horizon`` values preceding the last ``horizon`` values."""
  return znorm(series[-horizon*2:-horizon]).values

def baseline_persistent_windowR(series, horizon):
  """Baseline that returns the raw (not normalized) ``horizon`` values preceding the last ``horizon`` values."""
  previous_window = series[-horizon*2:-horizon]
  return previous_window.values

# In general, the window size is chosen so that it captures one cycle of the data
# For example, 12 observations for data with monthly frequency
def rolling_window(series, window):
  """Build a frame of overlapping, individually z-normalized subsequences.

  Each row holds ``window + 1`` consecutive values of ``series`` (``window``
  inputs followed by the value that comes right after them), z-normalized
  together. There is one row per start position, ``len(series) - window`` in total.
  """
  rows = [
    znorm(np.array(series[start:start+window+1]))
    for start in range(len(series)-window)
  ]
  return pd.DataFrame(rows)


In [14]:
############# DEFs ##############

from prophet import Prophet
import os
import csv
import tsfeatures

def extract_estado(file_name):
    """Return the second underscore-separated token of ``file_name``.

    For a name such as ``mensal_ac_etanolhidratado.csv`` this is ``'ac'``.
    Raises IndexError when the name contains no underscore.
    """
    return file_name.split('_')[1]

def read_csv_files(folder_path):
    """Return the sorted state codes found in the CSV file names of a folder.

    Every entry of ``folder_path`` whose name ends with ``.csv`` contributes
    the token returned by ``extract_estado``. The files themselves are not
    opened: only their names are needed, and reading a header that was then
    discarded made the function fail with StopIteration on an empty CSV.

    Parameters
    ----------
    folder_path : str
        Directory to scan (non-recursive).

    Returns
    -------
    list of str
        State codes in ascending order; one entry per CSV file.
    """
    estados = [
        extract_estado(file_name)
        for file_name in os.listdir(folder_path)
        if file_name.endswith('.csv')
    ]
    # Sort once at the end instead of after every appended element.
    estados.sort()
    return estados


##########################################

def rolling_window_prophet(series, window):
  """Build a frame of overlapping, individually z-normalized subsequences.

  Each row holds ``window + 1`` consecutive values of ``series``,
  z-normalized together; there are ``len(series) - window`` rows.
  """
  rows = [
    znorm(np.array(series[start:start+window+1]))
    for start in range(len(series)-window)
  ]
  return pd.DataFrame(rows)


def rolling_window_real(series, window):
  """Build a frame of overlapping subsequences in their original scale.

  Each row holds ``window + 1`` consecutive values of ``series`` without any
  normalization; there are ``len(series) - window`` rows.
  """
  rows = [
    np.array(series[start:start+window+1])
    for start in range(len(series)-window)
  ]
  return pd.DataFrame(rows)

#########################################################


def rolling_window_featureCatch22N(series, window):
  """Build a frame with one row of z-normalized catch22 features per window.

  For every start position, the ``window`` consecutive values of ``series``
  are passed to ``pycatch22.catch22_all`` and the returned ``'values'`` entry
  is z-normalized across the features of that window.

  NOTE(review): ``pycatch22`` is not imported in any of the cells visible
  here; it must be imported before this function is called.
  """
  rows = []
  for start in range(len(series)-window):
      subsequence = np.array(series[start:start+window])
      features = pycatch22.catch22_all(subsequence)
      rows.append(znorm(features['values']))
  return pd.DataFrame(rows)

def rolling_window_TsFeaturesN(out_window, window, freqF):
  """Build a frame with one row of z-normalized tsfeatures per window.

  For every start position, the ``window`` consecutive values of
  ``out_window`` are turned into a frame with a constant ``unique_id`` column
  and the values in column ``y`` (renamed from ``m3``), passed to
  ``tsfeatures.tsfeatures`` with ``freq=freqF``, z-normalized across the
  features of that window (``axis=1``) and NaNs are replaced by 0.

  Parameters
  ----------
  out_window : pandas.Series
      Input series. NOTE(review): the rename only takes effect when the
      series is named ``'m3'``; confirm against callers.
  window : int
      Number of observations per window.
  freqF : int
      Forwarded unchanged as the ``freq`` argument of ``tsfeatures.tsfeatures``.

  Returns
  -------
  pandas.DataFrame
      One row per window, ``len(out_window) - window`` rows in total, with a
      fresh 0..n-1 index. An empty frame when there is no window.
  """
  # Collect the per-window frames and concatenate once. The previous version
  # started from a list and passed it to pd.concat, which only accepts
  # pandas objects and therefore failed on the first iteration.
  window_features = []
  for start in range(len(out_window)-window):
      target_feat = pd.DataFrame(out_window[start:window+start])
      target_feat.insert(0, 'unique_id', 'D1')
      target_feat = target_feat.rename(columns={'m3': 'y'})
      result = tsfeatures.tsfeatures(target_feat, freq=freqF)
      result = result.drop(columns=['unique_id'])
      result = result.apply(znorm, axis=1)
      window_features.append(result.fillna(0))

  if not window_features:
      # pd.concat raises on an empty list
      return pd.DataFrame()
  return pd.concat(window_features, ignore_index=True)

#########################################################



def recursive_multistep_forecasting_ProphetCatch22N(series, model, horizon, window,future_dates):
    """Recursive multi-step forecast using catch22 features as extra regressors.

    At each of the ``horizon`` steps the current window of observations is
    turned into z-normalized catch22 features, the features are written into
    the regressor columns of a copy of ``future_dates``, the model predicts
    that row, the prediction is mapped back to the original scale and appended
    to the window (dropping the oldest value), and the date advances by one
    month.

    Parameters
    ----------
    series : pandas.Series
        Full series; its last ``horizon`` values are excluded from the
        starting window.
    model : object
        Object whose ``predict(frame)`` returns a frame with a ``'yhat'``
        column (presumably a fitted Prophet model - confirm against caller).
    horizon : int
        Number of steps to forecast.
    window : int
        Number of most recent observations used to compute the features.
    future_dates : pandas.DataFrame
        Frame whose first column is ``ds`` and whose remaining columns are
        the regressors, one per feature. It is no longer modified: the date
        is advanced on an internal copy, so calling the function twice with
        the same frame starts from the same date.

    Returns
    -------
    (list, pandas.DataFrame)
        The de-normalized predictions (one array per step) and the
        concatenated frames returned by ``model.predict``.

    Raises
    ------
    ValueError
        If the number of features differs from the number of regressor columns.

    NOTE(review): ``pycatch22`` is not imported in any of the cells visible
    here; it must be imported before this function is called.
    """
    history = series[:-horizon]
    current_window = history.tail(window)
    mean_norm, std_norm = get_stats_norm(series, horizon, window)

    # Private copy: advancing 'ds' on the caller's frame leaked state between calls.
    step_dates = future_dates.copy()
    num_columns_to_update = len(step_dates.columns) - 1  # Exclude the 'ds' column

    predsreal = []
    prophet_frames = []
    for _ in range(horizon):
        features = pycatch22.catch22_all(np.array(current_window))
        feature_df = pd.DataFrame(znorm(features['values']))
        feature_df.fillna(0, inplace=True)
        values = feature_df[0].tolist()

        if len(values) != num_columns_to_update:
            raise ValueError("Number of values does not match the number of columns")

        # Fill the regressor columns of this step's row with the features
        step_input = step_dates.copy()
        step_input.iloc[:, 1:] = values

        predn = model.predict(step_input)
        prophet_frames.append(predn)

        # Move to the next month for the following step
        step_dates['ds'] = step_dates['ds'] + pd.DateOffset(months=1)

        pred = znorm_reverse(predn['yhat'], mean_norm, std_norm)
        predsreal.append(pred)

        # Slide the window: drop the oldest value, append the new prediction
        current_window = np.append(current_window[1:], pred)

    # Concatenate once instead of growing a frame inside the loop
    if prophet_frames:
        predprophet = pd.concat(prophet_frames, ignore_index=True)
    else:
        predprophet = pd.DataFrame()
    return predsreal, predprophet


def recursive_multistep_forecasting_TsFeaturesN(series, model, horizon, freqF, window):
    """Recursive multi-step forecast using tsfeatures as model inputs.

    Starts from the ``2 * window`` observations that precede the last
    ``horizon`` values of ``series``. At each step the rolling windows of the
    current buffer are built with ``rolling_window_real``, their last column
    is labelled ``y`` and passed to ``tsfeatures.tsfeatures``; the resulting
    features are z-normalized across features, NaNs are replaced by 0, the
    model predicts, and the prediction is mapped back to the original scale
    and appended to the buffer (dropping the oldest value).

    Parameters
    ----------
    series : pandas.Series
        Full series.
    model : object
        Object with a ``predict(frame)`` method returning an indexable result.
    horizon : int
        Number of steps to forecast.
    freqF : int
        Forwarded unchanged as the ``freq`` argument of ``tsfeatures.tsfeatures``.
    window : int
        Window size used for the rolling windows and normalization statistics.

    Returns
    -------
    list
        The first element of each de-normalized prediction, one per step.
    """
    history = series[:-horizon]
    seriec = history.tail(2*window).reset_index(drop=True)
    mean_norm, std_norm = get_stats_norm(series, horizon, window)

    predsreal = []
    for _ in range(horizon):
        window_frame = rolling_window_real(seriec, window)
        window_frame = window_frame.rename(columns={window: 'y'})
        window_frame.insert(0, 'unique_id', 'D1')

        result = tsfeatures.tsfeatures(window_frame, freq=freqF)
        result = result.drop(columns=['unique_id'])
        # Keep the normalized frame (the previous call discarded its result)
        # and normalize across features (axis=1), matching how
        # rolling_window_TsFeaturesN prepares the features.
        result = result.apply(znorm, axis=1)
        result = result.fillna(0)
        result.columns = range(result.shape[1])

        predN = model.predict(result)
        pred = znorm_reverse(predN, mean_norm, std_norm)
        predsreal.append(pred)

        # Slide the buffer: drop the oldest value, append the new prediction
        seriec = np.append(seriec[1:], pred)

    # Built after the loop so the name is always bound, even when horizon is 0.
    return [val[0] for val in predsreal]


In [12]:
########### Prophet TEST ##################

# Scratch cell: loads one state/product series and computes tsfeatures-based
# window features for it. The saved output of this cell is a KeyboardInterrupt,
# i.e. the run recorded here did not finish.
# Relies on `os`, `tsfeatures`, `znorm` and `rolling_window_TsFeaturesN` from the cells above.

# Number of steps to forecast (12 is the value the comments above recommend for monthly per-state data)
horizon = 12

###########################################################
############ Consider windows > 36 for Prophet ############
###########################################################

# Number of observations per feature window
window = 12
# Passed as the `freq` argument of tsfeatures.tsfeatures
freqF = 12

# Every sub-directory of ./uf/ is treated as a product name
products = sorted([name for name in os.listdir('./uf/') if os.path.isdir(os.path.join('./uf/', name))])

# Single product/state pair used while the full loop below is disabled
product = 'etanolhidratado'
estado = 'ac'

# for product in products:
#     folder_path = f'./uf/{product}/'
#     # Read the CSV files and extract estado names
#     estados = read_csv_files(folder_path)
#     for estado in estados:

# Semicolon-separated file with (at least) the columns 'timestamp' and 'm3'
df = pd.read_csv(f"./uf/{product}/mensal_{estado}_{product}.csv", header=0, sep=";")

# Convert timestamp to datetime
df['timestamp'] = pd.to_datetime(df['timestamp'].astype(str), format='%Y%m')
series = df["m3"]

# Feature frame computed through the helper function
TsFeatures = rolling_window_TsFeaturesN(series, window, freqF)

out_window = series

# Inline copy of the body of rolling_window_TsFeaturesN, apparently for debugging:
# it repeats the same steps but collects each window's feature values in a
# plain list (no fillna, no concatenation).
# def rolling_window_TsFeaturesN(out_window, window, freqF):
results=[]
for i in range(len(out_window)-window):
    target_feat = out_window[i:window+i]
    target_feat = pd.DataFrame(target_feat)
    target_feat.insert(0, 'unique_id', 'D1')
    target_feat.rename(columns={'m3': 'y'}, inplace=True)
    result = tsfeatures.tsfeatures(target_feat, freq=freqF)
    result = result.drop(columns=['unique_id'])
    result = result.apply(znorm, axis=1)
    results.append((result.values))
#   return results


KeyboardInterrupt: 

In [13]:
# Display the list of per-window feature arrays collected by the loop in the previous cell
results

[array([[-0.2177643 ,  2.26545515, -3.2702678 , -0.27561255,         nan,
                 nan,         nan, -0.32075462, -0.10523713,  2.26545515,
         -0.32075462, -0.32075462, -0.32075462, -0.32075462, -0.27329948,
         -0.2557103 , -0.10523713,  2.04993767,  1.40338523,         nan,
         -0.16153865, -0.21723034,         nan,  0.00862856, -0.32075462,
         -0.32075461, -0.32075462,         nan,         nan,         nan,
                 nan,  0.11028035, -0.14341784,  1.40338523,         nan,
         -0.37186589, -0.26606671, -0.45068703, -0.1846479 , -0.44836521,
         -0.19354218,         nan]]),
 array([[-0.19926709,  2.40801161, -3.22391967, -0.28829522,         nan,
                 nan,         nan, -0.32146892, -0.09401221,  2.40801161,
         -0.32146892, -0.32146892, -0.32146892, -0.32146892, -0.35266473,
         -0.28569625, -0.09401221,  1.95309818,  1.27072805,         nan,
         -0.19510467, -0.07619003,         nan, -0.02072612, -0.32146892,


In [10]:
# Display the current contents of the `target_feat` frame
target_feat

Unnamed: 0,unique_id,m3
0,D1,860.685
1,D1,1027.991
2,D1,919.66
3,D1,823.809
4,D1,727.958
5,D1,966.731
6,D1,986.084
7,D1,684.643
8,D1,975.116
9,D1,797.041


In [None]:

# TsFeatures_filled = TsFeatures.fillna(0)
# TsFeatures_filled.replace([float('inf'), -float('inf')], 0, inplace=True)
# TsFeatures_filled.columns = [f'F_{i+1}' for i in range(TsFeatures_filled.shape[1])]

# y_norm = rolling_window(series,window)

# # Catch22_filled.insert(len(Catch22_filled.columns), 'timestamp', df['timestamp'].tail(len(series)-horizon).reset_index(drop=True))
# TsFeatures_filled.insert(0, 'timestamp', df['timestamp'].tail(len(series)-window).reset_index(drop=True))

# TsFeatures_filled['y'] = y_norm.iloc[:, [-1]]

# X_train, X_test, y_train, y_test = train_test_split(TsFeatures_filled, horizon, freqF)

# # Selecting the first column of monthly_data to pass the dates
# first_column=[]
# first_column = df['timestamp']
# first_column = first_column[window:-(horizon)]
# first_column = first_column.reset_index(drop=True)

# # Concatenating the first column with monthly_data_norm along axis 1 (columns)
# prophet_data2 = pd.concat([X_train, y_train], axis=1)
# prophet_data2.rename(columns={'timestamp': 'ds'}, inplace=True)

# model = Prophet()

# new_column_names = []
# for col in prophet_data2.columns:
#     if col == 'ds':
#         new_column_names.append('ds')
#     elif col == 'y':
#         new_column_names.append('y')
#     else:
#         new_column_names.append(f'{col}')
#         model.add_regressor(f'{col}', standardize=False)


# model.fit(prophet_data2)


# ##########################################################################

# ## RECURSIVO PARA FEATURES

# ##########################################################################

# future_dates = pd.DataFrame(df['timestamp'].tail(12))
# future_dates = future_dates.iloc[[0]].reset_index(drop=True)
# future_dates.rename(columns={'timestamp': 'ds'}, inplace=True)
# for col in new_column_names:
#     if col != 'ds' and col != 'y':
#         future_dates[col] = 0 

############################################################################

# predictions1a, predprophet = recursive_multistep_forecasting_ProphetCatch22N(series, model, horizon, window, future_dates)
    
# Valores_Previsao = [x[0] for x in predictions1a]

# Valores_Reais = df['m3'].tail(12)
# Valores_Reais = Valores_Reais.reset_index(drop=True)

# ###########################################################

# mean_norm, std_norm = get_stats_norm(series, horizon, window)
# basepredictionsnorm = baseline_persistent_window(series, horizon)
# basepredictions = znorm_reverse(basepredictionsnorm, mean_norm, std_norm)

# ###########################################################

# rmse_result2 = rmse(Valores_Reais, Valores_Previsao)
# mape_result2 = mape(Valores_Reais, Valores_Previsao)
# pocid_result2 = pocid(Valores_Reais, Valores_Previsao)
# mcpm_result2 = mcpm(rmse_result2, mape_result2, pocid_result2)
# pbe_result2 = pbe(Valores_Reais, Valores_Previsao)
# mase_result2 = mase(Valores_Reais, Valores_Previsao, basepredictions)

# # CSV Output VALORES REAIS
# with open(f'02-Prophet_{window}_Catch22_output.csv', 'a', newline='') as file:
#     writer = csv.writer(file)
#     writer.writerow([
#         product, estado, 'Prophet', mape_result2, pocid_result2, pbe_result2, mase_result2, Valores_Previsao
#     ])

# # import pickle
# # with open(f'./00-MODELS_UF_MENSAL-Prophet-Catch22/{estado}_{product}_Prophet_Catchqq_model.pkl', 'wb') as fd: pickle.dump({model}, fd)



In [10]:
############### Plot Prophet

# NOTE(review): `model` and `predprophet` are only assigned in the cell above,
# whose code is currently commented out; on a fresh kernel this cell fails
# with NameError unless that code is restored and run first.

from prophet.plot import plot_plotly, plot_components_plotly

# Plot the forecast
fig1 = plot_plotly(model, predprophet)
fig1.show()

# Plot forecast components
fig2 = plot_components_plotly(model, predprophet)
fig2.show()