In [18]:
############ LIBRARIES (updated) ###############

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import os
import csv
import tsfel

# Regressors
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost.sklearn import XGBRegressor 
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
import lightgbm as lgb

import warnings
from warnings import simplefilter
warnings.filterwarnings("ignore", category=RuntimeWarning)
simplefilter(action='ignore', category=FutureWarning)

# Evaluation metrics
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_percentage_error as mape

%matplotlib inline

# variavel que evita NaN nos resultados
epslon = 0.00005

def pbe(y_true, y_pred):
  if np.sum(y_true)!=0:
    return 100*(np.sum(y_pred - y_true)/np.sum(y_true))
  else:
   return 100*(np.sum(y_pred - y_true)/(np.sum(y_true)+ epslon))  

def pocid(y_true, y_pred):
  n = len(y_true)
  D = [1 if (y_pred[i] - y_pred[i-1]) * (y_true[i] - y_true[i-1]) > 0 else 0 for i in range(1, n)]
  POCID = 100 * np.sum(D) / (n-1)
  return POCID

#função para normalização
def znorm(x):
  if np.std(x)!=0:
    x_znorm = (x - np.mean(x)) / np.std(x)
  else:
    x_znorm = (x - np.mean(x)) / (np.std(x) + epslon) 
   
  return x_znorm

#função para desnormatização
def znorm_reverse(x, mean_x, std_x):
  x_denormalized = (np.array(x) * std_x) + mean_x
  return x_denormalized

def get_stats_norm(series, horizon, window):
  last_subsequence = series[-(horizon+window):-horizon].values
  last_mean = np.mean(last_subsequence)
  last_std = np.std(last_subsequence)
  return last_mean, last_std

# Em geral, considera-se um tamanho de janela capaz de capturar um ciclo dos dados
# Por exemplo, 12 observações no caso dos dados com frequência mensal
def rolling_window(series, window):
  data = []
  for i in range(len(series)-window):
    example = znorm(np.array(series[i:i+window+1]))
    data.append(example)
  df = pd.DataFrame(data)
  return df

# Para predição de vendas por UF (mensal), será considerado horizon = 12
# Para predição de vendas por município (anual), será considerado horizon = 1
def train_test_split(data, horizon):
  X = data.iloc[:,:-1] # features
  y = data.iloc[:,-1] # target

  X_train = X[:-horizon] # features train
  X_test =  X[-horizon:] # features test

  y_train = y[:-horizon] # target train
  y_test = y[-horizon:] # target test
  return X_train, X_test, y_train, y_test

def recursive_multistep_forecasting(X_test, model, horizon):
  # example é composto pelas últimas observações vistas
  # na prática, é o pbeprimeiro exemplo do conjunto de teste
  example = X_test.iloc[0].values.reshape(1,-1)

  preds = []
  for i in range(horizon):
    pred = model.predict(example)[0]
    preds.append(pred)

    # Descartar o valor da primeira posição do vetor de características
    example = example[:,1:]

    # Adicionar o valor predito na última posição do vetor de características
    example = np.append(example, pred)
    example = example.reshape(1,-1)
  return preds

# Função para extração de Features utilizando Tsfel
def rolling_window_featureTsfel(series, window):
  data_out=[]
  data_out2=pd.DataFrame(data_out)
  data_out3=[]
  for i in range(len(series)-window): #Itera sobre a diferença entre o tamanho da série e o tamanho da janela
    example = np.array(series[i:i+window+1]) #Normaliza as séries temporais ou a coluna 'm3'
    # example2 = example[:-1]
    cfg = tsfel.get_features_by_domain() # Recupera um conjunto de características pré-definidas para extração
    new_elements = tsfel.time_series_features_extractor(cfg, example[:-1]) # Extração das características
    new_elements_values_reshaped = np.squeeze(new_elements.values) # Remove dimensão unitária, transformando de (1, 140) para (140,)
    # exampledf = pd.DataFrame(example)
    data_featuredf=  pd.DataFrame(new_elements_values_reshaped.T)
    data_out = data_featuredf.T
    # 
    # data_out2= pd.DataFrame(data_out)
    # data_out4 = znorm(data_out2.values)
    data_out3 = pd.DataFrame(data_out)
    # data_out4 = pd.DataFrame(data_out4)
    # example2 = znorm(example)
    # data_out4['y']=example2[-1]
    data_out2 = pd.concat([data_out2, data_out3], ignore_index=True)
    # data_out3 = data_out3.fillna(0)
    # data_out2.append(data_out3)
  return data_out2 #retorna a criação de um DataFrame com o resultado dos novos dados e as características extraídas



In [14]:
#### LEITURA DOS DADOS ####

def extract_estado(file_name):
    # Split the file name by underscores
    parts = file_name.split('_')
    # Extract the name between underscores
    estado = parts[1]
    return estado

def read_csv_files(folder_path):
    estados = []
    # List all files in the folder
    files = os.listdir(folder_path)
    # Iterate through each file
    for file_name in files:
        # Check if it's a CSV file
        if file_name.endswith('.csv'):
            file_path = os.path.join(folder_path, file_name)
            # Open the CSV file and read the data
            with open(file_path, 'r', newline='') as csvfile:
                reader = csv.reader(csvfile)
                # Assuming the first row contains headers
                headers = next(reader)
                # Extract estado from file name and append to estados list
                estado = extract_estado(file_name)
                estados.append(estado)
                estados.sort()
    return estados



In [16]:
############## TsFel AUTO ##############

import pickle

horizon = 12
window = 12
features = 'TsFel'

products = sorted([name for name in os.listdir('./') if os.path.isdir(os.path.join('./', name))])
 
for product in products:
    folder_path = f'./{product}/'
    # Read the CSV files and extract estado names
    estados = read_csv_files(folder_path)
    
    for estado in estados:

        # carregamento do arquivo

        df = pd.read_csv(f"./{product}/mensal_{estado}_{product}.csv", header=0, sep=";")
        series = df['m3']

        TimeStamp = df['timestamp'].tail(398) ## add
        TimeStamp.reset_index(drop=True, inplace=True) ## add

        outTsFel = rolling_window_featureTsfel(series, window) ## add
        # outTsFel = outTsFel.loc[:, outTsFel.nunique() > 1]
        outTsFel = outTsFel.fillna(0) ## add
 
        # first_column = Catch22_filled.pop('timestamp') 
        outTsFel.insert(0, 'timestamp', TimeStamp) ## add

        folder_name = f'../SALVAS/{product}/'
        if not os.path.exists(folder_name):
            os.makedirs(folder_name)
        
        outTsFel.to_csv(f'../SALVAS/{folder_name}/FEAT_TsFel_{product}_{window}_{estado}.csv', index=False)


*** Feature extraction started ***


  new_elements = tsfel.time_series_features_extractor(cfg, example[:-1]) # Extração das características



*** Feature extraction finished ***


TypeError: cannot concatenate object of type '<class 'list'>'; only Series and DataFrame objs are valid

In [12]:
outTsFel

[            0           1    2             3         4    5    6        7    \
 0  1.365874e+07  113.294205  1.0  1.241704e+08  0.054243  2.0  9.0  727.958   
 
        8         9    ...           130            131            132  \
 0  986.084  0.083333  ...  137480.97741  254283.411575  146242.337197   
 
              133            134            135            136            137  \
 0  104989.564232  126114.506252  152480.418196  169172.344962  176542.368718   
 
              138  139  
 0  177566.096739  0.0  
 
 [1 rows x 140 columns],
             0          1    2             3         4    5    6        7    \
 0  1.361226e+07  112.51605  1.0  1.237478e+08  0.050548  2.0  9.0  727.958   
 
        8         9    ...            130           131            132  \
 0  986.084  0.083333  ...  115469.176562  278565.37598  132808.433453   
 
             133           134            135           136            137  \
 0  75360.476073  99550.195303  132819.142776  155488.22954