In [1]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from class_function_V1 import TimeSeriesAnalysis #Função que processa os dados, separando no grupo de 53 setores
import numpy as np
from pySSA.core import MSSA
from pySSA.simple import SSA


# Read the Excel file
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
df_raw = pd.read_excel(r"D:\OneDrive\Organizar UFSC\Documentos\TCC\Data_set\Base de Dados.xlsx")
#df_raw = df_raw.drop(['29.1 Fabricação de automóveis, camionetas e utilitários'], axis=1)

# Natural-log transform of every numeric column; non-numeric columns are copied unchanged.
numeric_cols = df_raw.select_dtypes(include=[np.number]).columns
df_log = df_raw.copy()
df_log[numeric_cols] = np.log(df_raw[numeric_cols])

# Create an instance of the TimeSeriesAnalysis class from the log-transformed data
tsa = TimeSeriesAnalysis(df_log)

# NOTE(review): `half` is not used by any cell visible in this notebook.
half = len(tsa.df_macro_sectors.columns) // 2
# Split the sector columns by position into three groups: first 15, next 15, remainder.
subset1 = tsa.df_macro_sectors.iloc[:, :15]
subset2 = tsa.df_macro_sectors.iloc[:, 15:30]
subset3 = tsa.df_macro_sectors.iloc[:, 30:]

#tsa.df_macro_sectors= tsa.df_macro_sectors.loc[:, ['10.1', '10.7', '13.1','17.1','19.2','19.3','20.5','22.2','23.1','23.2','24.1','24.5','26.2','26.2','26.4','27.4','28.1','28.3','28.5','28.6']]

# Preview the first rows of the sector table
tsa.df_macro_sectors.head()

Unnamed: 0,10.1,10.3,10.4,10.5,10.6,10.7,10.8,11.1,11.2,13.1,...,28.3,28.5,28.6,29.1,29.2,29.3,29.4,31.0,32.1,32.9
0,4.515331,4.05491,3.90618,4.661924,4.558232,3.536038,4.107816,4.268231,3.993507,4.998449,...,4.143789,3.545596,4.217789,3.839313,3.76405,4.200043,4.226908,4.372683,3.242279,4.488433
1,4.4218,3.735329,4.241995,4.513231,4.417194,3.528083,3.998127,4.143932,3.969375,4.99221,...,4.239395,3.719582,4.182698,3.824939,3.864202,4.144594,4.265512,4.214923,3.238978,4.347842
2,4.437739,3.566673,4.601538,4.529859,4.53273,3.277851,4.14494,4.198235,4.010407,5.08604,...,4.373216,3.810535,4.349028,4.023934,3.997371,4.229685,4.336743,4.379752,4.01146,4.352726
3,4.507316,3.616461,4.679233,4.536689,4.548309,3.727957,4.144719,4.265418,4.000468,5.153646,...,4.316473,3.959796,4.333748,4.113417,4.05566,4.327372,4.382654,4.455456,4.306655,4.353239
4,4.528129,3.688902,4.665151,4.480409,4.601294,4.748746,4.126926,4.270458,3.982138,5.162835,...,4.321764,4.011917,4.352513,4.030991,3.987235,4.23095,4.352051,4.387869,4.452716,4.317744


---------------------

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.model_selection import ParameterGrid
from pySSA.core import MSSA
from pySSA.simple import SSA

class SSAforecast:
    """Grid-search SSA forecasting models for every column of a DataFrame.

    For each column the last ``test_size`` observations are held out, an SSA
    model is fitted on the remainder for every candidate ``(L, r)`` pair, and
    the pair with the lowest out-of-sample error is kept.

    Attributes:
        df_in: input DataFrame, one series per column.
        models, best_models: initialised to empty dicts; not used by the
            methods of this class.
        result_df: set by ``select_best_ssa_model`` to the results table.
        test_size: hold-out length of the last ``select_best_ssa_model`` call.
    """

    # Position of each supported metric in the tuple returned by calculate_metrics.
    _METRIC_INDEX = {'mse': 0, 'mae': 1, 'mape': 2, 'eqa': 3, 'rmse': 4, 'rrmse': 5}

    def __init__(self, df_in):
        self.df_in = df_in
        self.models = {}
        self.best_models = {}
        self.result_df = None
        self.test_size = None

    def ssa_model(self, series, L, r):
        """Decompose ``series`` with window length ``L``.

        Returns:
            (components, ssa): the row-wise sum of the first ``r`` columns of
            the decomposition, and the fitted ``SSA`` object used afterwards
            for forecasting.
        """
        X = series.values
        ssa = SSA(X)
        decomposer = ssa.decompose(L, return_df=True)
        components = decomposer.iloc[:, :r].sum(axis=1)
        return components, ssa

    def calculate_metrics(self, actual, forecast):
        """Return ``(mse, mae, mape, eqa, rmse, rrmse)`` for two equal-length series.

        Both inputs are flattened to 1-D first. Without this, subtracting a
        ``(n,)`` array from a ``(n, 1)`` array broadcasts to ``(n, n)`` and
        silently yields wrong metrics.

        Raises:
            ValueError: if the two inputs do not hold the same number of values.
        """
        actual = np.asarray(actual, dtype=float).ravel()
        forecast = np.asarray(forecast, dtype=float).ravel()
        if actual.shape != forecast.shape:
            raise ValueError(
                f"actual and forecast must have the same length, "
                f"got {actual.size} and {forecast.size}"
            )
        error = forecast - actual
        mse = np.mean(error ** 2)
        mae = np.mean(np.abs(error))
        mape = np.mean(np.abs(error / actual))
        eqa = np.sum(error ** 2)  # sum of squared errors
        rmse = np.sqrt(mse)
        rrmse = rmse / np.mean(actual)
        return mse, mae, mape, eqa, rmse, rrmse

    def _split(self, series, test_size):
        """Split ``series`` positionally into (train, test), holding out the last ``test_size`` points."""
        if not 0 < test_size < len(series):
            raise ValueError(
                f"test_size must be between 1 and {len(series) - 1}, got {test_size}"
            )
        train_size = len(series) - test_size
        return series.iloc[:train_size], series.iloc[train_size:]

    def _plot_forecast(self, col, train_data, test_data, forecast_train, forecast_test):
        """Plot observed data against the in-sample and out-of-sample forecasts for one column."""
        plt.figure(figsize=(12, 4))
        plt.plot(train_data.index, train_data.values, label='Dados de treinamento')
        plt.plot(test_data.index, test_data.values, label='Dados de teste')
        # Plot against the training index so all four curves share one x-axis.
        plt.plot(train_data.index, np.asarray(forecast_train, dtype=float).ravel(), label='Previsão in-sample')
        plt.plot(test_data.index, np.asarray(forecast_test, dtype=float).ravel(), label='Previsão out-of-sample')
        plt.xlabel('Data')
        plt.ylabel('Valor')
        plt.title(f'Previsão SSA - {col}')
        plt.legend()  # the labels above are only rendered when a legend is drawn
        plt.show()

    def select_best_ssa_model(self, metric, test_size, L_range=None, r_range=None, plot=True):
        """Grid-search ``(L, r)`` for every column and record the best model.

        Args:
            metric: one of 'mse', 'mae', 'mape', 'eqa', 'rmse', 'rrmse'; the
                out-of-sample value of this metric is minimised.
            test_size: number of trailing observations held out for testing.
            L_range: candidate window lengths (default 12, 24, ..., 72).
            r_range: candidate numbers of components (default 2..19).
            plot: when True, draw one figure per column.

        Returns:
            A ONE-ELEMENT TUPLE ``(result_df,)``. The tuple is kept for
            backward compatibility with callers that index the result with
            ``[0]``. ``self.result_df`` is set to the DataFrame itself.

        Raises:
            ValueError: for an unsupported metric, an invalid ``test_size``, or
                when no candidate produces a finite metric for a column.
        """
        if metric not in self._METRIC_INDEX:
            raise ValueError(
                f"Unsupported metric {metric!r}; expected one of {sorted(self._METRIC_INDEX)}"
            )
        metric_pos = self._METRIC_INDEX[metric]

        if L_range is None:
            L_range = [12, 24, 36, 48, 60, 72]
        if r_range is None:
            r_range = range(2, 20)
        # L varies slowest and r fastest; only pairs with r < L are tried.
        candidates = [(L, r) for L in L_range for r in r_range if r < L]

        self.test_size = test_size
        result_rows = []

        for col in self.df_in:
            series = self.df_in[col]
            train_data, test_data = self._split(series, test_size)
            train_size = len(train_data)

            best_metric = float('inf')
            best_model = None
            best_L = None
            best_r = None
            best_forecast = None

            for L, r in candidates:
                reconstructed_component, ssa = self.ssa_model(train_data, L, r)
                forecast = ssa.RLforecast(r, steps=test_size, conf_int=False)
                # The forecast holds the in-sample part followed by `test_size` future steps.
                current_metric = self.calculate_metrics(
                    test_data.values, forecast[train_size:]
                )[metric_pos]

                if current_metric < best_metric:
                    best_metric = current_metric
                    best_model = reconstructed_component
                    best_L = L
                    best_r = r
                    best_forecast = forecast

            if best_forecast is None:
                raise ValueError(
                    f"No (L, r) candidate produced a finite {metric!r} for column {col!r}"
                )

            # Final forecasts: reuse the winning candidate instead of refitting it.
            forecast_train = best_forecast[:train_size]
            forecast_test = best_forecast[train_size:]

            if plot:
                self._plot_forecast(col, train_data, test_data, forecast_train, forecast_test)

            # Compute metrics
            in_sample_mse, in_sample_mae, in_sample_mape, in_sample_eqa, in_sample_rmse, in_sample_rrmse = self.calculate_metrics(train_data.values, forecast_train)
            out_of_sample_mse, out_of_sample_mae, out_of_sample_mape, out_of_sample_eqa, out_of_sample_rmse, out_of_sample_rrmse = self.calculate_metrics(test_data.values, forecast_test)

            result_rows.append({
                'Setor': col, 'Best Metric': best_metric, 'L': best_L, 'r': best_r, 'Modelo': best_model,
                'MSE_In-Sample': in_sample_mse, 'EQA_In-Sample': in_sample_eqa,
                'RMSE_In-Sample': in_sample_rmse, 'RRMSE_In-Sample': in_sample_rrmse,
                'MAE_In-Sample': in_sample_mae, 'MAPE_In-Sample': in_sample_mape,
                'MSE_Out-of-Sample': out_of_sample_mse, 'EQA_Out-of-Sample': out_of_sample_eqa,
                'RMSE_Out-of-Sample': out_of_sample_rmse, 'RRMSE_Out-of-Sample': out_of_sample_rrmse,
                'MAE_Out-of-Sample': out_of_sample_mae, 'MAPE_Out-of-Sample': out_of_sample_mape,
            })

        self.result_df = pd.DataFrame(result_rows)
        return (self.result_df,)

    def generate_forecast_errors(self, horizons=(1, 6, 9, 12)):
        """Compute error metrics at several forecast horizons for each selected model.

        Must be called after ``select_best_ssa_model``. ``self.result_df`` may
        be either the results DataFrame or the one-element tuple returned by
        ``select_best_ssa_model``.

        Args:
            horizons: forecast horizons ``h``; the out-of-sample metrics use the
                first ``h`` held-out points. Horizons longer than the hold-out
                are skipped.

        Returns:
            DataFrame with one row per (sector, horizon).

        Raises:
            RuntimeError: if no model selection has been run yet.
        """
        if self.result_df is None or self.test_size is None:
            raise RuntimeError("Call select_best_ssa_model before generate_forecast_errors")
        results = self.result_df[0] if isinstance(self.result_df, tuple) else self.result_df

        columns = ['Setor', 'h (período da previsão)', 'MSE_In-Sample', 'EQA_In-Sample', 'RMSE_In-Sample', 'RRMSE_In-Sample', 'MAE_In-Sample', 'MAPE_In-Sample', 'MSE_Out-of-Sample', 'EQA_Out-of-Sample', 'RMSE_Out-of-Sample', 'MAE_Out-of-Sample', 'MAPE_Out-of-Sample', 'RRMSE_Out-of-Sample']
        rows = []

        for _, row in results.iterrows():
            sector = row['Setor']
            best_L = int(row['L'])
            best_r = int(row['r'])

            series = self.df_in[sector]
            train_data, test_data = self._split(series, self.test_size)
            train_size = len(train_data)

            # The fit and forecast do not depend on h, so compute them once per sector.
            # steps=test_size matches select_best_ssa_model and covers every usable horizon.
            _, ssa = self.ssa_model(train_data, best_L, best_r)
            forecast_all = ssa.RLforecast(best_r, steps=self.test_size, conf_int=False)

            in_sample_mse, in_sample_mae, in_sample_mape, in_sample_eqa, in_sample_rmse, in_sample_rrmse = self.calculate_metrics(train_data.values, forecast_all[:train_size])

            for h in horizons:
                if h > len(test_data):
                    # Not enough held-out observations to evaluate this horizon.
                    continue
                out_of_sample_mse, out_of_sample_mae, out_of_sample_mape, out_of_sample_eqa, out_of_sample_rmse, out_of_sample_rrmse = self.calculate_metrics(test_data.values[:h], forecast_all[train_size:train_size + h])

                rows.append({
                    'Setor': sector, 'h (período da previsão)': h,
                    'MSE_In-Sample': in_sample_mse, 'EQA_In-Sample': in_sample_eqa,
                    'RMSE_In-Sample': in_sample_rmse, 'RRMSE_In-Sample': in_sample_rrmse,
                    'MAE_In-Sample': in_sample_mae, 'MAPE_In-Sample': in_sample_mape,
                    'MSE_Out-of-Sample': out_of_sample_mse, 'EQA_Out-of-Sample': out_of_sample_eqa,
                    'RMSE_Out-of-Sample': out_of_sample_rmse, 'RRMSE_Out-of-Sample': out_of_sample_rrmse,
                    'MAE_Out-of-Sample': out_of_sample_mae, 'MAPE_Out-of-Sample': out_of_sample_mape,
                })

        # Build the frame once from the collected records (DataFrame.append no longer exists in pandas 2.x).
        return pd.DataFrame(rows, columns=columns)



In [4]:
# ssa = SSAforecast(tsa.df_macro_sectors.loc[:,['10.1','10.7']])
# Run the SSA grid search on `subset1`, selecting by metric 'mse' with a hold-out of 25 observations.
ssa = SSAforecast(subset1)
# select_best_ssa_model returns a one-element tuple, so the results table is at index 0.
# This assignment stores that tuple on `ssa.result_df`; later cells index it with [0].
ssa.result_df= ssa.select_best_ssa_model('mse', 25) 
ssa.result_df[0]

In [17]:
# Compute per-sector, per-horizon error metrics using the (L, r) parameters selected above.
errors_df=ssa.generate_forecast_errors()

In [18]:
# Export the selected parameters and the per-horizon errors to Excel files in the working directory.
# NOTE(review): the file names say "eqa" but the visible selection cell used metric 'mse' - confirm the names.
ssa.result_df[0].to_excel('param_SSA_eqa.xlsx', index=False) 
errors_df.to_excel('Forecasting_SSA_eqa.xlsx', index=False) 

In [240]:
# pd.DataFrame(test_data)