In [63]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import os

pd.set_option('display.max_rows', 500)

from statsmodels.tsa.statespace.sarimax import SARIMAX

In [64]:
dados = pd.DataFrame(columns=['ref_bacen', 'date', 'indice'])

diretorio = '../data/serieT/'

arquivos_json = [arquivo for arquivo in os.listdir(diretorio) if arquivo.endswith('.json')]

for arquivo_json in arquivos_json:
    with open(os.path.join(diretorio, arquivo_json), 'r') as file:
        d = json.loads(file.read())
        
        data_temp = pd.DataFrame({
            'ref_bacen': arquivo_json,
            'date': d['Indices']['NDVI']['Serie Processada']['Data'],
            'indice': d['Indices']['NDVI']['Serie Processada']['Indice']
        })

        dados = pd.concat([dados, data_temp], ignore_index=True)

dados['ref_bacen'] = dados['ref_bacen'].str.split('.').str.get(0)

dados['date'] = pd.to_datetime(dados['date'], format='%Y-%m-%d')
dados = dados.set_index('date')

In [65]:
dados.dtypes

ref_bacen     object
indice       float64
dtype: object

In [66]:
dados[dados['ref_bacen'] == '511332955']

Unnamed: 0_level_0,ref_bacen,indice
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-08-28,511332955,-0.001305
2020-09-05,511332955,0.011605
2020-10-07,511332955,-0.016679
2020-10-15,511332955,0.056596
2020-10-31,511332955,0.01893
2020-12-02,511332955,-0.042001
2020-12-10,511332955,0.01367
2020-12-18,511332955,0.147042
2020-12-26,511332955,0.197492
2021-01-01,511332955,0.099771


Model validation    

In [67]:
previsoes = []

for ref_bacen in dados['ref_bacen'].unique():
    print(f"Processando ref_bacen: {ref_bacen}")
    dados_ref = dados[dados['ref_bacen'] == ref_bacen][['indice']]  # Seleciona como DataFrame com uma única coluna

    modelo = SARIMAX(dados_ref, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
    
    try:
        resultado = modelo.fit()

        # Obter os próximos 6 passos de previsão a partir do próximo mês após a última data nos dados
        ultima_data = dados_ref.index[-1]
        proximas_datas_previsao = pd.date_range(start=ultima_data, periods=7, freq='M')[1:]

        previsao = resultado.get_forecast(steps=6)
        previsao_mean = previsao.predicted_mean
        
        previsao_df = pd.DataFrame({
            'date': proximas_datas_previsao,
            'ref_bacen': ref_bacen,
            'previsao': previsao_mean,
        })

        previsoes.append(previsao_df)

    except Exception as e:
        print(f"Erro para ref_bacen {ref_bacen}: {e}")

previsoes_df = pd.concat(previsoes)


Processando ref_bacen: 511332955
Erro para ref_bacen 511332955: too many indices for array: array is 0-dimensional, but 1 were indexed
Processando ref_bacen: 506185541
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f= -0.00000D+00    |proj g|=  0.00000D+00

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5      0      1      0     0     0   0.000D+00  -0.000D+00
  F =  -0.0000000000000000     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
Processando ref_bacen: 512

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.



At iterate    5    f= -2.04434D-01    |proj g|=  4.55655D-01

At iterate   10    f= -2.73621D-01    |proj g|=  4.05075D-02

At iterate   15    f= -2.87674D-01    |proj g|=  6.00628D-01

At iterate   20    f= -2.94964D-01    |proj g|=  1.62140D+00

At iterate   25    f= -2.96234D-01    |proj g|=  1.15078D+00

At iterate   30    f= -2.97114D-01    |proj g|=  9.49613D-01

At iterate   35    f= -2.97370D-01    |proj g|=  2.88249D-01

At iterate   40    f= -2.97462D-01    |proj g|=  7.24187D-01

At iterate   45    f= -2.98112D-01    |proj g|=  1.22524D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5     48     86   

  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('To

Processando ref_bacen: 512308192
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f= -0.00000D+00    |proj g|=  0.00000D+00

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5      0      1      0     0     0   0.000D+00  -0.000D+00
  F =  -0.0000000000000000     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
Processando ref_bacen: 512483993
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0        

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn(

In [68]:
previsoes_df['date'] = pd.to_datetime(previsoes_df['date'], format='%Y-%m-%d')
final_df = pd.merge(dados, previsoes_df, how='outer', on=['date', 'ref_bacen'])
final_df['date'] = final_df['date'].dt.strftime('%Y-%m')

In [69]:
final_df = final_df.drop_duplicates(subset=['date', 'ref_bacen'], keep='first')

In [70]:
final_df = final_df[final_df['ref_bacen']!='st']

In [71]:
final_df[final_df['ref_bacen'] == '512309065']

Unnamed: 0,date,ref_bacen,indice,previsao
164,2021-07,512309065,0.016101,
165,2021-09,512309065,0.005549,
166,2021-10,512309065,0.014347,
167,2021-12,512309065,0.095448,
169,2022-01,512309065,0.00202,
170,2022-02,512309065,0.001811,
171,2022-03,512309065,0.006118,
172,2022-06,512309065,0.020576,
173,2022-08,512309065,0.055957,
246,2022-09,512309065,,0.055957


In [72]:
final_df.to_csv('../data/previsoes_1.csv')