In [15]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import os

from statsmodels.tsa.statespace.sarimax import SARIMAX

In [22]:
dados = pd.DataFrame(columns=['ref_bacen', 'date', 'indice'])

diretorio = '../data/rl/'

arquivos_json = [arquivo for arquivo in os.listdir(diretorio) if arquivo.endswith('.json')]

for arquivo_json in arquivos_json:
    with open(os.path.join(diretorio, arquivo_json), 'r') as file:
        d = json.loads(file.read())
        
        data_temp = pd.DataFrame({
            'ref_bacen': arquivo_json,
            'date': d['Indices']['NDVI']['Serie Processada']['Data'],
            'indice': d['Indices']['NDVI']['Serie Processada']['Indice']
        })

        dados = pd.concat([dados, data_temp], ignore_index=True)

dados['ref_bacen'] = dados['ref_bacen'].str.split('_').str.get(0)

dados['date'] = pd.to_datetime(dados['date'], format='%Y-%m-%d')
dados = dados.set_index('date')


In [23]:
dados.dtypes

ref_bacen     object
indice       float64
dtype: object

In [17]:
dados[dados['ref_bacen'] == '506273616']

Unnamed: 0_level_0,ref_bacen,indice
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-08-21,506273616,0.018645
2017-10-16,506273616,0.024464
2017-11-09,506273616,0.015563
2017-11-25,506273616,0.00372
2017-12-11,506273616,0.00557
2017-12-19,506273616,0.007996
2018-01-09,506273616,0.008844
2018-02-18,506273616,0.010653
2018-02-26,506273616,0.015367
2018-07-12,506273616,0.045398


Model validation    

In [18]:
previsoes = []

for ref_bacen in dados['ref_bacen'].unique():
    dados_ref = dados[dados['ref_bacen'] == ref_bacen]['indice']
    print(dados_ref)

    modelo = SARIMAX(dados_ref, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
    resultado = modelo.fit()

    # Obter os próximos 6 passos de previsão a partir do próximo mês após a última data nos dados
    ultima_data = dados_ref.index[-1]
    proximas_datas_previsao = pd.date_range(start=ultima_data, periods=7, freq='M')[1:]

    previsao = resultado.get_forecast(steps=6)
    previsao_mean = previsao.predicted_mean
    
    previsao_df = pd.DataFrame({
        'date': proximas_datas_previsao,
        'ref_bacen': ref_bacen,
        'previsao': previsao_mean,
    })

    previsoes.append(previsao_df)

previsoes_df = pd.concat(previsoes)


date
2017-08-21    0.018645
2017-10-16    0.024464
2017-11-09    0.015563
2017-11-25    0.003720
2017-12-11    0.005570
2017-12-19    0.007996
2018-01-09    0.008844
2018-02-18    0.010653
2018-02-26    0.015367
2018-07-12    0.045398
2018-08-13    0.000964
Name: indice, dtype: float64
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f= -0.00000D+00    |proj g|=  0.00000D+00

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5      0      1      0     0     0   0.000D+00  -0.000D+00
 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.



At iterate   15    f= -1.26177D+00    |proj g|=  8.41596D-02

At iterate   20    f= -1.26524D+00    |proj g|=  2.64481D-01

At iterate   25    f= -1.26531D+00    |proj g|=  4.51000D-02

At iterate   30    f= -1.26568D+00    |proj g|=  2.59272D-01

At iterate   35    f= -1.26638D+00    |proj g|=  2.79847D-01

At iterate   40    f= -1.27522D+00    |proj g|=  8.91499D-02

At iterate   45    f= -1.27602D+00    |proj g|=  1.26931D-01

At iterate   50    f= -1.27603D+00    |proj g|=  2.29550D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5     50     81      1     0     0   2.296D-02  -1.276D+00
  F =  -1.27603290903

  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.



At iterate   15    f= -1.74116D+00    |proj g|=  2.70140D-01

At iterate   20    f= -1.74397D+00    |proj g|=  9.97112D+00

At iterate   25    f= -1.83000D+00    |proj g|=  4.11592D-01

At iterate   30    f= -1.84181D+00    |proj g|=  1.51483D+00

At iterate   35    f= -1.84911D+00    |proj g|=  1.59470D+00

At iterate   40    f= -1.84937D+00    |proj g|=  1.34105D-01



 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.

 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.


  ys=-3.615E-11  -gs= 3.205E-10 BFGS update SKIPPED

At iterate   45    f= -1.84937D+00    |proj g|=  7.18213D-02
  ys=-2.656E-10  -gs= 1.368E-08 BFGS update SKIPPED



 Line search cannot locate an adequate point after MAXLS
  function and gradient evaluations.
  Previous x, f and g restored.
 Possible causes: 1 error in function or gradient evaluation;
                  2 rounding error dominate computation.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.



           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5     46    160      3     2     0   7.182D-02  -1.849D+00
  F =  -1.8493683598272073     

ABNORMAL_TERMINATION_IN_LNSRCH                              
date
2017-07-20    0.022933
2017-08-29    0.018937
2017-09-14   -0.011990
2017-09-30    0.040084
2017-10-24   -0.029222
2017-12-03    0.017883
2018-01-01    0.051949
2018-01-09   -0.055066
2018-01-25    0.114627
2018-03-14    0.653819
2018-05-25    0.022128
2018-07-12    0.005242
2017-07-20    0.022933
2017-08-29    0.018937
2017-09-14   -0.011990
2017-09-30    0.040084
2017-10-24   -0.029222
2017-12-03    0.017883
2018


 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.

 Line search cannot locate an adequate point after MAXLS
  function and gradient evaluations.
  Previous x, f and g restored.
 Possible causes: 1 error in function or gradient evaluation;
                  2 rounding error dominate computation.
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
 This problem is unconstrained.


date
2017-07-12    0.062466
2017-09-14    0.007318
2017-09-30    0.005574
2017-11-09    0.021432
2017-11-17    0.013768
2017-11-25   -0.008657
2017-12-11    0.023374
2017-12-27    0.109691
2018-01-01    0.124578
2018-01-25    0.010379
2018-02-02    0.011017
2018-07-12    0.001698
2017-07-12    0.062466
2017-09-14    0.007318
2017-09-30    0.005574
2017-11-09    0.021432
2017-11-17    0.013768
2017-11-25   -0.008657
2017-12-11    0.023374
2017-12-27    0.109691
2018-01-01    0.124578
2018-01-25    0.010379
2018-02-02    0.011017
2018-07-12    0.001698
Name: indice, dtype: float64
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f= -2.61394D+00    |proj g|=  2.10626D+05
  ys=-2.943E+00  -gs= 4.023E-01 BFGS update SKIPPED
  ys=-1.600E+01  -gs= 5.568E-01 BFGS update SKIPPED
  ys=-8.552E-04  -gs= 1.182E-01 BFGS update SKIPPED

           * * *

Tit   = to


 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.

 Line search cannot locate an adequate point after MAXLS
  function and gradient evaluations.
  Previous x, f and g restored.
 Possible causes: 1 error in function or gradient evaluation;
                  2 rounding error dominate computation.
  return get_prediction_index(
  return get_prediction_index(


In [5]:
previsoes_df['date'] = pd.to_datetime(previsoes_df['date'], format='%Y-%m-%d')
final_df = pd.merge(dados, previsoes_df, how='outer', on=['date', 'ref_bacen'])
final_df['date'] = final_df['date'].dt.strftime('%Y-%m')
final_df

Unnamed: 0,date,ref_bacen,indice,previsao
0,2017-08,506273616,0.018645,
1,2017-10,506273616,0.024464,
2,2017-11,506273616,0.015563,
3,2017-11,506273616,0.003720,
4,2017-12,506273616,0.005570,
...,...,...,...,...
167,2018-09,505971238,,0.007318
168,2018-10,505971238,,0.005574
169,2018-11,505971238,,0.021432
170,2018-12,505971238,,0.013768


In [6]:
final_df = final_df.drop_duplicates(subset=['date', 'ref_bacen'], keep='first')

In [7]:
final_df = final_df[final_df['ref_bacen']!='st']

In [10]:
final_df[final_df['ref_bacen'] == '506273616']

Unnamed: 0,date,ref_bacen,indice,previsao
0,2017-08,506273616,0.018645,
1,2017-10,506273616,0.024464,
2,2017-11,506273616,0.015563,
4,2017-12,506273616,0.00557,
6,2018-01,506273616,0.008844,
7,2018-02,506273616,0.010653,
9,2018-07,506273616,0.045398,
10,2018-08,506273616,0.000964,
136,2018-09,506273616,,0.000964
137,2018-10,506273616,,0.010286


In [235]:
final_df.to_csv('../data/previsoes.csv')