In [86]:
# Importar bibliotecas
import pandas as pd
import ipeadatapy as ipea
from bcb import sgs
# CAGED balance data (Ipeadata), requested by its series code
dados_ipeadata = ipea.timeseries(
  "CAGED12_SALDON12"
)
# IDP/BP - 12-month accumulated - US$ (millions) (SGS/BCB);
# series code 24422 is returned under the column name "idp"
dados_sgs = sgs.get(
  codes = {"idp": 24422},
  start = "2020-06-01",
  end = "2020-12-01",
)
dados_ipeadata

Unnamed: 0_level_0,CODE,RAW DATE,DAY,MONTH,YEAR,VALUE (Pessoa)
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-01,CAGED12_SALDON12,2020-01-01T00:00:00-03:00,1,1,2020,92678.0
2020-02-01,CAGED12_SALDON12,2020-02-01T00:00:00-03:00,1,2,2020,198368.0
2020-03-01,CAGED12_SALDON12,2020-03-01T00:00:00-03:00,1,3,2020,-232316.0
2020-04-01,CAGED12_SALDON12,2020-04-01T00:00:00-03:00,1,4,2020,-902317.0
2020-05-01,CAGED12_SALDON12,2020-05-01T00:00:00-03:00,1,5,2020,-352790.0
2020-06-01,CAGED12_SALDON12,2020-06-01T00:00:00-03:00,1,6,2020,-23111.0
2020-07-01,CAGED12_SALDON12,2020-07-01T00:00:00-03:00,1,7,2020,123297.0
2020-08-01,CAGED12_SALDON12,2020-08-01T00:00:00-03:00,1,8,2020,238125.0
2020-09-01,CAGED12_SALDON12,2020-09-01T00:00:00-03:00,1,9,2020,313564.0
2020-10-01,CAGED12_SALDON12,2020-10-01T00:00:00-03:00,1,10,2020,388938.0


In [87]:
# Example data: keep only the date and value columns of the Ipeadata frame,
# restrict the rows to 2020-01-01 .. 2020-08-01 (both ends included) and
# index the result by date.
tabela_caged = (
  dados_ipeadata
  .reset_index()
  .rename(columns = {"DATE": "data", "VALUE (Pessoa)": "caged"})
  .loc[:, ["data", "caged"]]
  .loc[lambda quadro: quadro["data"].between("2020-01-01", "2020-08-01")]
  .set_index("data")
)
tabela_caged

Unnamed: 0_level_0,caged
data,Unnamed: 1_level_1
2020-01-01,92678.0
2020-02-01,198368.0
2020-03-01,-232316.0
2020-04-01,-902317.0
2020-05-01,-352790.0
2020-06-01,-23111.0
2020-07-01,123297.0
2020-08-01,238125.0


In [88]:
# Label the date index "data" so it lines up with the index of tabela_caged,
# which is what join() uses as the key by default.
# rename_axis relabels the index in one step; the previous
# reset_index -> rename -> set_index round trip produced the same frame.
tabela_idp = dados_sgs.rename_axis(index = "data")
tabela_idp

Unnamed: 0_level_0,idp
data,Unnamed: 1_level_1
2020-06-01,63600.5
2020-07-01,62835.5
2020-08-01,53665.2
2020-09-01,50999.2
2020-10-01,46394.8
2020-11-01,39992.6
2020-12-01,38270.1


In [89]:
# To combine the tables we use the pandas join() method, with the syntax
# tabela_x.join(other = tabela_y, how = "inner").
# NOTE: by default the index of each table is used as the join key.
# Inner join: combine the tables keeping only the rows "in common" to x and y.
table_join = tabela_caged.join(
  other = tabela_idp,
  how = "inner",
)
table_join



Unnamed: 0_level_0,caged,idp
data,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-06-01,-23111.0,63600.5
2020-07-01,123297.0,62835.5
2020-08-01,238125.0,53665.2


In [90]:
# Left join: keep every row of tabela_caged; idp is missing where
# tabela_idp has no matching date.
tabela_caged.join(
  other = tabela_idp,
  how = "left",
)

Unnamed: 0_level_0,caged,idp
data,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-01,92678.0,
2020-02-01,198368.0,
2020-03-01,-232316.0,
2020-04-01,-902317.0,
2020-05-01,-352790.0,
2020-06-01,-23111.0,63600.5
2020-07-01,123297.0,62835.5
2020-08-01,238125.0,53665.2


In [91]:
# Right join: keep every row of tabela_idp; caged is missing where
# tabela_caged has no matching date.
tabela_caged.join(
  other = tabela_idp,
  how = "right",
)


Unnamed: 0_level_0,caged,idp
data,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-06-01,-23111.0,63600.5
2020-07-01,123297.0,62835.5
2020-08-01,238125.0,53665.2
2020-09-01,,50999.2
2020-10-01,,46394.8
2020-11-01,,39992.6
2020-12-01,,38270.1


In [92]:
# Outer join: keep the rows of both tables; a column is missing wherever
# its source table has no matching date.
tabela_caged.join(
  other = tabela_idp,
  how = "outer",
)

Unnamed: 0_level_0,caged,idp
data,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-01,92678.0,
2020-02-01,198368.0,
2020-03-01,-232316.0,
2020-04-01,-902317.0,
2020-05-01,-352790.0,
2020-06-01,-23111.0,63600.5
2020-07-01,123297.0,62835.5
2020-08-01,238125.0,53665.2
2020-09-01,,50999.2
2020-10-01,,46394.8


In [96]:
# Same inner join expressed with merge(): move the "data" index of each table
# into a regular column named "Date" and match the rows on that column.
df_idp = (
  tabela_idp
  .reset_index()
  .rename(columns = {"data": "Date"})
)
df_caged = (
  tabela_caged
  .reset_index()
  .rename(columns = {"data": "Date"})
)
df_merge = pd.merge(df_idp, df_caged, on = "Date", how = "inner")
df_merge

Unnamed: 0,Date,idp,caged
0,2020-06-01,63600.5,-23111.0
1,2020-07-01,62835.5,123297.0
2,2020-08-01,53665.2,238125.0
