### Env Config

In [1]:
%%capture
%pip install requests asyncio aiohttp pyarrow

In [2]:
import ast
import asyncio
import io
import os

import aiohttp
import pandas as pd
import pyarrow as pa
import requests

In [3]:
# URL prefix used to build the request URLs in the cells below.
base_url = 'https://www.bcb.gov.br/api/servico/sitebcb'

### Recuperando parâmetros para extração

In [4]:
def get_parametros(timeout=30):
    """Request the ``HistoricoTaxaJurosDiario/ParametrosConsulta`` endpoint.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(
        f'{base_url}/HistoricoTaxaJurosDiario/ParametrosConsulta',
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

In [5]:
# Fetch the ParametrosConsulta payload and tabulate its 'conteudo' entry.
parametros = get_parametros()
df_parametros = pd.DataFrame(parametros['conteudo'])

In [6]:
def get_datas(timeout=30):
    """Request the ``HistoricoTaxaJurosDiario/ConsultaDatas`` endpoint.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(
        f'{base_url}/HistoricoTaxaJurosDiario/ConsultaDatas',
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

In [7]:
# Fetch the ConsultaDatas payload and tabulate its 'conteudo' entry.
datas = get_datas()
df_datas = pd.DataFrame(datas['conteudo'])

### Exportando para zona Raw

In [8]:
async def get_hist_taxas(session, classificacao, modalidade, data):
    """Request ``historicotaxajurosdiario/TodosCampos`` for one filter combination.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding the response (the notebook passes an ``aiohttp.ClientSession``).
        classificacao: Value compared against ``codigoSegmento`` in the filter.
        modalidade: Value compared against ``codigoModalidade`` in the filter.
        data: Value compared against ``InicioPeriodo`` in the filter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        aiohttp.ClientResponseError: If the server answers with a status of
            400 or higher.
    """
    url = (
        f"{base_url}/historicotaxajurosdiario/TodosCampos"
        f"?filtro=(codigoSegmento eq '{classificacao}')"
        f" and (codigoModalidade eq '{modalidade}')"
        f" and (InicioPeriodo eq '{data}')"
    )

    async with session.get(url) as response:
        # Fail loudly on HTTP errors (as the synchronous helpers do) instead of
        # trying to decode an error page as if it were data.
        response.raise_for_status()
        return await response.json()

In [None]:
async def process_parametros(session, data, df_parametros):
    """Download, for one period, every segment/modality pair not handled yet.

    The directory ``taxas_juros/raw/{data}/{codigoSegmento}-{codigoModalidade}``
    doubles as the "already handled" marker: pairs whose directory exists are
    skipped. The directory is created before the download starts, so when a
    download fails or is cancelled its (still empty) directory is removed
    again; otherwise the pair would be skipped on every later run and its data
    would never be fetched.

    Args:
        session: HTTP session forwarded to ``fetch_and_save_hist_taxas``.
        data: Period identifier used in the directory name and forwarded to
            the download.
        df_parametros: DataFrame with ``codigoSegmento`` and
            ``codigoModalidade`` columns.

    Raises:
        BaseException: The first exception raised by any download, re-raised
            after all downloads of this period have finished.
    """
    pending = []  # (marker directory, task) pairs

    # Iterating the columns directly keeps each value's own dtype; iterrows()
    # upcasts a row to a single dtype, which can alter the formatted codes.
    codes = zip(df_parametros['codigoSegmento'], df_parametros['codigoModalidade'])
    for codigoSegmento, codigoModalidade in codes:
        out_dir = f'taxas_juros/raw/{data}/{codigoSegmento}-{codigoModalidade}'
        if os.path.exists(out_dir):
            continue
        os.makedirs(out_dir, exist_ok=True)

        task = asyncio.create_task(
            fetch_and_save_hist_taxas(session, codigoSegmento, codigoModalidade, data)
        )
        pending.append((out_dir, task))

    if not pending:
        return

    try:
        # return_exceptions=True lets every download finish before an error is
        # reported, so no task keeps running against a session that is closing.
        results = await asyncio.gather(
            *(task for _, task in pending), return_exceptions=True
        )
    finally:
        for out_dir, task in pending:
            succeeded = (
                task.done() and not task.cancelled() and task.exception() is None
            )
            if succeeded:
                continue
            task.cancel()  # no-op for tasks that already finished
            try:
                # rmdir only removes empty directories, so saved data is never lost.
                os.rmdir(out_dir)
            except OSError:
                pass

    for result in results:
        if isinstance(result, BaseException):
            raise result

async def fetch_and_save_hist_taxas(session, codigoSegmento, codigoModalidade, data):
    """Download one segment/modality/period payload and store it on disk.

    Nothing is written when the payload's ``'conteudo'`` entry is empty.
    Otherwise the payload goes to
    ``taxas_juros/raw/{data}/{codigoSegmento}-{codigoModalidade}/txjuros.json``;
    the target directory is expected to exist already.

    Returns:
        None.
    """
    payload = await get_hist_taxas(session, codigoSegmento, codigoModalidade, data)

    if len(payload['conteudo']) > 0:
        target = f'taxas_juros/raw/{data}/{codigoSegmento}-{codigoModalidade}/txjuros.json'
        # NOTE: str() stores the Python repr of the payload, not strict JSON,
        # despite the file extension. Changing the format here requires
        # changing whatever reads these files back.
        with open(target, 'w') as out:
            out.write(str(payload))

async def main(df_datas, df_parametros):
    """Run the download for every period in ``df_datas['InicioPeriodo']``.

    Periods are visited in reverse row order, one after another, sharing a
    single ``aiohttp.ClientSession``. Each period's directory under
    ``taxas_juros/raw`` is created when missing before its downloads start.

    Args:
        df_datas: DataFrame with an ``InicioPeriodo`` column.
        df_parametros: DataFrame forwarded unchanged to ``process_parametros``.
    """
    async with aiohttp.ClientSession() as http:
        periods = df_datas['InicioPeriodo'][::-1]
        for period in periods:
            print(f'Recuperando arquivos de {period}...')

            period_dir = f'taxas_juros/raw/{period}'
            if not os.path.exists(period_dir):
                os.makedirs(period_dir, exist_ok=True)

            await process_parametros(http, period, df_parametros)

# Top-level await (supported in IPython/Jupyter cells) runs the whole download.
await main(df_datas, df_parametros)

### Criando dataframe

In [None]:
# Collect every raw file, visiting the first-level directories in sorted order.
dirs = sorted(os.listdir('taxas_juros/raw'))

all_files = [
    os.path.join(root, file_name)
    for dir_name in dirs  # avoids shadowing the builtin dir()
    for root, _, file_names in os.walk(f'taxas_juros/raw/{dir_name}')
    for file_name in file_names
]

# Turn each file's 'conteudo' entry into an Arrow table.
arrow_tables = []
for file_path in all_files:
    with open(file_path, 'r') as f:
        print(f'Lendo arquivo {file_path}')
        # The files hold Python-literal text (they are written with str()).
        # literal_eval parses literals only, so unlike eval() it cannot run
        # code that happens to be embedded in a file.
        payload = ast.literal_eval(f.read())
    arrow_tables.append(pa.Table.from_pandas(pd.DataFrame(payload['conteudo'])))

# Concatenate all tables and expose the result as a single DataFrame.
combined_table = pa.concat_tables(arrow_tables)

df = combined_table.to_pandas()

In [None]:
# Number of rows per distinct value of the 'InstituicaoFinanceira' column.
df['InstituicaoFinanceira'].value_counts()