In [18]:
# Importar as bibliotecas necessárias

from datetime import date
import json
import os
from shutil import unpack_archive
import sys
from urllib.request import urlretrieve, HTTPError

In [19]:
# Load the configuration options

# Absolute path of the JSON options file (resolved from the current working directory).
base_dir = os.path.abspath('../options.json')

# JSON text is UTF-8 by specification; naming the encoding explicitly keeps the
# platform-dependent default of open() from mangling non-ASCII values.
with open(base_dir, 'r', encoding='utf-8') as file:
    data = json.load(file)

In [20]:
# Manejo da data dos investimentos

def transform_date_string_on_number_list(date_str):
    """Split a dash-separated date string into a list of integers.

    Args:
        date_str: Text whose '-'-separated fields are all integer literals,
            e.g. '2021-03-15'.

    Returns:
        list[int]: One integer per field, in the original order
        (e.g. [2021, 3, 15]).

    Raises:
        ValueError: If any field cannot be parsed by int().
    """
    return [int(part) for part in date_str.split('-')]

# START_DATE is mandatory: a missing "CONFIG" or "START_DATE" key raises KeyError here.
START_DATE = data["CONFIG"]["START_DATE"]
# END_DATE is optional: a missing or falsy value falls back to today's date.
# str(date) produces ISO 'YYYY-MM-DD', the dash-separated form parsed below.
END_DATE = data["CONFIG"].get("END_DATE") or str(date.today())

start_year, start_month, start_day = transform_date_string_on_number_list(START_DATE)
end_year, end_month, end_day = transform_date_string_on_number_list(END_DATE)


In [21]:
# Create the folders for the archives

# base_dir is already absolute, so the sub-folders can be joined onto it directly.
base_dir = os.path.join(os.path.abspath('../'), 'archives')
dir_zip = os.path.join(base_dir, data['CONFIG']['DIR_ZIP_NAME'])
dir_csv = os.path.join(base_dir, data['CONFIG']['DIR_CSV_NAME'])

# exist_ok=True makes the cell safe to re-run and removes the gap between
# checking for a folder and creating it; parents are created before children.
for directory in (base_dir, dir_zip, dir_csv):
    os.makedirs(directory, exist_ok=True)

In [22]:
# Realizar download dos arquivos da CVM

def adjust_month(month):
    """Return ``month`` as text, prefixed with '0' when it is below 10.

    Args:
        month: Number to format; it is compared against 10.

    Returns:
        str: For example '03' for 3 and '11' for 11.
    """
    month_text = str(month)
    return '0' + month_text if month < 10 else month_text

def download_inf_diario_archives(year, month, dest_dir=None):
    """Download one monthly INF_DIARIO zip file from the CVM open-data portal.

    Args:
        year: Year of the file; converted with str() before use.
        month: Month of the file; converted with str() and inserted verbatim
            into the URL and file name (the loop in this notebook passes the
            zero-padded text returned by adjust_month).
        dest_dir: Folder that receives '<year><month>.zip'. When omitted, the
            notebook-level ``dir_zip`` is used so downloads land in the same
            configured folder that the unzip step reads from.

    Raises:
        urllib.error.HTTPError: Propagated from urlretrieve; the calling loop
            catches it to report files that are not available.
    """
    year = str(year)
    month = str(month)
    if dest_dir is None:
        # Resolved at call time so the configured folder is honoured instead
        # of a hard-coded relative path.
        dest_dir = dir_zip

    link = 'https://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/inf_diario_fi_' + year + month + '.zip'
    urlretrieve(link, os.path.join(dest_dir, year + month + '.zip'))

# Calendar bounds; defined once because they never change between iterations.
FIRST_MONTH_YEAR, LAST_MONTH_YEAR = 1, 12

for year in range(start_year, end_year + 1):
    # Only the first and last years of the interval can be partial; every
    # year in between is fetched from month 1 through month 12.
    start_month_loop = start_month if year == start_year else FIRST_MONTH_YEAR
    end_month_loop = end_month if year == end_year else LAST_MONTH_YEAR

    for month in range(start_month_loop, end_month_loop + 1):
        # Kept in a separate name so the loop variable stays an int.
        padded_month = adjust_month(month)

        try:
            download_inf_diario_archives(year, padded_month)
        except HTTPError as err:
            if err.code == 404:
                # A 404 is reported as "file not published yet" and the loop
                # simply moves on to the next month.
                print('Link não encontrado. Ainda não foi disponibilizado o arquivo de ' + padded_month + '/' + str(year))
            else:
                print(err.msg)

In [23]:
# Unzip the archives

# Keep only .zip entries: any other file in the folder (e.g. a hidden
# placeholder) would make unpack_archive fail with an unknown-format error.
archives_list = [
    entry for entry in os.listdir(dir_zip)
    if entry.lower().endswith('.zip')
]

if not archives_list:
    print('Não há arquivos na pasta "./app/archives/archives-zip"')
    print('Verifique as datas inseridas no "../options.json"')
    sys.exit()

ARCHIVE_TEMPLATE = 'inf_diario_fi_'
ARCHIVE_EXTENSION = '.csv'

for archive_zip_name in archives_list:
    # The download step saves files as '<year><month>.zip', so the first six
    # characters are taken as the period of the archive.
    archive_date = archive_zip_name[0:6]
    # Expected name of the extracted CSV — assumes each zip contains
    # 'inf_diario_fi_<period>.csv'; TODO confirm against the CVM archives.
    archive_csv_name = ARCHIVE_TEMPLATE + archive_date + ARCHIVE_EXTENSION

    archive_csv_search = os.path.join(dir_csv, archive_csv_name)
    archive_zip_search = os.path.join(dir_zip, archive_zip_name)

    # Extraction is skipped when the expected CSV is already on disk.
    if not os.path.exists(archive_csv_search):
        unpack_archive(archive_zip_search, dir_csv)