In [None]:
# Importar as bibliotecas necessárias

from datetime import date
import json
import os
from shutil import unpack_archive
import sys
from urllib.request import urlretrieve, HTTPError

In [None]:
# Trazer as opções de configuração

# Absolute path of the options file, resolved against the current working
# directory; the same path is reused later when the file is rewritten.
json_file = os.path.abspath('../options.json')

# Decode explicitly as UTF-8. The file is written back as UTF-8 with
# ensure_ascii=False further down, so relying on the platform default
# encoding here could mis-decode non-ASCII values on some systems.
with open(json_file, 'r', encoding='utf8') as j_file:
    json_data = json.load(j_file)

In [None]:
# Manejo da data dos investimentos

def transform_date_string_on_number_list(date_str):
    """Split a dash-separated date string into a list of integers.

    Args:
        date_str: Text whose '-'-separated pieces can each be parsed by
            ``int``, e.g. ``'2021-03-15'``.

    Returns:
        list: One ``int`` per piece, in the order they appear in the string.

    Raises:
        ValueError: If any piece cannot be converted by ``int``.
    """
    return [int(piece) for piece in date_str.split('-')]

# The start date is mandatory: a missing key raises KeyError here.
START_DATE = json_data["CONFIG"]["START_DATE"]

# The end date is optional: a missing or falsy value (such as '') falls back
# to today's date rendered by str(date.today()), i.e. 'YYYY-MM-DD'.
END_DATE = json_data.get("CONFIG", False).get("END_DATE", False)
if not END_DATE:
    END_DATE = str(date.today())

# Break both dates into integer year / month / day components.
start_year, start_month, start_day = transform_date_string_on_number_list(START_DATE)
end_year, end_month, end_day = transform_date_string_on_number_list(END_DATE)


In [None]:
# Add an 'END_DATE' entry to the JSON options file when it is missing
## Rule applied: if the date is empty or absent, it is set to today's date.

def create_json_date_end(file, new_date):
    """Store ``new_date`` under ``CONFIG -> END_DATE`` in a JSON file.

    The file is read as UTF-8, updated in memory and then rewritten in place
    with a 4-space indent and non-ASCII characters kept as-is.

    Args:
        file: Path of the JSON file to update. Its top-level object must
            already contain a ``'CONFIG'`` mapping.
        new_date: Value to store as ``END_DATE``.

    Returns:
        The ``file`` argument, unchanged.

    Raises:
        KeyError: If the JSON document has no ``'CONFIG'`` key.
    """
    with open(file, 'r', encoding='utf8') as source:
        options = json.load(source)

    options['CONFIG']['END_DATE'] = new_date

    with open(file, 'w', encoding='utf8') as target:
        json.dump(options, target, indent=4, ensure_ascii=False)

    return file

# Persist the resolved END_DATE whenever the options file has no usable value.
# A single truthiness test treats a missing key, an empty string and a JSON
# null alike, which matches the `or` fallback used when END_DATE was computed
# (the previous `== False or == ''` check skipped the null case).
if not json_data["CONFIG"].get("END_DATE"):
    create_json_date_end(json_file, END_DATE)

In [None]:
# Criar pastas para os arquivos

# Output folders live under '../archives'; the CSV and ZIP sub-folder names
# are read from the CONFIG section of the options file.
dir_archives = os.path.join(os.path.abspath('../'), 'archives')
dir_csv = os.path.join(dir_archives, json_data['CONFIG']['DIR_CVM_CSV_NAME'])
dir_zip = os.path.join(dir_archives, json_data['CONFIG']['DIR_CVM_ZIP_NAME'])

# makedirs(exist_ok=True) replaces the exists()/mkdir() pairs: it does not
# fail if the folder appears between the check and the creation, and it also
# creates intermediate folders when a configured name contains a sub-path.
for directory in (dir_archives, dir_csv, dir_zip):
    os.makedirs(directory, exist_ok=True)

In [None]:
# Realizar download dos arquivos da CVM

def adjust_month(num_month):
    """Return a month number as text, prefixed with '0' when it is below 10.

    Args:
        num_month: Month number; it is compared against 10 and converted
            with ``str``.

    Returns:
        str: ``'0' + str(num_month)`` for values below 10, otherwise
        ``str(num_month)``.
    """
    return '0' + str(num_month) if num_month < 10 else str(num_month)

def download_inf_diario_archives(year, num_month):
    """Download one monthly 'inf_diario_fi' ZIP file from the CVM data site.

    The file is saved as ``<year><num_month>.zip`` inside the folder named by
    ``json_data['CONFIG']['DIR_CVM_ZIP_NAME']`` under ``../archives``.

    Args:
        year: Year part of the file name; converted with ``str``.
        num_month: Month part of the file name; converted with ``str``. The
            download loop in this notebook passes a two-character,
            zero-padded string.

    Returns:
        None.

    Raises:
        Any error raised by ``urlretrieve`` propagates to the caller; the
        download loop in this notebook handles ``HTTPError``.
    """
    period = str(year) + str(num_month)

    zip_dir = os.path.join(os.path.abspath('../archives'), json_data['CONFIG']['DIR_CVM_ZIP_NAME'])
    destination = os.path.join(zip_dir, period + '.zip')

    source_url = (
        'https://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/inf_diario_fi_'
        + period
        + '.zip'
    )
    urlretrieve(source_url, destination)

# Month bounds used for every year that is neither the first nor the last one.
FIRST_MONTH_YEAR, LAST_MONTH_YEAR = 1, 12

for year in range(start_year, end_year + 1):
    # The first year starts at the configured start month and the last year
    # stops at the configured end month; all other years cover January-December.
    start_month_loop = start_month if start_year == year else FIRST_MONTH_YEAR
    end_month_loop = end_month if end_year == year else LAST_MONTH_YEAR

    # `month` is the zero-padded text form expected in the CVM file names.
    for month in map(adjust_month, range(start_month_loop, end_month_loop + 1)):
        try:
            download_inf_diario_archives(year, month)
        except HTTPError as err:
            # A 404 is reported as "file not published yet"; any other HTTP
            # error only has its message printed. Either way the loop moves on.
            if err.code == 404:
                print('Link não encontrado. Ainda não foi disponibilizado o arquivo de ' + str(month) + '/' + str(year))
            else:
                print(err.msg)

In [None]:
# Unzip arquivos

# Only '.zip' entries are considered, so stray files in the folder are never
# handed to unpack_archive (which raises ReadError for an unknown extension).
# Sorting makes the processing order deterministic.
archive_list = sorted(
    entry for entry in os.listdir(dir_zip) if entry.endswith('.zip')
)

# Nothing to extract: tell the user where to look and stop the cell.
if len(archive_list) == 0:
    print('Não há arquivos na pasta "./app/archives/archives-zip"')
    print('Verifique as datas inseridas no "../options.json"')
    sys.exit()

ARCHIVE_TEMPLATE = 'inf_diario_fi_'
ARCHIVE_EXTENSION = '.csv'

for archive_zip_full_name in archive_list:
    # The download step names each ZIP '<year><month>.zip', so the first six
    # characters are taken as the period identifier.
    archive_zip_name = archive_zip_full_name[0:6]
    # Presumably the name of the CSV stored inside the ZIP - confirm against
    # an actual CVM archive.
    archive_csv_full_name = ARCHIVE_TEMPLATE + archive_zip_name + ARCHIVE_EXTENSION

    archive_csv_path = os.path.join(dir_csv, archive_csv_full_name)
    archive_zip_path = os.path.join(dir_zip, archive_zip_full_name)

    # Skip extraction when the expected CSV is already present.
    if not os.path.exists(archive_csv_path):
        unpack_archive(archive_zip_path, dir_csv)