In [None]:
# Importar as bibliotecas necessárias

from datetime import date
import json
import os
from pathlib import Path
from shutil import unpack_archive
import sys
from urllib.request import urlretrieve, HTTPError

In [None]:
# Make the project's own packages importable from this notebook.
# The project root is taken to be the parent of the current working directory.

PROJECT_ROOT = Path().resolve().parent

# Guard the append: notebook cells are frequently re-run, and an unconditional
# append would add one more duplicate entry to sys.path on every run.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from   utils.classes.cvm_link import CvmLink
import utils.functions.date_transform as date_transform

In [None]:
# Load the configuration options

# Path is relative to the current working directory (two levels up).
json_file = os.path.abspath('../../options.json')

# Read explicitly as UTF-8. This notebook writes the same file back as UTF-8
# with non-ASCII characters kept verbatim, so relying on the platform default
# encoding here could mis-decode (or fail to decode) the file.
with open(json_file, 'r', encoding='utf8') as j_file:
    json_data = json.load(j_file)

In [None]:
# Filesystem locations used by the notebook.
# Everything lives under "<two levels above the working directory>/data/cvm";
# the CSV and ZIP folder names come from the loaded options.

dir_data = os.path.join(os.path.abspath('../../'), 'data')
dir_cvm = os.path.join(dir_data, 'cvm')
dir_csv, dir_zip = (
    os.path.join(dir_cvm, json_data['DIR']['CVM'][name_key])
    for name_key in ('CSV_NAME', 'ZIP_NAME')
)

In [None]:
# Create the folders that will hold the downloaded files

# os.makedirs(..., exist_ok=True) does nothing when the folder already exists
# and creates any missing parent folders, which removes the gap between the
# "exists?" check and the creation. Parents are listed before children.
for directory in (dir_data, dir_cvm, dir_csv, dir_zip):
    os.makedirs(directory, exist_ok=True)

In [None]:
# Resolve the investment date range from the configuration.
# START_DATE is mandatory; a missing or empty END_DATE falls back to today.

START_DATE = json_data["CONFIG"]["START_DATE"]
END_DATE = json_data["CONFIG"].get("END_DATE") or str(date.today())

# Convert both date strings with the project's date helper; the results are
# indexed by 'year' and 'month' further down.
start_date_object, end_date_object = (
    date_transform.transform_date_string_on_number_list(raw_date)
    for raw_date in (START_DATE, END_DATE)
)


In [None]:
# Persist an 'END_DATE' in the JSON options file when it is absent.
## Rule: if the date is empty or missing, it is set to the value resolved
## for END_DATE (today's date in that case).

# Use a plain falsy test, matching the `... or str(date.today())` fallback
# used to resolve END_DATE. A missing key, an empty string and a JSON null
# are then all handled the same way and the fallback is always written back.
if not json_data["CONFIG"].get("END_DATE"):
    # Re-read the file so the rewrite starts from what is currently on disk.
    with open(json_file, 'r', encoding='utf8') as j_file:
        file_content = json.load(j_file)

    file_content['CONFIG']['END_DATE'] = END_DATE

    with open(json_file, 'w', encoding='utf8') as j_file:
        json.dump(file_content, j_file, indent=4, ensure_ascii=False)

In [None]:
# Download the CVM files, one archive per month in the configured range

uvm_class = CvmLink()

for year in range(start_date_object['year'], end_date_object['year'] + 1):
    # First and last month to fetch for this particular year.
    # NOTE(review): the end year is passed as `end year + 1`; confirm that
    # this is what get_date_loop expects.
    start_month_loop, end_month_loop = date_transform.get_date_loop(
        start_date_object['year'],
        end_date_object['year'] + 1,
        start_date_object['month'],
        end_date_object['month'],
        year,
    )

    for month in range(start_month_loop, end_month_loop + 1):
        # Keep the adjusted values under their own names. Rebinding `month`
        # and `year` here would overwrite the loop variables, so every later
        # month of the same year would pass the already-adjusted year back
        # into adjust_data instead of the integer from range().
        # Presumably both results are strings (they are concatenated into the
        # file name below) - verify against CvmLink.adjust_data.
        month_text, year_text = uvm_class.adjust_data(month, year)

        try:
            # create_link stays inside the try so that an HTTPError raised
            # there, if any, is still handled as before.
            uvm_link = uvm_class.create_link(month_text, year_text)

            zip_file_name = year_text + month_text + uvm_class.zip
            zip_file_path = os.path.join(dir_zip, zip_file_name)
            urlretrieve(uvm_link, zip_file_path)

        except HTTPError as err:
            if err.code == 404:
                # A 404 is reported as "file not published yet", not a failure.
                print(f'Link não encontrado. Ainda não foi disponibilizado o arquivo de {month_text}/{year_text}.')
            else:
                print(err.msg)

In [None]:
# Unzip the downloaded archives

archive_list = os.listdir(dir_zip)

# Nothing was downloaded: tell the user where to look and stop here.
if not archive_list:
    # The closing double quote was missing from this message.
    print('Não há arquivos na pasta "app/data/cvm/' + json_data['DIR']['CVM']['ZIP_NAME'] + '"')
    print('Verifique as datas inseridas no "options.json"')
    sys.exit()

ARCHIVE_TEMPLATE = 'inf_diario_fi_'
ARCHIVE_EXTENSION = '.csv'

for archive_zip_full_name in archive_list:
    # The first six characters of the zip name are used as the date stamp.
    # Presumably this is <year><month> as written by the download step - verify.
    archive_zip_name = archive_zip_full_name[0:6]
    # Assumes the archive contains a CSV with this name - TODO confirm.
    archive_csv_full_name = ARCHIVE_TEMPLATE + archive_zip_name + ARCHIVE_EXTENSION

    archive_csv_path = os.path.join(dir_csv, archive_csv_full_name)
    archive_zip_path = os.path.join(dir_zip, archive_zip_full_name)

    # Skip archives whose expected CSV already exists in the output folder.
    if not os.path.exists(archive_csv_path):
        unpack_archive(archive_zip_path, dir_csv)