In [3]:
import os
import pandas as pd

from datetime import datetime

# Directory that is scanned (via os.listdir) for the input files of each form.
PATH = "../dfp_cia_aberta_2024"

# Tags used to recognise each input file by a substring of its file name.
forms = [
    "BPA_con", "BPP_con",
    "DFC_MD_con", "DFC_MI_con",
    "DMPL_con",
    "DRA_con", "DRE_con",
    "composicao",
    "DVA_con",
    "parecer",
]

# Value of the ORDEM_EXERC column that identifies the rows to keep.
ORDEM_EXERC = "ÚLTIMO"

# Output field name -> column name in the source files.
# Insertion order matters: it defines the column order of the INSERT statement.
mapping = dict(
    CNPJ="CNPJ_CIA",
    REPORT_DATE="DT_REFER",
    COMPANY_NAME="DENOM_CIA",
    CVM_CODE="CD_CVM",
    DFP_GROUP="GRUPO_DFP",
    VERSION="VERSAO",
    CURRENCY="MOEDA",
    ANALYSIS_START_PERIOD_DATE="DT_INI_EXERC",
    ANALYSIS_END_PERIOD_DATE="DT_FIM_EXERC",
    ACCOUNT_NUMBER="CD_CONTA",
    ACCOUNT_NAME="DS_CONTA",
    ACCOUNT_VALUE="VL_CONTA",
    IS_FIXED_ACCOUNT="ST_CONTA_FIXA",
)

# Processamento

- Balanço Patrimonial Ativo (BPA)
- Balanço Patrimonial Passivo (BPP)
- Demonstração de Fluxo de Caixa - Método Direto (DFC-MD)
- Demonstração de Fluxo de Caixa - Método Indireto (DFC-MI)
- Demonstração das Mutações do Patrimônio Líquido (DMPL)
- Demonstração de Resultado Abrangente (DRA)
- Demonstração de Resultado (DRE)
- Demonstração de Valor Adicionado (DVA)


In [37]:
# Load every account-level statement file, keep only the rows whose ORDEM_EXERC
# matches the configured value, normalise types, and stack everything in `df`.
result_forms = []
for form in [
    "BPA_con",
    "BPP_con",
    "DFC_MD_con",
    "DFC_MI_con",
    "DMPL_con",
    "DRA_con",
    "DRE_con",
    "DVA_con",
]:
    # Pick the file whose name contains the form tag. sorted() makes the choice
    # deterministic, because os.listdir() returns entries in arbitrary order.
    candidates = sorted(name for name in os.listdir(PATH) if form in name)
    if not candidates:
        raise FileNotFoundError(f"No file containing '{form}' found in '{PATH}'")
    path = os.path.join(PATH, candidates[0])

    with open(path, "r", encoding="latin-1") as f:
        doc = f.readlines()

    # First line is the ';'-separated header, the remaining lines are data rows.
    header, rows = doc[0].strip().split(";"), doc[1:]

    # The balance-sheet forms are loaded without a period start date
    # (presumably those files have no DT_INI_EXERC column - TODO confirm).
    has_start_date = form not in ("BPA_con", "BPP_con")

    results = []
    for row in rows:
        row = row.strip()
        if not row:
            # A blank line would otherwise fail with a KeyError below.
            continue

        key_value = dict(zip(header, row.split(";")))
        # Use the configured constant instead of repeating the literal.
        if key_value["ORDEM_EXERC"] != ORDEM_EXERC:
            continue

        # Values flagged "MIL" are scaled by 1000; anything else is kept as-is.
        unit = 1000 if key_value["ESCALA_MOEDA"] == "MIL" else 1

        dict_row = {
            key: key_value[source]
            for key, source in mapping.items()
            if has_start_date or key != "ANALYSIS_START_PERIOD_DATE"
        }

        # Parse every date field that is present for this form.
        for date_key in (
            "REPORT_DATE",
            "ANALYSIS_START_PERIOD_DATE",
            "ANALYSIS_END_PERIOD_DATE",
        ):
            if date_key in dict_row:
                dict_row[date_key] = datetime.strptime(dict_row[date_key], "%Y-%m-%d")

        dict_row["IS_FIXED_ACCOUNT"] = dict_row["IS_FIXED_ACCOUNT"] == "S"
        dict_row["ACCOUNT_VALUE"] = float(dict_row["ACCOUNT_VALUE"]) * unit

        results.append(dict_row)

    df = pd.DataFrame(results)
    result_forms.append(df)

# ignore_index avoids repeated index labels coming from the per-form frames.
df = pd.concat(result_forms, ignore_index=True)

Criando base de dados

In [39]:
import sqlite3

database_path = "dfp_cvm.db"

# Truncate (or create) the database file so each run starts from an empty
# database. Any previous content of this file is discarded.
with open(database_path, "w"):
    pass

# NOTE(review): IS_FIXED_ACCOUNT is declared TEXT while the loader produces a
# boolean; with TEXT affinity SQLite stores such values in text form. Confirm
# this is the intended representation.
command = """CREATE TABLE IF NOT EXISTS DFP_CVM (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
	CNPJ TEXT NOT NULL,
	REPORT_DATE DATETIME NOT NULL,
	COMPANY_NAME TEXT NOT NULL,
	CVM_CODE TEXT NOT NULL,
	DFP_GROUP TEXT NOT NULL,
	VERSION TEXT NOT NULL,
	CURRENCY TEXT NOT NULL,
	ANALYSIS_START_PERIOD_DATE DATETIME,
	ANALYSIS_END_PERIOD_DATE DATETIME NOT NULL,
	ACCOUNT_NUMBER TEXT NOT NULL,
	ACCOUNT_NAME TEXT NOT NULL,
	ACCOUNT_VALUE FLOAT NOT NULL,
	IS_FIXED_ACCOUNT TEXT
)"""

# Connect to the SQLite database (sqlite3 creates the file if it is missing).
connection = sqlite3.connect(database_path)
try:
    cursor = connection.cursor()

    # Create the table and persist the change.
    cursor.executescript(command)
    connection.commit()

    cursor.close()
finally:
    # Release the connection even when the statement above fails.
    connection.close()

Populando base de dados

In [40]:
# Build one parameterised INSERT whose column order follows `mapping`.
fields = list(mapping.keys())
fields_str = ", ".join(fields)
placeholders = ", ".join(["?"] * len(fields))

query = f"INSERT INTO DFP_CVM ({fields_str}) VALUES ({placeholders})"

data = []  # list of tuples to insert

for idx, row in df.iterrows():
    # Skip accounts nested deeper than three levels (e.g. "1.01.01.01").
    if len(row["ACCOUNT_NUMBER"].split(".")) > 3:
        continue

    row_values = []
    for field in fields:
        value = row[field]
        if pd.isna(value):
            # Missing values (NaN, NaT, None) become SQL NULL. This must be
            # checked before the datetime branch: NaT is a datetime instance
            # whose strftime() raises, which previously caused the literal
            # string "NULL" to be stored instead of a real NULL.
            value = None
        elif isinstance(value, datetime):
            value = value.strftime("%Y-%m-%d")
        elif value in ("NULL", ""):
            value = None
        row_values.append(value)

    data.append(tuple(row_values))

# Connect to the SQLite database (sqlite3 creates the file if it is missing).
connection = sqlite3.connect(database_path)
try:
    cursor = connection.cursor()

    # Bulk insert with bound parameters, then persist the change.
    cursor.executemany(query, data)
    connection.commit()

    cursor.close()
finally:
    # Release the connection even when the insert fails.
    connection.close()

# Processamento Composição

Carregando e limpando os dados

In [8]:
# Output field name -> column name in the share-composition file.
# NOTE: this rebinds the global `mapping` that the DFP cells also read, so
# re-running the DFP insert cell after this one would use the wrong fields.
mapping = {
    "CNPJ": "CNPJ_CIA",
    "REPORT_DATE": "DT_REFER",
    "COMPANY_NAME": "DENOM_CIA",
    "VERSION": "VERSAO",
    "ORDINARY_SHARES_ISSUED": "QT_ACAO_ORDIN_CAP_INTEGR",
    "ORDINARY_SHARES_TREASURY": "QT_ACAO_ORDIN_TESOURO",
    "PREFERRED_SHARES_ISSUED": "QT_ACAO_PREF_CAP_INTEGR",
    "PREFERRED_SHARES_TREASURY": "QT_ACAO_PREF_TESOURO",
    "TOTAL_SHARES_ISSUED": "QT_ACAO_TOTAL_CAP_INTEGR",
    "TOTAL_SHARES_TREASURY": "QT_ACAO_TOTAL_TESOURO",
}

form = "composicao"

# Pick the file whose name contains the form tag. sorted() makes the choice
# deterministic, because os.listdir() returns entries in arbitrary order.
candidates = sorted(name for name in os.listdir(PATH) if form in name)
if not candidates:
    raise FileNotFoundError(f"No file containing '{form}' found in '{PATH}'")
path = os.path.join(PATH, candidates[0])

with open(path, "r", encoding="latin-1") as f:
    doc = f.readlines()

# First line is the ';'-separated header, the remaining lines are data rows.
header, rows = doc[0].strip().split(";"), doc[1:]

# Fields that hold share counts and are converted to int.
share_count_fields = (
    "ORDINARY_SHARES_ISSUED",
    "ORDINARY_SHARES_TREASURY",
    "PREFERRED_SHARES_ISSUED",
    "PREFERRED_SHARES_TREASURY",
    "TOTAL_SHARES_ISSUED",
    "TOTAL_SHARES_TREASURY",
)

results = []
for row in rows:
    row = row.strip()
    if not row:
        # A blank line would otherwise fail with a KeyError below.
        continue

    key_value = dict(zip(header, row.split(";")))

    dict_row = {key: key_value[source] for key, source in mapping.items()}
    dict_row["REPORT_DATE"] = datetime.strptime(dict_row["REPORT_DATE"], "%Y-%m-%d")
    for count_field in share_count_fields:
        dict_row[count_field] = int(dict_row[count_field])

    results.append(dict_row)

df = pd.DataFrame(results)

Criando base de dados

In [9]:
# Schema of the share-composition table; every column is mandatory.
command = """CREATE TABLE IF NOT EXISTS CVM_SHARE_COMPOSITION (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
	CNPJ TEXT NOT NULL,
    REPORT_DATE DATETIME NOT NULL,
    COMPANY_NAME TEXT NOT NULL,
    VERSION TEXT NOT NULL,
    ORDINARY_SHARES_ISSUED INTEGER NOT NULL,
    ORDINARY_SHARES_TREASURY INTEGER NOT NULL,
    PREFERRED_SHARES_ISSUED INTEGER NOT NULL,
    PREFERRED_SHARES_TREASURY INTEGER NOT NULL,
    TOTAL_SHARES_ISSUED INTEGER NOT NULL,
    TOTAL_SHARES_TREASURY INTEGER NOT NULL
)"""

# Connect to the SQLite database (sqlite3 creates the file if it is missing).
connection = sqlite3.connect(database_path)
try:
    cursor = connection.cursor()

    # Create the table and persist the change.
    cursor.executescript(command)
    connection.commit()

    cursor.close()
finally:
    # Release the connection even when the statement above fails.
    connection.close()

Inserindo os dados

In [10]:
# Build one parameterised INSERT whose column order follows `mapping`.
fields = list(mapping.keys())
fields_str = ", ".join(fields)
placeholders = ", ".join(["?"] * len(fields))

query = f"INSERT INTO CVM_SHARE_COMPOSITION ({fields_str}) VALUES ({placeholders})"

data = []  # list of tuples to insert

for idx, row in df.iterrows():
    row_values = []
    for field in fields:
        value = row[field]
        # Dates are stored as ISO "YYYY-MM-DD" text.
        if isinstance(value, datetime):
            value = value.strftime("%Y-%m-%d")
        row_values.append(value)

    data.append(tuple(row_values))

# Connect to the SQLite database (sqlite3 creates the file if it is missing).
connection = sqlite3.connect(database_path)
try:
    cursor = connection.cursor()

    # Bulk insert with bound parameters, then persist the change.
    cursor.executemany(query, data)
    connection.commit()

    cursor.close()
finally:
    # Release the connection even when the insert fails.
    connection.close()

In [58]:
# List every distinct (account number, account name) pair stored in DFP_CVM
# and print the result as a markdown table.
command = """SELECT DISTINCT ACCOUNT_NUMBER, ACCOUNT_NAME FROM DFP_CVM ORDER BY ACCOUNT_NUMBER"""

# Connect to the SQLite database (sqlite3 creates the file if it is missing).
connection = sqlite3.connect(database_path)
try:
    cursor = connection.cursor()
    cursor.execute(command)

    # Column names come from the cursor description (None when there is no result set).
    columns = (
        [description[0] for description in cursor.description]
        if cursor.description
        else []
    )
    rows = cursor.fetchall()

    cursor.close()
finally:
    # Release the connection even when the query fails.
    connection.close()

# Format results as a markdown table
if not rows:
    output = "No data found with given query"
else:
    lines = [
        "| " + " | ".join(columns) + " |",
        "| " + " | ".join("---" for _ in columns) + " |",
    ]
    for row in rows:
        # NULL cells are rendered as empty strings.
        cells = ["" if cell is None else str(cell) for cell in row]
        lines.append("| " + " | ".join(cells) + " |")
    # Every line, including the last one, ends with a newline.
    output = "\n".join(lines) + "\n"

print(output)

| ACCOUNT_NUMBER | ACCOUNT_NAME |
| --- | --- |
| 1 | Ativo Total |
| 1.01 | Caixa e Equivalentes de Caixa |
| 1.01 | Ativo Circulante |
| 1.01.01 | Caixa |
| 1.01.01 | Caixa e Equivalentes de Caixa |
| 1.01.01 | Disponibilidades |
| 1.01.02 | Aplicações de Liquidez |
| 1.01.02 | Aplicações Financeiras |
| 1.01.02 | Aplicações no mercado aberto |
| 1.01.03 | Contas a Receber |
| 1.01.03 | Ativos da Atividade Seguradora/Resseguradora |
| 1.01.04 | Estoques |
| 1.01.04 | Títulos e Créditos a Receber |
| 1.01.05 | Ativos Biológicos |
| 1.01.05 | Outros Valores e Bens |
| 1.01.06 | Tributos a Recuperar |
| 1.01.06 | Empréstimos e Depósitos Compulsórios |
| 1.01.07 | Despesas Antecipadas |
| 1.01.08 | Outros Ativos Circulantes |
| 1.01.08 | Despesas de Comercialização Diferidas |
| 1.01.09 | Outros Ativos Circulantes |
| 1.02 | Ativos Financeiros |
| 1.02 | Ativo Não Circulante |
| 1.02.01 | Depósito Compulsório Banco Central |
| 1.02.01 | Ativo Realizável a Longo Prazo |
| 1.02.01 | Ativos