Notebook for filtering the accounts and indicators that will be used for selecting stocks for the Magic Formula

In [1]:
import pandas as pd
# Notebook display limits: truncate wide text columns and show at most 10 rows per frame
pd.set_option("display.max_colwidth", 30)
pd.set_option("display.max_rows", 10)
# Tax rate applied to EBIT when computing ROIC
TAX_RATE = 0.34

In [2]:
# Load the financial statements dataset (more than 10 million accounting rows).
# The local copy on disk is what gets read; the S3 location is kept for reference:
# df = pd.read_feather("s3://aq-dl/FinancialStatements/dataset.feather")
df = pd.read_feather(path="/mnt/aq_disk/data/FinancialStatements/dataset.feather")
print('Number of companies available for backtesting:', df["cia_id"].nunique())
df

Number of companies available for backtesting: 1110


Unnamed: 0,cia_id,cia_nome,doc_id,doc_arq,doc_tp,doc_ver,doc_env,doc_ref,per_ini,per_fim,per_ref,dem_tp,conta_id,conta_fixa,conta_desc,conta_valor
0,3,CIA MODELO,54947,00000320160331301.zip,ITR,1,2016-04-02 12:04:12,2016-03-31,2016-01-01,2016-03-31,0,IND,3.01,1,Receita de Venda de Bens e...,100000000.0
1,3,CIA MODELO,54947,00000320160331301.zip,ITR,1,2016-04-02 12:04:12,2016-03-31,2016-01-01,2016-03-31,0,IND,3.02,1,Custo dos Bens e/ou Serviç...,-10000000.0
2,3,CIA MODELO,54947,00000320160331301.zip,ITR,1,2016-04-02 12:04:12,2016-03-31,2016-01-01,2016-03-31,0,IND,3.03,1,Resultado Bruto,90000000.0
3,3,CIA MODELO,54947,00000320160331301.zip,ITR,1,2016-04-02 12:04:12,2016-03-31,2016-01-01,2016-03-31,0,IND,3.04,1,Despesas/Receitas Operacio...,-15000000.0
4,3,CIA MODELO,54947,00000320160331301.zip,ITR,1,2016-04-02 12:04:12,2016-03-31,2016-01-01,2016-03-31,0,IND,3.05,1,Resultado Antes do Resulta...,75000000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10549207,26875,Eixo SP Concessionária de ...,112698,02687520211231401.zip,DFP,1,2022-03-18 14:02:06,2021-12-31,2021-01-01,2021-12-31,0,IND,7.08.03.03.01,0,Despesas financeiras,27651000.0
10549208,26875,Eixo SP Concessionária de ...,112698,02687520211231401.zip,DFP,1,2022-03-18 14:02:06,2021-12-31,2021-01-01,2021-12-31,0,IND,7.08.04,1,Remuneração de Capitais Pr...,22119000.0
10549209,26875,Eixo SP Concessionária de ...,112698,02687520211231401.zip,DFP,1,2022-03-18 14:02:06,2021-12-31,2021-01-01,2021-12-31,0,IND,7.08.04.03,1,Lucros Retidos / Prejuízo ...,22119000.0
10549210,26875,Eixo SP Concessionária de ...,112698,02687520211231401.zip,DFP,1,2022-03-18 14:02:06,2021-12-31,2021-01-01,2021-12-31,0,IND,9.01.01,1,Ações Ordinárias (Capital ...,969857000.0


In [3]:
# Read the pipe-separated file listing the companies that take part in the backtest
df_included = pd.read_csv(filepath_or_buffer="../data/included_companies.psv", sep="|")
df_included

Unnamed: 0,DENOM_SOCIAL,DT_REG,DT_CANCEL,SIT,DT_INI_SIT,CD_CVM,SETOR_ATIV,CONTROLE_ACIONARIO
0,ACOPALMA CIA INDL ACOS V P...,1978-05-09,2007-08-07,CANCELADA,2007-08-07,60,Metalurgia e Siderurgia,PRIVADO
1,ACOS ANHANGUERA SA,1971-10-20,1994-02-09,CANCELADA,1994-02-09,78,Metalurgia e Siderurgia,PRIVADO
2,ACOS DO BRASIL SA IND E COM,1972-08-16,1989-12-14,CANCELADA,1989-12-14,86,Metalurgia e Siderurgia,PRIVADO
3,PANATLANTICA SA,1971-12-29,,ATIVO,1971-12-29,94,Metalurgia e Siderurgia,PRIVADO
4,ACRINOR ACRILONITRILA DO N...,1981-12-23,2000-02-21,CANCELADA,2000-02-21,116,Petroquímicos e Borracha,PRIVADO
...,...,...,...,...,...,...,...,...
1438,TECIDOS E ARMARINHOS MIGUE...,2022-04-27,,ATIVO,2022-04-27,26760,Comércio (Atacado e Varejo),PRIVADO
1439,SENIOR SISTEMAS S.A.,2022-05-12,,ATIVO,2022-05-12,26786,Comunicação e Informática,PRIVADO
1440,SELF IT ACADEMIAS HOLDING ...,2022-05-13,,ATIVO,2022-05-13,26794,Emp. Adm. Part. - Brinqued...,PRIVADO
1441,QESTRA TECNOLOGIA ADMINIST...,2022-05-30,,ATIVO,2022-05-30,26816,Comunicação e Informática,PRIVADO


In [4]:
# Collect the CVM codes of the companies that are kept in the backtest
included_codes = df_included["CD_CVM"].tolist()
print(included_codes[:10])

[60, 78, 86, 94, 116, 132, 159, 167, 175, 183]


In [5]:
# Keep only the rows of the companies listed in `included_codes`
# (inside `query`, comparing a column to a list with == acts as a membership test)
df.query('cia_id == @included_codes', inplace=True)
print('Number of companies available for backtesting:', df["cia_id"].nunique())
df

Number of companies available for backtesting: 585


Unnamed: 0,cia_id,cia_nome,doc_id,doc_arq,doc_tp,doc_ver,doc_env,doc_ref,per_ini,per_fim,per_ref,dem_tp,conta_id,conta_fixa,conta_desc,conta_valor
171,94,PANATLANTICA S.A.,6094,00009420101231401.zip,DFP,1,2011-03-31 10:16:48,2010-12-31,2008-01-01,2008-12-31,-2,IND,1,1,Ativo Total,1.847500e+08
172,94,PANATLANTICA S.A.,6094,00009420101231401.zip,DFP,1,2011-03-31 10:16:48,2010-12-31,2008-01-01,2008-12-31,-2,CON,1,1,Ativo Total,2.045610e+08
173,94,PANATLANTICA S.A.,6094,00009420101231401.zip,DFP,1,2011-03-31 10:16:48,2010-12-31,2008-01-01,2008-12-31,-2,IND,1.01,1,Ativo Circulante,1.306040e+08
174,94,PANATLANTICA S.A.,6094,00009420101231401.zip,DFP,1,2011-03-31 10:16:48,2010-12-31,2008-01-01,2008-12-31,-2,CON,1.01,1,Ativo Circulante,1.697710e+08
175,94,PANATLANTICA S.A.,6094,00009420101231401.zip,DFP,1,2011-03-31 10:16:48,2010-12-31,2008-01-01,2008-12-31,-2,IND,1.01.01,1,Caixa e Equivalentes de Caixa,2.244000e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10546286,24775,BCBF Participações S.A.,87098,02477520181231403.zip,DFP,3,2019-09-02 11:46:05,2018-12-31,2018-01-01,2018-12-31,0,CON,7.08,1,Distribuição do Valor Adic...,1.000000e+03
10546287,24775,BCBF Participações S.A.,87098,02477520181231403.zip,DFP,3,2019-09-02 11:46:05,2018-12-31,2018-01-01,2018-12-31,0,IND,7.08.05,1,Outros,1.000000e+03
10546288,24775,BCBF Participações S.A.,87098,02477520181231403.zip,DFP,3,2019-09-02 11:46:05,2018-12-31,2018-01-01,2018-12-31,0,CON,7.08.05,1,Outros,1.000000e+03
10546289,24775,BCBF Participações S.A.,87098,02477520181231403.zip,DFP,3,2019-09-02 11:46:05,2018-12-31,2018-01-01,2018-12-31,0,IND,9.01.01,1,Ações Ordinárias (Capital ...,1.563080e+09


In [6]:
# First version: only audited statements (doc_tp "DFP") are used
df.query('doc_tp == "DFP"', inplace=True)
# Drop the document columns that the backtest does not need:
#   doc_ref         -> already implied by the period start and end dates
#   doc_ver, doc_id -> document versions are tracked by submission time and company instead
df.drop(
    ["doc_tp", "doc_arq", "doc_ref", "doc_ver", "doc_id"],
    axis="columns",
    inplace=True,
)
df.reset_index(drop=True, inplace=True)
print('Number of companies available for backtesting:', df["cia_id"].nunique())
df

Number of companies available for backtesting: 585


Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,per_ref,dem_tp,conta_id,conta_fixa,conta_desc,conta_valor
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2008-01-01,2008-12-31,-2,IND,1,1,Ativo Total,1.847500e+08
1,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2008-01-01,2008-12-31,-2,CON,1,1,Ativo Total,2.045610e+08
2,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2008-01-01,2008-12-31,-2,IND,1.01,1,Ativo Circulante,1.306040e+08
3,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2008-01-01,2008-12-31,-2,CON,1.01,1,Ativo Circulante,1.697710e+08
4,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2008-01-01,2008-12-31,-2,IND,1.01.01,1,Caixa e Equivalentes de Caixa,2.244000e+06
...,...,...,...,...,...,...,...,...,...,...,...
4265017,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,0,CON,7.08,1,Distribuição do Valor Adic...,1.000000e+03
4265018,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,0,IND,7.08.05,1,Outros,1.000000e+03
4265019,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,0,CON,7.08.05,1,Outros,1.000000e+03
4265020,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,0,IND,9.01.01,1,Ações Ordinárias (Capital ...,1.563080e+09


In [7]:
# The simulation uses consolidated statements (CON) only.
# The share-count data sits in the individual statements (IND), so those rows are kept too;
# the account of interest is "9.01.03" -> "Total de Ações (Capital Integralizado)".
df.query('dem_tp == "CON" or conta_id == "9.01.03"', inplace=True)
# dem_tp is not used by the backtest after this filter
df.drop("dem_tp", axis="columns", inplace=True)
df.reset_index(drop=True, inplace=True)
print('Number of companies available for backtesting:', df["cia_id"].nunique())
df

Number of companies available for backtesting: 585


Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,per_ref,conta_id,conta_fixa,conta_desc,conta_valor
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2008-01-01,2008-12-31,-2,1,1,Ativo Total,2.045610e+08
1,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2008-01-01,2008-12-31,-2,1.01,1,Ativo Circulante,1.697710e+08
2,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2008-01-01,2008-12-31,-2,1.01.01,1,Caixa e Equivalentes de Caixa,2.536000e+06
3,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2008-01-01,2008-12-31,-2,1.01.02,1,Aplicações Financeiras,3.980200e+07
4,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2008-01-01,2008-12-31,-2,1.01.03,1,Contas a Receber,4.513700e+07
...,...,...,...,...,...,...,...,...,...,...
1952456,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,0,7.05,1,Valor Adicionado Líquido P...,1.000000e+03
1952457,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,0,7.07,1,Valor Adicionado Total a D...,1.000000e+03
1952458,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,0,7.08,1,Distribuição do Valor Adic...,1.000000e+03
1952459,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,0,7.08.05,1,Outros,1.000000e+03


* (pág. 138) *For purposes of the study, earnings-related numbers were based on the latest 12-month period, balance sheet items were based on the most recent balance sheet, and market prices were based on the most recent closing price. Utilities, financial stocks and companies where we could not be certain that the information in the database was timely or complete were eliminated. Adjustments were also made for certain non-interest bearing liabilities. The study was structured so that an average of 30 stocks was held during the study period. Stocks with only limited liquidity were eliminated from the study. Market capitalizations were determined based on 2003 dollars. Both the number of companies in each decile as well as the number of companies in each market capitalization group fluctuated as the number of companies in the database varied during the study period.*

In [8]:
# According to the book, only indicators from the latest 12 months are used,
# so every period other than the current one can be discarded -> keep per_ref == 0
df.query('per_ref == 0', inplace=True)
# per_ref is not used by the backtest after this filter
df.drop("per_ref", axis="columns", inplace=True)
df.reset_index(drop=True, inplace=True)
print('Number of companies available for backtesting:', df["cia_id"].nunique())
df

Number of companies available for backtesting: 585


Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,conta_id,conta_fixa,conta_desc,conta_valor
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1,1,Ativo Total,2.788580e+08
1,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01,1,Ativo Circulante,2.203650e+08
2,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.01,1,Caixa e Equivalentes de Caixa,4.944000e+06
3,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.02,1,Aplicações Financeiras,7.979400e+07
4,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.03,1,Contas a Receber,6.341700e+07
...,...,...,...,...,...,...,...,...,...
700202,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,7.05,1,Valor Adicionado Líquido P...,1.000000e+03
700203,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,7.07,1,Valor Adicionado Total a D...,1.000000e+03
700204,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,7.08,1,Distribuição do Valor Adic...,1.000000e+03
700205,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,7.08.05,1,Outros,1.000000e+03


Plano de contas da CVM (parte inicial "conta_id"):
- 1 -> Balance Sheet - Assets
- 2 -> Balance Sheet - Liabilities and Shareholders’ Equity
- 3 -> Income
- 4 -> Comprehensive Income
- 5 -> Changes in Equity
- 6 -> Cash Flow (Indirect Method)
- 7 -> Added Value

Logo, para usar a fórmula precisamos filtrar as empresas por:
- Liquidez mínima (o livro filtra por valor de mercado no lugar de liquidez)
- Excluir empresas do setor financeiro, seguros e utilities
- ROIC = EBIT / Invested Capital (the code below applies it after tax: EBIT x (1 - TAX_RATE) / Invested Capital)
- Invested Capital = Net Working Capital + Net Fixed Assets
- Earnings yield = EBIT / Enterprise Value
    - Enterprise Value (EV) = market value of equity + net interest-bearing debt
    - Invested Capital (Damodaran) = PL<sub>con</sub> + Dívida<sub>con</sub> - Caixa<sub>con</sub> (calculados pelo valor contábil)
- Desmembrando os indicadores:
    - EBIT = lucro antes dos juros e impostos -> 3.05
    - Market value of equity = núm. de ações (conta 9.01.03) x preço da ação
    - Net interest-bearing debt = dívida líquida
        - total_cash = df.loc["1.01.01"] + df.loc["1.01.02"]
        - total_debt = df.loc["2.01.04"] + df.loc["2.02.01"]
        - net_debt = total_debt - total_cash
    - PL -> df.loc["2.03"]

In [9]:
# Inspect the first 10 income-statement rows (conta_id starting with "3") reported by
# company 5410 for the period ending 2021-12-31
df.query('conta_id.str.startswith("3") and cia_id == 5410 and per_fim == "2021-12-31"').head(10)

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,conta_id,conta_fixa,conta_desc,conta_valor
72310,5410,WEG S.A.,2022-02-16 07:05:37,2021-01-01,2021-12-31,3.01,1,Receita de Venda de Bens e...,23563340000.0
72311,5410,WEG S.A.,2022-02-16 07:05:37,2021-01-01,2021-12-31,3.02,1,Custo dos Bens e/ou Serviç...,-16602380000.0
72312,5410,WEG S.A.,2022-02-16 07:05:37,2021-01-01,2021-12-31,3.03,1,Resultado Bruto,6960957000.0
72313,5410,WEG S.A.,2022-02-16 07:05:37,2021-01-01,2021-12-31,3.04,1,Despesas/Receitas Operacio...,-2802614000.0
72314,5410,WEG S.A.,2022-02-16 07:05:37,2021-01-01,2021-12-31,3.04.01,1,Despesas com Vendas,-1833204000.0
72315,5410,WEG S.A.,2022-02-16 07:05:37,2021-01-01,2021-12-31,3.04.02,1,Despesas Gerais e Administ...,-776007000.0
72316,5410,WEG S.A.,2022-02-16 07:05:37,2021-01-01,2021-12-31,3.04.02.01,0,Honorários dos Administrad...,-25257000.0
72317,5410,WEG S.A.,2022-02-16 07:05:37,2021-01-01,2021-12-31,3.04.02.02,0,Outras,-750750000.0
72318,5410,WEG S.A.,2022-02-16 07:05:37,2021-01-01,2021-12-31,3.04.04,1,Outras Receitas Operacionais,422154000.0
72319,5410,WEG S.A.,2022-02-16 07:05:37,2021-01-01,2021-12-31,3.04.05,1,Outras Despesas Operacionais,-615557000.0


In [10]:
# Keep only the accounts that are used to compute the indicators
contas = [
    "1.01.01",  # cash and cash equivalents
    "1.01.02",  # financial investments
    "2.01.04",  # loans and financing
    "2.02.01",  # loans and financing
    "2.03",     # consolidated shareholders' equity
    "3.01",     # sales revenue
    "3.05",     # EBIT (per the indicator notes)
    "9.01.03",  # total shares (paid-in capital)
]
df.query('conta_id == @contas', inplace=True)
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,conta_id,conta_fixa,conta_desc,conta_valor
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.01,1,Caixa e Equivalentes de Caixa,4.944000e+06
1,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.02,1,Aplicações Financeiras,7.979400e+07
2,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.01.04,1,Empréstimos e Financiamentos,6.148400e+07
3,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.02.01,1,Empréstimos e Financiamentos,4.273000e+06
4,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.03,1,Patrimônio Líquido Consoli...,1.428050e+08
...,...,...,...,...,...,...,...,...,...
31324,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,2.02.01,1,Empréstimos e Financiamentos,3.201370e+08
31325,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,2.03,1,Patrimônio Líquido Consoli...,1.873480e+09
31326,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,3.01,1,Receita de Venda de Bens e...,6.135217e+09
31327,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,3.05,1,Resultado Antes do Resulta...,7.234170e+08


In [11]:
# Check whether every selected account is a fixed account (conta_fixa == 1)
df["conta_fixa"].value_counts()

1    31329
0        0
Name: conta_fixa, dtype: int64

In [12]:
# Sanity check: look for any financial institution left in the selection by matching
# company names against bank/credit/insurance keywords (case-insensitive),
# showing one row per matching company name
procurar = "bco |banco|crédito|mercantil|seguradora|seguro|PPLA PARTICIPATIONS"
(
    df
    .query('cia_nome.str.contains(@procurar, case=False)')
    .drop_duplicates(subset='cia_nome')
)

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,conta_id,conta_fixa,conta_desc,conta_valor


In [13]:
# conta_fixa is not used from here on, so drop it from the frame
df.drop("conta_fixa", axis="columns", inplace=True)
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,conta_id,conta_desc,conta_valor
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.01,Caixa e Equivalentes de Caixa,4.944000e+06
1,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.02,Aplicações Financeiras,7.979400e+07
2,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.01.04,Empréstimos e Financiamentos,6.148400e+07
3,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.02.01,Empréstimos e Financiamentos,4.273000e+06
4,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.03,Patrimônio Líquido Consoli...,1.428050e+08
...,...,...,...,...,...,...,...,...
31324,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,2.02.01,Empréstimos e Financiamentos,3.201370e+08
31325,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,2.03,Patrimônio Líquido Consoli...,1.873480e+09
31326,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,3.01,Receita de Venda de Bens e...,6.135217e+09
31327,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,3.05,Resultado Antes do Resulta...,7.234170e+08


In [14]:
# Drop the account description ahead of the upcoming unstack operation
df.drop("conta_desc", axis="columns", inplace=True)
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,conta_id,conta_valor
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.01,4.944000e+06
1,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.02,7.979400e+07
2,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.01.04,6.148400e+07
3,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.02.01,4.273000e+06
4,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.03,1.428050e+08
...,...,...,...,...,...,...,...
31324,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,2.02.01,3.201370e+08
31325,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,2.03,1.873480e+09
31326,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,3.01,6.135217e+09
31327,24775,BCBF Participações S.A.,2019-09-02 11:46:05,2018-01-01,2018-12-31,3.05,7.234170e+08


In [15]:
# Unstack preparation: every column except the last one goes into the MultiIndex
colunas_index = list(df.columns[:-1])
colunas_index

['cia_id', 'cia_nome', 'doc_env', 'per_ini', 'per_fim', 'conta_id']

In [16]:
# Move the selected columns into the row MultiIndex, then sort the rows by it
df = df.set_index(colunas_index)
df = df.sort_index()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,conta_valor
cia_id,cia_nome,doc_env,per_ini,per_fim,conta_id,Unnamed: 6_level_1
94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.01,4944000.0
94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.01.02,79794000.0
94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.01.04,61484000.0
94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.02.01,4273000.0
94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,2.03,142805000.0
...,...,...,...,...,...,...
26794,Self It Academias Holding S.A.,2022-04-29 18:14:40,2021-01-01,2021-12-31,3.01,97904000.0
26794,Self It Academias Holding S.A.,2022-04-29 18:14:40,2021-01-01,2021-12-31,3.05,-46981000.0
26794,Self It Academias Holding S.A.,2022-04-29 18:14:40,2021-01-01,2021-12-31,9.01.03,63790395.0
26816,Qestra Tecnologia Administração E Participações S.A.,2022-04-14 20:44:45,2021-01-01,2021-12-31,9.01.03,118672.0


In [17]:
# Unstack the dataframe on the conta_id index level, so each account becomes a column.
# Accounts missing for a given row are filled with 0 (fill_value=0), which makes a
# missing account indistinguishable from a reported zero.
df = df.unstack(level='conta_id', fill_value=0)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,conta_valor,conta_valor,conta_valor,conta_valor,conta_valor,conta_valor,conta_valor,conta_valor
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,conta_id,1.01.01,1.01.02,2.01.04,2.02.01,2.03,3.01,3.05,9.01.03
cia_id,cia_nome,doc_env,per_ini,per_fim,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,4944000.0,79794000.0,61484000.0,4273000.0,142805000.0,390963000.0,24777000.0,8856000.0
94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,4944000.0,79794000.0,61485000.0,4273000.0,142082000.0,377666000.0,21093000.0,8856000.0
94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,2649000.0,99038000.0,51741000.0,3764000.0,158596000.0,365581000.0,8720000.0,9480000.0
94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,2460000.0,113261000.0,77550000.0,10290000.0,174065000.0,374837000.0,15767000.0,9778000.0
94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,11693000.0,132014000.0,91201000.0,81188000.0,281428000.0,746372000.0,68165000.0,12587000.0
...,...,...,...,...,...,...,...,...,...,...,...,...
26786,Senior Sistemas S.A.,2022-02-01 19:43:03,2021-01-01,2021-12-31,121642000.0,4097000.0,15813000.0,21309000.0,250599000.0,582475000.0,137222000.0,1269683.0
26794,Self It Academias Holding S.A.,2021-12-10 22:49:21,2020-01-01,2020-12-31,23952000.0,0.0,34777000.0,122713000.0,73586000.0,86859000.0,-41873000.0,10336691.0
26794,Self It Academias Holding S.A.,2022-04-29 18:14:40,2021-01-01,2021-12-31,28204000.0,0.0,66296000.0,99983000.0,89309000.0,97904000.0,-46981000.0,63790395.0
26816,Qestra Tecnologia Administração E Participações S.A.,2022-04-14 20:44:45,2021-01-01,2021-12-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,118672.0


In [18]:
# Drop the outer level of the column MultiIndex, leaving only the conta_id labels
df.columns = df.columns.droplevel(0)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,conta_id,1.01.01,1.01.02,2.01.04,2.02.01,2.03,3.01,3.05,9.01.03
cia_id,cia_nome,doc_env,per_ini,per_fim,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,4944000.0,79794000.0,61484000.0,4273000.0,142805000.0,390963000.0,24777000.0,8856000.0
94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,4944000.0,79794000.0,61485000.0,4273000.0,142082000.0,377666000.0,21093000.0,8856000.0
94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,2649000.0,99038000.0,51741000.0,3764000.0,158596000.0,365581000.0,8720000.0,9480000.0
94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,2460000.0,113261000.0,77550000.0,10290000.0,174065000.0,374837000.0,15767000.0,9778000.0
94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,11693000.0,132014000.0,91201000.0,81188000.0,281428000.0,746372000.0,68165000.0,12587000.0
...,...,...,...,...,...,...,...,...,...,...,...,...
26786,Senior Sistemas S.A.,2022-02-01 19:43:03,2021-01-01,2021-12-31,121642000.0,4097000.0,15813000.0,21309000.0,250599000.0,582475000.0,137222000.0,1269683.0
26794,Self It Academias Holding S.A.,2021-12-10 22:49:21,2020-01-01,2020-12-31,23952000.0,0.0,34777000.0,122713000.0,73586000.0,86859000.0,-41873000.0,10336691.0
26794,Self It Academias Holding S.A.,2022-04-29 18:14:40,2021-01-01,2021-12-31,28204000.0,0.0,66296000.0,99983000.0,89309000.0,97904000.0,-46981000.0,63790395.0
26816,Qestra Tecnologia Administração E Participações S.A.,2022-04-14 20:44:45,2021-01-01,2021-12-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,118672.0


In [19]:
# Turn the row MultiIndex back into regular columns
df.reset_index(inplace=True)
# Clear the name left on the columns axis
df.columns.name = None
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,1.01.01,1.01.02,2.01.04,2.02.01,2.03,3.01,3.05,9.01.03
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,4944000.0,79794000.0,61484000.0,4273000.0,142805000.0,390963000.0,24777000.0,8856000.0
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,4944000.0,79794000.0,61485000.0,4273000.0,142082000.0,377666000.0,21093000.0,8856000.0
2,94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,2649000.0,99038000.0,51741000.0,3764000.0,158596000.0,365581000.0,8720000.0,9480000.0
3,94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,2460000.0,113261000.0,77550000.0,10290000.0,174065000.0,374837000.0,15767000.0,9778000.0
4,94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,11693000.0,132014000.0,91201000.0,81188000.0,281428000.0,746372000.0,68165000.0,12587000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5603,26786,Senior Sistemas S.A.,2022-02-01 19:43:03,2021-01-01,2021-12-31,121642000.0,4097000.0,15813000.0,21309000.0,250599000.0,582475000.0,137222000.0,1269683.0
5604,26794,Self It Academias Holding ...,2021-12-10 22:49:21,2020-01-01,2020-12-31,23952000.0,0.0,34777000.0,122713000.0,73586000.0,86859000.0,-41873000.0,10336691.0
5605,26794,Self It Academias Holding ...,2022-04-29 18:14:40,2021-01-01,2021-12-31,28204000.0,0.0,66296000.0,99983000.0,89309000.0,97904000.0,-46981000.0,63790395.0
5606,26816,Qestra Tecnologia Administ...,2022-04-14 20:44:45,2021-01-01,2021-12-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,118672.0


In [20]:
# Remove pure holdings, without own revenues except from the equity method:
# rows whose revenue account (3.01) equals zero are discarded.
# See the case of the company BRAP3 (Bradespar), CVM code 18724:
# df.query('cia_id == 18724')
df.query('`3.01` != 0', inplace=True)
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,1.01.01,1.01.02,2.01.04,2.02.01,2.03,3.01,3.05,9.01.03
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,4944000.0,7.979400e+07,61484000.0,4.273000e+06,1.428050e+08,3.909630e+08,2.477700e+07,8856000.0
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,4944000.0,7.979400e+07,61485000.0,4.273000e+06,1.420820e+08,3.776660e+08,2.109300e+07,8856000.0
2,94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,2649000.0,9.903800e+07,51741000.0,3.764000e+06,1.585960e+08,3.655810e+08,8.720000e+06,9480000.0
3,94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,2460000.0,1.132610e+08,77550000.0,1.029000e+07,1.740650e+08,3.748370e+08,1.576700e+07,9778000.0
4,94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,11693000.0,1.320140e+08,91201000.0,8.118800e+07,2.814280e+08,7.463720e+08,6.816500e+07,12587000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5599,26700,EUROFARMA LABORATÓRIOS S.A.,2021-12-16 16:23:45,2020-01-01,2020-12-31,128680000.0,1.258534e+09,154918000.0,1.840028e+09,2.765208e+09,5.723191e+09,1.086628e+09,855863854.0
5600,26700,EUROFARMA LABORATÓRIOS S.A.,2022-03-29 18:27:45,2021-01-01,2021-12-31,122970000.0,5.944020e+08,627317000.0,1.749283e+09,3.385494e+09,7.067827e+09,1.449802e+09,858714812.0
5603,26786,Senior Sistemas S.A.,2022-02-01 19:43:03,2021-01-01,2021-12-31,121642000.0,4.097000e+06,15813000.0,2.130900e+07,2.505990e+08,5.824750e+08,1.372220e+08,1269683.0
5604,26794,Self It Academias Holding ...,2021-12-10 22:49:21,2020-01-01,2020-12-31,23952000.0,0.000000e+00,34777000.0,1.227130e+08,7.358600e+07,8.685900e+07,-4.187300e+07,10336691.0


In [21]:
# Compute the indicators
# Give the accounts that are kept readable names (column positions do not change)
df.rename(
    columns={"2.03": "equity", "3.05": "ebit", "9.01.03": "shares_outstanding"},
    inplace=True,
)
# Cash position, gross debt and net debt from the balance-sheet accounts
df["total_cash"] = df["1.01.01"].add(df["1.01.02"])
df["total_debt"] = df["2.01.04"].add(df["2.02.01"])
df["net_debt"] = df["total_debt"].sub(df["total_cash"])
# (original note) account 1.02.02 -> "investments in minority holdings"; it is not used here
# Invested capital at book value: equity + debt - cash
df["invested_capital"] = df["equity"].add(df["net_debt"])
# ROIC on after-tax EBIT.
# NOTE(review): nothing here guards against invested_capital being zero or negative,
# which yields an infinite or sign-flipped roic - confirm this is handled downstream.
df["roic"] = df["ebit"].mul(1 - TAX_RATE).div(df["invested_capital"])
# The raw account columns are no longer needed once the indicators exist
df.drop(
    ["1.01.01", "1.01.02", "2.01.04", "2.02.01", "3.01"],
    axis="columns",
    inplace=True,
)
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,equity,ebit,shares_outstanding,total_cash,total_debt,net_debt,invested_capital,roic
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.428050e+08,2.477700e+07,8856000.0,8.473800e+07,6.575700e+07,-1.898100e+07,1.238240e+08,0.132065
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,1.420820e+08,2.109300e+07,8856000.0,8.473800e+07,6.575800e+07,-1.898000e+07,1.231020e+08,0.113088
2,94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,1.585960e+08,8.720000e+06,9480000.0,1.016870e+08,5.550500e+07,-4.618200e+07,1.124140e+08,0.051196
3,94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,1.740650e+08,1.576700e+07,9778000.0,1.157210e+08,8.784000e+07,-2.788100e+07,1.461840e+08,0.071186
4,94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,2.814280e+08,6.816500e+07,12587000.0,1.437070e+08,1.723890e+08,2.868200e+07,3.101100e+08,0.145074
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5599,26700,EUROFARMA LABORATÓRIOS S.A.,2021-12-16 16:23:45,2020-01-01,2020-12-31,2.765208e+09,1.086628e+09,855863854.0,1.387214e+09,1.994946e+09,6.077320e+08,3.372940e+09,0.212626
5600,26700,EUROFARMA LABORATÓRIOS S.A.,2022-03-29 18:27:45,2021-01-01,2021-12-31,3.385494e+09,1.449802e+09,858714812.0,7.173720e+08,2.376600e+09,1.659228e+09,5.044722e+09,0.189677
5603,26786,Senior Sistemas S.A.,2022-02-01 19:43:03,2021-01-01,2021-12-31,2.505990e+08,1.372220e+08,1269683.0,1.257390e+08,3.712200e+07,-8.861700e+07,1.619820e+08,0.559115
5604,26794,Self It Academias Holding ...,2021-12-10 22:49:21,2020-01-01,2020-12-31,7.358600e+07,-4.187300e+07,10336691.0,2.395200e+07,1.574900e+08,1.335380e+08,2.071240e+08,-0.133428


In [22]:
# Keep only the rows with positive shareholders' equity
df.query('equity > 0', inplace=True)
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,equity,ebit,shares_outstanding,total_cash,total_debt,net_debt,invested_capital,roic
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.428050e+08,2.477700e+07,8856000.0,8.473800e+07,6.575700e+07,-1.898100e+07,1.238240e+08,0.132065
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,1.420820e+08,2.109300e+07,8856000.0,8.473800e+07,6.575800e+07,-1.898000e+07,1.231020e+08,0.113088
2,94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,1.585960e+08,8.720000e+06,9480000.0,1.016870e+08,5.550500e+07,-4.618200e+07,1.124140e+08,0.051196
3,94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,1.740650e+08,1.576700e+07,9778000.0,1.157210e+08,8.784000e+07,-2.788100e+07,1.461840e+08,0.071186
4,94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,2.814280e+08,6.816500e+07,12587000.0,1.437070e+08,1.723890e+08,2.868200e+07,3.101100e+08,0.145074
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5599,26700,EUROFARMA LABORATÓRIOS S.A.,2021-12-16 16:23:45,2020-01-01,2020-12-31,2.765208e+09,1.086628e+09,855863854.0,1.387214e+09,1.994946e+09,6.077320e+08,3.372940e+09,0.212626
5600,26700,EUROFARMA LABORATÓRIOS S.A.,2022-03-29 18:27:45,2021-01-01,2021-12-31,3.385494e+09,1.449802e+09,858714812.0,7.173720e+08,2.376600e+09,1.659228e+09,5.044722e+09,0.189677
5603,26786,Senior Sistemas S.A.,2022-02-01 19:43:03,2021-01-01,2021-12-31,2.505990e+08,1.372220e+08,1269683.0,1.257390e+08,3.712200e+07,-8.861700e+07,1.619820e+08,0.559115
5604,26794,Self It Academias Holding ...,2021-12-10 22:49:21,2020-01-01,2020-12-31,7.358600e+07,-4.187300e+07,10336691.0,2.395200e+07,1.574900e+08,1.335380e+08,2.071240e+08,-0.133428


In [23]:
# Keep only the rows with a positive cash position (total_cash > 0)
df.query('total_cash > 0', inplace=True)
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,equity,ebit,shares_outstanding,total_cash,total_debt,net_debt,invested_capital,roic
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.428050e+08,2.477700e+07,8856000.0,8.473800e+07,6.575700e+07,-1.898100e+07,1.238240e+08,0.132065
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,1.420820e+08,2.109300e+07,8856000.0,8.473800e+07,6.575800e+07,-1.898000e+07,1.231020e+08,0.113088
2,94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,1.585960e+08,8.720000e+06,9480000.0,1.016870e+08,5.550500e+07,-4.618200e+07,1.124140e+08,0.051196
3,94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,1.740650e+08,1.576700e+07,9778000.0,1.157210e+08,8.784000e+07,-2.788100e+07,1.461840e+08,0.071186
4,94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,2.814280e+08,6.816500e+07,12587000.0,1.437070e+08,1.723890e+08,2.868200e+07,3.101100e+08,0.145074
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5599,26700,EUROFARMA LABORATÓRIOS S.A.,2021-12-16 16:23:45,2020-01-01,2020-12-31,2.765208e+09,1.086628e+09,855863854.0,1.387214e+09,1.994946e+09,6.077320e+08,3.372940e+09,0.212626
5600,26700,EUROFARMA LABORATÓRIOS S.A.,2022-03-29 18:27:45,2021-01-01,2021-12-31,3.385494e+09,1.449802e+09,858714812.0,7.173720e+08,2.376600e+09,1.659228e+09,5.044722e+09,0.189677
5603,26786,Senior Sistemas S.A.,2022-02-01 19:43:03,2021-01-01,2021-12-31,2.505990e+08,1.372220e+08,1269683.0,1.257390e+08,3.712200e+07,-8.861700e+07,1.619820e+08,0.559115
5604,26794,Self It Academias Holding ...,2021-12-10 22:49:21,2020-01-01,2020-12-31,7.358600e+07,-4.187300e+07,10336691.0,2.395200e+07,1.574900e+08,1.335380e+08,2.071240e+08,-0.133428


In [24]:
# Drop rows whose EBIT is negative or close to zero (hence a ROIC that is
# negative or close to zero): the formula would never select those companies.
# The filtered frame is re-bound as an independent copy rather than mutating
# df with inplace=True, so previously displayed objects are not altered.
df = df.query('ebit >= 0.001').copy()
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,equity,ebit,shares_outstanding,total_cash,total_debt,net_debt,invested_capital,roic
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.428050e+08,2.477700e+07,8856000.0,8.473800e+07,6.575700e+07,-1.898100e+07,1.238240e+08,0.132065
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,1.420820e+08,2.109300e+07,8856000.0,8.473800e+07,6.575800e+07,-1.898000e+07,1.231020e+08,0.113088
2,94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,1.585960e+08,8.720000e+06,9480000.0,1.016870e+08,5.550500e+07,-4.618200e+07,1.124140e+08,0.051196
3,94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,1.740650e+08,1.576700e+07,9778000.0,1.157210e+08,8.784000e+07,-2.788100e+07,1.461840e+08,0.071186
4,94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,2.814280e+08,6.816500e+07,12587000.0,1.437070e+08,1.723890e+08,2.868200e+07,3.101100e+08,0.145074
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5594,26603,GSH CORP Participações S.A.,2021-11-17 17:42:10,2020-01-01,2020-12-31,2.252800e+07,3.111700e+07,7116750.0,2.699000e+06,0.000000e+00,-2.699000e+06,1.982900e+07,1.035716
5595,26603,GSH CORP Participações S.A.,2022-03-28 18:01:25,2021-01-01,2021-12-31,1.024510e+08,3.893300e+07,272756849.0,9.125200e+07,4.373160e+08,3.460640e+08,4.485150e+08,0.057291
5599,26700,EUROFARMA LABORATÓRIOS S.A.,2021-12-16 16:23:45,2020-01-01,2020-12-31,2.765208e+09,1.086628e+09,855863854.0,1.387214e+09,1.994946e+09,6.077320e+08,3.372940e+09,0.212626
5600,26700,EUROFARMA LABORATÓRIOS S.A.,2022-03-29 18:27:45,2021-01-01,2021-12-31,3.385494e+09,1.449802e+09,858714812.0,7.173720e+08,2.376600e+09,1.659228e+09,5.044722e+09,0.189677


In [25]:
# Keep only rows with a strictly positive ROIC.
# NOTE(review): EBIT is already positive at this point, so this presumably
# removes rows with a non-positive invested capital -- confirm against the
# cell that computes `roic`.
# The filtered frame is re-bound as an independent copy rather than mutating
# df with inplace=True, so previously displayed objects are not altered.
df = df.query('roic > 0').copy()
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,equity,ebit,shares_outstanding,total_cash,total_debt,net_debt,invested_capital,roic
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,1.428050e+08,2.477700e+07,8856000.0,8.473800e+07,6.575700e+07,-1.898100e+07,1.238240e+08,0.132065
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,1.420820e+08,2.109300e+07,8856000.0,8.473800e+07,6.575800e+07,-1.898000e+07,1.231020e+08,0.113088
2,94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,1.585960e+08,8.720000e+06,9480000.0,1.016870e+08,5.550500e+07,-4.618200e+07,1.124140e+08,0.051196
3,94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,1.740650e+08,1.576700e+07,9778000.0,1.157210e+08,8.784000e+07,-2.788100e+07,1.461840e+08,0.071186
4,94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,2.814280e+08,6.816500e+07,12587000.0,1.437070e+08,1.723890e+08,2.868200e+07,3.101100e+08,0.145074
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5594,26603,GSH CORP Participações S.A.,2021-11-17 17:42:10,2020-01-01,2020-12-31,2.252800e+07,3.111700e+07,7116750.0,2.699000e+06,0.000000e+00,-2.699000e+06,1.982900e+07,1.035716
5595,26603,GSH CORP Participações S.A.,2022-03-28 18:01:25,2021-01-01,2021-12-31,1.024510e+08,3.893300e+07,272756849.0,9.125200e+07,4.373160e+08,3.460640e+08,4.485150e+08,0.057291
5599,26700,EUROFARMA LABORATÓRIOS S.A.,2021-12-16 16:23:45,2020-01-01,2020-12-31,2.765208e+09,1.086628e+09,855863854.0,1.387214e+09,1.994946e+09,6.077320e+08,3.372940e+09,0.212626
5600,26700,EUROFARMA LABORATÓRIOS S.A.,2022-03-29 18:27:45,2021-01-01,2021-12-31,3.385494e+09,1.449802e+09,858714812.0,7.173720e+08,2.376600e+09,1.659228e+09,5.044722e+09,0.189677


In [26]:
# Drop the intermediate indicators that the backtest does not use.
# The identifying columns are listed by name instead of being sliced by
# position (df.columns[:5]), so adding or reordering columns in an earlier
# cell cannot silently change which columns are exported.
id_columns = ['cia_id', 'cia_nome', 'doc_env', 'per_ini', 'per_fim']
indicator_columns = ['shares_outstanding', 'net_debt', 'ebit', 'roic']
colunas = id_columns + indicator_columns
# .copy() detaches the result from the wider frame it was selected from.
df = df[colunas].copy()
df

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,8856000.0,-1.898100e+07,2.477700e+07,0.132065
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,8856000.0,-1.898000e+07,2.109300e+07,0.113088
2,94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,9480000.0,-4.618200e+07,8.720000e+06,0.051196
3,94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,9778000.0,-2.788100e+07,1.576700e+07,0.071186
4,94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,12587000.0,2.868200e+07,6.816500e+07,0.145074
...,...,...,...,...,...,...,...,...,...
5594,26603,GSH CORP Participações S.A.,2021-11-17 17:42:10,2020-01-01,2020-12-31,7116750.0,-2.699000e+06,3.111700e+07,1.035716
5595,26603,GSH CORP Participações S.A.,2022-03-28 18:01:25,2021-01-01,2021-12-31,272756849.0,3.460640e+08,3.893300e+07,0.057291
5599,26700,EUROFARMA LABORATÓRIOS S.A.,2021-12-16 16:23:45,2020-01-01,2020-12-31,855863854.0,6.077320e+08,1.086628e+09,0.212626
5600,26700,EUROFARMA LABORATÓRIOS S.A.,2022-03-29 18:27:45,2021-01-01,2021-12-31,858714812.0,1.659228e+09,1.449802e+09,0.189677


In [27]:
# Companies that report no shares outstanding.
# Check the 2018 "Petro" document 00951220181231401.zip
df.loc[df['shares_outstanding'].eq(0)]

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic
5421,25526,CRUZEIRO DO SUL EDUCACIONA...,2022-03-30 19:50:24,2021-01-01,2021-12-31,0.0,91674000.0,313341000.0,0.133814


In [28]:
# Remove the filings that have no usable share count.
# `shares_outstanding > 0` is used instead of `!= 0` because NaN != 0
# evaluates to True, so a row with a missing share count would otherwise be
# kept; the comparison also rejects negative values.
# The filtered frame is re-bound as an independent copy rather than mutating
# df with inplace=True.
df = df.query('shares_outstanding > 0').copy()
print('Number of companies available for backtesting:', df.cia_id.nunique())
df

Number of companies available for backtesting: 372


Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,8856000.0,-1.898100e+07,2.477700e+07,0.132065
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,8856000.0,-1.898000e+07,2.109300e+07,0.113088
2,94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,9480000.0,-4.618200e+07,8.720000e+06,0.051196
3,94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,9778000.0,-2.788100e+07,1.576700e+07,0.071186
4,94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,12587000.0,2.868200e+07,6.816500e+07,0.145074
...,...,...,...,...,...,...,...,...,...
5594,26603,GSH CORP Participações S.A.,2021-11-17 17:42:10,2020-01-01,2020-12-31,7116750.0,-2.699000e+06,3.111700e+07,1.035716
5595,26603,GSH CORP Participações S.A.,2022-03-28 18:01:25,2021-01-01,2021-12-31,272756849.0,3.460640e+08,3.893300e+07,0.057291
5599,26700,EUROFARMA LABORATÓRIOS S.A.,2021-12-16 16:23:45,2020-01-01,2020-12-31,855863854.0,6.077320e+08,1.086628e+09,0.212626
5600,26700,EUROFARMA LABORATÓRIOS S.A.,2022-03-29 18:27:45,2021-01-01,2021-12-31,858714812.0,1.659228e+09,1.449802e+09,0.189677


In [29]:
# Write the filtered indicators to disk; the DataFrame index is not exported.
df.to_csv(path_or_buf="../data/magic_financials.csv", index=False)

In [30]:
# Sanity check: read the exported file back and display its contents.
pd.read_csv(filepath_or_buffer="../data/magic_financials.csv")

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,8856000.0,-1.898100e+07,2.477700e+07,0.132065
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,8856000.0,-1.898000e+07,2.109300e+07,0.113088
2,94,PANATLANTICA S.A.,2012-03-20 15:20:37,2011-01-01,2011-12-31,9480000.0,-4.618200e+07,8.720000e+06,0.051196
3,94,PANATLANTICA S.A.,2013-03-08 16:12:09,2012-01-01,2012-12-31,9778000.0,-2.788100e+07,1.576700e+07,0.071186
4,94,PANATLANTICA S.A.,2014-03-18 10:38:57,2013-01-01,2013-12-31,12587000.0,2.868200e+07,6.816500e+07,0.145074
...,...,...,...,...,...,...,...,...,...
2885,26603,GSH CORP Participações S.A.,2021-11-17 17:42:10,2020-01-01,2020-12-31,7116750.0,-2.699000e+06,3.111700e+07,1.035716
2886,26603,GSH CORP Participações S.A.,2022-03-28 18:01:25,2021-01-01,2021-12-31,272756849.0,3.460640e+08,3.893300e+07,0.057291
2887,26700,EUROFARMA LABORATÓRIOS S.A.,2021-12-16 16:23:45,2020-01-01,2020-12-31,855863854.0,6.077320e+08,1.086628e+09,0.212626
2888,26700,EUROFARMA LABORATÓRIOS S.A.,2022-03-29 18:27:45,2021-01-01,2021-12-31,858714812.0,1.659228e+09,1.449802e+09,0.189677


In [31]:
# Spot-check the rows of a single company (cia_id 4820).
# NOTE(review): the rendered output lists several filings for the same
# 2011 period, one of them with a much larger shares_outstanding value --
# presumably resubmissions of the same statement; confirm how the backtest
# picks among them.
df.loc[df['cia_id'] == 4820]

Unnamed: 0,cia_id,cia_nome,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic
468,4820,BRASKEM S.A.,2011-03-17 09:14:46,2010-01-01,2010-12-31,8.016650e+08,9.867897e+09,3.214961e+09,0.104649
469,4820,BRASKEM S.A.,2012-03-14 09:48:31,2011-01-01,2011-12-31,8.016656e+11,1.200680e+10,1.929905e+09,0.058008
470,4820,BRASKEM S.A.,2012-03-14 14:08:47,2011-01-01,2011-12-31,8.016650e+08,1.200680e+10,1.929905e+09,0.058008
471,4820,BRASKEM S.A.,2012-03-26 20:08:37,2011-01-01,2011-12-31,8.016650e+08,1.200680e+10,1.929905e+09,0.058008
472,4820,BRASKEM S.A.,2013-02-07 10:13:32,2012-01-01,2012-12-31,7.972653e+08,1.405187e+10,1.538603e+09,0.044704
...,...,...,...,...,...,...,...,...,...
476,4820,BRASKEM S.A.,2016-02-18 10:22:20,2015-01-01,2015-12-31,7.972652e+08,1.989790e+10,7.052191e+09,0.219181
477,4820,BRASKEM S.A.,2017-08-15 22:19:29,2016-01-01,2016-12-31,7.972576e+08,1.543872e+10,5.951247e+09,0.228902
478,4820,BRASKEM S.A.,2018-03-29 05:11:07,2017-01-01,2017-12-31,7.972576e+08,1.756980e+10,9.359062e+09,0.265561
479,4820,BRASKEM S.A.,2019-03-13 19:01:57,2018-01-01,2018-12-31,7.972186e+08,1.725968e+10,8.303943e+09,0.236529
