df_fin -> input dataframe com os dados financeiros das empresas

df_cod -> input dataframe com os códigos CVM dos ativos

df_mag -> output dataframe que irá armazenar o resultado com as magic stocks

In [1]:
from pathlib import Path
import pandas as pd

# Mostrar floats com duas casas decimas
pd.set_option('display.float_format',  lambda x: '%.3g' % x)
pd.options.display.max_colwidth = 20
pd.options.display.max_columns = 20
pd.options.display.max_rows = 4

In [2]:
# Ler a base ajustada no S3
# df_mag = pd.read_feather('s3://aq-dl/HistoricalQuotations/base_adj.feather')
# df_mag = pd.read_feather('/mnt/aq_disk/data/HistoricalQuotations/processed/base_adj.feather')
DATA_FOLDER = Path("/mnt/aq_disk/data/HistoricalQuotations/processed")
BASE_ADJ = DATA_FOLDER / "base_adj.feather"
BASE1 = DATA_FOLDER / "base_95-21.feather"
BASE2 = DATA_FOLDER / "base_22.feather"
cols = ['datneg', 'codneg', 'nomres', 'especi', 'codbdi', 'tpmerc', 'preult', 'premed', 'totneg', 'voltot']
df_mag = (pd
    .concat([pd.read_feather(BASE1), pd.read_feather(BASE2)], ignore_index=True)
    [cols]
    .query('tpmerc == 10')
)
df_mag

Unnamed: 0,datneg,codneg,nomres,especi,codbdi,tpmerc,preult,premed,totneg,voltot
0,2020-02-07,A1AP34,ADVANCE AUTO,DRN,2,10,143,143,2,6.14e+06
1,2020-02-10,A1AP34,ADVANCE AUTO,DRN,2,10,142,142,1,5.12e+05
...,...,...,...,...,...,...,...,...,...,...
10952032,2022-06-13,ZIFI11,FII ZION,CI,12,10,1e+03,1e+03,1,4e+03
10952033,2022-06-14,ZIFI11,FII ZION,CI,12,10,1.15e+03,1.15e+03,2,2.3e+03


In [3]:
# Filtrar somente cotações após 2011, mercado à vista (tpmerc == 10) e ações ON ou PN
df_mag.query('\
    codbdi == 2 and \
    datneg >= "2011.01.01" and \
    especi.str.contains("ON |PN ")'
    , inplace=True
)
df_mag.reset_index(drop=True, inplace=True)
df_mag

Unnamed: 0,datneg,codneg,nomres,especi,codbdi,tpmerc,preult,premed,totneg,voltot
0,2016-10-28,AALR3,ALLIAR,ON NM,2,10,19.2,19.3,4460,1.22e+08
1,2016-10-31,AALR3,ALLIAR,ON NM,2,10,18.1,18.2,4238,4.59e+07
...,...,...,...,...,...,...,...,...,...,...
578193,2022-06-14,YDUQ3,YDUQS PART,ON NM,2,10,13.5,13.4,7617,2.62e+07
578194,2022-06-15,YDUQ3,YDUQS PART,ON NM,2,10,13.7,13.7,10731,3.26e+07


In [4]:
# Manter somente as colunas que serão usadas para fazer o corte nas datas
cols = ['datneg', 'codneg', 'nomres', 'premed', 'totneg']
df_mag = df_mag[cols].copy()
# Inserir o cód. dos emissores -> 4 primeiros caracteres do código de negociação
df_mag['codemi'] = df_mag.codneg.str[0:4]
# Inserir o dia do ano e o ano para a operação de corte das ações
df_mag['day_year'] = df_mag.datneg.dt.day_of_year
df_mag['year'] = df_mag.datneg.dt.year
print('Number of companies available for backtesting', df_mag.codemi.nunique())
df_mag

Number of companies available for backtesting 500


Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,day_year,year
0,2016-10-28,AALR3,ALLIAR,19.3,4460,AALR,302,2016
1,2016-10-31,AALR3,ALLIAR,18.2,4238,AALR,305,2016
...,...,...,...,...,...,...,...,...
578193,2022-06-14,YDUQ3,YDUQS PART,13.4,7617,YDUQ,165,2022
578194,2022-06-15,YDUQ3,YDUQS PART,13.7,10731,YDUQ,166,2022


In [5]:
# Definir a data de corte e remover negociações posteriores ao corte
df_mag.query('day_year >= 100', inplace=True)
df_mag.reset_index(drop=True, inplace=True)
df_mag

Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,day_year,year
0,2016-10-28,AALR3,ALLIAR,19.3,4460,AALR,302,2016
1,2016-10-31,AALR3,ALLIAR,18.2,4238,AALR,305,2016
...,...,...,...,...,...,...,...,...
414365,2022-06-14,YDUQ3,YDUQS PART,13.4,7617,YDUQ,165,2022
414366,2022-06-15,YDUQ3,YDUQS PART,13.7,10731,YDUQ,166,2022


In [6]:
df_corte = df_mag.groupby(by=['year'])['day_year'].min().reset_index()
df_corte

Unnamed: 0,year,day_year
0,2011,101
1,2012,100
...,...,...
10,2021,102
11,2022,101


In [7]:
# Filtrar o dataframe de ações com dados somente nos dias de corte
# Usar 'year' e 'day_year' como chaves para a operação de união entre os dois dataframes
df_mag = df_mag.merge(
    right=df_corte, how='inner', on=['year', 'day_year']
)
# A coluna 'day_year' não será mais usada
df_mag.drop(columns=['day_year'], inplace=True)
print('Number of companies available for backtesting:', df_mag.codemi.nunique())
df_mag

Number of companies available for backtesting: 377


Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,year
0,2017-04-10,AALR3,ALLIAR,15.4,315,AALR,2017
1,2017-04-10,ABCB4,ABC BRASIL,18.4,1005,ABCB,2017
...,...,...,...,...,...,...,...
2465,2022-04-11,WLMM4,WLM IND COM,34.5,14,WLMM,2022
2466,2022-04-11,YDUQ3,YDUQS PART,19.7,10252,YDUQ,2022


In [8]:
# Excluir empresas financeiras e prestadoras de serviços públicos (utilities)
excluded_companies = pd.read_csv('../data/external/excluded_companies.csv')
excluded_companies = excluded_companies['company_code'].to_list()
df_mag.query('codemi != @excluded_companies', inplace=True)
print('Number of companies available for backtesting', df_mag.codemi.nunique())
df_mag

Number of companies available for backtesting 311


Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,year
0,2017-04-10,AALR3,ALLIAR,15.4,315,AALR,2017
2,2017-04-10,AGRO3,BRASILAGRO,12.3,199,AGRO,2017
...,...,...,...,...,...,...,...
2465,2022-04-11,WLMM4,WLM IND COM,34.5,14,WLMM,2022
2466,2022-04-11,YDUQ3,YDUQS PART,19.7,10252,YDUQ,2022


In [9]:
# Carregar dataframe com o código de emissão das empresas
df_cod = pd.read_pickle('/mnt/aq_disk/data/AQ/codemi.pkl')
df_cod

Unnamed: 0,codcvm,cnpj,densoc,situac,codemi
0,60,18451005000104,ACOPALMA CIA IND...,CANCELADA,ZWVZ
1,94,92693019000189,PANATLANTICA SA,ATIVO,PATI
...,...,...,...,...,...
1764,26824,43335774000186,TRAVESSIA SECURI...,ATIVO,TMER
1765,26832,38482780000126,ANEMUS WIND HOLD...,ATIVO,ANEM


In [10]:
# A chave da união será o cód. do emissor (codemi)
# Somente o cód. CVM (codcvm) será inserido na união -> Remover colunas que não serão 
# usadas na operação de merge
df_cod = df_cod[['codcvm', 'codemi']].copy()
df_cod

Unnamed: 0,codcvm,codemi
0,60,ZWVZ
1,94,PATI
...,...,...
1764,26824,TMER
1765,26832,ANEM


In [11]:
# Criar um set com a lista de ativos antes da operação de união dos dataframes
s0 = set(df_mag.codemi.unique())
# Obter os código de emissão dos ativos unindo os dataframes 
df_mag = df_mag.merge(right=df_cod, how='inner', on='codemi')
df_mag.reset_index(drop=True, inplace=True)
print('Number of companies available for backtesting:', df_mag.codemi.nunique())
df_mag

Number of companies available for backtesting: 277


Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,year,codcvm
0,2017-04-10,AALR3,ALLIAR,15.4,315,AALR,2017,24058
1,2018-04-10,AALR3,ALLIAR,15.2,175,AALR,2018,24058
...,...,...,...,...,...,...,...,...
1797,2022-04-11,VITT3,VITTIA,12.3,232,VITT,2022,25763
1798,2022-04-11,VVEO3,VIVEO,15.8,871,VVEO,2022,25682


In [12]:
# Verificar a diferença entre os dois sets
s1 = set(df_mag.codemi.unique())
print('Núm. de empresas cujo código não foi localizado', len(s0-s1))
# 34 empresas não tiveram seu código localizado.
# Analisando os dados, tratam-se de empresas cujo código de listagem foi alterado:
# BVMF->B3SA, VVAR->VIIA, etc
print(s0 - s1)

Núm. de empresas cujo código não foi localizado 34
{'HRTP', 'PRTX', 'BPNM', 'BTOW', 'ENMA', 'VAGR', 'PARC', 'KROT', 'SNSL', 'VVAR', 'BRDT', 'BRIN', 'CNTO', 'QGEP', 'SSBR', 'OHLB', 'LIQO', 'DTEX', 'INPR', 'LLXL', 'ALLL', 'RNAR', 'BBRK', 'TBLE', 'ABRE', 'DROG', 'BVMF', 'ECOD', 'ESTC', 'IDNT', 'MPXE', 'CTAX', 'CCPR', 'FJTA'}


In [13]:
# Carregar dataframe com os dados financeiros das empresas
df_fin = pd.read_csv(
    '../data/1_financials.csv',
    parse_dates=['doc_env', 'per_ini', 'per_fim']
)
# Renomear coluna com o código CVM para coincidir com as outras bases
df_fin.rename(columns={'cia_id': 'codcvm'}, inplace=True)
# As colunas 'per_ini' e 'cia_nome' não serão usadas
df_fin.drop(columns=['per_ini', 'cia_nome'], inplace=True)
# year = ano em que a informação será usada -> ano seguinte ao fim do período
df_fin['year'] = df_fin.per_fim.dt.year + 1
df_fin

Unnamed: 0,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,year
0,94,2011-03-31 10:16:48,2010-12-31,8.86e+06,-1.9e+07,2.48e+07,0.2,2011
1,94,2011-04-01 17:31:56,2010-12-31,8.86e+06,-1.9e+07,2.11e+07,0.171,2011
...,...,...,...,...,...,...,...,...
4056,80195,2021-03-12 18:33:08,2020-12-31,6.6e+07,1.04e+08,9.22e+07,0.191,2021
4057,90212,2018-05-30 15:43:03,2017-12-31,2.16e+08,-7.08e+07,2.41e+08,0.306,2018


In [14]:
# Incluir os dados contábeis em 'df_mag' 
df_mag = df_mag.merge(right=df_fin, how='inner', on=['year', 'codcvm'])
# A coluna 'codcvm não será mais usada
df_mag.drop(columns=['codcvm'], inplace=True)
df_mag

Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,year,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic
0,2017-04-10,AALR3,ALLIAR,15.4,315,AALR,2017,2017-03-22 23:42:13,2016-12-31,1.15e+08,3.09e+08,1.01e+08,0.0662
1,2018-04-10,AALR3,ALLIAR,15.2,175,AALR,2018,2018-03-28 20:20:31,2017-12-31,1.18e+08,5.08e+08,7.03e+07,0.0396
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1949,2022-04-11,VVEO3,VIVEO,15.8,871,VVEO,2022,2022-03-10 18:22:56,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237
1950,2022-04-11,VVEO3,VIVEO,15.8,871,VVEO,2022,2022-03-30 18:25:34,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237


In [15]:
# Calcular os indicadores que dependem do preço do ação
df_mag['market_cap'] = df_mag['shares_outstanding'] * df_mag['premed']
df_mag['enterprise_value'] = df_mag['market_cap'] - df_mag['net_debt']
df_mag['earnings_yield'] = df_mag['ebit'] / df_mag['enterprise_value']
# A coluna 'premed' não será mais usada
df_mag.drop(columns=['premed'], inplace=True)
df_mag

Unnamed: 0,datneg,codneg,nomres,totneg,codemi,year,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2017-04-10,AALR3,ALLIAR,315,AALR,2017,2017-03-22 23:42:13,2016-12-31,1.15e+08,3.09e+08,1.01e+08,0.0662,1.76e+09,1.46e+09,0.0691
1,2018-04-10,AALR3,ALLIAR,175,AALR,2018,2018-03-28 20:20:31,2017-12-31,1.18e+08,5.08e+08,7.03e+07,0.0396,1.79e+09,1.29e+09,0.0547
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1949,2022-04-11,VVEO3,VIVEO,871,VVEO,2022,2022-03-10 18:22:56,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237,4.51e+09,4.58e+09,0.107
1950,2022-04-11,VVEO3,VIVEO,871,VVEO,2022,2022-03-30 18:25:34,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237,4.51e+09,4.58e+09,0.107


In [16]:
# Remover revisões de DFPs publicadas no mesmo dia ou posteriores ao corte
# No livro, o corte é de uma semana
df_mag.query('doc_env.dt.date < datneg')

Unnamed: 0,datneg,codneg,nomres,totneg,codemi,year,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2017-04-10,AALR3,ALLIAR,315,AALR,2017,2017-03-22 23:42:13,2016-12-31,1.15e+08,3.09e+08,1.01e+08,0.0662,1.76e+09,1.46e+09,0.0691
1,2018-04-10,AALR3,ALLIAR,175,AALR,2018,2018-03-28 20:20:31,2017-12-31,1.18e+08,5.08e+08,7.03e+07,0.0396,1.79e+09,1.29e+09,0.0547
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1949,2022-04-11,VVEO3,VIVEO,871,VVEO,2022,2022-03-10 18:22:56,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237,4.51e+09,4.58e+09,0.107
1950,2022-04-11,VVEO3,VIVEO,871,VVEO,2022,2022-03-30 18:25:34,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237,4.51e+09,4.58e+09,0.107


In [17]:
# Manter somente a DFP mais recente ao corte para cada ativo
df_mag.sort_values('doc_env', inplace=True)
df_mag.drop_duplicates(subset=['codneg', 'year'], keep='last', inplace=True)
df_mag

Unnamed: 0,datneg,codneg,nomres,totneg,codemi,year,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
1348,2011-04-11,TOTS3,TOTVS,244,TOTS,2011,2011-01-31 19:05:59,2010-12-31,3.15e+07,1.79e+08,2.12e+08,0.261,9.94e+08,8.15e+08,0.26
784,2011-04-11,LREN3,LOJAS RENNER,3604,LREN,2011,2011-02-16 19:53:52,2010-12-31,1.22e+08,-2.72e+07,4.04e+08,0.407,6.68e+09,6.71e+09,0.0603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1644,2022-04-11,ALLD3,ALLIED,273,ALLD,2022,2022-04-27 12:11:23,2021-12-31,9.32e+07,-3.38e+08,4.08e+08,0.347,1.38e+09,1.72e+09,0.237
1943,2022-04-11,RECV3,PETRORECSA,2648,RECV,2022,2022-06-02 17:08:52,2021-12-31,2.49e+08,-6.15e+07,2.85e+08,0.157,5.74e+09,5.8e+09,0.049


In [19]:
# Remover o ativo menos líquido da empresa no ano pelo critério do núm. de negociações
df_mag.sort_values(by=['year', 'codemi', 'totneg'], inplace=True)
df_mag.drop_duplicates(
    subset=['codemi', 'year'], keep='last', inplace=True, ignore_index=True
)
# A coluna 'codemi' não será mais necessária
df_mag.drop(columns='codemi', inplace=True)
print('Number of companies available for backtesting', df_mag.codneg.nunique())
df_mag

Number of companies available for backtesting 252


Unnamed: 0,datneg,codneg,nomres,totneg,year,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2011-04-11,AEDU3,ANHANGUERA,978,2011,2011-05-26 11:54:58,2010-12-31,1.46e+08,-5.71e+08,1.57e+08,0.11,5.54e+09,6.11e+09,0.0257
1,2011-04-11,ALPA4,ALPARGATAS,158,2011,2011-03-30 16:55:23,2010-12-31,3.53e+08,-3.59e+08,3.24e+08,0.328,4.01e+09,4.37e+09,0.0742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1275,2022-04-11,WLMM4,WLM IND COM,14,2022,2022-03-22 22:18:18,2021-12-31,3.64e+07,-1.52e+08,1.36e+08,0.349,1.26e+09,1.41e+09,0.0965
1276,2022-04-11,YDUQ3,YDUQS PART,10252,2022,2022-03-15 18:09:20,2021-12-31,3.09e+08,3.69e+09,5.47e+08,0.0788,6.09e+09,2.4e+09,0.228


In [20]:
# O Livro fala em empresas com pelos menos USD 50 milhões de valor de mercado
# Remover empresas com menos de R$ 250 milhões de valor de mercado
df_mag.query('market_cap > 250_000_000', inplace=True)
# Remover ações com baixíssima liquidez no dia
df_mag.query('totneg > 100', inplace=True)
# A coluna 'totneg' não será mais necessária
df_mag.drop(columns='totneg', inplace=True)
print('Number of companies available for backtesting', df_mag.codneg.nunique())
df_mag

Number of companies available for backtesting 224


Unnamed: 0,datneg,codneg,nomres,year,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2011-04-11,AEDU3,ANHANGUERA,2011,2011-05-26 11:54:58,2010-12-31,1.46e+08,-5.71e+08,1.57e+08,0.11,5.54e+09,6.11e+09,0.0257
1,2011-04-11,ALPA4,ALPARGATAS,2011,2011-03-30 16:55:23,2010-12-31,3.53e+08,-3.59e+08,3.24e+08,0.328,4.01e+09,4.37e+09,0.0742
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1274,2022-04-11,WEGE3,WEG,2022,2022-02-16 07:05:37,2021-12-31,4.2e+09,-1.43e+09,4.16e+09,0.33,1.38e+11,1.4e+11,0.0297
1276,2022-04-11,YDUQ3,YDUQS PART,2022,2022-03-15 18:09:20,2021-12-31,3.09e+08,3.69e+09,5.47e+08,0.0788,6.09e+09,2.4e+09,0.228


In [21]:
df_mag['rank_roic'] = (df_mag
    .groupby(by=['year'])['roic']
    .rank(method='dense', ascending=False)
)
df_mag['rank_ey'] = (df_mag
    .groupby(by=['year'])['earnings_yield']
    .rank(method='dense', ascending=False)
)
df_mag['ranks_sum'] = df_mag['rank_roic'] + df_mag['rank_ey']
df_mag['rank_final'] = (df_mag
    .groupby(by=['year'])['ranks_sum']
    .rank(method='first', ascending=True)
)
df_mag

Unnamed: 0,datneg,codneg,nomres,year,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_roic,rank_ey,ranks_sum,rank_final
0,2011-04-11,AEDU3,ANHANGUERA,2011,2011-05-26 11:54:58,2010-12-31,1.46e+08,-5.71e+08,1.57e+08,0.11,5.54e+09,6.11e+09,0.0257,46,58,104,57
1,2011-04-11,ALPA4,ALPARGATAS,2011,2011-03-30 16:55:23,2010-12-31,3.53e+08,-3.59e+08,3.24e+08,0.328,4.01e+09,4.37e+09,0.0742,8,40,48,18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1274,2022-04-11,WEGE3,WEG,2022,2022-02-16 07:05:37,2021-12-31,4.2e+09,-1.43e+09,4.16e+09,0.33,1.38e+11,1.4e+11,0.0297,24,121,145,75
1276,2022-04-11,YDUQ3,YDUQS PART,2022,2022-03-15 18:09:20,2021-12-31,3.09e+08,3.69e+09,5.47e+08,0.0788,6.09e+09,2.4e+09,0.228,110,47,157,80


In [22]:
df_mag.sort_values(by=['year', 'rank_final'], inplace=True)
# Renomear a coluna 'datneg'
df_mag.rename(columns={'datneg': 'cutoff_date'}, inplace=True)
# A coluna 'year' já está implícita na coluna 'cutoff_date' e não será mais usada
df_mag.drop(columns=['year'], inplace=True)
# Remover colunas de cálculo intemediário
df_mag.drop(columns=['rank_roic', 'rank_ey', 'ranks_sum'], inplace=True)
df_mag

Unnamed: 0,cutoff_date,codneg,nomres,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_final
14,2011-04-11,BRPR3,BR PROPERT,2011-03-21 17:07:01,2010-12-31,1.4e+08,1.6e+09,1.21e+09,0.28,2.43e+09,8.35e+08,1.45,1
93,2011-04-11,TOTS3,TOTVS,2011-01-31 19:05:59,2010-12-31,3.15e+07,1.79e+08,2.12e+08,0.261,9.94e+08,8.15e+08,0.26,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1124,2022-04-11,AMAR3,LOJAS MARISA,2022-03-16 19:26:20,2021-12-31,2.62e+08,1.2e+09,4.8e+06,0.00218,7.61e+08,-4.4e+08,-0.0109,150
1144,2022-04-11,COGN3,COGNA ON,2022-03-24 21:24:09,2021-12-31,1.88e+09,5.87e+09,7.84e+07,0.00399,5.05e+09,-8.19e+08,-0.0957,151


In [23]:
# Select only the first 30 companies in the magic rank for each year
df_mag.query('rank_final <= 30', inplace=True)
df_mag.reset_index(drop=True, inplace=True)
print('Number of selected companies for backtesting', df_mag.codneg.nunique())
df_mag

Number of selected companies for backtesting 124


Unnamed: 0,cutoff_date,codneg,nomres,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_final
0,2011-04-11,BRPR3,BR PROPERT,2011-03-21 17:07:01,2010-12-31,1.4e+08,1.6e+09,1.21e+09,0.28,2.43e+09,8.35e+08,1.45,1
1,2011-04-11,TOTS3,TOTVS,2011-01-31 19:05:59,2010-12-31,3.15e+07,1.79e+08,2.12e+08,0.261,9.94e+08,8.15e+08,0.26,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
358,2022-04-11,PDTC3,PADTEC,2022-03-15 18:02:05,2021-12-31,7.84e+07,2.03e+07,5.53e+07,0.372,4.02e+08,3.81e+08,0.145,29
359,2022-04-11,POSI3,POSITIVO TEC,2022-03-30 18:17:00,2021-12-31,1.42e+08,5.42e+08,3.03e+08,0.176,1.21e+09,6.69e+08,0.453,30


In [24]:
df_mag.to_csv('../data/2_magic_stocks.csv', index=False)

In [25]:
df_mag = pd.read_csv('../data/2_magic_stocks.csv')
df_mag

Unnamed: 0,cutoff_date,codneg,nomres,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_final
0,2011-04-11,BRPR3,BR PROPERT,2011-03-21 17:07:01,2010-12-31,1.4e+08,1.6e+09,1.21e+09,0.28,2.43e+09,8.35e+08,1.45,1
1,2011-04-11,TOTS3,TOTVS,2011-01-31 19:05:59,2010-12-31,3.15e+07,1.79e+08,2.12e+08,0.261,9.94e+08,8.15e+08,0.26,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
358,2022-04-11,PDTC3,PADTEC,2022-03-15 18:02:05,2021-12-31,7.84e+07,2.03e+07,5.53e+07,0.372,4.02e+08,3.81e+08,0.145,29
359,2022-04-11,POSI3,POSITIVO TEC,2022-03-30 18:17:00,2021-12-31,1.42e+08,5.42e+08,3.03e+08,0.176,1.21e+09,6.69e+08,0.453,30


In [26]:
# Check 2022 data
df_22 = df_mag.query('cutoff_date > "2022-01-01"')[['codneg', 'nomres']].reset_index(drop=True)
df_22.index += 1
print(df_22.to_markdown(mode='github'))

|    | codneg   | nomres       |
|---:|:---------|:-------------|
|  1 | BRKM3    | BRASKEM      |
|  2 | SYNE3    | SYN PROP TEC |
|  3 | BRAP4    | BRADESPAR    |
|  4 | GOAU4    | GERDAU MET   |
|  5 | USIM3    | USIMINAS     |
|  6 | TASA4    | TAURUS ARMAS |
|  7 | PETR4    | PETROBRAS    |
|  8 | GGBR4    | GERDAU       |
|  9 | JBSS3    | JBS          |
| 10 | CMIN3    | CSNMINERACAO |
| 11 | VALE3    | VALE         |
| 12 | BEEF3    | MINERVA      |
| 13 | ENAT3    | ENAUTA PART  |
| 14 | DEXP3    | DEXXOS PAR   |
| 15 | PLPL3    | PLANOEPLANO  |
| 16 | SUZB3    | SUZANO S.A.  |
| 17 | EUCA4    | EUCATEX      |
| 18 | LEVE3    | METAL LEVE   |
| 19 | RANI3    | IRANI        |
| 20 | PTBL3    | PORTOBELLO   |
| 21 | CURY3    | CURY S/A     |
| 22 | KLBN4    | KLABIN S/A   |
| 23 | ALLD3    | ALLIED       |
| 24 | RAPT4    | RANDON PART  |
| 25 | FESA4    | FERBASA      |
| 26 | PARD3    | IHPARDINI    |
| 27 | SLCE3    | SLC AGRICOLA |
| 28 | JALL3    | JALLESMACHAD |
| 29 | PDT

In [26]:
df_mag.query('codneg == "PRIO3"')['codneg']

Unnamed: 0,cutoff_date,codneg,nomres,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_final
308,2021-04-12,PRIO3,PETRORIO,2021-03-02 00:08:01,2020-12-31,145000000.0,1700000000.0,943000000.0,0.194,2790000000.0,1090000000.0,0.868,9
