In [622]:
import pandas as pd
import numpy as np

import os
import sys
sys.path.append('..')

from pathlib import Path
import datetime as dt

import wget
import requests
from zipfile import ZipFile

## Data Collection

Coletando dados de ITR, DFP e FRE

In [630]:
def download_cvm_data(
    data_type,
    initial_year=2011, 
    last_year=2022, 
    save_folder='.'
):

    download_path = os.path.join(save_folder, 'downloaded')
    os.makedirs(download_path, exist_ok=True)

    if data_type=='fre':
        BASE_URL = 'https://dados.cvm.gov.br/dados/CIA_ABERTA/DOC/FRE/DADOS/'
    elif data_type=='itr':
        BASE_URL = 'http://dados.cvm.gov.br/dados/CIA_ABERTA/DOC/ITR/DADOS/'
    if data_type=='dfp':
        BASE_URL = 'https://dados.cvm.gov.br/dados/CIA_ABERTA/DOC/DFP/DADOS/'

    filename_list = []
    for year in range(initial_year, last_year + 1):
        filename_list.append(f'{data_type}_cia_aberta_{year}.zip')


    for filename in filename_list:
        try:
            save_path = os.path.join(download_path, filename)
            
            if not Path(save_path).exists():
                wget.download(
                    BASE_URL + filename, 
                    out=save_path
                )
                
        except Exception as e:
            print(f'The file of year {year} was not found.')
            print('Exception Message:', e, end='\n\n')


In [631]:
START_YEAR = 2011
LAST_YEAR = 2022

itr_save_folder = os.path.join('..', 'cvm_data', 'itr')
fre_save_folder = os.path.join('..', 'cvm_data', 'fre')
dfp_save_folder = os.path.join('..', 'cvm_data', 'dfp')

download_cvm_data('itr', START_YEAR, LAST_YEAR, save_folder=itr_save_folder)
download_cvm_data('dfp', START_YEAR, LAST_YEAR, save_folder=dfp_save_folder)
download_cvm_data('fre', START_YEAR, LAST_YEAR, save_folder=fre_save_folder)

In [632]:
def extract_all_files(folder_path):
    filename_list = os.listdir(folder_path)

    extract_path = os.path.join(Path(folder_path).parent, 'extracted')
    for filename in filename_list:
        ZipFile(
            os.path.join(folder_path, filename), 
            'r'
        ).extractall(extract_path)


extract_all_files(os.path.join(itr_save_folder, 'downloaded'))
extract_all_files(os.path.join(fre_save_folder, 'downloaded'))
extract_all_files(os.path.join(dfp_save_folder, 'downloaded'))

Consolidando os dados anuais

In [633]:
def consolidate_years(folder_path, data_type, document_names, initial_year, last_year):

    for document_name in document_names:
        df_file = pd.DataFrame()
        for year in range(initial_year, last_year+1):
            df_file = pd.concat([
                df_file,
                pd.read_csv(
                    os.path.join(
                        folder_path,
                        f'{data_type}_cia_aberta_{document_name}_{year}.csv'
                    ),
                    sep=';',
                    decimal=',',
                    encoding='ISO-8859-1'
                )
            ])

        save_path = os.path.join(
            Path(folder_path).parent, 
            'consolidated',
            f'{data_type}_{document_name}_2011_2021.csv'
        )
        os.makedirs(Path(save_path).parent, exist_ok=True)

        df_file.to_csv(save_path)


document_names = ['DRE_con', 'BPA_con', 'BPP_con', 'DFC_MI_con']
consolidate_years(
    os.path.join(itr_save_folder, 'extracted'),
    'itr', 
    document_names, 
    2011, 2021
)

document_names = ['DRE_con', 'BPA_con', 'BPP_con', 'DFC_MI_con']
consolidate_years(
    os.path.join(dfp_save_folder, 'extracted'),
    'dfp', 
    document_names, 
    2011, 2021
)

document_names = ['capital_social']
consolidate_years(
    os.path.join(fre_save_folder, 'extracted'),
    'fre', 
    document_names, 
    2011, 2021
)

## Data Processing

In [634]:
COD_CVM = 9512
CNPJ = '33.000.167/0001-01'
TICKER_NAME = 'PETR4.SA'

In [635]:
# Funções de preprocessamento

def preprocess_dre(codigo_cvm):

    df_dfp = pd.read_csv('..\cvm_data\dfp\consolidated\dfp_DRE_con_2011_2021.csv', index_col=0)
    df_dfp.insert(11, 'TIPO_PERIODO','ANUAL')
    df_itr = pd.read_csv('..\cvm_data\itr\consolidated\itr_DRE_con_2011_2021.csv', index_col=0)
    df_itr.insert(11, 'TIPO_PERIODO','TRIMESTRAL')

    df_dre = pd.concat([df_dfp, df_itr])

    df_dre = df_dre[df_dre['CD_CVM'] == codigo_cvm]
    df_dre = df_dre[df_dre['ORDEM_EXERC'] == 'ÚLTIMO']

    df_dre['DT_REFER'] = pd.to_datetime(df_dre['DT_REFER'])
    df_dre['DT_INI_EXERC'] = pd.to_datetime(df_dre['DT_INI_EXERC'])
    df_dre['DT_FIM_EXERC'] = pd.to_datetime(df_dre['DT_FIM_EXERC'])

    # IGNORA SEMESTRAL
    period_days = (df_dre['DT_FIM_EXERC'] - df_dre['DT_INI_EXERC']).dt.days
    df_dre = df_dre[(period_days<100) | (period_days>360)]

    return df_dre

def preprocess_bpp(codigo_cvm):

    df_dfp = pd.read_csv('..\cvm_data\dfp\consolidated\dfp_BPP_con_2011_2021.csv', index_col=0)
    df_itr = pd.read_csv('..\cvm_data\itr\consolidated\itr_BPP_con_2011_2021.csv', index_col=0)

    df_bpp = pd.concat([df_dfp, df_itr])

    df_bpp = df_bpp[df_bpp['CD_CVM'] == COD_CVM]
    df_bpp = df_bpp[df_bpp['ORDEM_EXERC'] == 'ÚLTIMO']

    df_bpp['DT_REFER'] = pd.to_datetime(df_bpp['DT_REFER'])
    df_bpp['DT_FIM_EXERC'] = pd.to_datetime(df_bpp['DT_FIM_EXERC'])

    return df_bpp


def get_dfc_mi(cod_cvm):
    df_dfp = pd.read_csv('..\cvm_data\dfp\consolidated\dfp_DFC_MI_con_2011_2021.csv', index_col=0)
    df_dfp.insert(11, 'TIPO_PERIODO','ANUAL')
    df_itr = pd.read_csv('..\cvm_data\itr\consolidated\itr_DFC_MI_con_2011_2021.csv', index_col=0)
    df_itr.insert(11, 'TIPO_PERIODO','TRIMESTRAL')

    df_dfc = pd.concat([df_dfp, df_itr])

    df_dfc = df_dfc[df_dfc['CD_CVM'] == cod_cvm]
    df_dfc = df_dfc[df_dfc['ORDEM_EXERC'] == 'ÚLTIMO']

    df_dfc['DT_REFER'] = pd.to_datetime(df_dfc['DT_REFER'])
    df_dfc['DT_INI_EXERC'] = pd.to_datetime(df_dfc['DT_INI_EXERC'])
    df_dfc['DT_FIM_EXERC'] = pd.to_datetime(df_dfc['DT_FIM_EXERC'])

    return df_dfc

def preprocess_bpa(codigo_cvm):

    df_dfp = pd.read_csv('..\cvm_data\dfp\consolidated\dfp_BPA_con_2011_2021.csv', index_col=0)
    df_itr = pd.read_csv('..\cvm_data\itr\consolidated\itr_BPA_con_2011_2021.csv', index_col=0)

    df = pd.concat([df_dfp, df_itr])

    df = df[df['CD_CVM'] == COD_CVM]
    df = df[df['ORDEM_EXERC'] == 'ÚLTIMO']

    df['DT_REFER'] = pd.to_datetime(df['DT_REFER'])
    df['DT_FIM_EXERC'] = pd.to_datetime(df['DT_FIM_EXERC'])

    return df

def preprocess_fre(codigo_cvm):
    df_fre = pd.read_csv('../cvm_data/fre/consolidated/fre_capital_social_2011_2021.csv', index_col=0)
    df_fre['Data_Referencia'] = pd.to_datetime(df_fre['Data_Referencia'])
    df_fre = df_fre[df_fre['CNPJ_Companhia'] == CNPJ]
    
    return df_fre


In [636]:
df_dre = preprocess_dre(COD_CVM)
df_bpa = preprocess_bpa(COD_CVM)
df_dfc = get_dfc_mi(COD_CVM)
df_bpp = preprocess_bpp(COD_CVM)
df_fre = preprocess_fre(COD_CVM)

  mask |= (ar1 == a)


In [637]:
# Funções para coleta de dados dos balanços

def get_lucro_liquido_trimestral(df_dre):
    df_dre = df_dre.copy()
    
    first_year = df_dre['DT_REFER'].dt.year.min()
    last_year = df_dre['DT_REFER'].dt.year.max()

    df_dre = df_dre.loc[
        (df_dre['DS_CONTA'].str.contains('Lucro')) &
        (df_dre['DS_CONTA'].str.contains('Prejuízo')) &
        (df_dre['DS_CONTA'].str.contains('Consolidado'))
    ].copy()
    for year in range(first_year, last_year+1):

        df_dre_temp = df_dre[(df_dre['DT_REFER'].dt.year == year)]

        trimestral_sum = df_dre_temp[df_dre_temp['TIPO_PERIODO']=='TRIMESTRAL']['VL_CONTA'].sum()

        anual_index = df_dre_temp[df_dre_temp['TIPO_PERIODO']=='ANUAL'].index[0]

        df_dre.loc[anual_index, 'VL_CONTA'] -= trimestral_sum
        df_dre.loc[anual_index, 'TIPO_PERIODO'] = 'TRIMESTRAL'

    df_dre = df_dre[['CNPJ_CIA', 'DT_REFER', 'VL_CONTA']]
    df_dre.columns = ['cnpj', 'dt_refer', 'value']
    df_dre.insert(2, 'description', 'lucro_liquido')

    return df_dre

def get_qtd_acoes(df_fre):
    df_fre = df_fre.copy()
    df_fre = df_fre[df_fre['Tipo_Capital'] == 'Capital Integralizado']
    df_fre = df_fre[['CNPJ_Companhia', 'Data_Referencia', 'Quantidade_Total_Acoes']]
    df_fre.columns = ['cnpj', 'dt_refer', 'value']
    df_fre.insert(2, 'description', 'qtd_acoes')
    return df_fre


def get_patrimonio_liquido(df_bpp):
    df_bpp = df_bpp.copy()
    df_bpp = df_bpp[
        (df_bpp['DS_CONTA'].str.contains('Patrimônio Líquido'))
    ]
    df_bpp = df_bpp[['CNPJ_CIA', 'DT_REFER', 'VL_CONTA']]
    df_bpp.columns = ['cnpj', 'dt_refer', 'value']
    df_bpp.insert(2, 'description', 'patrimonio_liquido')
    return df_bpp


def get_divida_bruta(df_bpp):
    df_bpp = df_bpp.copy()
    df_bpp = df_bpp[
        df_bpp['DS_CONTA'].str.contains('Empr') & 
        df_bpp['DS_CONTA'].str.contains('Finan') &
        (df_bpp['CD_CONTA'].str.len() == 7)
    ].groupby(['CNPJ_CIA', 'DT_REFER']).sum()[['VL_CONTA']].reset_index()

    df_bpp.columns = ['cnpj', 'dt_refer', 'value']
    df_bpp.insert(2, 'description', 'divida_bruta')
    
    return df_bpp

def get_deprec_e_amort_trimestral(df_dfc):
    df_dfc = df_dfc.copy()
    
    first_year = df_dfc['DT_REFER'].dt.year.min()
    last_year = df_dfc['DT_REFER'].dt.year.max()

    df_dfc = df_dfc.loc[
        (df_dfc['DS_CONTA'].str.contains('Deprec')) &
        (df_dfc['DS_CONTA'].str.contains('Amort'))
    ].sort_values('DT_REFER')

    for year in range(first_year, last_year+1):

        df_dfc_temp = df_dfc[(df_dfc['DT_REFER'].dt.year == year)]

        for i, (index, row) in enumerate(df_dfc_temp.iterrows()):
            if i > 0:
                df_dfc.loc[index, 'VL_CONTA'] = df_dfc_temp.iloc[i]['VL_CONTA'] - df_dfc_temp.iloc[i-1]['VL_CONTA']

    df_dfc = df_dfc[['CNPJ_CIA', 'DT_REFER', 'VL_CONTA']]
    df_dfc.columns = ['cnpj', 'dt_refer', 'value']
    df_dfc.insert(2, 'description', 'deprec_e_amort')

    return df_dfc

def get_caixa(df_bpa):
    df_bpa = df_bpa.copy()
    df_bpa = df_bpa[
        (df_bpa['DS_CONTA'].str.contains('Caixa') &
        df_bpa['DS_CONTA'].str.contains('Equiv')) |
        (df_bpa['DS_CONTA'].str.contains('Aplicações Financeiras') &
        (df_bpa['CD_CONTA'].str.len()==7))
    ].groupby(['CNPJ_CIA', 'DT_REFER']).sum()['VL_CONTA'].reset_index()

    df_bpa.columns = ['cnpj', 'dt_refer', 'value']
    df_bpa.insert(2, 'description', 'caixa')
    return df_bpa

def get_ebit(df_dre):
    df_dre = df_dre.copy()
        
    first_year = df_dre['DT_REFER'].dt.year.min()
    last_year = df_dre['DT_REFER'].dt.year.max()

    df_dre = df_dre.loc[
        ((df_dre['DS_CONTA'].str.contains('Resultado Bruto')) |
        ((df_dre['DS_CONTA'].str.contains('Despesas')) &
        (df_dre['DS_CONTA'].str.contains('Rece')) &
        (df_dre['DS_CONTA'].str.contains('Opera')))) &
        (df_dre['CD_CONTA'].str.len() == 4)
    ].copy()

    df_dre = df_dre.groupby(
        ['CNPJ_CIA', 'DT_REFER', 'TIPO_PERIODO']
    ).sum()['VL_CONTA'].reset_index()

    for year in range(first_year, last_year+1):

        df_dre_temp = df_dre[(df_dre['DT_REFER'].dt.year == year)]

        trimestral_sum = df_dre_temp[df_dre_temp['TIPO_PERIODO']=='TRIMESTRAL']['VL_CONTA'].sum()

        anual_index = df_dre_temp[df_dre_temp['TIPO_PERIODO']=='ANUAL'].index[0]

        df_dre.loc[anual_index, 'VL_CONTA'] -= trimestral_sum
        df_dre.loc[anual_index, 'TIPO_PERIODO'] = 'TRIMESTRAL'
    
    df_dre = df_dre[['CNPJ_CIA', 'DT_REFER', 'VL_CONTA']]
    df_dre.columns = ['cnpj', 'dt_refer', 'value']
    df_dre.insert(2, 'description', 'ebit')

    return df_dre

def get_receita_liquida(df_dre):
    df_dre = df_dre.copy()
    
    first_year = df_dre['DT_REFER'].dt.year.min()
    last_year = df_dre['DT_REFER'].dt.year.max()

    df_dre = df_dre.loc[
        (df_dre['DS_CONTA'].str.contains('Receita ')) &
        (df_dre['DS_CONTA'].str.contains('Bens')) &
        (df_dre['DS_CONTA'].str.contains('Serviços'))
    ].copy()

    for year in range(first_year, last_year+1):
        df_dre_temp = df_dre[(df_dre['DT_REFER'].dt.year == year)]

        trimestral_sum = df_dre_temp[df_dre_temp['TIPO_PERIODO']=='TRIMESTRAL']['VL_CONTA'].sum()

        anual_index = df_dre_temp[df_dre_temp['TIPO_PERIODO']=='ANUAL'].index[0]

        df_dre.loc[anual_index, 'VL_CONTA'] -= trimestral_sum
        df_dre.loc[anual_index, 'TIPO_PERIODO'] = 'TRIMESTRAL'

    df_dre = df_dre[['CNPJ_CIA', 'DT_REFER', 'VL_CONTA']]
    df_dre.columns = ['cnpj', 'dt_refer', 'value']
    df_dre.insert(2, 'description', 'receita_liquida')

    return df_dre.sort_values('dt_refer')

In [638]:
df_petr_lucro_liquido = get_lucro_liquido_trimestral(df_dre)
df_qtd_acoes = get_qtd_acoes(df_fre)
df_patrimonio_liquido = get_patrimonio_liquido(df_bpp)
df_divida_bruta = get_divida_bruta(df_bpp)
df_deprec_amort = get_deprec_e_amort_trimestral(df_dfc)
df_caixa = get_caixa(df_bpa)
df_ebit = get_ebit(df_dre)
df_receita_liquida = get_receita_liquida(df_dre)




In [639]:
df = pd.concat([
    df_petr_lucro_liquido, df_qtd_acoes, df_patrimonio_liquido,
    df_divida_bruta, df_deprec_amort, df_caixa, df_ebit, df_receita_liquida
])
df.insert(0, 'ticker', TICKER_NAME)
df = df.pivot_table(
    index=['ticker', 'cnpj', 'dt_refer'],
    columns='description',
    values='value'
)

df['qtd_acoes'] = df['qtd_acoes'].fillna(method='ffill')

df = df.dropna()

df['lucro_liquido'] = df['lucro_liquido'].rolling(4).sum()
df['receita_liquida'] = df['receita_liquida'].rolling(4).sum()
df['ebit'] = df['ebit'].rolling(4).sum()
df['deprec_e_amort'] = df['deprec_e_amort'].rolling(4).sum()

df = df.dropna()


df = df.reset_index()
df

description,ticker,cnpj,dt_refer,caixa,deprec_e_amort,divida_bruta,ebit,lucro_liquido,patrimonio_liquido,qtd_acoes,receita_liquida
0,PETR4.SA,33.000.167/0001-01,2011-12-31,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,13044500000.0,244176142.0
1,PETR4.SA,33.000.167/0001-01,2012-03-31,57907297.0,18930844.0,164136498.0,43323122.0,31343117.0,338822385.0,13044500000.0,255509674.0
2,PETR4.SA,33.000.167/0001-01,2012-06-30,45986786.0,20160138.0,179175248.0,35965514.0,18061322.0,338889016.0,13044500000.0,262088198.0
3,PETR4.SA,33.000.167/0001-01,2012-09-30,52860878.0,21585057.0,186556437.0,32907819.0,18423191.0,344648378.0,13044500000.0,271702111.0
4,PETR4.SA,33.000.167/0001-01,2012-12-31,48943729.0,21766102.0,196313349.0,31476031.0,20959362.0,345433653.0,13044500000.0,281379482.0
5,PETR4.SA,33.000.167/0001-01,2013-03-31,46536684.0,23399387.0,196933959.0,29574854.0,19366898.0,337664977.0,13044500000.0,287780798.0
6,PETR4.SA,33.000.167/0001-01,2013-06-30,73031576.0,25066168.0,249040586.0,36215012.0,26916500.0,340025548.0,13044500000.0,293359778.0
7,PETR4.SA,33.000.167/0001-01,2013-09-30,57895756.0,26887739.0,250865895.0,33409858.0,24823276.0,343102187.0,13044500000.0,297267163.0
8,PETR4.SA,33.000.167/0001-01,2013-12-31,46272389.0,28467224.0,267820895.0,34356460.0,23006885.0,349333684.0,13044500000.0,304889934.0
9,PETR4.SA,33.000.167/0001-01,2014-03-31,78492492.0,29207892.0,308146801.0,32112252.0,20957410.0,355769648.0,13044500000.0,313899392.0


In [640]:
import yfinance

ticker = yfinance.Ticker(TICKER_NAME)

df_close = ticker.history(
    start=df['dt_refer'].min(),
    end=df['dt_refer'].max() + dt.timedelta(days=1),
    auto_adjust=False
).reset_index()

df_close = df_close[['Date', 'Close', 'Dividends']]
df_close.columns = ['dt_refer', 'close_price', 'dividends']
df_close




Unnamed: 0,dt_refer,close_price,dividends
0,2012-01-02,21.730000,0.0000
1,2012-01-03,22.410000,0.1773
2,2012-01-04,22.570000,0.0000
3,2012-01-05,22.309999,0.0000
4,2012-01-06,22.330000,0.0000
...,...,...,...
2474,2021-12-23,28.330000,0.0000
2475,2021-12-27,28.750000,0.0000
2476,2021-12-28,28.780001,0.0000
2477,2021-12-29,28.540001,0.0000


In [645]:
df_final = df.merge(df_close, how='outer', on='dt_refer')
df_final = df_final.sort_values('dt_refer')
df_final = df_final.fillna(method='ffill')
df_final = df_final.dropna()
df_final

Unnamed: 0,ticker,cnpj,dt_refer,caixa,deprec_e_amort,divida_bruta,ebit,lucro_liquido,patrimonio_liquido,qtd_acoes,receita_liquida,close_price,dividends
41,PETR4.SA,33.000.167/0001-01,2012-01-02,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,244176142.0,21.730000,0.0000
42,PETR4.SA,33.000.167/0001-01,2012-01-03,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,244176142.0,22.410000,0.1773
43,PETR4.SA,33.000.167/0001-01,2012-01-04,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,244176142.0,22.570000,0.0000
44,PETR4.SA,33.000.167/0001-01,2012-01-05,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,244176142.0,22.309999,0.0000
45,PETR4.SA,33.000.167/0001-01,2012-01-06,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,244176142.0,22.330000,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2497,PETR4.SA,33.000.167/0001-01,2021-12-27,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,393450000.0,28.750000,0.0000
2498,PETR4.SA,33.000.167/0001-01,2021-12-28,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,393450000.0,28.780001,0.0000
2499,PETR4.SA,33.000.167/0001-01,2021-12-29,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,393450000.0,28.540001,0.0000
2500,PETR4.SA,33.000.167/0001-01,2021-12-30,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,393450000.0,28.450001,0.0000


In [646]:
df_final['dt_refer'] = df_final['dt_refer'].shift(-20) 
df_final['close_price'] = df_final['close_price'].shift(-20) 

df_final = df_final.dropna()
df_final

Unnamed: 0,ticker,cnpj,dt_refer,caixa,deprec_e_amort,divida_bruta,ebit,lucro_liquido,patrimonio_liquido,qtd_acoes,receita_liquida,close_price,dividends
41,PETR4.SA,33.000.167/0001-01,2012-01-31,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,244176142.0,24.570000,0.000000
42,PETR4.SA,33.000.167/0001-01,2012-02-01,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,244176142.0,24.950001,0.177300
43,PETR4.SA,33.000.167/0001-01,2012-02-02,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,244176142.0,24.530001,0.000000
44,PETR4.SA,33.000.167/0001-01,2012-02-03,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,244176142.0,24.610001,0.000000
45,PETR4.SA,33.000.167/0001-01,2012-02-06,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,244176142.0,24.860001,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2477,PETR4.SA,33.000.167/0001-01,2021-12-27,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,393450000.0,28.750000,0.000000
2478,PETR4.SA,33.000.167/0001-01,2021-12-28,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,393450000.0,28.780001,0.000000
2479,PETR4.SA,33.000.167/0001-01,2021-12-29,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,393450000.0,28.540001,0.000000
2480,PETR4.SA,33.000.167/0001-01,2021-12-30,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,393450000.0,28.450001,0.000000


In [647]:
# TODO - Calcular EBITDA

df_final['EBITDA'] = df_final['ebit'] + df_final['deprec_e_amort']
df_final['divida_liquida'] = df_final['divida_bruta'] - df_final['caixa']
df_final['LPA'] = df_final['lucro_liquido']*1000 / df_final['qtd_acoes']
df_final['VPA'] = df_final['patrimonio_liquido']*1000 / df_final['qtd_acoes']


df_final['P/L'] = df_final['close_price'] / df_final['LPA']
df_final['P/EBITDA'] = df_final['close_price'] / df_final['EBITDA']
df_final['P/VPA'] = df_final['close_price'] / df_final['VPA']
df_final['DL/PL'] = df_final['divida_liquida'] / df_final['patrimonio_liquido']
df_final['DL/EBITDA'] = df_final['divida_liquida'] / df_final['EBITDA']
df_final['DL/EBIT'] = df_final['divida_liquida'] / df_final['ebit']
df_final['ROE'] = df_final['lucro_liquido'] / df_final['patrimonio_liquido']
df_final['MARGEM_EBITDA'] = df_final['EBITDA'] / df_final['receita_liquida']
df_final['MARGEM_EBIT'] = df_final['ebit'] / df_final['receita_liquida']
df_final['MARGEM_LIQUIDA'] = df_final['lucro_liquido'] / df_final['receita_liquida']

df_final

Unnamed: 0,ticker,cnpj,dt_refer,caixa,deprec_e_amort,divida_bruta,ebit,lucro_liquido,patrimonio_liquido,qtd_acoes,...,P/L,P/EBITDA,P/VPA,DL/PL,DL/EBITDA,DL/EBIT,ROE,MARGEM_EBITDA,MARGEM_EBIT,MARGEM_LIQUIDA
41,PETR4.SA,33.000.167/0001-01,2012-01-31,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,...,9.680068,3.964936e-07,0.964722,0.310029,1.662126,2.328781,0.099661,0.253785,0.181134,0.135597
42,PETR4.SA,33.000.167/0001-01,2012-02-01,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,...,9.829780,4.026258e-07,0.979642,0.310029,1.662126,2.328781,0.099661,0.253785,0.181134,0.135597
43,PETR4.SA,33.000.167/0001-01,2012-02-02,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,...,9.664309,3.958481e-07,0.963151,0.310029,1.662126,2.328781,0.099661,0.253785,0.181134,0.135597
44,PETR4.SA,33.000.167/0001-01,2012-02-03,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,...,9.695827,3.971391e-07,0.966292,0.310029,1.662126,2.328781,0.099661,0.253785,0.181134,0.135597
45,PETR4.SA,33.000.167/0001-01,2012-02-06,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,...,9.794322,4.011734e-07,0.976108,0.310029,1.662126,2.328781,0.099661,0.253785,0.181134,0.135597
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2477,PETR4.SA,33.000.167/0001-01,2021-12-27,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,...,2.757710,1.028722e-07,0.998244,0.696880,0.936799,1.187072,0.361983,0.710314,0.560557,0.345642
2478,PETR4.SA,33.000.167/0001-01,2021-12-28,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,...,2.760588,1.029795e-07,0.999286,0.696880,0.936799,1.187072,0.361983,0.710314,0.560557,0.345642
2479,PETR4.SA,33.000.167/0001-01,2021-12-29,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,...,2.737567,1.021208e-07,0.990953,0.696880,0.936799,1.187072,0.361983,0.710314,0.560557,0.345642
2480,PETR4.SA,33.000.167/0001-01,2021-12-30,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,...,2.728934,1.017987e-07,0.987828,0.696880,0.936799,1.187072,0.361983,0.710314,0.560557,0.345642


In [648]:
save_path = os.path.join('..', 'cvm_data', f'{TICKER_NAME}_FUND.csv')
df_final_to_save = df_final.rename(columns={'dt_refer': 'date'}).iloc[:, 2:]
df_final_to_save.to_csv(save_path, index=False)

Unnamed: 0,ticker,cnpj,dt_refer,caixa,deprec_e_amort,divida_bruta,ebit,lucro_liquido,patrimonio_liquido,qtd_acoes,...,P/L,P/EBITDA,P/VPA,DL/PL,DL/EBITDA,ROE,MARGEM_EBITDA,DL/EBIT,MARGEM_EBIT,MARGEM_LIQUIDA
41,PETR4.SA,33.000.167/0001-01,2012-01-02,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,...,8.561167,3.506636e-07,0.853211,0.310029,1.662126,0.099661,0.253785,2.328781,0.181134,0.135597
42,PETR4.SA,33.000.167/0001-01,2012-01-03,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,...,8.829073,3.616370e-07,0.879911,0.310029,1.662126,0.099661,0.253785,2.328781,0.181134,0.135597
43,PETR4.SA,33.000.167/0001-01,2012-01-04,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,...,8.892109,3.642190e-07,0.886193,0.310029,1.662126,0.099661,0.253785,2.328781,0.181134,0.135597
44,PETR4.SA,33.000.167/0001-01,2012-01-05,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,...,8.789675,3.600233e-07,0.875984,0.310029,1.662126,0.099661,0.253785,2.328781,0.181134,0.135597
45,PETR4.SA,33.000.167/0001-01,2012-01-06,52555707.0,17739496.0,155554694.0,44228720.0,33109612.0,332223611.0,1.304450e+10,...,8.797554,3.603460e-07,0.876770,0.310029,1.662126,0.099661,0.253785,2.328781,0.181134,0.135597
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2497,PETR4.SA,33.000.167/0001-01,2021-12-27,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,...,2.757710,1.028722e-07,0.998244,0.696880,0.936799,0.361983,0.710314,1.187072,0.560557,0.345642
2498,PETR4.SA,33.000.167/0001-01,2021-12-28,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,...,2.760588,1.029795e-07,0.999286,0.696880,0.936799,0.361983,0.710314,1.187072,0.560557,0.345642
2499,PETR4.SA,33.000.167/0001-01,2021-12-29,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,...,2.737567,1.021208e-07,0.990953,0.696880,0.936799,0.361983,0.710314,1.187072,0.560557,0.345642
2500,PETR4.SA,33.000.167/0001-01,2021-12-30,62314000.0,58922000.0,324124000.0,220551000.0,135993000.0,375689000.0,1.304450e+10,...,2.728934,1.017987e-07,0.987828,0.696880,0.936799,0.361983,0.710314,1.187072,0.560557,0.345642


In [615]:

# TODO - Conferir detalhadamente
# display(df_final[df_final['dt_refer'] > '2021-01-01'].iloc[[0], 3:-1])
# display(df_final[df_final['dt_refer'] > '2020-01-01'].iloc[[0], 3:-1])
# display(df_final[df_final['dt_refer'] > '2019-01-01'].iloc[[0], 3:-1])

Unnamed: 0,caixa,deprec_e_amort,divida_bruta,ebit,lucro_liquido,patrimonio_liquido,qtd_acoes,receita_liquida,close_price,dividends,...,divida_liquida,LPA,VPA,P/L,P/EBITDA,P/VPA,DL/PL,DL/EBITDA,ROE,MARGEM_EBITDA
2257,64280000.0,58305000.0,392548000.0,49621000.0,6246000.0,311150000.0,13044500000.0,272069000.0,28.91,0.0,...,328268000.0,0.478823,23.85297,60.377266,2.678687e-07,1.212008,1.055015,3.041603,0.020074,0.396686


Unnamed: 0,caixa,deprec_e_amort,divida_bruta,ebit,lucro_liquido,patrimonio_liquido,qtd_acoes,receita_liquida,close_price,dividends,...,divida_liquida,LPA,VPA,P/L,P/EBITDA,P/VPA,DL/PL,DL/EBITDA,ROE,MARGEM_EBITDA
2012,33294000.0,58502000.0,351161000.0,81701000.0,40970000.0,299137000.0,13044500000.0,302245000.0,30.700001,0.0,...,317867000.0,3.140788,22.932046,9.774617,2.189682e-07,1.338738,1.062613,2.267191,0.136961,0.463872


Unnamed: 0,caixa,deprec_e_amort,divida_bruta,ebit,lucro_liquido,patrimonio_liquido,qtd_acoes,receita_liquida,close_price,dividends,...,divida_liquida,LPA,VPA,P/L,P/EBITDA,P/VPA,DL/PL,DL/EBITDA,ROE,MARGEM_EBITDA
1765,58052000.0,43646000.0,326876000.0,64876000.0,26698000.0,283543000.0,13044500000.0,349836000.0,24.059999,0.0,...,268824000.0,2.046687,21.736599,11.755584,2.217062e-07,1.106889,0.948089,2.477138,0.094159,0.310208
