## 1.0 Bibliotecas e Funções

### 1.1. Bibliotecas

In [1]:

# data manipulation libs
import numpy as np
import pandas as pd
# date and time libs
import datetime as dt
from datetime import timedelta
# using a soup lib to scrapp the page
import requests
from bs4 import BeautifulSoup
import urllib.request
import urllib.parse
# lib to pass cookies
import http.cookiejar
from lxml.html import fragment_fromstring
import re
# libs to clean data exported data
from collections import OrderedDict
from decimal import Decimal
from functools import reduce


### 1.2. Funções

In [2]:
# classe contendo todas as funções personalizadas
class functions(object):

    def __init__(self) -> None:
        pass

    def format_currency(x):
        return "R${:,.2f}".format(x)

    def format_perc( x):
        return "{}%".format(x)

    def today():
        return dt.date.today()

    def replace_nan(df,column,to_replace,repl):
        df[column] = df[column].replace(to_replace,repl)
    
    def replace_nan_str(df,column:str,to_replace:str,repl:str):
        df[column] = df[column].str.replace(to_replace,repl)

    def change_type(df,column,type,dy=True):
        if type == 'datetime64':
            df[column] = df[column].astype(type, dayfirst=dy, errors='ignore')
        else:
            df[column] = df[column].astype(type, errors='ignore')

    def options():
        global pd_options
        pd_options = pd.options.mode.chained_assignment = None

    def column_index(df, query_cols):
        cols = df.columns.values
        sidx = np.argsort(cols)
        return sidx[np.searchsorted(cols,query_cols,sorter=sidx)]

    def inicio_mes():
        hoje = dt.datetime.today() 
        inicio_mes_data = hoje - timedelta(hoje.day)+ timedelta(days=1)
        return inicio_mes_data
        
    def round_data(df,columns_to_round):
        df[columns_to_round] = np.round(df[columns_to_round],2)
    
    def centralizar_valor(valor):
        return f'{valor:^10}'

    def merge_all_dfs(dfs,name:str,type_of_merge:str):
        df = reduce(lambda left, right: pd.merge(left,right, on=name, how=type_of_merge), dfs)
        return df
    
    def decimal_point_thousand(df, column):
        df[column] = df[column].apply(lambda x: str(x).replace('.', '', 1))

## 2.0 Extração

In [3]:
# url de extracao
base_url = r"https://www.fundamentus.com.br/resultado.php"

In [4]:
# desabilitar cookies
cookie_jar = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie_jar))
opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201'),
                         ('Accept', 'text/html, text/plain, text/css, text/sgml, */*;q=0.01')]

### 2.1 Extracao de Html

In [5]:
# usar o opener para acessar a url base
html = opener.open(base_url)
# decodificar em ISO8859
html_content = html.read().decode('ISO-8859-1')

In [6]:
# Fazer o soup
soup = BeautifulSoup(html_content,'html.parser')

### 2.2. Criacao de Dicionario

In [7]:
#extrair a tabela do html
table = soup.find_all(
   'table'
)
tables = table[0]
thead = tables.find('thead')
headers_cells = thead.find_all('th')

headers = []

for cell in headers_cells:
   headers.append(cell.get_text(strip=True))

acoes_data = []
rows = tables.find_all('tr')

# criar loop e localizar os dados dentro da tag td
for row in rows[1:]:
   cells = row.find_all('td')
   nome_acao = cells[0].a.get_text(strip=True)  # Obter o texto da tag <a>
   cotacao = cells[1].get_text(strip=True)  # Obter o texto da tag <td>
   p_l = cells[2].get_text(strip=True) 
   p_vp = cells[3].get_text(strip=True)  # Obter o texto da tag <td>
   psr = cells[4].get_text(strip=True) 
   dividend_yield = cells[5].get_text(strip=True) 
   p_ativo = cells[6].get_text(strip=True)  # Obter o texto da tag <td>
   p_cap_giro= cells[7].get_text(strip=True)
   p_ebit = cells[8].get_text(strip=True)
   p_ativ_circ_liq = cells[9].get_text(strip=True)
   ev_ebit = cells[10].get_text(strip=True)
   ev_ebitda = cells[11].get_text(strip=True)
   mrg_ebit = cells[12].get_text(strip=True)
   mrg_liq = cells[13].get_text(strip=True)
   liq_corr = cells[14].get_text(strip=True)
   roic = cells[15].get_text(strip=True)
   roe = cells[16].get_text(strip=True)
   liq_2meses = cells[17].get_text(strip=True)
   patrim_liq = cells[18].get_text(strip=True)
   div_brut_patrimv = cells[19].get_text(strip=True)
   cresc_rec_5av= cells[20].get_text(strip=True)

# criar um dicionario com os valores encontrados

acoes_data.append({'papel':nome_acao, 
                       'cotacao':cotacao, 
                       'p_l':p_l, 
                       'p_vp':p_vp, 
                       'psr':psr, 
                       'div_yield':dividend_yield, 
                       'p_ativo':p_ativo,
                        'p_cap_giro':p_cap_giro, 
                        'p_ebit':p_ebit, 
                        'p_ativ_circ_liq':p_ativ_circ_liq,
                        'ev_ebit':ev_ebit,
                        'ev_ebitda':ev_ebitda,
                        'mrg_ebit':mrg_ebit,
                        'mrg_liq':mrg_liq,
                        'liq_corr':liq_corr,
                        'roic':roic,
                        'roe':roe, 
                        'liq_2meses':liq_2meses,
                        'patrim_liq':patrim_liq, 
                        'div_brut_patrim':div_brut_patrimv, 
                        'cresc_rec_5a':cresc_rec_5av
})

In [8]:
for row in rows[1:]:
    cells = row.find_all('td')
    nome_acao = cells[0].a.get_text(strip=True)  # Obter o texto da tag <a>
    cotacao = cells[1].get_text(strip=True)  # Obter o texto da tag <td>
    p_l = cells[2].get_text(strip=True) 
    p_vp = cells[3].get_text(strip=True)  # Obter o texto da tag <td>
    psr = cells[4].get_text(strip=True) 
    dividend_yield = cells[5].get_text(strip=True) 
    p_ativo = cells[6].get_text(strip=True)  # Obter o texto da tag <td>
    p_cap_giro= cells[7].get_text(strip=True)
    p_ebit = cells[8].get_text(strip=True)
    p_ativ_circ_liq = cells[9].get_text(strip=True)
    ev_ebit = cells[10].get_text(strip=True)
    ev_ebitda = cells[11].get_text(strip=True)
    mrg_ebit = cells[12].get_text(strip=True)
    mrg_liq = cells[13].get_text(strip=True)
    liq_corr = cells[14].get_text(strip=True)
    roic = cells[15].get_text(strip=True)
    roe = cells[16].get_text(strip=True)
    liq_2meses = cells[17].get_text(strip=True)
    patrim_liq = cells[18].get_text(strip=True)
    div_brut_patrimv = cells[19].get_text(strip=True)
    cresc_rec_5av= cells[20].get_text(strip=True)

# criar um dicionario com os valores encontrados

    acoes_data.append({'papel':nome_acao, 
                       'cotacao':cotacao, 
                       'p_l':p_l, 
                       'p_vp':p_vp, 
                       'psr':psr, 
                       'div_yield':dividend_yield, 
                       'p_ativo':p_ativo,
                        'p_cap_giro':p_cap_giro, 
                        'p_ebit':p_ebit, 
                        'p_ativ_circ_liq':p_ativ_circ_liq,
                        'ev_ebit':ev_ebit,
                        'ev_ebitda':ev_ebitda,
                        'mrg_ebit':mrg_ebit,
                        'mrg_liq':mrg_liq,
                        'liq_corr':liq_corr,
                        'roic':roic,
                        'roe':roe, 
                        'liq_2meses':liq_2meses,
                        'patrim_liq':patrim_liq, 
                        'div_brut_patrim':div_brut_patrimv, 
                        'cresc_rec_5a':cresc_rec_5av
    })

#### 2.3. Renderizar Dataframe

In [9]:
# criar dataframe a partir do dicionario
stocks_df = pd.DataFrame.from_dict(
    acoes_data
    )

In [10]:
# dataframe renderizado
stocks_df

Unnamed: 0,papel,cotacao,p_l,p_vp,psr,div_yield,p_ativo,p_cap_giro,p_ebit,p_ativ_circ_liq,...,ev_ebitda,mrg_ebit,mrg_liq,liq_corr,roic,roe,liq_2meses,patrim_liq,div_brut_patrim,cresc_rec_5a
0,UBBR3,1800,"1.466,61",477,0000,"0,00%",0000,000,000,000,...,000,"0,00%","0,00%",000,"0,00%","0,33%",000,"10.317.200.000,00",000,"10,58%"
1,IVTT3,000,000,000,0000,"0,00%",0000,000,000,000,...,000,"0,00%","0,00%",000,"0,00%","-0,40%",000,"1.083.050.000,00",000,"20,67%"
2,CSTB4,14769,000,000,0000,"0,00%",0000,000,000,000,...,000,"40,85%","28,98%",260,"22,40%","20,11%",000,"8.420.670.000,00",014,"31,91%"
3,CSTB3,15000,000,000,0000,"0,00%",0000,000,000,000,...,000,"40,85%","28,98%",260,"22,40%","20,11%",000,"8.420.670.000,00",014,"31,91%"
4,PORP4,240,000,000,0000,"0,00%",0000,000,000,000,...,000,"0,00%","0,00%",000,"0,00%","-2,08%",000,"22.399.000,00",000,"13,66%"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
982,RHDS4,100,36345,2950,4977,"0,00%",7182,-59356,6811,-2455,...,5372,"7,31%","1,17%",097,"11,80%","8,12%",000,"272.045.000,00",149,"4,38%"
983,PRBC4,1454,51191,3916,0000,"0,00%",0000,000,000,000,...,000,"0,00%","0,00%",000,"0,00%","7,65%",000,"1.209.470.000,00",000,"-7,92%"
984,UBBR4,749,61027,199,0000,"0,00%",0000,000,000,000,...,000,"0,00%","0,00%",000,"0,00%","0,33%",000,"10.317.200.000,00",000,"10,58%"
985,UBBR11,1475,"1.201,81",391,0000,"0,00%",0000,000,000,000,...,000,"0,00%","0,00%",000,"0,00%","0,33%",000,"10.317.200.000,00",000,"10,58%"


In [11]:
# selecionar colunas para realizar limpeza
columns_to_replace_perc_ = ['cotacao', 'p_l', 'p_vp', 'psr', 'div_yield', 'p_ativo', 'p_cap_giro', 
                            'p_ebit', 'p_ativ_circ_liq', 'ev_ebit', 'ev_ebitda','div_yield',
                            'mrg_ebit', 'mrg_liq','liq_corr','roic','roe', 'cresc_rec_5a','roe', 'liq_2meses',
       'patrim_liq', 'div_brut_patrim', 'cresc_rec_5a']


In [12]:
# retirar caracteres indesejados das colunas que contem numeros
for perc in columns_to_replace_perc_:
    functions.replace_nan_str(
        stocks_df,
        perc,
        '%',
        ''
    )

    functions.decimal_point_thousand(
        stocks_df,
        perc
    )

    functions.replace_nan_str(
        stocks_df,
        perc,
        ',',
        '.'
    )
    

In [13]:
# alterar o tipo para float64
for type in columns_to_replace_perc_:
    functions.change_type(
        stocks_df,
        type,
        'float64'
    )

In [14]:
# converter virgula para ponto e transformar em float
columns_to_cg = ['cotacao','p_l','p_vp','psr','div_yield','p_ativo','p_cap_giro','p_ebit','p_ativ_circ_liq']

for x in columns_to_cg:
    functions.change_type(stocks_df,x,str)
    functions.replace_nan(stocks_df,x,'.','')

stocks_df.columns

Index(['papel', 'cotacao', 'p_l', 'p_vp', 'psr', 'div_yield', 'p_ativo',
       'p_cap_giro', 'p_ebit', 'p_ativ_circ_liq', 'ev_ebit', 'ev_ebitda',
       'mrg_ebit', 'mrg_liq', 'liq_corr', 'roic', 'roe', 'liq_2meses',
       'patrim_liq', 'div_brut_patrim', 'cresc_rec_5a'],
      dtype='object')

In [15]:
stocks_df[stocks_df['papel'] == 'VALE3']

Unnamed: 0,papel,cotacao,p_l,p_vp,psr,div_yield,p_ativo,p_cap_giro,p_ebit,p_ativ_circ_liq,...,ev_ebitda,mrg_ebit,mrg_liq,liq_corr,roic,roe,liq_2meses,patrim_liq,div_brut_patrim,cresc_rec_5a
571,VALE3,74.65,7.13,1.78,1.642,814.0,0.76,65.77,4.24,-1.94,...,4.06,38.76,23.32,1.08,20.11,2499.0,1658.240.000.00,190172.000.000.00,0.37,1073.0


#### 3.1. Extrair os Setores Das Ações
- Coletar os setores de cada ação para poder analisar cada indicador por setor

In [16]:
setor_url =  'https://www.fundamentus.com.br/detalhes.php?papel=VALE3&h=1'

In [17]:
# usar o opener para acessar a url base
html_1 = opener.open(setor_url)
# decodificar em ISO8859
html_content_1 = html_1.read().decode('ISO-8859-1')

In [18]:
# fazer o scrap do html para coletar os dados do setor dessa acao
soup_1 = BeautifulSoup(html_content_1, 'html.parser')

In [19]:
soup_1

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">

<html lang="pt-br">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>FUNDAMENTUS - VALE3 - Invista consciente - Indicadores Fundamentalistas</title>
<link href="css/estilo.css" media="screen, projection" rel="stylesheet" type="text/css"/>
<link href="css/print.css" media="print" rel="stylesheet" type="text/css"/>
<link href="img/fundamentus.ico" rel="shortcut icon" type="image/x-icon"/>
<!--[if lte IE 6]>
		<link rel="stylesheet" type="text/css" href="css/menu_ie6.css">
		<script type="text/javascript" src="script/ADxMenu.js"></script>
	<![endif]-->
<script src="//ajax.googleapis.com/ajax/libs/mootools/1.11/mootools-yui-compressed.js" type="text/javascript"></script>
<script>
  !function(f,b,e,v,n,t,s)
  {if(f.fbq)return;n=f.fbq=function(){n.callMethod?
  n.callMethod.apply(n,arguments):n.queue.push(arguments)};
  if(!f._fbq)f._fbq=n;n.push=n;n.loaded=!0;n.versi

In [20]:
# pegar todos os dados da html de cada ação
span = soup_1.find_all("span",attrs={'class':'txt'})

In [21]:
# adicionar o texto da var span para uma lista
list_string = []

for cabecalho in span:
    list_string.append(cabecalho.text)

In [22]:
# coleta os nomes de cada item
cabecalhos = list_string[0:len(list_string):2]
cabecalhos = cabecalhos[:14]

In [23]:
# coleta os valores de cada item
data = list_string[1:len(list_string):2]
data = data[:14]

In [24]:
# cria uma lista até 9 itens do scrap
rows_clean = []

for row in data:
    rows_clean.append(str(row[:9]))

In [25]:
# cria dicionario om cabeçãos e data e uma estrutura para verificar se a lista são do mesmo tamanho
if len(cabecalhos) != len(data):
    print("Tamanhos de lista não correspondem.")
else:
    # Crie um dicionário a partir das listas
    data_dict = {cabecalhos[i]: [data[i]] for i in range(len(cabecalhos))}
    

In [26]:
# criar o dataframe com os dados do setor
df_sector = pd.DataFrame(data_dict)

In [27]:
# limpar as colunas
df_sector.columns = df_sector.columns.str.replace('ã','a').str.replace('ú','u').str.replace('Ú','U').str.replace('.','').str.replace('$','').str.replace('ç','c').str.replace('õ','o').str.replace('é','e').str.lower().str.replace(' ','_').str.replace('__','_')

In [28]:
acoes = stocks_df['papel'].unique()

In [29]:
# função para arquivo py
def extract_data_sector(acao):
    setor_url =  fr'https://www.fundamentus.com.br/detalhes.php?papel={acao}&h=1'
    # usar o opener para acessar a url base
    html_1 = opener.open(setor_url)
    # decodificar em ISO8859
    html_content_1 = html_1.read().decode('ISO-8859-1')
    # fazer o scrap do html para coletar os dados do setor dessa acao
    soup_1 = BeautifulSoup(html_content_1, 'html.parser')
    # pegar todos os dados da html de cada ação
    span = soup_1.find_all("span",attrs={'class':'txt'})
    # adicionar o texto da var span para uma lista
    list_string = []

    for cabecalho in span:
        list_string.append(cabecalho.text)
    # coleta os nomes de cada item
    cabecalhos = list_string[0:len(list_string):2]
    cabecalhos = cabecalhos[:14]
    data = list_string[1:len(list_string):2]
    data = data[:14]
    # cria uma lista até 9 itens do scrap
    rows_clean = []

    for row in data:
        rows_clean.append(str(row[:9]))
    if len(cabecalhos) != len(data):
        print("Tamanhos de lista não correspondem.")
    else:
        # Crie um dicionário a partir das listas
        data_dict = {cabecalhos[i]: [data[i]] for i in range(len(cabecalhos))}
    # criar o dataframe com os dados do setor
    df_sector = pd.DataFrame(data_dict)
    parquet_to_append = pd.read_parquet(r'../database/dm_stocks_sector.parquet')
    df_final = pd.concat([parquet_to_append,df_sector])
    df_final.to_parquet(r'../database/dm_stocks_sector.parquet')

In [33]:
extract_data_sector('VALE3')

In [32]:
data = []
for a in acoes:
    data = extract_data_sector(a)
    

KeyboardInterrupt: 

In [34]:
data = pd.read_parquet(r'../database/dm_stocks_sector.parquet')

In [35]:
data.columns = data.columns.str.lower().str.replace('ã','a').str.replace('ú','u').str.replace('Ú','U').str.replace('.','').str.replace('$','').str.replace('ç','c').str.replace('õ','o').str.replace('é','e').str.lower().str.replace(' ','_').str.replace('__','_')

In [36]:
# separar colunas por tipos
datetime_columns = ['ult_balanco_processado','data_ult_cot'] # colunas de data

for slash in datetime_columns:
    functions.replace_nan_str(
        data,
        slash,
        '/','-'
    )
    
'''    functions.change_type(
        data,
        slash,
        'datetime64',
        True
    )'''

"    functions.change_type(\n        data,\n        slash,\n        'datetime64',\n        True\n    )"

In [37]:
data['setor'] = data['setor'].replace('','Sem Setor')
data['subsetor'] = data['subsetor'].replace('','Sem Subsetor')

In [38]:
data.reset_index(drop=True, inplace=True)

In [39]:
data['valor_da_firma'] = data['valor_da_firma'].str.replace('-','0')

In [40]:
decimal_point = ['max_52_sem','min_52_sem','cotacao']

for point in decimal_point:
    functions.decimal_point_thousand(
            data,
            point
        )
    functions.replace_nan_str(data,point,',','.')


In [41]:
data.drop(columns={'cotacao'},inplace=True)

## 3.3. Extracao Dividendos

In [42]:
stock_filter = 'BBDC4'

In [43]:
url_div = r'https://www.fundamentus.com.br/proventos.php?papel={}'.format(stock_filter)

In [44]:
# usar o opener para acessar a url base
html_1 = opener.open(url_div)
# decodificar em ISO8859
html_content_1 = html_1.read().decode('ISO-8859-1')
# soup
div_soup = BeautifulSoup(html_content_1, 'html.parser')

In [45]:
table_div = div_soup.find_all('table')[0]

In [46]:
th = table_div.find_all('thead')[0]

In [47]:
th_1 = th.find_all('th')

In [48]:
headers_div = []

for cab in th_1:
    headers_div.append(cab.get_text(strip=True))

In [49]:
rw = table_div.find_all('td')

In [50]:
texto = []
for dado in rw:
    texto.append(dado.get_text(strip=True))

In [51]:

num_colunas = 5
dados = [texto[i:i+num_colunas] for i in range(0, len(texto), num_colunas)]


In [52]:
dividendos = pd.DataFrame(dados,columns=headers_div)

In [53]:
dividendos['cod_acao'] = stock_filter

In [54]:
dividendos.columns = dividendos.columns.str.lower().str.replace(' ','_').str.replace('ç','c').str.replace('õ','o')

In [55]:
dividendos.columns

Index(['data', 'valor', 'tipo', 'data_de_pagamento', 'por_quantas_acoes',
       'cod_acao'],
      dtype='object')

In [56]:
dividendos['data_extract'] = functions.today()

In [57]:
dividendos_t = dividendos[['cod_acao','data', 'valor', 'tipo', 'data_de_pagamento', 'por_quantas_acoes','data_extract']]

In [58]:
def extract_dividends(stock):
    url_div = r'https://www.fundamentus.com.br/proventos.php?papel={}'.format(stock)
    # usar o opener para acessar a url base
    html_1 = opener.open(url_div)
    # decodificar em ISO8859
    html_content_1 = html_1.read().decode('ISO-8859-1')
    # soup
    div_soup = BeautifulSoup(html_content_1, 'html.parser')
    table_div = div_soup.find_all('table')[0]
    th = table_div.find_all('thead')[0]

    th_1 = th.find_all('th')
    headers_div = []
    for cab in th_1:
        headers_div.append(cab.get_text(strip=True))

    rw = table_div.find_all('td')
    texto = []
    for dado in rw:
        texto.append(dado.get_text(strip=True))
    num_colunas = 5
    dados = [texto[i:i+num_colunas] for i in range(0, len(texto), num_colunas)]
    dividendos = pd.DataFrame(dados,columns=headers_div)
    dividendos['cod_acao'] = stock_filter
    dividendos.columns = dividendos.columns.str.lower().str.replace(' ','_').str.replace('ç','c').str.replace('õ','o')
    dividendos['data_extract'] = functions.today()
    parquet_to_app = pd.read_parquet(r'../database/dm_stocks_div.parquet')
    divi = pd.concat([parquet_to_app,dividendos])
    divi.to_parquet(r'../database/dm_stocks_div.parquet')

In [154]:
dividendos.to_parquet(r'../database/dm_stocks_div.parquet')

In [155]:
data = []
for a in acoes:
    data = extract_dividends(a)

IndexError: list index out of range

## 4.0 Análise

In [59]:
pd.merge(stocks_df,data,how='left',on='papel')

Unnamed: 0,papel,cotacao,p_l,p_vp,psr,div_yield,p_ativo,p_cap_giro,p_ebit,p_ativ_circ_liq,...,empresa,min_52_sem,setor,max_52_sem,subsetor,vol_med_(2m),valor_de_mercado,ult_balanco_processado,valor_da_firma,nro_acoes
0,UBBR3,18.0,1466.61,4.77,0.0,0.0,0.0,0.0,0.0,0.0,...,UNIBANCO SA ON N1,0.00,Intermediários Financeiros,0.00,Bancos,0,49.219.500.000,31-12-2008,0,2.734.420.000
1,UBBR3,18.0,1466.61,4.77,0.0,0.0,0.0,0.0,0.0,0.0,...,UNIBANCO SA ON N1,0.00,Intermediários Financeiros,0.00,Bancos,0,49.219.500.000,31-12-2008,0,2.734.420.000
2,IVTT3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,BHG ON NM,0.00,Exploração de Imóveis,0.00,Exploração de Imóveis,0,0,31-03-2015,0,62.141.400
3,IVTT3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,BHG ON NM,0.00,Exploração de Imóveis,0.00,Exploração de Imóveis,0,0,31-03-2015,0,62.141.400
4,CSTB4,147.69,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,COMPANHIA SIDERÚRGICA DE TUBARÃO PN,0.00,Sem Setor,0.00,Sem Subsetor,0,0,30-09-2005,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1056,PRBC4,14.54,511.91,39.16,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1057,UBBR4,7.49,610.27,1.99,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1058,UBBR11,14.75,1201.81,3.91,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1059,UBBR3,18.0,1466.61,4.77,0.0,0.0,0.0,0.0,0.0,0.0,...,UNIBANCO SA ON N1,0.00,Intermediários Financeiros,0.00,Bancos,0,49.219.500.000,31-12-2008,0,2.734.420.000


In [60]:
pd.merge(stocks_df,data,how='left',on='papel')

Unnamed: 0,papel,cotacao,p_l,p_vp,psr,div_yield,p_ativo,p_cap_giro,p_ebit,p_ativ_circ_liq,...,empresa,min_52_sem,setor,max_52_sem,subsetor,vol_med_(2m),valor_de_mercado,ult_balanco_processado,valor_da_firma,nro_acoes
0,UBBR3,18.0,1466.61,4.77,0.0,0.0,0.0,0.0,0.0,0.0,...,UNIBANCO SA ON N1,0.00,Intermediários Financeiros,0.00,Bancos,0,49.219.500.000,31-12-2008,0,2.734.420.000
1,UBBR3,18.0,1466.61,4.77,0.0,0.0,0.0,0.0,0.0,0.0,...,UNIBANCO SA ON N1,0.00,Intermediários Financeiros,0.00,Bancos,0,49.219.500.000,31-12-2008,0,2.734.420.000
2,IVTT3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,BHG ON NM,0.00,Exploração de Imóveis,0.00,Exploração de Imóveis,0,0,31-03-2015,0,62.141.400
3,IVTT3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,BHG ON NM,0.00,Exploração de Imóveis,0.00,Exploração de Imóveis,0,0,31-03-2015,0,62.141.400
4,CSTB4,147.69,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,COMPANHIA SIDERÚRGICA DE TUBARÃO PN,0.00,Sem Setor,0.00,Sem Subsetor,0,0,30-09-2005,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1056,PRBC4,14.54,511.91,39.16,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1057,UBBR4,7.49,610.27,1.99,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1058,UBBR11,14.75,1201.81,3.91,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1059,UBBR3,18.0,1466.61,4.77,0.0,0.0,0.0,0.0,0.0,0.0,...,UNIBANCO SA ON N1,0.00,Intermediários Financeiros,0.00,Bancos,0,49.219.500.000,31-12-2008,0,2.734.420.000


#### PV
- Criaremos a métrica de 