In [14]:
import pandas as pd
import numpy as np
from datetime import date
from time import sleep
import os
from io import StringIO

In [15]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

def scrape_fundsexplorer_selenium():
    """
    Scrape the FundsExplorer ranking table with Selenium.

    Opens https://www.fundsexplorer.com.br/ranking in Chrome, makes a
    best-effort attempt to dismiss the cookie banner and the "Popup CTA"
    iframe, tries to enable every column through the column selector, and
    finally parses the rendered page source with ``pd.read_html``.

    Returns:
        pandas.DataFrame: the first HTML table found in the page source.
        Values are returned as parsed by ``pd.read_html``; no cleaning is
        done here.

    Raises:
        Exception: any error raised while navigating or parsing is printed,
            a screenshot is attempted, and the original error is re-raised.
            The browser is closed in every case.
    """

    print("="*46)
    print(" - Iniciando scraping do FundsExplorer.com ...")
    print("="*46)

    # Configure Chrome
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    chrome_options.add_argument('--start-maximized')
    chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
    # chrome_options.add_argument('--headless=new')  # Uncomment for headless mode

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    driver.implicitly_wait(10)
    wait = WebDriverWait(driver, 20)

    try:
        # Open the ranking page
        print("üåê Acessando FundsExplorer ranking")
        driver.get("https://www.fundsexplorer.com.br/ranking")
        sleep(3)

        # Dismiss the cookie banner (best effort: a missing button is not an error)
        print("üç™ Fechando popup de cookies...")
        try:
            cookie_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, '//*[@id="hs-eu-confirmation-button"]'))
            )
            cookie_button.click()
            print("‚úÖ Cookie aceito!")
            sleep(2)
        except TimeoutException:
            print("‚ö†Ô∏è  Bot√£o de cookie n√£o encontrado")

        # Dismiss the advertising popup, which lives inside its own iframe
        print("üö´ Tentando fechar popup de propaganda...")
        try:
            iframe_element = driver.find_element(By.XPATH, "//iframe[@title='Popup CTA']")
            driver.switch_to.frame(iframe_element)
            close_button = driver.find_element(By.XPATH, "/html/body/div/div[1]")
            close_button.click()
            driver.switch_to.default_content()
            print("‚úÖ Popup fechado!")
            sleep(2)
        except (NoSuchElementException, TimeoutException):
            print("‚ö†Ô∏è  Popup n√£o encontrado - FECHE MANUALMENTE se aparecer")
            # Leave the iframe context (if it was entered) and give the user
            # a few seconds to close the popup by hand.
            driver.switch_to.default_content()
            sleep(5)

        # Scroll down a little before interacting with the column selector
        print(" - Rolando a p√°gina...")
        driver.execute_script("window.scrollBy(0, 500);")
        sleep(2)

        # Enable every column (best effort: on timeout the default columns are scraped)
        print(" - Selecionando todas as colunas...")
        try:
            columns_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, '//*[@id="colunas-ranking__select-button"]'))
            )
            columns_button.click()
            sleep(2)

            # NOTE(review): absolute XPath; it will break if the page layout changes.
            all_columns_option = wait.until(
                EC.element_to_be_clickable((By.XPATH, '/html/body/div[7]/div[1]/div/div[2]/div[2]/ul/li[1]/label'))
            )
            all_columns_option.click()
            sleep(3)
            print("‚úÖ Todas as colunas selecionadas!")
        except TimeoutException as e:
            print(f"‚ö†Ô∏è  Erro ao selecionar colunas: {e}")

        # Parse the tables out of the rendered page
        print(" - Extraindo dados da tabela...")
        html_str = driver.page_source
        tabelas_html = pd.read_html(StringIO(html_str))

        if len(tabelas_html) == 0:
            raise Exception("‚ùå Nenhuma tabela encontrada na p√°gina!")

        df = tabelas_html[0]
        print(f"‚úÖ Tabela extra√≠da! {len(df)} fundos encontrados")

        return df

    except Exception as e:
        print(f"‚ùå Erro durante o scraping: {e}")
        # The screenshot is only a debugging aid. If taking it fails (for
        # example because the browser session is already gone) that failure
        # must not replace the original error, so it is reported and dropped.
        try:
            if driver.save_screenshot("error_screenshot_selenium.png"):
                print("üì∏ Screenshot do erro salvo")
            else:
                print("Falha ao salvar screenshot do erro")
        except Exception as screenshot_error:
            print(f"Falha ao salvar screenshot do erro: {screenshot_error}")
        raise

    finally:
        print("üîí Fechando navegador...")
        driver.quit()

In [16]:
# Start from an empty frame so `df` exists even when the scrape fails.
df = pd.DataFrame()
try:
    df = scrape_fundsexplorer_selenium()
except Exception as e:
    # The failure is reported but not re-raised; `df` stays empty.
    print(f"\nüí• Falha no scraping: {e}")
else:
    print("\nüéâ Sucesso! Dados extra√≠dos com Selenium!")

 - Iniciando scraping do FundsExplorer.com ...
üåê Acessando FundsExplorer ranking
üç™ Fechando popup de cookies...
‚úÖ Cookie aceito!
üö´ Tentando fechar popup de propaganda...
‚úÖ Popup fechado!
 - Rolando a p√°gina...
 - Selecionando todas as colunas...
‚úÖ Todas as colunas selecionadas!
 - Extraindo dados da tabela...
‚úÖ Tabela extra√≠da! 548 fundos encontrados
üîí Fechando navegador...

üéâ Sucesso! Dados extra√≠dos com Selenium!


In [17]:
def converter_valor(x):
    """Convert a Brazilian-formatted value to ``float``.

    Args:
        x: a scalar cell value (string or number).

    Returns:
        float: ``np.nan`` for missing values and for the placeholders
        ``'N/A'``, ``'-'`` and the empty string (surrounding whitespace is
        ignored). Otherwise:

        * text containing a comma is read as Brazilian notation
          (``'.'`` thousands separator, ``','`` decimal separator), e.g.
          ``'1.234,56' -> 1234.56``;
        * anything else is read as a number with two implied decimal places
          and divided by 100, e.g. ``9789 -> 97.89``.
          NOTE(review): presumably compensates for ``pd.read_html`` having
          stripped the decimal comma as a thousands separator; confirm.

    Raises:
        ValueError: if the value is none of the above and cannot be parsed
            as a number.
    """
    if pd.isna(x):
        return np.nan

    texto = str(x).strip()
    # Placeholders for "no value" must not abort a whole-column .apply().
    if texto in ('', 'N/A', '-'):
        return np.nan

    if ',' in texto:  # Brazilian notation
        return float(texto.replace('.', '').replace(',', '.'))

    # No comma: interpret as a value with implied cents
    return float(texto) / 100

In [18]:
# df_copia = df.copy()
# df_copia.head(3)

In [19]:
# df = df_copia.copy()
# Columns whose scraped values are converted to floats with converter_valor.
# Caution: this cell is not idempotent. Running it a second time feeds the
# already-converted floats back through converter_valor.
colunas_numericas = [
    'Pre√ßo Atual (R$)',
    'Liquidez Di√°ria (R$)',
    'P/VP',
    '√öltimo Dividendo',
    'Volatilidade',
]
for coluna in colunas_numericas:
    df[coluna] = df[coluna].apply(converter_valor)

# Infinite and missing values are both replaced by the 'N/A' placeholder.
df = df.replace([np.inf, -np.inf], 'N/A').fillna('N/A')

df.head(10)

Unnamed: 0,Fundos,Setor,Pre√ßo Atual (R$),Liquidez Di√°ria (R$),P/VP,√öltimo Dividendo,Dividend Yield,DY (3M) Acumulado,DY (6M) Acumulado,DY (12M) Acumulado,...,DY Patrimonial,Varia√ß√£o Patrimonial,Rentab. Patr. Per√≠odo,Rentab. Patr. Acumulada,Quant. Ativos,Volatilidade,Num. Cotistas,Tax. Gest√£o,Tax. Performance,Tax. Administra√ß√£o
0,AAGR11,Indefinido,97.89,41249.1,,1.46,"1,59 %","1,59 %","4,77 %","14,95 %",...,"0,00 %","0,00 %","0,00 %","0,00 %",0,6043.05,0.0,,,
1,AAZQ11,Indefinido,8.62,712940.71,1.0,0.13,"1,52 %","4,55 %","8,78 %","18,24 %",...,"1,39 %","0,00 %","0,00 %","0,00 %",0,1446.81,30.064,,"10,00 % a.a","1,20 % a.a"
2,ABCP11,Shoppings,83.6,49234.52,0.74,0.65,"0,81 %","2,56 %","4,36 %","9,70 %",...,"0,59 %","20,67 %","21,38 %","30,02 %",1,2675.74,14.076,,,"0,10 % a.a"
3,AFHI11,Pap√©is,96.61,727700.29,1.01,1.01,"1,05 %","3,17 %","6,53 %","13,60 %",...,"1,07 %","-0,43 %","0,63 %","6,94 %",14,770.83,37.615,,,"1,00 % a.a"
4,AGRX11,Indefinido,8.8,283930.0,0.83,0.12,"1,37 %","4,35 %","9,01 %","18,49 %",...,"1,13 %","0,00 %","0,00 %","0,00 %",0,1881.02,18.9,,"10,00 % a.a","1,00 % a.a"
5,AIEC11,Lajes Corporativas,54.01,260118.86,0.71,0.34,"0,64 %","1,92 %","4,07 %","8,12 %",...,"0,44 %","-18,12 %","-17,75 %","-16,80 %",2,1520.53,13.55,,"20,00 % a.a","0,75 % a.a"
6,AJFI11,Shoppings,8.26,356391.67,0.75,0.05,"0,62 %","1,80 %","3,83 %","9,74 %",...,,,,,12,1114.09,11.371,"0,60 % a.a",,"0,10 % a.a"
7,ALMI11,Lajes Corporativas,626.69,12139.26,0.31,4.8,"0,81 %","1,52 %","2,95 %","6,61 %",...,"0,00 %","-0,05 %","-0,05 %","0,90 %",1,4898.99,1.886,,,"0,38 % a.a"
8,ALZC11,Pap√©is,7.96,386095.57,0.84,0.11,"1,38 %","3,91 %","8,03 %","16,60 %",...,,,,,14,3525.03,9.946,,"20,00 % a.a","1,30 % a.a"
9,ALZR11,Misto,10.76,2155196.57,1.01,0.09,"0,79 %","2,36 %","4,81 %","10,03 %",...,"0,79 %","-90,05 %","-89,97 %","-89,17 %",19,876.7,178.582,"0,70 % a.a",,"0,25 % a.a"


In [20]:
# Basic statistics.
# NOTE(review): only numeric columns are summarized; columns that received the
# 'N/A' string placeholder in the cleaning cell are presumably object dtype and
# therefore left out. Confirm if more columns are expected here.
df.describe()

Unnamed: 0,√öltimo Dividendo,Quant. Ativos
count,548.0,548.0
mean,22.451642,7.870438
std,239.259752,10.195332
min,0.0,0.0
25%,0.1575,1.0
50%,0.8,4.0
75%,1.4,14.0
max,5302.02,102.0


In [21]:
def dadosInfra():
    """Scrape three indicators for JURO11, BIDB11 and CPTI11 from Investidor10.

    For each fund the page ``https://investidor10.com.br/fiis/<ticker>/`` is
    opened in Chrome and the text of three fixed XPath locations is read.
    The values are returned as raw page text, without any parsing.

    Returns:
        tuple[str, ...]: nine strings, in this order:
        ``juro11_vpa, juro11_div, juro11_dy, bidb11_vpa, bidb11_div,
        bidb11_dy, cpti11_vpa, cpti11_div, cpti11_dy``.

    Raises:
        selenium.common.exceptions.WebDriverException: if a page cannot be
            loaded or one of the elements is not found. The browser is shut
            down before the error propagates.
    """
    print("="*46)
    print(" - Iniciando scraping do Investidor10.com ...")

    # Configure Chrome
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    chrome_options.add_argument('--start-maximized')
    chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
    # chrome_options.add_argument('--headless=new')  # Uncomment for headless mode

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    driver.implicitly_wait(10)

    # Locations of the three values, in the order they are returned per fund
    # (vpa, div, dy). NOTE(review): positional XPaths; they will break if the
    # page layout changes.
    xpaths = (
        '//*[@id="table-indicators"]/div[12]/div[2]/div/span',
        '//*[@id="table-indicators"]/div[14]/div[2]/div',
        '//*[@id="cards-ticker"]/div[2]/div[2]/div/span',
    )
    tickers = ("juro11", "bidb11", "cpti11")

    valores = []
    try:
        for ticker in tickers:
            print(f" = scraping {ticker.upper()} ")
            driver.get(f"https://investidor10.com.br/fiis/{ticker}/")
            sleep(3)
            for xpath in xpaths:
                valores.append(driver.find_element(By.XPATH, xpath).text)
    finally:
        # quit() ends the whole browser session (close() only closes the
        # window), and running it in `finally` avoids leaking a Chrome
        # process when a lookup fails.
        driver.quit()

    print("="*46)

    return tuple(valores)


In [22]:
# print(" ====== Escreve na planilha")
# # from DT_atualiza_settings import *
# Date stamp (dd/mm/YYYY) used by the upload cell below.
today = date.today().strftime("%d/%m/%Y")

import gspread
from google.oauth2.service_account import Credentials

# OAuth scopes requested for the service account.
scope = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive"
]

# Location of the service-account key file. It can be overridden with the
# GSPREAD_SERVICE_ACCOUNT_FILE environment variable so the notebook also runs
# on other machines; the previous hard-coded location remains the default.
SERVICE_ACCOUNT_FILE = os.environ.get(
    "GSPREAD_SERVICE_ACCOUNT_FILE",
    "/home/yair/GHub/Finance-playground/carteira-328314-2248cd9489bb.json",
)

creds = Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE,
    scopes=scope
)

gc = gspread.authorize(creds)


In [23]:

# Upload the scraped table to the "FundsExp" worksheet of the "Investimentos"
# spreadsheet: the date goes in A1 and the table (header + rows) starts at A2.
if df.empty:
    # `df` is empty when the scrape failed upstream. Clearing the worksheet
    # in that case would wipe the last good snapshot and upload nothing.
    print(" ====== Sem dados para enviar: planilha mantida como estava")
else:
    try:
        # Build the payload before touching the worksheet, so that a failure
        # here leaves the existing contents in place.
        dados_para_envio = [df.columns.values.tolist()] + df.values.tolist()

        # Debug: show the first rows to spot problems in the data
        print("Primeiras linhas dos dados:")
        for i, linha in enumerate(dados_para_envio[:3]):  # only the first 3 rows
            print(f"Linha {i}: {linha[:5]}...")  # only the first 5 values

        planilha = gc.open('Investimentos')
        pagina = planilha.worksheet("FundsExp")
        pagina.clear()

        pagina.update(range_name='a1', values=[[today]])
        pagina.update(range_name='a2', values=dados_para_envio)

        print(" ====== Funds Explorer terminou com sucesso")

    except Exception as e:
        # Any failure falls back to a local CSV so the scraped data is kept.
        print(f" ====== Erro ao escrever na planilha: {e}")
        print(" ====== Salvando dados localmente como backup")
        df.to_csv(f'backup_fundsexplorer_{today.replace("/", "_")}.csv', index=False)


Primeiras linhas dos dados:
Linha 0: ['Fundos', 'Setor', 'Pre√ßo Atual (R$)', 'Liquidez Di√°ria (R$)', 'P/VP']...
Linha 1: ['AAGR11', 'Indefinido', 97.89, 41249.1, 'N/A']...
Linha 2: ['AAZQ11', 'Indefinido', 8.62, 712940.71, 1.0]...


In [24]:

print(" ====== FI-Infra ===== ")

try:
    (juro11_vpa, juro11_div, juro11_dy,
     bidb11_vpa, bidb11_div, bidb11_dy,
     cpti11_vpa, cpti11_div, cpti11_dy) = dadosInfra()

    print(" ====== Escreve na planilha")

    planilha = gc.open('Investimentos')
    pagina = planilha.worksheet('FundsExp')

    # One row covering B1..M1: a label followed by the three values of each
    # fund. Sending it in a single update anchored at B1 replaces twelve
    # separate API calls and avoids a half-written row if a call fails.
    linha_infra = [[
        "juro11 (VPA,Prov,DY)", juro11_vpa, juro11_div, juro11_dy,
        "bidb11 (VPA,Prov,DY)", bidb11_vpa, bidb11_div, bidb11_dy,
        "cpti11 (VPA,Prov,DY)", cpti11_vpa, cpti11_div, cpti11_dy,
    ]]
    pagina.update(range_name='b1', values=linha_infra)

    print(" ====== FI-Infra Terminou")

except Exception as e:
    # Failures in this section are reported but do not stop the notebook.
    print(f" ====== Erro na se√ß√£o FI-Infra: {e}")

# finally:
#     # Certificar que o driver √© fechado mesmo em caso de erro
#     try:
#         driver.close()
#     except:
#         pass

 - Iniciando scraping do Investidor10.com ...
 = scraping JURO11 
 = scraping BIDB11 
 = scraping CPTI11 
