In [30]:
!pip install -U undetected-chromedriver selenium --quiet


[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\Migue\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [31]:
import os
import time
import random
import traceback
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    TimeoutException,
    WebDriverException,
    StaleElementReferenceException,
    NoSuchElementException,
)
import undetected_chromedriver as uc

In [32]:
# Chrome launch flags used for the browser session started by this notebook.
# Add "--headless=new" to the tuple below to run without a visible window.
options = uc.ChromeOptions()
for _chrome_flag in (
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--start-maximized",
):
    options.add_argument(_chrome_flag)

In [33]:
def start_driver(version_main=140):
    """Start an undetected-chromedriver Chrome session.

    The first attempt pins the Chrome major version to ``version_main``. If
    that fails for any reason, a second attempt lets the library choose the
    version on its own.

    Args:
        version_main: Chrome major version passed to ``uc.Chrome`` on the
            first attempt (adjust it to the locally installed Chrome).

    Returns:
        The started ``uc.Chrome`` driver.

    Raises:
        Exception: whatever ``uc.Chrome`` raised on the fallback attempt.
    """
    def _fresh_options():
        # undetected-chromedriver rejects a ChromeOptions object that was
        # already handed to an earlier Chrome() call, so each attempt gets its
        # own copy. Only command-line arguments are copied, which is all the
        # notebook-level ``options`` object configures.
        copied = uc.ChromeOptions()
        for argument in options.arguments:
            copied.add_argument(argument)
        return copied

    try:
        return uc.Chrome(options=_fresh_options(), version_main=version_main)
    except Exception as e:
        print(f"Aviso: não foi possível iniciar com version_main={version_main} (ou não aplicável). Tentando iniciar sem especificar version_main.")
        print("Erro:", e)
        try:
            return uc.Chrome(options=_fresh_options())
        except Exception as e2:
            print("Erro ao iniciar o driver:", e2)
            raise

driver = start_driver()
# Keep the implicit wait disabled: every lookup in this notebook goes through
# an explicit WebDriverWait, and Selenium warns that combining implicit and
# explicit waits leads to unpredictable (longer than requested) timeouts.
driver.implicitly_wait(0)

# ---------------------------
# Utilitários
# ---------------------------
def human_pause(min_t=1.2, max_t=3.5):
    """Sleep for a random duration drawn uniformly between min_t and max_t seconds."""
    delay_seconds = random.uniform(min_t, max_t)
    time.sleep(delay_seconds)

def safe_click(driver, el):
    """Click ``el``, falling back to a JavaScript click if the native click fails.

    Returns True as soon as one of the two strategies succeeds and False when
    both of them raise.
    """
    strategies = (
        lambda: el.click(),
        lambda: driver.execute_script("arguments[0].click();", el),
    )
    for attempt in strategies:
        try:
            attempt()
        except Exception:
            # Best effort: move on to the next strategy.
            continue
        return True
    return False

def save_debug(driver, company_slug, out_dir="debug"):
    """Best-effort dump of a screenshot and the page HTML for troubleshooting.

    Writes ``<out_dir>/<name>.png`` and ``<out_dir>/<name>.html`` where
    ``<name>`` is ``company_slug`` with path separators and characters that
    Windows rejects in file names replaced by ``_``.

    Args:
        driver: object exposing ``save_screenshot(path)`` and ``page_source``.
        company_slug: identifier used to build the file names.
        out_dir: directory that receives the files (created when missing).

    Failures while capturing or writing either artifact are printed and
    otherwise ignored; only a failure to create ``out_dir`` propagates.
    """
    # Path separators plus the characters Windows forbids in file names.
    unsafe_chars = '\\/:*?"<>|'
    safe_name = company_slug.translate({ord(ch): "_" for ch in unsafe_chars})
    os.makedirs(out_dir, exist_ok=True)
    try:
        driver.save_screenshot(os.path.join(out_dir, f"{safe_name}.png"))
    except Exception as e:
        print("Erro ao salvar screenshot:", e)
    try:
        # Read the source before opening the file so that a driver failure
        # does not leave an empty .html file behind.
        html = driver.page_source
        with open(os.path.join(out_dir, f"{safe_name}.html"), "w", encoding="utf-8") as f:
            f.write(html)
    except Exception as e:
        print("Erro ao salvar html:", e)

In [34]:
def wait_for_page_complete(driver, timeout=20):
    """Wait for the document to finish loading, tolerating slow pages.

    Polls ``document.readyState`` for up to ``timeout`` seconds until it is
    ``'complete'``. On timeout the page is still accepted when the state is
    ``'interactive'`` or when a ``<body>`` element shows up within 5 more
    seconds. Returns True in every accepted case; if the ``<body>`` wait
    fails as well, that failure is re-raised.
    """
    ready_state_js = "return document.readyState"

    def _document_complete(d):
        return d.execute_script(ready_state_js) == "complete"

    try:
        WebDriverWait(driver, timeout).until(_document_complete)
    except TimeoutException:
        # First fallback: a DOM that is already interactive is good enough.
        try:
            if driver.execute_script(ready_state_js) == "interactive":
                print("readyState='interactive' -- prosseguindo com cuidado.")
                return True
        except Exception:
            pass
        # Second fallback: settle for the mere presence of <body>.
        try:
            body_present = EC.presence_of_element_located((By.TAG_NAME, "body"))
            WebDriverWait(driver, 5).until(body_present)
            print("Aviso: timeout aguardando 'complete'. Encontrado <body>, prosseguindo.")
            return True
        except Exception:
            print("Erro: aguardar page complete falhou completamente.")
            raise
    return True

def wait_for_real_page(driver, timeout=25):
    """Block until the browser shows a company page instead of an anti-bot screen.

    Every 0.5 s, for up to ``timeout`` seconds, the page is accepted (and True
    is returned) when neither its title nor its body text contains
    'um momento' (case-insensitive) and the current URL contains '/empresa/'.

    Raises:
        TimeoutException: when the condition is not met in time; the message
            carries the last WebDriverException seen while polling, if any.
    """
    body_text_js = "return (document.body && document.body.innerText) ? document.body.innerText : ''"
    challenge_marker = "um momento"
    deadline = time.time() + timeout
    most_recent_error = None

    while time.time() < deadline:
        try:
            page_title = driver.title or ""
            page_url = driver.current_url or ""
            # Body text is more reliable than the title for spotting the challenge page.
            try:
                visible_text = driver.execute_script(body_text_js)
            except WebDriverException:
                visible_text = ""
            challenged = (
                challenge_marker in page_title.lower()
                or challenge_marker in visible_text.lower()
            )
            if not challenged and "/empresa/" in page_url:
                return True
        except WebDriverException as poll_error:
            most_recent_error = poll_error
        time.sleep(0.5)

    # The deadline passed without ever seeing the real page.
    raise TimeoutException(f"wait_for_real_page: timeout after {timeout}s. Última exceção: {most_recent_error}")

In [35]:
def try_find_text(driver, xpaths, wait_timeout=8):
    """Return the first non-empty visible text matched by any of ``xpaths``.

    Each XPath is tried in order, waiting up to ``wait_timeout`` seconds for a
    visible element. A candidate that times out, errors, or yields empty text
    is skipped. Returns None when no candidate produces text.
    """
    for candidate in xpaths:
        locator = (By.XPATH, candidate)
        try:
            element = WebDriverWait(driver, wait_timeout).until(
                EC.visibility_of_element_located(locator)
            )
            try:
                found = element.text.strip()
            except StaleElementReferenceException:
                # The element was replaced after the wait; look it up once more.
                try:
                    found = driver.find_element(*locator).text.strip()
                except Exception:
                    found = ""
        except Exception:
            # Timeouts and any other lookup failure: move on to the next XPath.
            continue
        if found:
            return found
    return None

# ---------------------------
# Acesso e interação com o site
# ---------------------------
linkReclame = "https://www.reclameaqui.com.br/"

# Open the landing page; the first navigation may time out, so retry once.
for _attempt in (1, 2):
    try:
        driver.get(linkReclame)
    except Exception as _nav_error:
        if _attempt == 2:
            print("Falha ao abrir o site:", _nav_error)
            raise
        print("Aviso: driver.get lançou uma exceção (possível timeout). Tentando novamente uma vez.")
    else:
        break

# Tolerant wait for the page load: a timeout here is reported, not fatal.
try:
    wait_for_page_complete(driver, timeout=20)
except TimeoutException:
    print("Aviso: timeout aguardando complete. Continuando (pode haver conteúdo parcial).")

human_pause()

# ---------------------------
# Fechar cookie/modal (mais robusto)
# ---------------------------
# Candidate XPaths for the cookie/consent button, tried in order.
cookie_selectors = [
    # Buttons whose text contains a common consent/close keyword
    # (case-insensitive for ASCII letters).
    "//button[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'aceitar')]",
    "//button[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'aceito')]",
    "//button[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'concordo')]",
    "//button[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'fechar')]",
    # Buttons whose label starts with "OK" in any letter case. A plain
    # substring test would also match unrelated buttons such as "Facebook".
    "//button[starts-with(translate(normalize-space(.), 'OK', 'ok'), 'ok')]",
    # Buttons identified by their aria-label.
    "//button[contains(@aria-label, 'aceitar') or contains(@aria-label, 'Aceitar')]",
    # Any button inside a container whose id or class mentions "cookie".
    "//*[@id and (contains(@id, 'cookie') or contains(@id, 'Cookie'))]//button",
    "//div[contains(@class,'cookie')]/button"
]

clicked_cookie = False
for sel in cookie_selectors:
    try:
        btn = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, sel)))
    except Exception:
        # Not found in time (or any other lookup problem): try the next selector.
        continue
    if safe_click(driver, btn):
        print("Cookie/modal fechado com seletor:", sel)
        human_pause(0.5, 1.2)
        clicked_cookie = True
        break

if not clicked_cookie:
    print("Aviso: não encontrei/fechei cookie modal automaticamente. Siga manualmente se necessário.")

# ---------------------------
# Abrir menu de categorias e escolher 'Casa de Aposta' (tenta com alguns xpaths)
# ---------------------------
# Candidate XPaths for the control that opens the category menu, tried in order.
menu_xpaths = [
    '//div[@role="menu"]',
    '//button[contains(@aria-label, "categor") or contains(translate(., "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "categor")]',
    '//div[contains(@class, "select") or contains(@class, "dropdown")]',
    '//button[contains(., "Categorias") or contains(., "categorias")]'
]

menu_clicked = False
for xp in menu_xpaths:
    try:
        el = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, xp)))
    except Exception:
        # Candidate not clickable in time: try the next one.
        continue
    if safe_click(driver, el):
        human_pause(0.4, 1.0)
        menu_clicked = True
        break

if menu_clicked:
    # With the menu open, pick the "Casa de Aposta" category.
    choose_xp = '//button[normalize-space()="Casa de Aposta" or contains(., "Casa de Aposta")]'
    try:
        chooseCategory = WebDriverWait(driver, 8).until(
            EC.element_to_be_clickable((By.XPATH, choose_xp))
        )
        driver.execute_script("arguments[0].scrollIntoView(true);", chooseCategory)
        human_pause(0.5, 1.5)
        safe_click(driver, chooseCategory)
        human_pause(1.5, 3)
    except TimeoutException:
        print("Categoria 'Casa de Aposta' não encontrada automaticamente.")
    except Exception as e:
        print("Erro ao tentar selecionar categoria:", e)
else:
    print("Aviso: não encontrei/abri o menu automaticamente. Prosseguir manualmente se necessário.")

# ---------------------------
# Coleta de empresas (bons e piores)
# ---------------------------
company_name_xpath = "//span[contains(@class, 'text-sm') and contains(@class, 'font-semibold')]"


def _slugs_from_ranking(wait_timeout, limit=3):
    """Return slugs for the first ``limit`` company names currently listed.

    A slug is the displayed name, stripped, with spaces and dots replaced by
    '-' and lower-cased. Entries with empty text, or that went stale while
    being read, are skipped.

    Raises:
        TimeoutException: when no name element is present within
            ``wait_timeout`` seconds.
    """
    elements = WebDriverWait(driver, wait_timeout).until(
        EC.presence_of_all_elements_located((By.XPATH, company_name_xpath))
    )
    slugs = []
    for element in elements[:limit]:
        try:
            label = element.text.strip()
        except StaleElementReferenceException:
            # The list re-rendered while it was being read; skip this entry
            # instead of aborting the whole cell.
            continue
        if label:
            slugs.append(label.replace(" ", "-").replace(".", "-").lower())
    return slugs


nameCompany = []
try:
    nameCompany = _slugs_from_ranking(wait_timeout=10)
except TimeoutException:
    print("Não encontrei empresas na aba inicial (ou xpath mudou).")

# Try to switch to the 'Piores' (worst) tab, if present, and collect more names.
try:
    buttonWorst = WebDriverWait(driver, 6).until(EC.element_to_be_clickable((By.XPATH, '//li[@data-testid="tab-worst" or contains(., "Piores")]')))
    driver.execute_script("arguments[0].scrollIntoView(true);", buttonWorst)
    human_pause(0.4, 1.2)
    safe_click(driver, buttonWorst)
    human_pause(1.5, 2.8)
    try:
        nameCompany += _slugs_from_ranking(wait_timeout=8)
    except TimeoutException:
        print("Não encontrei empresas na aba 'Piores' após clicar.")
except TimeoutException:
    print("Botão 'Piores' não encontrado (pode não existir ou xpath mudou).")
except Exception:
    print("Erro ao manipular aba 'Piores' (ignorado).")

# Drop empty entries and duplicates while keeping first-seen order.
nameCompany = [slug for slug in dict.fromkeys(nameCompany) if slug]
print("Empresas coletadas:", nameCompany)

# ---------------------------
# XPaths métricas
# ---------------------------
# XPath expression that lower-cases the ASCII letters of a node's text.
_LOWERCASED_TEXT = 'translate(., "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")'


def _text_has(keyword):
    """Build an XPath predicate: the node's lower-cased text contains ``keyword``."""
    return f'contains({_LOWERCASED_TEXT}, "{keyword}")'


# Candidate XPaths for each metric, ordered from most to least specific.
rating_xpaths = [
    f'(//span[{_text_has("nota média")}]/strong)[1]',
    f'//span[{_text_has("nota média")}]/strong',
    f'//strong[{_text_has("nota")}][1]',
    '//div[contains(@class, "rating") or contains(@class, "nota")]/strong',
]

responded_xpaths = [
    f'//span[{_text_has("respondeu")}]/strong',
    f'//strong[{_text_has("respondeu")}]',
    '//div[contains(.,"Respondeu")]/strong',
]

comingback_xpaths = [
    f'//span[{_text_has("voltariam")}]/strong',
    f'//strong[{_text_has("voltariam")}]',
    '//div[contains(.,"voltariam")]/strong',
]

solved_xpaths = [
    f'//span[{_text_has("a empresa resolveu")}]/strong',
    f'//strong[{_text_has("resolveu")}]',
    '//div[contains(.,"resolveu")]/strong',
]

# ---------------------------
# Loop de extração principal
# ---------------------------
# Parallel result lists: index i of each list belongs to nameCompany[i].
responded, comingBack, solved, rating = [], [], [], []

for company in nameCompany:
    try:
        url_empresa = linkReclame.rstrip('/') + '/empresa/' + company
        print("\nAbrindo:", url_empresa)
        driver.get(url_empresa)

        # Wait until the anti-bot interstitial is gone; on timeout keep a
        # debug dump and try to scrape whatever is on screen anyway.
        try:
            wait_for_real_page(driver, timeout=25)
        except TimeoutException:
            print("Aviso: esperar página real timeout. Salvando debug e tentando continuar.")
            save_debug(driver, company)
        human_pause(1, 2)
        try:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight/4);")
        except Exception:
            # The scroll is only there to look like a human reader; a failure is harmless.
            pass
        human_pause(0.8, 1.8)

        print("Title:", driver.title)
        print("URL atual:", driver.current_url)

        r = try_find_text(driver, rating_xpaths, wait_timeout=6) or "N/A"
        resp = try_find_text(driver, responded_xpaths, wait_timeout=5) or "N/A"
        cb = try_find_text(driver, comingback_xpaths, wait_timeout=5) or "N/A"
        sol = try_find_text(driver, solved_xpaths, wait_timeout=5) or "N/A"
    except Exception as e:
        if isinstance(e, WebDriverException):
            print(f"[ERROR] Erro ao processar {company}: {e}")
        else:
            print(f"[ERROR-GERAL] {e}")
        traceback.print_exc()
        try:
            save_debug(driver, company)
        except Exception as debug_error:
            print("Erro ao salvar debug:", debug_error)
        # Discard any partially collected value so the row is uniformly "ERR".
        r = resp = cb = sol = "ERR"

    # Append exactly one value per list per company, whatever happened above,
    # so the four lists always stay aligned with nameCompany.
    rating.append(r)
    responded.append(resp)
    comingBack.append(cb)
    solved.append(sol)

# ---------------------------
# Resultados
# ---------------------------
# Print one report block per company; every list is indexed by the company's position.
_separator = '------------------------------'
_metric_columns = (
    ('Nota média', rating),
    ('Respondeu', responded),
    ('Voltariam', comingBack),
    ('A empresa resolveu', solved),
)
for i, _company_name in enumerate(nameCompany):
    print(_separator)
    print(f'Empresa: {_company_name}')
    for _label, _values in _metric_columns:
        print(f'{_label}: {_values[i]}')
    print(_separator)

# Shut the browser down. A failure here is not fatal, but it is reported so a
# leftover Chrome process does not go unnoticed.
try:
    driver.quit()
except Exception as quit_error:
    print("Aviso: falha ao encerrar o driver:", quit_error)

Aviso: timeout aguardando 'complete'. Encontrado <body>, prosseguindo.
Cookie/modal fechado com seletor: //button[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'aceitar')]
Empresas coletadas: ['betou', 'luva-bet', 'f12-bet', 'vert-bet', 'brasil-bet', '1xbet']

Abrindo: https://www.reclameaqui.com.br/empresa/betou
Title: Betou - Reclame Aqui
URL atual: https://www.reclameaqui.com.br/empresa/betou/

Abrindo: https://www.reclameaqui.com.br/empresa/luva-bet
Title: Luva bet - Reclame Aqui
URL atual: https://www.reclameaqui.com.br/empresa/luva-bet/

Abrindo: https://www.reclameaqui.com.br/empresa/f12-bet
Title: F12.bet - Reclame Aqui
URL atual: https://www.reclameaqui.com.br/empresa/f12-bet/

Abrindo: https://www.reclameaqui.com.br/empresa/vert-bet
Title: Vert Bet - Reclame Aqui
URL atual: https://www.reclameaqui.com.br/empresa/vert-bet/

Abrindo: https://www.reclameaqui.com.br/empresa/brasil-bet
Title: Brasil bet - Reclame Aqui
URL atual: https://www.rec