In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import openpyxl
import os
import pandas as pd
from dataclasses import dataclass
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
from typing import List
import json
from dataclasses import asdict
from datetime import datetime

In [None]:
def get_driver(url: str):
    """Start a headless Chrome session and load ``url`` in it.

    Args:
        url: Address to navigate to once the browser has started.

    Returns:
        The ``webdriver.Chrome`` instance with ``url`` loaded. The caller owns
        the session and is responsible for calling ``quit()`` on it.

    Raises:
        Exception: Whatever ``webdriver.Chrome`` or ``driver.get`` raises. When
            the failure happens after the browser was started, the session is
            shut down before the error is re-raised so that no browser
            process is left running.
    """
    options = Options()
    for argument in (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--window-size=1366,768",
        "--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
    ):
        options.add_argument(argument)

    driver = webdriver.Chrome(options=options)
    try:
        try:
            driver.maximize_window()
        except Exception:
            # Best effort: a window size is already requested through
            # --window-size, so a failed maximize is not fatal.
            pass
        driver.get(url)
    except BaseException:
        # The caller never receives the driver on this path, so it must be
        # released here; otherwise the browser session would leak.
        try:
            driver.quit()
        except Exception:
            pass  # Do not let a cleanup failure hide the original error.
        raise
    return driver

In [4]:
def get_workbook(file_name: str):
    """Open the workbook stored at ``file_name``, creating it when absent.

    Args:
        file_name: Path of the ``.xlsx`` file.

    Returns:
        The workbook loaded from ``file_name`` if that path exists; otherwise
        a new ``openpyxl.Workbook`` that has already been saved to
        ``file_name``.
    """
    if os.path.exists(file_name):
        return openpyxl.load_workbook(file_name)

    # Saving fails when the target folder is missing (e.g. "./data/..." on a
    # fresh checkout), so make sure it exists before the first save.
    parent_dir = os.path.dirname(file_name)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    workbook = openpyxl.Workbook()
    workbook.save(file_name)
    return workbook

In [None]:
def _click_first_clickable(driver, selectors, timeout):
    """Click the first locator in ``selectors`` that becomes clickable; return whether one was clicked."""
    for by, selector in selectors:
        try:
            WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable((by, selector))
            ).click()
            return True
        except Exception:
            # Best effort: the overlay this locator targets may not be on the
            # page at all, so fall through to the next candidate.
            continue
    return False


def _find_match_rows(driver):
    """Return the ``bet-table-row`` elements, leaving the driver focused on the context that owns them."""
    # Start every lookup from the top-level document so repeated polling
    # behaves the same regardless of where the previous attempt ended.
    driver.switch_to.default_content()
    rows = driver.find_elements(By.CLASS_NAME, 'bet-table-row')
    if rows:
        return rows

    for frame in driver.find_elements(By.TAG_NAME, 'iframe'):
        try:
            driver.switch_to.frame(frame)
            rows = driver.find_elements(By.CLASS_NAME, 'bet-table-row')
        except Exception:
            rows = []
        if rows:
            # Stay inside this frame: elements located in a frame are only
            # usable while the driver is still focused on that frame.
            return rows
        driver.switch_to.default_content()
    return []


def _save_debug_artifacts(driver):
    """Best-effort dump of the page source and a screenshot under ``/tmp`` for later inspection."""
    ts = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
    try:
        with open(f'/tmp/page_source_{ts}.html', 'w', encoding='utf-8') as dump:
            dump.write(driver.page_source)
    except Exception:
        pass
    try:
        driver.get_screenshot_as_file(f'/tmp/screen_{ts}.png')
    except Exception:
        pass


def _extract_match_record(match_row):
    """Build the ``[name, link, active]`` spreadsheet row for one match element."""
    link_href = match_row.find_element(By.CSS_SELECTOR, "div:nth-of-type(1) a").get_attribute("href")
    name_spans = match_row.find_elements(By.CSS_SELECTOR, "div:nth-of-type(1) a span")
    # NOTE(review): contains(@class, 'active') also matches class names such
    # as 'inactive' -- confirm against the page markup.
    has_active = match_row.find_elements(By.XPATH, ".//*[contains(@class, 'active')]")
    match_name = ' '.join(span.text or '' for span in name_spans).strip()
    return [match_name, link_href, 'True' if has_active else 'False']


def write_to_excel(driver, file_name: str) -> None:
    """Scrape the match rows shown by ``driver`` and append them to a workbook.

    Steps: wait for the document to finish loading, try to click through
    consent/login and dialog buttons, wait for ``bet-table-row`` elements
    (in the top-level document or in any iframe), then append one
    ``[Match Name, Match Link, Active]`` row per element and save the
    workbook once at the end.

    Args:
        driver: Selenium WebDriver that already has the target page loaded.
        file_name: Path of the ``.xlsx`` file to append to; it is opened or
            created through ``get_workbook``.

    Raises:
        Exception: Re-raised from the row wait when no row appears within
            25 seconds; the page source and a screenshot are written under
            ``/tmp`` first. Errors on individual rows are printed and
            skipped instead of being raised.
    """
    workbook = get_workbook(file_name)
    sheet = workbook.active

    WebDriverWait(driver, 20).until(
        lambda d: d.execute_script('return document.readyState') == 'complete'
    )

    # Try multiple selectors for cookie/login/consent buttons (robust to DOM changes)
    _click_first_clickable(
        driver,
        [
            (By.XPATH, "//*[@id='root']//form//button[contains(., 'Allow') or contains(., 'Accept') or contains(., 'Login')][2]"),
            (By.CSS_SELECTOR, "#root form button:nth-of-type(2)"),
        ],
        timeout=10,
    )
    # Then try to dismiss a dialog/modal if one is open.
    _click_first_clickable(
        driver,
        [
            (By.XPATH, "//div[@role='dialog']//button | //div[contains(@class,'modal')]//button[1]"),
            (By.CSS_SELECTOR, "div[role='dialog'] button, .modal button"),
        ],
        timeout=5,
    )

    try:
        try:
            # until() returns the (truthy) list produced by the last poll, so
            # the rows come back with the driver still focused on their frame.
            match_rows = WebDriverWait(driver, 25).until(_find_match_rows)
        except Exception:
            _save_debug_artifacts(driver)
            raise

        # max_row is 1 both for an empty sheet and for a sheet holding only
        # the header; an empty sheet additionally reports a single column.
        if sheet.max_row == 1 and sheet.max_column == 1:
            sheet.append(['Match Name', 'Match Link', 'Active'])

        try:
            for match_row in match_rows:
                try:
                    sheet.append(_extract_match_record(match_row))
                except Exception as e:
                    print(f'error row: {e}')
        finally:
            # One save instead of one per row; the finally keeps whatever was
            # collected if the loop is interrupted.
            workbook.save(file_name)
    finally:
        # Hand the driver back focused on the top-level document.
        driver.switch_to.default_content()


In [6]:
def get_sport_name_and_links_run() -> None:
    """Load the d247 home page and write its match rows to the cricket workbook.

    Opens a browser through ``get_driver``, hands it to ``write_to_excel``
    together with ``./data/cricket-matches.xlsx``, and always shuts the
    browser session down afterwards.

    Raises:
        Exception: Anything raised by ``get_driver`` or ``write_to_excel``;
            the browser is still shut down when ``write_to_excel`` fails.
    """
    url = 'https://d247.com/home'
    file_name = "./data/cricket-matches.xlsx"

    driver = get_driver(url)
    try:
        write_to_excel(driver, file_name)
    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window and would be skipped entirely on an error.
        driver.quit()

In [7]:
# Run the scrape: loads https://d247.com/home in a browser and appends the
# match rows it finds to ./data/cricket-matches.xlsx.
get_sport_name_and_links_run()