In [31]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# import ollama

In [32]:

# Configure the Chrome options used to launch the browser.
# Headless mode is currently disabled; uncomment the next add_argument to enable it.
chrome_options = Options()
# chrome_options.add_argument("--headless") #Run without opening browser
# NOTE(review): "--disable-bot-detection" does not appear to be a documented
# Chromium switch and is probably ignored by the browser — confirm or remove.
chrome_options.add_argument("--disable-bot-detection") # Bypass simple bot checks
# Adjacent string literals are concatenated by Python. A backslash continuation
# inside a single literal would embed the next line's indentation in the header
# value, so the user agent is built from two literals instead.
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
)
# Anti-bot evasion techniques
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option("useAutomationExtension", False)

In [33]:
# Initialize WebDriver
# The browser binary path must be set on the same Options object that is passed
# to webdriver.Chrome; setting it on a separate, unused Options instance has no
# effect on the launched browser.
chrome_options.binary_location = "/snap/bin/chromium"
# Alias kept so any later cell that refers to `options` still resolves.
options = chrome_options
service = Service(executable_path='/snap/bin/chromium.chromedriver')
driver = webdriver.Chrome(service=service, options=chrome_options)


In [None]:
# 3. Configure Wait Strategy
wait = WebDriverWait(driver, 30)

# Created before the try block so the name exists for the following cell even
# when loading the page raises.
matches = []
try:
    # 4. Load the league page
    driver.get("https://www.oddsportal.com/football/england/premier-league/")

    # 5. Fixed pause to let the initial JavaScript execute
    time.sleep(3)

    # 6. Wait until match content is present in the DOM.
    # Only presence conditions are used: an "invisibility" condition is also
    # satisfied when the element does not exist at all, which would let the
    # wait return before anything has rendered.
    try:
        wait.until(
            EC.any_of(
                EC.presence_of_element_located((By.CSS_SELECTOR, "table.table-main")),
                EC.presence_of_element_located((By.CSS_SELECTOR, "div.eventRow")),
            )
        )
    except TimeoutException:
        # Best effort: keep going and scrape whatever is on the page (possibly nothing).
        print("Timed out waiting for match rows; continuing with current page content")

    # 7. Additional verification
    if "oddsportal.com" not in driver.current_url:
        print("Redirect detected! Current URL:", driver.current_url)

    # Scroll to the bottom a few times so lazily loaded rows get a chance to render
    for _ in range(3):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

    # 8. Extract one record per event row
    rows = driver.find_elements(By.CSS_SELECTOR, "div.eventRow")

    for row in rows:
        try:
            # Team names are read from the title attribute of the row's first two titled links
            participants = row.find_elements(By.CSS_SELECTOR, "a[title]")
            if len(participants) < 2:
                continue

            home_team = participants[0].get_attribute("title")
            away_team = participants[1].get_attribute("title")

            # Odds are taken positionally: first three containers = home / draw / away
            odds_containers = row.find_elements(
                By.CSS_SELECTOR, "div[data-testid='odd-container-default'] p[data-testid='odd-container-default']"
            )

            # Rows with fewer than three odds values are skipped
            if len(odds_containers) >= 3:
                home_odds = odds_containers[0].text
                draw_odds = odds_containers[1].text
                away_odds = odds_containers[2].text

                matches.append({
                    "Home Team": home_team,
                    "Away Team": away_team,
                    "Home Odds": home_odds,
                    "Draw Odds": draw_odds,
                    "Away Odds": away_odds
                })
        except Exception as e:
            # A single bad row should not abort the whole scrape
            print(f"Error processing row: {e}")
            continue

finally:
    # Always release the browser. The driver is created in an earlier cell, so
    # that cell has to be re-run before this one can be executed again.
    driver.quit()

In [None]:
# Build the results table once from the scraped records.
# `df` is always defined (empty when nothing was scraped) so later cells can use it.
df = pd.DataFrame(matches)

if matches:
    print("\nPremier League Match Odds:")
    print(df)

    # Save to CSV
    df.to_csv("premier_league_odds.csv", index=False)
    print("\nData saved to premier_league_odds.csv")

    # Short preview of the first rows; skipped when there is nothing to show
    print("Scraped Odds Data:")
    print(df.head(10))
else:
    print("No matches found")