In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.edge.options import Options
import re
from datetime import datetime, timedelta
import pandas as pd

## Topsport

In [2]:
# Scrape Euroleague pre-match odds from Topsport.
# Result: matches_topsport, a list of (date, (team1, team2), (odd1, odd2)) tuples.
# Team names and odds are kept as the stripped text scraped from the page.

# Set options
options = Options()
options.add_argument("--headless")  # Run in headless mode
options.add_argument("--disable-gpu")  # Disable GPU acceleration
options.add_argument("--no-sandbox")  # Disables sandbox for the browser
options.add_argument("--disable-images")  # Disable images

# Define the result before any browser work so that a failed scrape leaves an
# empty list behind instead of an undefined (or stale, from an earlier run) name.
matches_topsport = []

# Initialize WebDriver
driver = webdriver.Edge(options=options)

try:
    # Navigate inside the try block: if the page load fails, the finally
    # clause still shuts the browser down instead of leaking the process.
    driver.get("https://en.topsport.lt/basketball/euroleaguetv")

    # Wait for the events to load (raises TimeoutException after 5 seconds)
    events = WebDriverWait(driver, 5).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "js-prelive-event"))
    )

    today = datetime.today()

    # Loop through events
    for event in events:
        # Extract match date and time
        date = event.find_element(By.CLASS_NAME, "prelive-event-date").text

        # Extract teams and odds
        teams = event.find_elements(By.CLASS_NAME, "prelive-list-league-choice-title")
        odds = event.find_elements(By.CLASS_NAME, "prelive-list-league-rate")

        # Convert "Today" and "Tomorrow" to actual dates; any other value is
        # kept exactly as scraped (the parsing cell expects "YYYY-MM-DD HH:MM").
        if "Today" in date:
            date = today.strftime("%Y-%m-%d ") + date.split(" ")[1]
        elif "Tomorrow" in date:
            tomorrow = today + timedelta(days=1)
            date = tomorrow.strftime("%Y-%m-%d ") + date.split(" ")[1]

        # Ensure we have both teams and their respective odds
        if len(teams) == 2 and len(odds) == 2:
            team1 = teams[0].text.strip()
            team2 = teams[1].text.strip()
            odd1 = odds[0].text.strip()
            odd2 = odds[1].text.strip()

            if team1 == "Yes" or team2 == "Yes": # Skip extra bets with "yes" "no" options
                continue

            matches_topsport.append((date, (team1, team2), (odd1, odd2)))

finally:
    # Close the browser
    driver.quit()

## Betsafe

In [3]:
# Scrape Euroleague odds from Betsafe.
# Result: matches_betsafe, a list of (date, (team1, team2), (odd1, odd2)) tuples
# where date is an "MM-DD HH:MM" string (the page shows no year).

# Set options
options = Options()
# options.add_argument("--headless")  # Run in headless mode
options.add_argument("--disable-gpu")  # Disable GPU acceleration
options.add_argument("--no-sandbox")  # Disables sandbox for the browser
options.add_argument("--disable-images")  # Disable images

# List to store all matches. Defined before any browser work so that a failed
# scrape leaves an empty list behind instead of an undefined or stale name.
matches_betsafe = []

# Initialize WebDriver
driver = webdriver.Edge(options=options)

try:
    # Navigate inside the try block: if the page load fails, the finally
    # clause still shuts the browser down instead of leaking the process.
    driver.get("https://www.betsafe.lt/en/betting/euroleague")

    # Wait for the events to load (raises TimeoutException after 10 seconds)
    events = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "wpt-table__row"))
    )

    # Loop through events
    for event in events:
        # Extract match date and time. A row without a time element is skipped:
        # previously it silently reused the date of the preceding row (or raised
        # NameError when it was the very first row).
        date_elements = event.find_elements(By.CLASS_NAME, "wpt-time")
        if not date_elements:
            continue

        date_text = date_elements[0].text  # e.g., 04 Dec\n20:00
        date_text = date_text.replace('\n', ' ')

        # Convert to datetime object. A leap year is supplied explicitly because
        # strptime otherwise assumes 1900 and rejects "29 Feb"; the year is
        # dropped again by strftime below. Note that %b depends on the locale.
        date = datetime.strptime(f"2000 {date_text}", "%Y %d %b %H:%M")
        date = date.strftime("%m-%d %H:%M") # Output: e.g., 12-04 20:00

        teams = event.find_elements(By.CLASS_NAME, "wpt-teams__team")
        if len(teams) != 2:  # Ensure there are exactly 2 teams
            continue

        # Extract odds values
        odds_elements = event.find_elements(By.CLASS_NAME, "wpt-odd-changer")
        odds = [odd.text for odd in odds_elements if odd.text]  # Extract non-empty text
        if len(odds) < 2:  # Both outcomes are needed; odds[1] is read below
            continue

        team1 = teams[0].find_element(By.TAG_NAME, "a").text
        team2 = teams[1].find_element(By.TAG_NAME, "a").text

        odd1 = odds[0]
        odd2 = odds[1]
        matches_betsafe.append((date, (team1, team2), (odd1, odd2)))

finally:
    # Close the browser
    driver.quit()

In [None]:
# Helper function to parse dates
def parse_date(date_str, with_year=False):
    """Parse a scraped kickoff timestamp into a ``datetime``.

    Args:
        date_str: ``"YYYY-MM-DD HH:MM"`` when ``with_year`` is true,
            otherwise ``"MM-DD HH:MM"``.
        with_year: Whether ``date_str`` starts with a four-digit year.

    Returns:
        The parsed ``datetime``. Without a year the result carries the
        placeholder year 1900, except for 29 February, which carries 1904
        because 1900 is not a leap year. Callers should compare only the
        month, day and time of year-less results.

    Raises:
        ValueError: If ``date_str`` does not match the expected format.
    """
    if with_year:
        return datetime.strptime(date_str, "%Y-%m-%d %H:%M")

    # Always hand strptime an explicit year: a year-less format defaults to
    # 1900 and therefore cannot represent 29 February.
    try:
        return datetime.strptime(f"1900-{date_str}", "%Y-%m-%d %H:%M")
    except ValueError:
        # Either a leap day or genuinely malformed input; retrying with a leap
        # year accepts the former and re-raises ValueError for the latter.
        return datetime.strptime(f"1904-{date_str}", "%Y-%m-%d %H:%M")

# Parse dates
matches_topsport_parsed = [
    (parse_date(date, with_year=True), teams, odds, date) for date, teams, odds in matches_topsport
]
matches_betsafe_parsed = [
    (parse_date(date, with_year=False), teams, odds) for date, teams, odds in matches_betsafe
]


def _same_fixture(top_date, top_teams, bet_date, bet_teams):
    """Return True when a Topsport entry and a Betsafe entry look like the same game.

    The kickoff must agree on month, day and time (the Betsafe date has no real
    year). The teams must then either be identical, or one of the first two
    words of the Topsport home team must occur in the Betsafe home team name,
    because the two sites spell team names differently.
    """
    same_kickoff = (
        top_date.month == bet_date.month
        and top_date.day == bet_date.day
        and top_date.time() == bet_date.time()
    )
    if not same_kickoff:
        return False
    if top_teams == bet_teams:
        return True
    # Slicing instead of indexing [0] and [1]: one-word names such as
    # "Zalgiris" used to raise IndexError when the first word did not match.
    return any(word in bet_teams[0] for word in top_teams[0].split()[:2])


# Match entries
matched_entries = []
unmatched_topsport = []
unmatched_betsafe = matches_betsafe_parsed[:]

# Match entries from both lists
for top_date_full, top_teams, top_odds, original_top_date in matches_topsport_parsed:
    matched = False
    for i, (bet_date, bet_teams, bet_odds) in enumerate(unmatched_betsafe):
        if _same_fixture(top_date_full, top_teams, bet_date, bet_teams):
            matched_entries.append(
                (
                    original_top_date,
                    f"{top_teams[0]} vs {top_teams[1]}",
                    top_odds[0],
                    top_odds[1],
                    bet_odds[0],
                    bet_odds[1],
                )
            )
            # Remove matched entry from unmatched list; safe while iterating
            # because the loop is left immediately afterwards.
            unmatched_betsafe.pop(i)
            matched = True
            break

    if not matched:
        unmatched_topsport.append((original_top_date, top_teams, top_odds))

# Convert to DataFrames
df = pd.DataFrame(
    matched_entries,
    columns=["date", "match", "topsport_odd_1", "topsport_odd_2", "betsafe_odd_1", "betsafe_odd_2"],
)

unmatched_topsport_df = pd.DataFrame(
    unmatched_topsport, columns=["date", "teams", "odds"]
)

unmatched_betsafe_df = pd.DataFrame(
    unmatched_betsafe, columns=["date", "teams", "odds"]
)

# print("\nUnmatched Topsport:")
# print(unmatched_topsport_df)
# print("\nUnmatched Betsafe:")
# print(unmatched_betsafe_df)


In [5]:
# Display the fixtures matched across both bookmakers with each site's odds
df

Unnamed: 0,date,match,topsport_odd_1,topsport_odd_2,betsafe_odd_1,betsafe_odd_2
0,2024-12-05 20:00,Zalgiris vs Anadolu Efes,1.62,2.31,1.67,2.2
1,2024-12-05 21:05,Maccabi Tel Aviv vs Barcelona,2.04,1.78,2.0,1.8
2,2024-12-05 21:30,FC Bayern Munchen vs Baskonia Vitoria Gasteiz,1.36,3.15,1.37,3.1
3,2024-12-05 21:30,Partizan vs Panathinaikos,1.88,1.93,1.95,1.85
4,2024-12-05 21:45,Real Madrid vs Fenerbahce,1.44,2.85,1.4,2.95
5,2024-12-06 20:00,Monaco vs Alba Berlin,1.05,8.6,1.07,8.5
6,2024-12-06 21:15,Olympiacos vs Paris Basketball,1.3,3.3,1.32,3.4
7,2024-12-06 21:30,Virtus Bologna vs Crvena Zvezda,1.81,1.94,1.8,2.0
8,2024-12-06 21:45,Milano Armani Exchange vs Asvel Lyon Villeurbanne,1.34,3.1,1.35,3.2
9,2024-12-12 21:45,Real Madrid vs Zalgiris,1.24,3.65,1.27,3.4
