In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
import requests
from bs4 import BeautifulSoup
from itertools import combinations
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import itertools as it
import re

## Scrape Topsport, 7bet and betsafe

In [2]:
# Set all urls
url_list_topsport = ["https://www.topsport.lt/krepsinis/nba",
                     "https://www.topsport.lt/krepsinis/eurolyga",
                     "https://www.topsport.lt/futbolas/uefa-europos-lyga",
                     ]

# Set all league names (ending of each url)
league_names = [re.search(r'/([^/]+?)-', url).group(1) if '-' in url else re.search(r'/([^/]+)$', url).group(1) for url in url_list_topsport]

def scrape_topsport(url):
    html = requests.get(url)
    soup = BeautifulSoup(html.text, 'html.parser')
    today = datetime.today()

    events = soup.find_all('div', {'class': 'js-prelive-event-row'}) # Find all events

    matches_topsport = []
    for event in events:
        try:
            date = event.find('span', {'class':'prelive-event-date'}).text.lower()
            teams = event.find_all('div', {'class': 'prelive-outcome-buttons-item-title-overflow'})
            odds = event.find_all('span', "prelive-list-league-rate ml-1 h-font-secondary h-fs17 h-fw500")

            # Convert "Today" and "Tomorrow" to actual dates
            if "šiandien" in date:
                date = today.strftime("%Y-%m-%d ") + date.split(" ")[1]
            elif "rytoj" in date:
                tomorrow = today + timedelta(days=1)
                date = tomorrow.strftime("%Y-%m-%d ") + date.split(" ")[1]

            # Ensure we have both teams and their respective odds
            if len(teams) == 2 and len(odds) == 2:
                team1 = teams[0].text.strip()
                team2 = teams[1].text.strip()
                odd1 = float(odds[0].text)
                odd2 = float(odds[1].text)
                
                if team1 == "Taip" or team2 == "Taip": # Skip extra bets with "yes" "no" options
                    continue

                matches_topsport.append((date, (team1, team2), (odd1, odd2)))

            # For games with 3 odds like football
            elif len(teams) == 3 and len(odds) == 3:
                team1 = teams[0].text.strip()
                team2 = teams[2].text.strip()
                odd1 = float(odds[0].text)
                odd2 = float(odds[1].text)
                odd3 = float(odds[2].text)
                
                if team1 == "Yes" or team2 == "Yes": # Skip extra bets with "yes" "no" options
                    continue

                matches_topsport.append((date, (team1, "Draw" ,team2), (odd1, odd2, odd3)))
        except:
            continue

    return matches_topsport

all_matches = []

for i, league_name in enumerate(league_names):
    matches = scrape_topsport(url_list_topsport[i])
    all_matches.extend([(date, teams, odds, league_name) for date, teams, odds in matches])

# Create a single DataFrame
df_topsport = pd.DataFrame(all_matches, columns=["Date", "Teams", "Odds", "League"])

# df_topsport

In [3]:
# Define leagues. Integers represent number of odds for that sport
leagues = {"nba": ((2,2), "https://sb2frontend-altenar2.biahosted.com/api/widget/GetEvents"),
           "eurolyga": ((2,2), "https://sb2frontend-altenar2.biahosted.com/api/widget/GetEvents?culture=lt-LT&timezoneOffset=-120&integration=7bet&deviceType=1&numFormat=en-GB&countryCode=LT&eventCount=0&sportId=0&champIds=2995"),
           "uefa": ((3,10), "https://sb2frontend-altenar2.biahosted.com/api/widget/GetEvents?culture=lt-LT&timezoneOffset=-120&integration=7bet&deviceType=1&numFormat=en-GB&countryCode=LT&eventCount=0&sportId=0&champIds=16809"),
           }

params = {
    "culture": "lt-LT",
    "timezoneOffset": "-120",
    "integration": "7bet",
    "deviceType": "1",
    "numFormat": "en-GB",
    "countryCode": "LT",
    "eventCount": "0",
    "sportId": "0",
    "champIds": "2980"
}

# Add headers copied from browser
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Referer": "https://7bet.lt/", 
    "Origin": "https://7bet.lt",
    "Accept": "application/json, text/plain, */*"
}


def scrape_bet7(url, number_of_odds, increment):
    response = requests.get(url, params=params, headers=headers)

    if response.status_code == 200:
        data = response.json()
    else:
        print(f"Error: {response.status_code}")

    # Only first 2 bets of each match have False, others True. Can also be used 'typeID'==1 or 3
    filtered_events = [(idx, event) for idx, event in enumerate(data['odds']) if event['isMB'] is False]

    # Group the filtered events into pairs
    grouped_pairs = [filtered_events[i:i+number_of_odds] for i in range(0, len(filtered_events), increment)]

    matches_bet7 = []
    # Extract only price and name from each event
    for group in grouped_pairs:
        if len(group) == 2:
            # Extracting the first and second team's names and prices
            team1, team2 = group[0][1]['name'], group[1][1]['name']
            odd1, odd2 = group[0][1]['price'], group[1][1]['price']
            
            # Round odds
            odd1 = round(odd1, 2)
            odd2 = round(odd2, 2)
            
            # Append the tuple to the matches list
            matches_bet7.append(((team1, team2), (odd1, odd2)))

        elif len(group) == 3:
            # Extracting the first and second team's names and prices
            team1, middle, team2 = group[0][1]['name'], group[1][1]['name'] ,group[2][1]['name']
            odd1, odd2, odd3 = group[0][1]['price'], group[1][1]['price'], group[2][1]['price']
            
            if middle == "Lygiosios":
                middle = "Draw"
            else: # something went wrong
                continue

            # Round odds
            odd1 = round(odd1, 2)
            odd2 = round(odd2, 2)
            odd3 = round(odd3, 2)
            
            # Append the tuple to the matches list
            matches_bet7.append(((team1, middle, team2), (odd1, odd2, odd3)))

    return matches_bet7


all_matches = []

for league in leagues.keys():
    number_of_odds = leagues[league][0][0]
    increment = leagues[league][0][1]
    url = leagues[league][1]
    matches = scrape_bet7(url, number_of_odds, increment)
    all_matches.extend([(teams, odds, league) for teams, odds in matches])

# Create a single DataFrame
df_7bet = pd.DataFrame(all_matches, columns=["Teams", "Odds", "League"])

# df_7bet

In [4]:
# Set options
options = Options()
options.add_argument("--headless")  # Run in headless mode
options.add_argument("--disable-gpu")  # Disable GPU acceleration
options.add_argument("--no-sandbox")  # Disables sandbox for the browser
options.add_argument("--disable-images")  # Disable images

# Set urls. Integers represent number of bets to include
url_list_betsafe = [(2, "https://www.betsafe.lt/lt/betting/nba"), 
                    (2, "https://www.betsafe.lt/lt/lazybos/eurolyga"),
                    (3, "https://www.betsafe.lt/lt/lazybos/futbolas/europa/uefa-europos-lyga"),
                    ]

# Set all league names (first word after last "/"" till "-"")
league_names = [
    re.search(r'/([^/]+?)-', url[1]).group(1) if '-' in url[1] else re.search(r'/([^/]+)$', url[1]).group(1) 
    for url in url_list_betsafe
]


def scrape_betsafe(url, number_of_odds):
    # Start WebDriver
    driver = webdriver.Firefox(options=options)
    driver.get(url)

    try:
        # List to store all matches
        matches_betsafe = []
        
        # Wait for the events to load
        events = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, "wpt-table__row"))
        )
        
        # Loop through events
        for event in events:
            # Extract teams and odds
            teams = event.find_elements(By.CLASS_NAME, "wpt-teams__team")
            odds_elements = event.find_elements(By.CLASS_NAME, "wpt-odd-changer")
            odds = [odd.text for odd in odds_elements if odd.text]  # Extract non-empty text

            if not odds or len(teams) != 2:
                continue

            if len(odds) >= 2 and number_of_odds == 2: # There are 4 extra bets that are excluded
                team1 = teams[0].find_element(By.TAG_NAME, "a").text
                team2 = teams[1].find_element(By.TAG_NAME, "a").text

                odd1 = float(odds[0])
                odd2 = float(odds[1])
                matches_betsafe.append(((team1, team2), (odd1, odd2)))

            if len(odds) >= 3 and number_of_odds == 3:
                team1 = teams[0].find_element(By.TAG_NAME, "a").text
                team2 = teams[1].find_element(By.TAG_NAME, "a").text

                odd1 = float(odds[0])
                odd2 = float(odds[1])
                odd3 = float(odds[2])
                matches_betsafe.append(((team1, "Draw" ,team2), (odd1, odd2, odd3)))

            
    finally:
        # Close the browser
        driver.quit()
        return matches_betsafe

all_matches = []

for i, league_name in enumerate(league_names):
    number_of_odds = url_list_betsafe[i][0]
    matches = scrape_betsafe(url_list_betsafe[i][1], number_of_odds)
    all_matches.extend([(teams, odds, league_name) for teams, odds in matches])

# Create a single DataFrame
df_betsafe = pd.DataFrame(all_matches, columns=["Teams", "Odds", "League"])

## Merge dataframes in one

In [5]:
# Handle tuples to match names from different sites
def teams_match(teams1, teams2):
    return all(
        any(word1 in team2 for team2 in teams2 for word1 in team1.split()) 
        for team1 in teams1
    ) or all(
        any(word2 in team1 for team1 in teams1 for word2 in team2.split()) 
        for team2 in teams2
    )


# Define the function to merge odds
def merge_odds(df_topsport, df_betsafe, df_7bet):
    merged_matches = []

    matched_betsafe_indices = set()
    matched_7bet_indices = set()

    # Iterate through rows in df_topsport
    for _, topsport_row in df_topsport.iterrows():
        top_date = topsport_row['Date']
        top_teams = topsport_row['Teams']
        top_league = topsport_row['League']
        matched_betsafe_odds = None
        matched_7bet_odds = None

        # Match with betsafe
        for betsafe_index, betsafe_row in df_betsafe.iterrows():
            if betsafe_index in matched_betsafe_indices:
                continue
            if (
                betsafe_row['League'] == top_league
                and teams_match(top_teams, betsafe_row['Teams'])
            ):
                matched_betsafe_odds = betsafe_row['Odds']
                matched_betsafe_indices.add(betsafe_index)
                break

        # Match with 7bet
        for _7bet_index, _7bet_row in df_7bet.iterrows():
            if _7bet_index in matched_7bet_indices:
                continue
            if (
                _7bet_row['League'] == top_league
                and teams_match(top_teams, _7bet_row['Teams'])
            ):
                matched_7bet_odds = _7bet_row['Odds']
                matched_7bet_indices.add(_7bet_index)
                break

        # Append the result
        merged_matches.append(
            {
                'Date': top_date,
                'Match': top_teams,
                'League': top_league,
                'Topsport': topsport_row['Odds'],
                'Betsafe': matched_betsafe_odds,
                '7bet': matched_7bet_odds,
            }
        )

    # Convert merged_matches to DataFrame
    return pd.DataFrame(merged_matches)

df = merge_odds(df_topsport, df_betsafe, df_7bet)

## Check arbitrages

In [6]:
# Function to calculate arbitrage percentages for any number of odds
def calculate_arbitrage(odds_list):
    total = sum(1 / odd for odd in odds_list)
    bet_percentages = [round((1 / odd) * 100 / total, 2) for odd in odds_list]
    return bet_percentages

arbitrage_list = []

# Function to display found arbitrages
def display_result(match, sum, bet_percentages, odds, bookmakers):
    profit = (sum - 1) * 100
    result_str = (
        f'Profit={profit:.2f}% on {match} '
    )
    for idx, (odd, bet_percentage) in enumerate(zip(odds, bet_percentages)):
        result_str += f'Bet {bet_percentage}% for {odd},\n'
    
    arbitrage_list.append(result_str)
    return

# Function to check for arbitrage opportunities
def check_arbitrage(row):
    # Extract odds for each bookmaker
    odds_top = row['Topsport']
    odds_betsafe = row['Betsafe']
    odds_7bet = row['7bet']

    # Create a list of tuples (bookmaker, odds)
    odds_list = []
    if odds_top is not None:
        odds_list.append((odds_top))
    if odds_betsafe is not None:
        odds_list.append((odds_betsafe))
    if odds_7bet is not None:
        odds_list.append((odds_7bet))

    odds_array = np.array(odds_list) 
    odds_array_t = np.transpose(odds_array)

    arbitrage = False
    min_sum = 2  # Any absurd starting value will work. At least should be 1

    # Generate combinations where each bookmaker provides one odds value
    for combination in it.product(*odds_array_t):
        # Check if the total sum indicates an arbitrage opportunity
        total_sum = round(sum(1 / odd for odd in combination), 5)
        # If the total sum indicates an arbitrage opportunity (should be less than 1)
        if total_sum < 1.03:
            arbitrage = True
            min_sum = min(min_sum, total_sum)
            bet_percentages = calculate_arbitrage(combination)
            display_result(row['Match'], total_sum, bet_percentages, combination, [odds[0] for odds in odds_list])

    # Return the results and minimum arbitrage sum, or False if no arbitrage is found
    return (True, min_sum) if arbitrage else (False, None)

df[['arbitrage', 'min_sum']] = df.apply(check_arbitrage, axis=1, result_type='expand')

display(df.head())
arbitrage_list


Unnamed: 0,Date,Match,League,Topsport,Betsafe,7bet,arbitrage,min_sum
0,2024-12-13 02:30,"(Boston Celtics, Detroit Pistons)",nba,"(1.12, 6.4)","(1.11, 6.6)","(1.12, 6.5)",False,
1,2024-12-13 02:30,"(Miami Heat, Toronto Raptors)",nba,"(1.19, 4.8)","(1.18, 4.9)","(1.19, 4.85)",False,
2,2024-12-13 03:00,"(New Orleans Pelicans, Sacramento Kings)",nba,"(2.85, 1.43)","(2.95, 1.4)","(2.9, 1.43)",False,
3,2024-12-14 02:00,"(Cleveland Cavaliers, Washington Wizards)",nba,"(1.05, 11.0)",,"(1.05, 10.0)",False,
4,2024-12-14 02:00,"(Philadelphia 76ers, Indiana Pacers)",nba,"(1.45, 2.8)",,"(1.44, 2.8)",False,


["Profit=2.62% on ('New York Knicks', 'San Antonio Spurs') Bet 77.34% for 1.26,\nBet 22.66% for 4.3,\n",
 "Profit=2.00% on ('Boston Celtics', 'Philadelphia 76ers') Bet 77.2% for 1.27,\nBet 22.8% for 4.3,\n",
 "Profit=2.94% on ('FC Bayern Munchen', 'Maccabi Tel Aviv') Bet 71.43% for 1.36,\nBet 28.57% for 3.4,\n",
 "Profit=2.30% on ('Maccabi Tel Aviv', 'Žalgiris') Bet 57.5% for 1.7,\nBet 42.5% for 2.3,\n",
 "Profit=2.53% on ('Plzen', 'Draw', 'Manchester United') Bet 19.51% for 5.0,\nBet 23.79% for 4.1,\nBet 56.71% for 1.72,\n",
 "Profit=2.94% on ('Plzen', 'Draw', 'Manchester United') Bet 19.83% for 4.9,\nBet 23.69% for 4.1,\nBet 56.48% for 1.72,\n",
 "Profit=2.74% on ('PAOK Saloniki', 'Draw', 'Ferencvaros') Bet 49.41% for 1.97,\nBet 27.42% for 3.55,\nBet 23.17% for 4.2,\n"]