In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import time

# Configure headless Chrome with a desktop browser user agent
chrome_options = Options()
chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')
chrome_options.add_argument("--headless")

# Start the Chrome WebDriver from the locally installed chromedriver binary
service = Service("/opt/homebrew/bin/chromedriver")
driver = webdriver.Chrome(service=service, options=chrome_options)

# The oddschecker page whose HTML is captured for the parsing cell below
url = "https://www.oddschecker.com/politics/us-politics/us-presidential-election/winner"

try:
    driver.get(url)

    # Fixed pause so the page has time to finish loading before the
    # HTML snapshot is taken
    time.sleep(10)

    # Snapshot of the page HTML, consumed by the parsing cell
    page_source = driver.page_source
finally:
    # Always shut the browser down, even when navigation or the snapshot
    # fails; otherwise the headless Chrome process is left running
    driver.quit()

In [9]:
# Parse the HTML snapshot captured by the scraping cell
soup = BeautifulSoup(page_source, 'html.parser')

# The odds grid is the <tbody id="t1"> element
odds_table = soup.find('tbody', id='t1')
if odds_table is None:
    # Fail with a clear message instead of an AttributeError on None below
    raise ValueError("No odds table (tbody#t1) found in page_source")

# One dict per table row: {'Market': <name>, <bookmaker code>: <decimal odds>, ...}
odds_data = []
bookmakers_set = set()

for row in odds_table.find_all('tr'):
    name_link = row.find('a', class_='popup')
    if name_link is None:
        # Rows without a name link cannot be labelled; skip them rather
        # than crash on `.text` of None
        continue
    market_name = name_link.text.strip()
    odds_dict = {'Market': market_name}

    # Odds cells are the <td> elements carrying the class 'o' or 'bs'
    for td in row.find_all('td', class_=lambda x: x and ('o' in x.split() or 'bs' in x.split())):
        bookmaker = td.get('data-bk')  # bookmaker code attribute
        decimal_odds = td.get('data-odig')  # decimal odds attribute
        if bookmaker and decimal_odds:  # Only add if both are present
            # NOTE(review): a value of 0.0 looks like "no price offered" — confirm
            odds_dict[bookmaker] = float(decimal_odds)
            bookmakers_set.add(bookmaker)

    odds_data.append(odds_dict)

# Build the frame with an explicit, sorted column list so that every bookmaker
# gets a column (NaN where a row has no entry) and an empty result still
# yields a valid, empty frame instead of a KeyError in set_index
df = pd.DataFrame(odds_data, columns=['Market', *sorted(bookmakers_set)]).set_index('Market')

print(df)

                       AKB     B3      BF     BY     CE     DP    EE      FB  \
Market                                                                         
Kamala Harris         1.91    1.8    1.94    1.8    1.8    1.8   1.8    1.80   
Donald Trump          2.05    2.0    2.10    2.0    2.0    2.0   2.0    1.91   
Robert Kennedy Jr.  151.00   67.0  245.00  101.0   67.0  101.0  67.0  126.00   
J D Vance           151.00   81.0    0.00    0.0  101.0    0.0   0.0    0.00   
Nikki Haley         151.00   67.0  294.00  101.0   67.0  101.0  51.0  151.00   
...                    ...    ...     ...    ...    ...    ...   ...     ...   
Dwayne Johnson        0.00    0.0  980.00    0.0    0.0    0.0   0.0    0.00   
Bernie Sanders        0.00  201.0  980.00    0.0  401.0    0.0   0.0    0.00   
Tim Scott             0.00    0.0  980.00    0.0    0.0    0.0   0.0    0.00   
Jill Stein            0.00    0.0    0.00    0.0    0.0    0.0   0.0    0.00   
Cornel West           0.00    0.0  980.0

In [22]:
import concurrent.futures
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import time

def extract_odds(url, user_agent):
    """Scrape one oddschecker page and return its odds grid as a DataFrame.

    A fresh headless Chrome session is started for the call and always shut
    down before returning.

    Args:
        url: Page to load.
        user_agent: User-agent string passed to Chrome for this session.

    Returns:
        A DataFrame indexed by 'Party' with one float column per bookmaker
        code (sorted alphabetically) plus a 'URL' column holding ``url``;
        ``None`` when the page has no ``tbody#t1`` element or no usable rows.
    """
    # Set up Chrome options
    chrome_options = Options()
    chrome_options.add_argument(f'user-agent={user_agent}')
    chrome_options.add_argument("--headless")

    # Set up the Chrome WebDriver
    service = Service("/opt/homebrew/bin/chromedriver")
    driver = webdriver.Chrome(service=service, options=chrome_options)

    try:
        driver.get(url)

        # Fixed pause so the page has time to finish loading
        time.sleep(10)  # Adjust if necessary

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # The odds grid is the <tbody id="t1"> element
        odds_table = soup.find('tbody', id='t1')

        if not odds_table:
            print(f"No odds table found for URL: {url}")
            return None  # Skip this URL if the table isn't found

        odds_data = []
        bookmakers_set = set()

        for row in odds_table.find_all('tr'):
            name_link = row.find('a', class_='popup')
            if name_link is None:
                # Rows without a name link cannot be labelled; skip them
                # rather than crash on `.text` of None
                continue
            odds_dict = {'Party': name_link.text.strip()}

            # Odds cells are the <td> elements carrying the class 'o' or 'bs'
            for td in row.find_all('td', class_=lambda x: x and ('o' in x.split() or 'bs' in x.split())):
                bookmaker = td.get('data-bk')  # bookmaker code attribute
                decimal_odds = td.get('data-odig')  # decimal odds attribute
                if bookmaker and decimal_odds:  # Only add if both are present
                    # NOTE(review): 0.0 looks like "no price offered" — confirm
                    odds_dict[bookmaker] = float(decimal_odds)
                    bookmakers_set.add(bookmaker)

            odds_data.append(odds_dict)

        if not odds_data:
            # An empty record list has no 'Party' column, so set_index would
            # raise KeyError; treat it like a missing table instead
            print(f"No odds rows found for URL: {url}")
            return None

        # Create a DataFrame with all bookmakers as columns
        df = pd.DataFrame(odds_data).set_index('Party')

        # Ensure all bookmakers are columns, even if some are missing in certain rows
        df = df.reindex(columns=sorted(bookmakers_set))

        # Tag every row with its source page so frames can be told apart later
        df['URL'] = url

        return df
    finally:
        # Close the browser on every exit path
        driver.quit()

# Pages to scrape: the presidential winner market followed by the
# state-betting pages, all under the same oddschecker section
_politics_root = "https://www.oddschecker.com/politics/us-politics"
_state_slugs = ("arizona", "georgia", "michigan", "pennsylvania", "wisconsin")

urls = [f"{_politics_root}/us-presidential-election/winner"]
urls.extend(f"{_politics_root}/us-state-betting/{slug}" for slug in _state_slugs)

# Pool of user-agent strings to rotate through when scraping
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:87.0) Gecko/20100101 Firefox/87.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15',
]

# Collected per-URL DataFrames; rebuilt from scratch each time the cell runs
dataframes_list = []

# Number of pages scraped concurrently, and the size of each batch
batch_size = 5

# Scrape the URLs in parallel, one batch at a time: every future of a batch is
# drained before the next batch is submitted
with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
    for batch_start in range(0, len(urls), batch_size):
        batch_end = min(batch_start + batch_size, len(urls))

        # Map each future back to its URL so failures can be reported by page
        future_to_url = {}
        for i in range(batch_start, batch_end):
            user_agent = user_agents[i % len(user_agents)]  # Rotate user agents
            future = executor.submit(extract_odds, urls[i], user_agent)
            future_to_url[future] = urls[i]

        # Results are appended in completion order, not in `urls` order
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                df = future.result()
            except Exception as exc:
                # result() re-raises whatever the worker raised; report it and
                # carry on so one bad page does not discard the other results
                print(f"Scrape failed for URL: {future_to_url[future]} ({exc!r})")
                continue
            if df is not None:
                dataframes_list.append(df)


In [23]:
# Print the raw list of DataFrames held in `dataframes_list` (plain-text list repr)
print(dataframes_list)

[             AKB   B3    BF   BY    CE   DP   EE   FB    FR   G5  ...   QN  \
Party                                                             ...        
Democrats    0.0  0.0  1.95  0.0  1.73  0.0  0.0  0.0  1.83  0.0  ...  0.0   
Republicans  0.0  0.0  1.94  0.0  2.00  0.0  0.0  0.0  1.83  0.0  ...  0.0   

              S6   SI   SK   SX   UN   VC   VT   WA    WH  
Party                                                      
Democrats    1.8  0.0  0.0  0.0  0.0  0.0  0.0  1.8  1.73  
Republicans  2.0  0.0  0.0  0.0  0.0  0.0  0.0  1.9  2.00  

[2 rows x 27 columns],                        AKB     B3      BF     BY     CE     DP    EE      FB  \
Party                                                                          
Kamala Harris         1.91    1.8    1.93    1.8    1.8    1.8   1.8    1.80   
Donald Trump          2.05    2.0    2.12    2.0    2.0    2.0   2.0    1.91   
Robert Kennedy Jr.  151.00   67.0  245.00  101.0   67.0  101.0  67.0  126.00   
J D Vance           15

In [24]:
from dotenv import load_dotenv
import os
import queue

# Load environment variables via python-dotenv before reading them below
load_dotenv()

# Betfair settings read from the environment; each is None when its
# variable is not set
bf_usr, bf_pass, bf_api, bf_session = (
    os.getenv(env_name)
    for env_name in ("BF_LOGIN", "BF_PASS", "BF_API_KEY", "BF_SESSION")
)


In [30]:
import betfairlightweight
from betfairlightweight import filters
from betfairlightweight import APIClient

# Build the API client and attach the session token read from the environment
client = APIClient(bf_usr, bf_pass, app_key=bf_api)
client.session_token = bf_session

# Restrict the catalogue query to a single event type
politics_event_type_id = '2378961'  # Politics event type
market_filter = filters.market_filter(event_type_ids=[politics_event_type_id])

# Fetch up to 100 market catalogue entries matching the filter
market_catalogues = client.betting.list_market_catalogue(
    filter=market_filter,
    max_results=100,
)

# List each market as "<name> <id>"
for market in market_catalogues:
    print(f"{market.market_name} {market.market_id}")

Next Labour Leader 1.170273835
Next Lib Dem Leader 1.179233218
Year Rishi Sunak replaced as Conservative Leader 1.205534173
Next Conservative Leader 1.205526560
Democratic Nominee 1.178163685
Democratic Vice President Nominee 1.190716127
Nominee Forecast 1.216990085
Northern Territory Election 2024 1.223219482
Queensland State Election 2024 1.218937278
Will Joe Biden be impeached before 2024 Election? 1.218257169
Senate Control after 2024 Election 1.225479090
Election Winner 1.176878927
Winning Party 1.178176964
Popular Vote Winner 1.178165812
Party of Popular Vote Winner 1.178176967
Gender of Election Winner 1.178176193
Will Election Winner lose Popular Vote? 1.226054697
Joe Manchin to be re-elected to the senate in 2024 1.213966025
Mississippi 1.230000329
Arizona 1.229996509
Massachusetts 1.230000327
Oklahoma 1.230123427
Pennsylvania 1.230123429
South Dakota 1.230123632
Michigan 1.229999165
Oregon 1.230123428
Tennessee 1.230123636
Minnesota 1.229999638
Hawaii 1.229997182
Alabama 1.22