In [39]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Scrape match rows (date, home team, score, away team) from the page below
# into four parallel lists: date, home_team, score and away_team.

# Define the website to scrape
website = 'https://www.adamchoi.co.uk/overs/detailed'

# Maximum time (in seconds) to wait for the "All matches" control to be usable
wait_timeout = 10

# Initialize lists to hold the extracted data. They are created before the
# browser starts so that they always exist, whatever happens while scraping.
date = []
home_team = []
score = []
away_team = []

# Use ChromeDriverManager to automatically download and manage the correct ChromeDriver version
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

try:
    # Open the website
    driver.get(website)

    # Click the "All matches" button to display all match data. Wait for the
    # label to be clickable instead of assuming it is already there when
    # get() returns; a TimeoutException is raised if it never shows up.
    all_matches_button = WebDriverWait(driver, wait_timeout).until(
        EC.element_to_be_clickable(
            (By.XPATH, "//label[contains(text(), 'All matches')]")
        )
    )
    all_matches_button.click()

    # Select the 2024/2025 season from the dropdown. This step is best-effort:
    # on failure the error is reported and scraping continues with whatever
    # season the page is currently showing.
    try:
        # Locate the season dropdown
        season_dropdown = driver.find_element(By.XPATH, "//select[@id='season']")

        # Create a Select object for interacting with the dropdown
        select = Select(season_dropdown)

        # Select the '2024/2025' season by its visible text
        select.select_by_visible_text('2024/2025')
    except Exception as e:
        print(f"Error selecting season: {e}")

    # Find all match rows by the 'tr' tag name
    matches = driver.find_elements(By.TAG_NAME, 'tr')

    # Loop through the table rows to extract match data
    for match in matches:
        try:
            # Find all 'td' elements (columns) in the current row
            cells = match.find_elements(By.TAG_NAME, 'td')

            # Only rows with exactly 4 columns (date, home team, score,
            # away team) are match rows; skip everything else.
            if len(cells) != 4:
                continue

            # Read every cell once, and before touching the result lists:
            # if any read fails, nothing has been appended for this row,
            # so the four lists always keep the same length.
            match_date, home, result, away = [cell.text for cell in cells]
        except Exception as e:
            print(f"Error processing row: {e}")
            continue

        # Append each piece of data to its corresponding list
        date.append(match_date)
        home_team.append(home)
        score.append(result)
        away_team.append(away)

        # Print each row for debugging
        print(f"Date: {match_date}, Home: {home}, Score: {result}, Away: {away}")
finally:
    # Close the browser even when a step above raised, so the Chrome and
    # chromedriver processes are not left running.
    driver.quit()

# Assemble the scraped columns into a table, one row per scraped entry
df = pd.DataFrame(
    dict(Date=date, home_team=home_team, score=score, away_team=away_team)
)

# Swap every '-' in the scores for ':' (literal match, not a regex) so that
# Excel does not interpret the scores as dates
df['score'] = df['score'].str.replace('-', ':', regex=False)

# Write the table to a CSV file without the index column, then display it
df.to_csv('Football_data.csv', index=False)
print(df)


Date: 17-08-2024, Home: Arsenal, Score: 2 - 0, Away: Wolves
Date: 24-08-2024, Home: Aston Villa, Score: 0 - 2, Away: Arsenal
Date: 31-08-2024, Home: Arsenal, Score: 1 - 1, Away: Brighton
Date: 15-09-2024, Home: Tottenham, Score: 0 - 1, Away: Arsenal
Date: 17-08-2024, Home: West Ham, Score: 1 - 2, Away: Aston Villa
Date: 24-08-2024, Home: Aston Villa, Score: 0 - 2, Away: Arsenal
Date: 31-08-2024, Home: Leicester, Score: 1 - 2, Away: Aston Villa
Date: 14-09-2024, Home: Aston Villa, Score: 3 - 2, Away: Everton
Date: 17-08-2024, Home: Nott'm Forest, Score: 1 - 1, Away: Bournemouth
Date: 25-08-2024, Home: Bournemouth, Score: 1 - 1, Away: Newcastle
Date: 31-08-2024, Home: Everton, Score: 2 - 3, Away: Bournemouth
Date: 14-09-2024, Home: Bournemouth, Score: 0 - 1, Away: Chelsea
Date: 18-08-2024, Home: Brentford, Score: 2 - 1, Away: Crystal Palace
Date: 25-08-2024, Home: Liverpool, Score: 2 - 0, Away: Brentford
Date: 31-08-2024, Home: Brentford, Score: 3 - 1, Away: Southampton
Date: 14-09-2024,