# In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
import time

# Set up WebDriver
# Headless mode is requested through Firefox's own command-line flag. The
# `options.headless = True` setter was deprecated in Selenium 4.8 and removed
# in later releases, where assigning it silently has no effect and a visible
# browser window is opened instead.
options = webdriver.FirefoxOptions()
options.add_argument("-headless")
driver = webdriver.Firefox(options=options)

# Define seasons and matchdays
years = range(2017, 2025)   # season start years; each season is "<year>-<year + 1>"
match_days = range(1, 35)   # matchdays 1 through 34
all_matches = []            # one dict per scraped match, filled by the scraping loop

def get_date_for_match(row):
    """Return the text of the nearest match-date-header preceding ``row``.

    The header is looked up from ``row`` with an XPath on the ``preceding``
    axis, and its text is returned with surrounding whitespace removed.

    If the lookup or the text access raises any exception, the error is
    printed and the placeholder string "Unknown Date" is returned instead,
    so a missing header never aborts extraction of the match itself.
    """
    header_xpath = 'preceding::match-date-header[1]'
    try:
        header = row.find_element(By.XPATH, header_xpath)
        header_text = header.text
        return header_text.strip()
    except Exception as exc:
        print(f"Date extraction failed: {exc}")
    # Only reached when the lookup above failed.
    return "Unknown Date"

# CSS selectors evaluated relative to a single `.matchRow` element.
# The team-name lookups previously used an absolute page path
# ('#fixtures > fixturescomponent > div > div:nth-child(2) > ...'), which can
# only ever match inside one specific container on the page; every other row
# either failed or could not yield its own teams. Row-relative selectors,
# in the same style as the score selectors, let each row report its own data.
# NOTE(review): the `.tlc` team-code element is taken from the original
# selector; confirm against the live page markup.
HOME_TEAM_SELECTOR = 'score-bug .cell.home .tlc'
AWAY_TEAM_SELECTOR = 'score-bug .cell.away .tlc'
HOME_SCORE_SELECTOR = 'score-bug .cell.home .score'
AWAY_SCORE_SELECTOR = 'score-bug .cell.away .score'


def _cell_text(row, selector):
    """Return the stripped text of the first element under ``row`` matching ``selector``."""
    return row.find_element(By.CSS_SELECTOR, selector).text.strip()


def _extract_match(row, season, week):
    """Build the result record (a dict keyed like the CSV columns) for one match row."""
    date = get_date_for_match(row)
    home_team = _cell_text(row, HOME_TEAM_SELECTOR)
    away_team = _cell_text(row, AWAY_TEAM_SELECTOR)
    home_score = _cell_text(row, HOME_SCORE_SELECTOR)
    away_score = _cell_text(row, AWAY_SCORE_SELECTOR)
    return {
        'season': season,
        'match_day': week,
        'date': date,
        'home_team': home_team,
        'away_team': away_team,
        'result': f"{home_score}-{away_score}",
    }


# Visit every matchday page of every season and collect one record per match.
# The try/finally guarantees the browser process is shut down even when the
# run is interrupted (Ctrl-C) or an unexpected error escapes the loop.
try:
    for year in years:
        next_year = year + 1
        season = f"{year}-{next_year}"
        for week in match_days:
            url = f'https://www.bundesliga.com/en/bundesliga/matchday/{year}-{next_year}/{week}'
            print(f"Scraping: {url}")

            try:
                # Navigation is inside the try so that a single page that
                # fails to load is reported and skipped instead of aborting
                # the whole scrape.
                driver.get(url)
                # Wait for the main content
                WebDriverWait(driver, 15).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, 'fixturescomponent'))
                )
                match_rows = driver.find_elements(By.CSS_SELECTOR, '.matchRow')
            except Exception as e:
                print(f"Failed to load page or content: {e}")
                match_rows = []

            for row in match_rows:
                # Best effort per row: one malformed row must not discard the
                # remaining matches on the page.
                try:
                    all_matches.append(_extract_match(row, season, week))
                except Exception as e:
                    print(f"Failed to extract match: {e}")

            # Small pause between page loads to avoid hammering the site.
            time.sleep(1)
finally:
    driver.quit()

# Save to CSV
csv_file = 'bundesliga_full_data.csv'
# Column order of the output file; the names match the keys of the dicts
# collected in all_matches.
csv_columns = ['season', 'match_day', 'date', 'home_team', 'away_team', 'result']

# newline='' leaves line-ending handling to the csv module, as its
# documentation recommends for files passed to a writer.
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=csv_columns)
    writer.writeheader()
    writer.writerows(all_matches)

# f-string prefix added: the message previously printed the literal
# text "{csv_file}" instead of the file name.
print(f"Data saved to {csv_file}")