# WEB SCRAPING UEFA CHAMPIONS LEAGUE DATA
- AUTHOR: Marcos Rodríguez
- DATE: June 15th 2025

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from concurrent.futures import ThreadPoolExecutor
import os
import polars as pl

def matches_season(season:int) -> list:
    """Collect the match-page links listed for one UEFA Champions League season.

    Opens the season's history page in a new Chrome session, accepts the
    cookie banner, switches to the "Matches" tab, scrolls repeatedly so that
    more of the page gets loaded, and then reads the ``href`` of every
    match element found.

    Args:
        season: Season year used to build the history URL (e.g. ``2024``).

    Returns:
        A list with the ``href`` attribute of each match element found on the
        page (an entry is ``None`` when the element has no ``href``).

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the cookie
            button, the "Matches" link or the footer cannot be located.
    """
    # =================================
    # Environment Webdriver
    # =================================
    season = str(season)
    url = "https://www.uefa.com/uefachampionsleague/history/seasons/" + season + "/"

    driver = webdriver.Chrome()
    # try/finally guarantees the browser process is closed even when a lookup
    # below raises; otherwise every failure would leave a Chrome window open.
    try:
        driver.maximize_window()
        driver.get(url)
        time.sleep(2)

        # Cookies button
        driver.find_element(By.ID, "onetrust-accept-btn-handler").click()
        # Matches button
        driver.find_element(By.LINK_TEXT, "Matches").click()
        time.sleep(2)

        # =================================
        # Paused Scroll
        # =================================
        # NOTE(review): the offset subtracted from the scroll height is the
        # footer's *width*; presumably its height was intended - confirm.
        width = driver.find_element(By.CSS_SELECTOR, ".footer-wrap.lazyloaded").size['width']

        # Scroll a fixed number of times, pausing after each one so the page
        # has time to append further content before the next scroll.
        for _ in range(11):
            scroll_target = driver.execute_script("return document.body.scrollHeight") - width
            driver.execute_script(f"window.scrollTo(0, {scroll_target});")
            time.sleep(2)

        # =================================
        # Soccer Match Links
        # =================================
        teams_buttons = driver.find_elements(By.CSS_SELECTOR, ".mu.cFPSBpJ37woVu853")
        # The hrefs must be read while the session is still alive.
        links_teams = [button.get_attribute("href") for button in teams_buttons]
        time.sleep(2)
    finally:
        driver.quit()

    print("//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ \n")
    print(f"There were {len(links_teams)} matches in the Season {season} \n")
    print("//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ \n")

    return links_teams

def match_stats(url:str) -> list:
    """Scrape the comparison statistics shown on a single match page.

    Opens ``url`` in a new Chrome session, accepts the cookie banner and reads
    the text of the team-versus header and of every comparison stat box.

    Args:
        url: Address of the match page to open.

    Returns:
        A list of lists of strings: one entry per stat box (its text split on
        newlines), followed by a final entry holding the header text split on
        newlines (the team names).

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the cookie
            button or the team-versus header cannot be located.
    """
    # =================================
    # Environment Webdriver
    # =================================
    driver = webdriver.Chrome()
    # try/finally guarantees the browser is closed even when a lookup fails;
    # this function is run for many matches, so leaked sessions add up quickly.
    try:
        driver.maximize_window()
        driver.get(url)
        time.sleep(2)

        # Cookies button
        driver.find_element(By.ID, "onetrust-accept-btn-handler").click()

        # =================================
        # Soccer Match Stats
        # =================================
        versus = driver.find_element(By.CSS_SELECTOR, ".R8S4vodbnPwrHjMR.vs-team")
        teams = versus.text.split("\n")
        box_stats = driver.find_elements(By.CSS_SELECTOR, ".stats-module__single-stat.stats-module__single-stat--comparison")
        # Element text must be extracted before the session is closed.
        stats = [stat.text.split("\n") for stat in box_stats]
        time.sleep(1)
        # The team names go last so callers can treat them as the final "stat".
        stats.append(teams)
    finally:
        driver.quit()

    return stats



In [None]:
# =================================
# INPUTS:
# season year: int
# =================================
season = 2024

if __name__ == "__main__":

    print("//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ \n")
    print(f"Initializing Web Scraping for the {season} Season of the UCL \n")
    matches = matches_season(season)

    # Use half of the available cores, but never fewer than one worker:
    # os.cpu_count() may return None, and half of a single core truncates to
    # 0, which ThreadPoolExecutor rejects with a ValueError.
    cpu = max(1, (os.cpu_count() or 1) // 2)

    # One browser session per match, run concurrently; executor.map keeps the
    # results in the same order as `matches`.
    with ThreadPoolExecutor(max_workers=cpu) as executor:
        all_stats_matches = list(executor.map(match_stats, matches))

    # Everything below uses the first match as the template for the column
    # layout, so fail with a clear message instead of an opaque IndexError.
    if not all_stats_matches:
        raise RuntimeError(f"No matches were found for the {season} season; nothing to aggregate.")

    # Pivot from "one list of stats per match" to "one list of values per
    # stat". For each stat entry, element 0 is taken as the local team's value
    # and the last element as the visiting team's value.
    n_stats = len(all_stats_matches[0])
    local_data = [[match[i][0] for match in all_stats_matches] for i in range(n_stats)]
    visit_data = [[match[i][-1] for match in all_stats_matches] for i in range(n_stats)]

    # Element 1 of each stat entry is used as its column name. The final entry
    # holds the team names rather than a stat, so it gets an explicit label.
    head_data = [entry[1] for entry in all_stats_matches[0]]
    head_data[-1] = 'Team'

    df_local = pl.DataFrame(dict(zip(head_data, local_data)))
    df_visit = pl.DataFrame(dict(zip(head_data, visit_data)))

    # Suffix the columns so both frames can be joined side by side.
    df_visit = df_visit.rename({name: name + "_visit" for name in df_visit.columns})
    df_local = df_local.rename({name: name + "_local" for name in df_local.columns})

    # Move the team-name column (currently last) to the front.
    df_visit = df_visit.select([df_visit.columns[-1]] + df_visit.columns[:-1])
    df_local = df_local.select([df_local.columns[-1]] + df_local.columns[:-1])

    # Every column except the team name is converted from text to Float64.
    df_visit = df_visit.with_columns([pl.col(col).cast(pl.Float64) for col in df_visit.columns[1:]])
    df_local = df_local.with_columns([pl.col(col).cast(pl.Float64) for col in df_local.columns[1:]])

    # One row per match: local-team columns followed by visiting-team columns.
    season_stats = pl.concat([df_local, df_visit], how="horizontal")
    print(f"End Web Scraping for the {season} Season of the UCL \n")
    print("//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ \n")


In [None]:
# Show the assembled per-match DataFrame as this cell's output.
season_stats