# WEB SCRAPING UEFA CHAMPIONS LEAGUE DATA
- AUTHOR: Marcos Rodríguez
- DATE: June 15th 2025

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from concurrent.futures import ThreadPoolExecutor
import os
import polars as pl

def matches_season(season:int):
    """Collect the match-page links listed for one UEFA Champions League season.

    Opens the season's history page on uefa.com in a new Chrome session,
    accepts the cookie banner, opens the "Matches" tab, scrolls the page
    several times with pauses, and then reads the ``href`` of every match
    element found.

    Args:
        season: Season year used to build the history URL (e.g. 2024).

    Returns:
        A list with the ``href`` attribute of each matched element, in page
        order. An entry is ``None`` if the element carries no ``href``.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the cookie
            button, the "Matches" link or the footer element is not found.
    """
    # =================================
    # Environment Webdriver
    # =================================
    season = str(season)
    url = "https://www.uefa.com/uefachampionsleague/history/seasons/" + season + "/"

    driver = webdriver.Chrome()
    # Everything that touches the browser lives inside try/finally so the
    # Chrome process is always closed, even when a lookup or click fails.
    try:
        driver.maximize_window()
        driver.get(url)
        time.sleep(2)

        # Cookies button
        driver.find_element(By.ID, "onetrust-accept-btn-handler").click()
        # Matches button
        driver.find_element(By.LINK_TEXT, "Matches").click()
        time.sleep(2)

        # =================================
        # Paused Scroll
        # =================================
        # NOTE(review): the footer *width* is used as the vertical offset that
        # keeps the scroll target above the page bottom - confirm this is
        # intended rather than the footer height.
        width = driver.find_element(By.CSS_SELECTOR, ".footer-wrap.lazyloaded").size['width']

        # Scroll repeatedly, pausing after each jump; presumably this gives
        # lazily loaded match entries time to render (verify against the page).
        for _ in range(11):
            scroll_target = driver.execute_script("return document.body.scrollHeight") - width
            driver.execute_script(f"window.scrollTo(0, {scroll_target});")
            time.sleep(2)

        # =================================
        # Soccer Match Links
        # =================================
        teams_buttons = driver.find_elements(By.CSS_SELECTOR, ".mu.cFPSBpJ37woVu853")
        # Read the attributes while the session is still alive.
        links_teams = [button.get_attribute("href") for button in teams_buttons]
        time.sleep(2)
    finally:
        driver.quit()

    print("//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ \n")
    print(f"There were {len(links_teams)} matches in the Season {season} \n")
    print("//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ \n")

    return links_teams

def match_stats(url:str):
    """Scrape the comparison statistics shown on a single match page.

    Opens ``url`` in a new Chrome session, accepts the cookie banner and reads
    the text of the teams header and of every comparison-stat element.

    Args:
        url: Address of the match page to open.

    Returns:
        A list of lists of strings: one entry per comparison-stat element
        (its text split on newlines), followed by one final entry holding the
        teams header text split on newlines.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the cookie
            button or the teams header element is not found.
    """
    # =================================
    # Environment Webdriver
    # =================================
    driver = webdriver.Chrome()
    # This function runs once per match inside a thread pool, so the browser
    # must be closed on every path; otherwise each failed page would leave a
    # Chrome process behind.
    try:
        driver.maximize_window()
        driver.get(url)
        time.sleep(2)

        # Cookies button
        driver.find_element(By.ID, "onetrust-accept-btn-handler").click()

        # =================================
        # Soccer Match Stats
        # =================================
        versus = driver.find_element(By.CSS_SELECTOR, ".R8S4vodbnPwrHjMR.vs-team")
        teams = versus.text.split("\n")
        box_stats = driver.find_elements(By.CSS_SELECTOR, ".stats-module__single-stat.stats-module__single-stat--comparison")
        # Element text must be read before the session is closed.
        stats = [stat.text.split("\n") for stat in box_stats]
        time.sleep(1)
        # The teams entry goes last; callers rely on it being the final item.
        stats.append(teams)
    finally:
        driver.quit()

    return stats



In [2]:
# =================================
# INPUTS:
# season year: int
# =================================
season = 2024

if __name__ == "__main__":
    # Pipeline: collect the season's match links, scrape every match page in
    # parallel, then pivot the per-match lists into one polars DataFrame with
    # "_local" columns followed by "_visit" columns.

    print("//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ \n")
    print(f"Initializing Web Scraping for the {season} Season of the UCL \n")
    matches = matches_season(season)
    if not matches:
        # Without this guard the failure would surface later as an opaque
        # IndexError when the first match is used to derive the headers.
        raise RuntimeError(f"No match links were found for the {season} season")

    # Use about half of the logical CPUs, but never fewer than one worker:
    # os.cpu_count() may return None, and ThreadPoolExecutor rejects
    # max_workers=0 (which int(1 * 0.5) would produce on a single-CPU host).
    cpu = max(1, (os.cpu_count() or 1) // 2)

    with ThreadPoolExecutor(max_workers=cpu) as executor:
        all_stats_matches = list(executor.map(match_stats, matches))

    # Every scraped match is a list of entries. For each entry the first item
    # feeds the "local" side, the last item feeds the "visit" side, and the
    # item at index 1 is taken as the column header (from the first match).
    n_fields = len(all_stats_matches[0])
    local_data = [[match[i][0] for match in all_stats_matches] for i in range(n_fields)]
    visit_data = [[match[i][-1] for match in all_stats_matches] for i in range(n_fields)]
    head_data = [entry[1] for entry in all_stats_matches[0]]
    # The final entry is the teams header, not a statistic.
    head_data[-1] = 'Team'

    df_local = pl.DataFrame(dict(zip(head_data, local_data)))
    df_visit = pl.DataFrame(dict(zip(head_data, visit_data)))

    df_visit = df_visit.rename({name: name + "_visit" for name in df_visit.columns})
    df_local = df_local.rename({name: name + "_local" for name in df_local.columns})

    # Move the team column (built last) to the front.
    df_visit = df_visit.select([df_visit.columns[-1]] + df_visit.columns[:-1])
    df_local = df_local.select([df_local.columns[-1]] + df_local.columns[:-1])

    # Everything except the team name was scraped as text; cast it to floats.
    df_visit = df_visit.with_columns([pl.col(col).cast(pl.Float64) for col in df_visit.columns[1:]])
    df_local = df_local.with_columns([pl.col(col).cast(pl.Float64) for col in df_local.columns[1:]])

    season_stats = pl.concat([df_local, df_visit], how="horizontal")
    print(f"End Web Scraping for the {season} Season of the UCL \n")
    print("//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ \n")


//\//\//\//\//\//\//\//\//\//\//\//\//\//\ 

Initializing Web Scraping for the 2024 Season of the UCL 

//\//\//\//\//\//\//\//\//\//\//\//\//\//\ 

There were 125 matches in the Season 2024 

//\//\//\//\//\//\//\//\//\//\//\//\//\//\ 

End Web Scraping for the 2024 Season 2025 of the UCL 

//\//\//\//\//\//\//\//\//\//\//\//\//\//\ 



In [None]:
season_stats

Team_local,Possession (%)_local,Passing accuracy (%)_local,Passes attempted_local,Passes completed_local,Distance covered (km)_local,Goals_local,Total attempts_local,Attempts on target_local,Attempts off target_local,Blocked_local,Woodwork_local,Corners taken_local,Offsides_local,Balls recovered_local,Tackles_local,Blocks_local,Clearances completed_local,Yellow cards_local,Red cards_local,Fouls committed_local,Team_visit,Possession (%)_visit,Passing accuracy (%)_visit,Passes attempted_visit,Passes completed_visit,Distance covered (km)_visit,Goals_visit,Total attempts_visit,Attempts on target_visit,Attempts off target_visit,Blocked_visit,Woodwork_visit,Corners taken_visit,Offsides_visit,Balls recovered_visit,Tackles_visit,Blocks_visit,Clearances completed_visit,Yellow cards_visit,Red cards_visit,Fouls committed_visit
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""B. Dortmund""",46.0,86.0,410.0,353.0,111.86,0.0,13.0,3.0,7.0,3.0,1.0,9.0,1.0,28.0,16.0,2.0,14.0,3.0,0.0,12.0,"""Real Madrid""",54.0,92.0,549.0,504.0,108.17,2.0,13.0,6.0,5.0,2.0,0.0,8.0,0.0,34.0,20.0,3.0,14.0,1.0,0.0,8.0
"""Real Madrid""",55.0,90.0,653.0,587.0,117.63,2.0,19.0,7.0,6.0,6.0,0.0,8.0,0.0,31.0,16.0,2.0,6.0,1.0,0.0,12.0,"""Bayern München""",45.0,86.0,486.0,419.0,121.13,1.0,8.0,5.0,1.0,2.0,0.0,4.0,2.0,37.0,14.0,6.0,26.0,1.0,0.0,10.0
"""Paris""",67.0,91.0,668.0,605.0,110.97,0.0,31.0,5.0,16.0,10.0,3.0,12.0,0.0,43.0,16.0,2.0,6.0,2.0,0.0,8.0,"""B. Dortmund""",33.0,79.0,293.0,232.0,119.81,1.0,6.0,3.0,1.0,2.0,0.0,4.0,2.0,43.0,16.0,10.0,27.0,2.0,0.0,6.0
"""B. Dortmund""",46.0,86.0,436.0,376.0,119.75,1.0,12.0,4.0,4.0,4.0,0.0,4.0,1.0,51.0,19.0,1.0,10.0,2.0,0.0,11.0,"""Paris""",54.0,92.0,585.0,539.0,110.02,0.0,14.0,3.0,10.0,1.0,2.0,3.0,2.0,37.0,15.0,4.0,8.0,1.0,0.0,6.0
"""Bayern München""",54.0,94.0,568.0,532.0,107.1,2.0,14.0,5.0,7.0,2.0,0.0,6.0,0.0,34.0,18.0,3.0,10.0,2.0,0.0,11.0,"""Real Madrid""",46.0,92.0,529.0,485.0,105.56,2.0,8.0,4.0,1.0,3.0,0.0,5.0,2.0,24.0,18.0,2.0,3.0,2.0,0.0,11.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Paris""",65.0,91.0,695.0,629.0,111.85,2.0,17.0,2.0,10.0,5.0,1.0,10.0,3.0,55.0,18.0,5.0,7.0,0.0,0.0,8.0,"""B. Dortmund""",35.0,78.0,321.0,250.0,115.59,0.0,13.0,1.0,7.0,5.0,1.0,7.0,3.0,47.0,20.0,5.0,16.0,3.0,0.0,10.0
"""Lazio""",51.0,90.0,594.0,537.0,121.45,1.0,19.0,7.0,4.0,8.0,0.0,11.0,1.0,31.0,8.0,4.0,9.0,3.0,0.0,6.0,"""Atleti""",49.0,90.0,622.0,561.0,114.78,1.0,9.0,4.0,1.0,4.0,0.0,5.0,0.0,34.0,16.0,8.0,20.0,4.0,0.0,12.0
"""Feyenoord""",58.0,90.0,632.0,570.0,121.96,2.0,16.0,5.0,5.0,6.0,0.0,4.0,6.0,46.0,14.0,3.0,9.0,1.0,0.0,13.0,"""Celtic""",42.0,83.0,412.0,343.0,116.38,0.0,8.0,3.0,2.0,3.0,0.0,1.0,0.0,40.0,12.0,6.0,19.0,3.0,2.0,11.0
"""Young Boys""",53.0,85.0,504.0,427.0,118.92,1.0,7.0,3.0,3.0,1.0,0.0,3.0,3.0,38.0,19.0,5.0,18.0,3.0,0.0,12.0,"""Leipzig""",47.0,81.0,404.0,329.0,118.63,3.0,21.0,8.0,8.0,5.0,0.0,7.0,3.0,38.0,16.0,1.0,16.0,4.0,0.0,11.0
