In [65]:
import time
from io import StringIO

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [97]:
# Chrome options: request the "performance" and "browser" log channels at
# level ALL through the goog:loggingPrefs capability.
logging_prefs = {"performance": "ALL", "browser": "ALL"}
options = webdriver.ChromeOptions()
options.set_capability("goog:loggingPrefs", logging_prefs)

# ChromeDriverManager().install() supplies the chromedriver location that the
# Service is built from; the resulting driver is shared by every later cell.
chrome_service = ChromeService(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

In [67]:
# Cap page loads at 10 seconds for this driver.
driver.set_page_load_timeout(10)

try:
    driver.get("https://www.fotmob.com/leagues/87/stats/season/27233/players/total_tackle/laliga")
except TimeoutException:
    # Deliberate best-effort: a page that is still loading after the cap is
    # acceptable here. Any other error (bad session, network failure,
    # KeyboardInterrupt) is no longer swallowed and propagates to the reader.
    print("Page load hit the 10 s timeout; continuing with the partially loaded page.")


In [68]:
# Reset the result first: if the scrape below fails, df_fotmob must not keep
# rows from an earlier scrape for a later cell to copy under a new name.
df_fotmob = pd.DataFrame(columns=['Player', 'StatValue'])

try:
    # Wait up to 20 s for the stats list container to be present in the DOM.
    wait = WebDriverWait(driver, 20)
    # Class name of the stats list container.
    # NOTE(review): this looks like a generated class name and may not be
    # stable across site releases -- verify if the wait starts timing out.
    container_class = "css-fvfi51-LeagueSeasonStatsTableCSS"
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, container_class)))

    # Every <a> element inside the container is treated as a candidate row.
    rows = driver.find_elements(By.CSS_SELECTOR, f".{container_class} a")

    # A row's text is expected to split into [Rank, Name, Details, StatValue];
    # rows with any other number of lines are skipped.
    player_data = []
    for row in rows:
        row_text = row.text.split('\n')
        if len(row_text) == 4:
            player_data.append(row_text)

    # Zero usable rows is a failed scrape, not a success: report it through
    # the except branch instead of printing the success message.
    if not player_data:
        raise ValueError(f"no 4-field player rows found among {len(rows)} links")

    scraped = pd.DataFrame(player_data, columns=['Rank', 'Player', 'Details', 'StatValue'])

    # Keep only the player name and the stat value.
    scraped = scraped[['Player', 'StatValue']].copy()

    # Convert the stat to a number; unparseable values become NaN.
    scraped['StatValue'] = pd.to_numeric(scraped['StatValue'], errors='coerce')

    # Publish the result only after every step above succeeded.
    df_fotmob = scraped

    print("Successfully scraped and cleaned FotMob data!")
    print(df_fotmob.head())

except Exception as e:
    print(f"Error scraping FotMob: {e}")

Successfully scraped and cleaned FotMob data!
              Player  StatValue
0      Gabriel Suazo        4.4
1   Santiago Mourino        4.4
2      Dani Ceballos        4.3
3  Eduardo Camavinga        4.2
4     Alejandro Rego        4.2


In [69]:
# Bare expression: the notebook renders df_fotmob as this cell's output.
df_fotmob

Unnamed: 0,Player,StatValue
0,Gabriel Suazo,4.4
1,Santiago Mourino,4.4
2,Dani Ceballos,4.3
3,Eduardo Camavinga,4.2
4,Alejandro Rego,4.2
...,...,...
331,Antonio Sivera,0.1
332,Augusto Batalla,0.1
333,Julen Agirrezabala,0.1
334,Thibaut Courtois,0.1


In [70]:
# Snapshot the scraped frame under a stat-specific name; df_fotmob itself is
# reassigned by the later scraping cells.
df_tackles = df_fotmob.copy(deep=True)

In [71]:
# Bare expression: the notebook renders df_tackles as this cell's output.
df_tackles

Unnamed: 0,Player,StatValue
0,Gabriel Suazo,4.4
1,Santiago Mourino,4.4
2,Dani Ceballos,4.3
3,Eduardo Camavinga,4.2
4,Alejandro Rego,4.2
...,...,...
331,Antonio Sivera,0.1
332,Augusto Batalla,0.1
333,Julen Agirrezabala,0.1
334,Thibaut Courtois,0.1


In [72]:
# Cap page loads at 10 seconds for this driver.
driver.set_page_load_timeout(10)

try:
    driver.get("https://www.fotmob.com/leagues/87/stats/season/27233/players/interception/laliga")
except TimeoutException:
    # Deliberate best-effort: a page that is still loading after the cap is
    # acceptable here. Any other error (bad session, network failure,
    # KeyboardInterrupt) is no longer swallowed and propagates to the reader.
    print("Page load hit the 10 s timeout; continuing with the partially loaded page.")


In [None]:
# Reset the result first: if the scrape below fails, df_fotmob must not keep
# rows from an earlier scrape for a later cell to copy under a new name.
df_fotmob = pd.DataFrame(columns=['Player', 'StatValue'])

try:
    # Wait up to 20 s for the stats list container to be present in the DOM.
    wait = WebDriverWait(driver, 20)
    # Class name of the stats list container.
    # NOTE(review): this looks like a generated class name and may not be
    # stable across site releases -- verify if the wait starts timing out.
    container_class = "css-fvfi51-LeagueSeasonStatsTableCSS"
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, container_class)))

    # Every <a> element inside the container is treated as a candidate row.
    rows = driver.find_elements(By.CSS_SELECTOR, f".{container_class} a")

    # A row's text is expected to split into [Rank, Name, Details, StatValue];
    # rows with any other number of lines are skipped.
    player_data = []
    for row in rows:
        row_text = row.text.split('\n')
        if len(row_text) == 4:
            player_data.append(row_text)

    # Zero usable rows is a failed scrape, not a success: report it through
    # the except branch instead of printing the success message.
    if not player_data:
        raise ValueError(f"no 4-field player rows found among {len(rows)} links")

    scraped = pd.DataFrame(player_data, columns=['Rank', 'Player', 'Details', 'StatValue'])

    # Keep only the player name and the stat value.
    scraped = scraped[['Player', 'StatValue']].copy()

    # Convert the stat to a number; unparseable values become NaN.
    scraped['StatValue'] = pd.to_numeric(scraped['StatValue'], errors='coerce')

    # Publish the result only after every step above succeeded.
    df_fotmob = scraped

    print("Successfully scraped and cleaned FotMob data!")
    print(df_fotmob.head())

except Exception as e:
    print(f"Error scraping FotMob: {e}")

Successfully scraped and cleaned FotMob data!
              Player  StatValue
0      Victor Garcia        2.8
1        Mikel Vesga        2.7
2     Sergi Altimira        2.2
3  César Azpilicueta        2.1
4        Marc Aguado        2.0


In [None]:
# Snapshot the scraped frame under a stat-specific name; df_fotmob itself is
# reassigned by the later scraping cells.
df_interceptions = df_fotmob.copy(deep=True)

In [None]:
# Bare expression: the notebook renders df_interceptions as this cell's output.
df_interceptions

Unnamed: 0,Player,StatValue
0,Victor Garcia,2.8
1,Mikel Vesga,2.7
2,Sergi Altimira,2.2
3,César Azpilicueta,2.1
4,Marc Aguado,2.0
...,...,...
304,Marcus Rashford,0.1
305,Robert Lewandowski,0.1
306,Vedat Muriqi,0.1
307,Vinicius Junior,0.1


In [74]:
# Cap page loads at 10 seconds for this driver.
driver.set_page_load_timeout(10)

try:
    driver.get("https://www.fotmob.com/leagues/87/stats/season/27233/players/poss_won_att_3rd/laliga")
except TimeoutException:
    # Deliberate best-effort: a page that is still loading after the cap is
    # acceptable here. Any other error (bad session, network failure,
    # KeyboardInterrupt) is no longer swallowed and propagates to the reader.
    print("Page load hit the 10 s timeout; continuing with the partially loaded page.")


In [99]:
# Reset the result first: if the scrape below fails, df_fotmob must not keep
# rows from an earlier scrape for a later cell to copy under a new name.
df_fotmob = pd.DataFrame(columns=['Player', 'StatValue'])

try:
    # Wait up to 20 s for the stats list container to be present in the DOM.
    wait = WebDriverWait(driver, 20)
    # Class name of the stats list container.
    # NOTE(review): this looks like a generated class name and may not be
    # stable across site releases -- verify if the wait starts timing out.
    container_class = "css-fvfi51-LeagueSeasonStatsTableCSS"
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, container_class)))

    # Every <a> element inside the container is treated as a candidate row.
    rows = driver.find_elements(By.CSS_SELECTOR, f".{container_class} a")

    # A row's text is expected to split into [Rank, Name, Details, StatValue];
    # rows with any other number of lines are skipped.
    player_data = []
    for row in rows:
        row_text = row.text.split('\n')
        if len(row_text) == 4:
            player_data.append(row_text)

    # Zero usable rows is a failed scrape, not a success: report it through
    # the except branch instead of printing the success message.
    if not player_data:
        raise ValueError(f"no 4-field player rows found among {len(rows)} links")

    scraped = pd.DataFrame(player_data, columns=['Rank', 'Player', 'Details', 'StatValue'])

    # Keep only the player name and the stat value.
    scraped = scraped[['Player', 'StatValue']].copy()

    # Convert the stat to a number; unparseable values become NaN.
    scraped['StatValue'] = pd.to_numeric(scraped['StatValue'], errors='coerce')

    # Publish the result only after every step above succeeded.
    df_fotmob = scraped

    print("Successfully scraped and cleaned FotMob data!")
    print(df_fotmob.head())

except Exception as e:
    print(f"Error scraping FotMob: {e}")

Successfully scraped and cleaned FotMob data!
                Player  StatValue
0          Pau Cubarsi       85.7
1                Pedri       83.1
2      Frenkie de Jong       76.4
3          Marc Casado       76.3
4  Andreas Christensen       75.3


In [76]:
# Snapshot the scraped frame under a stat-specific name; df_fotmob itself is
# reassigned by the later scraping cells.
df_poss_n_fthird = df_fotmob.copy(deep=True)

In [77]:
# Bare expression: the notebook renders df_poss_n_fthird as this cell's output.
df_poss_n_fthird

Unnamed: 0,Player,StatValue
0,Lucas Boye,1.6
1,Daniel Raba,1.4
2,Abdessamad Ezzalzouli,1.4
3,Ramon Terrats,1.4
4,Nico Williams,1.3
...,...,...
278,Antonio Raillo,0.1
279,David Carmo,0.1
280,Jeremy Toljan,0.1
281,Leandro Cabrera,0.1


In [91]:
# The scraped frame's value column is called 'StatValue' (there is no
# 'Tackles' column), so that is the name to map; renaming a missing column is
# a silent no-op and would leave colliding 'StatValue' columns at merge time.
df_tackles.rename(columns={'StatValue': 'Tackles90'}, inplace=True)

In [92]:
# Show the first rows of df_tackles to check the renamed column.
df_tackles.head()

Unnamed: 0,Player,Tackles90
0,Gabriel Suazo,4.4
1,Santiago Mourino,4.4
2,Dani Ceballos,4.3
3,Eduardo Camavinga,4.2
4,Alejandro Rego,4.2


In [93]:
# The scraped frame's value column is called 'StatValue' (there is no
# 'Interceptions' column), so that is the name to map; renaming a missing
# column is a silent no-op and would leave colliding 'StatValue' columns at
# merge time.
df_interceptions.rename(columns={'StatValue': 'Interceptions90'}, inplace=True)

In [94]:
# Show the first rows of df_interceptions to check the renamed column.
df_interceptions.head()

Unnamed: 0,Player,Interceptions90
0,Victor Garcia,2.8
1,Mikel Vesga,2.7
2,Sergi Altimira,2.2
3,César Azpilicueta,2.1
4,Marc Aguado,2.0


In [95]:
# Give the generic scraped value column its stat-specific name (in place).
poss_won_column_names = {'StatValue': 'PossessionWonFinalThird90'}
df_poss_n_fthird.rename(columns=poss_won_column_names, inplace=True)

In [96]:
# Show the first rows of df_poss_n_fthird to check the renamed column.
df_poss_n_fthird.head()

Unnamed: 0,Player,PossessionWonFinalThird90
0,Lucas Boye,1.6
1,Daniel Raba,1.4
2,Abdessamad Ezzalzouli,1.4
3,Ramon Terrats,1.4
4,Nico Williams,1.3


In [98]:
# Cap page loads at 10 seconds for this driver.
driver.set_page_load_timeout(10)

try:
    driver.get("https://www.fotmob.com/leagues/87/stats/season/27233/players/accurate_pass/laliga")
except TimeoutException:
    # Deliberate best-effort: a page that is still loading after the cap is
    # acceptable here. Any other error (bad session, network failure,
    # KeyboardInterrupt) is no longer swallowed and propagates to the reader.
    print("Page load hit the 10 s timeout; continuing with the partially loaded page.")


In [100]:
# Scrape the page that is currently loaded before taking the snapshot. Without
# this step, a top-to-bottom run would copy whatever the previous scraping
# cell left in df_fotmob (a different stat) into df_accurate_passes90.
df_fotmob = pd.DataFrame(columns=['Player', 'StatValue'])

try:
    # Wait up to 20 s for the stats list container to be present in the DOM.
    wait = WebDriverWait(driver, 20)
    # Class name of the stats list container.
    # NOTE(review): this looks like a generated class name and may not be
    # stable across site releases -- verify if the wait starts timing out.
    container_class = "css-fvfi51-LeagueSeasonStatsTableCSS"
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, container_class)))

    # Every <a> element inside the container is treated as a candidate row.
    rows = driver.find_elements(By.CSS_SELECTOR, f".{container_class} a")

    # A row's text is expected to split into [Rank, Name, Details, StatValue];
    # rows with any other number of lines are skipped.
    player_data = []
    for row in rows:
        row_text = row.text.split('\n')
        if len(row_text) == 4:
            player_data.append(row_text)

    # Zero usable rows is a failed scrape, not a success.
    if not player_data:
        raise ValueError(f"no 4-field player rows found among {len(rows)} links")

    scraped = pd.DataFrame(player_data, columns=['Rank', 'Player', 'Details', 'StatValue'])
    scraped = scraped[['Player', 'StatValue']].copy()
    # Convert the stat to a number; unparseable values become NaN.
    scraped['StatValue'] = pd.to_numeric(scraped['StatValue'], errors='coerce')

    # Publish the result only after every step above succeeded.
    df_fotmob = scraped

    print("Successfully scraped and cleaned FotMob data!")
    print(df_fotmob.head())

except Exception as e:
    print(f"Error scraping FotMob: {e}")

# Snapshot the freshly scraped frame under a stat-specific name.
df_accurate_passes90 = df_fotmob.copy()

In [101]:
# Bare expression: the notebook renders df_accurate_passes90 as this cell's output.
df_accurate_passes90

Unnamed: 0,Player,StatValue
0,Pau Cubarsi,85.7
1,Pedri,83.1
2,Frenkie de Jong,76.4
3,Marc Casado,76.3
4,Andreas Christensen,75.3
...,...,...
346,Cristhian Stuani,9.2
347,Ante Budimir,9.2
348,Akor Adams,9.0
349,Mariano Diaz,8.2


In [102]:
# Bare expression: the notebook renders df_tackles as this cell's output.
df_tackles

Unnamed: 0,Player,Tackles90
0,Gabriel Suazo,4.4
1,Santiago Mourino,4.4
2,Dani Ceballos,4.3
3,Eduardo Camavinga,4.2
4,Alejandro Rego,4.2
...,...,...
331,Antonio Sivera,0.1
332,Augusto Batalla,0.1
333,Julen Agirrezabala,0.1
334,Thibaut Courtois,0.1


In [103]:
# Give the generic scraped value column its stat-specific name (in place).
accurate_pass_column_names = {'StatValue': 'AccuratePasses90'}
df_accurate_passes90.rename(columns=accurate_pass_column_names, inplace=True)

In [106]:
# Merge the first two, then merge the result with the third, and so on
# Left-join every stat frame onto df_tackles by player name, one after the
# other. Because each join is a left join, only players present in df_tackles
# appear in the result; stats missing for a player come out as NaN.
df = (
    df_tackles
    .merge(df_interceptions, on='Player', how='left')
    .merge(df_poss_n_fthird, on='Player', how='left')
    .merge(df_accurate_passes90, on='Player', how='left')
)

In [107]:
# Show the first rows of the merged frame.
df.head()

Unnamed: 0,Player,Tackles90,Interceptions90,PossessionWonFinalThird90,AccuratePasses90
0,Gabriel Suazo,4.4,1.2,0.4,38.1
1,Santiago Mourino,4.4,1.3,0.4,43.7
2,Dani Ceballos,4.3,1.5,0.3,71.1
3,Eduardo Camavinga,4.2,0.7,0.5,56.7
4,Alejandro Rego,4.2,1.7,0.7,33.2


In [108]:
# Persist the merged table. No index argument is passed, so pandas' default
# applies and the row index is written as the leading, unnamed column.
output_csv = "fotmob.csv"
df.to_csv(output_csv)