In [1]:
import requests
from bs4 import BeautifulSoup
import re
import json
import time
import random
import pandas as pd
import numpy as np

# Browser-like User-Agent string. NOTE(review): no code visible in this
# section passes it to requests.get — confirm whether it is still needed.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'

class LeagueManager:
    """
    Catalogue of football leagues plus a generator of FBref player-stats URLs.
    """
    def __init__(self):
        """
        Set up the base URL, the known leagues (id, URL slug, seasons) and the
        player statistic tables that URLs can be generated for.
        """
        self.base_url = "https://fbref.com/en/comps/"
        # Known leagues, grouped by data source. Each entry carries the
        # competition id, the slug used in the URL and the available seasons.
        self.possible_leagues = {
            'Fbref': {
                'Premier League': {
                    'id': 9,
                    'slug': 'Premier-League',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
                'La Liga': {
                    'id': 12,
                    'slug': 'La-Liga',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
                'Ligue 1': {
                    'id': 13,
                    'slug': 'Ligue-1',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
                'Bundesliga': {
                    'id': 20,
                    'slug': 'Bundesliga',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
                'Serie A': {
                    'id': 11,
                    'slug': 'Serie-A',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
                'Big 5 European Leagues': {
                    'id': 'Big5',
                    'slug': 'Big-5-European-Leagues',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
            }
        }

        # Player statistic tables: display name -> URL path fragment.
        self.player_tables = {
            "Standard Stats": "stats/players",
            "Goalkeeping": "keepers/players",
            "Advanced Goalkeeping": "keepersadv/players",
            "Shooting": "shooting/players",
            "Passing": "passing/players",
            "Pass Types": "passing_types/players",
            "Goal and Shot Creation": "gca/players",
            "Defensive Actions": "defense/players",
            "Possession": "possession/players",
            "Playing Time": "playingtime/players",
            "Miscellaneous Stats": "misc/players",
        }

    def get_available_leagues(self):
        """
        Return the known leagues with their id and seasons (slug omitted).

        Return:
            dict: {league_name: {'id': ..., 'seasons': [...]}}.
        """
        return {
            league_name: {
                'id': data['id'],
                'seasons': data['seasons']
            }
            for league_name, data in self.possible_leagues['Fbref'].items()
        }

    def get_league_info(self, league_name):
        """
        Return the stored entry for one league.

        Args:
            league_name (str): League display name, e.g. 'La Liga'.

        Return:
            dict or None: The league entry (id, slug, seasons), or None when
            the name is not known.
        """
        return self.possible_leagues['Fbref'].get(league_name)

    def get_all_league_names(self):
        """
        Return the display names of every known league.

        Return:
            list: League names.
        """
        return list(self.possible_leagues['Fbref'].keys())

    def generate_player_urls(self):
        """
        Build one URL per league, season and player statistic table.

        The URL is assembled as
        ``{base_url}{id}/{table_path}/{season}/{slug}-Stats``. The slug comes
        from the league entry; if an entry has no 'slug', the display name
        with spaces replaced by hyphens is used instead.

        Return:
            dict: Nested mapping {league: {season: {stat_name: url}}}.
        """
        # NOTE(review): season pages on FBref may use a different layout
        # (season before the table path) — TODO confirm these URLs resolve.
        urls = {}

        for league_name, league_data in self.possible_leagues['Fbref'].items():
            league_id = league_data['id']
            # Use the slug stored with the league rather than re-deriving it
            # from the display name, so the two can differ safely.
            slug = league_data.get('slug') or league_name.replace(' ', '-')

            urls[league_name] = {
                season: {
                    stat_name: f"{self.base_url}{league_id}/{path}/{season}/{slug}-Stats"
                    for stat_name, path in self.player_tables.items()
                }
                for season in league_data['seasons']
            }

        return urls
    
def format_dataframe_columns(df, stat_category):
    """
    Flatten the column labels of a DataFrame and tag them with a statistic name.

    Multi-level columns become ``"<level 1> (<level 0> - <stat_category>)"``;
    single-level columns become ``"<name> (<stat_category>)"``. The labels are
    replaced on the DataFrame that was passed in.

    Args:
        df (pd.DataFrame): Frame whose columns are relabelled in place.
        stat_category (str): Statistic name appended to every label.

    Returns:
        pd.DataFrame: The same DataFrame object, with the new column labels.
    """
    multilevel = isinstance(df.columns, pd.MultiIndex)

    def _flat_label(col):
        # For a MultiIndex each label is a tuple: (category, column name).
        if multilevel:
            return f"{col[1]} ({col[0]} - {stat_category})"
        return f"{col} ({stat_category})"

    df.columns = list(map(_flat_label, df.columns))
    return df

# Instantiate LeagueManager and build the nested {league: {season: {stat: url}}} mapping
manager = LeagueManager()
player_urls = manager.generate_player_urls()
# Print the generated URLs for La Liga 2024-2025, one statistic per line
for stat, url in player_urls['La Liga']['2024-2025'].items():
    print(stat, "->", url)

Standard Stats -> https://fbref.com/en/comps/12/stats/players/2024-2025/La-Liga-Stats
Goalkeeping -> https://fbref.com/en/comps/12/keepers/players/2024-2025/La-Liga-Stats
Advanced Goalkeeping -> https://fbref.com/en/comps/12/keepersadv/players/2024-2025/La-Liga-Stats
Shooting -> https://fbref.com/en/comps/12/shooting/players/2024-2025/La-Liga-Stats
Passing -> https://fbref.com/en/comps/12/passing/players/2024-2025/La-Liga-Stats
Pass Types -> https://fbref.com/en/comps/12/passing_types/players/2024-2025/La-Liga-Stats
Goal and Shot Creation -> https://fbref.com/en/comps/12/gca/players/2024-2025/La-Liga-Stats
Defensive Actions -> https://fbref.com/en/comps/12/defense/players/2024-2025/La-Liga-Stats
Possession -> https://fbref.com/en/comps/12/possession/players/2024-2025/La-Liga-Stats
Playing Time -> https://fbref.com/en/comps/12/playingtime/players/2024-2025/La-Liga-Stats
Miscellaneous Stats -> https://fbref.com/en/comps/12/misc/players/2024-2025/La-Liga-Stats


In [3]:
# Take the first (statistic name, URL) pair generated for La Liga 2024-2025
first_stat, first_url = next(iter(player_urls['La Liga']['2024-2025'].items()))
print(f"Métrica general: {first_stat}")
print(f"URL: {first_url}")

Métrica general: Standard Stats
URL: https://fbref.com/en/comps/12/stats/players/2024-2025/La-Liga-Stats


In [10]:
# Download and parse the page
response = requests.get(first_url)
soup = BeautifulSoup(response.content, "html.parser")

# Grab the first <table> in the parsed markup.
# NOTE: per the output below this is the squad-level table
# (id="stats_squads_standard_for"), not a per-player table.
table = soup.find("table")  # Adjust this if there is more than one table
table

<table class="stats_table sortable min_width" data-cols-to-freeze=",1" id="stats_squads_standard_for"> <caption>Squad Standard Stats <span style="color: #666; font-size:smaller">2024-2025 La Liga</span> Table</caption> <colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup> <thead> <tr class="over_header"> <th aria-label="" class="over_header center" colspan="4" data-stat=""></th> <th aria-label="" class="over_header center group_start" colspan="4" data-stat="header_playing">Playing Time</th> <th aria-label="" class="over_header center group_start" colspan="8" data-stat="header_performance">Performance</th> <th aria-label="" class="over_header center group_start" colspan="4" data-stat="header_expected">Expected</th> <th aria-label="" class="over_header center group_start" colspan="2" data-stat="header_progression">Progression</th>

In [12]:
# Collect the data-stat / data-over-header attributes of every <th> in the
# table (all header rows, plus any <th> cells in the body).
columns = []
ths = table.find_all("th")
for th in ths:
    data_stat = th.get("data-stat", "").strip()
    data_over_header = th.get("data-over-header", None)
    columns.append({
        "data-stat": data_stat,
        "data-over-header": data_over_header,
        "metrica-general": first_stat
    })

# Convert to a DataFrame
df_columns = pd.DataFrame(columns)
df_columns

Unnamed: 0,data-stat,data-over-header,metrica-general
0,,,Standard Stats
1,header_playing,,Standard Stats
2,header_performance,,Standard Stats
3,header_expected,,Standard Stats
4,header_progression,,Standard Stats
5,header_per90,,Standard Stats
6,team,,Standard Stats
7,players_used,,Standard Stats
8,avg_age,,Standard Stats
9,possession,,Standard Stats


In [14]:
# Example with a player or team URL (replace with the real URL)
url = 'https://fbref.com/en/comps/12/stats/La-Liga-Stats'  # PUT YOUR URL HERE
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Find the table (this takes the first <table>; adjust if a specific one is needed)
table = soup.find('table')

# Label for the statistic family this page belongs to
metrica_general = 'Standard Stats'  # Or derive it dynamically when handling several pages

# Accumulates one dict per header cell
columns_data = []

# Header rows of the table (<thead>)
header_rows = table.find('thead').find_all('tr')

# Only the last <thead> row is read; its <th> cells are the ones inspected
# for data-stat / data-over-header below
last_header_row = header_rows[-1]

for th in last_header_row.find_all('th'):
    data_stat = th.get('data-stat')
    data_over_header = th.get('data-over-header')
    
    # Header cells without a data-over-header attribute are grouped as 'General'
    if data_over_header is None:
        data_over_header = 'General'
    
    columns_data.append({
        'data-stat': data_stat,
        'data-over-header': data_over_header,
        'metrica-general': metrica_general
    })

# Convert to a DataFrame
df_columns = pd.DataFrame(columns_data)
print(df_columns)

                   data-stat data-over-header metrica-general
0                       team          General  Standard Stats
1               players_used          General  Standard Stats
2                    avg_age          General  Standard Stats
3                 possession          General  Standard Stats
4                      games     Playing Time  Standard Stats
5               games_starts     Playing Time  Standard Stats
6                    minutes     Playing Time  Standard Stats
7                minutes_90s     Playing Time  Standard Stats
8                      goals      Performance  Standard Stats
9                    assists      Performance  Standard Stats
10             goals_assists      Performance  Standard Stats
11                goals_pens      Performance  Standard Stats
12                 pens_made      Performance  Standard Stats
13                  pens_att      Performance  Standard Stats
14              cards_yellow      Performance  Standard Stats
15      

In [16]:
from bs4 import BeautifulSoup, Comment

In [18]:
def get_players_columns(url, metrica_general='Standard Stats'):
    """
    Download a page and describe the header cells of its player table.

    The player table is the first <table> containing a
    ``<td data-stat="player">`` cell. It is searched for inside HTML comments
    first and, if no commented-out table qualifies, in the regular markup.

    Args:
        url (str): Page to download.
        metrica_general (str): Label stored in the 'metrica-general' column of
            every returned row.

    Returns:
        pd.DataFrame or None: One row per <th> of the last header row, with
        columns 'data-stat', 'data-over-header' ('General' when the attribute
        is absent) and 'metrica-general'. None (after printing a message) when
        the server answers with an HTTP error status, no player table is
        found, or the table has no header row.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
        the 30-second timeout expires.
    """
    def _first_player_table(fragment):
        # First table in the fragment that has a player cell, else None.
        for candidate in fragment.find_all('table'):
            if candidate.find('td', {'data-stat': 'player'}) is not None:
                return candidate
        return None

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # Do not parse an error page: it would be misreported as "no table".
        print(f"Error HTTP {response.status_code} al solicitar {url}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Look through commented-out markup first (each comment is re-parsed as HTML).
    player_table = None
    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
        player_table = _first_player_table(BeautifulSoup(comment, 'html.parser'))
        if player_table is not None:
            break

    # Fall back to tables that are part of the regular document.
    if player_table is None:
        player_table = _first_player_table(soup)

    if player_table is None:
        print("No se encontró la tabla de jugadores.")
        return None

    thead = player_table.find('thead')
    header_rows = thead.find_all('tr') if thead is not None else []
    if not header_rows:
        print("La tabla de jugadores no tiene encabezado.")
        return None

    # Only the last header row is described.
    columns_data = [
        {
            'data-stat': th.get('data-stat'),
            'data-over-header': th.get('data-over-header', 'General'),
            'metrica-general': metrica_general,
        }
        for th in header_rows[-1].find_all('th')
    ]

    return pd.DataFrame(columns_data)

# Usage
url = 'https://fbref.com/en/comps/12/stats/La-Liga-Stats'  # Replace with the URL of the page holding the player table
df_columns = get_players_columns(url)
print(df_columns)

                      data-stat data-over-header metrica-general
0                        ranker          General  Standard Stats
1                        player          General  Standard Stats
2                   nationality          General  Standard Stats
3                      position          General  Standard Stats
4                          team          General  Standard Stats
5                           age          General  Standard Stats
6                    birth_year          General  Standard Stats
7                         games     Playing Time  Standard Stats
8                  games_starts     Playing Time  Standard Stats
9                       minutes     Playing Time  Standard Stats
10                  minutes_90s     Playing Time  Standard Stats
11                        goals      Performance  Standard Stats
12                      assists      Performance  Standard Stats
13                goals_assists      Performance  Standard Stats
14                   goal

In [20]:
# Trim, replace spaces with underscores and lowercase 'data-over-header' and 'metrica-general'
df_columns['data-over-header'] = df_columns['data-over-header'].str.strip().str.replace(' ', '_').str.lower()
df_columns['metrica-general'] = df_columns['metrica-general'].str.strip().str.replace(' ', '_').str.lower()

# Trim and lowercase 'data-stat' as well (spaces are not replaced here)
df_columns['data-stat'] = df_columns['data-stat'].str.strip().str.lower()

# Build the final column name as <data-stat>_<data-over-header>_<metrica-general>
df_columns['column_name'] = (
    df_columns['data-stat'] + '_' +
    df_columns['data-over-header'] + '_' +
    df_columns['metrica-general']
)

# Print the DataFrame for review
print(df_columns)

                      data-stat data-over-header metrica-general  \
0                        ranker          general  standard_stats   
1                        player          general  standard_stats   
2                   nationality          general  standard_stats   
3                      position          general  standard_stats   
4                          team          general  standard_stats   
5                           age          general  standard_stats   
6                    birth_year          general  standard_stats   
7                         games     playing_time  standard_stats   
8                  games_starts     playing_time  standard_stats   
9                       minutes     playing_time  standard_stats   
10                  minutes_90s     playing_time  standard_stats   
11                        goals      performance  standard_stats   
12                      assists      performance  standard_stats   
13                goals_assists      performance

In [68]:
import requests
from bs4 import BeautifulSoup, Comment
import pandas as pd

def _find_player_table(soup):
    """Return the first <table> holding a td[data-stat="player"], or None.

    Commented-out markup is searched first; the regular document is the fallback.
    """
    def first_match(fragment):
        for candidate in fragment.find_all('table'):
            if candidate.find('td', {'data-stat': 'player'}) is not None:
                return candidate
        return None

    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
        # Each comment is re-parsed as HTML so tables inside it become visible.
        found = first_match(BeautifulSoup(comment, 'html.parser'))
        if found is not None:
            return found
    return first_match(soup)


def _player_column_names(player_table, metrica_general):
    """Build '<data-stat>_<over-header>_<metric>' names from the last header row."""
    thead = player_table.find('thead')
    header_rows = thead.find_all('tr') if thead is not None else []
    if not header_rows:
        return []

    metric = metrica_general.replace(' ', '_')
    names = []
    for th in header_rows[-1].find_all('th'):
        # Cells without a (non-empty) data-over-header are grouped as 'General'.
        group = (th.get('data-over-header') or 'General').replace(' ', '_')
        names.append(f"{th.get('data-stat')}_{group}_{metric}")
    return names


def _player_rows(player_table):
    """Return the stripped text of every cell, one list per non-empty <tbody> row."""
    tbody = player_table.find('tbody')
    if tbody is None:
        return []

    rows = []
    for tr in tbody.find_all('tr'):
        cells = [cell.get_text(strip=True) for cell in tr.find_all(['th', 'td'])]
        if cells:  # skip rows without any cell
            rows.append(cells)
    return rows


def get_players_data(url, metrica_general='Standard Stats'):
    """
    Download a page and return its player table as a DataFrame of strings.

    The player table is the first <table> containing a
    ``<td data-stat="player">`` cell, looked up inside HTML comments first and
    then in the regular markup. Column names are
    ``<data-stat>_<data-over-header>_<metrica_general>`` with spaces replaced
    by underscores. Every <tbody> row is kept as-is, so header rows repeated
    inside the body (first cell 'Rk') are still present and must be removed by
    the caller.

    Args:
        url (str): Page to download.
        metrica_general (str): Statistic label used as the column-name suffix.

    Returns:
        pd.DataFrame or None: The raw player table, or None (after printing a
        message) when the server answers with an HTTP error status, no player
        table is found, or the table has no header row. Failure is always a
        single None, matching the single-value success return.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
        the 30-second timeout expires.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # Do not parse an error page: it would be misreported as "no table".
        print(f"Error HTTP {response.status_code} al solicitar {url}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    player_table = _find_player_table(soup)
    if player_table is None:
        print("No se encontró la tabla de jugadores.")
        return None

    column_names = _player_column_names(player_table, metrica_general)
    if not column_names:
        print("La tabla de jugadores no tiene encabezado.")
        return None

    return pd.DataFrame(_player_rows(player_table), columns=column_names)

# Usage: scrape the standard-stats page with the default metrica_general label
url = 'https://fbref.com/en/comps/12/stats/La-Liga-Stats'
df_players_stats= get_players_data(url)
df_players_stats


Unnamed: 0,ranker_General_Standard_Stats,player_General_Standard_Stats,nationality_General_Standard_Stats,position_General_Standard_Stats,team_General_Standard_Stats,age_General_Standard_Stats,birth_year_General_Standard_Stats,games_Playing_Time_Standard_Stats,games_starts_Playing_Time_Standard_Stats,minutes_Playing_Time_Standard_Stats,...,assists_per90_Per_90_Minutes_Standard_Stats,goals_assists_per90_Per_90_Minutes_Standard_Stats,goals_pens_per90_Per_90_Minutes_Standard_Stats,goals_assists_pens_per90_Per_90_Minutes_Standard_Stats,xg_per90_Per_90_Minutes_Standard_Stats,xg_assist_per90_Per_90_Minutes_Standard_Stats,xg_xg_assist_per90_Per_90_Minutes_Standard_Stats,npxg_per90_Per_90_Minutes_Standard_Stats,npxg_xg_assist_per90_Per_90_Minutes_Standard_Stats,matches_General_Standard_Stats
0,1,Max Aarons,engENG,"DF,MF",Valencia,24,2000,4,1,120,...,0.00,0.00,0.00,0.00,0.00,0.02,0.02,0.00,0.02,Matches
1,2,Rodrigo Abajas,esESP,DF,Valencia,21,2003,1,1,65,...,0.00,0.00,0.00,0.00,0.10,0.00,0.10,0.10,0.10,Matches
2,3,Abel,esESP,DF,Osasuna,23,2000,35,20,2074,...,0.00,0.09,0.09,0.09,0.02,0.05,0.07,0.02,0.07,Matches
3,4,Nabil Aberdin,frFRA,MF,Getafe,21,2002,7,4,263,...,0.00,0.00,0.00,0.00,0.01,0.01,0.02,0.01,0.02,Matches
4,5,Abdel Abqar,maMAR,DF,Alavés,25,1999,29,29,2463,...,0.04,0.04,0.00,0.04,0.03,0.01,0.04,0.03,0.04,Matches
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620,598,Arsen Zakharyan,ruRUS,"MF,FW",Real Sociedad,21,2003,3,1,73,...,0.00,1.23,1.23,1.23,0.24,0.12,0.36,0.24,0.36,Matches
621,599,Bryan Zaragoza,esESP,"FW,MF",Osasuna,23,2001,27,22,1836,...,0.29,0.34,0.05,0.34,0.13,0.16,0.29,0.13,0.29,Matches
622,600,Igor Zubeldia,esESP,DF,Real Sociedad,27,1997,28,27,2269,...,0.00,0.00,0.00,0.00,0.02,0.02,0.04,0.02,0.04,Matches
623,Rk,Player,Nation,Pos,Squad,Age,Born,MP,Starts,Min,...,Ast,G+A,G-PK,G+A-PK,xG,xAG,xG+xAG,npxG,npxG+xAG,Matches


In [76]:
def limpieza_df_players(df, source_url=None):
    """
    Clean a raw player table as produced by get_players_data.

    Steps, in order:
      1. Drop header rows repeated inside the body (ranker cell equal to 'Rk').
      2. Drop the 'ranker_General_*' and 'matches_General_*' columns, whatever
         statistic suffix they carry.
      3. Reduce the first column whose name contains 'nationality' to its
         trailing run of uppercase letters (e.g. 'esESP' -> 'ESP').
      4. Add a 'competition' column parsed from the URL (the path segment that
         ends in '-Stats', hyphens turned into spaces), unless a column whose
         name contains 'competition' already exists.
      5. Replace empty strings and missing values with 0.

    Args:
        df (pd.DataFrame): Raw table. It is not modified.
        source_url (str, optional): URL the table was scraped from. When
            omitted, the module-level ``url`` variable is used, as before;
            if that is not defined either, the competition is 'Desconocida'.

    Returns:
        pd.DataFrame: A new, cleaned DataFrame.
    """
    if source_url is None:
        # Backward compatibility: earlier callers relied on the global `url`.
        source_url = globals().get('url', '')

    # Match by prefix so tables scraped with another metrica_general also work.
    ranker_cols = [c for c in df.columns if str(c).startswith('ranker_General_')]
    matches_cols = [c for c in df.columns if str(c).startswith('matches_General_')]

    for col in ranker_cols:
        df = df[df[col] != 'Rk']
    df = df.reset_index(drop=True).drop(columns=ranker_cols + matches_cols)

    # Keep only the trailing uppercase country code of the nationality column.
    nationality_col = next((c for c in df.columns if 'nationality' in str(c)), None)
    if nationality_col is not None:
        df[nationality_col] = (
            df[nationality_col].astype(str).str.extract(r'([A-Z]+)$', expand=False)
        )

    # Derive the competition name from the URL.
    competition_name_match = re.search(r'/([^/]+)-Stats(?:/|$)', str(source_url))
    if competition_name_match:
        competition_name = competition_name_match.group(1).replace('-', ' ')
    else:
        competition_name = 'Desconocida'

    if not any('competition' in str(col) for col in df.columns):
        df['competition'] = competition_name

    # Empty cells become NaN and every NaN then becomes 0.
    df = df.replace('', np.nan).fillna(0)
    return df


In [77]:
# Clean the standard-stats table. limpieza_df_players takes the competition
# name from the module-level `url`, so `url` must still point at this page.
df_players_stats_limpio= limpieza_df_players(df_players_stats)
df_players_stats_limpio

Unnamed: 0,player_General_Standard_Stats,nationality_General_Standard_Stats,position_General_Standard_Stats,team_General_Standard_Stats,age_General_Standard_Stats,birth_year_General_Standard_Stats,games_Playing_Time_Standard_Stats,games_starts_Playing_Time_Standard_Stats,minutes_Playing_Time_Standard_Stats,minutes_90s_Playing_Time_Standard_Stats,...,assists_per90_Per_90_Minutes_Standard_Stats,goals_assists_per90_Per_90_Minutes_Standard_Stats,goals_pens_per90_Per_90_Minutes_Standard_Stats,goals_assists_pens_per90_Per_90_Minutes_Standard_Stats,xg_per90_Per_90_Minutes_Standard_Stats,xg_assist_per90_Per_90_Minutes_Standard_Stats,xg_xg_assist_per90_Per_90_Minutes_Standard_Stats,npxg_per90_Per_90_Minutes_Standard_Stats,npxg_xg_assist_per90_Per_90_Minutes_Standard_Stats,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,4,1,120,1.3,...,0.00,0.00,0.00,0.00,0.00,0.02,0.02,0.00,0.02,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,1,1,65,0.7,...,0.00,0.00,0.00,0.00,0.10,0.00,0.10,0.10,0.10,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,35,20,2074,23.0,...,0.00,0.09,0.09,0.09,0.02,0.05,0.07,0.02,0.07,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,7,4,263,2.9,...,0.00,0.00,0.00,0.00,0.01,0.01,0.02,0.01,0.02,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,29,29,2463,27.4,...,0.04,0.04,0.00,0.04,0.03,0.01,0.04,0.03,0.04,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,22,14,1083,12.0,...,0.00,0.08,0.08,0.08,0.21,0.10,0.31,0.21,0.31,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,3,1,73,0.8,...,0.00,1.23,1.23,1.23,0.24,0.12,0.36,0.24,0.36,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,27,22,1836,20.4,...,0.29,0.34,0.05,0.34,0.13,0.16,0.29,0.13,0.29,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,28,27,2269,25.2,...,0.00,0.00,0.00,0.00,0.02,0.02,0.04,0.02,0.04,La Liga


In [78]:
# Scrape the shooting page. NOTE: metrica_general is left at its default, so
# the columns are suffixed 'Standard_Stats' even though this is shooting data
# (see the output below).
url= 'https://fbref.com/en/comps/12/shooting/La-Liga-Stats'
df_players_shooting= get_players_data(url)
df_players_shooting

Unnamed: 0,ranker_General_Standard_Stats,player_General_Standard_Stats,nationality_General_Standard_Stats,position_General_Standard_Stats,team_General_Standard_Stats,age_General_Standard_Stats,birth_year_General_Standard_Stats,minutes_90s_General_Standard_Stats,goals_Standard_Standard_Stats,shots_Standard_Standard_Stats,...,average_shot_distance_Standard_Standard_Stats,shots_free_kicks_Standard_Standard_Stats,pens_made_Standard_Standard_Stats,pens_att_Standard_Standard_Stats,xg_Expected_Standard_Stats,npxg_Expected_Standard_Stats,npxg_per_shot_Expected_Standard_Stats,xg_net_Expected_Standard_Stats,npxg_net_Expected_Standard_Stats,matches_General_Standard_Stats
0,1,Max Aarons,engENG,"DF,MF",Valencia,24,2000,1.3,0,0,...,,0,0,0,0.0,0.0,,0.0,0.0,Matches
1,2,Rodrigo Abajas,esESP,DF,Valencia,21,2003,0.7,0,1,...,24.5,0,0,0,0.1,0.1,0.07,-0.1,-0.1,Matches
2,3,Abel,esESP,DF,Osasuna,23,2000,23.0,2,12,...,22.9,0,0,0,0.5,0.5,0.04,+1.5,+1.5,Matches
3,4,Nabil Aberdin,frFRA,MF,Getafe,21,2002,2.9,0,2,...,31.2,0,0,0,0.0,0.0,0.02,0.0,0.0,Matches
4,5,Abdel Abqar,maMAR,DF,Alavés,25,1999,27.4,0,12,...,8.0,0,0,0,0.9,0.9,0.08,-0.9,-0.9,Matches
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620,598,Arsen Zakharyan,ruRUS,"MF,FW",Real Sociedad,21,2003,0.8,1,4,...,22.7,0,0,0,0.2,0.2,0.05,+0.8,+0.8,Matches
621,599,Bryan Zaragoza,esESP,"FW,MF",Osasuna,23,2001,20.4,1,42,...,20.2,0,0,0,2.7,2.7,0.06,-1.7,-1.7,Matches
622,600,Igor Zubeldia,esESP,DF,Real Sociedad,27,1997,25.2,0,12,...,17.1,0,0,0,0.6,0.6,0.05,-0.6,-0.6,Matches
623,Rk,Player,Nation,Pos,Squad,Age,Born,90s,Gls,Sh,...,Dist,FK,PK,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG,Matches


In [79]:
# Clean the shooting table (competition name is taken from the module-level `url`)
df_players_shooting_limpio= limpieza_df_players(df_players_shooting)
df_players_shooting_limpio

Unnamed: 0,player_General_Standard_Stats,nationality_General_Standard_Stats,position_General_Standard_Stats,team_General_Standard_Stats,age_General_Standard_Stats,birth_year_General_Standard_Stats,minutes_90s_General_Standard_Stats,goals_Standard_Standard_Stats,shots_Standard_Standard_Stats,shots_on_target_Standard_Standard_Stats,...,average_shot_distance_Standard_Standard_Stats,shots_free_kicks_Standard_Standard_Stats,pens_made_Standard_Standard_Stats,pens_att_Standard_Standard_Stats,xg_Expected_Standard_Stats,npxg_Expected_Standard_Stats,npxg_per_shot_Expected_Standard_Stats,xg_net_Expected_Standard_Stats,npxg_net_Expected_Standard_Stats,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,1.3,0,0,0,...,0,0,0,0,0.0,0.0,0,0.0,0.0,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,0.7,0,1,0,...,24.5,0,0,0,0.1,0.1,0.07,-0.1,-0.1,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,23.0,2,12,4,...,22.9,0,0,0,0.5,0.5,0.04,+1.5,+1.5,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,2.9,0,2,0,...,31.2,0,0,0,0.0,0.0,0.02,0.0,0.0,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,27.4,0,12,1,...,8.0,0,0,0,0.9,0.9,0.08,-0.9,-0.9,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,12.0,1,19,7,...,11.4,0,0,0,2.5,2.5,0.13,-1.5,-1.5,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,0.8,1,4,2,...,22.7,0,0,0,0.2,0.2,0.05,+0.8,+0.8,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,20.4,1,42,13,...,20.2,0,0,0,2.7,2.7,0.06,-1.7,-1.7,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,25.2,0,12,4,...,17.1,0,0,0,0.6,0.6,0.05,-0.6,-0.6,La Liga


In [80]:
# Scrape the passing page. NOTE: metrica_general is left at its default, so
# the columns are suffixed 'Standard_Stats' even though this is passing data
# (see the output below).
url= 'https://fbref.com/en/comps/12/passing/La-Liga-Stats'
df_players_passing= get_players_data(url)
df_players_passing

Unnamed: 0,ranker_General_Standard_Stats,player_General_Standard_Stats,nationality_General_Standard_Stats,position_General_Standard_Stats,team_General_Standard_Stats,age_General_Standard_Stats,birth_year_General_Standard_Stats,minutes_90s_General_Standard_Stats,passes_completed_Total_Standard_Stats,passes_Total_Standard_Stats,...,assists_General_Standard_Stats,xg_assist_General_Standard_Stats,pass_xa_Expected_Standard_Stats,xg_assist_net_Expected_Standard_Stats,assisted_shots_General_Standard_Stats,passes_into_final_third_General_Standard_Stats,passes_into_penalty_area_General_Standard_Stats,crosses_into_penalty_area_General_Standard_Stats,progressive_passes_General_Standard_Stats,matches_General_Standard_Stats
0,1,Max Aarons,engENG,"DF,MF",Valencia,24,2000,1.3,47,66,...,0,0.0,0.0,0.0,1,2,0,0,6,Matches
1,2,Rodrigo Abajas,esESP,DF,Valencia,21,2003,0.7,17,29,...,0,0.0,0.0,0.0,0,0,0,0,2,Matches
2,3,Abel,esESP,DF,Osasuna,23,2000,23.0,776,1071,...,0,1.0,1.5,-1.0,16,57,18,13,77,Matches
3,4,Nabil Aberdin,frFRA,MF,Getafe,21,2002,2.9,36,61,...,0,0.0,0.1,0.0,1,7,2,0,6,Matches
4,5,Abdel Abqar,maMAR,DF,Alavés,25,1999,27.4,719,939,...,1,0.3,0.4,+0.7,8,67,10,2,79,Matches
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620,598,Arsen Zakharyan,ruRUS,"MF,FW",Real Sociedad,21,2003,0.8,28,33,...,0,0.1,0.1,-0.1,1,3,1,0,3,Matches
621,599,Bryan Zaragoza,esESP,"FW,MF",Osasuna,23,2001,20.4,358,554,...,6,3.2,5.1,+2.8,36,23,34,20,50,Matches
622,600,Igor Zubeldia,esESP,DF,Real Sociedad,27,1997,25.2,1281,1483,...,0,0.4,0.6,-0.4,4,93,6,0,101,Matches
623,Rk,Player,Nation,Pos,Squad,Age,Born,90s,Cmp,Att,...,Ast,xAG,xA,A-xAG,KP,1/3,PPA,CrsPA,PrgP,Matches


In [82]:
# Clean the passing table (competition name is taken from the module-level `url`)
df_players_passing_limpio= limpieza_df_players(df_players_passing)
df_players_passing_limpio

Unnamed: 0,player_General_Standard_Stats,nationality_General_Standard_Stats,position_General_Standard_Stats,team_General_Standard_Stats,age_General_Standard_Stats,birth_year_General_Standard_Stats,minutes_90s_General_Standard_Stats,passes_completed_Total_Standard_Stats,passes_Total_Standard_Stats,passes_pct_Total_Standard_Stats,...,assists_General_Standard_Stats,xg_assist_General_Standard_Stats,pass_xa_Expected_Standard_Stats,xg_assist_net_Expected_Standard_Stats,assisted_shots_General_Standard_Stats,passes_into_final_third_General_Standard_Stats,passes_into_penalty_area_General_Standard_Stats,crosses_into_penalty_area_General_Standard_Stats,progressive_passes_General_Standard_Stats,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,1.3,47,66,71.2,...,0,0.0,0.0,0.0,1,2,0,0,6,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,0.7,17,29,58.6,...,0,0.0,0.0,0.0,0,0,0,0,2,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,23.0,776,1071,72.5,...,0,1.0,1.5,-1.0,16,57,18,13,77,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,2.9,36,61,59.0,...,0,0.0,0.1,0.0,1,7,2,0,6,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,27.4,719,939,76.6,...,1,0.3,0.4,+0.7,8,67,10,2,79,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,12.0,101,186,54.3,...,0,1.2,0.6,-1.2,9,9,9,3,14,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,0.8,28,33,84.8,...,0,0.1,0.1,-0.1,1,3,1,0,3,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,20.4,358,554,64.6,...,6,3.2,5.1,+2.8,36,23,34,20,50,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,25.2,1281,1483,86.4,...,0,0.4,0.6,-0.4,4,93,6,0,101,La Liga


In [88]:
def creacion_df_jugadores_estadistica_unica(url: str, guardar_csv=False, stat='Standard Stat', league='La Liga', season='2024'):
    """
    Scrape one player-statistics page, clean it and optionally save it as CSV.

    Args:
        url (str): Page to scrape with get_players_data.
        guardar_csv (bool): When True, write the cleaned table to
            ``./df_players_{stat}_{league}_{season}.csv`` without the index.
        stat (str): Statistic label; only used in the CSV file name.
        league (str): League label; only used in the CSV file name.
        season (str): Season label; only used in the CSV file name.

    Returns:
        pd.DataFrame or None: The cleaned table, or None when the download
        did not produce a table (nothing is written in that case).
    """
    # Fetch the raw table
    df_sucio = get_players_data(url)

    # get_players_data signals failure with a non-DataFrame value; stop here
    # instead of letting the cleaning step crash on it.
    if not isinstance(df_sucio, pd.DataFrame):
        return None

    # Clean the raw table
    df_limpio = limpieza_df_players(df_sucio)

    # limpieza_df_players, as called here, derives 'competition' from the
    # module-level `url`, which may differ from this function's argument.
    # Recompute it from the URL that was actually scraped.
    if 'competition' in df_limpio.columns:
        competition_match = re.search(r'/([^/]+)-Stats(?:/|$)', url)
        if competition_match:
            df_limpio['competition'] = competition_match.group(1).replace('-', ' ')
        else:
            df_limpio['competition'] = 'Desconocida'

    # Save to CSV when requested
    if guardar_csv:
        df_limpio.to_csv(f'./df_players_{stat}_{league}_{season}.csv', index=False)

    return df_limpio

In [89]:
# Scrape, clean and save the passing page. NOTE: stat is 'Standard Stat' here
# although the URL is the passing page; stat only affects the CSV file name.
creacion_df_jugadores_estadistica_unica(url= 'https://fbref.com/en/comps/12/passing/La-Liga-Stats', guardar_csv=True, stat='Standard Stat', league='La Liga', season='2024')

Unnamed: 0,player_General_Standard_Stats,nationality_General_Standard_Stats,position_General_Standard_Stats,team_General_Standard_Stats,age_General_Standard_Stats,birth_year_General_Standard_Stats,minutes_90s_General_Standard_Stats,passes_completed_Total_Standard_Stats,passes_Total_Standard_Stats,passes_pct_Total_Standard_Stats,...,assists_General_Standard_Stats,xg_assist_General_Standard_Stats,pass_xa_Expected_Standard_Stats,xg_assist_net_Expected_Standard_Stats,assisted_shots_General_Standard_Stats,passes_into_final_third_General_Standard_Stats,passes_into_penalty_area_General_Standard_Stats,crosses_into_penalty_area_General_Standard_Stats,progressive_passes_General_Standard_Stats,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,1.3,47,66,71.2,...,0,0.0,0.0,0.0,1,2,0,0,6,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,0.7,17,29,58.6,...,0,0.0,0.0,0.0,0,0,0,0,2,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,23.0,776,1071,72.5,...,0,1.0,1.5,-1.0,16,57,18,13,77,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,2.9,36,61,59.0,...,0,0.0,0.1,0.0,1,7,2,0,6,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,27.4,719,939,76.6,...,1,0.3,0.4,+0.7,8,67,10,2,79,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,12.0,101,186,54.3,...,0,1.2,0.6,-1.2,9,9,9,3,14,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,0.8,28,33,84.8,...,0,0.1,0.1,-0.1,1,3,1,0,3,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,20.4,358,554,64.6,...,6,3.2,5.1,+2.8,36,23,34,20,50,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,25.2,1281,1483,86.4,...,0,0.4,0.6,-0.4,4,93,6,0,101,La Liga
