In [1]:
import requests
from bs4 import BeautifulSoup
import re
import json
import time
import random
import pandas as pd
import numpy as np

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'

class LeagueManager:
    """
    Clase para gestionar ligas de fútbol y generar URLs de estadísticas de jugadores desde FBref.
    """
    def __init__(self):
        """
        Inicializa los atributos necesarios para acceder a las ligas, temporadas y tipos de estadísticas disponibles.
        """
        self.base_url = "https://fbref.com/en/comps/"
        # Diccionario con ligas disponibles, cada una con su ID, slug para la URL y temporadas disponibles
        self.possible_leagues = {
            'Fbref': {
                'Premier League': {
                    'id': 9,
                    'slug': 'Premier-League',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
                'La Liga': {
                    'id': 12,
                    'slug': 'La-Liga',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
                'Ligue 1': {
                    'id': 13,
                    'slug': 'Ligue-1',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']    
                },
                'Bundesliga': {
                    'id': 20,
                    'slug': 'Bundesliga',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
                'Serie A': {
                    'id': 11,
                    'slug': 'Serie-A',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
                'Big 5 European Leagues': {
                    'id': 'Big5',
                    'slug': 'Big-5-European-Leagues',
                    'seasons': ['2024-2025', '2023-2024', '2022-2023', '2021-2022', '2020-2021']
                },
            }
        }

        # Tipos de estadísticas disponibles para jugadores
        self.player_tables = {
            "Standard Stats": "stats/players",
            "Goalkeeping": "keepers/players",
            "Advanced Goalkeeping": "keepersadv/players",
            "Shooting": "shooting/players",
            "Passing": "passing/players",
            "Pass Types": "passing_types/players",
            "Goal and Shot Creation": "gca/players",
            "Defensive Actions": "defense/players",
            "Possession": "possession/players",
            "Playing Time": "playingtime/players",
            "Miscellaneous Stats": "misc/players",
        }

    def get_available_leagues(self):
        """
        Devuelve un diccionario con las ligas disponibles, sus identificadores y temporadas.

        Return:
            dict: Ligas disponibles con su ID y temporadas.
        """
        return {
            league_name: {
                'id': data['id'],
                'seasons': data['seasons']
            }
            for league_name, data in self.possible_leagues['Fbref'].items()
        }

    def get_league_info(self, league_name):
        """
        Devuelve la información de una liga específica.

        Args:
            league_name (str): Nombre de la liga.

        Return:
            dict or None: Información de la liga seleccionada (id, slug, seasons) o None si no existe.
        """
        return self.possible_leagues['Fbref'].get(league_name)

    def get_all_league_names(self):
        """
        Devuelve la lista de nombres de todas las ligas disponibles.

        Return:
            list: Nombres de las ligas.
        """
        return list(self.possible_leagues['Fbref'].keys())

    def generate_player_urls(self):
        """
        Genera URLs completas para acceder a estadísticas de jugadores por liga, temporada y tipo de estadística.

        Return:
            dict: Diccionario anidado con URLs organizadas por liga y temporada.
                  Formato: {liga: {temporada: {tipo_estadistica: url}}}
        """
        urls = {}

        for league_name, league_data in self.possible_leagues['Fbref'].items():
            league_id = league_data['id']
            seasons = league_data['seasons']
            urls[league_name] = {}

            for season in seasons:
                season_urls = {}
                for stat_name, path in self.player_tables.items():
                    url = (
                        f"{self.base_url}{league_id}/{path}/{season}/"
                        f"{league_name.replace(' ', '-')}-Stats"
                    )
                    season_urls[stat_name] = url

                urls[league_name][season] = season_urls

        return urls
    
def format_dataframe_columns(df, stat_category):
    """
    Reformatea las columnas de un DataFrame eliminando los niveles de índice
    y añadiendo un sufijo basado en la estadística.

    Args:
        df (pd.DataFrame): El DataFrame original con columnas multinivel.
        stat_category (str): La estadística que se añadirá como sufijo a las columnas.

    Returns:
        pd.DataFrame: El DataFrame con columnas reformateadas.
    """
    # Verifica si las columnas del DataFrame tienen múltiples niveles (MultiIndex)
    if isinstance(df.columns, pd.MultiIndex):
        # Si tienen múltiples niveles, crea nombres planos combinando el segundo nivel (nombre de columna)
        # con el primero (categoría), junto con el sufijo proporcionado por stat_category
        df.columns = [f"{col[1]} ({col[0]} - {stat_category})" for col in df.columns]
    else:
        # Si las columnas no son multinivel, simplemente añade el sufijo con stat_category a cada nombre
        df.columns = [f"{col} ({stat_category})" for col in df.columns]
    # Devuelve el DataFrame con los nuevos nombres de columnas
    return df

#Llamada a la clase LeagueManager para generar URLs de jugadores
manager = LeagueManager()
player_urls = manager.generate_player_urls()
# Ver las URLs de La Liga 2024-2025
for stat, url in player_urls['La Liga']['2024-2025'].items():
    print(stat, "->", url)

Standard Stats -> https://fbref.com/en/comps/12/stats/players/2024-2025/La-Liga-Stats
Goalkeeping -> https://fbref.com/en/comps/12/keepers/players/2024-2025/La-Liga-Stats
Advanced Goalkeeping -> https://fbref.com/en/comps/12/keepersadv/players/2024-2025/La-Liga-Stats
Shooting -> https://fbref.com/en/comps/12/shooting/players/2024-2025/La-Liga-Stats
Passing -> https://fbref.com/en/comps/12/passing/players/2024-2025/La-Liga-Stats
Pass Types -> https://fbref.com/en/comps/12/passing_types/players/2024-2025/La-Liga-Stats
Goal and Shot Creation -> https://fbref.com/en/comps/12/gca/players/2024-2025/La-Liga-Stats
Defensive Actions -> https://fbref.com/en/comps/12/defense/players/2024-2025/La-Liga-Stats
Possession -> https://fbref.com/en/comps/12/possession/players/2024-2025/La-Liga-Stats
Playing Time -> https://fbref.com/en/comps/12/playingtime/players/2024-2025/La-Liga-Stats
Miscellaneous Stats -> https://fbref.com/en/comps/12/misc/players/2024-2025/La-Liga-Stats


In [2]:
first_stat, first_url = next(iter(player_urls['La Liga']['2024-2025'].items()))
print(f"Métrica general: {first_stat}")
print(f"URL: {first_url}")

Métrica general: Standard Stats
URL: https://fbref.com/en/comps/12/stats/players/2024-2025/La-Liga-Stats


In [6]:
from bs4 import BeautifulSoup, Comment

In [256]:
import requests
from bs4 import BeautifulSoup, Comment
import pandas as pd

def get_players_data(url, metrica_general=None):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # 1️⃣ Si no pasas metrica_general, lo extraemos de la URL
    if metrica_general is None:
        stat_match = re.search(r'/(\w+)/La-Liga-Stats', url)
        if stat_match:
            metrica_general = stat_match.group(1).replace('-', ' ').title()
        else:
            metrica_general = 'Standard Stats'  # Valor por defecto

    metrica_general_clean = metrica_general.replace(' ', '_')

    # Buscar tablas ocultas (comentadas)
    comments = soup.find_all(string=lambda text: isinstance(text, Comment))
    player_table = None

    for comment in comments:
        comment_soup = BeautifulSoup(comment, 'html.parser')
        tables = comment_soup.find_all('table')
        for table in tables:
            if table.find('td', {'data-stat': 'player'}):
                player_table = table
                break
        if player_table:
            break

    if not player_table:
        print("No se encontró la tabla de jugadores.")
        return None, None

    # -----------------------
    # 1️⃣ Procesar el encabezado de la tabla
    # -----------------------
    header_rows = player_table.find('thead').find_all('tr')
    last_header_row = header_rows[-1]

    columns_data = []
    column_names = []

    for th in last_header_row.find_all('th'):
        data_stat = th.get('data-stat')
        data_over_header = th.get('data-over-header') or 'General'
        data_over_header = data_over_header.replace(' ', '_')
        metrica_general_clean = metrica_general.replace(' ', '_')
        column_name = f"{data_stat}_{data_over_header}_{metrica_general_clean}"
        column_names.append(column_name)

        columns_data.append({
            'data-stat': data_stat,
            'data-over-header': data_over_header,
            'metrica-general': metrica_general
        })

    df_columns = pd.DataFrame(columns_data)

    # -----------------------
    # 2️⃣ Extraer las filas de jugadores
    # -----------------------
    data_rows = []
    for row in player_table.find('tbody').find_all('tr'):
        row_data = []
        for cell in row.find_all(['th', 'td']):
            cell_text = cell.get_text(strip=True)
            row_data.append(cell_text)
        if row_data:  # Evitar filas vacías
            data_rows.append(row_data)

    # -----------------------
    # 3️⃣ Crear el DataFrame de jugadores
    # -----------------------
    df_players = pd.DataFrame(data_rows, columns=column_names)

    return  df_players




In [274]:
def limpieza_df_players(df, url):
    # 🟡 1️⃣ Extraer la métrica general automáticamente desde la URL
    stat_match = re.search(r'/([^/]+)/La-Liga-Stats', url)
    if stat_match:
        metrica_general = stat_match.group(1).replace('-', ' ').title()
    else:
        metrica_general = 'Standard Stats'  # Valor por defecto si no encuentra nada

    metrica_general_clean = metrica_general.replace(' ', '_')

     
    # Filtrar dinámicamente las columnas a eliminar
    columns_to_drop = [
        col for col in df.columns
        if (col.startswith("ranker_") or col.startswith("matches_"))
        and col.endswith(f"_{metrica_general_clean}")
    ]

    if columns_to_drop:
        df = df.drop(columns=columns_to_drop)
    
    # Eliminar filas con encabezados repetidos
    player_cols = [col for col in df.columns if col.lower().startswith('player')]
    if player_cols:
        player_col = player_cols[0]
        # Filtrar filas donde esa columna contenga 'Player', 'Team' o 'Totals'
        df = df[~df[player_col].isin(['Player', 'Team', 'Totals'])]

    # Procesar la columna de nacionalidad si existe
    nationality_col = [col for col in df.columns if 'nationality' in col]
    if nationality_col:
        col_name = nationality_col[0]
        df[col_name] = df[col_name].astype(str).str.extract(r'([A-Z]+)$')

    # Extraer la parte de la competición
    competition_name_match = re.search(r'/([^/]+)-Stats(?:/|$)', url)
    if competition_name_match:
        competition_name = competition_name_match.group(1).replace('-', ' ')
    else:
        competition_name = 'Desconocida'

    competition_col = [col for col in df.columns if 'competition' in col]
    if not competition_col:
        df['competition'] = competition_name
    # Reemplazar celdas vacías por NaN y luego NaN por 0
    df.replace('', np.nan, inplace=True)
    df.fillna(0, inplace=True)
    df.reset_index(drop=True,inplace=True)

    
    return df


In [257]:
# Uso
url = 'https://fbref.com/en/comps/12/stats/La-Liga-Stats'
df_players_stats= get_players_data(url)
df_players_stats_limpio= limpieza_df_players(df_players_stats,url)
df_players_stats_limpio

Unnamed: 0,player_General_Stats,nationality_General_Stats,position_General_Stats,team_General_Stats,age_General_Stats,birth_year_General_Stats,games_Playing_Time_Stats,games_starts_Playing_Time_Stats,minutes_Playing_Time_Stats,minutes_90s_Playing_Time_Stats,...,assists_per90_Per_90_Minutes_Stats,goals_assists_per90_Per_90_Minutes_Stats,goals_pens_per90_Per_90_Minutes_Stats,goals_assists_pens_per90_Per_90_Minutes_Stats,xg_per90_Per_90_Minutes_Stats,xg_assist_per90_Per_90_Minutes_Stats,xg_xg_assist_per90_Per_90_Minutes_Stats,npxg_per90_Per_90_Minutes_Stats,npxg_xg_assist_per90_Per_90_Minutes_Stats,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,4,1,120,1.3,...,0.00,0.00,0.00,0.00,0.00,0.02,0.02,0.00,0.02,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,1,1,65,0.7,...,0.00,0.00,0.00,0.00,0.10,0.00,0.10,0.10,0.10,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,35,20,2074,23.0,...,0.00,0.09,0.09,0.09,0.02,0.05,0.07,0.02,0.07,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,7,4,263,2.9,...,0.00,0.00,0.00,0.00,0.01,0.01,0.02,0.01,0.02,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,29,29,2463,27.4,...,0.04,0.04,0.00,0.04,0.03,0.01,0.04,0.03,0.04,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,22,14,1083,12.0,...,0.00,0.08,0.08,0.08,0.21,0.10,0.31,0.21,0.31,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,3,1,73,0.8,...,0.00,1.23,1.23,1.23,0.24,0.12,0.36,0.24,0.36,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,27,22,1836,20.4,...,0.29,0.34,0.05,0.34,0.13,0.16,0.29,0.13,0.29,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,28,27,2269,25.2,...,0.00,0.00,0.00,0.00,0.02,0.02,0.04,0.02,0.04,La Liga


In [252]:
url= 'https://fbref.com/en/comps/12/shooting/La-Liga-Stats'
df_players_shooting= get_players_data(url)
df_players_shooting_limpio= limpieza_df_players(df_players_shooting,url)
df_players_shooting_limpio

Unnamed: 0,player_General_Shooting,nationality_General_Shooting,position_General_Shooting,team_General_Shooting,age_General_Shooting,birth_year_General_Shooting,minutes_90s_General_Shooting,goals_Standard_Shooting,shots_Standard_Shooting,shots_on_target_Standard_Shooting,...,average_shot_distance_Standard_Shooting,shots_free_kicks_Standard_Shooting,pens_made_Standard_Shooting,pens_att_Standard_Shooting,xg_Expected_Shooting,npxg_Expected_Shooting,npxg_per_shot_Expected_Shooting,xg_net_Expected_Shooting,npxg_net_Expected_Shooting,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,1.3,0,0,0,...,0,0,0,0,0.0,0.0,0,0.0,0.0,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,0.7,0,1,0,...,24.5,0,0,0,0.1,0.1,0.07,-0.1,-0.1,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,23.0,2,12,4,...,22.9,0,0,0,0.5,0.5,0.04,+1.5,+1.5,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,2.9,0,2,0,...,31.2,0,0,0,0.0,0.0,0.02,0.0,0.0,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,27.4,0,12,1,...,8.0,0,0,0,0.9,0.9,0.08,-0.9,-0.9,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,12.0,1,19,7,...,11.4,0,0,0,2.5,2.5,0.13,-1.5,-1.5,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,0.8,1,4,2,...,22.7,0,0,0,0.2,0.2,0.05,+0.8,+0.8,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,20.4,1,42,13,...,20.2,0,0,0,2.7,2.7,0.06,-1.7,-1.7,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,25.2,0,12,4,...,17.1,0,0,0,0.6,0.6,0.05,-0.6,-0.6,La Liga


In [251]:
url= 'https://fbref.com/en/comps/12/passing/La-Liga-Stats'
df_players_passing= get_players_data(url)
df_players_passing_limpio= limpieza_df_players(df_players_passing, url)
df_players_passing_limpio

Unnamed: 0,player_General_Passing,nationality_General_Passing,position_General_Passing,team_General_Passing,age_General_Passing,birth_year_General_Passing,minutes_90s_General_Passing,passes_completed_Total_Passing,passes_Total_Passing,passes_pct_Total_Passing,...,assists_General_Passing,xg_assist_General_Passing,pass_xa_Expected_Passing,xg_assist_net_Expected_Passing,assisted_shots_General_Passing,passes_into_final_third_General_Passing,passes_into_penalty_area_General_Passing,crosses_into_penalty_area_General_Passing,progressive_passes_General_Passing,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,1.3,47,66,71.2,...,0,0.0,0.0,0.0,1,2,0,0,6,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,0.7,17,29,58.6,...,0,0.0,0.0,0.0,0,0,0,0,2,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,23.0,776,1071,72.5,...,0,1.0,1.5,-1.0,16,57,18,13,77,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,2.9,36,61,59.0,...,0,0.0,0.1,0.0,1,7,2,0,6,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,27.4,719,939,76.6,...,1,0.3,0.4,+0.7,8,67,10,2,79,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,12.0,101,186,54.3,...,0,1.2,0.6,-1.2,9,9,9,3,14,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,0.8,28,33,84.8,...,0,0.1,0.1,-0.1,1,3,1,0,3,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,20.4,358,554,64.6,...,6,3.2,5.1,+2.8,36,23,34,20,50,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,25.2,1281,1483,86.4,...,0,0.4,0.6,-0.4,4,93,6,0,101,La Liga


In [255]:
url= 'https://fbref.com/en/comps/12/passing_types/La-Liga-Stats'
df_players_passingtype= get_players_data(url)
df_players_passingtype_limpio= limpieza_df_players(df_players_passingtype, url)
df_players_passingtype_limpio

Unnamed: 0,player_General_Passing_Types,nationality_General_Passing_Types,position_General_Passing_Types,team_General_Passing_Types,age_General_Passing_Types,birth_year_General_Passing_Types,minutes_90s_General_Passing_Types,passes_General_Passing_Types,passes_live_Pass_Types_Passing_Types,passes_dead_Pass_Types_Passing_Types,...,crosses_Pass_Types_Passing_Types,throw_ins_Pass_Types_Passing_Types,corner_kicks_Pass_Types_Passing_Types,corner_kicks_in_Corner_Kicks_Passing_Types,corner_kicks_out_Corner_Kicks_Passing_Types,corner_kicks_straight_Corner_Kicks_Passing_Types,passes_completed_Outcomes_Passing_Types,passes_offsides_Outcomes_Passing_Types,passes_blocked_Outcomes_Passing_Types,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,1.3,66,54,12,...,5,11,0,0,0,0,47,0,1,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,0.7,29,21,8,...,1,8,0,0,0,0,17,0,2,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,23.0,1071,847,220,...,85,206,0,0,0,0,776,4,47,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,2.9,61,58,2,...,0,0,0,0,0,0,36,1,3,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,27.4,939,888,48,...,5,7,0,0,0,0,719,3,6,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,12.0,186,176,9,...,7,0,0,0,0,0,101,1,4,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,0.8,33,32,1,...,1,0,1,1,0,0,28,0,0,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,20.4,554,484,67,...,104,16,28,10,5,0,358,3,19,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,25.2,1483,1431,44,...,1,1,0,0,0,0,1281,8,5,La Liga


In [258]:
url= 'https://fbref.com/en/comps/12/gca/La-Liga-Stats'
df_players_gca= get_players_data(url)
df_players_gca_limpio= limpieza_df_players(df_players_gca, url)
df_players_gca_limpio

Unnamed: 0,player_General_Gca,nationality_General_Gca,position_General_Gca,team_General_Gca,age_General_Gca,birth_year_General_Gca,minutes_90s_General_Gca,sca_SCA_Gca,sca_per90_SCA_Gca,sca_passes_live_SCA_Types_Gca,...,sca_defense_SCA_Types_Gca,gca_GCA_Gca,gca_per90_GCA_Gca,gca_passes_live_GCA_Types_Gca,gca_passes_dead_GCA_Types_Gca,gca_take_ons_GCA_Types_Gca,gca_shots_GCA_Types_Gca,gca_fouled_GCA_Types_Gca,gca_defense_GCA_Types_Gca,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,1.3,1,0.75,1,...,0,0,0.00,0,0,0,0,0,0,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,0.7,0,0.00,0,...,0,0,0.00,0,0,0,0,0,0,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,23.0,39,1.69,29,...,2,4,0.17,2,0,1,0,0,1,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,2.9,3,1.03,3,...,0,0,0.00,0,0,0,0,0,0,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,27.4,25,0.91,20,...,2,3,0.11,2,0,0,0,1,0,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,12.0,21,1.75,13,...,1,1,0.08,0,0,0,0,1,0,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,0.8,6,7.40,4,...,0,0,0.00,0,0,0,0,0,0,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,20.4,71,3.49,41,...,0,9,0.44,6,1,1,1,0,0,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,25.2,17,0.67,16,...,0,1,0.04,1,0,0,0,0,0,La Liga


In [271]:
url= 'https://fbref.com/en/comps/12/defense/La-Liga-Stats'
df_players_defense= get_players_data(url)
df_players_defense_limpio= limpieza_df_players(df_players_defense, url)
df_players_defense_limpio

Unnamed: 0,player_General_Defense,nationality_General_Defense,position_General_Defense,team_General_Defense,age_General_Defense,birth_year_General_Defense,minutes_90s_General_Defense,tackles_Tackles_Defense,tackles_won_Tackles_Defense,tackles_def_3rd_Tackles_Defense,...,challenge_tackles_pct_Challenges_Defense,challenges_lost_Challenges_Defense,blocks_Blocks_Defense,blocked_shots_Blocks_Defense,blocked_passes_Blocks_Defense,interceptions_General_Defense,tackles_interceptions_General_Defense,clearances_General_Defense,errors_General_Defense,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,1.3,4,4,2,...,80.0,1,1,0,1,0,4,3,0,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,0.7,3,2,2,...,100.0,0,1,0,1,1,4,0,0,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,23.0,61,37,28,...,61.9,24,29,4,25,18,79,64,2,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,2.9,6,4,3,...,100.0,0,3,0,3,0,6,5,0,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,27.4,36,23,24,...,58.8,14,28,18,10,39,75,146,1,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,12.0,10,5,0,...,28.6,5,13,2,11,1,11,16,0,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,0.8,1,1,1,...,100.0,0,1,1,0,1,2,0,0,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,20.4,13,9,8,...,28.6,15,10,0,10,4,17,8,0,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,25.2,34,22,21,...,53.8,12,32,18,14,31,65,100,2,La Liga


In [260]:
url= 'https://fbref.com/en/comps/12/possession/La-Liga-Stats'
df_players_possession= get_players_data(url)
df_players_possession_limpio= limpieza_df_players(df_players_possession, url)
df_players_possession_limpio

Unnamed: 0,player_General_Possession,nationality_General_Possession,position_General_Possession,team_General_Possession,age_General_Possession,birth_year_General_Possession,minutes_90s_General_Possession,touches_Touches_Possession,touches_def_pen_area_Touches_Possession,touches_def_3rd_Touches_Possession,...,carries_distance_Carries_Possession,carries_progressive_distance_Carries_Possession,progressive_carries_Carries_Possession,carries_into_final_third_Carries_Possession,carries_into_penalty_area_Carries_Possession,miscontrols_Carries_Possession,dispossessed_Carries_Possession,passes_received_Receiving_Possession,progressive_passes_received_Receiving_Possession,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,1.3,85,1,21,...,215,103,0,3,0,3,1,49,10,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,0.7,36,1,8,...,101,67,3,2,1,0,2,16,3,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,23.0,1322,65,370,...,3269,1792,50,44,7,27,18,683,92,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,2.9,87,6,21,...,148,74,2,0,0,1,1,36,2,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,27.4,1251,160,659,...,3531,2013,16,7,0,18,3,650,4,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,12.0,323,21,32,...,633,148,8,4,2,40,15,209,48,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,0.8,44,1,8,...,67,19,0,1,0,1,0,36,4,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,20.4,782,4,70,...,4839,2697,105,62,51,41,15,585,140,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,25.2,1732,163,768,...,5787,3741,22,8,1,8,1,1145,3,La Liga


In [262]:
url= 'https://fbref.com/en/comps/12/misc/La-Liga-Stats'
df_players_misc= get_players_data(url)
df_players_misc_limpio= limpieza_df_players(df_players_misc, url)
df_players_misc_limpio

Unnamed: 0,player_General_Misc,nationality_General_Misc,position_General_Misc,team_General_Misc,age_General_Misc,birth_year_General_Misc,minutes_90s_General_Misc,cards_yellow_Performance_Misc,cards_red_Performance_Misc,cards_yellow_red_Performance_Misc,...,interceptions_Performance_Misc,tackles_won_Performance_Misc,pens_won_Performance_Misc,pens_conceded_Performance_Misc,own_goals_Performance_Misc,ball_recoveries_Performance_Misc,aerials_won_Aerial_Duels_Misc,aerials_lost_Aerial_Duels_Misc,aerials_won_pct_Aerial_Duels_Misc,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,1.3,2,0,0,...,0,4,0,0,0,7,2,1,66.7,La Liga
1,Rodrigo Abajas,ESP,DF,Valencia,21,2003,0.7,1,0,0,...,1,2,0,0,0,2,0,1,0.0,La Liga
2,Abel,ESP,DF,Osasuna,23,2000,23.0,3,0,0,...,18,37,0,1,0,106,20,17,54.1,La Liga
3,Nabil Aberdin,FRA,MF,Getafe,21,2002,2.9,1,0,0,...,0,4,0,0,0,8,10,3,76.9,La Liga
4,Abdel Abqar,MAR,DF,Alavés,25,1999,27.4,13,0,0,...,39,23,1,2,0,87,40,35,53.3,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,12.0,7,0,0,...,1,5,1,0,0,24,73,63,53.7,La Liga
597,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,0.8,0,0,0,...,1,1,0,0,0,2,0,0,0,La Liga
598,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,20.4,4,0,0,...,4,9,0,0,0,55,2,10,16.7,La Liga
599,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,25.2,6,1,0,...,31,22,0,1,0,86,65,80,44.8,La Liga


In [263]:
url= 'https://fbref.com/en/comps/12/keepers/La-Liga-Stats'
df_players_keepers= get_players_data(url)
df_players_keepers_limpio= limpieza_df_players(df_players_keepers, url)
df_players_keepers_limpio

Unnamed: 0,player_General_Keepers,nationality_General_Keepers,position_General_Keepers,team_General_Keepers,age_General_Keepers,birth_year_General_Keepers,gk_games_Playing_Time_Keepers,gk_games_starts_Playing_Time_Keepers,gk_minutes_Playing_Time_Keepers,minutes_90s_Playing_Time_Keepers,...,gk_ties_Performance_Keepers,gk_losses_Performance_Keepers,gk_clean_sheets_Performance_Keepers,gk_clean_sheets_pct_Performance_Keepers,gk_pens_att_Penalty_Kicks_Keepers,gk_pens_allowed_Penalty_Kicks_Keepers,gk_pens_saved_Penalty_Kicks_Keepers,gk_pens_missed_Penalty_Kicks_Keepers,gk_pens_save_pct_Penalty_Kicks_Keepers,competition
0,Adrián,ESP,GK,Betis,37,1987,19,19,1710,19.0,...,5,4,3,15.8,1,1,0,0,0.0,La Liga
1,Julen Agirrezabala,ESP,GK,Athletic Club,23,2000,14,14,1206,13.4,...,4,1,5,35.7,2,1,1,0,50.0,La Liga
2,Augusto Batalla,ARG,GK,Rayo Vallecano,28,1996,32,32,2880,32.0,...,12,10,8,25.0,8,6,2,0,25.0,La Liga
3,Dani Cárdenas,ESP,GK,Rayo Vallecano,27,1997,6,6,540,6.0,...,1,2,2,33.3,0,0,0,0,0.0,La Liga
4,Jasper Cillessen,NED,GK,Las Palmas,35,1989,27,27,2335,25.9,...,8,13,3,11.1,8,4,3,1,42.9,La Liga
5,Diego Conde,ESP,GK,Villarreal,25,1998,22,22,1962,21.8,...,6,5,2,9.1,4,3,0,1,0.0,La Liga
6,Thibaut Courtois,BEL,GK,Real Madrid,32,1992,30,30,2700,30.0,...,6,4,11,36.7,3,3,0,0,0.0,La Liga
7,Stole Dimitrievski,MKD,GK,Valencia,30,1993,4,4,360,4.0,...,2,2,0,0.0,2,1,0,1,0.0,La Liga
8,Marko Dmitrović,SRB,GK,Leganés,32,1992,32,32,2880,32.0,...,11,13,10,31.3,8,7,0,1,0.0,La Liga
9,Aitor Fernández,ESP,GK,Osasuna,33,1991,1,1,90,1.0,...,1,0,1,100.0,0,0,0,0,0.0,La Liga


In [266]:
url= 'https://fbref.com/en/comps/12/keepersadv/La-Liga-Stats'
df_players_keepersadv= get_players_data(url)
df_players_keepersadv_limpio= limpieza_df_players(df_players_keepersadv, url)
df_players_keepersadv_limpio

Unnamed: 0,player_General_Keepersadv,nationality_General_Keepersadv,position_General_Keepersadv,team_General_Keepersadv,age_General_Keepersadv,birth_year_General_Keepersadv,minutes_90s_General_Keepersadv,gk_goals_against_Goals_Keepersadv,gk_pens_allowed_Goals_Keepersadv,gk_free_kick_goals_against_Goals_Keepersadv,...,gk_goal_kicks_Goal_Kicks_Keepersadv,gk_pct_goal_kicks_launched_Goal_Kicks_Keepersadv,gk_goal_kick_length_avg_Goal_Kicks_Keepersadv,gk_crosses_Crosses_Keepersadv,gk_crosses_stopped_Crosses_Keepersadv,gk_crosses_stopped_pct_Crosses_Keepersadv,gk_def_actions_outside_pen_area_Sweeper_Keepersadv,gk_def_actions_outside_pen_area_per90_Sweeper_Keepersadv,gk_avg_distance_def_actions_Sweeper_Keepersadv,competition
0,Adrián,ESP,GK,Betis,37,1987,19.0,27,1,2,...,109,33.0,33.8,257,14,5.4,16,0.84,11.7,La Liga
1,Julen Agirrezabala,ESP,GK,Athletic Club,23,2000,13.4,9,1,2,...,66,75.8,54.9,143,20,14.0,17,1.27,14.2,La Liga
2,Augusto Batalla,ARG,GK,Rayo Vallecano,28,1996,32.0,39,6,0,...,184,63.0,51.1,453,35,7.7,38,1.19,14.4,La Liga
3,Dani Cárdenas,ESP,GK,Rayo Vallecano,27,1997,6.0,6,0,0,...,46,67.4,51.9,79,2,2.5,1,0.17,10.2,La Liga
4,Jasper Cillessen,NED,GK,Las Palmas,35,1989,25.9,42,4,0,...,179,47.5,43.1,413,13,3.1,20,0.77,14.5,La Liga
5,Diego Conde,ESP,GK,Villarreal,25,1998,21.8,33,3,1,...,107,37.4,36.3,323,18,5.6,16,0.73,11.9,La Liga
6,Thibaut Courtois,BEL,GK,Real Madrid,32,1992,30.0,29,3,0,...,148,35.8,39.1,350,16,4.6,14,0.47,10.7,La Liga
7,Stole Dimitrievski,MKD,GK,Valencia,30,1993,4.0,6,1,0,...,20,80.0,53.6,51,3,5.9,16,4.0,25.7,La Liga
8,Marko Dmitrović,SRB,GK,Leganés,32,1992,32.0,49,7,1,...,303,92.4,66.2,579,49,8.5,53,1.66,14.5,La Liga
9,Aitor Fernández,ESP,GK,Osasuna,33,1991,1.0,0,0,0,...,11,100.0,59.9,14,1,7.1,1,1.0,10.6,La Liga


In [273]:
url= 'https://fbref.com/en/comps/12/playingtime/La-Liga-Stats'
df_players_playingtime= get_players_data(url)
df_players_playingtime_limpio= limpieza_df_players(df_players_playingtime, url)
df_players_playingtime_limpio

Unnamed: 0,player_General_Playingtime,nationality_General_Playingtime,position_General_Playingtime,team_General_Playingtime,age_General_Playingtime,birth_year_General_Playingtime,games_Playing_Time_Playingtime,minutes_Playing_Time_Playingtime,minutes_per_game_Playing_Time_Playingtime,minutes_pct_Playing_Time_Playingtime,...,on_goals_against_Team_Success_Playingtime,plus_minus_Team_Success_Playingtime,plus_minus_per90_Team_Success_Playingtime,plus_minus_wowy_Team_Success_Playingtime,on_xg_for_Team_Success_(xG)_Playingtime,on_xg_against_Team_Success_(xG)_Playingtime,xg_plus_minus_Team_Success_(xG)_Playingtime,xg_plus_minus_per90_Team_Success_(xG)_Playingtime,xg_plus_minus_wowy_Team_Success_(xG)_Playingtime,competition
0,Max Aarons,ENG,"DF,MF",Valencia,24,2000,4,120,30,3.5,...,3,-2,-1.50,-1.28,1.5,3.7,-2.3,-1.69,-1.52,La Liga
1,Álvaro Abajas,ESP,GK,Leganés,21,2003,0,0,0,0,...,0,0,0,0,0,0,0,0,0,La Liga
2,Rodrigo Abajas,ESP,DF,Valencia,21,2003,1,65,65,1.9,...,2,-1,-1.38,-1.14,1.4,0.7,+0.7,+0.93,+1.18,La Liga
3,Jones El-Abdellaoui,MAR,FW,Celta Vigo,18,2006,0,0,0,0,...,0,0,0,0,0,0,0,0,0,La Liga
4,Abel,ESP,DF,Osasuna,23,2000,35,2074,59,60.6,...,35,-5,-0.22,-0.28,25.7,34.4,-8.7,-0.38,-0.32,La Liga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
744,Bertuğ Yıldırım,TUR,FW,Getafe,22,2002,22,1083,49,31.7,...,10,-5,-0.42,-0.42,11.9,12.9,-0.9,-0.08,+0.25,La Liga
745,Arsen Zakharyan,RUS,"MF,FW",Real Sociedad,21,2003,3,73,24,2.1,...,0,+2,+2.47,+2.82,1.5,0.3,+1.3,+1.56,+1.63,La Liga
746,Bryan Zaragoza,ESP,"FW,MF",Osasuna,23,2001,27,1836,68,53.7,...,27,+2,+0.10,+0.44,23.3,28.7,-5.4,-0.27,-0.03,La Liga
747,Igor Zubeldia,ESP,DF,Real Sociedad,27,1997,28,2269,81,66.3,...,28,-6,-0.24,+0.15,29.4,27.6,+1.8,+0.07,+0.33,La Liga


In [275]:
def creacion_df_jugadores_estadistica_unica(url: str, guardar_csv=False, league='La Liga', season='2024',):
    # Obtener datos de la tabla
    df_sucio = get_players_data(url)
    
    # Limpiar los datos
    df_limpio = limpieza_df_players(df_sucio,  url=url)  # Asegúrate de pasar la URL a la función de limpieza

    # Extraer metrica_general de la URL
    metrica_general_match = re.search(r'/(\w+)/La-Liga-Stats', url)
    if metrica_general_match:
        metrica_general = metrica_general_match.group(1)
    else:
        metrica_general = 'unknown'

    # Suprimir espacios en el parámetro league
    league_clean = league.lower().replace(' ', '_')


    # Guardar CSV si es necesario
    if guardar_csv:
        df_limpio.to_csv(f'./df_players_{metrica_general}_{league_clean}_{season}.csv', index=False)
    
    return df_limpio

In [278]:
creacion_df_jugadores_estadistica_unica(url= 'https://fbref.com/en/comps/12/keepers/La-Liga-Stats', guardar_csv=True, league='La Liga', season='2024')

Unnamed: 0,player_General_Keepers,nationality_General_Keepers,position_General_Keepers,team_General_Keepers,age_General_Keepers,birth_year_General_Keepers,gk_games_Playing_Time_Keepers,gk_games_starts_Playing_Time_Keepers,gk_minutes_Playing_Time_Keepers,minutes_90s_Playing_Time_Keepers,...,gk_ties_Performance_Keepers,gk_losses_Performance_Keepers,gk_clean_sheets_Performance_Keepers,gk_clean_sheets_pct_Performance_Keepers,gk_pens_att_Penalty_Kicks_Keepers,gk_pens_allowed_Penalty_Kicks_Keepers,gk_pens_saved_Penalty_Kicks_Keepers,gk_pens_missed_Penalty_Kicks_Keepers,gk_pens_save_pct_Penalty_Kicks_Keepers,competition
0,Adrián,ESP,GK,Betis,37,1987,19,19,1710,19.0,...,5,4,3,15.8,1,1,0,0,0.0,La Liga
1,Julen Agirrezabala,ESP,GK,Athletic Club,23,2000,14,14,1206,13.4,...,4,1,5,35.7,2,1,1,0,50.0,La Liga
2,Augusto Batalla,ARG,GK,Rayo Vallecano,28,1996,32,32,2880,32.0,...,12,10,8,25.0,8,6,2,0,25.0,La Liga
3,Dani Cárdenas,ESP,GK,Rayo Vallecano,27,1997,6,6,540,6.0,...,1,2,2,33.3,0,0,0,0,0.0,La Liga
4,Jasper Cillessen,NED,GK,Las Palmas,35,1989,27,27,2335,25.9,...,8,13,3,11.1,8,4,3,1,42.9,La Liga
5,Diego Conde,ESP,GK,Villarreal,25,1998,22,22,1962,21.8,...,6,5,2,9.1,4,3,0,1,0.0,La Liga
6,Thibaut Courtois,BEL,GK,Real Madrid,32,1992,30,30,2700,30.0,...,6,4,11,36.7,3,3,0,0,0.0,La Liga
7,Stole Dimitrievski,MKD,GK,Valencia,30,1993,4,4,360,4.0,...,2,2,0,0.0,2,1,0,1,0.0,La Liga
8,Marko Dmitrović,SRB,GK,Leganés,32,1992,32,32,2880,32.0,...,11,13,10,31.3,8,7,0,1,0.0,La Liga
9,Aitor Fernández,ESP,GK,Osasuna,33,1991,1,1,90,1.0,...,1,0,1,100.0,0,0,0,0,0.0,La Liga


In [220]:
creacion_df_jugadores_estadistica_unica(url= 'https://fbref.com/es/comps/12/keepers/Estadisticas-de-La-Liga', guardar_csv=True, league='La Liga', season='2024')

Unnamed: 0,player_General_Standard_Stats,nationality_General_Standard_Stats,position_General_Standard_Stats,team_General_Standard_Stats,age_General_Standard_Stats,birth_year_General_Standard_Stats,gk_games_Tiempo_Jugado_Standard_Stats,gk_games_starts_Tiempo_Jugado_Standard_Stats,gk_minutes_Tiempo_Jugado_Standard_Stats,minutes_90s_Tiempo_Jugado_Standard_Stats,...,gk_ties_Rendimiento_Standard_Stats,gk_losses_Rendimiento_Standard_Stats,gk_clean_sheets_Rendimiento_Standard_Stats,gk_clean_sheets_pct_Rendimiento_Standard_Stats,gk_pens_att_Tiros_penales_Standard_Stats,gk_pens_allowed_Tiros_penales_Standard_Stats,gk_pens_saved_Tiros_penales_Standard_Stats,gk_pens_missed_Tiros_penales_Standard_Stats,gk_pens_save_pct_Tiros_penales_Standard_Stats,competition
0,Adrián,ESP,PO,Betis,37,1987,19,19,1710,19.0,...,5,4,3,15.8,1,1,0,0,0.0,Desconocida
1,Julen Agirrezabala,ESP,PO,Athletic Club,23,2000,14,14,1206,13.4,...,4,1,5,35.7,2,1,1,0,50.0,Desconocida
2,Augusto Batalla,ARG,PO,Rayo Vallecano,28,1996,32,32,2880,32.0,...,12,10,8,25.0,8,6,2,0,25.0,Desconocida
3,Dani Cárdenas,ESP,PO,Rayo Vallecano,27,1997,6,6,540,6.0,...,1,2,2,33.3,0,0,0,0,0,Desconocida
4,Jasper Cillessen,NED,PO,Las Palmas,35,1989,27,27,2335,25.9,...,8,13,3,11.1,8,4,3,1,42.9,Desconocida
5,Diego Conde,ESP,PO,Villarreal,25,1998,22,22,1962,21.8,...,6,5,2,9.1,4,3,0,1,0.0,Desconocida
6,Thibaut Courtois,BEL,PO,Real Madrid,32,1992,30,30,2700,30.0,...,6,4,11,36.7,3,3,0,0,0.0,Desconocida
7,Stole Dimitrievski,MKD,PO,Valencia,30,1993,4,4,360,4.0,...,2,2,0,0.0,2,1,0,1,0.0,Desconocida
8,Marko Dmitrović,SRB,PO,Leganés,32,1992,32,32,2880,32.0,...,11,13,10,31.3,8,7,0,1,0.0,Desconocida
9,Aitor Fernández,ESP,PO,Osasuna,33,1991,1,1,90,1.0,...,1,0,1,100.0,0,0,0,0,0,Desconocida


In [279]:
def creacion_df_general_torneo_fbref(league='La Liga', 
                                     season="2024-2025", 
                                     stat_list=None, 
                                     player_urls=None,
                                     guardar_csv=False):
    """
    Crea un DataFrame general con estadísticas de jugadores a partir de las URLs de FBref.

    Args:
        league (str): Nombre de la liga (e.g., 'La Liga').
        season (str): Temporada (e.g., '2024-2025').
        stat_list (list): Lista de estadísticas a procesar (e.g., ['Standard Stats', 'Shooting', ...]).
        player_urls (dict): Diccionario con las URLs por liga y temporada.
        guardar_csv (bool): Si se desea guardar cada DataFrame en CSV.

    Returns:
        pd.DataFrame: DataFrame general con las estadísticas combinadas.
    """

    if stat_list is None:
        stat_list = ["Standard Stats", "Shooting", "Passing", "Pass Types", 
                     "Goal and Shot Creation", "Defensive Actions", 
                     "Possession", "Miscellaneous Stats"]

    dfs = []

    for stat_name in stat_list:
        # Obtener la URL de la estadística actual
        try:
            url_actual = player_urls[league][season][stat_name]
        except KeyError:
            print(f"⚠️ No se encontró URL para la estadística '{stat_name}'. Se omite.")
            continue

        # Obtener el DataFrame limpio
        df_temp = creacion_df_jugadores_estadistica_unica(
            url=url_actual,
            guardar_csv=guardar_csv,
            league=league,
            season=season
        )
        if df_temp is not None:
            if 'competition' in df_temp.columns:
                df_temp = df_temp.drop(columns=['competition'])
            dfs.append(df_temp)
        else:
            print(f"⚠️ No se pudo procesar '{stat_name}'. Se omite.")

        # Pausa aleatoria
        time.sleep(random.uniform(2, 5))

    if not dfs:
        print("⚠️ No se generó ningún DataFrame.")
        return pd.DataFrame()

    # Concatenar DataFrames horizontalmente y eliminar columnas duplicadas
    df_general_final = pd.concat(dfs, axis=1)
    df_general_final = df_general_final.loc[:, ~df_general_final.columns.duplicated(keep='first')]

    # Insertar columna de Competición
    df_general_final.insert(3, 'competition', league)

    return df_general_final

In [280]:
df_la_liga_2024 = creacion_df_general_torneo_fbref(
    league='La Liga',
    season='2024-2025',
    stat_list=["Standard Stats", "Shooting", "Passing"],  # o todas las que quieras
    player_urls=player_urls
)
df_la_liga_2024

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col_name] = df[col_name].astype(str).str.extract(r'([A-Z]+)$')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['competition'] = competition_name
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace('', np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.py

Unnamed: 0,ranker_General_Standard_Stats,player_General_Standard_Stats,nationality_General_Standard_Stats,competition,position_General_Standard_Stats,team_General_Standard_Stats,age_General_Standard_Stats,birth_year_General_Standard_Stats,games_Playing_Time_Standard_Stats,games_starts_Playing_Time_Standard_Stats,...,passes_pct_long_Long_Standard_Stats,assists_General_Standard_Stats,xg_assist_General_Standard_Stats,pass_xa_Expected_Standard_Stats,xg_assist_net_Expected_Standard_Stats,assisted_shots_General_Standard_Stats,passes_into_final_third_General_Standard_Stats,passes_into_penalty_area_General_Standard_Stats,crosses_into_penalty_area_General_Standard_Stats,progressive_passes_General_Standard_Stats
0,1,Max Aarons,ENG,La Liga,"DF,MF",Valencia,24,2000,4,1,...,25.0,0,0.0,0.0,0.0,1,2,0,0,6
1,2,Rodrigo Abajas,ESP,La Liga,DF,Valencia,21,2003,1,1,...,28.6,0,0.0,0.0,0.0,0,0,0,0,2
2,3,Abel,ESP,La Liga,DF,Osasuna,23,2000,35,20,...,36.7,0,1.0,1.5,-1.0,16,57,18,13,77
3,4,Nabil Aberdin,FRA,La Liga,MF,Getafe,21,2002,7,4,...,61.5,0,0.0,0.1,0.0,1,7,2,0,6
4,5,Abdel Abqar,MAR,La Liga,DF,Alavés,25,1999,29,29,...,44.2,1,0.3,0.4,+0.7,8,67,10,2,79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
596,597,Bertuğ Yıldırım,TUR,La Liga,FW,Getafe,22,2002,22,14,...,44.4,0,1.2,0.6,-1.2,9,9,9,3,14
597,598,Arsen Zakharyan,RUS,La Liga,"MF,FW",Real Sociedad,21,2003,3,1,...,80.0,0,0.1,0.1,-0.1,1,3,1,0,3
598,599,Bryan Zaragoza,ESP,La Liga,"FW,MF",Osasuna,23,2001,27,22,...,49.5,6,3.2,5.1,+2.8,36,23,34,20,50
599,600,Igor Zubeldia,ESP,La Liga,DF,Real Sociedad,27,1997,28,27,...,52.1,0,0.4,0.6,-0.4,4,93,6,0,101


In [281]:
list(df_la_liga_2024.columns)

['ranker_General_Standard_Stats',
 'player_General_Standard_Stats',
 'nationality_General_Standard_Stats',
 'competition',
 'position_General_Standard_Stats',
 'team_General_Standard_Stats',
 'age_General_Standard_Stats',
 'birth_year_General_Standard_Stats',
 'games_Playing_Time_Standard_Stats',
 'games_starts_Playing_Time_Standard_Stats',
 'minutes_Playing_Time_Standard_Stats',
 'minutes_90s_Playing_Time_Standard_Stats',
 'goals_Performance_Standard_Stats',
 'assists_Performance_Standard_Stats',
 'goals_assists_Performance_Standard_Stats',
 'goals_pens_Performance_Standard_Stats',
 'pens_made_Performance_Standard_Stats',
 'pens_att_Performance_Standard_Stats',
 'cards_yellow_Performance_Standard_Stats',
 'cards_red_Performance_Standard_Stats',
 'xg_Expected_Standard_Stats',
 'npxg_Expected_Standard_Stats',
 'xg_assist_Expected_Standard_Stats',
 'npxg_xg_assist_Expected_Standard_Stats',
 'progressive_carries_Progression_Standard_Stats',
 'progressive_passes_Progression_Standard_Stats'

DATOS EQUIPOS

In [282]:
url = 'https://fbref.com/en/comps/12/stats/La-Liga-Stats'

In [290]:
def obtener_tabla_equipos_estadistica_unica(url_general, stats_vs=False, guardar_csv= False, league='La Liga', season='2024'):
    """
    Descarga y limpia la tabla de estadísticas de equipos desde una URL de FBref.
    Args:
        url_general (str): URL de la tabla de FBref.
        stats_vs (bool): True si la tabla deseada es la segunda (vs) en la página.
        league (str): Nombre de la liga
        season (str): Fecha de la temporada
    Returns:
        pd.DataFrame: Tabla limpia con columnas renombradas.
    """
    tables = pd.read_html(url_general)
    
    if stats_vs:
        df = tables[1]
    else:
        df = tables[0]
    
    # Comprobamos si la cabecera es un MultiIndex
    if isinstance(df.columns, pd.MultiIndex):
        # Extraemos la metrica_general desde la URL
        metrica_general = url_general.split('/')[-2]  # Ejemplo: 'shooting', 'passing'
        metrica_general = metrica_general.replace('-', '_').lower()

        # Procesamos las columnas
        columns_data = []
        new_columns = []

        for col in df.columns:
            over_header = col[0].strip().replace(' ', '_').lower()
            data_stat = col[1].strip().replace(' ', '_').lower()

            columns_data.append({
                'data-stat': data_stat,
                'data-over-header': over_header,
                'metrica-general': metrica_general
            })

            new_col_name = f"{data_stat}_{over_header}_{metrica_general}"
            new_columns.append(new_col_name)
        
        df.columns = new_columns

    # Eliminamos filas con nombres de cabecera duplicados o que sean filas vacías
    if any(df.iloc[:,0].str.contains('Squad', case=False, na=False)):
        df = df[~df.iloc[:,0].str.contains('Squad', case=False, na=False)].copy()
    
    # Resetear el índice
    df = df.reset_index(drop=True)

    # Extraer metrica_general de la URL
    metrica_general_match = re.search(r'/(\w+)/La-Liga-Stats', url)
    if metrica_general_match:
        metrica_general = metrica_general_match.group(1)
    else:
        metrica_general = 'unknown'

    # Suprimir espacios en el parámetro league
    league_clean = league.lower().replace(' ', '_')

    # Guardar CSV si es necesario
    if guardar_csv:
        df.to_csv(f'./df_equipos_{metrica_general}_{league_clean}_{season}.csv', index=False)

    return df


In [289]:
obtener_tabla_equipos_estadistica_unica(url, stats_vs=False, guardar_csv= True, league='La Liga', season='2024')

Unnamed: 0,squad_unnamed:_0_level_0_stats,#_pl_unnamed:_1_level_0_stats,age_unnamed:_2_level_0_stats,poss_unnamed:_3_level_0_stats,mp_playing_time_stats,starts_playing_time_stats,min_playing_time_stats,90s_playing_time_stats,gls_performance_stats,ast_performance_stats,...,gls_per_90_minutes_stats,ast_per_90_minutes_stats,g+a_per_90_minutes_stats,g-pk_per_90_minutes_stats,g+a-pk_per_90_minutes_stats,xg_per_90_minutes_stats,xag_per_90_minutes_stats,xg+xag_per_90_minutes_stats,npxg_per_90_minutes_stats,npxg+xag_per_90_minutes_stats
0,Alavés,29,26.6,45.3,38,418,3420,38.0,38,20,...,1.0,0.53,1.53,0.76,1.29,1.13,0.67,1.79,0.9,1.57
1,Athletic Club,31,26.9,48.4,38,418,3420,38.0,53,40,...,1.39,1.05,2.45,1.32,2.37,1.39,0.97,2.36,1.27,2.23
2,Atlético Madrid,24,28.4,52.6,38,418,3420,38.0,68,51,...,1.79,1.34,3.13,1.63,2.97,1.7,1.26,2.96,1.56,2.82
3,Barcelona,28,24.7,68.3,38,418,3420,38.0,99,72,...,2.61,1.89,4.5,2.42,4.32,2.41,1.78,4.18,2.24,4.02
4,Betis,36,27.5,52.4,38,418,3420,38.0,55,37,...,1.45,0.97,2.42,1.26,2.24,1.44,0.99,2.43,1.23,2.22
5,Celta Vigo,30,26.9,53.6,38,418,3420,38.0,58,40,...,1.53,1.05,2.58,1.32,2.37,1.43,1.01,2.44,1.24,2.26
6,Espanyol,27,25.2,39.8,38,418,3420,38.0,38,27,...,1.0,0.71,1.71,0.87,1.58,0.91,0.65,1.55,0.8,1.45
7,Getafe,30,27.7,41.8,38,418,3420,38.0,34,21,...,0.89,0.55,1.45,0.76,1.32,0.97,0.63,1.6,0.87,1.5
8,Girona,30,27.5,56.3,38,418,3420,38.0,43,31,...,1.13,0.82,1.95,1.03,1.84,1.12,0.83,1.95,1.02,1.85
9,Las Palmas,34,26.8,50.7,38,418,3420,38.0,38,27,...,1.0,0.71,1.71,0.92,1.63,0.96,0.69,1.65,0.87,1.56


In [287]:
obtener_tabla_equipos_estadistica_unica(url, stats_vs=True)

Unnamed: 0,squad_unnamed:_0_level_0_stats,#_pl_unnamed:_1_level_0_stats,age_unnamed:_2_level_0_stats,poss_unnamed:_3_level_0_stats,mp_playing_time_stats,starts_playing_time_stats,min_playing_time_stats,90s_playing_time_stats,gls_performance_stats,ast_performance_stats,...,gls_per_90_minutes_stats,ast_per_90_minutes_stats,g+a_per_90_minutes_stats,g-pk_per_90_minutes_stats,g+a-pk_per_90_minutes_stats,xg_per_90_minutes_stats,xag_per_90_minutes_stats,xg+xag_per_90_minutes_stats,npxg_per_90_minutes_stats,npxg+xag_per_90_minutes_stats
0,vs Alavés,29,26.9,54.7,38,418,3420,38.0,46,28,...,1.21,0.74,1.95,0.92,1.66,1.24,0.88,2.11,1.01,1.89
1,vs Athletic Club,31,26.6,51.6,38,418,3420,38.0,28,11,...,0.74,0.29,1.03,0.66,0.95,0.99,0.6,1.6,0.89,1.49
2,vs Atlético Madrid,24,26.8,47.4,38,418,3420,38.0,28,17,...,0.74,0.45,1.18,0.61,1.05,0.88,0.63,1.51,0.78,1.4
3,vs Barcelona,28,26.8,31.7,38,418,3420,38.0,39,31,...,1.03,0.82,1.84,0.92,1.74,1.1,0.86,1.96,1.02,1.88
4,vs Betis,36,27.0,47.6,38,418,3420,38.0,49,32,...,1.29,0.84,2.13,1.18,2.03,1.33,1.01,2.34,1.25,2.26
5,vs Celta Vigo,30,26.7,46.4,38,418,3420,38.0,55,45,...,1.45,1.18,2.63,1.37,2.55,1.14,0.88,2.02,1.05,1.93
6,vs Espanyol,27,27.0,60.2,38,418,3420,38.0,50,39,...,1.32,1.03,2.34,1.21,2.24,1.42,1.04,2.46,1.29,2.33
7,vs Getafe,30,27.0,58.2,38,418,3420,38.0,39,21,...,1.03,0.55,1.58,0.82,1.37,1.22,0.82,2.04,1.04,1.86
8,vs Girona,30,26.9,43.7,38,418,3420,38.0,59,46,...,1.55,1.21,2.76,1.47,2.68,1.33,0.96,2.29,1.2,2.16
9,vs Las Palmas,34,26.8,49.3,38,418,3420,38.0,58,43,...,1.53,1.13,2.66,1.39,2.53,1.76,1.25,3.01,1.56,2.81


In [None]:
if stats_vs:
              data = df_total[1]
        else:
              data = df_total[0]

In [284]:
table= pd.read_html(url)
table[0]

Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Playing Time,Playing Time,Playing Time,Playing Time,Performance,Performance,...,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes
Unnamed: 0_level_1,Squad,# Pl,Age,Poss,MP,Starts,Min,90s,Gls,Ast,...,Gls,Ast,G+A,G-PK,G+A-PK,xG,xAG,xG+xAG,npxG,npxG+xAG
0,Alavés,29,26.6,45.3,38,418,3420,38.0,38,20,...,1.0,0.53,1.53,0.76,1.29,1.13,0.67,1.79,0.9,1.57
1,Athletic Club,31,26.9,48.4,38,418,3420,38.0,53,40,...,1.39,1.05,2.45,1.32,2.37,1.39,0.97,2.36,1.27,2.23
2,Atlético Madrid,24,28.4,52.6,38,418,3420,38.0,68,51,...,1.79,1.34,3.13,1.63,2.97,1.7,1.26,2.96,1.56,2.82
3,Barcelona,28,24.7,68.3,38,418,3420,38.0,99,72,...,2.61,1.89,4.5,2.42,4.32,2.41,1.78,4.18,2.24,4.02
4,Betis,36,27.5,52.4,38,418,3420,38.0,55,37,...,1.45,0.97,2.42,1.26,2.24,1.44,0.99,2.43,1.23,2.22
5,Celta Vigo,30,26.9,53.6,38,418,3420,38.0,58,40,...,1.53,1.05,2.58,1.32,2.37,1.43,1.01,2.44,1.24,2.26
6,Espanyol,27,25.2,39.8,38,418,3420,38.0,38,27,...,1.0,0.71,1.71,0.87,1.58,0.91,0.65,1.55,0.8,1.45
7,Getafe,30,27.7,41.8,38,418,3420,38.0,34,21,...,0.89,0.55,1.45,0.76,1.32,0.97,0.63,1.6,0.87,1.5
8,Girona,30,27.5,56.3,38,418,3420,38.0,43,31,...,1.13,0.82,1.95,1.03,1.84,1.12,0.83,1.95,1.02,1.85
9,Las Palmas,34,26.8,50.7,38,418,3420,38.0,38,27,...,1.0,0.71,1.71,0.92,1.63,0.96,0.69,1.65,0.87,1.56


OBTENER TABLA CLASIFICATORIA DE LA LIGA

In [118]:
def obtener_tabla_liga_principal(url_general):
    """
    Extrae y limpia la tabla clasificatoria de una liga desde FBref.

    Args:
        url_general (str): URL de la página de clasificación de la liga en FBref.

    Returns:
        pd.DataFrame: DataFrame limpio con la tabla clasificatoria, 
                      renombrando la columna 'RL' a 'Posicion' y eliminando la columna 'Notas'.
    """
    tabla = pd.read_html(url_general)
    tabla_sucia = tabla[0]
    tabla_sucia = tabla_sucia.rename(columns={'RL': 'Posicion'})
    tabla_limpia = tabla_sucia.drop(columns=['Notas'])
    return tabla_limpia

In [119]:
obtener_tabla_liga_principal('https://fbref.com/es/comps/12/Estadisticas-de-La-Liga')

Unnamed: 0,Posicion,Equipo,PJ,PG,PE,PP,GF,GC,DG,Pts,Pts/PJ,xG,xGA,xGD,xGD/90,Asistencia,Máximo Goleador del Equipo,Portero
0,1,Barcelona,38,28,4,6,102,39,63,88,2.32,91.5,41.9,49.5,1.3,45953,Robert Lewandowski - 27,Iñaki Peña
1,2,Real Madrid,38,26,6,6,78,38,40,84,2.21,75.3,42.8,32.5,0.86,69807,Kylian Mbappé - 31,Thibaut Courtois
2,3,Atlético Madrid,38,22,10,6,68,30,38,76,2.0,64.6,33.4,31.2,0.82,60883,Alexander Sørloth - 20,Jan Oblak
3,4,Athletic Club,38,19,13,6,54,29,25,70,1.84,53.0,37.8,15.2,0.4,48420,Oihan Sancet - 15,Unai Simón
4,5,Villarreal,38,20,10,8,71,51,20,70,1.84,64.8,44.4,20.4,0.54,18266,Ayoze Pérez - 19,Diego Conde
5,6,Betis,38,16,12,10,57,50,7,60,1.58,54.7,50.6,4.1,0.11,51542,Isco - 9,Adrián
6,7,Celta Vigo,38,16,7,15,59,57,2,55,1.45,54.2,43.4,10.9,0.29,21504,Borja Iglesias - 11,Vicente Guaita
7,8,Rayo Vallecano,38,13,13,12,41,45,-4,52,1.37,45.4,49.0,-3.5,-0.09,12908,Jorge de Frutos - 6,Augusto Batalla
8,9,Osasuna,38,12,16,10,48,52,-4,52,1.37,44.0,53.7,-9.6,-0.25,20476,Ante Budimir - 21,Sergio Herrera
9,10,Mallorca,38,13,9,16,35,44,-9,48,1.26,38.8,46.8,-8.1,-0.21,18502,"Cyle Larin, Vedat Muriqi - 7",Dominik Greif


DATOS JUGADOR CONCRETO - INFORME RECLUTAMIENTO 

In [18]:
url_jugador= 'https://fbref.com/es/jugadores/82ec26c1/scout/365_m1/Informe-de-reclutamiento-de-Lamine-Yamal'
url_jugador

'https://fbref.com/es/jugadores/82ec26c1/scout/365_m1/Informe-de-reclutamiento-de-Lamine-Yamal'

In [29]:
def obtener_jugadores_similares(url_jugador):
    """
    Extrae y limpia la tabla de jugadores similares del informe de reclutamiento de un jugador en FBref.

    Args:
        url_jugador (str): URL del informe de reclutamiento del jugador en FBref.

    Returns:
        pd.DataFrame: DataFrame limpio con los jugadores similares, sin columnas irrelevantes y con la nacionalidad normalizada.
    """
    #Lee todas las tablas HTML de la página del informe de reclutamiento
    tablas = pd.read_html(url_jugador)
    
    #Selecciona la segunda tabla (índice 1), que suele contener los jugadores similares
    tabla_sucia = tablas[1]
    
    #Elimina las columnas 'RL' y 'Comparar', que no aportan información relevante
    tabla_limpia = tabla_sucia.drop(columns=['RL', 'Comparar'])
    
    #Normaliza la columna de nacionalidad: extrae solo el código de país en mayúsculas
    nationality_col = [col for col in tabla_limpia.columns if 'País' in col]
    if nationality_col:
        col_name = nationality_col[0]
        tabla_limpia[col_name] = tabla_limpia[col_name].astype(str).str.extract(r'([A-Z]+)$')
    
    #Devuelve el DataFrame limpio
    return tabla_limpia
    

In [30]:
obtener_jugadores_similares(url_jugador)

Unnamed: 0,Jugador,País,Equipo
0,Ousmane Dembélé,FRA,Paris Saint-Germain
1,Vinicius Júnior,BRA,Real Madrid
2,Sávio,BRA,Manchester City
3,Bukayo Saka,ENG,Arsenal
4,Florian Wirtz,GER,Leverkusen
5,Leroy Sané,GER,Bayern München
6,Rayan Cherki,FRA,Lyon
7,Désiré Doué,FRA,Paris Saint-Germain
8,Raphinha,BRA,Barcelona
9,Michael Olise,FRA,Bayern München


In [108]:
def obtener_tabla_datos_jugador_por90_percentiles(url_jugador):
    """
    Extrae y limpia la tabla de percentiles 'Por 90' de un informe de reclutamiento de jugador en FBref.

    Args:
        url_jugador (str): URL del informe de reclutamiento del jugador en FBref.

    Returns:
        pd.DataFrame: DataFrame limpio con los datos de percentiles 'Por 90' del jugador.
    """


    #Lee todas las tablas HTML de la página del informe de reclutamiento

    tablas = pd.read_html(url_jugador)

    #Selecciona la tercera tabla (índice 2), que suele contener los percentiles 'Por 90'

    tabla_sucia = tablas[2]

    #Elimina el primer nivel del MultiIndex de columnas si existe

    tabla_sucia.columns = tabla_sucia.columns.droplevel(0)

    #Elimina filas completamente vacías

    tabla_sucia = tabla_sucia.dropna()

    #Filtra filas para quedarse solo con las que tienen valores numéricos en 'Por 90'

    tabla_sucia = tabla_sucia[~tabla_sucia['Por 90'].str.contains(r'[a-zA-Z]', na=False)] 

    #Limpia la columna 'Por 90': elimina el símbolo '%' y convierte a numérico

    tabla_sucia['Por 90'] = tabla_sucia['Por 90'].str.replace('%', '', regex=True)
    tabla_sucia['Por 90'] = pd.to_numeric(tabla_sucia['Por 90'], errors='coerce')

    #Convierte la columna 'Percentil' a tipo numérico

    tabla_sucia['Percentil'] = pd.to_numeric(tabla_sucia['Percentil'], errors='coerce')

    #Reinicia el índice del DataFrame limpio

    tabla_limpia = tabla_sucia.reset_index(drop=True)

    #Devuelve el DataFrame limpio
    
    return tabla_limpia 

In [106]:
obtener_tabla_datos_jugador_por90_percentiles(url_jugador)

Unnamed: 0,Estadísticas,Por 90,Percentil
0,Goles,0.32,64
1,Asistencias,0.36,89
2,Goles + Asistencias,0.68,80
3,Goles sin penalización,0.32,70
4,Tiros penales ejecutados,0.00,35
...,...,...,...
130,Goles en contra,0.00,50
131,Recuperación de pelotas,3.83,42
132,Aéreos Ganados,0.05,1
133,Aéreos Perdidos,0.07,99


In [100]:
# Lee todas las tablas que encuentre en la página
tablas = pd.read_html(url_jugador
)

# Muestra cuántas tablas encontró
print(f"Se encontraron {len(url_jugador)} tablas")

# Para ver el contenido de la primera tabla
tabla_percentil_pero90= tablas[2]
tabla_percentil_pero90

Se encontraron 93 tablas


Unnamed: 0_level_0,Estadísticas estándar,Estadísticas estándar,Estadísticas estándar
Unnamed: 0_level_1,Estadísticas,Por 90,Percentil
0,Goles,0.32,64
1,Asistencias,0.36,89
2,Goles + Asistencias,0.68,80
3,Goles sin penalización,0.32,70
4,Tiros penales ejecutados,0.00,35
...,...,...,...
164,Recuperación de pelotas,3.83,42
165,,,
166,Aéreos Ganados,0.05,1
167,Aéreos Perdidos,0.07,99


In [101]:
tabla_percentil_pero90.columns = tabla_percentil_pero90.columns.droplevel(0)
tabla_percentil_pero90 

Unnamed: 0,Estadísticas,Por 90,Percentil
0,Goles,0.32,64
1,Asistencias,0.36,89
2,Goles + Asistencias,0.68,80
3,Goles sin penalización,0.32,70
4,Tiros penales ejecutados,0.00,35
...,...,...,...
164,Recuperación de pelotas,3.83,42
165,,,
166,Aéreos Ganados,0.05,1
167,Aéreos Perdidos,0.07,99


In [102]:
tabla_percentil_pero90 = tabla_percentil_pero90.dropna()
tabla_percentil_pero90

Unnamed: 0,Estadísticas,Por 90,Percentil
0,Goles,0.32,64
1,Asistencias,0.36,89
2,Goles + Asistencias,0.68,80
3,Goles sin penalización,0.32,70
4,Tiros penales ejecutados,0.00,35
...,...,...,...
163,Goles en contra,0.00,50
164,Recuperación de pelotas,3.83,42
166,Aéreos Ganados,0.05,1
167,Aéreos Perdidos,0.07,99


In [103]:
tabla_percentil_pero90 = tabla_percentil_pero90[~tabla_percentil_pero90['Por 90'].str.contains(r'[a-zA-Z]', na=False)] 
tabla_percentil_pero90['Por 90'] = tabla_percentil_pero90['Por 90'].str.replace('%', '', regex=True)  # Eliminar el símbolo '%'
tabla_percentil_pero90['Por 90'] = pd.to_numeric(tabla_percentil_pero90['Por 90'], errors='coerce')  # Convertir a número, los no convertibles serán NaN
tabla_percentil_pero90['Percentil'] = pd.to_numeric(tabla_percentil_pero90['Percentil'], errors='coerce')
tabla_percentil_pero90 = tabla_percentil_pero90.reset_index(drop=True)
tabla_percentil_pero90

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tabla_percentil_pero90['Por 90'] = tabla_percentil_pero90['Por 90'].str.replace('%', '', regex=True)  # Eliminar el símbolo '%'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tabla_percentil_pero90['Por 90'] = pd.to_numeric(tabla_percentil_pero90['Por 90'], errors='coerce')  # Convertir a número, los no convertibles serán NaN
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/p

Unnamed: 0,Estadísticas,Por 90,Percentil
0,Goles,0.32,64
1,Asistencias,0.36,89
2,Goles + Asistencias,0.68,80
3,Goles sin penalización,0.32,70
4,Tiros penales ejecutados,0.00,35
...,...,...,...
130,Goles en contra,0.00,50
131,Recuperación de pelotas,3.83,42
132,Aéreos Ganados,0.05,1
133,Aéreos Perdidos,0.07,99
