# Data Processing

In [46]:
import os
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)

In [47]:
# Define la ruta a la carpeta donde se guardan los archivos
output_dir = "../data/scrapped"

try:
    # 1. Lista todos los archivos en la carpeta
    all_files = os.listdir(output_dir)

    # 2. Filtra solo los archivos que terminan en '.parquet'
    parquet_files = [os.path.join(output_dir, f) for f in all_files if f.endswith('.parquet')]

    # 3. Verifica si hay archivos para procesar
    if not parquet_files:
        print("No se encontraron archivos .parquet en la carpeta especificada.")
    else:
        print(f"Se encontraron {len(parquet_files)} archivos .parquet. Concatenando...")

        # 4. Lee cada archivo Parquet en un DataFrame y los guarda en una lista
        list_of_dfs = [pd.read_parquet(f) for f in parquet_files]

        # 5. Concatena todos los DataFrames de la lista
        combined_df = pd.concat(list_of_dfs, ignore_index=True)

        print("¡Concatenación exitosa! Los archivos se han unido en un solo DataFrame.")
        print("Se ha creado un DataFrame con las siguientes dimensiones:")
        print(f"Filas: {combined_df.shape[0]}, Columnas: {combined_df.shape[1]}")

except FileNotFoundError:
    print("Error: No se encontró la carpeta especificada.")
    print(f"Por favor, revisa que la carpeta '{output_dir}' exista.")
except ImportError:
    print("Error: La librería 'pyarrow' o 'fastparquet' no está instalada.")
    print("Para leer archivos .parquet, necesitas instalar una de estas librerías. Puedes usar el siguiente comando: pip install pyarrow")
except Exception as e:
    print(f"Ocurrió un error inesperado: {e}")

Se encontraron 378 archivos .parquet. Concatenando...
¡Concatenación exitosa! Los archivos se han unido en un solo DataFrame.
Se ha creado un DataFrame con las siguientes dimensiones:
Filas: 9033, Columnas: 38


## Tipos de datos

In [48]:
combined_df.head(3)

Unnamed: 0,IdJugador,IdClub,IdEquipo,Nombre,NombreCompleto,Puntos,ReboteDefensivo,ReboteOfensivo,RebotesTotales,Asistencias,Recuperaciones,Perdidas,TaponCometido,TaponRecibido,FaltaCometida,FaltaRecibida,Valoracion,TiempoJuego,CincoInicial,equipo,TirosDosAciertos,TirosDosFallos,TirosTresAciertos,TirosTresFallos,TirosLibresAciertos,TirosLibresFallos,plus_minus,posesiones_consumidas,posesiones_jugadas,rebote_of_disp,rebote_def_disp,puntos_q4_y_prorroga,puntos_clutch,min_q4,min_q4_y_prorroga,min_clutch,posesiones_estimadas,partido_key
0,78377,1498,70040,"ARAUJO, M.","ARAUJO, MAXIMO",3,0,0,0,0,0,0,0,0,1,0,1,18:05,False,ATENAS (C),0,1,1,0,0,0,1,2,53,26,22,0,0,10.0,10.0,0.53,2.0,ATENAS (C) vs BOCA (007/10/2024 22:10)
1,326699,1498,70040,"BUENDIA, C.","BUENDIA, CARLOS MANUEL",1,0,0,0,1,0,1,0,0,0,3,1,07:57,True,ATENAS (C),0,0,0,2,1,1,-15,4,14,7,4,0,0,0.0,0.0,0.0,3.88,ATENAS (C) vs BOCA (007/10/2024 22:10)
2,273565,1498,70040,"MONTERO, J.","MONTERO, JOSE IGNACIO",2,0,0,0,3,1,2,0,0,0,2,4,23:27,False,ATENAS (C),0,0,0,2,2,0,-2,5,44,19,17,0,0,2.13,2.13,0.0,4.88,ATENAS (C) vs BOCA (007/10/2024 22:10)


In [49]:
# Renombrar columnas en combined_df y df_players
_map = {
    'posesiones_consumidas': 'plays_consumidas',
    'posesiones_jugadas': 'plays_jugadas',
    'posesiones_estimadas': 'posesiones_finalizadas'
}

# Rename in combined_df (safe: ignores missing)
combined_df.rename(columns=_map, inplace=True)


In [50]:
# Convertir la columna 'TiempoJuego' de string "MM:SS" a segundos como entero
def tiempo_a_segundos(tiempo_str):
    try:
        minutos, segundos = map(int, tiempo_str.split(":"))
        return minutos * 60 + segundos
    except Exception:
        return 0

combined_df["TiempoJuego_seg"] = combined_df["TiempoJuego"].apply(tiempo_a_segundos)
combined_df["TiempoJuego_min"] = combined_df["TiempoJuego_seg"] / 60

In [51]:
combined_df.shape

(9033, 40)

In [52]:
categorical_cols=['IdJugador', 'IdClub', 'IdEquipo','Nombre', 'NombreCompleto', 'equipo', 'partido_key']
numerical_cols=['Puntos', 'ReboteDefensivo','ReboteOfensivo', 'RebotesTotales', 'Asistencias', 'Recuperaciones',
                'Perdidas', 'TaponCometido', 'TaponRecibido', 'FaltaCometida','FaltaRecibida', 'Valoracion', 
                'TirosDosAciertos', 'TirosDosFallos','TirosTresAciertos', 'TirosTresFallos', 'TirosLibresAciertos',
                'TirosLibresFallos', 'plus_minus', 'plays_consumidas','plays_jugadas', 'rebote_of_disp', 
                'rebote_def_disp','puntos_q4_y_prorroga', 'puntos_clutch', 'posesiones_finalizadas','TiempoJuego_seg','TiempoJuego_min']

In [53]:
# Convertir columnas categóricas a 'category' y numéricas a 'float' (o 'int' si no hay NaN)
for col in categorical_cols:
    if col in combined_df.columns:
        combined_df[col] = combined_df[col].astype('category')

for col in numerical_cols:
    if col in combined_df.columns:
        combined_df[col] = combined_df[col].astype(float)


## Nacionalidad de Jugadores

In [54]:
extranjeros_data = [
    {"NombreCompleto": "CHACON TIRADO, MARCOS", "nacionalidad": "Cubano"},
    {"NombreCompleto": "MILLER, TAVARIO EARNEST PTRISTIAN", "nacionalidad": "Bahamense"},
    {"NombreCompleto": "RAMIREZ ALCANTARA, KELVIN LEANDRO", "nacionalidad": "Dominicano"},
    {"NombreCompleto": "OWENS, DEMARCO RASHAD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "THOMAS JR, MARCUS WILEY", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "VORHEES, WILLIAM LEONARD-DEUBLER", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "FERGUSON, ROMEAO VENILL", "nacionalidad": "Estadounidense"}, 
    {"NombreCompleto": "LOCKETT, PHILLIP DOMINIQUE DANIEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CLARKE, CHRISTOPHER ASHTON", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "THORNTON, WILLIE ALFORD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "DANIELS, TRAVIS DORREL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "DIGGS, AVERY GERELL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BECTON, REGINALD GEQUAN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CARRERAS PEGUERO, XAVIER MANUEL", "nacionalidad": "Dominicano"},
    {"NombreCompleto": "THOMAS III, CHARLES PRICE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "KRAMER, KELBY JOHN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "GIVENS, SAMUEL JAMAL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BOWIE JR, JULIUS R", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CALFANI PERSINCULA, MATHIAS KENY", "nacionalidad": "Uruguayo"},
    {"NombreCompleto": "MORRISON, DOMINIQUE MONTEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ALEXANDER, QUINTIN IMMANUEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CARDENAS ZAMORA, JORDAN ISRAEL", "nacionalidad": "Ecuatoriano"},
    {"NombreCompleto": "ASCANIO SOLORZANO, JOSE GREGORIO", "nacionalidad": "Venezolano"},
    {"NombreCompleto": "WALTON, ZACHERY CHRISTOPHER", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "THOMAS, DISCHON KYIR", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "PRIDDY, NATHAN WAYNE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "WHITFIELD III, ROBERT JAMARCUS", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HORTON, KENNETH WILCHER", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HOLT, EMMITT DWIGHT", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "JENKINS, JALEN KEMAL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "SANDERS, NAKIE GERALD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BOND JR, TIMOTHY LAMONT", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ROBINSON, BRANDON LAMAR", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "OBENG MENSAH, YAW", "nacionalidad": "Canadiense"},
    {"NombreCompleto": "TROCHA MORELOS, TONNY JOSE", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "CRAION JR, MICHAEL JOE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BELL, RANDY TYREE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "DANIELS, DEANDRE MARTISE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "REESE V, JAMES LANARD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "WATSON, EARL OVREL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ROQUE MARTINEZ, ROMARIO JOSE", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "PAYTON CLOTTEY, EMMANUEL TRAVON", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "FIELDS, CALEB JOSEPH", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ANDERSON, ALPHONSO JORDAN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BASTARDO JOSE, RAYMON SCARLIN", "nacionalidad": "Dominicano"},
    {"NombreCompleto": "BANYARD, NICHOLAS RYAN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "MAXWELL, DU'VAUGHN ELISHA", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "RUIZ RUIZ, JOSE DANIEL", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "FUNDORA ARRECHAVALETA, YASMANY	", "nacionalidad": "Cubano"}, 
    {"NombreCompleto": "KRAYEM, OMAR NABIL", "nacionalidad": "Palestino"}, # Estadounidense
    {"NombreCompleto": "WALLACE, DEVANTE RASHAD-KEITH", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "STOKES, KAMAU THUTMOSES", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "LOWERY, DISHON LURELL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ALI, PRINCE ADAMS", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BONIZIOLI HONORATO, ITALO	", "nacionalidad": "Brasileño"}, 
    {"NombreCompleto": "CARTER, MYLES JUSTIN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HOOPER, CHRISTOPHER JALEEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "OPOKU, NANA KWASI HYEAKURO", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HAMILTON, ISAAC BRANDON", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "NAYLOR, CAMERON LEVELE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "MADRIGAL RENTERIA, JUAN ESTEBAN", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "NIEBLES HERRERA, EDWIN JOSE", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "PETRI, NICHOLAS", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "GUERRA CAÑATE, YEFERSON ANTONIO", "nacionalidad": "Venezolano"},
    {"NombreCompleto": "GARCIA GUERRERO, JORGE LUIS", "nacionalidad": "Venezolano"}, 
    {"NombreCompleto": "HERNANDEZ, MANUEL ALONSO", "nacionalidad": "Estadounidense"}, # Mexicano
    {"NombreCompleto": "FRANCA DOS SANTOS, JOAO VITOR", "nacionalidad": "Brasileño"},
    {"NombreCompleto": "IBARGUEN ANDREWS, ANDRES FELIPE", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "CARABALI POROZO, BRYAN JOSE", "nacionalidad": "Ecuatoriano"},
    {"NombreCompleto": "HUNT, DARIO NATHAN HENRY", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "IANGUAS, PEDRO HENRIQUE", "nacionalidad": "Brasileño"},
    {"NombreCompleto": "LOUIS, ARNOLD MICHAEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "COOPER, THOMAS", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "COOPER, THOMAS", "nacionalidad": "Brasileño"}, 
]

df_extranjeros = pd.DataFrame(extranjeros_data)
df_extranjeros['nacionalidad'].value_counts()

nacionalidad
Estadounidense    50
Colombiano         6
Brasileño          4
Dominicano         3
Venezolano         3
Ecuatoriano        2
Cubano             2
Bahamense          1
Uruguayo           1
Canadiense         1
Palestino          1
Name: count, dtype: int64

In [55]:
# Eliminar espacios en blanco en 'NombreCompleto' para ambos DataFrames
df_extranjeros['NombreCompleto'] = df_extranjeros['NombreCompleto'].str.strip()
combined_df['NombreCompleto'] = combined_df['NombreCompleto'].str.strip()

# Realizar el left join para agregar la nacionalidad
combined_df = combined_df.merge(df_extranjeros, on='NombreCompleto', how='left')

# Rellenar los valores nulos con 'Argentino'
combined_df['nacionalidad'] = combined_df['nacionalidad'].fillna('Argentino')

In [56]:
# Extraer el rival
combined_df['rival'] = combined_df['partido_key'].str.extract(r'vs\s(.+?)\s\(')
combined_df['fecha_hora'] = combined_df['partido_key'].str[-20:]
combined_df['fecha_hora'] = combined_df['fecha_hora'].str[3:13]

In [57]:
combined_df.head(3)

Unnamed: 0,IdJugador,IdClub,IdEquipo,Nombre,NombreCompleto,Puntos,ReboteDefensivo,ReboteOfensivo,RebotesTotales,Asistencias,Recuperaciones,Perdidas,TaponCometido,TaponRecibido,FaltaCometida,FaltaRecibida,Valoracion,TiempoJuego,CincoInicial,equipo,TirosDosAciertos,TirosDosFallos,TirosTresAciertos,TirosTresFallos,TirosLibresAciertos,TirosLibresFallos,plus_minus,plays_consumidas,plays_jugadas,rebote_of_disp,rebote_def_disp,puntos_q4_y_prorroga,puntos_clutch,min_q4,min_q4_y_prorroga,min_clutch,posesiones_finalizadas,partido_key,TiempoJuego_seg,TiempoJuego_min,nacionalidad,rival,fecha_hora
0,78377,1498,70040,"ARAUJO, M.","ARAUJO, MAXIMO",3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,18:05,False,ATENAS (C),0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,53.0,26.0,22.0,0.0,0.0,10.0,10.0,0.53,2.0,ATENAS (C) vs BOCA (007/10/2024 22:10),1085.0,18.083333,Argentino,BOCA,07/10/2024
1,326699,1498,70040,"BUENDIA, C.","BUENDIA, CARLOS MANUEL",1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,3.0,1.0,07:57,True,ATENAS (C),0.0,0.0,0.0,2.0,1.0,1.0,-15.0,4.0,14.0,7.0,4.0,0.0,0.0,0.0,0.0,0.0,3.88,ATENAS (C) vs BOCA (007/10/2024 22:10),477.0,7.95,Argentino,BOCA,07/10/2024
2,273565,1498,70040,"MONTERO, J.","MONTERO, JOSE IGNACIO",2.0,0.0,0.0,0.0,3.0,1.0,2.0,0.0,0.0,0.0,2.0,4.0,23:27,False,ATENAS (C),0.0,0.0,0.0,2.0,2.0,0.0,-2.0,5.0,44.0,19.0,17.0,0.0,0.0,2.13,2.13,0.0,4.88,ATENAS (C) vs BOCA (007/10/2024 22:10),1407.0,23.45,Argentino,BOCA,07/10/2024


Con esta base quiero formar una que tenga las columnas:
- ..._suma (nuericas sumadas)
- ..._prom_minutos ( _suma / minutos_jugados_suma)
- TirosDeCampo_suma = (TirosDosAciertos_suma + TirosDosFallos_suma + TirosTresAciertos_suma + TirosTresFallos_suma)
- TOV% 100 × (Perdidas_suma / plays_consumidas_suma)
- eFG%  ((TirosDosAciertos_suma + 1.5 * TirosTresAciertos_suma) / TirosDeCampo_suma)
- ORB% = 100 × (ReboteOfensivo_suma / rebote_of_disp_suma)
- DRB% = 100 × (ReboteDefensivo_suma / rebote_def_disp_suma)
- RB% = 100 × (RebotesTotales_suma /( rebote_of_disp_suma + rebote_def_disp_suma ))
- FTr = (TirosLibresAciertos_suma / TirosDeCampo_suma) 
- TS% = 100 × (Puntos_suma / (2 × (TirosDeCampo_suma + 0.44 × (TirosLibresAciertos + TirosLibresFallos))))
- USG% = 100 × (posesiones_consumidas_suma / plays_jugadas_suma)
- AST% = 100 × (Asistencias_suma / ((TiempoJuego_min_suma / (minutos_totales_equipo / 5)) * tiros_campo_anotados_equipo)  - tiros_campo_anotados_jugador) EVALUAR
- AST/TOVr = (Asistencias_suma / Perdidas_suma)
- game_score = Puntos_suma 
                + (0.4 * (TirosDosAciertos_suma + TirosTresAciertos_suma))  
                - (0.7 * TirosDeCampo_suma)
                - (0.4 * (TirosLibresFallos)
                + (0.7 * ReboteOfensivo_suma)
                + (0.3 * ReboteDefensivo_suma)
                + Recuperaciones_suma
                + (0.7 * Asistencias_suma)
                + (0.7 * TaponCometido_suma)
                - (0.4 * FaltaCometida_suma)
                - Perdidas_suma)

## Group By

In [59]:
# 1. Agregación Inicial (igual que antes)
columnas_numericas_distintas = ['puntos_q4_y_prorroga', 'puntos_clutch']  # ← ya definidas por ti
columnas_numericas = combined_df.select_dtypes(include=np.number).columns.drop("NombreCompleto", errors='ignore')
columnas_no_numericas = combined_df.select_dtypes(exclude=np.number).columns.drop("NombreCompleto", errors='ignore')

agg_dict = {col: 'sum' for col in columnas_numericas}
agg_dict.update({col: 'last' for col in columnas_no_numericas})

df_players = combined_df.groupby("NombreCompleto", observed=False).agg(agg_dict).reset_index()
df_players = df_players.rename(columns={col: f"{col}_suma" for col in columnas_numericas})

# --- NUEVO: set de columnas que NO deben ir al escalado genérico a 40min
cols_especiales = {'puntos_q4_y_prorroga', 'puntos_clutch'}

# 2. Cálculo de métricas avanzadas
if 'TiempoJuego_min_suma' in df_players.columns:
    minutos_jugados = df_players["TiempoJuego_min_suma"]
    minutos_jugados_eff = minutos_jugados.clip(lower=1)   # <-- piso en 1

    for col in columnas_numericas:
        if col in cols_especiales:
            continue
        col_sum = f"{col}_suma"
        if col_sum in df_players.columns:
            df_players[f"{col}_prom_40min"] = (df_players[col_sum] / minutos_jugados_eff) * 40



    # Calcular Tiros de Campo totales
    df_players['TirosDeCampo_suma'] = (
        df_players['TirosDosAciertos_suma'] +
        df_players['TirosDosFallos_suma'] +
        df_players['TirosTresAciertos_suma'] +
        df_players['TirosTresFallos_suma']
    )

    # Calcular TOV%, evitando división por cero
    df_players['TOV%'] = np.where(
        df_players['plays_consumidas_suma'] > 0,
        100 * (df_players['Perdidas_suma'] / df_players['plays_consumidas_suma']),
        np.nan
    )

    # Calcular eFG%, evitando división por cero
    df_players['eFG%'] = np.where(
        df_players['TirosDeCampo_suma'] > 0,
        100 * (df_players['TirosDosAciertos_suma'] + 1.5 * df_players['TirosTresAciertos_suma']) / df_players['TirosDeCampo_suma'],
        np.nan
    )

    # Calcular ORB%, DRB% y RB%, evitando división por cero
    if 'rebote_of_disp_suma' in df_players.columns and 'rebote_def_disp_suma' in df_players.columns:
        df_players['ORB%'] = np.where(
            df_players['rebote_of_disp_suma'] > 0,
            100 * (df_players['ReboteOfensivo_suma'] / df_players['rebote_of_disp_suma']),
            np.nan
        )
        df_players['DRB%'] = np.where(
            df_players['rebote_def_disp_suma'] > 0,
            100 * (df_players['ReboteDefensivo_suma'] / df_players['rebote_def_disp_suma']),
            np.nan
        )
        df_players['RB%'] = np.where(
            (df_players['rebote_of_disp_suma'] + df_players['rebote_def_disp_suma']) > 0,
            100 * (df_players['RebotesTotales_suma'] / (df_players['rebote_of_disp_suma'] + df_players['rebote_def_disp_suma'])),
            np.nan
        )

    # Calcular FTr, evitando división por cero
    df_players['FTr'] = np.where(
        df_players['TirosDeCampo_suma'] > 0,
        df_players['TirosLibresAciertos_suma'] / df_players['TirosDeCampo_suma'],
        np.nan
    )

    # Calcular TS%, evitando división por cero
    denominator_ts = 2 * (df_players['TirosDeCampo_suma'] + 0.44 * (df_players['TirosLibresAciertos_suma'] + df_players['TirosLibresFallos_suma']))
    df_players['TS%'] = np.where(
        denominator_ts > 0,
        100 * (df_players['Puntos_suma'] / denominator_ts),
        np.nan
    )

    # Calcular USG%, evitando división por cero
    df_players['USG%'] = np.where(
        df_players['plays_jugadas_suma'] > 0,
        100 * (df_players['posesiones_consumidas_suma'] / df_players['plays_jugadas_suma']),
        np.nan
    )
    
    # Calcular AST/TOVr, evitando división por cero
    df_players['AST/TOVr'] = np.where(
        df_players['Perdidas_suma'] > 0,
        df_players['Asistencias_suma'] / df_players['Perdidas_suma'],
        np.nan
    )

    # Calcular Game Score (esta métrica no tiene divisiones)
    df_players['game_score_suma'] = (
        df_players['Puntos_suma'] +
        (0.4 * df_players['TirosDeCampo_suma']) -
        (0.7 * df_players['TirosDeCampo_suma']) -
        (0.4 * df_players['TirosLibresFallos_suma']) +
        (0.7 * df_players['ReboteOfensivo_suma']) +
        (0.3 * df_players['ReboteDefensivo_suma']) +
        df_players['Recuperaciones_suma'] +
        (0.7 * df_players['Asistencias_suma']) +
        (0.7 * df_players['TaponCometido_suma']) -
        (0.4 * df_players['FaltaCometida_suma']) -
        df_players['Perdidas_suma']
    )

    # Calcular Ritmo y Rating Ofensivo (Falta posesiones por partido)
    # df_players['PACE_40min'] = np.where(
    #     df_players['TiempoJuego_min_suma'] > 0,
    #     (df_players['posesiones_finalizadas_suma'] / df_players['TiempoJuego_min_suma']) * 40,
    #     np.nan
    # )

    df_players['ORTG'] = np.where(
        df_players['posesiones_finalizadas_suma'] > 0,
        (df_players['Puntos_suma'] / df_players['posesiones_finalizadas_suma']) * 100,
        np.nan
    )

# ---  normalización específica para clutch y Q4+OT ---
# (a) Puntos clutch por 5 minutos
if {'puntos_clutch_suma', 'min_clutch_suma'}.issubset(df_players.columns):
    min_clutch_eff = df_players['min_clutch_suma'].clip(lower=1)  # <-- piso en 1
    df_players['puntos_clutch_prom_5min'] = (df_players['puntos_clutch_suma'] / min_clutch_eff) * 5

# (b) Puntos Q4+OT por 10 minutos
if {'puntos_q4_y_prorroga_suma', 'min_q4_y_prorroga_suma'}.issubset(df_players.columns):
    min_q4ot_eff = df_players['min_q4_y_prorroga_suma'].clip(lower=1)  # <-- piso en 1
    df_players['puntos_q4pror_prom_10min'] = (df_players['puntos_q4_y_prorroga_suma'] / min_q4ot_eff) * 10


# 3. Agregar la columna de partidos jugados 
cantidad_partidos = combined_df[combined_df["TiempoJuego_seg"] > 0].groupby("NombreCompleto", observed=False).size()
df_players["CantidadPartidosJugados"] = df_players["NombreCompleto"].map(cantidad_partidos).fillna(0).astype(int)
df_players['game_score_prom'] = np.where(
        df_players['CantidadPartidosJugados'] > 0,
        (df_players['game_score_suma'] / df_players['CantidadPartidosJugados']),
        np.nan
    )
df_players['MinutosJugadosPorPartido'] = np.where(
        df_players['CantidadPartidosJugados'] > 0,
        (df_players['TiempoJuego_min_suma'] / df_players['CantidadPartidosJugados']),
        np.nan
    )

KeyError: 'posesiones_consumidas_suma'

In [None]:
df_players

Unnamed: 0,NombreCompleto,Puntos_suma,ReboteDefensivo_suma,ReboteOfensivo_suma,RebotesTotales_suma,Asistencias_suma,Recuperaciones_suma,Perdidas_suma,TaponCometido_suma,TaponRecibido_suma,FaltaCometida_suma,FaltaRecibida_suma,Valoracion_suma,TirosDosAciertos_suma,TirosDosFallos_suma,TirosTresAciertos_suma,TirosTresFallos_suma,TirosLibresAciertos_suma,TirosLibresFallos_suma,plus_minus_suma,posesiones_consumidas_suma,posesiones_jugadas_suma,rebote_of_disp_suma,rebote_def_disp_suma,puntos_q4_y_prorroga_suma,puntos_clutch_suma,min_q4_suma,min_q4_y_prorroga_suma,min_clutch_suma,posesiones_estimadas_suma,TiempoJuego_seg_suma,TiempoJuego_min_suma,IdJugador,IdClub,IdEquipo,Nombre,TiempoJuego,CincoInicial,equipo,partido_key,nacionalidad,rival,fecha_hora,Puntos_prom_40min,ReboteDefensivo_prom_40min,ReboteOfensivo_prom_40min,RebotesTotales_prom_40min,Asistencias_prom_40min,Recuperaciones_prom_40min,Perdidas_prom_40min,TaponCometido_prom_40min,TaponRecibido_prom_40min,FaltaCometida_prom_40min,FaltaRecibida_prom_40min,Valoracion_prom_40min,TirosDosAciertos_prom_40min,TirosDosFallos_prom_40min,TirosTresAciertos_prom_40min,TirosTresFallos_prom_40min,TirosLibresAciertos_prom_40min,TirosLibresFallos_prom_40min,plus_minus_prom_40min,posesiones_consumidas_prom_40min,posesiones_jugadas_prom_40min,rebote_of_disp_prom_40min,rebote_def_disp_prom_40min,min_q4_prom_40min,min_q4_y_prorroga_prom_40min,min_clutch_prom_40min,posesiones_estimadas_prom_40min,TiempoJuego_seg_prom_40min,TiempoJuego_min_prom_40min,TirosDeCampo_suma,TOV%,eFG%,ORB%,DRB%,RB%,FTr,TS%,USG%,AST/TOVr,game_score_suma,ORTG,puntos_clutch_prom_5min,puntos_q4pror_prom_10min,CantidadPartidosJugados,game_score_prom,MinutosJugadosPorPartido
0,"AALIYA, LEE ABRAHAM",261.0,82.0,42.0,124.0,18.0,18.0,25.0,25.0,4.0,60.0,51.0,252.0,79.0,51.0,23.0,74.0,34.0,31.0,98.0,278.0,1508.0,663.0,683.0,104.0,2.0,202.59,202.59,15.06,238.60,37472.0,624.533333,326138,1790,69616,"AALIYA, L.",21:02,False,INSTITUTO,INSTITUTO vs QUIMSA (002/12/2024 22:10),Argentino,QUIMSA,02/12/2024,16.716482,5.251921,2.690009,7.941930,1.152861,1.152861,1.601196,1.601196,0.256191,3.842869,3.266439,16.140051,5.059778,3.266439,1.473100,4.739539,2.177626,1.985482,6.276687,17.805295,96.584116,42.463706,43.744663,12.975448,12.975448,0.964560,15.281810,2400.0,40.0,227.0,8.992806,50.000000,6.334842,12.005857,9.212481,0.149780,51.056338,18.435013,0.720000,233.6,109.388097,0.664011,5.133521,34,6.870588,18.368627
1,"ACEVEDO, MAXIMILIANO JUNIORS",2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.0,1.0,0.0,0.0,-6.0,4.0,26.0,15.0,10.0,2.0,0.0,10.79,10.79,1.32,3.00,694.0,11.566667,149814,2305,69510,"ACEVEDO, M.",00:00,False,OBERA,OBERA vs RIACHUELO (LR) (001/12/2024 21:00),Argentino,RIACHUELO,01/12/2024,6.916427,3.458213,3.458213,6.916427,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.458213,3.458213,6.916427,0.000000,3.458213,0.000000,0.000000,-20.749280,13.832853,89.913545,51.873199,34.582133,37.314121,37.314121,4.564841,10.374640,2400.0,40.0,4.0,0.000000,25.000000,6.666667,10.000000,8.000000,0.000000,25.000000,15.384615,,1.8,66.666667,0.000000,1.853568,4,0.450000,2.891667
2,"ACEVEDO, SEBASTIAN IGNACIO",285.0,104.0,24.0,128.0,24.0,26.0,30.0,1.0,5.0,84.0,65.0,266.0,55.0,59.0,43.0,76.0,46.0,9.0,-12.0,287.0,2047.0,992.0,828.0,64.0,12.0,242.03,249.60,44.73,263.20,50524.0,842.066667,209965,1426,69281,"ACEVEDO, S.",21:53,True,SAN MARTIN (C),SAN MARTIN (C) vs GIMNASIA (CR) (004/12/2024 2...,Argentino,GIMNASIA,04/12/2024,13.538120,4.940226,1.140052,6.080279,1.140052,1.235057,1.425065,0.047502,0.237511,3.990183,3.087642,12.635579,2.612620,2.802628,2.042594,3.610165,2.185100,0.427520,-0.570026,13.633125,97.236957,47.122160,39.331803,11.496952,11.856543,2.124772,12.502573,2400.0,40.0,233.0,10.452962,51.287554,2.419355,12.560386,7.032967,0.197425,55.404355,14.020518,0.800000,239.4,108.282675,1.341382,2.564103,38,6.300000,22.159649
3,"ACTIS, JOAQUIN MATIAS",0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,-4.0,1.0,8.0,6.0,3.0,0.0,0.0,3.57,3.57,0.17,1.00,226.0,3.766667,225273,88,69164,"ACTIS, J.",00:00,False,SAN LORENZO,SAN LORENZO vs BOCA (017/11/2024 21:05),Argentino,BOCA,17/11/2024,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,10.619469,0.000000,0.000000,0.000000,0.000000,-10.619469,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-42.477876,10.619469,84.955752,63.716814,31.858407,37.911504,37.911504,1.805310,10.619469,2400.0,40.0,0.0,100.000000,,0.000000,0.000000,0.000000,,,12.500000,0.000000,-1.0,0.000000,0.000000,0.000000,1,-1.000000,3.766667
4,"ACUÑA, ROBERTO SANTIAGO",213.0,95.0,49.0,144.0,20.0,16.0,28.0,11.0,10.0,79.0,48.0,238.0,87.0,59.0,2.0,15.0,33.0,23.0,114.0,215.0,1372.0,599.0,597.0,48.0,2.0,141.68,141.68,16.27,166.64,39416.0,656.933333,271401,1790,69616,"ACUÑA, R.",18:15,False,INSTITUTO,INSTITUTO vs QUIMSA (002/12/2024 22:10),Argentino,QUIMSA,02/12/2024,12.969353,5.784453,2.983560,8.768013,1.217780,0.974224,1.704891,0.669779,0.608890,4.810229,2.922671,14.491577,5.297341,3.592450,0.121778,0.913335,2.009336,1.400447,6.941344,13.091131,83.539679,36.472498,36.350721,8.626751,8.626751,0.990664,10.146539,2400.0,40.0,163.0,13.023256,55.214724,8.180301,15.912898,12.040134,0.202454,56.757621,15.670554,0.714286,195.8,127.820451,0.614628,3.387916,35,5.594286,18.769524
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
372,"WHITFIELD III, ROBERT JAMARCUS",155.0,32.0,5.0,37.0,18.0,13.0,14.0,0.0,3.0,26.0,24.0,124.0,25.0,25.0,30.0,51.0,15.0,4.0,-7.0,153.0,731.0,316.0,305.0,72.0,10.0,118.01,118.01,18.22,148.36,17536.0,292.266667,330064,1474,68570,"WHITFIELD, R.",08:49,False,GIMNASIA (CR),GIMNASIA (CR) vs FERRO (018/05/2025 20:00),Estadounidense,FERRO,18/05/2025,21.213504,4.379562,0.684307,5.063869,2.463504,1.779197,1.916058,0.000000,0.410584,3.558394,3.284672,16.970803,3.421533,3.421533,4.105839,6.979927,2.052920,0.547445,-0.958029,20.939781,100.045620,43.248175,41.742701,16.151004,16.151004,2.493613,20.304745,2400.0,40.0,131.0,9.150327,53.435115,1.582278,10.491803,5.958132,0.114504,55.611366,20.930233,1.285714,128.4,104.475600,2.744237,6.101178,19,6.757895,15.382456
373,"WOLINSKY, FACUNDO ARIEL",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.0,0.000000,182885,46,69144,"WOLINSKY, F.",00:00,False,FERRO,FERRO vs PEÑAROL (MDP) (009/10/2024 20:00),Argentino,PEÑAROL,09/10/2024,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,,,,,,,,,,0.0,,0.000000,0.000000,0,,
374,"ZAWADSKI, FARAMIR",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.0,0.000000,274124,2314,69185,"ZAWADSKI, F.",00:00,False,ZARATE BASKET,RIACHUELO (LR) vs ZARATE BASKET (006/11/2024 2...,Argentino,ZARATE BASKET,06/11/2024,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,,,,,,,,,,0.0,,0.000000,0.000000,0,,
375,"ZEZULAR, FEDERICO",170.0,34.0,22.0,56.0,10.0,27.0,17.0,14.0,2.0,62.0,26.0,149.0,55.0,37.0,15.0,26.0,15.0,10.0,123.0,161.0,1314.0,537.0,579.0,58.0,1.0,195.50,195.50,15.65,139.00,27668.0,461.133333,271328,1869,69992,"ZEZULAR, F.",04:49,False,QUIMSA,INSTITUTO vs QUIMSA (002/12/2024 22:10),Argentino,QUIMSA,02/12/2024,14.746277,2.949255,1.908342,4.857597,0.867428,2.342056,1.474628,1.214399,0.173486,5.378054,2.255313,12.924678,4.770854,3.209484,1.301142,2.255313,1.301142,0.867428,10.669365,13.965592,113.980049,46.580888,50.224086,16.958219,16.958219,1.357525,12.057250,2400.0,40.0,133.0,10.559006,58.270677,4.096834,5.872193,5.017921,0.112782,59.027778,12.252664,0.588235,153.7,122.302158,0.319489,2.966752,37,4.154054,12.463063


In [None]:
# Seleccionar columnas numéricas
numeric_cols = df_players.select_dtypes(include=np.number).columns

# Calcular estadísticas para cada columna numérica
for col in numeric_cols:
    print(f"Columna: {col}")
    print(f"  Máximo: {df_players[col].max()}")
    print(f"  Mínimo: {df_players[col].min()}")
    print(f"  Cantidad de nulos: {df_players[col].isnull().sum()}")
    print(f"  Cuantil 25%: {df_players[col].quantile(0.25)}")
    print(f"  Cuantil 50% (mediana): {df_players[col].quantile(0.5)}")
    print(f"  Cuantil 75%: {df_players[col].quantile(0.75)}")
    print("-" * 40)

Columna: Puntos_suma
  Máximo: 669.0
  Mínimo: 0.0
  Cantidad de nulos: 0
  Cuantil 25%: 4.0
  Cuantil 50% (mediana): 119.0
  Cuantil 75%: 288.0
----------------------------------------
Columna: ReboteDefensivo_suma
  Máximo: 306.0
  Mínimo: 0.0
  Cantidad de nulos: 0
  Cuantil 25%: 2.0
  Cuantil 50% (mediana): 38.0
  Cuantil 75%: 90.0
----------------------------------------
Columna: ReboteOfensivo_suma
  Máximo: 93.0
  Mínimo: 0.0
  Cantidad de nulos: 0
  Cuantil 25%: 0.0
  Cuantil 50% (mediana): 9.0
  Cuantil 75%: 22.0
----------------------------------------
Columna: RebotesTotales_suma
  Máximo: 379.0
  Mínimo: 0.0
  Cantidad de nulos: 0
  Cuantil 25%: 2.0
  Cuantil 50% (mediana): 47.0
  Cuantil 75%: 116.0
----------------------------------------
Columna: Asistencias_suma
  Máximo: 188.0
  Mínimo: 0.0
  Cantidad de nulos: 0
  Cuantil 25%: 1.0
  Cuantil 50% (mediana): 14.0
  Cuantil 75%: 39.0
----------------------------------------
Columna: Recuperaciones_suma
  Máximo: 87.0
  Mín

Revisar:
- CantidadPartidosJugados Máximo: 40
- TS% Máximo: 150.0
- eFG% Máximo: 150.0
- TOV% Máximo: 100.0
- rebote_def_disp_prom_minutos Máximo: 165.0
- plays_jugadas_prom_minutos Máximo: 255.0
- plus_minus_prom_minutos Máximo: 82.5
- nulos: 29 

In [None]:
df_players[df_players["ORTG"] > 200]

Unnamed: 0,NombreCompleto,Puntos_suma,ReboteDefensivo_suma,ReboteOfensivo_suma,RebotesTotales_suma,Asistencias_suma,Recuperaciones_suma,Perdidas_suma,TaponCometido_suma,TaponRecibido_suma,FaltaCometida_suma,FaltaRecibida_suma,Valoracion_suma,TirosDosAciertos_suma,TirosDosFallos_suma,TirosTresAciertos_suma,TirosTresFallos_suma,TirosLibresAciertos_suma,TirosLibresFallos_suma,plus_minus_suma,posesiones_consumidas_suma,posesiones_jugadas_suma,rebote_of_disp_suma,rebote_def_disp_suma,puntos_q4_y_prorroga_suma,puntos_clutch_suma,min_q4_suma,min_q4_y_prorroga_suma,min_clutch_suma,posesiones_estimadas_suma,TiempoJuego_seg_suma,TiempoJuego_min_suma,IdJugador,IdClub,IdEquipo,Nombre,TiempoJuego,CincoInicial,equipo,partido_key,nacionalidad,rival,fecha_hora,Puntos_prom_40min,ReboteDefensivo_prom_40min,ReboteOfensivo_prom_40min,RebotesTotales_prom_40min,Asistencias_prom_40min,Recuperaciones_prom_40min,Perdidas_prom_40min,TaponCometido_prom_40min,TaponRecibido_prom_40min,FaltaCometida_prom_40min,FaltaRecibida_prom_40min,Valoracion_prom_40min,TirosDosAciertos_prom_40min,TirosDosFallos_prom_40min,TirosTresAciertos_prom_40min,TirosTresFallos_prom_40min,TirosLibresAciertos_prom_40min,TirosLibresFallos_prom_40min,plus_minus_prom_40min,posesiones_consumidas_prom_40min,posesiones_jugadas_prom_40min,rebote_of_disp_prom_40min,rebote_def_disp_prom_40min,min_q4_prom_40min,min_q4_y_prorroga_prom_40min,min_clutch_prom_40min,posesiones_estimadas_prom_40min,TiempoJuego_seg_prom_40min,TiempoJuego_min_prom_40min,TirosDeCampo_suma,TOV%,eFG%,ORB%,DRB%,RB%,FTr,TS%,USG%,AST/TOVr,game_score_suma,ORTG,puntos_clutch_prom_5min,puntos_q4pror_prom_10min,CantidadPartidosJugados,game_score_prom,MinutosJugadosPorPartido
108,"DELIA, TOMAS FRANCISCO",11.0,3.0,2.0,5.0,1.0,1.0,1.0,0.0,0.0,2.0,5.0,19.0,2.0,0.0,0.0,0.0,7.0,1.0,-21.0,7.0,121.0,33.0,50.0,5.0,0.0,25.02,25.02,0.6,4.52,2590.0,43.166667,328816,2314,69185,"DELIA, T.",22:23,False,ZARATE BASKET,PLATENSE vs ZARATE BASKET (011/05/2025 20:00),Argentino,ZARATE BASKET,11/05/2025,10.19305,2.779923,1.853282,4.633205,0.926641,0.926641,0.926641,0.0,0.0,1.853282,4.633205,17.606178,1.853282,0.0,0.0,0.0,6.486486,0.926641,-19.459459,6.486486,112.123552,30.579151,46.332046,23.184556,23.184556,0.555985,4.188417,2400.0,40.0,2.0,14.285714,100.0,6.060606,6.0,6.024096,3.5,99.637681,5.785124,1.0,12.2,243.362832,0.0,1.998401,4,3.05,10.791667
111,"DIAZ, FABRICIO HERNAN",7.0,6.0,4.0,10.0,3.0,1.0,0.0,0.0,2.0,4.0,3.0,17.0,3.0,0.0,0.0,0.0,1.0,1.0,16.0,4.0,61.0,23.0,23.0,3.0,0.0,13.66,13.66,1.49,1.88,1738.0,28.966667,179887,1903,69941,"DIAZ, F.",00:00,False,RIACHUELO (LR),UNION (SF) vs RIACHUELO (LR) (003/12/2024 21:00),Argentino,RIACHUELO,03/12/2024,9.666283,8.285386,5.52359,13.808976,4.142693,1.380898,0.0,0.0,2.761795,5.52359,4.142693,23.475259,4.142693,0.0,0.0,0.0,1.380898,1.380898,22.094361,5.52359,84.234753,31.760644,31.760644,18.863061,18.863061,2.057537,2.596087,2400.0,40.0,3.0,0.0,100.0,17.391304,26.086957,21.73913,0.333333,90.206186,6.557377,,11.8,372.340426,0.0,2.196193,6,1.966667,4.827778
183,"IANGUAS, PEDRO HENRIQUE",123.0,96.0,54.0,150.0,14.0,6.0,14.0,12.0,3.0,64.0,24.0,212.0,56.0,25.0,0.0,0.0,11.0,11.0,-65.0,104.0,1136.0,529.0,464.0,14.0,6.0,122.07,127.07,32.7,54.68,27463.0,457.716667,271956,80,70071,"IANGUAS, P.",26:16,False,PLATENSE,PLATENSE vs PEÑAROL (MDP) (011/10/2024 22:05),Brasileño,PEÑAROL,11/10/2024,10.749008,8.389469,4.719077,13.108546,1.223464,0.524342,1.223464,1.048684,0.262171,5.59298,2.097367,18.526745,4.893857,2.184758,0.0,0.0,0.961293,0.961293,-5.68037,9.088592,99.275389,46.229472,40.549102,10.667735,11.104686,2.857663,4.778502,2400.0,40.0,81.0,13.461538,69.135802,10.20794,20.689655,15.10574,0.135802,67.820909,9.15493,1.0,145.5,224.945135,0.917431,1.101755,26,5.596154,17.604487
277,"PIKALUK, MATIAS",3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,4.0,1.0,5.0,3.0,1.0,3.0,0.0,2.77,2.77,1.4,1.0,205.0,3.416667,273660,1869,69992,"PIKALUK, M.",00:00,False,QUIMSA,INSTITUTO vs QUIMSA (002/12/2024 22:10),Argentino,QUIMSA,02/12/2024,35.121951,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.121951,0.0,0.0,11.707317,0.0,0.0,0.0,46.829268,11.707317,58.536585,35.121951,11.707317,32.429268,32.429268,16.390244,11.707317,2400.0,40.0,1.0,0.0,150.0,0.0,0.0,0.0,0.0,150.0,20.0,,2.7,300.0,0.0,10.830325,3,0.9,1.138889


In [None]:
df_players[df_players["puntos_clutch_prom_5min"] > 6]

Unnamed: 0,NombreCompleto,Puntos_suma,ReboteDefensivo_suma,ReboteOfensivo_suma,RebotesTotales_suma,Asistencias_suma,Recuperaciones_suma,Perdidas_suma,TaponCometido_suma,TaponRecibido_suma,FaltaCometida_suma,FaltaRecibida_suma,Valoracion_suma,TirosDosAciertos_suma,TirosDosFallos_suma,TirosTresAciertos_suma,TirosTresFallos_suma,TirosLibresAciertos_suma,TirosLibresFallos_suma,plus_minus_suma,posesiones_consumidas_suma,posesiones_jugadas_suma,rebote_of_disp_suma,rebote_def_disp_suma,puntos_q4_y_prorroga_suma,puntos_clutch_suma,min_q4_suma,min_q4_y_prorroga_suma,min_clutch_suma,posesiones_estimadas_suma,TiempoJuego_seg_suma,TiempoJuego_min_suma,IdJugador,IdClub,IdEquipo,Nombre,TiempoJuego,CincoInicial,equipo,partido_key,nacionalidad,rival,fecha_hora,Puntos_prom_40min,ReboteDefensivo_prom_40min,ReboteOfensivo_prom_40min,RebotesTotales_prom_40min,Asistencias_prom_40min,Recuperaciones_prom_40min,Perdidas_prom_40min,TaponCometido_prom_40min,TaponRecibido_prom_40min,FaltaCometida_prom_40min,FaltaRecibida_prom_40min,Valoracion_prom_40min,TirosDosAciertos_prom_40min,TirosDosFallos_prom_40min,TirosTresAciertos_prom_40min,TirosTresFallos_prom_40min,TirosLibresAciertos_prom_40min,TirosLibresFallos_prom_40min,plus_minus_prom_40min,posesiones_consumidas_prom_40min,posesiones_jugadas_prom_40min,rebote_of_disp_prom_40min,rebote_def_disp_prom_40min,min_q4_prom_40min,min_q4_y_prorroga_prom_40min,min_clutch_prom_40min,posesiones_estimadas_prom_40min,TiempoJuego_seg_prom_40min,TiempoJuego_min_prom_40min,TirosDeCampo_suma,TOV%,eFG%,ORB%,DRB%,RB%,FTr,TS%,USG%,AST/TOVr,game_score_suma,ORTG,puntos_clutch_prom_5min,puntos_q4pror_prom_10min,CantidadPartidosJugados,game_score_prom,MinutosJugadosPorPartido
31,"BANYARD, NICHOLAS RYAN",107.0,37.0,7.0,44.0,2.0,3.0,15.0,8.0,2.0,23.0,37.0,111.0,30.0,25.0,2.0,5.0,41.0,20.0,-22.0,105.0,372.0,162.0,158.0,48.0,21.0,50.68,53.99,17.17,96.84,10155.0,169.25,326704,80,70071,"BANYARD, N.",25:10,True,PLATENSE,PLATENSE vs PEÑAROL (MDP) (011/10/2024 22:05),Estadounidense,PEÑAROL,11/10/2024,25.288035,8.744461,1.654357,10.398818,0.472674,0.70901,3.545052,1.890694,0.472674,5.435746,8.744461,26.233383,7.090103,5.908419,0.472674,1.181684,9.689808,4.726736,-5.199409,24.815362,87.917282,38.286558,37.341211,11.977548,12.759823,4.057903,22.886854,2400.0,40.0,62.0,14.285714,53.225806,4.320988,23.417722,13.75,0.66129,60.220621,28.225806,0.133333,82.2,110.491532,6.115317,8.890535,8,10.275,21.15625
167,"GRONDA SEBA, THIAGO KALIL",19.0,2.0,0.0,2.0,2.0,1.0,2.0,0.0,1.0,11.0,9.0,8.0,3.0,4.0,3.0,4.0,4.0,3.0,-12.0,19.0,121.0,51.0,49.0,7.0,3.0,12.54,12.54,0.69,19.08,3382.0,56.366667,326428,1541,69636,"GRONDA, T.",00:00,False,LA UNION FSA.,LA UNION FSA. vs GIMNASIA (CR) (030/11/2024 21...,Argentino,GIMNASIA,30/11/2024,13.483146,1.419279,0.0,1.419279,1.419279,0.709639,1.419279,0.0,0.709639,7.806032,6.386753,5.677114,2.128918,2.838557,2.128918,2.838557,2.838557,2.128918,-8.515671,13.483146,85.866351,36.191603,34.772324,8.898876,8.898876,0.489651,13.539917,2400.0,40.0,14.0,10.526316,53.571429,0.0,4.081633,2.0,0.285714,55.620609,15.702479,1.0,10.2,99.580713,15.0,5.582137,7,1.457143,8.052381


In [None]:
df_players[df_players["TS%"] > 100]

Unnamed: 0,NombreCompleto,Puntos_suma,ReboteDefensivo_suma,ReboteOfensivo_suma,RebotesTotales_suma,Asistencias_suma,Recuperaciones_suma,Perdidas_suma,TaponCometido_suma,TaponRecibido_suma,FaltaCometida_suma,FaltaRecibida_suma,Valoracion_suma,TirosDosAciertos_suma,TirosDosFallos_suma,TirosTresAciertos_suma,TirosTresFallos_suma,TirosLibresAciertos_suma,TirosLibresFallos_suma,plus_minus_suma,posesiones_consumidas_suma,posesiones_jugadas_suma,rebote_of_disp_suma,rebote_def_disp_suma,puntos_q4_y_prorroga_suma,puntos_clutch_suma,min_q4_suma,min_q4_y_prorroga_suma,min_clutch_suma,posesiones_estimadas_suma,TiempoJuego_seg_suma,TiempoJuego_min_suma,IdJugador,IdClub,IdEquipo,Nombre,TiempoJuego,CincoInicial,equipo,partido_key,nacionalidad,rival,fecha_hora,Puntos_prom_40min,ReboteDefensivo_prom_40min,ReboteOfensivo_prom_40min,RebotesTotales_prom_40min,Asistencias_prom_40min,Recuperaciones_prom_40min,Perdidas_prom_40min,TaponCometido_prom_40min,TaponRecibido_prom_40min,FaltaCometida_prom_40min,FaltaRecibida_prom_40min,Valoracion_prom_40min,TirosDosAciertos_prom_40min,TirosDosFallos_prom_40min,TirosTresAciertos_prom_40min,TirosTresFallos_prom_40min,TirosLibresAciertos_prom_40min,TirosLibresFallos_prom_40min,plus_minus_prom_40min,posesiones_consumidas_prom_40min,posesiones_jugadas_prom_40min,rebote_of_disp_prom_40min,rebote_def_disp_prom_40min,min_q4_prom_40min,min_q4_y_prorroga_prom_40min,min_clutch_prom_40min,posesiones_estimadas_prom_40min,TiempoJuego_seg_prom_40min,TiempoJuego_min_prom_40min,TirosDeCampo_suma,TOV%,eFG%,ORB%,DRB%,RB%,FTr,TS%,USG%,AST/TOVr,game_score_suma,ORTG,puntos_clutch_prom_5min,puntos_q4pror_prom_10min,CantidadPartidosJugados,game_score_prom,MinutosJugadosPorPartido
197,"LARRAZA, JUAN IGNACIO",20.0,10.0,1.0,11.0,2.0,4.0,2.0,0.0,0.0,6.0,1.0,28.0,1.0,0.0,6.0,2.0,0.0,0.0,13.0,11.0,171.0,63.0,79.0,11.0,0.0,54.79,54.79,3.14,10.0,4096.0,68.266667,271254,1424,69990,"LARRAZA, J.",00:00,False,REGATAS (C),REGATAS (C) vs GIMNASIA (CR) (002/12/2024 21:30),Argentino,GIMNASIA,02/12/2024,11.71875,5.859375,0.585938,6.445312,1.171875,2.34375,1.171875,0.0,0.0,3.515625,0.585938,16.40625,0.585938,0.0,3.515625,1.171875,0.0,0.0,7.617188,6.445312,100.195312,36.914062,46.289062,32.103516,32.103516,1.839844,5.859375,2400.0,40.0,9.0,18.181818,111.111111,1.587302,12.658228,7.746479,0.0,111.111111,6.432749,1.0,22.0,200.0,0.0,2.007666,17,1.294118,4.015686
277,"PIKALUK, MATIAS",3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,4.0,1.0,5.0,3.0,1.0,3.0,0.0,2.77,2.77,1.4,1.0,205.0,3.416667,273660,1869,69992,"PIKALUK, M.",00:00,False,QUIMSA,INSTITUTO vs QUIMSA (002/12/2024 22:10),Argentino,QUIMSA,02/12/2024,35.121951,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.121951,0.0,0.0,11.707317,0.0,0.0,0.0,46.829268,11.707317,58.536585,35.121951,11.707317,32.429268,32.429268,16.390244,11.707317,2400.0,40.0,1.0,0.0,150.0,0.0,0.0,0.0,0.0,150.0,20.0,,2.7,300.0,0.0,10.830325,3,0.9,1.138889


In [None]:
df_players[df_players["CantidadPartidosJugados"] > 38]

Unnamed: 0,NombreCompleto,Puntos_suma,ReboteDefensivo_suma,ReboteOfensivo_suma,RebotesTotales_suma,Asistencias_suma,Recuperaciones_suma,Perdidas_suma,TaponCometido_suma,TaponRecibido_suma,FaltaCometida_suma,FaltaRecibida_suma,Valoracion_suma,TirosDosAciertos_suma,TirosDosFallos_suma,TirosTresAciertos_suma,TirosTresFallos_suma,TirosLibresAciertos_suma,TirosLibresFallos_suma,plus_minus_suma,posesiones_consumidas_suma,posesiones_jugadas_suma,rebote_of_disp_suma,rebote_def_disp_suma,puntos_q4_y_prorroga_suma,puntos_clutch_suma,min_q4_suma,min_q4_y_prorroga_suma,min_clutch_suma,posesiones_estimadas_suma,TiempoJuego_seg_suma,TiempoJuego_min_suma,IdJugador,IdClub,IdEquipo,Nombre,TiempoJuego,CincoInicial,equipo,partido_key,nacionalidad,rival,fecha_hora,Puntos_prom_40min,ReboteDefensivo_prom_40min,ReboteOfensivo_prom_40min,RebotesTotales_prom_40min,Asistencias_prom_40min,Recuperaciones_prom_40min,Perdidas_prom_40min,TaponCometido_prom_40min,TaponRecibido_prom_40min,FaltaCometida_prom_40min,FaltaRecibida_prom_40min,Valoracion_prom_40min,TirosDosAciertos_prom_40min,TirosDosFallos_prom_40min,TirosTresAciertos_prom_40min,TirosTresFallos_prom_40min,TirosLibresAciertos_prom_40min,TirosLibresFallos_prom_40min,plus_minus_prom_40min,posesiones_consumidas_prom_40min,posesiones_jugadas_prom_40min,rebote_of_disp_prom_40min,rebote_def_disp_prom_40min,min_q4_prom_40min,min_q4_y_prorroga_prom_40min,min_clutch_prom_40min,posesiones_estimadas_prom_40min,TiempoJuego_seg_prom_40min,TiempoJuego_min_prom_40min,TirosDeCampo_suma,TOV%,eFG%,ORB%,DRB%,RB%,FTr,TS%,USG%,AST/TOVr,game_score_suma,ORTG,puntos_clutch_prom_5min,puntos_q4pror_prom_10min,CantidadPartidosJugados,game_score_prom,MinutosJugadosPorPartido
90,"COOPER, THOMAS",466.0,180.0,38.0,218.0,90.0,36.0,70.0,4.0,18.0,112.0,144.0,506.0,104.0,132.0,48.0,90.0,114.0,30.0,90.0,510.0,3150.0,1314.0,1428.0,186.0,20.0,494.52,494.52,39.94,469.36,78180.0,1303.0,327539,19,70600,"COOPER, T.",16:21,False,BOCA,ARGENTINO (J) vs BOCA (005/12/2024 21:00),Brasileño,BOCA,05/12/2024,14.305449,5.52571,1.166539,6.692249,2.762855,1.105142,2.148887,0.122794,0.552571,3.438219,4.420568,15.533384,3.192632,4.052187,1.473523,2.762855,3.499616,0.920952,2.762855,15.656178,96.699923,40.337682,43.837299,15.180967,15.180967,1.226094,14.408596,2400.0,40.0,374.0,13.72549,47.058824,2.891933,12.605042,7.950401,0.304813,53.274191,16.190476,1.285714,409.4,99.284132,2.503756,3.761223,72,5.686111,18.097222
334,"STENTA, NICOLAS",251.0,80.0,40.0,120.0,40.0,25.0,31.0,13.0,10.0,65.0,39.0,263.0,81.0,48.0,23.0,52.0,20.0,19.0,28.0,250.0,2310.0,996.0,1004.0,59.0,11.0,292.6,292.6,35.49,212.16,52140.0,869.0,181745,19,70600,"STENTA, N.",21:30,True,BOCA,BOCA vs ZARATE BASKET (012/10/2024 11:30),Argentino,ZARATE BASKET,12/10/2024,11.55351,3.682394,1.841197,5.52359,1.841197,1.150748,1.426928,0.598389,0.460299,2.991945,1.795167,12.105869,3.728423,2.209436,1.058688,2.393556,0.920598,0.874568,1.288838,11.50748,106.329114,45.8458,46.214039,13.468354,13.468354,1.633602,9.765708,2400.0,40.0,204.0,12.4,56.617647,4.016064,7.968127,6.0,0.098039,56.746247,10.822511,1.290323,239.3,118.306938,1.549732,2.016405,40,5.9825,21.725


In [None]:
# Guardar df_players y combined_df en formato parquet en la carpeta data\processed
df_players.to_parquet("../data/processed/df_players.parquet", index=False)
combined_df.to_parquet("../data/processed/combined_df.parquet", index=False)