# Data Processing

In [30]:
import os
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)

In [31]:
# Define la ruta a la carpeta donde se guardan los archivos
output_dir = "../data/scrapped"

try:
    # 1. Lista todos los archivos en la carpeta
    all_files = os.listdir(output_dir)

    # 2. Filtra solo los archivos que terminan en '.parquet'
    parquet_files = [os.path.join(output_dir, f) for f in all_files if f.endswith('.parquet')]

    # 3. Verifica si hay archivos para procesar
    if not parquet_files:
        print("No se encontraron archivos .parquet en la carpeta especificada.")
    else:
        print(f"Se encontraron {len(parquet_files)} archivos .parquet. Concatenando...")

        # 4. Lee cada archivo Parquet en un DataFrame y los guarda en una lista
        list_of_dfs = [pd.read_parquet(f) for f in parquet_files]

        # 5. Concatena todos los DataFrames de la lista
        combined_df = pd.concat(list_of_dfs, ignore_index=True)

        print("¡Concatenación exitosa! Los archivos se han unido en un solo DataFrame.")
        print("Se ha creado un DataFrame con las siguientes dimensiones:")
        print(f"Filas: {combined_df.shape[0]}, Columnas: {combined_df.shape[1]}")

except FileNotFoundError:
    print("Error: No se encontró la carpeta especificada.")
    print(f"Por favor, revisa que la carpeta '{output_dir}' exista.")
except ImportError:
    print("Error: La librería 'pyarrow' o 'fastparquet' no está instalada.")
    print("Para leer archivos .parquet, necesitas instalar una de estas librerías. Puedes usar el siguiente comando: pip install pyarrow")
except Exception as e:
    print(f"Ocurrió un error inesperado: {e}")

Se encontraron 376 archivos .parquet. Concatenando...
¡Concatenación exitosa! Los archivos se han unido en un solo DataFrame.
Se ha creado un DataFrame con las siguientes dimensiones:
Filas: 8985, Columnas: 35


## Tipos de datos

In [32]:
combined_df.head(3)

Unnamed: 0,IdJugador,IdClub,IdEquipo,Nombre,NombreCompleto,Puntos,ReboteDefensivo,ReboteOfensivo,RebotesTotales,Asistencias,Recuperaciones,Perdidas,TaponCometido,TaponRecibido,FaltaCometida,FaltaRecibida,Valoracion,TiempoJuego,CincoInicial,equipo,TirosDosAciertos,TirosDosFallos,TirosTresAciertos,TirosTresFallos,TirosLibresAciertos,TirosLibresFallos,plus_minus,posesiones_consumidas,posesiones_jugadas,rebote_of_disp,rebote_def_disp,puntos_q4_y_prorroga,puntos_clutch,posesiones_estimadas,partido_key
0,78377,1498,70040,"ARAUJO, M.","ARAUJO, MAXIMO",3,0,0,0,0,0,0,0,0,1,0,1,18:05,False,ATENAS (C),0,1,1,0,0,0,1,2,53,26,22,0,0,2.0,ATENAS (C) vs BOCA (007/10/2024 22:10)
1,326699,1498,70040,"BUENDIA, C.","BUENDIA, CARLOS MANUEL",1,0,0,0,1,0,1,0,0,0,3,1,07:57,True,ATENAS (C),0,0,0,2,1,1,-15,4,14,7,4,0,0,3.88,ATENAS (C) vs BOCA (007/10/2024 22:10)
2,273565,1498,70040,"MONTERO, J.","MONTERO, JOSE IGNACIO",2,0,0,0,3,1,2,0,0,0,2,4,23:27,False,ATENAS (C),0,0,0,2,2,0,-2,5,44,19,17,0,0,4.88,ATENAS (C) vs BOCA (007/10/2024 22:10)


In [33]:
# Convertir la columna 'TiempoJuego' de string "MM:SS" a segundos como entero
def tiempo_a_segundos(tiempo_str):
    try:
        minutos, segundos = map(int, tiempo_str.split(":"))
        return minutos * 60 + segundos
    except Exception:
        return 0

combined_df["TiempoJuego_seg"] = combined_df["TiempoJuego"].apply(tiempo_a_segundos)
combined_df["TiempoJuego_min"] = combined_df["TiempoJuego_seg"] / 60

In [34]:
combined_df.shape

(8985, 37)

In [35]:
categorical_cols=['IdJugador', 'IdClub', 'IdEquipo','Nombre', 'NombreCompleto', 'equipo', 'partido_key']
numerical_cols=['Puntos', 'ReboteDefensivo','ReboteOfensivo', 'RebotesTotales', 'Asistencias', 'Recuperaciones',
                'Perdidas', 'TaponCometido', 'TaponRecibido', 'FaltaCometida','FaltaRecibida', 'Valoracion', 
                'TirosDosAciertos', 'TirosDosFallos','TirosTresAciertos', 'TirosTresFallos', 'TirosLibresAciertos',
                'TirosLibresFallos', 'plus_minus', 'posesiones_consumidas','posesiones_jugadas', 'rebote_of_disp', 
                'rebote_def_disp','puntos_q4_y_prorroga', 'puntos_clutch', 'posesiones_estimadas','TiempoJuego_seg','TiempoJuego_min']

In [36]:
# Convertir columnas categóricas a 'category' y numéricas a 'float' (o 'int' si no hay NaN)
for col in categorical_cols:
    if col in combined_df.columns:
        combined_df[col] = combined_df[col].astype('category')

for col in numerical_cols:
    if col in combined_df.columns:
        combined_df[col] = combined_df[col].astype(float)


## Nacionalidad de Jugadores

In [37]:
extranjeros_data = [
    {"NombreCompleto": "CHACON TIRADO, MARCOS", "nacionalidad": "Cubano"},
    {"NombreCompleto": "MILLER, TAVARIO EARNEST PTRISTIAN", "nacionalidad": "Bahamense"},
    {"NombreCompleto": "RAMIREZ ALCANTARA, KELVIN LEANDRO", "nacionalidad": "Dominicano"},
    {"NombreCompleto": "OWENS, DEMARCO RASHAD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "THOMAS JR, MARCUS WILEY", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "VORHEES, WILLIAM LEONARD-DEUBLER", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "FERGUSON, ROMEAO VENILL", "nacionalidad": "Estadounidense"}, 
    {"NombreCompleto": "LOCKETT, PHILLIP DOMINIQUE DANIEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CLARKE, CHRISTOPHER ASHTON", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "THORNTON, WILLIE ALFORD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "DANIELS, TRAVIS DORREL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "DIGGS, AVERY GERELL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BECTON, REGINALD GEQUAN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CARRERAS PEGUERO, XAVIER MANUEL", "nacionalidad": "Dominicano"},
    {"NombreCompleto": "THOMAS III, CHARLES PRICE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "KRAMER, KELBY JOHN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "GIVENS, SAMUEL JAMAL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BOWIE JR, JULIUS R", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CALFANI PERSINCULA, MATHIAS KENY", "nacionalidad": "Uruguayo"},
    {"NombreCompleto": "MORRISON, DOMINIQUE MONTEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ALEXANDER, QUINTIN IMMANUEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CARDENAS ZAMORA, JORDAN ISRAEL", "nacionalidad": "Ecuatoriano"},
    {"NombreCompleto": "ASCANIO SOLORZANO, JOSE GREGORIO", "nacionalidad": "Venezolano"},
    {"NombreCompleto": "WALTON, ZACHERY CHRISTOPHER", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "THOMAS, DISCHON KYIR", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "PRIDDY, NATHAN WAYNE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "WHITFIELD III, ROBERT JAMARCUS", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HORTON, KENNETH WILCHER", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HOLT, EMMITT DWIGHT", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "JENKINS, JALEN KEMAL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "SANDERS, NAKIE GERALD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BOND JR, TIMOTHY LAMONT", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ROBINSON, BRANDON LAMAR", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "OBENG MENSAH, YAW", "nacionalidad": "Canadiense"},
    {"NombreCompleto": "TROCHA MORELOS, TONNY JOSE", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "CRAION JR, MICHAEL JOE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BELL, RANDY TYREE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "DANIELS, DEANDRE MARTISE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "REESE V, JAMES LANARD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "WATSON, EARL OVREL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ROQUE MARTINEZ, ROMARIO JOSE", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "PAYTON CLOTTEY, EMMANUEL TRAVON", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "FIELDS, CALEB JOSEPH", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ANDERSON, ALPHONSO JORDAN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BASTARDO JOSE, RAYMON SCARLIN", "nacionalidad": "Dominicano"},
    {"NombreCompleto": "BANYARD, NICHOLAS RYAN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "MAXWELL, DU'VAUGHN ELISHA", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "RUIZ RUIZ, JOSE DANIEL", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "FUNDORA ARRECHAVALETA, YASMANY	", "nacionalidad": "Cubano"}, 
    {"NombreCompleto": "KRAYEM, OMAR NABIL", "nacionalidad": "Palestino"}, # Estadounidense
    {"NombreCompleto": "WALLACE, DEVANTE RASHAD-KEITH", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "STOKES, KAMAU THUTMOSES", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "LOWERY, DISHON LURELL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ALI, PRINCE ADAMS", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BONIZIOLI HONORATO, ITALO	", "nacionalidad": "Brasileño"}, 
    {"NombreCompleto": "CARTER, MYLES JUSTIN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HOOPER, CHRISTOPHER JALEEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "OPOKU, NANA KWASI HYEAKURO", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HAMILTON, ISAAC BRANDON", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "NAYLOR, CAMERON LEVELE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "MADRIGAL RENTERIA, JUAN ESTEBAN", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "NIEBLES HERRERA, EDWIN JOSE", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "PETRI, NICHOLAS", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "GUERRA CAÑATE, YEFERSON ANTONIO", "nacionalidad": "Venezolano"},
    {"NombreCompleto": "GARCIA GUERRERO, JORGE LUIS", "nacionalidad": "Venezolano"}, 
    {"NombreCompleto": "HERNANDEZ, MANUEL ALONSO", "nacionalidad": "Estadounidense"}, # Mexicano
]

df_extranjeros = pd.DataFrame(extranjeros_data)
df_extranjeros['nacionalidad'].value_counts()

nacionalidad
Estadounidense    47
Colombiano         5
Dominicano         3
Venezolano         3
Cubano             2
Bahamense          1
Uruguayo           1
Ecuatoriano        1
Canadiense         1
Palestino          1
Brasileño          1
Name: count, dtype: int64

In [38]:
# Eliminar espacios en blanco en 'NombreCompleto' para ambos DataFrames
df_extranjeros['NombreCompleto'] = df_extranjeros['NombreCompleto'].str.strip()
combined_df['NombreCompleto'] = combined_df['NombreCompleto'].str.strip()

# Realizar el left join para agregar la nacionalidad
combined_df = combined_df.merge(df_extranjeros, on='NombreCompleto', how='left')

# Rellenar los valores nulos con 'Argentino'
combined_df['nacionalidad'] = combined_df['nacionalidad'].fillna('Argentino')

In [39]:
# Extraer el rival
combined_df['rival'] = combined_df['partido_key'].str.extract(r'vs\s(.+?)\s\(')
combined_df['fecha_hora'] = combined_df['partido_key'].str[-20:]
combined_df['fecha_hora'] = combined_df['fecha_hora'].str[3:13]

In [40]:
combined_df.head(3)

Unnamed: 0,IdJugador,IdClub,IdEquipo,Nombre,NombreCompleto,Puntos,ReboteDefensivo,ReboteOfensivo,RebotesTotales,Asistencias,Recuperaciones,Perdidas,TaponCometido,TaponRecibido,FaltaCometida,FaltaRecibida,Valoracion,TiempoJuego,CincoInicial,equipo,TirosDosAciertos,TirosDosFallos,TirosTresAciertos,TirosTresFallos,TirosLibresAciertos,TirosLibresFallos,plus_minus,posesiones_consumidas,posesiones_jugadas,rebote_of_disp,rebote_def_disp,puntos_q4_y_prorroga,puntos_clutch,posesiones_estimadas,partido_key,TiempoJuego_seg,TiempoJuego_min,nacionalidad,rival,fecha_hora
0,78377,1498,70040,"ARAUJO, M.","ARAUJO, MAXIMO",3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,18:05,False,ATENAS (C),0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,53.0,26.0,22.0,0.0,0.0,2.0,ATENAS (C) vs BOCA (007/10/2024 22:10),1085.0,18.083333,Argentino,BOCA,07/10/2024
1,326699,1498,70040,"BUENDIA, C.","BUENDIA, CARLOS MANUEL",1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,3.0,1.0,07:57,True,ATENAS (C),0.0,0.0,0.0,2.0,1.0,1.0,-15.0,4.0,14.0,7.0,4.0,0.0,0.0,3.88,ATENAS (C) vs BOCA (007/10/2024 22:10),477.0,7.95,Argentino,BOCA,07/10/2024
2,273565,1498,70040,"MONTERO, J.","MONTERO, JOSE IGNACIO",2.0,0.0,0.0,0.0,3.0,1.0,2.0,0.0,0.0,0.0,2.0,4.0,23:27,False,ATENAS (C),0.0,0.0,0.0,2.0,2.0,0.0,-2.0,5.0,44.0,19.0,17.0,0.0,0.0,4.88,ATENAS (C) vs BOCA (007/10/2024 22:10),1407.0,23.45,Argentino,BOCA,07/10/2024


In [41]:
print("Resumen de la columna 'TirosDosFallos':")
print("Máximo:", combined_df['TirosDosFallos'].max())
print("Mínimo:", combined_df['TirosDosFallos'].min())
print("Promedio:", combined_df['TirosDosFallos'].mean())

Resumen de la columna 'TirosDosFallos':
Máximo: 10.0
Mínimo: 0.0
Promedio: 1.358263772954925


Con esta base quiero formar una que tenga las columnas:
- ..._suma (nuericas sumadas)
- ..._prom_minutos ( _suma / minutos_jugados_suma)
- TirosDeCampo_suma = (TirosDosAciertos_suma + TirosDosFallos_suma + TirosTresAciertos_suma + TirosTresFallos_suma)
- TOV% 100 × (Perdidas_suma / posesiones_consumidas_suma)
- eFG%  ((TirosDosAciertos_suma + 1.5 * TirosTresAciertos_suma) / TirosDeCampo_suma)
- ORB% = 100 × (ReboteOfensivo_suma / rebote_of_disp_suma)
- DRB% = 100 × (ReboteDefensivo_suma / rebote_def_disp_suma)
- RB% = 100 × (RebotesTotales_suma /( rebote_of_disp_suma + rebote_def_disp_suma ))
- FTr = (TirosLibresAciertos_suma / TirosDeCampo_suma) 
- TS% = 100 × (Puntos_suma / (2 × (TirosDeCampo_suma + 0.44 × (TirosLibresAciertos + TirosLibresFallos))))
- USG% = 100 × (posesiones_consumidas_suma / posesiones_jugadas_suma)
- AST% = 100 × (Asistencias_suma / ((TiempoJuego_min_suma / (minutos_totales_equipo / 5)) * tiros_campo_anotados_equipo)  - tiros_campo_anotados_jugador) EVALUAR
- AST/TOVr = (Asistencias_suma / Perdidas_suma)
- game_score = Puntos_suma 
                + (0.4 * (TirosDosAciertos_suma + TirosTresAciertos_suma))  
                - (0.7 * TirosDeCampo_suma)
                - (0.4 * (TirosLibresFallos)
                + (0.7 * ReboteOfensivo_suma)
                + (0.3 * ReboteDefensivo_suma)
                + Recuperaciones_suma
                + (0.7 * Asistencias_suma)
                + (0.7 * TaponCometido_suma)
                - (0.4 * FaltaCometida_suma)
                - Perdidas_suma)

## Group By TODO continuar desde aca

In [48]:
# 1. Agregación Inicial de columnas (sin cambios)
# Identificar columnas numéricas y no numéricas
columnas_numericas = combined_df.select_dtypes(include=np.number).columns.drop("NombreCompleto", errors='ignore')
columnas_no_numericas = combined_df.select_dtypes(exclude=np.number).columns.drop("NombreCompleto", errors='ignore')

# Crear el diccionario de agregaciones
agg_dict = {col: 'sum' for col in columnas_numericas}
agg_dict.update({col: 'last' for col in columnas_no_numericas})

# Realizar la agregación
df_players = combined_df.groupby("NombreCompleto", observed=False).agg(agg_dict).reset_index()

# Renombrar las columnas sumadas
df_players = df_players.rename(columns={col: f"{col}_suma" for col in columnas_numericas})

# 2. Cálculo de métricas avanzadas
# Asegurar que 'TiempoJuego_min_suma' exista para los cálculos
if 'TiempoJuego_min_suma' in df_players.columns:
    
    # Calcular promedios por minuto, evitando la división por cero
    minutos_jugados = df_players["TiempoJuego_min_suma"]
    for col in columnas_numericas:
        if f"{col}_suma" in df_players.columns:
            df_players[f"{col}_prom_minutos"] = np.where(
                minutos_jugados > 0,
                df_players[f"{col}_suma"] / minutos_jugados,
                np.nan
            )

    # Calcular Tiros de Campo totales
    df_players['TirosDeCampo_suma'] = (
        df_players['TirosDosAciertos_suma'] +
        df_players['TirosDosFallos_suma'] +
        df_players['TirosTresAciertos_suma'] +
        df_players['TirosTresFallos_suma']
    )

    # Calcular TOV%, evitando división por cero
    df_players['TOV%'] = np.where(
        df_players['posesiones_consumidas_suma'] > 0,
        100 * (df_players['Perdidas_suma'] / df_players['posesiones_consumidas_suma']),
        np.nan
    )

    # Calcular eFG%, evitando división por cero
    df_players['eFG%'] = np.where(
        df_players['TirosDeCampo_suma'] > 0,
        100 * (df_players['TirosDosAciertos_suma'] + 1.5 * df_players['TirosTresAciertos_suma']) / df_players['TirosDeCampo_suma'],
        np.nan
    )

    # Calcular ORB%, DRB% y RB%, evitando división por cero
    if 'rebote_of_disp_suma' in df_players.columns and 'rebote_def_disp_suma' in df_players.columns:
        df_players['ORB%'] = np.where(
            df_players['rebote_of_disp_suma'] > 0,
            100 * (df_players['ReboteOfensivo_suma'] / df_players['rebote_of_disp_suma']),
            np.nan
        )
        df_players['DRB%'] = np.where(
            df_players['rebote_def_disp_suma'] > 0,
            100 * (df_players['ReboteDefensivo_suma'] / df_players['rebote_def_disp_suma']),
            np.nan
        )
        df_players['RB%'] = np.where(
            (df_players['rebote_of_disp_suma'] + df_players['rebote_def_disp_suma']) > 0,
            100 * (df_players['RebotesTotales_suma'] / (df_players['rebote_of_disp_suma'] + df_players['rebote_def_disp_suma'])),
            np.nan
        )

    # Calcular FTr, evitando división por cero
    df_players['FTr'] = np.where(
        df_players['TirosDeCampo_suma'] > 0,
        df_players['TirosLibresAciertos_suma'] / df_players['TirosDeCampo_suma'],
        np.nan
    )

    # Calcular TS%, evitando división por cero
    denominator_ts = 2 * (df_players['TirosDeCampo_suma'] + 0.44 * (df_players['TirosLibresAciertos_suma'] + df_players['TirosLibresFallos_suma']))
    df_players['TS%'] = np.where(
        denominator_ts > 0,
        100 * (df_players['Puntos_suma'] / denominator_ts),
        np.nan
    )

    # Calcular USG%, evitando división por cero
    df_players['USG%'] = np.where(
        df_players['posesiones_jugadas_suma'] > 0,
        100 * (df_players['posesiones_consumidas_suma'] / df_players['posesiones_jugadas_suma']),
        np.nan
    )
    
    # Calcular AST/TOVr, evitando división por cero
    df_players['AST/TOVr'] = np.where(
        df_players['Perdidas_suma'] > 0,
        df_players['Asistencias_suma'] / df_players['Perdidas_suma'],
        np.nan
    )

    # Calcular Game Score (esta métrica no tiene divisiones)
    df_players['game_score_suma'] = (
        df_players['Puntos_suma'] +
        (0.4 * df_players['TirosDeCampo_suma']) -
        (0.7 * df_players['TirosDeCampo_suma']) -
        (0.4 * df_players['TirosLibresFallos_suma']) +
        (0.7 * df_players['ReboteOfensivo_suma']) +
        (0.3 * df_players['ReboteDefensivo_suma']) +
        df_players['Recuperaciones_suma'] +
        (0.7 * df_players['Asistencias_suma']) +
        (0.7 * df_players['TaponCometido_suma']) -
        (0.4 * df_players['FaltaCometida_suma']) -
        df_players['Perdidas_suma']
    )


# 3. Agregar la columna de partidos jugados (sin cambios)
cantidad_partidos = combined_df[combined_df["TiempoJuego_seg"] > 0].groupby("NombreCompleto", observed=False).size()
df_players["CantidadPartidosJugados"] = df_players["NombreCompleto"].map(cantidad_partidos).fillna(0).astype(int)
df_players['game_score_prom'] = np.where(
        df_players['CantidadPartidosJugados'] > 0,
        (df_players['game_score_suma'] / df_players['CantidadPartidosJugados']),
        np.nan
    )

In [43]:
df_players

Unnamed: 0,NombreCompleto,Puntos_suma,ReboteDefensivo_suma,ReboteOfensivo_suma,RebotesTotales_suma,Asistencias_suma,Recuperaciones_suma,Perdidas_suma,TaponCometido_suma,TaponRecibido_suma,FaltaCometida_suma,FaltaRecibida_suma,Valoracion_suma,TirosDosAciertos_suma,TirosDosFallos_suma,TirosTresAciertos_suma,TirosTresFallos_suma,TirosLibresAciertos_suma,TirosLibresFallos_suma,plus_minus_suma,posesiones_consumidas_suma,posesiones_jugadas_suma,rebote_of_disp_suma,rebote_def_disp_suma,puntos_q4_y_prorroga_suma,puntos_clutch_suma,posesiones_estimadas_suma,TiempoJuego_seg_suma,TiempoJuego_min_suma,IdJugador,IdClub,IdEquipo,Nombre,TiempoJuego,CincoInicial,equipo,partido_key,nacionalidad,rival,fecha_hora,Puntos_prom_minutos,ReboteDefensivo_prom_minutos,ReboteOfensivo_prom_minutos,RebotesTotales_prom_minutos,Asistencias_prom_minutos,Recuperaciones_prom_minutos,Perdidas_prom_minutos,TaponCometido_prom_minutos,TaponRecibido_prom_minutos,FaltaCometida_prom_minutos,FaltaRecibida_prom_minutos,Valoracion_prom_minutos,TirosDosAciertos_prom_minutos,TirosDosFallos_prom_minutos,TirosTresAciertos_prom_minutos,TirosTresFallos_prom_minutos,TirosLibresAciertos_prom_minutos,TirosLibresFallos_prom_minutos,plus_minus_prom_minutos,posesiones_consumidas_prom_minutos,posesiones_jugadas_prom_minutos,rebote_of_disp_prom_minutos,rebote_def_disp_prom_minutos,puntos_q4_y_prorroga_prom_minutos,puntos_clutch_prom_minutos,posesiones_estimadas_prom_minutos,TiempoJuego_seg_prom_minutos,TiempoJuego_min_prom_minutos,TirosDeCampo_suma,TOV%,eFG%,ORB%,DRB%,RB%,FTr,TS%,USG%,AST/TOVr,game_score_suma,CantidadPartidosJugados,game_score_prom
0,"AALIYA, LEE ABRAHAM",261.0,82.0,42.0,124.0,18.0,18.0,25.0,25.0,4.0,60.0,51.0,252.0,79.0,51.0,23.0,74.0,34.0,31.0,98.0,278.0,1508.0,663.0,683.0,104.0,2.0,238.60,37472.0,624.533333,326138,1790,69616,"AALIYA, L.",21:02,False,INSTITUTO,INSTITUTO vs QUIMSA (002/12/2024 22:10),Argentino,QUIMSA,02/12/2024,0.417912,0.131298,0.067250,0.198548,0.028822,0.028822,0.040030,0.040030,0.006405,0.096072,0.081661,0.403501,0.126494,0.081661,0.036827,0.118488,0.054441,0.049637,0.156917,0.445132,2.414603,1.061593,1.093617,0.166524,0.003202,0.382045,60.0,1.0,227.0,8.992806,0.500000,6.334842,12.005857,9.212481,0.149780,51.056338,18.435013,0.720000,233.6,34,6.870588
1,"ACEVEDO, MAXIMILIANO JUNIORS",2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.0,1.0,0.0,0.0,-6.0,4.0,26.0,15.0,10.0,2.0,0.0,3.00,694.0,11.566667,149814,2305,69510,"ACEVEDO, M.",00:00,False,OBERA,OBERA vs RIACHUELO (LR) (001/12/2024 21:00),Argentino,RIACHUELO,01/12/2024,0.172911,0.086455,0.086455,0.172911,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.086455,0.086455,0.172911,0.000000,0.086455,0.000000,0.000000,-0.518732,0.345821,2.247839,1.296830,0.864553,0.172911,0.000000,0.259366,60.0,1.0,4.0,0.000000,0.250000,6.666667,10.000000,8.000000,0.000000,25.000000,15.384615,,1.8,4,0.450000
2,"ACEVEDO, SEBASTIAN IGNACIO",285.0,104.0,24.0,128.0,24.0,26.0,30.0,1.0,5.0,84.0,65.0,266.0,55.0,59.0,43.0,76.0,46.0,9.0,-12.0,287.0,2047.0,992.0,828.0,64.0,12.0,263.20,50524.0,842.066667,209965,1426,69281,"ACEVEDO, S.",21:53,True,SAN MARTIN (C),SAN MARTIN (C) vs GIMNASIA (CR) (004/12/2024 2...,Argentino,GIMNASIA,04/12/2024,0.338453,0.123506,0.028501,0.152007,0.028501,0.030876,0.035627,0.001188,0.005938,0.099755,0.077191,0.315889,0.065315,0.070066,0.051065,0.090254,0.054628,0.010688,-0.014251,0.340828,2.430924,1.178054,0.983295,0.076003,0.014251,0.312564,60.0,1.0,233.0,10.452962,0.512876,2.419355,12.560386,7.032967,0.197425,55.404355,14.020518,0.800000,239.4,38,6.300000
3,"ACTIS, JOAQUIN MATIAS",0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,-4.0,1.0,8.0,6.0,3.0,0.0,0.0,1.00,226.0,3.766667,225273,88,69164,"ACTIS, J.",00:00,False,SAN LORENZO,SAN LORENZO vs BOCA (017/11/2024 21:05),Argentino,BOCA,17/11/2024,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.265487,0.000000,0.000000,0.000000,0.000000,-0.265487,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.061947,0.265487,2.123894,1.592920,0.796460,0.000000,0.000000,0.265487,60.0,1.0,0.0,100.000000,,0.000000,0.000000,0.000000,,,12.500000,0.000000,-1.0,1,-1.000000
4,"ACUÑA, ROBERTO SANTIAGO",213.0,95.0,49.0,144.0,20.0,16.0,28.0,11.0,10.0,79.0,48.0,238.0,87.0,59.0,2.0,15.0,33.0,23.0,114.0,215.0,1372.0,599.0,597.0,48.0,2.0,166.64,39416.0,656.933333,271401,1790,69616,"ACUÑA, R.",18:15,False,INSTITUTO,INSTITUTO vs QUIMSA (002/12/2024 22:10),Argentino,QUIMSA,02/12/2024,0.324234,0.144611,0.074589,0.219200,0.030444,0.024356,0.042622,0.016744,0.015222,0.120256,0.073067,0.362289,0.132434,0.089811,0.003044,0.022833,0.050233,0.035011,0.173534,0.327278,2.088492,0.911812,0.908768,0.073067,0.003044,0.253663,60.0,1.0,163.0,13.023256,0.552147,8.180301,15.912898,12.040134,0.202454,56.757621,15.670554,0.714286,195.8,35,5.594286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
372,"WHITFIELD III, ROBERT JAMARCUS",145.0,29.0,4.0,33.0,16.0,10.0,14.0,0.0,3.0,25.0,23.0,113.0,24.0,22.0,28.0,46.0,13.0,4.0,-35.0,141.0,673.0,293.0,281.0,67.0,10.0,137.48,16373.0,272.883333,330064,1474,68570,"WHITFIELD, R.",08:49,False,GIMNASIA (CR),GIMNASIA (CR) vs FERRO (018/05/2025 20:00),Estadounidense,FERRO,18/05/2025,0.531363,0.106273,0.014658,0.120931,0.058633,0.036646,0.051304,0.000000,0.010994,0.091614,0.084285,0.414096,0.087950,0.080621,0.102608,0.168570,0.047639,0.014658,-0.128260,0.516704,2.466255,1.073719,1.029744,0.245526,0.036646,0.503805,60.0,1.0,120.0,9.929078,0.550000,1.365188,10.320285,5.749129,0.108333,56.871666,20.950966,1.142857,116.1,18,6.450000
373,"WOLINSKY, FACUNDO ARIEL",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.000000,182885,46,69144,"WOLINSKY, F.",00:00,False,FERRO,FERRO vs PEÑAROL (MDP) (009/10/2024 20:00),Argentino,PEÑAROL,09/10/2024,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,0.0,0,
374,"ZAWADSKI, FARAMIR",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.000000,274124,2314,69185,"ZAWADSKI, F.",00:00,False,ZARATE BASKET,RIACHUELO (LR) vs ZARATE BASKET (006/11/2024 2...,Argentino,ZARATE BASKET,06/11/2024,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,0.0,0,
375,"ZEZULAR, FEDERICO",170.0,34.0,22.0,56.0,10.0,27.0,17.0,14.0,2.0,62.0,26.0,149.0,55.0,37.0,15.0,26.0,15.0,10.0,123.0,161.0,1314.0,537.0,579.0,58.0,1.0,139.00,27668.0,461.133333,271328,1869,69992,"ZEZULAR, F.",04:49,False,QUIMSA,INSTITUTO vs QUIMSA (002/12/2024 22:10),Argentino,QUIMSA,02/12/2024,0.368657,0.073731,0.047709,0.121440,0.021686,0.058551,0.036866,0.030360,0.004337,0.134451,0.056383,0.323117,0.119271,0.080237,0.032529,0.056383,0.032529,0.021686,0.266734,0.349140,2.849501,1.164522,1.255602,0.125777,0.002169,0.301431,60.0,1.0,133.0,10.559006,0.582707,4.096834,5.872193,5.017921,0.112782,59.027778,12.252664,0.588235,153.7,37,4.154054


In [50]:
# Guardar df_players y combined_df en formato parquet en la carpeta data\processed
df_players.to_parquet("../data/processed/df_players.parquet", index=False)
combined_df.to_parquet("../data/processed/combined_df.parquet", index=False)