# Data Processing

In [31]:
import os
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)

In [32]:
# Define la ruta a la carpeta donde se guardan los archivos
output_dir = "../data/scrapped"

try:
    # 1. Lista todos los archivos en la carpeta
    all_files = os.listdir(output_dir)

    # 2. Filtra solo los archivos que terminan en '.parquet'
    parquet_files = [os.path.join(output_dir, f) for f in all_files if f.endswith('.parquet')]

    # 3. Verifica si hay archivos para procesar
    if not parquet_files:
        print("No se encontraron archivos .parquet en la carpeta especificada.")
    else:
        print(f"Se encontraron {len(parquet_files)} archivos .parquet. Concatenando...")

        # 4. Lee cada archivo Parquet en un DataFrame y los guarda en una lista
        list_of_dfs = [pd.read_parquet(f) for f in parquet_files]

        # 5. Concatena todos los DataFrames de la lista
        combined_df = pd.concat(list_of_dfs, ignore_index=True)

        print("¡Concatenación exitosa! Los archivos se han unido en un solo DataFrame.")
        print("Se ha creado un DataFrame con las siguientes dimensiones:")
        print(f"Filas: {combined_df.shape[0]}, Columnas: {combined_df.shape[1]}")

except FileNotFoundError:
    print("Error: No se encontró la carpeta especificada.")
    print(f"Por favor, revisa que la carpeta '{output_dir}' exista.")
except ImportError:
    print("Error: La librería 'pyarrow' o 'fastparquet' no está instalada.")
    print("Para leer archivos .parquet, necesitas instalar una de estas librerías. Puedes usar el siguiente comando: pip install pyarrow")
except Exception as e:
    print(f"Ocurrió un error inesperado: {e}")

Se encontraron 378 archivos .parquet. Concatenando...
¡Concatenación exitosa! Los archivos se han unido en un solo DataFrame.
Se ha creado un DataFrame con las siguientes dimensiones:
Filas: 9033, Columnas: 35


## Tipos de datos

In [33]:
combined_df.head(3)

Unnamed: 0,IdJugador,IdClub,IdEquipo,Nombre,NombreCompleto,Puntos,ReboteDefensivo,ReboteOfensivo,RebotesTotales,Asistencias,Recuperaciones,Perdidas,TaponCometido,TaponRecibido,FaltaCometida,FaltaRecibida,Valoracion,TiempoJuego,CincoInicial,equipo,TirosDosAciertos,TirosDosFallos,TirosTresAciertos,TirosTresFallos,TirosLibresAciertos,TirosLibresFallos,plus_minus,posesiones_consumidas,posesiones_jugadas,rebote_of_disp,rebote_def_disp,puntos_q4_y_prorroga,puntos_clutch,posesiones_estimadas,partido_key
0,96264,1932,0,"ARIAS, J.","ARIAS, JUAN PABLO",0,0,0,0,0,0,0,0,0,0,0,0,00:00,False,ARGENTINO (J),0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00)
1,96269,1932,0,"CAPELLI, S.","CAPELLI, SANTIAGO",0,1,0,1,1,0,0,0,0,1,0,1,06:05,False,ARGENTINO (J),0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00)
2,323790,1932,0,"FRONTERA, R.","FRONTERA, RAMIRO JEREMIAS",5,6,0,6,2,1,1,1,2,3,2,3,24:02,False,ARGENTINO (J),2,0,0,0,1,0,-21,11,48,30,15,0,0,3.44,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00)


In [34]:
# Convertir la columna 'TiempoJuego' de string "MM:SS" a segundos como entero
def tiempo_a_segundos(tiempo_str):
    try:
        minutos, segundos = map(int, tiempo_str.split(":"))
        return minutos * 60 + segundos
    except Exception:
        return 0

combined_df["TiempoJuego_seg"] = combined_df["TiempoJuego"].apply(tiempo_a_segundos)
combined_df["TiempoJuego_min"] = combined_df["TiempoJuego_seg"] / 60

In [35]:
combined_df

Unnamed: 0,IdJugador,IdClub,IdEquipo,Nombre,NombreCompleto,Puntos,ReboteDefensivo,ReboteOfensivo,RebotesTotales,Asistencias,Recuperaciones,Perdidas,TaponCometido,TaponRecibido,FaltaCometida,FaltaRecibida,Valoracion,TiempoJuego,CincoInicial,equipo,TirosDosAciertos,TirosDosFallos,TirosTresAciertos,TirosTresFallos,TirosLibresAciertos,TirosLibresFallos,plus_minus,posesiones_consumidas,posesiones_jugadas,rebote_of_disp,rebote_def_disp,puntos_q4_y_prorroga,puntos_clutch,posesiones_estimadas,partido_key,TiempoJuego_seg,TiempoJuego_min
0,96264,1932,0,"ARIAS, J.","ARIAS, JUAN PABLO",0,0,0,0,0,0,0,0,0,0,0,0,00:00,False,ARGENTINO (J),0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00),0,0.000000
1,96269,1932,0,"CAPELLI, S.","CAPELLI, SANTIAGO",0,1,0,1,1,0,0,0,0,1,0,1,06:05,False,ARGENTINO (J),0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00),365,6.083333
2,323790,1932,0,"FRONTERA, R.","FRONTERA, RAMIRO JEREMIAS",5,6,0,6,2,1,1,1,2,3,2,3,24:02,False,ARGENTINO (J),2,0,0,0,1,0,-21,11,48,30,15,0,0,3.44,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00),1442,24.033333
3,214713,1932,0,"CAPORALETTI, M.","CAPORALETTI, MARTINO",0,0,0,0,1,0,0,0,0,1,0,0,02:47,False,ARGENTINO (J),0,0,0,0,0,0,-2,0,7,3,3,0,0,0.00,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00),167,2.783333
4,323785,1932,0,"ALLENDE, T.","ALLENDE, TOMAS DANIEL",5,4,1,5,1,2,1,1,0,1,2,8,19:37,True,ARGENTINO (J),0,0,1,0,2,0,-15,8,66,34,26,0,0,1.88,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00),1177,19.616667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9028,144733,1913,0,"BORSATTI, M.","BORSATTI, MATIAS DANIEL",0,3,0,3,2,3,0,0,0,3,0,4,11:01,False,UNION (SF),0,0,0,0,0,0,-16,1,20,12,12,0,0,0.00,ZARATE BASKET vs UNION (SF) (020/12/2024 21:00),661,11.016667
9029,182439,1913,0,"CHAMORRO, F.","CHAMORRO, FACUNDO",9,1,0,1,0,1,2,0,0,2,3,7,13:40,False,UNION (SF),1,0,1,0,4,0,-5,8,41,22,18,0,0,5.76,ZARATE BASKET vs UNION (SF) (020/12/2024 21:00),820,13.666667
9030,325983,1913,0,"COSOLITO, M.","COSOLITO, MAURO NICOLAS",3,6,1,7,1,0,1,1,0,3,3,2,29:17,True,UNION (SF),0,0,0,0,3,0,8,10,63,31,24,0,0,1.32,ZARATE BASKET vs UNION (SF) (020/12/2024 21:00),1757,29.283333
9031,274147,1913,0,"ASTULFI, S.","ASTULFI BOGLIC, SEGUNDO",0,0,0,0,0,0,0,0,0,0,0,0,00:00,False,UNION (SF),0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,ZARATE BASKET vs UNION (SF) (020/12/2024 21:00),0,0.000000


In [36]:
categorical_cols=['IdJugador', 'IdClub', 'IdEquipo','Nombre', 'NombreCompleto', 'equipo', 'partido_key']
numerical_cols=['Puntos', 'ReboteDefensivo','ReboteOfensivo', 'RebotesTotales', 'Asistencias', 'Recuperaciones',
                'Perdidas', 'TaponCometido', 'TaponRecibido', 'FaltaCometida','FaltaRecibida', 'Valoracion', 
                'TirosDosAciertos', 'TirosDosFallos','TirosTresAciertos', 'TirosTresFallos', 'TirosLibresAciertos',
                'TirosLibresFallos', 'plus_minus', 'posesiones_consumidas','posesiones_jugadas', 'rebote_of_disp', 
                'rebote_def_disp','puntos_q4_y_prorroga', 'puntos_clutch', 'posesiones_estimadas','TiempoJuego_seg','TiempoJuego_min']

In [37]:
# Convertir columnas categóricas a 'category' y numéricas a 'float' (o 'int' si no hay NaN)
for col in categorical_cols:
    if col in combined_df.columns:
        combined_df[col] = combined_df[col].astype('category')

for col in numerical_cols:
    if col in combined_df.columns:
        combined_df[col] = combined_df[col].astype(float)


## Group By

In [38]:
# Definir las columnas que son numéricas y no son 'NombreCompleto'
columnas_numericas = combined_df.select_dtypes(include=np.number).columns.drop("NombreCompleto", errors='ignore')
columnas_no_numericas = combined_df.select_dtypes(exclude=np.number).columns.drop("NombreCompleto", errors='ignore')

# Crear el diccionario de agregaciones
agg_dict = {col: 'sum' for col in columnas_numericas}
agg_dict.update({col: 'last' for col in columnas_no_numericas})

# Realizar la agregación
df_players = combined_df.groupby("NombreCompleto", observed=False).agg(agg_dict).reset_index()

# Renombrar las columnas sumadas
df_players = df_players.rename(columns={col: f"{col}_suma" for col in columnas_numericas})

# Agregar la columna CantidadPartidosJugados
cantidad_partidos = combined_df[combined_df["TiempoJuego_seg"] > 0].groupby("NombreCompleto", observed=False).size()
df_players["CantidadPartidosJugados"] = df_players["NombreCompleto"].map(cantidad_partidos).fillna(0).astype(int)

In [39]:
df_players

Unnamed: 0,NombreCompleto,Puntos_suma,ReboteDefensivo_suma,ReboteOfensivo_suma,RebotesTotales_suma,Asistencias_suma,Recuperaciones_suma,Perdidas_suma,TaponCometido_suma,TaponRecibido_suma,FaltaCometida_suma,FaltaRecibida_suma,Valoracion_suma,TirosDosAciertos_suma,TirosDosFallos_suma,TirosTresAciertos_suma,TirosTresFallos_suma,TirosLibresAciertos_suma,TirosLibresFallos_suma,plus_minus_suma,posesiones_consumidas_suma,posesiones_jugadas_suma,rebote_of_disp_suma,rebote_def_disp_suma,puntos_q4_y_prorroga_suma,puntos_clutch_suma,posesiones_estimadas_suma,TiempoJuego_seg_suma,TiempoJuego_min_suma,IdJugador,IdClub,IdEquipo,Nombre,TiempoJuego,CincoInicial,equipo,partido_key,CantidadPartidosJugados
0,"AALIYA, LEE ABRAHAM",261.0,82.0,42.0,124.0,18.0,18.0,25.0,25.0,4.0,60.0,51.0,252.0,79.0,0.0,23.0,0.0,34.0,0.0,98.0,278.0,1508.0,663.0,683.0,104.0,2.0,99.96,37472.0,624.533333,326138,1790,0,"AALIYA, L.",17:57,False,INSTITUTO,UNION (SF) vs INSTITUTO (005/12/2024 21:00),34
1,"ACEVEDO, MAXIMILIANO JUNIORS",2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-6.0,4.0,26.0,15.0,10.0,2.0,0.0,0.00,694.0,11.566667,149814,2305,0,"ACEVEDO, M.",00:00,False,OBERA,ZARATE BASKET vs OBERA (029/01/2025 22:00),4
2,"ACEVEDO, SEBASTIAN IGNACIO",285.0,104.0,24.0,128.0,24.0,26.0,30.0,1.0,5.0,84.0,65.0,266.0,55.0,0.0,43.0,0.0,46.0,0.0,-12.0,287.0,2047.0,992.0,828.0,64.0,12.0,124.24,50524.0,842.066667,209965,1426,0,"ACEVEDO, S.",18:08,False,SAN MARTIN (C),ZARATE BASKET vs SAN MARTIN (C) (003/04/2025 2...,38
3,"ACTIS, JOAQUIN MATIAS",0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,-4.0,1.0,8.0,6.0,3.0,0.0,0.0,1.00,226.0,3.766667,225273,88,0,"ACTIS, J.",00:00,False,SAN LORENZO,SAN LORENZO vs SAN MARTIN (C) (027/01/2025 20:00),1
4,"ACUÑA, ROBERTO SANTIAGO",213.0,95.0,49.0,144.0,20.0,16.0,28.0,11.0,10.0,79.0,48.0,238.0,87.0,0.0,2.0,0.0,33.0,0.0,114.0,215.0,1372.0,599.0,597.0,48.0,2.0,87.08,39416.0,656.933333,330244,2376,0,"ACUÑA, R.",20:04,True,PEÑAROL (MDP),UNION (SF) vs PEÑAROL (MDP) (002/05/2025 21:00),35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
373,"WHITFIELD III, ROBERT JAMARCUS",155.0,32.0,5.0,37.0,18.0,13.0,14.0,0.0,3.0,26.0,24.0,124.0,25.0,0.0,30.0,0.0,15.0,0.0,-7.0,153.0,731.0,316.0,305.0,72.0,10.0,71.60,17536.0,292.266667,330064,1474,0,"WHITFIELD, R.",19:16,False,GIMNASIA (CR),ZARATE BASKET vs GIMNASIA (CR) (023/03/2025 20...,19
374,"WOLINSKY, FACUNDO ARIEL",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.000000,182885,46,0,"WOLINSKY, F.",00:00,False,FERRO,PLATENSE vs FERRO (026/10/2024 20:00),0
375,"ZAWADSKI, FARAMIR",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.000000,274124,2314,0,"ZAWADSKI, F.",00:00,False,ZARATE BASKET,ZARATE BASKET vs INDEPENDIENTE (O) (017/10/202...,0
376,"ZEZULAR, FEDERICO",170.0,34.0,22.0,56.0,10.0,27.0,17.0,14.0,2.0,62.0,26.0,149.0,55.0,0.0,15.0,0.0,15.0,0.0,123.0,161.0,1314.0,537.0,579.0,58.0,1.0,72.60,27668.0,461.133333,271328,1869,0,"ZEZULAR, F.",05:35,False,QUIMSA,ZARATE BASKET vs QUIMSA (025/04/2025 21:00),37


## Nacionalidad de Jugadores

In [40]:
extranjeros_data = [
    {"NombreCompleto": "CHACON TIRADO, MARCOS", "nacionalidad": "Cubano"},
    {"NombreCompleto": "MILLER, TAVARIO EARNEST PTRISTIAN", "nacionalidad": "Bahamense"},
    {"NombreCompleto": "RAMIREZ ALCANTARA, KELVIN LEANDRO", "nacionalidad": "Dominicano"},
    {"NombreCompleto": "OWENS, DEMARCO RASHAD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "THOMAS JR, MARCUS WILEY", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "VORHEES, WILLIAM LEONARD-DEUBLER", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "FERGUSON, ROMEAO VENILL", "nacionalidad": "Estadounidense"}, 
    {"NombreCompleto": "LOCKETT, PHILLIP DOMINIQUE DANIEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CLARKE, CHRISTOPHER ASHTON", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "THORNTON, WILLIE ALFORD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "DANIELS, TRAVIS DORREL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "DIGGS, AVERY GERELL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BECTON, REGINALD GEQUAN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CARRERAS PEGUERO, XAVIER MANUEL", "nacionalidad": "Dominicano"},
    {"NombreCompleto": "THOMAS III, CHARLES PRICE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "KRAMER, KELBY JOHN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "GIVENS, SAMUEL JAMAL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BOWIE JR, JULIUS R", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CALFANI PERSINCULA, MATHIAS KENY", "nacionalidad": "Uruguayo"},
    {"NombreCompleto": "MORRISON, DOMINIQUE MONTEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ALEXANDER, QUINTIN IMMANUEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "CARDENAS ZAMORA, JORDAN ISRAEL", "nacionalidad": "Ecuatoriano"},
    {"NombreCompleto": "ASCANIO SOLORZANO, JOSE GREGORIO", "nacionalidad": "Venezolano"},
    {"NombreCompleto": "WALTON, ZACHERY CHRISTOPHER", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "THOMAS, DISCHON KYIR", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "PRIDDY, NATHAN WAYNE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "WHITFIELD III, ROBERT JAMARCUS", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HORTON, KENNETH WILCHER", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HOLT, EMMITT DWIGHT", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "JENKINS, JALEN KEMAL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "SANDERS, NAKIE GERALD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BOND JR, TIMOTHY LAMONT", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ROBINSON, BRANDON LAMAR", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "OBENG MENSAH, YAW", "nacionalidad": "Canadiense"},
    {"NombreCompleto": "TROCHA MORELOS, TONNY JOSE", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "CRAION JR, MICHAEL JOE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BELL, RANDY TYREE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "DANIELS, DEANDRE MARTISE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "REESE V, JAMES LANARD", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "WATSON, EARL OVREL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ROQUE MARTINEZ, ROMARIO JOSE", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "PAYTON CLOTTEY, EMMANUEL TRAVON", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "FIELDS, CALEB JOSEPH", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ANDERSON, ALPHONSO JORDAN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BASTARDO JOSE, RAYMON SCARLIN", "nacionalidad": "Dominicano"},
    {"NombreCompleto": "BANYARD, NICHOLAS RYAN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "MAXWELL, DU'VAUGHN ELISHA", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "RUIZ RUIZ, JOSE DANIEL", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "FUNDORA ARRECHAVALETA, YASMANY	", "nacionalidad": "Cubano"}, 
    {"NombreCompleto": "KRAYEM, OMAR NABIL", "nacionalidad": "Palestino"}, # Estadounidense
    {"NombreCompleto": "WALLACE, DEVANTE RASHAD-KEITH", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "STOKES, KAMAU THUTMOSES", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "LOWERY, DISHON LURELL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "ALI, PRINCE ADAMS", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "BONIZIOLI HONORATO, ITALO	", "nacionalidad": "Brasileño"}, 
    {"NombreCompleto": "CARTER, MYLES JUSTIN", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HOOPER, CHRISTOPHER JALEEL", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "OPOKU, NANA KWASI HYEAKURO", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "HAMILTON, ISAAC BRANDON", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "NAYLOR, CAMERON LEVELE", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "MADRIGAL RENTERIA, JUAN ESTEBAN", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "NIEBLES HERRERA, EDWIN JOSE", "nacionalidad": "Colombiano"},
    {"NombreCompleto": "PETRI, NICHOLAS", "nacionalidad": "Estadounidense"},
    {"NombreCompleto": "GUERRA CAÑATE, YEFERSON ANTONIO", "nacionalidad": "Venezolano"},
    {"NombreCompleto": "GARCIA GUERRERO, JORGE LUIS", "nacionalidad": "Venezolano"}, 
    {"NombreCompleto": "HERNANDEZ, MANUEL ALONSO", "nacionalidad": "Estadounidense"}, # Mexicano
]

df_extranjeros = pd.DataFrame(extranjeros_data)
df_extranjeros['nacionalidad'].value_counts()

nacionalidad
Estadounidense    47
Colombiano         5
Dominicano         3
Venezolano         3
Cubano             2
Bahamense          1
Uruguayo           1
Ecuatoriano        1
Canadiense         1
Palestino          1
Brasileño          1
Name: count, dtype: int64

In [41]:
# Eliminar espacios en blanco adelante y atrás en 'NombreCompleto'
df_players['NombreCompleto'] = df_players['NombreCompleto'].str.strip()
df_extranjeros['NombreCompleto'] = df_extranjeros['NombreCompleto'].str.strip()

# Left join para agregar la nacionalidad
df_players = df_players.merge(df_extranjeros, on='NombreCompleto', how='left')
df_players['nacionalidad'] = df_players['nacionalidad'].fillna('Argentino')

In [42]:
df_players['nacionalidad'].value_counts()

nacionalidad
Argentino         312
Estadounidense     47
Colombiano          5
Venezolano          3
Dominicano          3
Cubano              2
Uruguayo            1
Brasileño           1
Ecuatoriano         1
Palestino           1
Bahamense           1
Canadiense          1
Name: count, dtype: int64

In [43]:
# Eliminar espacios en blanco en 'NombreCompleto' para ambos DataFrames
df_extranjeros['NombreCompleto'] = df_extranjeros['NombreCompleto'].str.strip()
combined_df['NombreCompleto'] = combined_df['NombreCompleto'].str.strip()

# Realizar el left join para agregar la nacionalidad
combined_df = combined_df.merge(df_extranjeros, on='NombreCompleto', how='left')

# Rellenar los valores nulos con 'Argentino'
combined_df['nacionalidad'] = combined_df['nacionalidad'].fillna('Argentino')

In [48]:
# Extraer el rival
combined_df['rival'] = combined_df['partido_key'].str.extract(r'vs\s(.+?)\s\(')
combined_df['fecha_hora'] = combined_df['partido_key'].str[-20:]
combined_df['fecha_hora'] = combined_df['fecha_hora'].str[3:13]
combined_df

Unnamed: 0,IdJugador,IdClub,IdEquipo,Nombre,NombreCompleto,Puntos,ReboteDefensivo,ReboteOfensivo,RebotesTotales,Asistencias,Recuperaciones,Perdidas,TaponCometido,TaponRecibido,FaltaCometida,FaltaRecibida,Valoracion,TiempoJuego,CincoInicial,equipo,TirosDosAciertos,TirosDosFallos,TirosTresAciertos,TirosTresFallos,TirosLibresAciertos,TirosLibresFallos,plus_minus,posesiones_consumidas,posesiones_jugadas,rebote_of_disp,rebote_def_disp,puntos_q4_y_prorroga,puntos_clutch,posesiones_estimadas,partido_key,TiempoJuego_seg,TiempoJuego_min,nacionalidad,rival,fecha_hora
0,96264,1932,0,"ARIAS, J.","ARIAS, JUAN PABLO",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00:00,False,ARGENTINO (J),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00),0.0,0.000000,Argentino,ATENAS,02/04/2025
1,96269,1932,0,"CAPELLI, S.","CAPELLI, SANTIAGO",0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,06:05,False,ARGENTINO (J),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00),365.0,6.083333,Argentino,ATENAS,02/04/2025
2,323790,1932,0,"FRONTERA, R.","FRONTERA, RAMIRO JEREMIAS",5.0,6.0,0.0,6.0,2.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,24:02,False,ARGENTINO (J),2.0,0.0,0.0,0.0,1.0,0.0,-21.0,11.0,48.0,30.0,15.0,0.0,0.0,3.44,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00),1442.0,24.033333,Argentino,ATENAS,02/04/2025
3,214713,1932,0,"CAPORALETTI, M.","CAPORALETTI, MARTINO",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,02:47,False,ARGENTINO (J),0.0,0.0,0.0,0.0,0.0,0.0,-2.0,0.0,7.0,3.0,3.0,0.0,0.0,0.00,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00),167.0,2.783333,Argentino,ATENAS,02/04/2025
4,323785,1932,0,"ALLENDE, T.","ALLENDE, TOMAS DANIEL",5.0,4.0,1.0,5.0,1.0,2.0,1.0,1.0,0.0,1.0,2.0,8.0,19:37,True,ARGENTINO (J),0.0,0.0,1.0,0.0,2.0,0.0,-15.0,8.0,66.0,34.0,26.0,0.0,0.0,1.88,ARGENTINO (J) vs ATENAS (C) (002/04/2025 21:00),1177.0,19.616667,Argentino,ATENAS,02/04/2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9028,144733,1913,0,"BORSATTI, M.","BORSATTI, MATIAS DANIEL",0.0,3.0,0.0,3.0,2.0,3.0,0.0,0.0,0.0,3.0,0.0,4.0,11:01,False,UNION (SF),0.0,0.0,0.0,0.0,0.0,0.0,-16.0,1.0,20.0,12.0,12.0,0.0,0.0,0.00,ZARATE BASKET vs UNION (SF) (020/12/2024 21:00),661.0,11.016667,Argentino,UNION,20/12/2024
9029,182439,1913,0,"CHAMORRO, F.","CHAMORRO, FACUNDO",9.0,1.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,2.0,3.0,7.0,13:40,False,UNION (SF),1.0,0.0,1.0,0.0,4.0,0.0,-5.0,8.0,41.0,22.0,18.0,0.0,0.0,5.76,ZARATE BASKET vs UNION (SF) (020/12/2024 21:00),820.0,13.666667,Argentino,UNION,20/12/2024
9030,325983,1913,0,"COSOLITO, M.","COSOLITO, MAURO NICOLAS",3.0,6.0,1.0,7.0,1.0,0.0,1.0,1.0,0.0,3.0,3.0,2.0,29:17,True,UNION (SF),0.0,0.0,0.0,0.0,3.0,0.0,8.0,10.0,63.0,31.0,24.0,0.0,0.0,1.32,ZARATE BASKET vs UNION (SF) (020/12/2024 21:00),1757.0,29.283333,Argentino,UNION,20/12/2024
9031,274147,1913,0,"ASTULFI, S.","ASTULFI BOGLIC, SEGUNDO",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00:00,False,UNION (SF),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,ZARATE BASKET vs UNION (SF) (020/12/2024 21:00),0.0,0.000000,Argentino,UNION,20/12/2024
