# ETL NBA Joins - Proyecto Daft17_Group01_PF
Este notebook realiza los joins finales del modelo NBA, uniendo las tablas limpias en su versión final.


In [1]:

# ==========================================================
# BLOQUE 1: IMPORTACIÓN Y CONFIGURACIÓN INICIAL
# ==========================================================
import os
import pandas as pd

path_base = r"C:\Users\juanl\Downloads\final\csv"

files = {
    "player": os.path.join(path_base, "player_clean.csv"),
    "common_player_info": os.path.join(path_base, "common_player_info_clean.csv"),
    "team": os.path.join(path_base, "team_clean.csv"),
    "game": os.path.join(path_base, "game_clean.csv"),
    "game_summary": os.path.join(path_base, "game_summary_clean.csv"),
    "other_stats": os.path.join(path_base, "other_stats_clean.csv")
}

frames = {}
for name, path in files.items():
    try:
        df = pd.read_csv(path, low_memory=False)
        frames[name] = df
        print(f"✅ {name} cargado: {df.shape[0]} filas, {df.shape[1]} columnas")
    except Exception as e:
        print(f"❌ Error cargando {name}: {e}")

# ==========================================================
# BLOQUE 2: JOIN DE JUGADORES
# ==========================================================
def detectar_clave(df1, df2):
    posibles = ["person_id", "player_id", "id"]
    for c in posibles:
        if c in df1.columns and c in df2.columns:
            return c
    return None

clave = detectar_clave(frames["player"], frames["common_player_info"])
print(f"\n🔗 Clave de unión detectada: {clave}\n")

player_enriched = pd.merge(
    frames["player"],
    frames["common_player_info"],
    on=clave,
    how="left",
    suffixes=("_player", "_info")
)

print(f"✅ Join completado: player_enriched ({player_enriched.shape[0]} filas, {player_enriched.shape[1]} columnas)")

player_out = os.path.join(path_base, "player_enriched_final.csv")
player_enriched.to_csv(player_out, index=False)
print(f"💾 Exportado → {player_out}")

# ==========================================================
# BLOQUE 3: COPIA DE LAS DEMÁS TABLAS COMO FINALES
# ==========================================================
final_tables = {
    "team_final.csv": frames["team"],
    "game_final.csv": frames["game"],
    "game_summary_final.csv": frames["game_summary"],
    "other_stats_final.csv": frames["other_stats"]
}

for fname, df in final_tables.items():
    out_path = os.path.join(path_base, fname)
    df.to_csv(out_path, index=False)
    print(f"💾 Copiada {fname} ({df.shape[0]} filas, {df.shape[1]} columnas)")

# ==========================================================
# BLOQUE 4: REPORTE DE CALIDAD DE JOIN
# ==========================================================
resumen = [
    {"Tabla": "player_enriched_final", "Filas": len(player_enriched), "Columnas": len(player_enriched.columns)}
]

for fname, df in final_tables.items():
    resumen.append({"Tabla": fname, "Filas": len(df), "Columnas": len(df.columns)})

reporte = pd.DataFrame(resumen)
print("\n📊 Resumen general de tablas finales:")
display(reporte)


✅ player cargado: 4831 filas, 5 columnas
✅ common_player_info cargado: 4171 filas, 32 columnas
✅ team cargado: 30 filas, 7 columnas
✅ game cargado: 65698 filas, 54 columnas
✅ game_summary cargado: 58110 filas, 12 columnas
✅ other_stats cargado: 28271 filas, 26 columnas

🔗 Clave de unión detectada: None

✅ Join completado: player_enriched (4899 filas, 35 columnas)
💾 Exportado → C:\Users\juanl\Downloads\final\csv\player_enriched_final.csv
💾 Copiada team_final.csv (30 filas, 7 columnas)
💾 Copiada game_final.csv (65698 filas, 54 columnas)
💾 Copiada game_summary_final.csv (58110 filas, 12 columnas)
💾 Copiada other_stats_final.csv (28271 filas, 26 columnas)

📊 Resumen general de tablas finales:


Unnamed: 0,Tabla,Filas,Columnas
0,player_enriched_final,4899,35
1,team_final.csv,30,7
2,game_final.csv,65698,54
3,game_summary_final.csv,58110,12
4,other_stats_final.csv,28271,26
