In [None]:
# ============================================================
#  HUDL / STATSBOMB → PLAYER-LEVEL DATASET + SCATTER PLOT
#  Autor: tú (plantilla lista para Hackatón)
# ============================================================

# --- 0) Dependencias
import os, time, json, math, getpass
from pathlib import Path
from urllib.parse import urljoin

import requests
import pandas as pd
from tqdm.auto import tqdm
import plotly.express as px

# --- 1) API configuration + local cache
# Base URL every request path is joined onto. Use the host you were given;
# the API version (v4/v5/v6) is part of each endpoint path, not of this base.
API_BASE = "https://data.statsbomb.com/"  # use the host you were given (v4/v5/v6 varies per endpoint)
# Directory where api_get stores one JSON file per cached response; created on import/run.
CACHE_DIR = Path("./_sb_cache"); CACHE_DIR.mkdir(exist_ok=True)

# Credentials are requested interactively and used for HTTP basic auth in api_get.
# The password prompt goes through getpass so it is not echoed.
SB_USER = input("Usuario StatsBomb/Hudl: ").strip()
SB_PASS = getpass.getpass("Contraseña (no se mostrará): ").strip()

def api_get(path:str, params=None, max_tries=3, sleep=1.2) -> dict|list:
    """
    GET a JSON resource with HTTP basic auth, retries and a simple on-disk cache.

    Args:
        path: endpoint path, e.g. '/api/v6/competitions' (mind the API version!).
        params: optional query-string parameters; they are also part of the cache key.
        max_tries: maximum number of HTTP attempts.
        sleep: seconds to wait between attempts.

    Returns:
        The decoded JSON payload (dict or list).

    Raises:
        RuntimeError: immediately on 401/403/404, or after ``max_tries``
            unsuccessful attempts (the message includes the last failure reason).

    Only 200 responses are cached. A cached file is returned as-is whenever it
    exists; there is no expiry, so delete the file to force a refresh.
    """
    # Cache key: the path with slashes flattened, plus sorted params for determinism.
    key = path.strip("/").replace("/", "_")
    if params:
        key += "_" + "_".join(f"{k}-{v}" for k, v in sorted(params.items()))
    cache_file = CACHE_DIR / f"{key}.json"
    if cache_file.exists():
        return json.loads(cache_file.read_text(encoding="utf-8"))

    url = urljoin(API_BASE, path.lstrip("/"))
    last_exc = None
    last_reason = "sin intentos"
    for attempt in range(1, max_tries + 1):
        try:
            r = requests.get(url, auth=(SB_USER, SB_PASS), params=params, timeout=60)
            if r.status_code == 200:
                data = r.json()
                cache_file.write_text(json.dumps(data), encoding="utf-8")
                return data
            if r.status_code in (401, 403, 404):
                # Auth/permission/not-found errors will not fix themselves: fail fast.
                raise RuntimeError(f"Error {r.status_code}: {r.text[:160]}")
            # Any other status (e.g. 429/5xx) is treated as transient and retried.
            last_exc = None
            last_reason = f"HTTP {r.status_code}"
        except requests.exceptions.RequestException as e:
            # Keep the cause so the final error is diagnosable instead of silent.
            last_exc = e
            last_reason = f"{type(e).__name__}: {e}"
        # No point in sleeping after the final attempt.
        if attempt < max_tries:
            time.sleep(sleep)
    raise RuntimeError(
        f"Fallo al obtener {url} tras {max_tries} intentos ({last_reason})"
    ) from last_exc

# --- 2) Endpoint wrappers (adjust to the API versions enabled for your account)
def get_competitions():
    """Fetch the competitions listing through api_get (v4 path: /api/v4/competitions)."""
    endpoint = "/api/v4/competitions"
    return api_get(endpoint)

def get_competition_seasons(competition_id:int):
    """Fetch the seasons of one competition (v6: /api/v6/competitions/{competition_id}/seasons)."""
    endpoint = "/api/v6/competitions/{}/seasons".format(competition_id)
    return api_get(endpoint)

def get_matches(competition_id:int, season_id:int):
    """Fetch the matches of a competition season (v6: /api/v6/competitions/{competition_id}/seasons/{season_id}/matches)."""
    endpoint = "/api/v6/competitions/{}/seasons/{}/matches".format(competition_id, season_id)
    return api_get(endpoint)

def get_events(match_id:int):
    """Fetch the event list of one match (v5: /api/v5/events/{match_id})."""
    endpoint = "/api/v5/events/{}".format(match_id)
    return api_get(endpoint)

# --- 3) Utilities to build minutes played per player
def estimate_minutes_from_events(events:list) -> dict:
    """
    Estimate minutes played per player from 'Starting XI' and 'Substitution' events.

    Args:
        events: list of event dicts for a single match.

    Returns:
        A flat ``{player_id: minutes_played}`` dict covering the players of every
        team found in the lineups and substitutions (it is not grouped by team).

    Notes:
        - The match end is the largest ``minute + second/60`` seen, never below 90.
        - Starters are assumed to be on the pitch from minute 0.
        - Only substitutions take a player off; other exits (e.g. red cards)
          are not modelled, so this is a high-level estimate.
        - NOTE(review): assumes ``minute`` is a running match clock across
          periods and that substitutions appear in chronological order - confirm
          against the feed specification.
    """
    def clock(ev) -> float:
        # Event timestamp in fractional minutes.
        return ev.get("minute", 0) + ev.get("second", 0) / 60.0

    # Match end ~ latest timestamp seen; assume 90 when the feed is shorter or empty.
    match_end = max(90.0, max((clock(ev) for ev in events), default=0.0))

    on_time = {}  # player_id -> minute the current stint started
    played = {}   # player_id -> minutes accumulated so far

    # Kick-off: every Starting XI player (both teams) starts at minute 0.
    for ev in events:
        if ev.get("type", {}).get("name") != "Starting XI":
            continue
        for p in ev.get("tactics", {}).get("lineup", []):
            pid = p["player"]["id"]
            on_time[pid] = 0.0
            played.setdefault(pid, 0.0)

    # Substitutions: close the outgoing player's stint, open the incoming one.
    for ev in events:
        if ev.get("type", {}).get("name") != "Substitution":
            continue
        # Include seconds so stints use the same clock as match_end.
        minute = float(clock(ev))
        out_player = ev.get("player", {}).get("id")
        if out_player in on_time:
            played[out_player] += minute - on_time.pop(out_player)
        in_player = ev.get("substitution", {}).get("replacement", {}).get("id")
        # Compare against None so a legitimate id of 0 is not dropped.
        if in_player is not None:
            on_time[in_player] = minute
            played.setdefault(in_player, 0.0)

    # Close the stints of everyone still on the pitch at the end.
    for pid, t_on in on_time.items():
        played[pid] += match_end - t_on

    return played  # minutes per player_id

# --- 4) Per-player metrics built from a match's events
def player_metrics_from_events(events:list) -> pd.DataFrame:
    """
    Build a per-player metrics table from the event list of a single match.

    Args:
        events: list of event dicts for one match.

    Returns:
        DataFrame with columns player_id, player_name, team, shots, goals,
        duels, xA, minutes_played, duels_per90. ``xA`` is the sum of
        ``shot.statsbomb_xg`` over shots whose ``shot.key_pass_id`` references a
        pass made by the player. ``duels_per90`` is NaN for players with zero
        estimated minutes. An empty DataFrame is returned when no player is found.
    """
    # Estimated minutes per player_id.
    minutes = estimate_minutes_from_events(events)

    # Lookup: pass event id -> id of the player who made the pass.
    passer_by_event = {}
    for ev in events:
        if ev.get("type", {}).get("name") != "Pass":
            continue
        event_id = ev.get("id")
        passer_id = ev.get("player", {}).get("id")
        # Skip incomplete passes so a None key can never match a shot without key pass.
        if event_id is not None and passer_id is not None:
            passer_by_event[event_id] = passer_id

    # Counters, keyed by player_id.
    agg = {}

    def touch(pid, name, team):
        # Create the player's row on first sight; later calls are no-ops.
        if pid not in agg:
            agg[pid] = {
                "player_id": pid, "player_name": name, "team": team,
                "shots": 0, "goals": 0, "duels": 0, "xA": 0.0,
            }

    for ev in events:
        etype = ev.get("type", {}).get("name")
        team = ev.get("team", {}).get("name")

        if etype == "Starting XI":
            # Lineup events carry no top-level player, so they must be handled
            # BEFORE the player guard below; otherwise starters who never
            # shoot, duel or pass would be missing from the table.
            for p in ev.get("tactics", {}).get("lineup", []):
                touch(p["player"]["id"], p["player"]["name"], team)
            continue

        pid = ev.get("player", {}).get("id")
        pname = ev.get("player", {}).get("name")
        if pid is None:
            continue

        if etype == "Shot":
            shot = ev.get("shot", {})
            touch(pid, pname, team)
            agg[pid]["shots"] += 1
            if shot.get("outcome", {}).get("name") == "Goal":
                agg[pid]["goals"] += 1
            # xA: credit the shot's xG to the player who made the key pass.
            key_id = shot.get("key_pass_id")
            xg = float(shot.get("statsbomb_xg", 0.0) or 0.0)
            if key_id is not None and key_id in passer_by_event:
                passer = passer_by_event[key_id]
                # Placeholder identity; replaced below from the passer's own events.
                touch(passer, f"Player {passer}", "Unknown")
                agg[passer]["xA"] += xg

        elif etype == "Duel":
            touch(pid, pname, team)
            agg[pid]["duels"] += 1

        elif etype == "Pass":
            # Make sure passers get a row (with real name/team) even without xA.
            touch(pid, pname, team)

    df = pd.DataFrame(agg.values())
    if df.empty:
        return df

    df["minutes_played"] = df["player_id"].map(lambda pid: minutes.get(pid, 0.0))

    # Per-90 metric. ``where`` keeps the column float64 and yields NaN for
    # zero minutes (replacing with pd.NA could turn it into object dtype).
    nineties = df["minutes_played"] / 90.0
    df["duels_per90"] = df["duels"] / nineties.where(nineties > 0)

    # Replace placeholder names/teams with what the player's own events report
    # (the last event seen for each player wins).
    id_to_name = {}
    id_to_team = {}
    for ev in events:
        player = ev.get("player")
        if player:
            id_to_name[player.get("id")] = player.get("name")
            id_to_team[player.get("id")] = ev.get("team", {}).get("name")
    df["player_name"] = [
        id_to_name.get(pid, name)
        for pid, name in zip(df["player_id"], df["player_name"])
    ]
    df["team"] = [
        id_to_team.get(pid, team)
        for pid, team in zip(df["player_id"], df["team"])
    ]

    return df

# --- 5) Pipeline: (competition_id, season_id) -> player-level DataFrame
def build_players_season_df(competition_id:int, season_id:int, min_minutes:int=600) -> pd.DataFrame:
    """
    Download every match of a season and aggregate the per-match player metrics.

    Args:
        competition_id: competition identifier passed to get_matches.
        season_id: season identifier passed to get_matches.
        min_minutes: players with fewer total estimated minutes are dropped.

    Returns:
        One row per (player_id, player_name, team) with summed shots, goals,
        duels, xA and minutes_played, plus duels_per90 recomputed on the season
        totals. An empty DataFrame is returned when no match produced data.
    """
    matches = get_matches(competition_id, season_id)
    # The id may come as 'match_id' or 'id' depending on the endpoint. Every
    # match is used: the former 'match_status' condition was always true
    # (`... or True`), so it has been removed rather than left as a fake filter.
    match_ids = [m.get("match_id") or m.get("id") for m in matches]
    match_ids = [mid for mid in match_ids if mid is not None]

    rows = []
    for mid in tqdm(match_ids, desc="Partidos"):
        evs = get_events(mid)
        dfm = player_metrics_from_events(evs)
        if not dfm.empty:
            dfm["match_id"] = mid
            rows.append(dfm)

    if not rows:
        return pd.DataFrame()

    allp = pd.concat(rows, ignore_index=True)

    # Aggregate per player (sum across matches).
    grp = allp.groupby(["player_id", "player_name", "team"], as_index=False).agg({
        "shots": "sum", "goals": "sum", "duels": "sum", "xA": "sum", "minutes_played": "sum"
    })
    # Recompute the rate on season totals; ``where`` keeps the column float64
    # with NaN for zero minutes (replacing with pd.NA could yield object dtype,
    # which breaks numeric plotting and statistics downstream).
    nineties = grp["minutes_played"] / 90.0
    grp["duels_per90"] = grp["duels"] / nineties.where(nineties > 0)

    # Keep only players with enough minutes.
    grp = grp[grp["minutes_played"] >= min_minutes].reset_index(drop=True)
    return grp

# --- 6) Minimal UX: pick a competition/season and build the dataset
# Load the competitions listing into a DataFrame and show what it contains,
# since the available columns depend on the endpoint version.
comps = pd.DataFrame(get_competitions())
print("Columnas en /competitions:", list(comps.columns))
print("Primeras filas:\n", comps.head(10))

# Suggest Liga MX rows when present (case-insensitive match, missing names ignored).
# NOTE(review): assumes the listing has competition_name / competition_id /
# season_id / season_name columns - a KeyError is raised otherwise.
cand = comps[comps["competition_name"].str.contains("Liga MX", case=False, na=False)]
if not cand.empty:
    print("\nCandidatos Liga MX:\n", cand[["competition_id","competition_name","season_id","season_name"]].head(10))
else:
    print("\nElige competition_id/season_id de la tabla anterior.")

# Ids are typed by the user; int() raises ValueError on non-numeric input.
competition_id = int(input("competition_id: ").strip())
season_id      = int(input("season_id: ").strip())

# Build the season table, keeping only players with at least 600 minutes.
players_df = build_players_season_df(competition_id, season_id, min_minutes=600)
print("Players DF shape:", players_df.shape)
# `display` is not imported in this file; presumably the IPython/Jupyter builtin.
display(players_df.head())

# --- 7) Interactive scatter plot (choose axes and colour)
# Stop here when the pipeline produced no rows.
if players_df.empty:
    raise SystemExit("No hay datos de jugadores. Verifica IDs o permisos de temporada.")

# Candidate numeric columns, restricted to those actually present in the table.
num_cols = ["xA","duels_per90","minutes_played","shots","goals"]
num_cols = [c for c in num_cols if c in players_df.columns]

print("\nVariables numéricas disponibles para graficar:", num_cols)
# An empty answer falls back to a default column. The typed names are not
# validated here, so a typo surfaces later as an error from px.scatter or
# from the column lookups below.
x_var = input(f"X (ej {num_cols[0]}): ").strip() or num_cols[0]
y_var = input(f"Y (ej {num_cols[1]}): ").strip() or (num_cols[1] if len(num_cols)>1 else num_cols[0])
color_var = input(f"Color (ej {num_cols[-1]}): ").strip() or num_cols[-1]

# One marker per player: size encodes minutes played, colour the chosen variable.
fig = px.scatter(
    players_df,
    x=x_var, y=y_var,
    color=color_var,
    hover_name="player_name",
    hover_data=["team","minutes_played","shots","goals","xA","duels_per90"],
    size="minutes_played",
    color_continuous_scale="RdYlGn",
    template="plotly_white",
    title=f"{x_var} vs {y_var} — color: {color_var}"
)
# Median lines (handy for a quick quadrant read)
fig.add_vline(x=players_df[x_var].median(), line_dash="dash", line_color="gray")
fig.add_hline(y=players_df[y_var].median(), line_dash="dash", line_color="gray")
fig.update_layout(height=720)
fig.show()
