# In [None]:
# ============================================================
#  COLAB ONE-CELL: Estilo ofensivo de UN equipo (Liga MX)
#  - StatsBomb Data API (Basic Auth)
#  - Elección de temporada y equipo
#  - Descarga todos los partidos del equipo
#  - Métricas + Radar (equipo vs media de liga)
# ============================================================

import os, getpass, time, json
from urllib.parse import urljoin
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# ---------------------------
# 0) CREDENCIALES Y CONFIG
# ---------------------------
# Root URL of the StatsBomb Data API; request paths are joined onto it.
API_BASE = "https://data.statsbomb.com/"

# Credentials are requested interactively so they never live in the notebook.
STATS_USER = input("Usuario StatsBomb/Hudl: ").strip()
STATS_PASS = getpass.getpass("Contraseña (no se mostrará): ")

# Local folders for cached event payloads and exported CSVs. Both are created
# with parents=True so the cell also works when /content does not exist yet.
CACHE_DIR = Path("/content/statsbomb_cache")
CACHE_DIR.mkdir(parents=True, exist_ok=True)
OUT_DIR = Path("/content/lmx_outputs")
OUT_DIR.mkdir(parents=True, exist_ok=True)

import requests
def api_get(path, params=None, max_tries=4, backoff=1.0, verbose=True):
    """GET a JSON resource using HTTP Basic Auth, retrying on failure.

    Args:
        path: Path joined onto API_BASE (a leading "/" is stripped first).
        params: Optional query-string parameters passed to requests.get.
        max_tries: Maximum number of attempts before giving up.
        backoff: Base delay in seconds; the wait after attempt t is backoff*t.
        verbose: When true, print a line for every failed attempt.

    Returns:
        The decoded JSON body of the first response with status 200.

    Raises:
        RuntimeError: Immediately on 401/403/404, or once every attempt has
            failed (the message then includes the last failure seen).
    """
    url = urljoin(API_BASE, path.lstrip("/"))
    last_problem = None
    for t in range(1, max_tries + 1):
        try:
            r = requests.get(url, auth=(STATS_USER, STATS_PASS), params=params, timeout=45)
            if r.status_code == 200:
                return r.json()
            elif r.status_code in (401, 403, 404):
                # Retrying cannot fix bad credentials or a missing resource.
                raise RuntimeError(f"{r.status_code} {r.text[:200]}")
            else:
                last_problem = f"{r.status_code} -> {r.text[:160]}"
        except requests.exceptions.RequestException as e:
            last_problem = f"Error conexión: {e}"
        if verbose:
            print(f"[{t}/{max_tries}] {last_problem}")
        # Only wait when another attempt follows; sleeping before the final
        # raise would just delay the error.
        if t < max_tries:
            time.sleep(backoff * t)
    detail = f" ({last_problem})" if last_problem else ""
    raise RuntimeError(f"No fue posible obtener {url}{detail}")

# ---------------------------
# 1) COMPETENCIAS + TEMPORADAS
# ---------------------------
comps = api_get("/api/v4/competitions")
dfc = pd.DataFrame(comps)
if dfc.empty:
    raise RuntimeError("No se recibieron competiciones. Verifica credenciales o acceso.")

# Keep the competition/season rows whose competition name matches Liga MX
# (case-insensitive regex; rows without a name never match).
mx_mask = dfc["competition_name"].str.contains("liga mx|mex", case=False, na=False)
season_cols = ["competition_id", "competition_name", "season_id", "season_name"]
mx = dfc.loc[mx_mask, season_cols].drop_duplicates()
if mx.empty:
    raise RuntimeError("No se encontró 'Liga MX' en /competitions. Imprime dfc para inspeccionar.")

# Renumber from 0 so the printed index is exactly what the user types back.
mx_listing = mx.reset_index(drop=True)
print("Temporadas disponibles de Liga MX:")
for i, row in mx_listing.iterrows():
    print(f"[{i}] comp_id={row['competition_id']} | {row['competition_name']} | season_id={row['season_id']} | {row['season_name']}")

# Anything that is not a valid in-range index falls back to the first season.
idx = input("Elige índice de temporada: ").strip()
if idx.isdigit() and int(idx) < len(mx_listing):
    idx = int(idx)
else:
    idx = 0
row = mx_listing.iloc[idx]
competition_id = int(row["competition_id"])
season_id = row["season_id"]  # kept as received; it is only interpolated into the URL

# ---------------------------
# 2) PARTIDOS DE LA TEMPORADA
# ---------------------------
# Matches: versión documentada suele ser /api/v6/competitions/{cid}/seasons/{sid}/matches
matches = api_get(f"/api/v6/competitions/{competition_id}/seasons/{season_id}/matches")
dfm = pd.DataFrame(matches)
if dfm.empty:
    raise RuntimeError("No se recibieron partidos para esa temporada. Prueba otra temporada.")


def _team_label(team, side):
    """Return the display name stored in a match's home/away team entry.

    StatsBomb match records nest the team as a dict keyed
    "<side>_team_name" (e.g. "home_team_name"); a plain "name" key and an
    already-flat string value are accepted as well. Returns None when the
    dict carries none of the known keys.
    """
    if not isinstance(team, dict):
        return team
    for key in ("name", f"{side}_team_name", "team_name"):
        label = team.get(key)
        if label is not None:
            return label
    return None


# Teams present in the season
home_names = dfm["home_team"].apply(lambda x: _team_label(x, "home"))
away_names = dfm["away_team"].apply(lambda x: _team_label(x, "away"))
teams = sorted(set(home_names.dropna()).union(set(away_names.dropna())))
print("\nEquipos en la temporada:\n", ", ".join(teams))

team_sel = input("\n👉 Escribe el nombre EXACTO del equipo a analizar (tal como aparece arriba): ").strip()
if team_sel not in teams:
    # Fall back to a case-insensitive match before giving up.
    cand = [t for t in teams if t.lower() == team_sel.lower()]
    if cand:
        team_sel = cand[0]
    else:
        raise RuntimeError(f"No se encontró el equipo '{team_sel}'. Revisa el listado.")

# Keep only the matches where the chosen team plays at home or away.
mask_team = (home_names.str.lower() == team_sel.lower()) | (away_names.str.lower() == team_sel.lower())
team_matches = dfm.loc[mask_team].reset_index(drop=True)
if team_matches.empty:
    raise RuntimeError("No hay partidos para ese equipo en esta temporada.")

print(f"\n{team_sel}: {len(team_matches)} partidos en la temporada '{row['season_name']}'.")

# ---------------------------
# 3) DESCARGA EVENTS DE TODOS SUS PARTIDOS (con caché)
# ---------------------------
def get_xy(v, idx):
    """Return element `idx` of a coordinate list/tuple as a float.

    Yields NaN when `v` is not a list or tuple, when it is too short to
    hold position `idx`, or when the stored value is None.
    """
    if not isinstance(v, (list, tuple)):
        return np.nan
    if len(v) <= idx:
        return np.nan
    coord = v[idx]
    return np.nan if coord is None else float(coord)

def load_events_match(mid):
    """Return the events payload for match `mid`, using an on-disk JSON cache.

    The payload is read from CACHE_DIR/events_<mid>.json when that file holds
    valid JSON; otherwise it is fetched with api_get and then cached.

    Args:
        mid: Match identifier, interpolated into the cache name and API path.

    Returns:
        The decoded JSON returned by the API (or by the cache file).
    """
    cache = CACHE_DIR / f"events_{mid}.json"
    if cache.exists():
        try:
            with open(cache, "r", encoding="utf-8") as f:
                return json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            # A truncated/corrupt file (e.g. an interrupted earlier run) must
            # not break every later run: drop it and download again.
            cache.unlink()
    js = api_get(f"/api/v4/events/{mid}")
    if js:
        # Write to a sibling temp file and rename it, so an interruption can
        # never leave a half-written cache entry behind. Empty payloads are
        # not cached, so a match with no events yet is retried next time.
        tmp = cache.with_name(cache.name + ".tmp")
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(js, f)
        tmp.replace(cache)
    time.sleep(0.15)  # short pause between API calls to stay under rate limits
    return js

# Collect one flattened events frame per match of the selected team.
dfs = []
total_matches = len(team_matches)
for i, m in team_matches.iterrows():
    # The match identifier is read from "match_id", falling back to "id".
    mid = m.get("match_id") or m.get("id")
    ev_json = load_events_match(mid)
    if ev_json:
        # Flatten nested event dicts into dotted columns and tag every row
        # with the match it belongs to.
        dfs.append(pd.json_normalize(ev_json).assign(match_id=mid))
        if (i + 1) % 5 == 0:
            print(f"Descargados {i+1}/{total_matches} partidos…")
    else:
        print(f"⚠ sin eventos: {mid}")

if dfs:
    all_ev = pd.concat(dfs, ignore_index=True)
else:
    all_ev = pd.DataFrame()
print(f"\nEventos totales del equipo y sus rivales (todos los partidos): {len(all_ev):,}")

if all_ev.empty:
    raise RuntimeError("No se obtuvieron eventos. Revisa permisos de API o prueba otra temporada.")

# ---------------------------
# 4) INGENIERÍA DE MÉTRICAS OFENSIVAS
# ---------------------------
ev = all_ev.copy()
ev["etype"] = ev["type.name"]
ev["team_name"] = ev["team.name"]


def _coord_series(frame, column, idx):
    """Coordinate `idx` of a location column as a float Series.

    Returns an all-NaN Series when `column` is absent, which happens when no
    event in the download carried that attribute.
    """
    if column not in frame.columns:
        return pd.Series(np.nan, index=frame.index, dtype=float)
    return frame[column].apply(lambda v: get_xy(v, idx)).astype(float)


# Start point of every event
ev["x"] = _coord_series(ev, "location", 0)
ev["y"] = _coord_series(ev, "location", 1)

# End point: pass end location, otherwise carry, otherwise shot. The fallback
# is decided on end_x and applied to both coordinates so x/y always come from
# the same source column.
end_x = _coord_series(ev, "pass.end_location", 0)
end_y = _coord_series(ev, "pass.end_location", 1)
for fallback_col in ("carry.end_location", "shot.end_location"):
    unresolved = end_x.isna()
    end_x = end_x.where(~unresolved, _coord_series(ev, fallback_col, 0))
    end_y = end_y.where(~unresolved, _coord_series(ev, fallback_col, 1))
ev["end_x"], ev["end_y"] = end_x, end_y

# Event-type flags
ev["is_pass"]  = ev["etype"].eq("Pass")
ev["is_carry"] = ev["etype"].eq("Carry")
ev["is_shot"]  = ev["etype"].eq("Shot")

# Pitch geometry constants used by the thresholds below
PITCH_LENGTH = 120.0
FINAL_THIRD_X = 80.0
BOX_X, BOX_Y_MIN, BOX_Y_MAX = 102.0, 18.0, 62.0
PROG_PASS_ADV  = 0.25 * PITCH_LENGTH
PROG_CARRY_ADV = 0.20 * PITCH_LENGTH

# Forward distance gained along x; NaN comparisons below evaluate to False.
ev["dx"] = ev["end_x"] - ev["x"]
ev["prog_pass"]   = ev["is_pass"]  & (ev["dx"] >= PROG_PASS_ADV)
ev["prog_carry"]  = ev["is_carry"] & (ev["dx"] >= PROG_CARRY_ADV)
# NOTE(review): this counts every pass/carry that ENDS in the final third,
# including those that also start there — confirm whether a start-x condition
# is wanted for a strict "entry" definition.
ev["third_entry"] = (ev["is_pass"] | ev["is_carry"]) & (ev["end_x"] >= FINAL_THIRD_X)
in_box_end = ev["end_y"].between(BOX_Y_MIN, BOX_Y_MAX, inclusive="both")
in_box_start = ev["y"].between(BOX_Y_MIN, BOX_Y_MAX, inclusive="both")
ev["pass2box"]     = ev["is_pass"] & (ev["end_x"] >= BOX_X) & in_box_end
ev["touch_in_box"] = (ev["x"] >= BOX_X) & in_box_start

# Completed crosses. In StatsBomb event data a completed pass carries NO
# outcome (outcomes such as "Incomplete" or "Out" are only recorded for
# failures), so completion is "outcome is missing", not outcome == "Complete".
ev["cross_complete"] = False
if "pass.cross" in ev.columns:
    is_cross = ev["pass.cross"].eq(True)  # NaN (not a cross) compares False
    if "pass.outcome.name" in ev.columns:
        completed = ev["pass.outcome.name"].isna()
    else:
        # No outcome column at all: no pass in the data recorded a failure.
        completed = pd.Series(True, index=ev.index)
    ev["cross_complete"] = ev["is_pass"] & is_cross & completed

# ---------------------------
# 5) AGREGA POR EQUIPO-PARTIDO Y LIGA
# ---------------------------
# One row per (team, match); the boolean flags sum to per-match counts.
grp = ev.groupby(["team_name","match_id"])
agg = grp.agg(
    passes=("is_pass","sum"),
    carries=("is_carry","sum"),
    shots=("is_shot","sum"),
    prog_passes=("prog_pass","sum"),
    prog_carries=("prog_carry","sum"),
    third_entries=("third_entry","sum"),
    passes2box=("pass2box","sum"),
    touches_box=("touch_in_box","sum"),
    crosses_cmp=("cross_complete","sum"),
).reset_index()

# Per-match average for every team. match_id is an identifier, not a metric:
# it is dropped first so a meaningless "mean match id" does not end up in the
# averages (and in the exported CSV).
team_mean = (
    agg.drop(columns="match_id")
       .groupby("team_name")
       .mean(numeric_only=True)
       .reset_index()
)

# Row of the selected team (case-insensitive match on the name)
team_row = team_mean.loc[team_mean["team_name"].str.lower()==team_sel.lower()]
if team_row.empty:
    raise RuntimeError("No se encontró el promedio por partido del equipo seleccionado.")

# Reference set used as "league": every team present in `ev`, including the
# selected one.
# NOTE(review): `ev` appears to hold only the selected team's matches, so the
# other teams are represented solely by their games against it — confirm this
# is the intended league baseline.
league_mean = team_mean.copy()

# ---------------------------
# 6) ÍNDICES DE ESTILO
# ---------------------------
def build_style(df):
    """Derive the four offensive style indices from per-match averages.

    Args:
        df: Frame with columns team_name, prog_passes, prog_carries,
            third_entries, passes2box, touches_box, shots and passes.

    Returns:
        A frame sharing df's index with columns team, BuildUp, Penetration,
        Finishing and Transition.
    """
    build_up = 0.5*df["prog_passes"] + 0.5*df["prog_carries"]
    penetration = 0.40*df["third_entries"] + 0.40*df["passes2box"] + 0.20*df["touches_box"]
    # Share of passes that move the ball forward; the denominator is floored
    # at 1 so a team with no passes does not divide by zero.
    pass_volume = df["passes"].clip(lower=1)
    transition = (df["prog_passes"] + df["prog_carries"]) / pass_volume
    return pd.DataFrame({
        "team": df["team_name"],
        "BuildUp": build_up,
        "Penetration": penetration,
        "Finishing": df["shots"],
        "Transition": transition,
    })

# Style indices of the selected team (single-row frame).
style_team = build_style(team_row)

# League profile: indices of every team, averaged into one single-row frame.
league_styles = build_style(league_mean)
style_league = league_styles.mean(numeric_only=True).to_frame().T

print("\nÍndices del equipo (promedio por partido):")
display(style_team)

# ---------------------------
# 7) RADAR — Team vs league average
# ---------------------------
# Axes drawn on the radar, in plotting order.
features = [
    "BuildUp",
    "Penetration",
    "Finishing",
    "Transition",
]

def radar_dual(row_team, row_league, features, title):
    """Draw a radar chart comparing one team's style indices with the league's.

    Both profiles are rescaled per feature using the min/max across the teams
    in the module-level `league_mean` (via build_style).

    Args:
        row_team: Single-row frame with a "team" column and one column per
            feature; its first row is plotted and "team" labels the trace.
        row_league: Single-row frame (or Series) with one value per feature.
        features: Ordered list of feature names, one per radar axis.
        title: Title placed above the chart.
    """
    # Reduce to a flat 1-D vector. A single-row DataFrame yields a (1, n)
    # array; closing the polygon by concatenating on that shape would stack a
    # second row instead of appending the first point, and plotting would then
    # fail with a shape mismatch against the n+1 angles.
    vals_team = np.atleast_2d(np.asarray(row_team[features], dtype=float))[0]
    vals_leag = np.atleast_2d(np.asarray(row_league[features], dtype=float))[0]

    # Per-feature range across the league; computed once and reused.
    league_styles = build_style(league_mean)[features]
    mins = league_styles.min().to_numpy(dtype=float)
    maxs = league_styles.max().to_numpy(dtype=float)
    span = maxs - mins + 1e-9  # epsilon avoids 0/0 when all teams are equal
    t = (vals_team - mins) / span
    l = (vals_leag - mins) / span

    # Repeat the first point at the end so each outline closes on itself.
    t = np.concatenate([t, t[:1]])
    l = np.concatenate([l, l[:1]])
    ang = np.linspace(0, 2*np.pi, len(features), endpoint=False)
    ang = np.concatenate([ang, ang[:1]])

    fig = plt.figure(figsize=(6.5,6.5))
    ax = fig.add_subplot(111, polar=True)
    ax.plot(ang, l, label="Media Liga")
    ax.fill(ang, l, alpha=0.12)
    ax.plot(ang, t, label=row_team["team"].values[0])
    ax.fill(ang, t, alpha=0.25)
    ax.set_xticks(ang[:-1])
    ax.set_xticklabels(features, fontsize=10)
    ax.set_yticklabels([])
    ax.set_title(title)
    ax.legend(loc="upper right")
    plt.show()

radar_title = f"Perfil Ofensivo — {style_team['team'].values[0]} vs Media Liga"
radar_dual(style_team, style_league, features, title=radar_title)

# ---------------------------
# 8) EXPORTS (optional)
# ---------------------------
# File name -> frame to write; all files go to OUT_DIR without the index.
csv_exports = {
    "team_mean_per_match.csv": team_mean,
    "style_team_indices.csv": style_team,
    "style_league_allteams.csv": build_style(league_mean),
}
for csv_name, frame in csv_exports.items():
    frame.to_csv(OUT_DIR / csv_name, index=False)
print("\nCSV guardados en:", OUT_DIR)
