In [None]:
import json
import os
from collections import defaultdict
import pandas as pd


# Folder that holds the match JSON files.
FOLDER = "data/playersbio/matches"

# Maps an event typeId to the label used as the stat name for that event.
EVENT_IDS = {
    1: "Pass",
    3: "Take On",
    7: "Tackle",
    8: "Interception",
    10: "Save",
    11: "Claim",
    12: "Clearance",
    13: "Miss",
    14: "Post",
    15: "AttemptSaved",
    16: "Goal",
    39: "TempAttempt",
    41: "Punch",
    44: "Aerial",
    45: "Challenge",
    49: "BallRecovery",
    50: "Dispossessed",
    53: "CrossNotClaimed",
    59: "KeeperSweeper",
    60: "ChanceMissed",
    61: "BallTouch",
    63: "TempSave",
    64: "Resume",
    66: "PossessionData",
    67: "FiftyFifty",
    73: "OtherBallContact",
    74: "BlockedPass",
    83: "AttemptedTackle",
}

# Two-level accumulator: teams[team_name][stat_name].
# A stat that has never been written reads as 0, so it can be incremented directly.
teams = defaultdict(lambda: defaultdict(int))

def get_team_name(match, contestant_id):
    """Return the name of the contestant in *match* whose id is *contestant_id*.

    ``match["contestant"]`` is scanned in order and the first entry with a
    matching ``"id"`` wins. Returns ``None`` when no entry matches.
    """
    matching_names = (
        entry["name"]
        for entry in match["contestant"]
        if entry["id"] == contestant_id
    )
    return next(matching_names, None)


# ============================
# PROCESS EVERY MATCH
# ============================
# Reads every *.json file in FOLDER, accumulates per-team totals into `teams`
# (matches played, event counts, pass outcomes and per-qualifier counts),
# then writes one row per team to a parquet file.

# Columns that hold text metadata; every other stat column is a counter.
METADATA_COLUMNS = ("season", "competition", "phase")
OUTPUT_FILE = "LigaArgentina2024.parquet"

# os.listdir() returns entries in arbitrary order. Sorting makes the run
# reproducible: the metadata kept for a team (last file wins) and the row
# order of the final table no longer depend on the filesystem.
for file in sorted(os.listdir(FOLDER)):
    if not file.endswith(".json"):
        continue

    with open(os.path.join(FOLDER, file), encoding="utf-8") as f:
        data = json.load(f)

    match = data["matchInfo"]
    events = data["liveData"]["event"]

    # Match metadata
    season = match["tournamentCalendar"]["name"]
    competition = match["competition"]["name"]
    phase = match["stage"]["name"]

    # NOTE(review): presumably contestant[0] is the home side and
    # contestant[1] the away side — confirm against the data provider.
    local = match["contestant"][0]["name"]
    visitante = match["contestant"][1]["name"]

    # Matches played ("PJ") plus metadata. The metadata is overwritten on
    # every match, so each team keeps the values of its last processed file.
    for team in (local, visitante):
        teams[team]["PJ"] += 1
        teams[team]["season"] = season
        teams[team]["competition"] = competition
        teams[team]["phase"] = phase

    # -------------------------
    # MATCH EVENTS
    # -------------------------
    for ev in events:
        tid = ev.get("typeId")
        event_name = EVENT_IDS.get(tid)
        if event_name is None:
            # Event type is not tracked (or the event has no typeId).
            continue

        # .get(): an event without a contestantId is skipped like any other
        # event whose team cannot be resolved, instead of raising KeyError.
        team_name = get_team_name(match, ev.get("contestantId"))
        if team_name is None:
            continue

        stats = teams[team_name]
        stats[event_name] += 1

        # Passes (special handling): also split the total by outcome.
        # Any outcome other than 1, including a missing one, counts as failed.
        if tid == 1:
            if ev.get("outcome") == 1:
                stats["Pass_success"] += 1
            else:
                stats["Pass_failed"] += 1

        # Qualifiers: one counter per (event, qualifierId) pair.
        # `or ()` also covers a qualifier field that is present but null.
        for q in ev.get("qualifier") or ():
            stats[f"{event_name}_Q{q['qualifierId']}"] += 1


# ============================
# BUILD THE FINAL DATAFRAME
# ============================
# orient="index" makes each team a row and lets pandas infer a dtype per
# column, instead of transposing a frame whose columns all end up `object`.
df = (
    pd.DataFrame.from_dict(teams, orient="index")
    .reset_index()
    .rename(columns={"index": "team"})
)

# A counter a team never incremented is absent from its dict and would show
# up as NaN; the real value is 0, so fill it and keep counters as integers.
count_columns = [
    col for col in df.columns if col != "team" and col not in METADATA_COLUMNS
]
if count_columns:
    df[count_columns] = df[count_columns].fillna(0).astype("int64")

df.to_parquet(OUTPUT_FILE, index=False)

print(f"Archivo guardado: {OUTPUT_FILE}")
print(df.head())


Archivo guardado: LigaArgentina2024.parquet
                  team  PJ season                 competition      phase  \
0          Estudiantes  32   2024  Liga Profesional Argentina  1st Phase   
1     Barracas Central  29   2024  Liga Profesional Argentina  1st Phase   
2         Boca Juniors  31   2024  Liga Profesional Argentina  1st Phase   
3          River Plate  29   2024  Liga Profesional Argentina  1st Phase   
4  Central Córdoba SdE  28   2024  Liga Profesional Argentina  1st Phase   

    Pass Pass_success Pass_Q279 Pass_Q56 Pass_Q178  ... BlockedPass_Q189  \
0  16279        12708        70    16279     14200  ...              NaN   
1  10886         7747        68    10886      9054  ...              NaN   
2  14683        11491        57    14683     12977  ...              NaN   
3  16734        13499        54    16734     14863  ...              NaN   
4  12579         9542        64    12579     10993  ...              NaN   

  Miss_Q233 BallRecovery_Q343 Goal_Q68 Pu