In [41]:
from pathlib import Path
import pandas as pd
import json

# Folder of the match used for this test run; adjust it to point at your own match folder.
# It is passed to load_match_folder_generic, which looks for 'csv' and 'json' subfolders inside it.
MATCH_DIR = Path(r"C:\Users\manue\OneDrive\Escritorio\Proyecto WhoScored\data\MatchCenter\Competition\Season\20250815_Girona_vs_Rayo_Vallecano_1913916")

def read_csv_safe(p: Path):
    """Read a CSV file, guessing its encoding and field separator.

    Encodings are tried in the order utf-8, cp1252, latin-1 and, for each
    encoding that decodes, the separators ",", ";" and "|" are tried in turn.

    ``pd.read_csv`` does not fail when given the wrong separator: it simply
    returns a single-column frame. A parse is therefore only accepted
    immediately when it yields more than one column; if no separator does,
    the first successful (single-column) parse is returned instead.

    Args:
        p: Path of the CSV file.

    Returns:
        The parsed DataFrame, or None when the file does not exist or cannot
        be parsed with any encoding/separator combination.
    """
    if not p.exists():
        return None

    for enc in ("utf-8", "cp1252", "latin-1"):
        fallback = None
        for sep in (",", ";", "|"):
            try:
                df = pd.read_csv(p, sep=sep, encoding=enc)
            except UnicodeError:
                # Wrong encoding: every separator would fail the same way.
                break
            except (ValueError, OSError):
                # pandas ParserError / EmptyDataError are ValueError subclasses;
                # this separator did not work, try the next one.
                continue
            if df.shape[1] > 1:
                return df
            if fallback is None:
                fallback = df
        if fallback is not None:
            # The file decoded fine but is (or looks) single-column.
            return fallback
    return None

def read_json_safe(p: Path):
    """Read a JSON or JSON-lines file without raising.

    The file is decoded as UTF-8; a leading byte-order mark, if present, is
    stripped (``utf-8-sig``), because ``json.loads`` rejects text that starts
    with a BOM. The whole text is first parsed as a single JSON document; if
    that fails, every non-blank line is parsed as its own JSON value.

    Args:
        p: Path of the JSON file.

    Returns:
        The parsed JSON value, a list with one parsed value per non-blank
        line for JSON-lines input, or None when the file does not exist,
        cannot be read/decoded, or is not valid in either format.
    """
    if not p.exists():
        return None
    try:
        # Read once; both parsing attempts below reuse the same text.
        text = p.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeError):
        return None

    try:
        return json.loads(text)
    except (ValueError, RecursionError):
        # Not a single JSON document; fall through to the JSON-lines attempt.
        pass

    try:
        return [json.loads(line) for line in text.splitlines() if line.strip()]
    except (ValueError, RecursionError):
        return None

def load_match_folder_generic(match_dir: Path) -> dict:
    """
    Load every CSV and JSON file found in the 'csv' and 'json' subfolders.

    - Columns are neither renamed nor dropped.
    - Keys are the file names without extension (e.g. 'events_passes').
    - Values are DataFrames (CSV files and JSON lists); any other JSON value
      (typically a dict) is stored as parsed.
    - Files that cannot be read are skipped. JSON files are loaded after the
      CSV files, so a JSON file replaces a CSV entry with the same name.
    """
    loaded = {}

    # CSV files: stored exactly as parsed.
    tables_dir = match_dir / "csv"
    if tables_dir.exists():
        for csv_path in tables_dir.glob("*.csv"):
            frame = read_csv_safe(csv_path)
            if frame is None:
                continue
            loaded[csv_path.stem] = frame

    # JSON files: lists become DataFrames, everything else is kept as-is.
    docs_dir = match_dir / "json"
    if docs_dir.exists():
        for json_path in docs_dir.glob("*.json"):
            payload = read_json_safe(json_path)
            if payload is None:
                continue
            if isinstance(payload, list):
                payload = pd.DataFrame(payload)
            loaded[json_path.stem] = payload

    return loaded

# Load every CSV/JSON file of the selected match into a dict keyed by file name.
dfs_raw = load_match_folder_generic(MATCH_DIR)

# Quick inspection of what was loaded; column names are printed one per line
# so that none of them is truncated.
for k, v in dfs_raw.items():
    if isinstance(v, pd.DataFrame):
        print(f"—— {k} —— shape={v.shape}")
        for c in v.columns.tolist():
            print("   •", c)
    elif isinstance(v, dict):
        # Only the first 12 keys are shown to keep the output short.
        print(f"—— {k} —— (dict) keys={list(v.keys())[:12]}…")
    else:
        # The loader keeps non-list, non-dict JSON values (e.g. a bare string
        # or number) as parsed; those have no .keys(), so show type and value.
        print(f"—— {k} —— ({type(v).__name__}) value={v!r}")
    print()

—— events —— shape=(1486, 18)
   • match_id
   • eventId
   • minute
   • second
   • expandedMinute
   • period
   • teamId
   • playerId
   • x
   • y
   • endX
   • endY
   • typeValue
   • typeName
   • outcomeValue
   • outcomeName
   • relatedEventId
   • qualifiers

—— events_defensive —— shape=(244, 13)
   • match_id
   • eventId
   • minute
   • second
   • expandedMinute
   • period
   • teamId
   • playerId
   • x
   • y
   • typeName
   • outcomeName
   • qualifiers

—— events_gk_actions —— shape=(22, 15)
   • match_id
   • eventId
   • minute
   • second
   • expandedMinute
   • period
   • teamId
   • playerId
   • x
   • y
   • typeName
   • outcomeName
   • gk_goal_mouth_y
   • gk_goal_mouth_z
   • qualifiers

—— events_passes —— shape=(943, 24)
   • match_id
   • eventId
   • minute
   • second
   • expandedMinute
   • period
   • teamId
   • playerId
   • x
   • y
   • endX
   • endY
   • typeName
   • outcomeName
   • pass_outcome
   • is_key_pass
   • is_assist
   •