# Extraer un partido de Sofascore con `match_id` (ScraperFC)

Este cuaderno toma un `match_id` de Sofascore y utiliza **ScraperFC** para obtener:

- Información general del partido (equipos, marcador, fecha, estadio, etc.).
- Estadísticas del equipo.
- Estadísticas de jugadores.
- Posiciones promedio.
- Shotmap y momentum.

> Ajusta `match_input` en la celda de la sección **Configuración** antes de ejecutar el resto del cuaderno.


In [None]:
import pandas as pd
import ScraperFC as sfc
from pathlib import Path


## Configuración

Coloca tu `match_id` aquí. Puedes pegar directamente el ID o la URL completa del partido; la URL debe incluir el fragmento `#id:<número>` para que el ID pueda extraerse.


In [None]:
match_input = "12345678"  # <- Replace with your Sofascore match_id or match URL


In [None]:
def normalize_match_id(match_input: str) -> str:
    """Return the bare Sofascore match id contained in ``match_input``.

    Accepts either a plain id (e.g. ``"12345678"``) or a match URL whose
    fragment carries the id as ``#id:<digits>``. Only the digits directly
    following ``#id:`` are returned, so extra fragment parameters (such as
    ``,tab:statistics``) do not leak into the id and break a later
    ``int(match_id)`` conversion.

    Inputs without an ``#id:`` marker are returned stripped of surrounding
    whitespace but otherwise unchanged.
    """
    import re  # local import keeps this cell independent of the imports cell

    match_input = str(match_input).strip()
    if "#id:" in match_input:
        tail = match_input.split("#id:")[-1]
        leading_digits = re.match(r"\s*([0-9]+)", tail)
        # If no digits follow the marker, fall back to the raw tail so the
        # caller still sees what was pasted.
        return leading_digits.group(1) if leading_digits else tail
    return match_input

# Resolve the configured input to the id string used by the later cells.
match_id = normalize_match_id(match_input)
match_id  # bare expression: shows the resolved id as the cell output


## Inicializar ScraperFC


In [None]:
# Client object whose methods the following cells call to fetch match data.
sofascore = sfc.Sofascore()


## Información general del partido


In [None]:
# Raw match payload from ScraperFC; the lookups below treat it as a dict of
# nested dicts.
match_dict = sofascore.get_match_dict(match_id)

# Every nested lookup uses `(... or {})` so that a section which is missing OR
# present-but-None yields None for the field instead of raising
# AttributeError. (`.get(key, {})` only covers the "missing" case.)
summary = {
    "match_id": match_id,
    "home_team": (match_dict.get("homeTeam") or {}).get("name"),
    "away_team": (match_dict.get("awayTeam") or {}).get("name"),
    "home_score": (match_dict.get("homeScore") or {}).get("display"),
    "away_score": (match_dict.get("awayScore") or {}).get("display"),
    "tournament": (match_dict.get("tournament") or {}).get("name"),
    "season": (match_dict.get("season") or {}).get("name"),
    "round": (match_dict.get("roundInfo") or {}).get("round"),
    # The timestamp is interpreted as seconds (unit="s"); unparseable values
    # are coerced rather than raising.
    "start_time": pd.to_datetime(match_dict.get("startTimestamp"), unit="s", errors="coerce"),
    "venue": (match_dict.get("venue") or {}).get("name"),
    "referee": (match_dict.get("referee") or {}).get("name"),
}

# One-row table, rendered as the cell output.
pd.DataFrame([summary])


## Scraping de datos del partido


In [None]:
# Fetch each per-match table for `match_id` through the Sofascore client.
team_stats_df = sofascore.scrape_team_match_stats(match_id)
player_stats_df = sofascore.scrape_player_match_stats(match_id)
avg_positions_df = sofascore.scrape_player_average_positions(match_id)
shotmap_df = sofascore.scrape_match_shots(match_id)
momentum_df = sofascore.scrape_match_momentum(match_id)

# Cell output: the `.shape` of every table, keyed by a short label.
{
    label: table.shape
    for label, table in (
        ("team_stats", team_stats_df),
        ("player_stats", player_stats_df),
        ("avg_positions", avg_positions_df),
        ("shotmap", shotmap_df),
        ("momentum", momentum_df),
    )
}


## ETL para Admin (Fantasy)

Este bloque transforma las estadísticas de ScraperFC al formato que espera el admin del Fantasy:
`player_id, match_id, goals, assists, minutesplayed, saves, fouls, yellow_cards, red_cards, clean_sheet, goals_conceded`.

- Usa columnas disponibles en `player_stats_df`.
- Completa `goals_conceded` a partir del marcador si no existe en el dataset.
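- Calcula `clean_sheet` (0/1) usando `goals_conceded` y minutos: vale 1 solo para porteros y defensas con al menos 60 minutos jugados y 0 goles recibidos; en cualquier otro caso vale 0.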


In [None]:
import numpy as np

# Folder that receives the files exported by this notebook; created together
# with any missing parents, and left untouched when it already exists.
output_dir = Path("matches_details")
output_dir.mkdir(parents=True, exist_ok=True)


def coalesce_column(df: pd.DataFrame, candidates: list[str]) -> pd.Series:
    """Return the first column of ``df`` whose name appears in ``candidates``.

    Candidates are tried in the given order. When none of them is a column of
    ``df``, an all-``None`` placeholder Series with one entry per row is
    returned instead.

    The placeholder is built on ``df.index`` so it stays row-aligned with the
    real columns of ``df`` when several results are combined in a DataFrame,
    even if ``df`` does not carry the default 0..n-1 index.
    """
    for col in candidates:
        if col in df.columns:
            return df[col]
    return pd.Series([None] * len(df), index=df.index, dtype=object)

def normalize_position(value: object) -> str:
    """Map a raw position label to ``GK``, ``DEF``, ``MID`` or ``FWD``.

    Matching is case-insensitive and ignores surrounding whitespace. ``None``
    and NaN-like values give an empty string. Labels that match no known alias
    are returned upper-cased so they remain visible downstream.

    The single-letter code ``g`` is accepted for goalkeepers, in line with the
    single-letter codes ``d``/``m``/``f`` already accepted for the other
    roles; without it such goalkeepers would never be recognised as ``GK``.
    """
    if value is None or pd.isna(value):
        return ""
    text = str(value).strip().lower()
    if text in {"g", "gk", "goalkeeper", "goalie", "portero"}:
        return "GK"
    if text in {"defender", "def", "df", "d", "defensa"}:
        return "DEF"
    if text in {"midfielder", "mid", "mf", "m", "mediocampista"}:
        return "MID"
    if text in {"forward", "fw", "f", "delantero"}:
        return "FWD"
    return text.upper()

# Pull each stat from the first column name that exists in `player_stats_df`;
# several spellings are tried per stat.
player_id = coalesce_column(player_stats_df, ["playerId", "player_id", "player.id"])
team_id = coalesce_column(player_stats_df, ["teamId", "team_id", "team.id"])
position = coalesce_column(
    player_stats_df,
    ["position", "playerPosition", "positionName", "player.position"],
).apply(normalize_position)
minutesplayed = coalesce_column(player_stats_df, ["minutesPlayed", "minutesplayed", "minutes"])
goals = coalesce_column(player_stats_df, ["goals"])
assists = coalesce_column(player_stats_df, ["assists"])
saves = coalesce_column(player_stats_df, ["saves", "savesMade"])
fouls = coalesce_column(player_stats_df, ["fouls", "foulsCommitted"])
yellow_cards = coalesce_column(player_stats_df, ["yellowCards", "yellow_cards"])
red_cards = coalesce_column(player_stats_df, ["redCards", "red_cards"])
goals_conceded = coalesce_column(player_stats_df, ["goalsConceded", "goals_conceded"])

# Team ids and final score from the match payload. `(... or {})` also covers
# a section that is present but None, matching the score lookups; the
# previous `.get(key, {})` form raised AttributeError in that case.
home_team_id = (match_dict.get("homeTeam") or {}).get("id")
away_team_id = (match_dict.get("awayTeam") or {}).get("id")
home_score = (match_dict.get("homeScore") or {}).get("display")
away_score = (match_dict.get("awayScore") or {}).get("display")

def _to_int(value, default=0):
    try:
        if value is None or (isinstance(value, float) and np.isnan(value)):
            return default
        return int(float(value))
    except Exception:
        return default

# Goals conceded per team id, taken from the opponent's final score. Left
# empty unless both team ids are known.
both_teams_known = home_team_id is not None and away_team_id is not None
team_goals_conceded = (
    {
        home_team_id: _to_int(away_score, None),
        away_team_id: _to_int(home_score, None),
    }
    if both_teams_known
    else {}
)


def _is_missing(value) -> bool:
    """True for ``None`` and for float NaN."""
    return value is None or (isinstance(value, float) and np.isnan(value))


def _value_at(series, row, fallback):
    """Positional lookup that gives ``fallback`` when ``row`` is past the end."""
    return series.iloc[row] if len(series) > row else fallback


clean_sheet = []
goals_conceded_filled = []

for row in range(len(player_stats_df)):
    player_team = _value_at(team_id, row, None)
    conceded = _value_at(goals_conceded, row, None)
    # No per-player figure: fall back to what the player's team conceded.
    if _is_missing(conceded) and player_team in team_goals_conceded:
        conceded = team_goals_conceded[player_team]
    goals_conceded_filled.append(conceded)

    minutes = _value_at(minutesplayed, row, 0)
    role = _value_at(position, row, "")
    if _is_missing(conceded):
        # Still unknown: the clean sheet cannot be decided for this row.
        clean_sheet.append(None)
        continue

    # Awarded only to GK/DEF with at least 60 minutes and nothing conceded.
    earned = (
        role in {"GK", "DEF"}
        and _to_int(minutes, 0) >= 60
        and _to_int(conceded, 0) == 0
    )
    clean_sheet.append(1 if earned else 0)

# Columns of the admin table, in output order.
admin_columns = [
    "player_id",
    "match_id",
    "goals",
    "assists",
    "minutesplayed",
    "saves",
    "fouls",
    "yellow_cards",
    "red_cards",
    "clean_sheet",
    "goals_conceded",
]

# Assemble one row per player; `match_id` is a scalar repeated on every row.
admin_stats_df = pd.DataFrame(
    {
        "player_id": player_id,
        "match_id": int(match_id),
        "goals": goals,
        "assists": assists,
        "minutesplayed": minutesplayed,
        "saves": saves,
        "fouls": fouls,
        "yellow_cards": yellow_cards,
        "red_cards": red_cards,
        "clean_sheet": clean_sheet,
        "goals_conceded": goals_conceded_filled,
    }
)

# Missing values become 0, then every column is forced to integers; anything
# that cannot be parsed as a number also ends up as 0.
admin_stats_df = admin_stats_df.fillna(0)
for column_name in admin_columns:
    as_number = pd.to_numeric(admin_stats_df[column_name], errors="coerce")
    admin_stats_df[column_name] = as_number.fillna(0).astype(int)

admin_stats_df.head()


### Texto para pegar en Admin

Copia el siguiente bloque y pégalo en el textarea de Admin (cada fila es un jugador).


In [None]:
# Field order of each comma-separated line in the admin payload.
admin_field_order = [
    "player_id",
    "match_id",
    "goals",
    "assists",
    "minutesplayed",
    "saves",
    "fouls",
    "yellow_cards",
    "red_cards",
    "clean_sheet",
    "goals_conceded",
]


def _row_to_admin_line(row) -> str:
    """Join the row's admin fields, in order, into one comma-separated line."""
    return ",".join(str(row[field]) for field in admin_field_order)


# One text line per player row, then a single newline-separated block.
admin_lines = admin_stats_df.apply(_row_to_admin_line, axis=1)
admin_payload = "\n".join(admin_lines)
admin_payload[:1000]  # preview


In [None]:
# Write the admin table into the export folder, without the index column.
admin_csv_path = output_dir / f"AdminStats_{match_id}.csv"
admin_stats_df.to_csv(admin_csv_path, index=False)


### Vista rápida de los datos


In [None]:
# Preview the first rows of the team statistics table.
team_stats_df.head()


In [None]:
# Preview the first rows of the player statistics table.
player_stats_df.head()


## Guardar a Excel (opcional)


In [None]:
# Export folder, declared again here so this cell does not depend on the ETL
# cell having created it.
output_dir = Path("matches_details")
output_dir.mkdir(parents=True, exist_ok=True)

output_file = output_dir / f"Sofascore_{match_id}.xlsx"

# (sheet name, table) pairs, written to the workbook in this order.
sheets = (
    ("Team Stats", team_stats_df),
    ("Player Stats", player_stats_df),
    ("Average Positions", avg_positions_df),
    ("Shotmap", shotmap_df),
    ("Match Momentum", momentum_df),
)
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
    for sheet_name, table in sheets:
        table.to_excel(writer, sheet_name=sheet_name, index=False)

output_file
