# Extraer un partido de Sofascore con `match_id` (ScraperFC)

Este cuaderno toma un `match_id` de Sofascore y utiliza **ScraperFC** para obtener:

- Información general del partido (equipos, marcador, fecha, estadio, etc.).
- Estadísticas del equipo.
- Estadísticas de jugadores.
- Posiciones promedio.
- Shotmap y momentum.

> Ajusta el `match_id` en la siguiente celda antes de ejecutar.


In [1]:
import pandas as pd
import ScraperFC as sfc
from pathlib import Path


## Configuración

Coloca tu `match_id` aquí. Puedes pegar directamente el ID o un `match_url` completo.


In [2]:
match_input = "15438956"  # <- Reemplaza con tu match_id o URL de Sofascore


In [3]:
def normalize_match_id(match_input: str) -> str:
    match_input = str(match_input).strip()
    if "#id:" in match_input:
        return match_input.split("#id:")[-1]
    return match_input

match_id = normalize_match_id(match_input)
match_id


'15438956'

## Inicializar ScraperFC


In [4]:
sofascore = sfc.Sofascore()


## Información general del partido


In [5]:
match_dict = sofascore.get_match_dict(match_id)

summary = {
    "match_id": match_id,
    "home_team": match_dict.get("homeTeam", {}).get("name"),
    "away_team": match_dict.get("awayTeam", {}).get("name"),
    "home_score": (match_dict.get("homeScore") or {}).get("display"),
    "away_score": (match_dict.get("awayScore") or {}).get("display"),
    "tournament": (match_dict.get("tournament") or {}).get("name"),
    "season": (match_dict.get("season") or {}).get("name"),
    "round": (match_dict.get("roundInfo") or {}).get("round"),
    "start_time": pd.to_datetime(match_dict.get("startTimestamp"), unit="s", errors="coerce"),
    "venue": (match_dict.get("venue") or {}).get("name"),
    "referee": (match_dict.get("referee") or {}).get("name"),
}

pd.DataFrame([summary])


Running


Unnamed: 0,match_id,home_team,away_team,home_score,away_score,tournament,season,round,start_time,venue,referee
0,15438956,FC Cajamarca,Deportivo Garcilaso,1,1,"Liga 1, Apertura",Liga 1 2026,2,2026-02-09 20:30:00,,Jordi Espinoza


## ETL para Admin (Fantasy)

Este bloque transforma las estadísticas de ScraperFC al formato que espera el admin del Fantasy:
`player_id, match_id, goals, assists, minutesplayed, saves, fouls, yellow_cards, red_cards, clean_sheet, goals_conceded`.

- Usa columnas disponibles en `player_stats_df`.
- Completa `goals_conceded` a partir del marcador si no existe en el dataset.
- Calcula `clean_sheet` cuando corresponde (0/1) usando `goals_conceded` y minutos.


In [6]:
# Extraer stats de jugadores desde SofaScore
try:
    player_stats_df = sofascore.scrape_player_match_stats(match_id)
except Exception as exc:
    print('No se pudo scrapear player stats:', exc)
    player_stats_df = pd.DataFrame()

if not isinstance(player_stats_df, pd.DataFrame):
    player_stats_df = pd.DataFrame(player_stats_df or [])

if player_stats_df.empty:
    print('player_stats_df vacio: revisa match_id o el scraper.')


In [7]:
import numpy as np

output_dir = Path("matches_details")
output_dir.mkdir(parents=True, exist_ok=True)


def coalesce_column(df: pd.DataFrame, candidates: list[str]) -> pd.Series:
    for col in candidates:
        if col in df.columns:
            return df[col]
    return pd.Series([None] * len(df))

def normalize_position(value: object) -> str:
    if value is None or pd.isna(value):
        return ""
    text = str(value).strip().lower()
    if text in {"gk", "goalkeeper", "goalie", "portero"}:
        return "GK"
    if text in {"defender", "def", "df", "d", "defensa"}:
        return "D"
    if text in {"midfielder", "mid", "mf", "m", "mediocampista"}:
        return "M"
    if text in {"forward", "fw", "f", "delantero"}:
        return "F"
    return text.upper()

player_id = coalesce_column(player_stats_df, ["id", "player_id", "player.id"])
team_id = coalesce_column(player_stats_df, ["teamId", "team_id", "team.id"])
position = coalesce_column(player_stats_df, ["pos", "playerPosition", "positionName", "player.position"]).apply(normalize_position)
minutesPlayed = coalesce_column(player_stats_df, ["minutesPlayed", "minutesplayed", "minutes"])
goals = coalesce_column(player_stats_df, ["goals"])
assists = coalesce_column(player_stats_df, ["goalAssist"])
saves = coalesce_column(player_stats_df, ["saves", "savesMade"])
fouls = coalesce_column(player_stats_df, ["fouls", "foulsCommitted"])

home_team_id = match_dict.get("homeTeam", {}).get("id")
away_team_id = match_dict.get("awayTeam", {}).get("id")
home_score = (match_dict.get("homeScore") or {}).get("display")
away_score = (match_dict.get("awayScore") or {}).get("display")

def _to_int(value, default=0):
    try:
        if value is None or (isinstance(value, float) and np.isnan(value)):
            return default
        return int(float(value))
    except Exception:
        return default

team_goals_conceded = {}
if home_team_id is not None and away_team_id is not None:
    team_goals_conceded[home_team_id] = _to_int(away_score, None)
    team_goals_conceded[away_team_id] = _to_int(home_score, None)

clean_sheet = []
goals_conceded_filled = []

for idx in range(len(player_stats_df)):
    team_value = team_id.iloc[idx] if len(team_id) > idx else None
    minutes_val = minutesPlayed.iloc[idx] if len(minutesPlayed) > idx else 0
    position_val = position.iloc[idx] if len(position) > idx else ""
    is_defensive = position_val in {"GK", "D"}

yellow_cards = coalesce_column(player_stats_df, ["yellowCards", "yellow_cards", "yellow", "yc"])
red_cards = coalesce_column(player_stats_df, ["redCards", "red_cards", "red", "rc"])

admin_stats_df = pd.DataFrame({
    "player_id": player_id,
    "match_id": match_id,
    "goals": goals,
    "assists": assists,
    "minutesplayed": minutesPlayed,
    "saves": saves,
    "fouls": fouls,
    "yellow_cards": yellow_cards,
    "red_cards": red_cards,
    "clean_sheet": None,
    "goals_conceded": None,
})

admin_stats_df = admin_stats_df.fillna(0)
for col in ["player_id", "match_id", "minutesplayed", "goals", "assists", "saves", "fouls", "yellow_cards", "red_cards"]:
    admin_stats_df[col] = pd.to_numeric(admin_stats_df[col], errors="coerce").fillna(0).astype(int)

admin_stats_df.head()


  admin_stats_df = admin_stats_df.fillna(0)


Unnamed: 0,player_id,match_id,goals,assists,minutesplayed,saves,fouls,yellow_cards,red_cards,clean_sheet,goals_conceded
0,894214,15438956,0,0,90,4,0,0,0,0,0
1,984175,15438956,0,0,90,0,1,0,0,0,0
2,932098,15438956,0,0,90,0,2,0,0,0,0
3,1526674,15438956,0,0,90,0,1,0,0,0,0
4,973682,15438956,0,0,90,0,1,0,0,0,0


### Texto para pegar en Admin

Copia el siguiente bloque y pégalo en el textarea de Admin (cada fila es un jugador).


In [8]:
admin_lines = admin_stats_df.apply(
    lambda row: ",".join(
        str(row[col])
        for col in [
            "player_id",
            "match_id",
            "minutesplayed",
            "goals",
            "assists",
            "saves",
            "fouls",
            "yellow_cards",
            "red_cards",
            "clean_sheet",
            "goals_conceded",
        ]
    ),
    axis=1,
)
admin_payload = "\n".join(admin_lines)
admin_payload[:1000]  # preview


'894214,15438956,90,0,0,4,0,0,0,0,0\n984175,15438956,90,0,0,0,1,0,0,0,0\n932098,15438956,90,0,0,0,2,0,0,0,0\n1526674,15438956,90,0,0,0,1,0,0,0,0\n973682,15438956,90,0,0,0,1,0,0,0,0\n883393,15438956,59,0,0,0,1,0,0,0,0\n832001,15438956,90,0,0,0,4,0,0,0,0\n814017,15438956,90,0,0,0,5,0,0,0,0\n340077,15438956,74,0,0,0,1,0,0,0,0\n356340,15438956,87,0,0,0,0,0,0,0,0\n31175,15438956,90,1,0,0,2,0,0,0,0\n1464527,15438956,31,0,0,0,1,0,0,0,0\n1463863,15438956,16,0,0,0,4,0,0,0,0\n933730,15438956,12,0,0,0,0,0,0,0,0\n1807599,15438956,0,0,0,0,0,0,0,0,0\n1020957,15438956,0,0,0,0,0,0,0,0,0\n1633854,15438956,0,0,0,0,0,0,0,0,0\n1107870,15438956,0,0,0,0,0,0,0,0,0\n2125211,15438956,0,0,0,0,0,0,0,0,0\n1049075,15438956,0,0,0,0,0,0,0,0,0\n846373,15438956,90,0,0,1,0,0,0,0,0\n1018233,15438956,83,0,0,0,1,0,0,0,0\n873645,15438956,90,0,0,0,1,0,0,0,0\n338883,15438956,90,0,0,0,0,0,0,0,0\n1018255,15438956,90,0,0,0,1,0,0,0,0\n842529,15438956,90,0,0,0,0,0,0,0,0\n385894,15438956,69,0,0,0,0,0,0,0,0\n1009256,15438956,69,0,0

In [9]:
admin_stats_df.to_csv(output_dir / f"Fantasy_{match_id}.csv", index=False)


### Vista rápida de los datos


In [10]:
player_stats_df.head()


Unnamed: 0,name,slug,shortName,position,jerseyNumber,height,userCount,gender,id,country,...,shotValueNormalized,shotOffTarget,onTargetScoringAttempt,totalOffside,goals,bigChanceMissed,bigChanceCreated,hitWoodwork,teamName,captain
0,Carlos Mosquera,carlos-mosquera,C. Mosquera,G,1,191,28,M,894214,"{'alpha2': 'CO', 'alpha3': 'COL', 'name': 'Col...",...,,,,,,,,,FC Cajamarca,
1,Jose Anthony Gallardo,jose-anthony-gallardo,J. Gallardo,D,15,170,8,M,984175,"{'alpha2': 'PE', 'alpha3': 'PER', 'name': 'Per...",...,,,,,,,,,FC Cajamarca,
2,Alexis Rodas,rodas-alexis,A. Rodas,D,14,182,11,M,932098,"{'alpha2': 'PY', 'alpha3': 'PRY', 'name': 'Par...",...,,,,,,,,,FC Cajamarca,
3,Matias Almiron,matias-almiron,M. Almiron,D,22,188,34,M,1526674,"{'alpha2': 'UY', 'alpha3': 'URY', 'name': 'Uru...",...,,,,,,,,,FC Cajamarca,
4,Ricardo Lagos,ricardo-lagos,R. Lagos,D,13,175,170,M,973682,"{'alpha2': 'PE', 'alpha3': 'PER', 'name': 'Per...",...,,,,,,,,,FC Cajamarca,
