# Librerías

In [1]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# Conexión

In [2]:
import os
from urllib.parse import quote_plus

# Connection settings for the MySQL data warehouse. Each value can be
# overridden through an environment variable so credentials do not have to
# live in the notebook; the literals are kept only as backward-compatible
# fallbacks.
# NOTE(review): the fallback password is still committed in source — rotate it
# and drop the default once the environment variables are in place.
user = os.environ.get("FUTBOL_DW_USER", "root")
password = os.environ.get("FUTBOL_DW_PASSWORD", "Levp13aa")
host = os.environ.get("FUTBOL_DW_HOST", "localhost")
database = os.environ.get("FUTBOL_DW_DATABASE", "futbol_dw")

# URL-escape user and password: characters such as "@", "/" or ":" would
# otherwise be parsed as URL delimiters and corrupt the connection string.
engine = create_engine(
    f"mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}@{host}/{database}"
)

# Funciones auxiliares 

In [3]:
# League names used when a player-pool query is restricted to the "big five"
# leagues (the `leagues_big5` flag of get_players_pool).
# NOTE(review): presumably these must match the `league` column values of
# silver_players exactly, including the "Laliga" spelling — verify against the data.
BIG_FIVE_LEAGUES = ["Laliga", "Premier League", "Serie A", "Bundesliga", "Ligue 1"]

def cosine_sim(a, b):
    """Return the cosine similarity between two vectors.

    Both inputs are converted to float arrays. The result is ``np.nan`` when
    either input contains a NaN or has zero Euclidean norm (the similarity is
    undefined in those cases); otherwise it is a plain Python float.
    """
    vec_a = np.asarray(a, dtype=float)
    vec_b = np.asarray(b, dtype=float)

    # A single NaN component makes the whole comparison meaningless.
    if np.isnan(vec_a).any() or np.isnan(vec_b).any():
        return np.nan

    norm_a, norm_b = np.linalg.norm(vec_a), np.linalg.norm(vec_b)
    # Guard against division by zero for null vectors.
    if 0 in (norm_a, norm_b):
        return np.nan

    return float(np.dot(vec_a, vec_b) / (norm_a * norm_b))

def zscore_series(s: pd.Series):
    """Standardize a Series to z-scores: ``(value - mean) / std``.

    Mean and standard deviation come from the pandas ``mean()`` / ``std()``
    methods with their default arguments. When the standard deviation is
    exactly zero, an all-NaN Series with the same index is returned instead
    of dividing by zero.
    """
    center, spread = s.mean(), s.std()
    if spread != 0:
        return (s - center) / spread
    # No spread: every z-score is undefined.
    return pd.Series(np.nan, index=s.index)

Creación de vectores de estilo de equipo / jugador

In [4]:
def get_team_profile(engine, team_name: str, season: str) -> pd.Series:
    """Return the average style profile of a team for one season.

    Queries the ``silver_teams`` table, averaging the five ``idx_*_style``
    columns over all rows that match ``team_name`` and ``season`` (grouped by
    team, season and league).

    Args:
        engine: SQLAlchemy engine (or connection) passed to ``pd.read_sql``.
        team_name: Exact team name to look up.
        season: Exact season value to look up.

    Returns:
        The first row of the result as a Series with the fields ``team_name``,
        ``season``, ``league``, ``pos_style``, ``vert_style``, ``off_style``,
        ``def_style`` and ``aggr_style``.

    Raises:
        ValueError: If no row matches the team/season pair.
    """
    # Local import: the file-level imports only bring in `create_engine`.
    from sqlalchemy import text

    # Bound parameters instead of string interpolation: this prevents SQL
    # injection and keeps names containing an apostrophe from breaking the query.
    query = text(
        """
    SELECT
        team_name,
        season,
        league,
        AVG(idx_possession_style)    AS pos_style,
        AVG(idx_verticality_style)   AS vert_style,
        AVG(idx_offensive_style)     AS off_style,
        AVG(idx_defensive_style)     AS def_style,
        AVG(idx_aggressiveness_style) AS aggr_style
    FROM silver_teams
    WHERE team_name = :team_name
      AND season = :season
    GROUP BY team_name, season, league;
    """
    )
    df = pd.read_sql(
        query,
        engine,
        params={"team_name": team_name, "season": season},
    )
    if df.empty:
        raise ValueError(f"No se encontró silver_teams para {team_name} - {season}")
    # If the GROUP BY yields several rows (one per league), only the first is used.
    return df.iloc[0]

def build_team_style_vector(team_row: pd.Series) -> np.ndarray:
    """Pack a team profile row into a 5-component float style vector.

    The components are read from ``team_row`` in this fixed order:
    possession, verticality, attack, defense, aggressiveness
    (keys ``pos_style``, ``vert_style``, ``off_style``, ``def_style``,
    ``aggr_style``).
    """
    style_keys = ("pos_style", "vert_style", "off_style", "def_style", "aggr_style")
    components = [team_row[key] for key in style_keys]
    return np.array(components, dtype=float)

def get_players_pool(
    engine,
    season: str | None = None,
    leagues: list[str] | None = None,
    leagues_big5: bool = False,
    min_minutes: int = 600,
) -> pd.DataFrame:
    """Return candidate players aggregated from ``silver_players``.

    Rows are grouped by player, name, team, league, season and position.
    For each group the query returns the summed minutes (``minutes_total``),
    the averages of the six ``idx_*`` indices (``finishing``, ``playmaking``,
    ``progression``, ``involvement``, ``defending``, ``discipline``) and
    ``age_years``, the average of the integer parsed from the part of
    ``player_age`` that precedes the first ``-``.

    Args:
        engine: SQLAlchemy engine (or connection) passed to ``pd.read_sql``.
        season: If given, keep only rows of this season.
        leagues: If given and non-empty, keep only rows of these leagues.
        leagues_big5: If True, ``leagues`` is replaced by ``BIG_FIVE_LEAGUES``
            (any explicit ``leagues`` value is ignored).
        min_minutes: Minimum ``minutes_total`` a group needs to be returned.

    Returns:
        One row per aggregated player group; may be empty.
    """
    # Local import: the file-level imports only bring in `create_engine`.
    from sqlalchemy import text

    # Every user-supplied value goes through a bound parameter rather than
    # being interpolated into the SQL text (prevents SQL injection and
    # quoting errors).
    where_clauses = ["1=1"]
    params = {"min_minutes": min_minutes}

    if season is not None:
        where_clauses.append("season = :season")
        params["season"] = season

    if leagues_big5:
        leagues = BIG_FIVE_LEAGUES

    if leagues is not None and len(leagues) > 0:
        # One named placeholder per league: :league_0, :league_1, ...
        placeholders = []
        for position, league in enumerate(leagues):
            key = f"league_{position}"
            placeholders.append(f":{key}")
            params[key] = league
        where_clauses.append(f"league IN ({', '.join(placeholders)})")

    where_sql = " AND ".join(where_clauses)

    # Only placeholder names generated above are formatted into the statement.
    query = text(
        f"""
    SELECT
        id_jugador,
        player_name,
        team_name,
        league,
        season,
        position,
        SUM(minutes_played)              AS minutes_total,
        AVG(idx_finishing)               AS finishing,
        AVG(idx_playmaking)              AS playmaking,
        AVG(idx_progression)             AS progression,
        AVG(idx_involvement)             AS involvement,
        AVG(idx_defending)               AS defending,
        AVG(idx_discipline)              AS discipline,
        AVG(player_age_numeric)          AS age_years
    FROM (
        SELECT
            *,
            CAST(SUBSTRING_INDEX(player_age, '-', 1) AS SIGNED) AS player_age_numeric
        FROM silver_players
    ) sp
    WHERE {where_sql}
    GROUP BY
        id_jugador, player_name, team_name, league, season, position
    HAVING minutes_total >= :min_minutes;
    """
    )

    return pd.read_sql(query, engine, params=params)

def project_player_to_team_space(row: pd.Series) -> np.ndarray:
    """Map a player's individual indices onto the 5-D team style space.

    Output order: [possession, verticality, attack, defense, aggressiveness].

    Mapping (a tunable heuristic):
        - possession: NaN-ignoring mean of involvement and playmaking
        - verticality: progression
        - attack: NaN-ignoring mean of finishing and playmaking
        - defense: defending
        - aggressiveness: negated discipline (more discipline => less aggressive)

    Any index missing from ``row`` is treated as NaN.
    """
    index_names = (
        "finishing",
        "playmaking",
        "progression",
        "involvement",
        "defending",
        "discipline",
    )
    fin, play, prog, inv, dfn, disc = (row.get(name, np.nan) for name in index_names)

    possession = np.nanmean([inv, play])
    attack = np.nanmean([fin, play])

    return np.array([possession, prog, attack, dfn, -disc], dtype=float)

Perfil objetivo y vector ideal de rol

In [5]:
def build_team_target_vector(
    x_real: np.ndarray,
    x_coach: np.ndarray | None = None,
    lam: float = 0.0
) -> np.ndarray:
    """
    Combina el perfil real del equipo y el perfil deseado del entrenador:
    x_target = (1 - lam) * x_real + lam * x_coach
    Si x_coach es None, se usa solo x_real.
    """
    if x_coach is None or lam <= 0:
        return x_real
    return (1.0 - lam) * x_real + lam * x_coach

Cálculo de style_fit, needs_fit y scores adicionales

In [None]:
def compute_style_and_needs_scores(
    df_players: pd.DataFrame,
    x_team_target: np.ndarray,
    x_ideal_role: np.ndarray,
) -> pd.DataFrame:
    """Score every player against the team target and the team's needs.

    Each row is projected into team style space with
    ``project_player_to_team_space`` (the projected vector is used internally
    and not stored). Two columns are added to a copy of ``df_players``:
        - style_fit: cosine similarity between ``x_team_target`` and the
          projected player vector
        - needs_fit: dot product of ``x_ideal_role - x_team_target`` with the
          projected player vector

    The input DataFrame is not modified.
    """
    gap_to_ideal = x_ideal_role - x_team_target

    # Project once per player and reuse the vector for both scores.
    projected = [
        project_player_to_team_space(player)
        for _, player in df_players.iterrows()
    ]

    scored = df_players.copy()
    scored["style_fit"] = [cosine_sim(x_team_target, vec) for vec in projected]
    scored["needs_fit"] = [float(np.dot(gap_to_ideal, vec)) for vec in projected]
    return scored

def add_age_and_discipline_scores(df_players: pd.DataFrame) -> pd.DataFrame:
    """Derive age and discipline scores on a copy of ``df_players``.

    Columns added:
        - edad_z / edad_potencial: z-score of ``age_years`` and its negation
          (younger => higher ``edad_potencial``)
        - disciplina_z / disciplina_score: z-score of ``discipline``
          (higher discipline => higher score)

    When a source column is absent, only the corresponding final score column
    is created, filled with NaN.
    """
    result = df_players.copy()

    if "age_years" not in result.columns:
        result["edad_potencial"] = np.nan
    else:
        age_z = zscore_series(result["age_years"])
        result["edad_z"] = age_z
        # Negated so that younger players rank higher.
        result["edad_potencial"] = -result["edad_z"]

    if "discipline" not in result.columns:
        result["disciplina_score"] = np.nan
    else:
        discipline_z = zscore_series(result["discipline"])
        result["disciplina_z"] = discipline_z
        result["disciplina_score"] = result["disciplina_z"]

    return result

# =========================
# 5. Score final y recomendación
# =========================

def compute_final_score(
    df_players: pd.DataFrame,
    alpha: float = 0.4,
    beta: float = 0.3,
    gamma: float = 0.2,
    delta: float = 0.1,
) -> pd.DataFrame:
    """Add the weighted final ``score`` column to a copy of ``df_players``.

    score = alpha * style_fit
          + beta  * needs_fit
          + gamma * edad_potencial
          + delta * disciplina_score

    Any of the four component columns that is missing is first created and
    filled with NaN, so the resulting score is NaN wherever a component is
    NaN. The input DataFrame is not modified.
    """
    required = ("style_fit", "needs_fit", "edad_potencial", "disciplina_score")

    scored = df_players.copy()
    absent = [name for name in required if name not in scored.columns]
    for name in absent:
        scored[name] = np.nan

    style_term = alpha * scored["style_fit"]
    needs_term = beta * scored["needs_fit"]
    age_term = gamma * scored["edad_potencial"]
    discipline_term = delta * scored["disciplina_score"]

    scored["score"] = style_term + needs_term + age_term + discipline_term
    return scored

def recommend_players_for_team(
    engine,
    team_name: str,
    season: str,
    x_ideal_role: list[float] | np.ndarray,
    leagues: list[str] | None = None,
    leagues_big5: bool = True,
    min_minutes: int = 600,
    alpha: float = 0.4,
    beta: float = 0.3,
    gamma: float = 0.2,
    delta: float = 0.1,
    x_coach: list[float] | np.ndarray | None = None,
    lam_coach: float = 0.0,
    top_n: int = 20,
) -> pd.DataFrame:
    """
    Pipeline completo:

    1) Obtiene el perfil real del equipo desde silver_teams.
    2) Construye x_team_target combinando realidad + perfil del entrenador.
    3) Construye need = x_ideal_role - x_team_target.
    4) Obtiene pool de jugadores (5 grandes ligas por defecto) desde silver_players.
    5) Calcula style_fit, needs_fit, edad_potencial, disciplina_score.
    6) Calcula score final.
    7) Devuelve top_n jugadores ordenados por score.

    x_ideal_role y x_coach deben tener dimensión 5:
        [posesión, verticalidad, ataque, defensa, agresividad]
    """
    # 1) Perfil de equipo
    team_row = get_team_profile(engine, team_name, season)
    x_team_real = build_team_style_vector(team_row)

    # 2) Perfil target (realidad + entrenador)
    x_team_target = build_team_target_vector(
        x_real=x_team_real,
        x_coach=np.array(x_coach, dtype=float) if x_coach is not None else None,
        lam=lam_coach,
    )

    # 3) Pool de jugadores
    df_players = get_players_pool(
        engine=engine,
        season=season,
        leagues=leagues,
        leagues_big5=leagues_big5,
        min_minutes=min_minutes,
    )

    if df_players.empty:
        raise ValueError("No se encontraron jugadores en el pool con esos filtros.")

    # 4) Style_fit y needs_fit
    x_ideal_role = np.array(x_ideal_role, dtype=float)
    df_players = compute_style_and_needs_scores(
        df_players=df_players,
        x_team_target=x_team_target,
        x_ideal_role=x_ideal_role,
    )

    # 5) Edad y disciplina
    df_players = add_age_and_discipline_scores(df_players)

    # 6) Score final
    df_players = compute_final_score(
        df_players,
        alpha=alpha,
        beta=beta,
        gamma=gamma,
        delta=delta,
    )

    # 7) Ordenar y devolver top_n
    df_players = df_players.sort_values("score", ascending=False).reset_index(drop=True)
    return df_players.head(top_n)


# 