In [1]:
!pip install scikit-learn pandas numpy



In [2]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import ndcg_score

In [4]:
df = pd.read_csv('fifa23_male_unicos.csv', encoding='utf-8')

# Filtrar jugadores que están en un club
df = df[df['club_name'].notna()]

# Asegurar que value_eur no tenga NaNs
df['value_eur'] = df['value_eur'].fillna(0)

# Ordenar por valor dentro de cada club
df_sorted = df.sort_values(by=['club_name', 'value_eur'], ascending=[True, False])
ground_truth = df_sorted.groupby('club_name').head(22).reset_index(drop=True)

# Lista de clubes a usar
clubs = ["Borussia Dortmund", "FC Bayern München", "Paris Saint Germain", "Real Madrid"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['value_eur'] = df['value_eur'].fillna(0)


Ground Truth: Mejores 22 por club

Modelo iKNN para seleccionar jugadores similares

In [5]:
features = ['overall', 'potential', 'pace', 'shooting', 'passing', 'dribbling',
            'defending', 'physic', 'value_eur', 'age']

X = df[features].fillna(0)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

knn = NearestNeighbors(n_neighbors=100)
knn.fit(X_scaled)

FUNCIÓN: SELECCIÓN POR FORMACIÓN

In [6]:
def select_team_with_formation(df_source):
    df_source = df_source.copy()
    df_source['positions'] = df_source['player_positions'].fillna('').apply(lambda x: x.split(','))

    def get_role(positions):
        if any(p.strip() == 'GK' for p in positions):
            return 'GK'
        elif any(p.strip() in ['CB', 'LB', 'RB', 'LCB', 'RCB', 'LWB', 'RWB'] for p in positions):
            return 'DEF'
        elif any(p.strip() in ['CDM', 'CM', 'CAM', 'LM', 'RM', 'LAM', 'RAM', 'LCM', 'RCM'] for p in positions):
            return 'MID'
        elif any(p.strip() in ['ST', 'CF', 'LF', 'RF'] for p in positions):
            return 'ST'
        return 'OTHER'

    df_source['role'] = df_source['positions'].apply(get_role)

    def pick_players(role, n):
        return df_source[df_source['role'] == role].sort_values(by='value_eur', ascending=False).head(n)

    gk = pick_players('GK', 2)
    defenders = pick_players('DEF', 8)
    mids = pick_players('MID', 8)
    strikers = pick_players('ST', 4)

    team = pd.concat([gk, defenders, mids, strikers])
    return team.reset_index(drop=True)

Crear equipos con formación

In [7]:
from sklearn.metrics import ndcg_score

def evaluate_ndcg(generated_team, reference_team, k=22):
    """
    Calcula el nDCG@k entre el equipo generado y el equipo real (ground truth),
    basado en la importancia del valor del jugador (value_eur).
    """
    # Unir por player_id
    df_merged = pd.merge(
        generated_team[['player_id', 'value_eur']].copy(),
        reference_team[['player_id', 'value_eur']].copy(),
        on='player_id',
        how='left',
        suffixes=('_gen', '_true')
    )

    # Los jugadores que no están en el ground truth tienen relevancia 0
    df_merged['value_eur_true'] = df_merged['value_eur_true'].fillna(0)

    # Rankear por orden de aparición en el equipo generado
    true_relevance = np.asarray([df_merged['value_eur_true'].values])
    scores = np.asarray([df_merged['value_eur_gen'].values])

    # Normalizamos nDCG respecto al ideal (ground truth con mejor ranking posible)
    return ndcg_score(true_relevance, scores, k=min(k, len(df_merged)))


In [8]:
def evaluate_team(generated_team, reference_team):
    gen_names = set(generated_team['player_id'])
    ref_names = set(reference_team['player_id'])

    true_positives = len(gen_names & ref_names)
    precision = true_positives / len(gen_names)
    recall = true_positives / len(ref_names)
    novelty = 1 - (true_positives / len(ref_names))  # Qué tan diferentes son

    return {
        'precision': precision,
        'recall': recall,
        'novelty': novelty
    }

In [9]:
results = {}

for club in clubs:
    print(f"\nEvaluando club: {club}")

    # Ground truth para el club
    reference_team = ground_truth[ground_truth['club_name'] == club]
    reference_team = select_team_with_formation(reference_team)

    # Jugador semilla (más valioso del club)
    seed_player = reference_team.sort_values(by='value_eur', ascending=False).iloc[0]
    seed_idx = df[df['player_id'] == seed_player['player_id']].index[0]

    # Vecinos más cercanos al semilla
    distances, indices = knn.kneighbors([X_scaled[seed_idx]])
    similar_players = df.iloc[indices[0]]

    # Crear equipo con formación
    team_iknn = select_team_with_formation(similar_players)

    # Equipo aleatorio para comparar
    team_random_all = df.sample(n=100, random_state=42)
    team_random = select_team_with_formation(team_random_all)

    # Evaluar ambos equipos
    eval_iknn = evaluate_team(team_iknn, reference_team)
    eval_random = evaluate_team(team_random, reference_team)

    # Evaluación con nDCG
    ndcg_iknn = evaluate_ndcg(team_iknn, reference_team)
    ndcg_random = evaluate_ndcg(team_random, reference_team)

    # Guardar resultados
    results[club] = {
    'iknn': eval_iknn,
    'random': eval_random,
    'ndcg_iknn': ndcg_iknn,
    'ndcg_random': ndcg_random,
    'iknn_team': team_iknn[['short_name', 'role', 'value_eur']],
    'reference_team': reference_team[['short_name', 'role', 'value_eur']],
}



Evaluando club: Borussia Dortmund

Evaluando club: FC Bayern München

Evaluando club: Paris Saint Germain

Evaluando club: Real Madrid


In [10]:
for club, res in results.items():
    print(f"\n=== {club} ===")
    print("iKNN Evaluation:", res['iknn'])
    print("Random Evaluation:", res['random'])
    print(f"nDCG iKNN: {res['ndcg_iknn']:.4f}")
    print(f"nDCG Random: {res['ndcg_random']:.4f}")


=== Borussia Dortmund ===
iKNN Evaluation: {'precision': 0.0, 'recall': 0.0, 'novelty': 1.0}
Random Evaluation: {'precision': 0.0, 'recall': 0.0, 'novelty': 1.0}
nDCG iKNN: 0.0000
nDCG Random: 0.0000

=== FC Bayern München ===
iKNN Evaluation: {'precision': 0.15, 'recall': 0.17647058823529413, 'novelty': 0.8235294117647058}
Random Evaluation: {'precision': 0.0, 'recall': 0.0, 'novelty': 1.0}
nDCG iKNN: 0.4422
nDCG Random: 0.0000

=== Paris Saint Germain ===
iKNN Evaluation: {'precision': 0.13636363636363635, 'recall': 0.17647058823529413, 'novelty': 0.8235294117647058}
Random Evaluation: {'precision': 0.0, 'recall': 0.0, 'novelty': 1.0}
nDCG iKNN: 0.8066
nDCG Random: 0.0000

=== Real Madrid ===
iKNN Evaluation: {'precision': 0.09090909090909091, 'recall': 0.10526315789473684, 'novelty': 0.8947368421052632}
Random Evaluation: {'precision': 0.0, 'recall': 0.0, 'novelty': 1.0}
nDCG iKNN: 0.2841
nDCG Random: 0.0000
