In [1]:
import pandas as pd
import numpy as np
import joblib

In [2]:
# --- Load the model and the data ---
# Both locations are relative to the directory the notebook is run from.
MODEL_PATH = '../models/lgbm_tuned_model.pkl'
DATA_PATH = '../data/processed/featured_player_dataset.csv'

# Restore the trained model. NOTE(review): joblib.load unpickles the file,
# which can execute arbitrary code -- only point MODEL_PATH at a trusted artifact.
model = joblib.load(filename=MODEL_PATH)
print("Modelo optimizado cargado.")

# Read the feature-engineered dataset that the rest of the notebook analyses.
df = pd.read_csv(filepath_or_buffer=DATA_PATH)
print("Dataset con features cargado.")

Modelo optimizado cargado.
Dataset con features cargado.


In [3]:
# --- Prepare the data for prediction ---
# Build the feature matrix X: every numeric column of `df` that is not the
# target, an identifier, or one of the columns excluded below.
TARGET = 'market_value_in_eur'
cols_to_exclude = [
    'player_id', TARGET, 'goals', 'assists', 'minutes_played', 'games_played',
    'name', 'position', 'sub_position', 'country_of_citizenship', 
    'current_club_name', 'contract_expiration_date', 'valuation_date'
]
# Columns that the prediction/error cell writes back into `df`. They do not
# exist on a fresh run, but they are numeric, so re-running this cell after
# that one would otherwise feed them (and the target they are derived from)
# to the model as extra features.
derived_cols = ['predicted_value', 'error', 'abs_error', 'error_percent']
features = [
    col for col in df.columns
    if col not in cols_to_exclude and col not in derived_cols
]
# Keep only numeric dtypes; any remaining non-numeric column is dropped here.
X = df[features].select_dtypes(include=np.number)


In [4]:
# --- Make predictions and compute errors ---
# Adds four columns to `df`: predicted_value, error, abs_error, error_percent.
print("\nRealizando predicciones sobre todo el dataset...")

# Predictions come out on a log scale; expm1 (the inverse of log1p) maps them
# back to euros.
preds_log = model.predict(X)
df['predicted_value'] = np.expm1(preds_log)

# Signed error: positive means the model over-estimated the market value.
actual_value = df['market_value_in_eur']
df['error'] = df['predicted_value'] - actual_value
df['abs_error'] = df['error'].abs()

# The relative error is undefined when the actual value is 0. Mask those
# denominators to NaN so the result is NaN instead of +/-inf, which would
# otherwise sort to the top of the percentage-error rankings.
nonzero_actual = actual_value.where(actual_value != 0)
df['error_percent'] = (df['error'] / nonzero_actual) * 100

print("Cálculo de errores completado.")



Realizando predicciones sobre todo el dataset...
Cálculo de errores completado.


In [5]:
# =============================================================================
# Analysis 1: The 10 Largest Absolute Errors (in euros)
# =============================================================================
# The rows where prediction and actual value are furthest apart in euros.
# Useful for spotting whether the misses cluster around elite players.

# Columns shown in the result tables; the last three hold euro amounts.
display_cols = ['name', 'position', 'age', 'market_value_in_eur', 'predicted_value', 'error']
money_format = dict.fromkeys(display_cols[-3:], "€{:,.0f}")

# Largest absolute error first, then keep the first ten rows.
top_10_worst_errors = df.sort_values('abs_error', ascending=False).iloc[:10]

print("\n--- Top 10 Mayores Errores Absolutos ---")
# The Styler is the cell's last expression, so the notebook renders it.
top_10_worst_errors[display_cols].style.format(money_format)


--- Top 10 Mayores Errores Absolutos ---


Unnamed: 0,name,position,age,market_value_in_eur,predicted_value,error
5299,Florian Wirtz,Midfield,21.634497,"€140,000,000","€29,741,796","€-110,258,204"
3561,Vinicius Junior,Attack,24.459959,"€200,000,000","€102,043,242","€-97,956,758"
5200,Jude Bellingham,Midfield,21.497604,"€180,000,000","€90,288,273","€-89,711,727"
3412,Declan Rice,Midfield,25.921971,"€110,000,000","€22,757,523","€-87,242,477"
3906,Phil Foden,Attack,24.804928,"€130,000,000","€50,624,067","€-79,375,933"
3221,Kylian Mbappé,Attack,26.250513,"€170,000,000","€96,092,419","€-73,907,581"
3410,Rodri,Midfield,28.4846,"€130,000,000","€58,135,679","€-71,864,321"
4468,Luis Díaz,Attack,27.92334,"€85,000,000","€17,246,315","€-67,753,685"
4647,Anthony Gordon,Attack,24.060233,"€65,000,000","€869,010","€-64,130,990"
5167,Julián Alvarez,Attack,25.136208,"€90,000,000","€30,509,424","€-59,490,576"


In [6]:
# =============================================================================
# Analysis 2: The 10 Largest Relative Errors (positive and negative)
# =============================================================================
# The rows where the model is furthest off in percentage terms, in each
# direction. Useful for finding profiles the model misreads at a basic level.

# Most positive percentage error first: over-estimations.
top_10_overestimated = df.sort_values('error_percent', ascending=False).head(10)

# Most negative percentage error first: under-estimations.
top_10_underestimated = df.sort_values('error_percent').head(10)

# One euro format shared by the three monetary columns of both tables.
euro_format = dict.fromkeys(
    ['market_value_in_eur', 'predicted_value', 'error'], "€{:,.0f}"
)

for section_title, worst_rows in (
    ("\n--- Top 10 Mayores Sobreestimaciones (%) ---", top_10_overestimated),
    ("\n--- Top 10 Mayores Subestimaciones (%) ---", top_10_underestimated),
):
    print(section_title)
    display(worst_rows[display_cols].style.format(euro_format))


--- Top 10 Mayores Sobreestimaciones (%) ---


Unnamed: 0,name,position,age,market_value_in_eur,predicted_value,error
2700,Harry McKirdy,Attack,26.685832,"€20,000","€680,573","€660,573"
5384,Yusuf Karagöz,Goalkeeper,25.456537,"€50,000","€1,465,855","€1,415,855"
1540,Stephen Kingsley,Defender,30.63655,"€70,000","€1,928,695","€1,858,695"
5982,Andriy Kitela,Defender,20.010951,"€50,000","€1,225,397","€1,175,397"
2553,Georgiy Melkadze,Attack,27.956194,"€500,000","€11,513,580","€11,013,580"
1452,Andriy Bliznichenko,Attack,29.382615,"€200,000","€2,898,576","€2,698,576"
3790,Hüseyin Türkmen,Defender,27.214237,"€150,000","€2,094,153","€1,944,153"
5254,Bandiougou Fadiga,Midfield,23.351129,"€400,000","€5,567,552","€5,167,552"
4657,Jeriel Dorsett,Defender,20.610541,"€250,000","€3,475,108","€3,225,108"
6228,Vladyslav Ostrovskyi,Attack,20.260096,"€50,000","€689,992","€639,992"



--- Top 10 Mayores Subestimaciones (%) ---


Unnamed: 0,name,position,age,market_value_in_eur,predicted_value,error
4647,Anthony Gordon,Attack,24.060233,"€65,000,000","€869,010","€-64,130,990"
5683,Lewis Hall,Defender,20.52293,"€30,000,000","€477,060","€-29,522,940"
3295,Kamil Grabara,Goalkeeper,25.94935,"€14,000,000","€244,473","€-13,755,527"
8,Cristiano Ronaldo,Attack,38.346338,"€15,000,000","€340,913","€-14,659,087"
5534,Eren Dinkçi,Attack,23.019849,"€12,000,000","€276,544","€-11,723,456"
5347,Merlin Röhl,Midfield,22.461328,"€12,000,000","€322,956","€-11,677,044"
4808,Noah Atubolu,Goalkeeper,22.839151,"€15,000,000","€408,853","€-14,591,147"
4550,Enock Mwepu,Midfield,24.451745,"€18,000,000","€534,164","€-17,465,836"
4359,Marcin Bulka,Goalkeeper,25.185489,"€20,000,000","€717,220","€-19,282,780"
2915,Christian Pulisic,Attack,26.250513,"€50,000,000","€1,882,513","€-48,117,487"
