# 🧠 Agente RAG de Scouting Futbolístico con LangChain

Este notebook permite hacer consultas inteligentes sobre una base de datos de jugadores usando LangChain + FAISS + GPT-4.

In [61]:
from langchain_experimental.agents import create_csv_agent
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv
import os
from langchain_experimental.utilities import PythonREPL



In [62]:
load_dotenv()

True

In [63]:
# Load the OpenAI API key from the environment variable
if os.getenv("OPENAI_API_KEY") is None or os.getenv("OPENAI_API_KEY") == "":
        print("OPENAI_API_KEY is not set")
        exit(1)
else:
        print("OPENAI_API_KEY is set")

OPENAI_API_KEY is set


In [64]:
csv_file = "datos/datos_jugadores_v5/jugadores_total.csv"

In [102]:
llm = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=300)


In [66]:
prefix = """
Eres un agente experto en análisis de jugadores de fútbol. Solo puedes usar el DataFrame `df` que contiene columnas como 'player_name', 'team', 'value_eur', 'goals_per90', etc.

**Solo puedes utilizar los datos que están dentro de ese DataFrame.**
**No debes usar conocimiento externo.**
No debes mencionar a jugadores que no estén en el DataFrame.

Guía de traducción para interpretar posiciones (columna main_position):
- portero → goalkeeper
- lateral → left back, right back
- defensa central → center back
- mediocentro defensivo → defensive midfield
- mediocentro → center midfield
- mediocentro ofensivo → offensive midfield
- extremo → winger
- delantero → striker

⚠️ No debes usar información externa, solo el DataFrame.

Cuando te pregunten, debes:

- Mostrar como respuesta final solo una tabla con las columnas `player_name`, `team`, `value_eur` y aquellas relacionadas con la pregunta (por ejemplo `goals_per90` si se pregunta por goles).

"""

In [84]:
from mplsoccer import PyPizza
import matplotlib.pyplot as plt

def pizza_radar_jugador(player_name, df_percentils, df_total):

    # Diccionario de etiquetas por rol
    etiquetas_posiciones = {
        "goalkeeper": ["Goalkeeper"],
        "center_back": ['Center Back', 'Right Center Back', 'Left Center Back'],
        "side_back": ['Left Back', 'Right Back', 'Right Wing Back', 'Left Wing Back'],
        "defensive_midfield": ['Right Defensive Midfield', 'Center Defensive Midfield', 'Left Defensive Midfield'],
        "center_midfield": ['Right Center Midfield', 'Center Midfield', 'Left Center Midfield'],
        "offensive_midfield": ['Left Attacking Midfield', 'Center Attacking Midfield', 'Right Attacking Midfield'],
        "winger": ['Left Wing', 'Left Midfield', 'Right Wing', 'Right Midfield'],
        "striker": ['Center Forward', 'Secondary Striker', 'Left Center Forward', 'Right Center Forward']
    }

        # Mapear cada tipo de posición a columnas y etiquetas del radar
    radar_config = {
        "goalkeeper": (
            ["saves_per90", "save_percentage", "xg_against_minus_goals_conceded_per90",
            "keeper_sweeper_per90", "penalty_save_percentage", "aerial_dominance_index_per90",
             "pass_completion_rate"],
            ["Paradas /90", "Porcentaje paradas", "xG - Goles /90", "Sweeper /90", "Penaltis detenidos %",
            "Dominio aéreo", "Precisión pase %"]
        ),
        "center_back": (
            ["tackles_successful_per90", "interceptions_per90", "clearances_per90",
            "blocks_per90", "times_dribbled_past_per90", "progressive_passes_per90",
            "duel_success_rate", "goals_scored_per90"],
            ["Tackles exitosos /90", "Intercepciones /90", "Despejes /90", "Bloqueos /90", "Regates sufridos /90",
            "Pases progresivos /90", "Duelos ganados %", "Goles /90"]
        ),
        "side_back": (
            ["interception_success_rate", "pressures_per90", "duels_won_per90", "tackles_successful_per90",
            "progressive_carries_per90", "chances_created_per90", "goals_scored_per90", "fouls_committed_per90"],
            ["Intercepciones %", "Presiones /90", "Duelos ganados /90", "Tackles exitosos /90",
            "Conducciones progresivas /90", "Ocasiones creadas /90", "Goles /90", "Faltas cometidas /90"]
        ),
        "defensive_midfield": (
            ["ball_recoveries_per90", "duels_won_per90", "duel_success_rate", "interceptions_per90",
            "pass_completion_rate", "progressive_passes_per90", "chances_created_per90", "fouls_committed_per90"],
            ["Recuperaciones /90", "Duelos ganados /90", "Duelos ganados %", "Intercepciones /90",
            "Precisión pase %", "Pases progresivos /90", "Ocasiones creadas /90", "Faltas cometidas /90"]
        ),
        "center_midfield": (
            ["duels_won_per90", "pressures_per90", "dribble_success_rate", "pass_completion_rate",
            "progressive_passes_per90", "chances_created_per90", "goal_assists_per90", "goals_scored_per90"],
            ["Duelos ganados /90", "Presiones /90", "Éxito regate %", "Precisión pase %",
            "Pases progresivos /90", "Ocasiones creadas /90", "Asistencias /90", "Goles /90"]
        ),
        "offensive_midfield": (
            ["pass_completion_rate", "goal_assists_per90", "chances_created_per90", "dribbles_completed_per90",
            "dribble_success_rate", "progressive_passes_per90", "fouls_won_per90", "goals_scored_per90"],
            ["Precisión pase %", "Asistencias /90", "Ocasiones creadas /90", "Regates completados /90",
            "Éxito regate %", "Pases progresivos /90", "Faltas recibidas /90", "Goles /90"]
        ),
        "winger": (
            ["dribbles_completed_per90", "dribble_success_rate", "progressive_carries_per90", "chances_created_per90",
            "goal_assists_per90", "goals_scored_per90", "goals_minus_xg_per90", "fouls_won_per90"],
            ["Regates completados /90", "Éxito regate %", "Conducciones progresivas /90", "Ocasiones creadas /90",
            "Asistencias /90", "Goles /90", "Goles - xG /90", "Faltas recibidas /90"]
        ),
        "striker": (
            ["goals_scored_per90", "goals_minus_xg_per90", "headed_shot_duel_rate", "dribbles_completed_per90",
            "chances_created_per90", "goal_assists_per90", "pass_completion_rate", "fouls_won_per90"],
            ["Goles /90", "Goles - xG /90", "Duelos aéreos %", "Regates completados /90", "Ocasiones creadas /90",
            "Asistencias /90", "Precisión pase %", "Faltas recibidas /90"]
        )
    }

    # Obtener fila del jugador
    row = df_total[df_total["player_name"] == player_name].iloc[0]
    position_label = row["main_position"]
    team = row["team"]
    rating = round(float(row["rating"]), 2) if str(row["rating"]).replace('.', '', 1).isdigit() else "S.V"
    market_value = row["value_eur"] / 1000000


    # Determinar rol del jugador
    rol = None
    for key, etiquetas in etiquetas_posiciones.items():
        if position_label in etiquetas:
            rol = key
            break
    if rol not in radar_config:
        raise ValueError(f"No se ha definido radar para la posición: {position_label} (rol: {rol})")

    columnas, labels = radar_config[rol]

    # Extraer percentiles del DataFrame de percentiles
    valores = df_percentils[df_percentils["player_name"] == player_name][columnas].iloc[0].tolist()
    valores = [round(v, 2) for v in valores]

    # Colores
    slice_colors = ["#1A78CF"] * 3 + ["#FF9300"] * 3 + ["#D70232"] * (len(valores) - 6)
    text_colors = ["#F2F2F2"] * 3 + ["#000000"] * 3 + ["#F2F2F2"] * (len(valores) - 6)

    # Crear radar
    baker = PyPizza(
        params=labels,
        background_color="#EBEBE9",
        straight_line_color="#EBEBE9",
        straight_line_lw=1,
        last_circle_lw=0,
        other_circle_lw=0,
        inner_circle_size=20
    )

    fig, ax = baker.make_pizza(
        valores,
        figsize=(8, 8),
        color_blank_space="same",
        slice_colors=slice_colors,
        value_colors=text_colors,
        param_location=110
    )

    # Título y rating en el centro
    fig.text(0.5, 0.97, f"{player_name} - {team}", size=18, weight='bold', ha='center')
    fig.text(0.5, 0.94, f"Market value: {market_value} M€", size=12, weight='semibold', ha='center')
    fig.text(0.5125, 0.485, f"{rating}", size=18, weight='bold', ha='center')

    plt.show()

In [103]:

agent = create_csv_agent(
            llm, csv_file, verbose=True, prefix=prefix,agent_type="openai-functions")

In [104]:
question = "Tell me 5 players plays as a defensive_midfield with highest pass_completion_rate, and whose height_cm is more than 180, and whose value_eur is less than 2000000.0"

In [105]:
agent.invoke(question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df[(df['main_position'] == 'defensive midfield') & (df['pass_completion_rate'] > 0) & (df['height_cm'] > 180) & (df['value_eur'] < 2000000.0)].sort_values(by='pass_completion_rate', ascending=False)[['player_name', 'team', 'value_eur', 'pass_completion_rate', 'height_cm']].head(5)"}`


[0m[36;1m[1;3m                       player_name               team  value_eur  \
1668                  Julian Weigl  Borussia Dortmund  1500000.0   
198     Rodrigo Hernández Cascante         Villarreal   500000.0   
259   José Vicente Gómez Umpiérrez         Las Palmas  1200000.0   
1315                  Seydou Kéita            AS Roma  1700000.0   
163             Joan Jordán Moreno           Espanyol   600000.0   

      pass_completion_rate  height_cm  
1668                 92.26        186  
198                  88.00        182  
259                  87.73        188  
1315                

{'input': 'Tell me 5 players plays as a defensive_midfield with highest pass_completion_rate, and whose height_cm is more than 180, and whose value_eur is less than 2000000.0',
 'output': 'Aquí están los 5 jugadores que juegan como mediocentro defensivo con la tasa más alta de completación de pases, cuya altura es mayor a 180 cm y cuyo valor es menor a 2,000,000.0 euros:\n\n1. Julian Weigl del Borussia Dortmund con una tasa de completación de pases del 92.26%, altura de 186 cm y un valor de 1,500,000.0 euros.\n2. Rodrigo Hernández Cascante del Villarreal con una tasa de completación de pases del 88.00%, altura de 182 cm y un valor de 500,000.0 euros.\n3. José Vicente Gómez Umpiérrez de Las Palmas con una tasa de completación de pases del 87.73%, altura de 188 cm y un valor de 1,200,000.0 euros.\n4. Seydou Kéita de AS Roma con una tasa de completación de pases del 87.34%, altura de 183 cm y un valor de 1,700,000.0 euros.\n5. Joan Jordán Moreno de Espanyol con una tasa de completación de

In [106]:
agent.invoke("Tell me 5 goalkeepers with highest clean_sheets and save_percentage, and whose height_cm is more than 185, and whose value_eur is less than 2000000.0")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df[(df['main_position']=='goalkeeper') & (df['height_cm']>185) & (df['value_eur']<2000000.0)].nlargest(5, ['clean_sheets', 'save_percentage'])[['player_name', 'team', 'value_eur', 'clean_sheets', 'save_percentage']]"}`


[0m[36;1m[1;3m                   player_name                 team  value_eur  clean_sheets  \
1646   Rune Almenning Jarstein        Hertha Berlin   825000.0            12   
1195  Albano Benjamín Bizzarri               Chievo  1000000.0            11   
1760             Ramazan Özcan           Ingolstadt  1300000.0            10   
1277          Łukasz Skorupski               Empoli  1800000.0             9   
1650            Lukáš Hrádecký  Eintracht Frankfurt  1500000.0             8   

      save_percentage  
1646            71.06  
1195            74.53  
1760            67.05  
1277            67.19  
1650            69.94  [0m[32;1m[1;3mHere are the 5

{'input': 'Tell me 5 goalkeepers with highest clean_sheets and save_percentage, and whose height_cm is more than 185, and whose value_eur is less than 2000000.0',
 'output': 'Here are the 5 goalkeepers with the highest clean sheets and save percentage, whose height is more than 185 cm and value is less than 2,000,000 euros:\n\n1. Rune Almenning Jarstein from Hertha Berlin\n   - Clean Sheets: 12\n   - Save Percentage: 71.06%\n   - Value: 825,000 euros\n\n2. Albano Benjamín Bizzarri from Chievo\n   - Clean Sheets: 11\n   - Save Percentage: 74.53%\n   - Value: 1,000,000 euros\n\n3. Ramazan Özcan from Ingolstadt\n   - Clean Sheets: 10\n   - Save Percentage: 67.05%\n   - Value: 1,300,000 euros\n\n4. Łukasz Skorupski from Empoli\n   - Clean Sheets: 9\n   - Save Percentage: 67.19%\n   - Value: 1,800,000 euros\n\n5. Lukáš Hrádecký from Eintracht Frankfurt\n   - Clean Sheets: 8\n   - Save Percentage: 69.94%\n   - Value: 1,500,000 euros'}

In [107]:
import shutil
shutil.rmtree('cache')


PermissionError: [WinError 32] El proceso no tiene acceso al archivo porque está siendo utilizado por otro proceso: 'cache\\cache_db_0.11.db'