<a href="https://colab.research.google.com/github/Chunsen41/Player-Market-Evaluation-/blob/main/Football_Analysis_App.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# 1. Read the raw football dataset from disk into a DataFrame
df = pd.read_csv(filepath_or_buffer="footballData.csv")

In [None]:
# 2. Columns kept for Version 1 of Football Oracle AI, grouped by theme
core_cols = (
    ["sofifa_id", "short_name", "long_name"]                                 # identity
    + ["age", "height_cm", "weight_kg"]                                      # age / body
    + ["nationality", "club_name"]                                           # affiliation
    + ["overall", "potential", "value_eur", "wage_eur"]                      # ratings & money
    + ["player_positions", "preferred_foot"]                                 # role
    + ["pace", "shooting", "passing", "dribbling", "defending", "physic"]    # skill attributes
)

# Work on an independent copy so later edits never touch the raw frame
df_core = df.loc[:, core_cols].copy()

In [None]:
# 3. Handle missing values

# 3a. A missing club_name is replaced by the explicit label "No Club"
df_core["club_name"] = df_core["club_name"].fillna("No Club")

# 3b. Numeric columns: each one is imputed with its own median
num_cols = [
    "age", "height_cm", "weight_kg",
    "overall", "potential", "value_eur", "wage_eur",
    "pace", "shooting", "passing", "dribbling", "defending", "physic",
]

for col, median_value in df_core[num_cols].median().items():
    df_core[col] = df_core[col].fillna(median_value)

# 4. Sanity check: per-column count of remaining NaNs, then a preview of the frame
print(df_core.isna().sum())
print(df_core.head())


sofifa_id           0
short_name          0
long_name           0
age                 0
height_cm           0
weight_kg           0
nationality         0
club_name           0
overall             0
potential           0
value_eur           0
wage_eur            0
player_positions    0
preferred_foot      0
pace                0
shooting            0
passing             0
dribbling           0
defending           0
physic              0
dtype: int64
   sofifa_id         short_name                            long_name  age  \
0     158023           L. Messi       Lionel Andrés Messi Cuccittini   33   
1      20801  Cristiano Ronaldo  Cristiano Ronaldo dos Santos Aveiro   35   
2     200389           J. Oblak                            Jan Oblak   27   
3     188545     R. Lewandowski                   Robert Lewandowski   31   
4     190871          Neymar Jr        Neymar da Silva Santos Júnior   28   

   height_cm  weight_kg nationality            club_name  overall  potential  \
0   

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Build the modelling frame from df_core, which must already exist and be cleaned
df_dl = df_core.copy()

# ---- 1) preferred_foot -> numeric flag (Right = 1, Left = 0) ----
foot_to_flag = {"Right": 1, "Left": 0}
df_dl["preferred_foot"] = df_dl["preferred_foot"].map(foot_to_flag)

# ---- 2) main_position = first entry of the comma-separated position list ----
df_dl["main_position"] = df_dl["player_positions"].apply(
    lambda positions: positions.partition(",")[0].strip()
)

# ---- 3) Integer-encode nationality, club_name and main_position ----
enc_nat = LabelEncoder()
enc_club = LabelEncoder()
enc_pos = LabelEncoder()

for encoded_col, raw_col, encoder in (
    ("nationality_enc", "nationality", enc_nat),
    ("club_enc", "club_name", enc_club),
    ("position_enc", "main_position", enc_pos),
):
    df_dl[encoded_col] = encoder.fit_transform(df_dl[raw_col])

# ---- 4) Final numeric feature set for the deep-learning model ----
feature_cols = [
    "age", "height_cm", "weight_kg",
    "overall", "potential",
    "pace", "shooting", "passing", "dribbling", "defending", "physic",
    "preferred_foot",
    "nationality_enc", "club_enc", "position_enc",
]

X = df_dl[feature_cols].to_numpy()
y_value = df_dl["value_eur"].to_numpy()

# ---- 5) Standardise every feature column ----
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

print("Feature shape:", X_scaled.shape)
print("Scaled sample:", X_scaled[0])


Feature shape: (18944, 15)
Scaled sample: [ 1.65505537 -1.63955552 -0.42750624  3.90200649  3.58656347  1.66886499
  2.99273552  3.48251869  3.41067019 -0.88971555  0.04035454 -1.79263023
 -1.50217483 -0.65018746  1.29062463]


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Regression target: log1p(value_eur), which compresses the very large euro amounts
value_eur_f32 = df_dl["value_eur"].astype("float32")
df_dl["log_value_eur"] = np.log1p(value_eur_f32)
y_log_value = df_dl["log_value_eur"].to_numpy().astype("float32")

# Hold out 20% of the rows for validation; the seed is fixed so the split is repeatable
split_parts = train_test_split(X_scaled, y_log_value, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = split_parts

print("X_train:", X_train.shape, "X_val:", X_val.shape)


X_train: (15155, 15) X_val: (3789, 15)


In [None]:
!pip install tensorflow -q


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Drop any model left over from a previous run of this cell
tf.keras.backend.clear_session()

input_dim = X_train.shape[1]
inputs = keras.Input(shape=(input_dim,), name="player_features")

# Hidden stack: three ReLU layers of decreasing width (128 -> 64 -> 32)
x = inputs
for units in (128, 64, 32):
    x = layers.Dense(units, activation="relu")(x)

# Single linear unit: the predicted log-value
output = layers.Dense(1, name="log_value")(x)

value_model = keras.Model(inputs=inputs, outputs=output, name="value_regressor")
value_model.summary()


In [None]:
# Adam + MSE on the log target; the "mae" metric is therefore MAE in log-space, not euros
adam = keras.optimizers.Adam(learning_rate=1e-3)
value_model.compile(optimizer=adam, loss="mse", metrics=["mae"])

# 20 epochs is a starting point and can be raised if the loss is still falling
history = value_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=128,
    verbose=1,
)


Epoch 1/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 86.8808 - mae: 7.8657 - val_loss: 5.8581 - val_mae: 1.6396
Epoch 2/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 4.9833 - mae: 1.4805 - val_loss: 4.4256 - val_mae: 1.3069
Epoch 3/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 3.8614 - mae: 1.2110 - val_loss: 3.7645 - val_mae: 1.0869
Epoch 4/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 3.3750 - mae: 1.0267 - val_loss: 3.3005 - val_mae: 0.9656
Epoch 5/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3.0677 - mae: 0.8832 - val_loss: 2.9752 - val_mae: 0.8162
Epoch 6/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.6674 - mae: 0.7501 - val_loss: 2.8037 - val_mae: 0.6648
Epoch 7/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step -

In [None]:
# 1. Validation predictions (log-space), flattened to a 1-D vector
y_val_pred_log = value_model.predict(X_val).flatten()

# 2. Undo log1p with expm1 so truth and prediction are both in euros
y_val_true_eur, y_val_pred_eur = np.expm1(y_val), np.expm1(y_val_pred_log)

# 3. Mean absolute error in euros, plus the mean true value for scale
abs_err_eur = np.abs(y_val_true_eur - y_val_pred_eur)
mae_eur = abs_err_eur.mean()
mean_value = y_val_true_eur.mean()

print(f"Validation MAE in euros: {mae_eur:,.0f} €")
print(f"Average player value in validation set: {mean_value:,.0f} €")
print(f"Relative MAE: {mae_eur / mean_value:.2%}")


[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Validation MAE in euros: 1,171,855 €
Average player value in validation set: 2,199,327 €
Relative MAE: 53.28%


In [None]:
# Naive reference model: predict the TRAIN-set mean log-value for every validation row
baseline_log = y_train.mean()
y_val_pred_log_baseline = np.full(y_val.shape, baseline_log, dtype=y_val.dtype)

# Back to euros for both the truth and the constant prediction
y_val_true_eur = np.expm1(y_val)
y_val_pred_eur_baseline = np.expm1(y_val_pred_log_baseline)

# Error of the constant predictor, on the same scale as the model's euro MAE
baseline_abs_err = np.abs(y_val_true_eur - y_val_pred_eur_baseline)
mae_eur_baseline = baseline_abs_err.mean()
mean_value = y_val_true_eur.mean()

print(f"Baseline Validation MAE in euros: {mae_eur_baseline:,.0f} €")
print(f"Average player value in validation set: {mean_value:,.0f} €")
print(f"Baseline Relative MAE: {mae_eur_baseline / mean_value:.2%}")


Baseline Validation MAE in euros: 1,885,066 €
Average player value in validation set: 2,199,327 €
Baseline Relative MAE: 85.71%


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# "Clean" feature set: the earlier list minus the label-encoded nationality and
# club columns; the binary foot flag and the encoded position are kept.
feature_cols_clean = [
    "age", "height_cm", "weight_kg",
    "overall", "potential",
    "pace", "shooting", "passing", "dribbling", "defending", "physic",
    "preferred_foot",
    "position_enc",
]

X_clean = df_dl[feature_cols_clean].to_numpy()

# A separate scaler is fitted for this feature set
scaler_clean = StandardScaler()
X_clean_scaled = scaler_clean.fit(X_clean).transform(X_clean)

y_log_value = df_dl["log_value_eur"].to_numpy().astype("float32")

# NOTE: this rebinds X_train / X_val / y_train / y_val to the clean-feature split
split_parts = train_test_split(X_clean_scaled, y_log_value, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = split_parts


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

# Fresh Keras session before rebuilding the regressor on the clean feature set
tf.keras.backend.clear_session()

input_dim = X_train.shape[1]
inputs = keras.Input(shape=(input_dim,), name="player_features")

# Same 128 -> 64 -> 32 ReLU stack as before, followed by one linear output unit
x = inputs
for units in (128, 64, 32):
    x = layers.Dense(units, activation="relu")(x)
output = layers.Dense(1)(x)

value_model = keras.Model(inputs, output)

adam = keras.optimizers.Adam(learning_rate=1e-3)
value_model.compile(optimizer=adam, loss="mse", metrics=["mae"])

# Stop once val_loss has not improved for 3 epochs and roll back to the best weights
early_stop = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

history = value_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=128,
    callbacks=[early_stop],
    verbose=1,
)


Epoch 1/30
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 100.0766 - mae: 8.7881 - val_loss: 7.0699 - val_mae: 1.8373
Epoch 2/30
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5.8418 - mae: 1.6496 - val_loss: 4.8248 - val_mae: 1.3333
Epoch 3/30
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4.4044 - mae: 1.2444 - val_loss: 3.8447 - val_mae: 1.0509
Epoch 4/30
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3.8305 - mae: 1.0139 - val_loss: 3.2531 - val_mae: 0.8680
Epoch 5/30
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.6251 - mae: 0.7718 - val_loss: 2.9600 - val_mae: 0.7387
Epoch 6/30
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.6609 - mae: 0.6694 - val_loss: 2.8401 - val_mae: 0.7040
Epoch 7/30
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

In [None]:
# Evaluate the clean-feature model in euros (predictions are produced in log-space)
y_val_pred_log = value_model.predict(X_val).flatten()

y_val_true_eur, y_val_pred_eur = np.expm1(y_val), np.expm1(y_val_pred_log)

abs_err_eur = np.abs(y_val_true_eur - y_val_pred_eur)
mae_eur = abs_err_eur.mean()
mean_value = y_val_true_eur.mean()

print(f"Validation MAE in euros (CLEAN FEATURES): {mae_eur:,.0f} €")
print(f"Average player value: {mean_value:,.0f} €")
print(f"Relative MAE: {mae_eur / mean_value:.2%}")


[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Validation MAE in euros (CLEAN FEATURES): 745,893 €
Average player value: 2,199,327 €
Relative MAE: 33.91%


Input (15 features in the first model, 13 in the clean-feature model)
 → Dense(128)
 → Dense(64)
 → Dense(32)
 → Dense(1)  (log_value)


In [None]:
# The second-to-last layer is our "embedding" layer (Dense 32)
embedding_layer = value_model.layers[-2]

embedding_model = keras.Model(
    inputs=value_model.input,
    outputs=embedding_layer.output,
    name="player_embedding_model"
)

embedding_model.summary()


In [None]:
# Recompute X_clean and X_clean_scaled for ALL rows
X_clean = df_dl[feature_cols_clean].values
X_clean_scaled = scaler_clean.transform(X_clean)

# Get embeddings for all players
all_embeddings = embedding_model.predict(X_clean_scaled)

print("Embeddings shape:", all_embeddings.shape)  # should be (18944, 32)


[1m592/592[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Embeddings shape: (18944, 32)


In [None]:
# Human-readable columns that travel alongside each player's embedding
meta_cols = [
    "short_name", "long_name",
    "age", "overall", "potential",
    "value_eur", "wage_eur",
    "club_name", "nationality",
    "main_position",
]
player_meta = df_dl.loc[:, meta_cols].copy()

# One embedding vector per row, stored as an object column
player_meta["embedding"] = [vector for vector in all_embeddings]

player_meta.head()


Unnamed: 0,short_name,long_name,age,overall,potential,value_eur,wage_eur,club_name,nationality,main_position,embedding
0,L. Messi,Lionel Andrés Messi Cuccittini,33,93,93,67500000,560000,FC Barcelona,Argentina,RW,"[11.062723, 0.0, 5.7143784, 0.0, 0.0, 0.0, 0.0..."
1,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,35,92,92,46000000,220000,Juventus,Portugal,ST,"[10.397632, 0.0, 6.005182, 0.0, 0.0, 0.0, 0.0,..."
2,J. Oblak,Jan Oblak,27,91,93,75000000,125000,Atlético Madrid,Slovenia,GK,"[10.605287, 0.0, 5.7310643, 0.0, 0.0, 0.0, 0.0..."
3,R. Lewandowski,Robert Lewandowski,31,91,91,80000000,240000,FC Bayern München,Poland,ST,"[10.387987, 0.0, 5.9554243, 0.0, 0.0, 0.0, 0.0..."
4,Neymar Jr,Neymar da Silva Santos Júnior,28,91,91,90000000,270000,Paris Saint-Germain,Brazil,LW,"[11.072604, 0.0, 6.6465487, 0.0, 0.0, 0.0, 0.0..."


In [None]:
from numpy.linalg import norm

# Stack the per-player vectors back into one (N, 32) matrix and scale every row to
# unit length, so a plain dot product between two rows is their cosine similarity.
emb_matrix = np.vstack(list(player_meta["embedding"]))
emb_norms = norm(emb_matrix, axis=1, keepdims=True)
emb_matrix_normed = emb_matrix / (emb_norms + 1e-8)  # epsilon avoids dividing by zero


In [None]:
def find_similar_players_deep(name, top_k=5):
    """
    Find the players whose embeddings are most similar (cosine) to a given player.

    Parameters
    ----------
    name : str
        Player short name, matched case-insensitively against
        player_meta["short_name"]. If several rows match, the first one is used.
    top_k : int
        Maximum number of neighbours to return.

    Returns
    -------
    (results, error)
        On success, `results` is a DataFrame (short_name, club_name, age, overall,
        potential, value_eur, main_position, similarity) ordered from most to
        least similar, and `error` is None. If the name is unknown, `results` is
        None and `error` is a message string.
    """
    # Work with a plain boolean array so every index below is POSITIONAL.
    # emb_matrix_normed is row-aligned with player_meta, so index *labels* must not
    # be used to address it (they only coincide with positions for a default index).
    mask = (player_meta["short_name"].str.lower() == name.lower()).to_numpy()
    if not mask.any():
        return None, f"Player '{name}' not found in dataset."

    pos = int(np.flatnonzero(mask)[0])

    # Rows are unit-normalised, so the dot product is the cosine similarity
    emb = emb_matrix_normed[pos:pos + 1]            # shape (1, D)
    sims = (emb @ emb_matrix_normed.T).flatten()    # shape (N,)

    # Rank everyone, then drop the query player explicitly. This keeps the player
    # out of the result even when top_k is as large as the whole pool.
    ranked = np.argsort(-sims, kind="stable")
    top_idx = ranked[ranked != pos][:top_k]

    results = player_meta.iloc[top_idx][[
        "short_name", "club_name", "age",
        "overall", "potential", "value_eur", "main_position"
    ]].copy()
    results["similarity"] = sims[top_idx]

    return results, None


In [None]:
# Smoke test: the five nearest neighbours of L. Messi, or the error message if the lookup fails
similar, err = find_similar_players_deep("L. Messi", top_k=5)
print(err if err else similar)


         short_name        club_name  age  overall  potential  value_eur  \
34   Bernardo Silva  Manchester City   25       87         88   60000000   
128   Douglas Costa         Juventus   29       84         84   30500000   
129      Iago Aspas         RC Celta   32       84         84   25000000   
288        T. Lemar  Atlético Madrid   24       81         85   23000000   
11         M. Salah        Liverpool   28       90         90   78000000   

    main_position  similarity  
34             RW    0.999485  
128            LM    0.999412  
129            ST    0.999311  
288            LM    0.999275  
11             RW    0.999250  


In [None]:
def get_player_features(name):
    """
    Fetch one player's row from df_dl together with its model-ready feature vector.

    The lookup is a case-insensitive exact match on short_name; when several rows
    match, the first is used.

    Returns (row, scaled_features) on success, where scaled_features is the
    1-row array produced by scaler_clean, or (None, error_message) when no
    player matches.
    """
    wanted = name.lower()
    matches = df_dl[df_dl["short_name"].str.lower() == wanted]
    if matches.empty:
        return None, f"Player '{name}' not found in dataset."

    row = matches.iloc[0]

    # Column order must follow feature_cols_clean, the order the scaler was fitted on
    raw_features = row[feature_cols_clean].values.astype("float32")
    return row, scaler_clean.transform(raw_features.reshape(1, -1))


In [None]:
def analyze_player(name, top_k=5, value_margin=0.15):
    """
    Build a structured valuation summary for one player.

    Steps: look the player up, predict a market value with value_model (the model
    outputs log1p euros, converted back with expm1), compare it with the dataset
    value, attach an under/over/fairly-valued label, and list the top_k most
    similar players from the embedding search.

    Parameters
    ----------
    name : str
        Player short name (case-insensitive).
    top_k : int
        Number of similar players to include.
    value_margin : float
        Relative difference beyond which the player is labelled under- or overvalued.

    Returns
    -------
    (summary, None) on success, where summary is a dict; (None, error_message)
    if the player is not found.
    """
    row, payload = get_player_features(name)
    if row is None:
        return None, payload  # on failure the second element is the error message

    # Prediction is in log-space -> convert to euros
    log_prediction = value_model.predict(payload)[0, 0]
    pred_value_eur = float(np.expm1(log_prediction))
    actual_value_eur = float(row["value_eur"])

    # Positive diff: the model prices the player ABOVE the dataset value
    diff = pred_value_eur - actual_value_eur
    if actual_value_eur > 0:
        rel_diff = diff / actual_value_eur
        if rel_diff > value_margin:
            valuation_label = "undervalued"    # model says worth MORE than listed
        elif rel_diff < -value_margin:
            valuation_label = "overvalued"     # model says worth LESS than listed
        else:
            valuation_label = "fairly valued"
    else:
        # No meaningful ratio without a positive reference value
        rel_diff = None
        valuation_label = "unknown"

    similar_df, _ = find_similar_players_deep(name, top_k=top_k)
    similar_players = [] if similar_df is None else similar_df.to_dict(orient="records")

    summary = dict(
        player_name=row["short_name"],
        long_name=row["long_name"],
        age=int(row["age"]),
        club=row["club_name"],
        nationality=row["nationality"],
        position=row["main_position"],
        overall=int(row["overall"]),
        potential=int(row["potential"]),
        actual_value_eur=actual_value_eur,
        predicted_value_eur=pred_value_eur,
        value_diff_eur=diff,
        relative_diff=rel_diff,
        valuation_label=valuation_label,
        similar_players=similar_players,
    )
    return summary, None


In [None]:
# Demo: full structured analysis for one player, pretty-printed
summary, err = analyze_player("M. Salah", top_k=5)

if not err:
    from pprint import pprint
    pprint(summary)
else:
    print("Error:", err)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
{'actual_value_eur': 78000000.0,
 'age': 28,
 'club': 'Liverpool',
 'long_name': 'Mohamed Salah Ghaly',
 'nationality': 'Egypt',
 'overall': 90,
 'player_name': 'M. Salah',
 'position': 'RW',
 'potential': 90,
 'predicted_value_eur': 136812416.0,
 'relative_diff': 0.7540053333333333,
 'similar_players': [{'age': 32,
                      'club_name': 'Paris Saint-Germain',
                      'main_position': 'RW',
                      'overall': 87,
                      'potential': 87,
                      'short_name': 'A. Di María',
                      'similarity': 0.9998263120651245,
                      'value_eur': 39000000},
                     {'age': 23,
                      'club_name': 'Stade Rennais FC',
                      'main_position': 'RM',
                      'overall': 81,
                      'potential': 85,
                      'short_name': 'Raphinha',
                     

In [None]:
!pip install openai -q


In [None]:
import os
from getpass import getpass

# `OpenAI` was used below without ever being imported, which raises NameError
# on a fresh kernel (Restart & Run All).
from openai import OpenAI

# Never hard-code the key in the notebook. Use OPENAI_API_KEY from the environment
# if it is already set (and do not overwrite it); otherwise prompt for it without
# echoing, so the secret is not saved in the notebook source or its outputs.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])


In [None]:
# System prompt sent as the "system" message by oracle_scout_report().
# It fixes the persona (professional scout), the expected inputs (a JSON player
# summary plus an optional user question) and the output format. The text is
# runtime data for the LLM: any edit changes the generated reports.
SCOUT_SYSTEM_PROMPT = """
You are Football Oracle AI, a world-class football scout and data analyst.

You are given:
- A structured JSON summary of a player (ratings, value, model prediction, similar players).
- Optionally, a specific question from the user.

Your job:
- Speak like a professional European club scout (smart, calm, technical).
- Blend data and football intelligence (not just stats).
- Comment on:
  - Player profile (role, strengths, weaknesses).
  - Whether the player seems under/over valued.
  - How they compare stylistically to similar players.
  - How they might fit into different systems or clubs, if asked.

Write your answer in clear, conversational English, 1–3 short paragraphs plus bullets if helpful.
Avoid repeating raw JSON; interpret it.
"""


In [None]:
import json

def oracle_scout_report(player_name, extra_question=None, model_name="gpt-4.1-mini"):
    """
    Use analyze_player() + OpenAI to generate a natural-language scout report.

    Parameters
    ----------
    player_name : str
        Player short name, forwarded to analyze_player().
    extra_question : str or None
        Optional user question appended to the prompt; empty/None means no question.
    model_name : str
        Chat model passed to the OpenAI client.

    Returns
    -------
    str
        The model's report text, or "Error: <message>" if the player lookup failed.
    """
    summary, err = analyze_player(player_name, top_k=5)
    if err:
        return f"Error: {err}"

    def _to_builtin(value):
        # json only serialises builtin types. numpy scalars/arrays (which can
        # leak in from pandas/numpy) expose .tolist() to convert themselves.
        if hasattr(value, "tolist"):
            return value.tolist()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    # Structured context for the model. ensure_ascii=False keeps accented player
    # names readable ("María") instead of escaping them to \uXXXX sequences.
    summary_json = json.dumps(summary, indent=2, ensure_ascii=False, default=_to_builtin)

    # Build the user message: data first, then the optional question, then the task
    parts = [f"Here is the structured data for the player:\n\n{summary_json}\n\n"]
    if extra_question:
        parts.append(f"User question: {extra_question}\n\n")
    parts.append("Give a detailed but concise scouting report.")
    user_content = "".join(parts)

    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": SCOUT_SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
        ],
        temperature=0.7,
    )

    return response.choices[0].message.content


In [None]:
# Scout report for one player with a tactical follow-up question, using gpt-4o-mini
pressing_question = "Would he still be a good signing for a top club in a high-pressing system?"
report = oracle_scout_report("M. Salah", extra_question=pressing_question, model_name="gpt-4o-mini")
print(report)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Mohamed Salah, at 28 years old, remains one of the premier right-wingers in world football, currently showcasing his talents at Liverpool. With an overall rating of 90 and a strong potential to maintain that level, Salah's profile highlights his explosive pace, technical dribbling, and clinical finishing ability. His strengths lie not only in goal-scoring but also in creating opportunities for teammates, making him a multifaceted threat in attack.

Despite being valued at €78 million, recent predictions suggest that his market value could exceed €136 million, indicating he is currently undervalued. This discrepancy implies that top clubs would be acquiring a player who is not only a proven performer but also has the potential for further impact in the coming years.

Salah's playing style is quite comparable to other elite forwards listed, such as Ángel Di María and Bernardo Silva, both of whom possess similar techn

In [None]:
# Same request, this time with the function's default model
report = oracle_scout_report(
    player_name="M. Salah",
    extra_question="Would he still be a good signing for a top club in a high-pressing system?",
)
print(report)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Mohamed Salah at 28 years old remains a world-class right winger with an overall rating of 90, demonstrating elite technical skills, pace, and directness in attack. His profile suits a high-pressing system well: Salah’s work rate off the ball, combined with his agility and quick decision-making, enables him to effectively press defenders and quickly transition to attack. His ability to exploit spaces behind the defensive line and his proficiency in one-on-one situations make him a constant threat in fast, intense tactical setups.

Valuation-wise, Salah appears significantly undervalued compared to his predicted market worth, reflecting his sustained high level and influence on the pitch. When compared to similar players like Ángel Di María and Bernardo Silva, Salah offers a more consistent goal threat and end product, while also possessing the stamina and tactical intelligence required in intense pressing systems. 

In [None]:
# 1. Score every player with the clean-feature model and convert log-values to euros
X_all_clean = df_dl[feature_cols_clean].to_numpy()
X_all_clean_scaled = scaler_clean.transform(X_all_clean)

y_all_pred_log = value_model.predict(X_all_clean_scaled).flatten()
df_dl["predicted_value_eur"] = np.expm1(y_all_pred_log)

# 2. Positive diff => the model prices the player above the listed value.
#    Zero listed values become NaN first so the ratio never divides by zero.
df_dl["value_diff_eur"] = df_dl["predicted_value_eur"] - df_dl["value_eur"]
nonzero_value_eur = df_dl["value_eur"].replace(0, np.nan)
df_dl["relative_diff"] = df_dl["value_diff_eur"] / nonzero_value_eur

# 3. Convenience flag for dashboards: more than 20% undervalued
df_dl["is_undervalued_20"] = df_dl["relative_diff"] > 0.20

# 4. Slim, dashboard-friendly copy
dashboard_cols = [
    "short_name", "long_name", "age", "nationality", "club_name", "main_position",
    "overall", "potential", "value_eur", "predicted_value_eur",
    "value_diff_eur", "relative_diff",
]
df_dash = df_dl.loc[:, dashboard_cols].copy()


[1m592/592[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


In [None]:
import pandas as pd

def ui_player_oracle(player_name, question):
    """
    UI callback: scouting report plus a table of similar players for one player.

    Returns a (text, DataFrame) pair. The text is the LLM report, or a prompt/error
    message when the name is empty or unknown (in which case the table is empty).
    """
    if not player_name:
        return "Please enter a player name.", pd.DataFrame()

    # Structured summary (also the source of the similar-players table)
    summary, err = analyze_player(player_name, top_k=5)
    if err:
        return f"Error: {err}", pd.DataFrame()

    # Natural-language report; a missing question is passed on as an empty string
    report = oracle_scout_report(player_name, extra_question=question or "")

    # Friendlier column headers for display
    display_names = {
        "short_name": "Player",
        "club_name": "Club",
        "main_position": "Pos",
        "overall": "OVR",
        "potential": "POT",
        "value_eur": "Value (€)",
        "similarity": "Similarity",
    }
    sim_df = pd.DataFrame(summary["similar_players"])
    sim_df = sim_df if sim_df.empty else sim_df.rename(columns=display_names)

    return report, sim_df


In [None]:
def ui_club_dashboard(position, min_age, max_age, max_value, only_undervalued):
    """
    UI callback: filtered, ranked view over df_dash.

    Filters: inclusive age range; position substring match on main_position
    (skipped for "Any"); maximum current value (skipped when max_value <= 0);
    optionally only players more than 20% undervalued. The result is sorted by
    relative_diff (largest first), capped at 100 rows, with display column names
    and the relative difference expressed in percent.
    """
    table = df_dash.copy()

    # Inclusive age window
    table = table[table["age"].between(min_age, max_age)]

    # Position: main_position must contain the requested code (e.g. "ST", "RW")
    if position != "Any":
        table = table[table["main_position"].str.contains(position)]

    # A non-positive max_value means "no price cap"
    if max_value > 0:
        table = table[table["value_eur"] <= max_value]

    # Optional bargain filter: model value more than 20% above the listed value
    if only_undervalued:
        table = table[table["relative_diff"] > 0.20]

    show_cols = [
        "short_name", "age", "nationality", "club_name", "main_position",
        "overall", "potential", "value_eur", "predicted_value_eur", "value_diff_eur", "relative_diff"
    ]
    display_names = {
        "short_name": "Player",
        "club_name": "Club",
        "main_position": "Pos",
        "overall": "OVR",
        "potential": "POT",
        "value_eur": "Current (€)",
        "predicted_value_eur": "Model (€)",
        "value_diff_eur": "Diff (€)",
        "relative_diff": "Diff (%)",
    }

    # Most undervalued first, then keep the top 100 rows and relabel
    table = (
        table.sort_values("relative_diff", ascending=False)[show_cols]
        .head(100)
        .rename(columns=display_names)
    )
    table["Diff (%)"] = (table["Diff (%)"] * 100).round(1)

    return table


In [None]:
def ui_transfer_simulator(club_name, budget, position, max_age):
    """
    UI callback: propose signings that fit within a transfer budget.

    Candidates come from df_dash: players not at `club_name` (unless "Any"),
    aged at most `max_age`, matching `position` (unless "Any"), and more than 15%
    undervalued by the model. They are visited in ascending order of predicted
    value and added greedily whenever their current value still fits the budget.

    Returns (summary_text, DataFrame); the frame is empty when nothing fits.
    """
    pool = df_dash.copy()

    # A club does not "sign" its own players
    if club_name != "Any":
        pool = pool[pool["club_name"] != club_name]

    pool = pool[pool["age"] <= max_age]

    if position != "Any":
        pool = pool[pool["main_position"].str.contains(position)]

    # Bargains only: model value more than 15% above the listed value
    pool = pool[pool["relative_diff"] > 0.15]

    # Visit candidates from lowest to highest model value
    pool = pool.sort_values("predicted_value_eur")

    # Greedy fill: a candidate that does not fit is skipped, later ones may still fit
    selected_rows, total_cost = [], 0.0
    for _, candidate in pool.iterrows():
        cost_if_signed = total_cost + candidate["value_eur"]
        if cost_if_signed <= budget:
            selected_rows.append(candidate)
            total_cost = cost_if_signed

    if not selected_rows:
        return f"No suitable signings found within budget €{int(budget):,}.", pd.DataFrame()

    show_cols = [
        "short_name", "age", "nationality", "club_name", "main_position",
        "overall", "potential", "value_eur", "predicted_value_eur", "value_diff_eur", "relative_diff"
    ]
    display_names = {
        "short_name": "Player",
        "club_name": "From Club",
        "main_position": "Pos",
        "overall": "OVR",
        "potential": "POT",
        "value_eur": "Current (€)",
        "predicted_value_eur": "Model (€)",
        "value_diff_eur": "Diff (€)",
        "relative_diff": "Diff (%)",
    }
    rec_df = pd.DataFrame(selected_rows)[show_cols].rename(columns=display_names)
    rec_df["Diff (%)"] = (rec_df["Diff (%)"] * 100).round(1)

    return f"Total estimated cost of recommended signings: €{int(total_cost):,}", rec_df


In [None]:
def oracle_general_football(question, model_name="gpt-4o-mini"):
    """
    Answer a free-form football question with the chat model.

    An empty question returns a short usage hint without calling the API;
    otherwise the model's reply text is returned.
    """
    if not question:
        return "Ask me anything about football, tactics, players, or data."

    system_message = {
        "role": "system",
        "content": "You are a football tactics and analytics expert. Answer clearly and with tactical depth.",
    }
    user_message = {"role": "user", "content": question}

    response = client.chat.completions.create(
        model=model_name,
        messages=[system_message, user_message],
        temperature=0.7,
    )
    first_choice = response.choices[0]
    return first_choice.message.content


In [None]:
import math

def ui_scout_value_estimator(
    name,
    age,
    position,
    pace,
    dribbling,
    finishing,
    passing,
    workrate,
    height,
    league,
    injury,
    contract_months
):
    """
    Rule-based (non-ML) market value estimate for a hand-entered prospect.

    A weighted skill score is turned into a base euro amount (score * 150,000)
    and then multiplied by league, injury, age, height and contract factors.

    Returns:
      - report (markdown string)
      - raw numbers (dict) -> for JSON display
    """

    # Fallbacks for a blank name and an unspecified position
    player_name = name or "Unnamed Prospect"
    pos = "Unknown" if (not position or position == "Any") else position

    # Categorical multipliers; an unrecognised choice falls back to the .get() default
    league_factor = {
        "Top 5 League": 1.00,
        "Tier 2 League": 0.75,
        "Tier 3 League": 0.55,
        "Youth League": 0.35,
    }.get(league, 0.6)

    injury_factor = {
        "None": 1.00,
        "Low": 0.90,
        "Medium": 0.75,
        "High": 0.55,
    }.get(injury, 0.8)

    workrate_factor = {
        "Low": 0.90,
        "Medium": 1.00,
        "High": 1.10,
    }.get(workrate, 1.0)

    # Weighted skill mix (the weights add up to 0.90), scaled by work rate
    weighted_skills = (
        pace * 0.20 +
        dribbling * 0.25 +
        finishing * 0.25 +
        passing * 0.20
    )
    skill_score = weighted_skills * workrate_factor

    # Age curve: 1.2 at age 26, minus 0.05 per year away from it, never below 0.5
    age_factor = max(0.5, 1.2 - abs(26 - age) * 0.05)

    # Height: small linear adjustment around 180; missing/zero height is neutral
    if height:
        height_factor = 1.0 + (height - 180) / 1000.0
    else:
        height_factor = 1.0

    # Contract: every 60 remaining months adds 1.0 to the multiplier
    contract_factor = 1.0 + (contract_months / 60.0)

    # Base euro value, then apply the multipliers one after another
    estimated_value = skill_score * 150_000
    for multiplier in (league_factor, injury_factor, age_factor, height_factor, contract_factor):
        estimated_value = estimated_value * multiplier

    estimated_value = max(0, estimated_value)  # clamp negatives to zero

    report = f"""
### 🧮 Estimated Market Value: **€{estimated_value:,.0f}**

**Player:** {player_name}
**Position:** {pos}
**Age:** {age}
**League Level:** {league}
**Injury History:** {injury}
**Contract Remaining:** {contract_months} months

### 📊 Internal Scouting Indicators
- Skill Score: **{skill_score:.1f}**
- Age Factor: **{age_factor:.2f}** (peak ≈ 24–28)
- Work Rate Factor: **{workrate_factor:.2f}**
- League Factor: **{league_factor:.2f}**
- Injury Factor: **{injury_factor:.2f}**
- Contract Factor: **{contract_factor:.2f}**

Use this as a **rough model-based benchmark**.
You can tweak sliders to simulate development, injuries, or a move to a stronger league.
"""

    # Machine-readable numbers: money and score to 2 decimals, factors to 3
    raw = {
        "estimated_value_eur": round(estimated_value, 2),
        "skill_score": round(skill_score, 2),
    }
    for key, factor in (
        ("age_factor", age_factor),
        ("workrate_factor", workrate_factor),
        ("league_factor", league_factor),
        ("injury_factor", injury_factor),
        ("contract_factor", contract_factor),
    ):
        raw[key] = round(factor, 3)

    return report, raw


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

def ui_demographics_eda(position, club, nationality, min_age, max_age):
    """
    Demographic / EDA view over the module-level ``df_dl`` frame.

    NOTE(review): a function with this same name is defined again in a later
    cell of this notebook; the definition that runs last is the one in effect.

    Args:
        position: text to look for in ``main_position`` (literal substring
            match), or "Any" to skip the filter.
        club: exact ``club_name`` to keep, or "Any".
        nationality: exact ``nationality`` to keep, or "Any".
        min_age: inclusive lower age bound.
        max_age: inclusive upper age bound.

    Returns:
        (summary_md, agg_table, fig_age, fig_nat, fig_val):
          - markdown summary
          - small aggregated table (top positions + top nationalities)
          - age distribution plot
          - nationality distribution plot
          - value vs age scatter (blank figure if ``value_eur`` is absent)
        When no rows match: a "no players" message, an empty DataFrame and
        one blank figure reused for all three plot slots.
    """
    # Local import: figures are created through the object-oriented API so they
    # are never registered with pyplot's global figure manager. With
    # plt.subplots() every button click left three figures open for the
    # lifetime of the kernel.
    from matplotlib.figure import Figure

    # --- FILTERS ---
    # Boolean indexing returns a new frame, so df_dl itself is never mutated.
    df = df_dl[df_dl["age"].between(min_age, max_age)]

    if position != "Any":
        # regex=False: the dropdown value is plain text, not a pattern.
        df = df[
            df["main_position"].astype(str).str.contains(position, regex=False, na=False)
        ]

    if club != "Any":
        df = df[df["club_name"] == club]

    if nationality != "Any":
        df = df[df["nationality"] == nationality]

    if df.empty:
        # Return empty objects but valid types for every output slot.
        empty_fig = Figure()
        return (
            "### No players matched the selected filters.",
            pd.DataFrame(),
            empty_fig,
            empty_fig,
            empty_fig,
        )

    # --- SUMMARY TEXT ---
    n_players = len(df)
    avg_age = df["age"].mean()

    summary_lines = [
        "### 🎯 Demographic Snapshot",
        f"- Players in slice: **{n_players}**",
        f"- Average age: **{avg_age:.1f}** years",
    ]
    if "overall" in df.columns:
        avg_ovr = df["overall"].mean()
        summary_lines.append(f"- Average overall rating: **{avg_ovr:.1f}**")
    if "value_eur" in df.columns:
        avg_value = df["value_eur"].mean()
        summary_lines.append(f"- Average market value: **€{avg_value:,.0f}**")

    summary_md = "\n".join(summary_lines)

    # --- AGG TABLE: positions + nationalities (top 10 each) ---
    # rename_axis + reset_index(name=...) yields the same column names on every
    # pandas version. Renaming the columns that value_counts().reset_index()
    # produces does not: pandas 2.0 changed them from ("index", <col>) to
    # (<col>, "count"), which mislabels the table.
    pos_counts = (
        df["main_position"]
        .value_counts()
        .head(10)
        .rename_axis("Position")
        .reset_index(name="Count")
    )

    nat_vc = df["nationality"].value_counts().head(10)
    nat_counts = nat_vc.rename_axis("Nationality").reset_index(name="Count")

    agg_table = pd.concat(
        [
            pos_counts.assign(Metric="Position"),
            nat_counts.assign(Metric="Nationality"),
        ],
        ignore_index=True,
    )

    # --- PLOTS ---
    # 1) Age distribution: one bin per whole year of age present in the slice.
    fig_age = Figure()
    ax_age = fig_age.subplots()
    ax_age.hist(
        df["age"],
        bins=range(int(df["age"].min()), int(df["age"].max()) + 2),
        edgecolor="black",
    )
    ax_age.set_title("Age Distribution")
    ax_age.set_xlabel("Age")
    ax_age.set_ylabel("Number of players")
    fig_age.tight_layout()

    # 2) Top 10 nationalities
    fig_nat = Figure()
    ax_nat = fig_nat.subplots()
    nat_vc.plot(kind="bar", ax=ax_nat)
    ax_nat.set_title("Top 10 Nationalities")
    ax_nat.set_ylabel("Number of players")
    ax_nat.set_xlabel("Nationality")
    fig_nat.tight_layout()

    # 3) Value vs Age scatter (blank figure when the value column is missing)
    fig_val = Figure()
    if "value_eur" in df.columns:
        ax_val = fig_val.subplots()
        ax_val.scatter(df["age"], df["value_eur"])
        ax_val.set_title("Value vs Age")
        ax_val.set_xlabel("Age")
        ax_val.set_ylabel("Market value (€)")
        fig_val.tight_layout()

    return summary_md, agg_table, fig_age, fig_nat, fig_val


In [None]:
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from openai import OpenAI

# OpenAI client; constructed with no arguments, so the API key is expected to
# come from the environment (OPENAI_API_KEY).
client = OpenAI()

# -------------------------------------------------------------------
# Prerequisites — earlier cells must already have defined:
#   - df_dl                      (full player dataset)
#   - df_dash                    (dashboard/transfer subset)
#   - ui_player_oracle(...)
#   - ui_club_dashboard(...)
#   - ui_transfer_simulator(...)
#   - oracle_general_football(...)
# -------------------------------------------------------------------

# Dropdown choices: the "Any" sentinel first, then the sorted distinct
# non-null values of the corresponding df_dl column.
all_positions = ["Any", *sorted(df_dl["main_position"].dropna().unique().tolist())]
all_clubs = ["Any", *sorted(df_dl["club_name"].dropna().unique().tolist())]

# Nationality choices are used by the demographics / EDA tab.
all_nationalities = ["Any", *sorted(df_dl["nationality"].dropna().unique().tolist())]


# =========================
#  A) SCOUT VALUATION (LLM)
# =========================
def ui_scout_value_openai(
    name,
    age,
    position,
    overall,
    potential,
    current_value,
    league_level,
    contract_months,
    foot,
    style_notes
):
    """
    Manual scout interface → send profile to OpenAI → get valuation & report.

    Builds a prompt from the form fields, sends it through the module-level
    ``client`` (OpenAI Responses API) and returns the model's text.

    Args:
        name: player name; blank falls back to "Unnamed Prospect".
        age: player age as entered in the form.
        position: main position; the "Any" dropdown sentinel (or a blank
            value) is sent to the model as "Not specified".
        overall: overall rating as entered in the form.
        potential: potential rating as entered in the form.
        current_value: current club valuation in euros; None/0 means unknown.
        league_level: league tier label from the dropdown.
        contract_months: months remaining on the contract.
        foot: preferred foot label.
        style_notes: free-text notes; blank is sent as "None provided".

    Returns:
        A markdown string: the model's report, or a short warning message if
        the request failed or came back without any text.
    """

    player_name = (name or "").strip() or "Unnamed Prospect"
    current_value = current_value or 0

    # "Any" is the filter sentinel prepended to the shared position list; it is
    # not a position, so do not present it to the model as one.
    position_label = position if position and position != "Any" else "Not specified"
    notes = (style_notes or "").strip() or "None provided"

    profile_text = f"""
You are an elite football scout and sporting director.
Estimate a realistic transfer market value (in euros) and provide a scouting report
for the following player. Use modern European market assumptions.

Player profile:
- Name: {player_name}
- Age: {age}
- Position: {position_label}
- Preferred foot: {foot}
- Overall rating (0-100): {overall}
- Potential rating (0-100): {potential}
- Current estimated club valuation (if known): €{current_value:,.0f}
- League level: {league_level}
- Months remaining on contract: {contract_months}
- Playing style / notes: {notes}

Instructions:
1. Start with a single line: "Estimated Fair Market Value: €X".
2. Then give a short section "Summary Profile" (2–3 bullet points).
3. Add a section "Tactical Fit" explaining which systems/leagues suit the player.
4. If current_value > 0, say whether the player is undervalued or overvalued
   and by roughly what percentage.
5. Keep it concise (around 200–300 words).
"""

    try:
        resp = client.responses.create(
            model="gpt-4.1-mini",   # adjust to your preferred OpenAI model
            input=profile_text
        )
    except Exception as exc:
        # UI callback: show a readable message in the report pane instead of
        # an opaque Gradio error. Only the exception type is shown because the
        # app may be served on a public share link.
        return (
            "### ⚠️ Valuation request failed\n"
            f"The OpenAI request raised `{type(exc).__name__}`. "
            "Check the API key, model name and network access, then try again."
        )

    # output_text concatenates every text part of the response, so it does not
    # depend on the first output item being a message with a text content part.
    ai_text = (resp.output_text or "").strip()
    if not ai_text:
        return "### ⚠️ The model returned no text. Please try again."

    return ai_text


# =========================
#  B) DEMOGRAPHICS + EDA
# =========================
def ui_demographics_eda(position, club, nationality, min_age, max_age):
    """
    Demographic / EDA view over the module-level ``df_dl`` frame.

    NOTE(review): this repeats the ``ui_demographics_eda`` defined in an
    earlier cell of this notebook and replaces it once this cell has run.

    Args:
        position: text to look for in ``main_position`` (literal substring
            match), or "Any" to skip the filter.
        club: exact ``club_name`` to keep, or "Any".
        nationality: exact ``nationality`` to keep, or "Any".
        min_age: inclusive lower age bound.
        max_age: inclusive upper age bound.

    Returns:
        (summary_md, agg_table, fig_age, fig_nat, fig_val):
          - markdown summary
          - small aggregated table (top positions + top nationalities)
          - age distribution plot
          - nationality distribution plot
          - value vs age scatter (blank figure if ``value_eur`` is absent)
        When no rows match: a "no players" message, an empty DataFrame and
        one blank figure reused for all three plot slots.
    """
    # Local import: figures are created through the object-oriented API so they
    # are never registered with pyplot's global figure manager. With
    # plt.subplots() every button click left three figures open for the
    # lifetime of the kernel.
    from matplotlib.figure import Figure

    # --- FILTERS ---
    # Boolean indexing returns a new frame, so df_dl itself is never mutated.
    df = df_dl[df_dl["age"].between(min_age, max_age)]

    if position != "Any":
        # regex=False: the dropdown value is plain text, not a pattern.
        df = df[
            df["main_position"].astype(str).str.contains(position, regex=False, na=False)
        ]

    if club != "Any":
        df = df[df["club_name"] == club]

    if nationality != "Any":
        df = df[df["nationality"] == nationality]

    if df.empty:
        # Return empty objects but valid types for every output slot.
        empty_fig = Figure()
        return (
            "### No players matched the selected filters.",
            pd.DataFrame(),
            empty_fig,
            empty_fig,
            empty_fig,
        )

    # --- SUMMARY TEXT ---
    n_players = len(df)
    avg_age = df["age"].mean()

    summary_lines = [
        "### 🎯 Demographic Snapshot",
        f"- Players in slice: **{n_players}**",
        f"- Average age: **{avg_age:.1f}** years",
    ]
    if "overall" in df.columns:
        avg_ovr = df["overall"].mean()
        summary_lines.append(f"- Average overall rating: **{avg_ovr:.1f}**")
    if "value_eur" in df.columns:
        avg_value = df["value_eur"].mean()
        summary_lines.append(f"- Average market value: **€{avg_value:,.0f}**")

    summary_md = "\n".join(summary_lines)

    # --- AGG TABLE: positions + nationalities (top 10 each) ---
    # rename_axis + reset_index(name=...) yields the same column names on every
    # pandas version. Renaming the columns that value_counts().reset_index()
    # produces does not: pandas 2.0 changed them from ("index", <col>) to
    # (<col>, "count"), which mislabels the table.
    pos_counts = (
        df["main_position"]
        .value_counts()
        .head(10)
        .rename_axis("Position")
        .reset_index(name="Count")
    )

    nat_vc = df["nationality"].value_counts().head(10)
    nat_counts = nat_vc.rename_axis("Nationality").reset_index(name="Count")

    agg_table = pd.concat(
        [
            pos_counts.assign(Metric="Position"),
            nat_counts.assign(Metric="Nationality"),
        ],
        ignore_index=True,
    )

    # --- PLOTS ---
    # 1) Age distribution: one bin per whole year of age present in the slice.
    fig_age = Figure()
    ax_age = fig_age.subplots()
    ax_age.hist(
        df["age"],
        bins=range(int(df["age"].min()), int(df["age"].max()) + 2),
        edgecolor="black",
    )
    ax_age.set_title("Age Distribution")
    ax_age.set_xlabel("Age")
    ax_age.set_ylabel("Number of players")
    fig_age.tight_layout()

    # 2) Top 10 nationalities
    fig_nat = Figure()
    ax_nat = fig_nat.subplots()
    nat_vc.plot(kind="bar", ax=ax_nat)
    ax_nat.set_title("Top 10 Nationalities")
    ax_nat.set_ylabel("Number of players")
    ax_nat.set_xlabel("Nationality")
    fig_nat.tight_layout()

    # 3) Value vs Age scatter (blank figure when the value column is missing)
    fig_val = Figure()
    if "value_eur" in df.columns:
        ax_val = fig_val.subplots()
        ax_val.scatter(df["age"], df["value_eur"])
        ax_val.set_title("Value vs Age")
        ax_val.set_xlabel("Age")
        ax_val.set_ylabel("Market value (€)")
        fig_val.tight_layout()

    return summary_md, agg_table, fig_age, fig_nat, fig_val


In [None]:
# Gradio app layout: one tab per tool. Each tab creates its input/output
# components and binds a button to the matching ui_* callback. Component
# creation order inside a tab is the on-screen order, so keep it as written.
with gr.Blocks(title="Football Oracle AI Suite") as demo:
    gr.Markdown("# ⚽ Football Oracle AI Suite\nA deep-learning + LLM powered football scouting assistant.")

    # -----------------
    # 1) PLAYER ORACLE
    # -----------------
    with gr.Tab("Player Oracle"):
        gr.Markdown("### 🔍 Player Scout & Similarity Search")
        player_name_in = gr.Textbox(label="Player name (as in dataset, e.g. 'M. Salah')")
        question_in = gr.Textbox(
            label="Extra question (optional)",
            placeholder="e.g. How would he fit in a high-pressing 4-3-3?"
        )
        report_out = gr.Markdown(label="Scout Report")
        # Named separately from the Transfer Simulator's table: both used to be
        # bound to `sim_table_out`, so the name silently changed meaning
        # further down the cell.
        similar_table_out = gr.Dataframe(label="Similar Players")

        analyze_btn = gr.Button("Analyze Player")
        analyze_btn.click(
            fn=ui_player_oracle,
            inputs=[player_name_in, question_in],
            outputs=[report_out, similar_table_out]
        )

    # -----------------
    # 2) CLUB DASHBOARD
    # -----------------
    with gr.Tab("Club Dashboard"):
        gr.Markdown("### 📊 Undervalued Players Finder")
        pos_dd = gr.Dropdown(choices=all_positions, label="Position", value="Any")
        min_age_slider = gr.Slider(16, 40, value=18, step=1, label="Min Age")
        max_age_slider = gr.Slider(16, 40, value=30, step=1, label="Max Age")
        # Despite the name this is a free-form Number input, not a slider.
        max_val_slider = gr.Number(value=20000000, label="Max Current Value (€)")
        undervalued_chk = gr.Checkbox(
            value=True,
            label="Only show clearly undervalued players (>20%)"
        )

        dash_table_out = gr.Dataframe(label="Matching Players")

        dash_btn = gr.Button("Find Players")
        dash_btn.click(
            fn=ui_club_dashboard,
            inputs=[pos_dd, min_age_slider, max_age_slider, max_val_slider, undervalued_chk],
            outputs=[dash_table_out]
        )

    # -----------------
    # 3) TRANSFER SIMULATOR
    # -----------------
    with gr.Tab("Transfer Simulator"):
        gr.Markdown("### 🧩 Transfer Window Simulator")
        club_dd = gr.Dropdown(choices=all_clubs, label="Your Club", value="Any")
        budget_num = gr.Number(value=50000000, label="Transfer Budget (€)")
        pos_sim_dd = gr.Dropdown(choices=all_positions, label="Target Position", value="Any")
        max_age_sim = gr.Slider(16, 40, value=28, step=1, label="Max Age for Targets")

        sim_summary_out = gr.Markdown(label="Summary")
        sim_table_out = gr.Dataframe(label="Suggested Signings")

        sim_btn = gr.Button("Suggest Signings")
        sim_btn.click(
            fn=ui_transfer_simulator,
            inputs=[club_dd, budget_num, pos_sim_dd, max_age_sim],
            outputs=[sim_summary_out, sim_table_out]
        )

    # -----------------
    # 4) ASK ORACLE (GENERAL Q&A)
    # -----------------
    with gr.Tab("Ask Oracle"):
        gr.Markdown("### 🧠 Ask Football Oracle (General Q&A)")
        oracle_q_in = gr.Textbox(
            label="Your question",
            lines=3,
            placeholder="e.g. Explain the role of an inverted full-back in modern football."
        )
        oracle_a_out = gr.Markdown(label="Oracle Answer")

        oracle_btn = gr.Button("Ask")
        oracle_btn.click(
            fn=oracle_general_football,
            inputs=[oracle_q_in],
            outputs=[oracle_a_out]
        )

    # -----------------
    # 5) SCOUT VALUATION (NEW)
    # -----------------
    with gr.Tab("Scout Valuation"):
        gr.Markdown(
            "### 🧾 Manual Scout Valuation\n"
            "Enter a player profile to get an AI-driven valuation and report."
        )

        scout_name_in = gr.Textbox(label="Player Name (optional)", placeholder="e.g. J. Doe")
        scout_age_in = gr.Slider(16, 40, value=24, step=1, label="Age")
        scout_pos_in = gr.Dropdown(choices=all_positions, value="Any", label="Main Position")

        scout_overall_in = gr.Slider(40, 99, value=75, step=1, label="Overall Rating")
        scout_potential_in = gr.Slider(40, 99, value=82, step=1, label="Potential Rating")

        scout_current_val_in = gr.Number(
            value=0,
            label="Current Club Valuation (€) (0 if unknown)"
        )

        scout_league_in = gr.Dropdown(
            [
                "Top 5 League",
                "Strong Tier 2 (e.g. Eredivisie, Championship)",
                "Smaller European League",
                "Non-European / Developing League",
            ],
            value="Top 5 League",
            label="League Level"
        )

        scout_contract_in = gr.Slider(0, 60, value=24, step=6, label="Months Remaining on Contract")

        scout_foot_in = gr.Dropdown(
            ["Right", "Left", "Both"],
            value="Right",
            label="Preferred Foot"
        )

        scout_style_in = gr.Textbox(
            label="Playing Style / Key Notes",
            lines=3,
            placeholder=(
                "e.g. Explosive winger, likes to cut inside, strong in 1v1s "
                "but defensive work rate is average."
            )
        )

        scout_report_out = gr.Markdown(label="Scout Valuation Report")

        scout_btn = gr.Button("Generate Valuation")
        # The inputs list must stay in the same order as the parameters of
        # ui_scout_value_openai; Gradio passes them positionally.
        scout_btn.click(
            fn=ui_scout_value_openai,
            inputs=[
                scout_name_in,
                scout_age_in,
                scout_pos_in,
                scout_overall_in,
                scout_potential_in,
                scout_current_val_in,
                scout_league_in,
                scout_contract_in,
                scout_foot_in,
                scout_style_in
            ],
            outputs=[scout_report_out]
        )

    # -----------------
    # 6) DEMOGRAPHICS & EDA (NEW)
    # -----------------
    with gr.Tab("Demographics & EDA"):
        gr.Markdown(
            "### 🌍 Dataset Demographics & EDA\n"
            "Explore age, nationality, and position distributions from the underlying dataset."
        )

        demo_pos_dd = gr.Dropdown(choices=all_positions, value="Any", label="Filter by Position")
        demo_club_dd = gr.Dropdown(choices=all_clubs, value="Any", label="Filter by Club")
        demo_nat_dd = gr.Dropdown(choices=all_nationalities, value="Any", label="Filter by Nationality")

        demo_min_age = gr.Slider(16, 40, value=18, step=1, label="Min Age")
        demo_max_age = gr.Slider(16, 40, value=35, step=1, label="Max Age")

        demo_run_btn = gr.Button("Run Demographic EDA")

        demo_summary_out = gr.Markdown(label="Summary")
        demo_table_out = gr.Dataframe(label="Aggregated Counts (Positions & Nationalities)")
        demo_age_plot_out = gr.Plot(label="Age Distribution")
        demo_nat_plot_out = gr.Plot(label="Top Nationalities")
        demo_val_plot_out = gr.Plot(label="Value vs Age")

        # The outputs list matches the 5-tuple returned by ui_demographics_eda:
        # summary, table, age plot, nationality plot, value plot.
        demo_run_btn.click(
            fn=ui_demographics_eda,
            inputs=[demo_pos_dd, demo_club_dd, demo_nat_dd, demo_min_age, demo_max_age],
            outputs=[
                demo_summary_out,
                demo_table_out,
                demo_age_plot_out,
                demo_nat_plot_out,
                demo_val_plot_out,
            ],
        )

demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2e51c6408399fe7edb.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


