In [None]:
import tkinter as tk
from tkinter import Canvas, Toplevel, Entry, Button, PhotoImage, messagebox
from PIL import Image, ImageTk
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder


df = pd.read_csv("D:/5th Semester/Artificial Intelligence/futbal_DatasetProj.csv")
df.head()

In [3]:
df.columns = df.columns.str.replace(' ', '_')  # Replace spaces with underscores
mask = (df['current_value'] > 0) & (df['highest_value'] > 0)
df_filtered = df[mask]
df_modified = df_filtered[df_filtered['position_encoded'] != 1].copy()

In [5]:
# Feature engineering
df_modified['performance'] = (
    (df_modified['goals'] * 0.5) +
    (df_modified['assists'] * 0.25) +
    (df_modified['goals_conceded'] * -0.5) +
    (df_modified['clean_sheets'] * 0.5) +
    (df_modified['yellow_cards'] * -0.5) +
    (df_modified['second_yellow_cards'] * -1.0) +
    (df_modified['red_cards'] * -1.5) +
    (df_modified['minutes_played'] * 0.25) +
    (df_modified['days_injured'] * -0.25) +
    (df_modified['award'] * 0.5)
)

In [7]:
def categorize_age(age):
    if 15 <= age < 20:
        return 1.0
    elif 20 <= age < 25:
        return 0.75
    elif 25 <= age < 30:
        return 0.5
    elif 30 <= age < 35:
        return 0.25
    elif 35 <= age <= 40:
        return 0.1
    else:
        return 0

df_modified['age_group'] = df_modified['age'].apply(categorize_age)

In [9]:

df_modified['adjusted_current_value'] = df_modified['current_value'] * 0.5

In [11]:
df_modified

Unnamed: 0,player,team,name,position,height,age,appearance,goals,assists,yellow_cards,...,days_injured,games_injured,award,current_value,highest_value,position_encoded,winger,performance,age_group,adjusted_current_value
3,/lisandro-martinez/profil/spieler/480762,Manchester United,Lisandro Martínez,Defender Centre-Back,175.000000,25.0,82,0.028090,0.056180,0.224719,...,175,22,9,50000000,50000000,2,0,1562.665730,0.50,25000000.0
4,/raphael-varane/profil/spieler/164770,Manchester United,Raphaël Varane,Defender Centre-Back,191.000000,30.0,63,0.017889,0.017889,0.053667,...,238,51,21,40000000,80000000,2,0,1208.736583,0.25,20000000.0
5,/harry-maguire/profil/spieler/177907,Manchester United,Harry Maguire,Defender Centre-Back,194.000000,30.0,68,0.037799,0.000000,0.302394,...,148,27,1,25000000,70000000,2,0,1153.848803,0.25,12500000.0
6,/victor-lindelof/profil/spieler/184573,Manchester United,Victor Lindelöf,Defender Centre-Back,187.000000,28.0,70,0.000000,0.032901,0.115153,...,95,19,10,15000000,35000000,2,0,1348.950649,0.50,7500000.0
7,/phil-jones/profil/spieler/117996,Manchester United,Phil Jones,Defender Centre-Back,185.000000,31.0,8,0.000000,0.000000,0.216346,...,932,169,7,2000000,20000000,2,0,-125.608173,0.25,1000000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10749,/aidan-simmons/profil/spieler/867763,Western Sydney Wanderers,Aidan Simmons,Attack-RightWinger,181.240353,20.0,16,0.175953,0.087977,0.263930,...,0,0,0,75000,75000,4,1,255.728006,0.75,37500.0
10750,/kusini-yengi/profil/spieler/708099,Western Sydney Wanderers,Kusini Yengi,Attack Centre-Forward,190.000000,24.0,26,0.372671,0.186335,0.186335,...,102,18,0,300000,300000,4,0,336.889752,0.75,150000.0
10751,/nathanael-blair/profil/spieler/1023268,Western Sydney Wanderers,Nathanael Blair,Attack Centre-Forward,181.240353,19.0,20,0.375000,0.000000,0.187500,...,0,0,0,50000,50000,4,0,240.093750,1.00,25000.0
10752,/zachary-sapsford/profil/spieler/703657,Western Sydney Wanderers,Zachary Sapsford,Attack Centre-Forward,181.240353,20.0,17,0.312139,0.104046,0.000000,...,0,0,0,50000,50000,4,0,216.276012,0.75,25000.0


In [13]:
# Select the 4 engineered input columns
df_final = df_modified[['age_group', 'performance', 'adjusted_current_value']].copy()

# Rename for clarity
df_final.rename(columns={
    'age_group': 'AGE',
    'performance': 'PERFORMANCE',
    'adjusted_current_value': 'CURRENT_VALUE',
    
}, inplace=True)

# Print the final dataset
print(df_final.head())

    AGE  PERFORMANCE  CURRENT_VALUE
3  0.50  1562.665730     25000000.0
4  0.25  1208.736583     20000000.0
5  0.25  1153.848803     12500000.0
6  0.50  1348.950649      7500000.0
7  0.25  -125.608173      1000000.0


In [15]:
x = df_modified.drop(['current_value','highest_value','player','team','position','goals','assists','goals_conceded','clean_sheets','yellow_cards','second_yellow_cards','red_cards','minutes_played','days_injured','award','height','age','adjusted_current_value','winger','games_injured','appearance'],axis = 1)  #data training variable
y = df_modified['current_value']                                                                                 #test variable
y_log = np.log1p(y)
crossVal_fold = KFold(n_splits=10, shuffle=True, random_state=42)

test_mse = []
test_mae = []
test_r2 = []
predictions = np.zeros(len(df_modified))
ideal_cycle = []

In [17]:
fold = 1
for train_index,test_index in crossVal_fold.split(x):
    print(f"Training Fold: ,{fold}")

    x_train = x.iloc[train_index]
    x_test = x.iloc[test_index]
    y_train = y_log.iloc[train_index]
    y_test = y_log.iloc[test_index]

    x_train = x_train.select_dtypes(include=['int64', 'float64', 'bool'   ]).copy()
    x_test = x_test.select_dtypes(include=['int64', 'float64', 'bool']).copy()

    model = XGBRegressor(n_estimators=1000, learning_rate=0.1, max_depth=6, random_state=42)
    model.set_params(eval_metric='rmse', early_stopping_rounds=50) 

    # fit() method ka kaam model ko train karna hota hai.
    # Model ko training data diya jata hai taake woh apne parameters ko adjust karke predictions seekh sake.
    model.fit(x_train,y_train, 
              eval_set = [(x_test,y_test)],  #eval_set ka use model ki performance evaluate karne ke liye hota hai training ke dauran
              verbose = False)   #verbose ka kaam training ke output ko control karna hota hai.
# False ka matlab: Training ke dauran koi output print nahi hoga. Yeh useful hai jab aapko output nahi chahiye aur training silently complete karni ho.
    
    best_iteration = model.best_iteration   # woh iteration number batata hai jahan model ne testing data par best performance di.
    ideal_cycle.append(best_iteration)    # us best iteration number ko ek list me save karta hai taake aap har fold ka result dekh saken.
    #Yeh useful hota hai jab aapko samajhna ho ke training ke kitne rounds ke baad optimal results mile.
    
    y_test_prediction = model.predict(x_test)  
# Model jo training ke baad ready ho gaya hai, ab testing data (x_test) ke inputs par predictions karta hai. model.predict() function ka use predictions banane ke liye hota hai.
    
    predictions[test_index] = y_test_prediction #Cross-validation ke har fold me testing set ke predictions ko ek global array (predictions) me save karte hain.

    mse = mean_squared_error(y_test, y_test_prediction)   
    mae = mean_absolute_error(y_test, y_test_prediction)
    r2 = r2_score(y_test, y_test_prediction)
    
    test_mse.append(mse)
    test_mae.append(mae)
    test_r2.append(r2)

    print(f"FOLD {fold}: MSE: {mse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}, Best Iteration: {best_iteration}")
    fold = fold + 1

Training Fold: ,1
FOLD 1: MSE: 1.7622, MAE: 1.0888, R²: 0.3517, Best Iteration: 21
Training Fold: ,2
FOLD 2: MSE: 1.7716, MAE: 1.0631, R²: 0.3889, Best Iteration: 33
Training Fold: ,3
FOLD 3: MSE: 1.8381, MAE: 1.0895, R²: 0.3346, Best Iteration: 26
Training Fold: ,4
FOLD 4: MSE: 1.7018, MAE: 1.0685, R²: 0.3702, Best Iteration: 28
Training Fold: ,5
FOLD 5: MSE: 1.6919, MAE: 1.0475, R²: 0.3584, Best Iteration: 31
Training Fold: ,6
FOLD 6: MSE: 1.7328, MAE: 1.0647, R²: 0.3367, Best Iteration: 24
Training Fold: ,7
FOLD 7: MSE: 1.6981, MAE: 1.0575, R²: 0.4102, Best Iteration: 59
Training Fold: ,8
FOLD 8: MSE: 1.6827, MAE: 1.0503, R²: 0.3768, Best Iteration: 27
Training Fold: ,9
FOLD 9: MSE: 1.7022, MAE: 1.0677, R²: 0.4199, Best Iteration: 53
Training Fold: ,10
FOLD 10: MSE: 1.6366, MAE: 1.0372, R²: 0.3550, Best Iteration: 27


In [19]:
df_modified['predicted_value'] = np.expm1(predictions)
def get_player_market_value(player_name):
    player_row = df_modified[df_modified['name'] == player_name]

    if not player_row.empty:
        current_value = player_row['current_value'].values[0]
        predicted_value = player_row['predicted_value'].values[0]
        print(f"Player: {player_name}")
        print(f"Current Market Value: {current_value}")
        print(f"Predicted Market Value: {predicted_value}")
    else:
        print(f"Player '{player_name}' not found in the dataset.")


player_name = input("Enter player name: ")
get_player_market_value(player_name)

Enter player name:  Antony


Player: Antony
Current Market Value: 70000000
Predicted Market Value: 10905570.8262206


In [21]:
x.columns

Index(['name', 'position_encoded', 'performance', 'age_group'], dtype='object')

In [None]:
# GUI Code
root = tk.Tk()
root.title("Player Valuation Predictor")
root.geometry("800x600")
root.resizable(False, False)

background_color = "#8A2BE2"
right_bg_color = "#1E90FF"
button_bg_color = "white"
button_fg_color = "#000000"
font_title = ("Times New Roman", 30, "bold")
font_button = ("Arial", 14, "bold")

def display_prediction(player_name, current_value, predicted_value):
    result_window = Toplevel(root)
    result_window.title("Player Valuation Result")
    result_window.geometry("400x200")
    result_window.resizable(False, False)
    if current_value and predicted_value:
        label = tk.Label(result_window, text=f"Player: {player_name}", font=("Arial", 14))
        label.pack(pady=10)
        current_label = tk.Label(result_window, text=f"Current Value: ${current_value} million", font=("Arial", 12))
        current_label.pack(pady=10)
        predicted_label = tk.Label(result_window, text=f"Predicted Value: ${predicted_value:.2f} million", font=("Arial", 12, "bold"))
        predicted_label.pack(pady=10)
    else:
        error_label = tk.Label(result_window, text=f"Player '{player_name}' not found!", font=("Arial", 14), fg="red")
        error_label.pack(pady=20)

def open_search_bar():
    search_window = Toplevel(root)
    search_window.title("Enter Player Name")
    search_window.geometry("400x250")
    search_window.resizable(False, False)

    label = tk.Label(search_window, text="Enter Player Name:", font=("Arial", 14))
    label.pack(pady=10)

    entry = Entry(search_window, font=("Arial", 12), width=30, bd=3)
    entry.pack(pady=10)

    def get_player_name():
        player_name = entry.get()
        if player_name:
            search_window.destroy()
            current_value, predicted_value = get_player_market_value(player_name)
            display_prediction(player_name, current_value, predicted_value)
        else:
            messagebox.showwarning("Warning", "Please enter a player name.")

    def clear_input():
        entry.delete(0, tk.END)

    button_frame = tk.Frame(search_window)
    button_frame.pack(pady=10)

    submit_button = Button(button_frame, text="Submit", font=("Arial", 12, "bold"),
                           bg=button_bg_color, fg=button_fg_color, width=10, command=get_player_name)
    submit_button.grid(row=0, column=0, padx=10)

    clear_button = Button(button_frame, text="Clear", font=("Arial", 12, "bold"),
                          bg=button_bg_color, fg=button_fg_color, width=10, command=clear_input)
    clear_button.grid(row=0, column=1, padx=10)

left_frame = tk.Frame(root, bg=background_color, width=350, height=600)
left_frame.pack(side="left", fill="both", expand=True)

title_label = tk.Label(left_frame, text="PLAYER\nVALUATION\nPREDICTOR", font=font_title,
                       bg=background_color, fg="black", justify="center")
title_label.pack(pady=60)

button1 = tk.Button(left_frame, text="Enter Player Name", font=font_button,
                    bg=button_bg_color, fg=button_fg_color, bd=10, relief="flat", command=open_search_bar)
button1.pack(pady=10, ipadx=20, ipady=10)

right_canvas = Canvas(root, bg=right_bg_color, width=350, height=600, highlightthickness=0)
right_canvas.pack(side="right", fill="both", expand=True)

root.mainloop()

Player: Antony
Current Market Value: 70000000
Predicted Market Value: 10905570.8262206


Exception in Tkinter callback
Traceback (most recent call last):
  File "D:\jupyter\Lib\tkinter\__init__.py", line 1968, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "C:\Users\USER\AppData\Local\Temp\ipykernel_21672\2976624932.py", line 46, in get_player_name
    current_value, predicted_value = get_player_market_value(player_name)
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: cannot unpack non-iterable NoneType object


In [25]:
x.columns

Index(['name', 'position_encoded', 'performance', 'age_group'], dtype='object')