In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

### Ready for Transfer Market Classification `(Random Forest Classifier)`

In [2]:
# Load the player statistics workbook; the path is resolved relative to the
# notebook's working directory.
ucl = pd.read_excel(io='UCL_playerstats_orig.xlsx')

#### HAPI 1️⃣: Krijojmë kolonën target → `High_Potential`

In [14]:
# Label rule: a player is flagged (1) when aged 23 or younger AND showing at
# least one standout signal: 4+ goals, 4+ assists, a rating of 7.4 or higher,
# or more than one MOTM award. Everyone else is labelled 0.
ucl['High_Potential'] = (
    ucl['Age'].le(23)
    & (
        ucl['Goals'].ge(4)
        | ucl['Assists'].ge(4)
        | ucl['Rating'].ge(7.4)
        | ucl['MOTM_Awards'].gt(1)
    )
).astype(int)


In [15]:
# Preview the 0/1 label column.
ucl.loc[:, 'High_Potential']

0      0
1      0
2      0
3      0
4      0
      ..
205    0
206    0
207    0
208    0
209    0
Name: High_Potential, Length: 210, dtype: int64

#### HAPI 2️⃣: Veçoritë për klasifikim

In [17]:
# Features: the per-player columns handed to the classifier.
# NOTE(review): High_Potential is computed directly from these same five
# columns, so the model can simply recover the labelling rule - confirm that
# this is the intended setup before trusting the scores.
features = ['Age', 'Goals', 'Assists', 'Rating', 'MOTM_Awards']
X = ucl.loc[:, features]
y = ucl.loc[:, 'High_Potential']

#### HAPI 3️⃣: Train/test split dhe modelimi me Random Forest Classifier

In [18]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [19]:
# A random forest draws random bootstrap samples and feature subsets while
# fitting, so the seed is pinned: without it the fitted model, and every
# metric computed from it, changes on each Restart & Run All.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predicted class labels for the held-out rows only.
preds = model.predict(X_test)

#### HAPI 4️⃣: Vlerësimi i modelit

In [20]:

# Score the test-split predictions against the true labels, rounding every
# metric to two decimals. With their default arguments precision, recall and
# F1 are reported for the positive class (label 1).
print("\nReady for Transfer Market Classification:")
for label, metric in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1-Score :", f1_score),
):
    print(label, round(metric(y_test, preds), 2))


Ready for Transfer Market Classification:
Accuracy : 0.97
Precision: 1.0
Recall   : 0.78
F1-Score : 0.88


In [21]:
# Attach the fitted model's prediction to every player in the table.
# NOTE(review): X also contains the rows the model was trained on, so these
# are not all out-of-sample predictions.
ucl['Predicted_Potential'] = model.predict(X)

# Shortlist: players predicted as class 1, trimmed to the columns worth showing.
potential_players = ucl.loc[
    ucl['Predicted_Potential'].eq(1),
    ['Player', 'Team', 'Age', 'Goals', 'Assists', 'Rating'],
]

In [22]:
# Display the shortlist of players the model predicts as class 1.
potential_players

Unnamed: 0,Player,Team,Age,Goals,Assists,Rating
5,Jude Bellingham,Real Madrid,21,3,2,7.6
15,Yamal,Barcelona,17,5,3,7.92
50,Santiago Gimenez,Milan,23,6,1,7.33
56,Pedri,Barcelona,22,0,2,7.48
63,Benjamin Sesko,Leipzig,21,4,0,6.91
82,Barcola,Paris SG,22,3,4,7.22
88,Bukayo Saka,Arsenal FC,23,6,2,7.49
93,Desire Doue,Paris SG,19,5,5,7.39
113,Adeyemi,BVB Dortmund,23,5,1,7.54
129,Nuno Mendes,Paris SG,22,4,2,7.74
