In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
ucl = pd.read_excel('UCL_playerstats_orig.xlsx')

#### HAPI 1️⃣: Krijojmë kolonen target → High_Market_Potential

In [134]:
ucl['High_Market_Potential'] = (
    (ucl['Age'] <= 23) &
    (
        (ucl['Rating'] > 7.40) |
        (ucl['Goals'] > 4 ) |
        (ucl['Assists'] > 4 )
    )
).astype(int)

In [135]:
ucl['High_Market_Potential']

0      0
1      0
2      0
3      0
4      0
      ..
205    0
206    0
207    0
208    0
209    0
Name: High_Market_Potential, Length: 210, dtype: int64

#### Hapi 2️⃣ veqorit per klasifikim

In [154]:
features = [
    'Age', 'Position', 'Team', 'Rating','Goals','Assists'
]

X = pd.get_dummies(ucl[features], drop_first=True)
y = ucl['High_Market_Potential']

#### HAPI 3️⃣: Train/test split dhe modelimi me Logistic Regression

In [155]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

model = LogisticRegression(max_iter=1000,)
model.fit(X_train, y_train)
preds = model.predict(X_test)

In [156]:

print("Logistic Regression PER High Market Potential:")
print("Accuracy :", round(accuracy_score(y_test, preds), 2))
print("Precision:", round(precision_score(y_test, preds), 2))
print("Recall   :", round(recall_score(y_test, preds), 2))
print("F1-Score :", round(f1_score(y_test, preds), 2))

Logistic Regression PER High Market Potential:
Accuracy : 0.95
Precision: 0.64
Recall   : 0.69
F1-Score : 0.67


#### HAPI 4️⃣: Vlersimi i modelit

In [157]:
ucl['Predicted_Potential'] = model.predict(X)

potential_players = ucl[ucl['Predicted_Potential'] == 1][['Player', 'Team' ,'Age', 'Goals', 'Assists','Rating']]

In [158]:
potential_players

Unnamed: 0,Player,Team,Age,Goals,Assists,Rating
5,Jude Bellingham,Real Madrid,21,3,2,7.6
15,Yamal,Barcelona,17,5,3,7.92
37,Vinicius Jr,Real Madrid,24,8,3,7.79
50,Santiago Gimenez,Milan,23,6,1,7.33
63,Benjamin Sesko,Leipzig,21,4,0,6.91
81,Quenda,Sporting CP,18,0,0,6.9
82,Barcola,Paris SG,22,3,4,7.22
88,Bukayo Saka,Arsenal FC,23,6,2,7.49
90,Kenan Yildiz,Juventus,20,1,1,6.97
93,Desire Doue,Paris SG,19,5,5,7.39
