In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

### Elite Player Classification `(Logistic Regression)`

In [43]:
ucl = pd.read_excel('UCL_playerstats_orig.xlsx')

In [44]:
ucl.head()

Unnamed: 0,PlayerID,Player,Position,Age,Nationality,Team,Competitions,Phase,Phase_Tier,Value10^6,...,Balls_recovered,Tackles_Won,Tackles_Lost,Saves,Goals_Conceded,Clean_Sheets,MOTM_Awards,Minutes_played,Match_played,DNP
0,1,Theo Hernandez,Defender,27,France,Milan,Serie A,Knockout_phase,D,40.0,...,53,2,5,0,0,0,0,861,10,0
1,2,Nicolo Barella,Midfielder,28,Italy,Inter,Serie A,Final,S,80.0,...,32,8,15,0,0,0,2,994,13,2
2,3,Lewandowski,Forward,36,Poland,Barcelona,La Liga,Semi_finals,A,15.0,...,11,4,1,0,0,0,2,985,13,1
3,4,Raphinha,Forward,28,Brazil,Barcelona,La Liga,Semi_finals,A,80.0,...,18,2,7,0,0,0,3,1225,14,0
4,5,Van Dijk,Defender,33,Netherlands,Liverpool FC,Premier League,Round16,C,50.0,...,51,3,3,0,0,0,0,840,9,1


#### HAPI 1️⃣ Krijojm kolonen target → Elite_Player

In [45]:
ucl['Elite_Player'] = (
    (ucl['Goals'] > 5) |
    (ucl['Assists'] > 5) |
    (ucl['Minutes_played'] >= 900) |
    (ucl['Total_attempts'] > 30) |
    (ucl['Chances_Created'] > 30) |
    (ucl['Rating'] > 7.4)

).astype(int)

#### HAPI 2️⃣ Splitimi i Train dhe Test

In [46]:
features = ['Age', 'Goals', 'Assists', 'Rating', 'Minutes_played','Total_attempts','Chances_Created']
X = ucl[features]
y = ucl['Elite_Player']

In [47]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

#### HAPI 3️⃣ Modelimi me Logistic Regression

In [48]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
preds = model.predict(X_test)

#### HAPI 4️⃣ Metrika e Klasifikimit (Accuracy, Precision, Recall, F1-Score)

In [59]:
model.score(X_test, y_test)

0.7936507936507936

In [58]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

print("Elite Player Classification:")
print("Accuracy :", round(accuracy_score(y_test, preds), 2))
print("Precision:", round(precision_score(y_test, preds), 2))
print("Recall   :", round(recall_score(y_test, preds), 2))
print("F1-Score :", round(f1_score(y_test, preds), 2))



Elite Player Classification:
Accuracy : 0.79
Precision: 0.85
Recall   : 0.72
F1-Score : 0.78


0.7936507936507936

In [50]:
ucl['Predicted_Elite_Player'] = model.predict(X)

In [51]:
ucl['Predicted_Elite_Player'].value_counts()

Predicted_Elite_Player
0    129
1     81
Name: count, dtype: int64