Library

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

Load Dataset

In [5]:
data = pd.read_csv('../../Data/Club/all_data_of_clubs.csv')
data.rename(columns={"Unnamed: 0": "Team"}, inplace=True)

Data Cleaning

In [6]:
columns_to_drop = ['Team', 'Fixtures', 'Performances']
data_cleaned = data.drop(columns=columns_to_drop)

Create Target Variables

In [None]:
data_cleaned['Y_champion'] = (data_cleaned['Rank'] == 1).astype(int)
data_cleaned['Y_top4'] = (data_cleaned['Rank'] <= 4).astype(int)

Feature Selection

In [8]:
selected_features = [
    'Goals', 'Goals per match', 'Shots', 'Shots on target', 'Shooting accuracy %',
    'Passes', 'Passes per match', 'Pass accuracy %', 'Clean sheets', 'Goals Conceded',
    'Goals conceded per match', 'Tackles', 'Tackle success %', 'Yellow cards', 
    'Red cards', 'Points'
]

X_champion = data_cleaned[selected_features]
y_champion = data_cleaned['Y_champion']

X_top4 = data_cleaned[selected_features]
y_top4 = data_cleaned['Y_top4']

Split Data

In [9]:
X_train_champion, X_test_champion, y_train_champion, y_test_champion = train_test_split(
    X_champion, y_champion, test_size=0.3, random_state=42)

X_train_top4, X_test_top4, y_train_top4, y_test_top4 = train_test_split(
    X_top4, y_top4, test_size=0.3, random_state=42)

Normalize Data

In [10]:
scaler = StandardScaler()
X_train_champion_scaled = scaler.fit_transform(X_train_champion)
X_test_champion_scaled = scaler.transform(X_test_champion)
X_train_top4_scaled = scaler.fit_transform(X_train_top4)
X_test_top4_scaled = scaler.transform(X_test_top4)

Train Logistic Regression for Champion Prediction

In [11]:
model_champion = LogisticRegression(random_state=42, max_iter=500)
model_champion.fit(X_train_champion_scaled, y_train_champion)
y_pred_champion = model_champion.predict(X_test_champion_scaled)
accuracy_champion = accuracy_score(y_test_champion, y_pred_champion)
print("Champion Prediction Accuracy (Logistic Regression):", accuracy_champion)
print(classification_report(y_test_champion, y_pred_champion))

Champion Prediction Accuracy (Logistic Regression): 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6



Train Random Forest for Top-4 Prediction

In [12]:
model_top4_rf = RandomForestClassifier(random_state=42, n_estimators=100)
model_top4_rf.fit(X_train_top4_scaled, y_train_top4)
y_pred_top4_rf = model_top4_rf.predict(X_test_top4_scaled)
accuracy_top4_rf = accuracy_score(y_test_top4, y_pred_top4_rf)
print("Top-4 Prediction Accuracy (Random Forest):", accuracy_top4_rf)
print(classification_report(y_test_top4, y_pred_top4_rf))

Top-4 Prediction Accuracy (Random Forest): 0.6666666666666666
              precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       0.00      0.00      0.00         2

    accuracy                           0.67         6
   macro avg       0.33      0.50      0.40         6
weighted avg       0.44      0.67      0.53         6



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Export Probabilities to Excel

In [13]:
X_champion_scaled = scaler.fit_transform(X_champion)
X_top4_scaled = scaler.fit_transform(X_top4)

data_cleaned['Champion_Probability'] = model_champion.predict_proba(X_champion_scaled)[:, 1]
data_cleaned['Top4_Probability'] = model_top4_rf.predict_proba(X_top4_scaled)[:, 1]

export_data = data_cleaned[['Rank', 'Points', 'Champion_Probability', 'Top4_Probability']]
export_data.insert(0, 'Team', data['Team'])

export_data.to_csv('../../Data/Club/team_probabilities.csv', index=False)