In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA

In [6]:
dataset = pd.read_csv('Datasets/updated_final_feature_dataset.csv')
X = dataset.drop(['Winner', 'Home Goals', 'Away Goals', 'Season', 'Home Team', 'Away Team'], axis=1)
# X['Home Team'] = LabelEncoder().fit_transform(X['Home Team'])
# X['Away Team'] = LabelEncoder().fit_transform(X['Away Team'])
y = dataset['Winner']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
class ELO_Insights_model:
    def __init__(self):
        self.rf_model = RandomForestClassifier(bootstrap=False, max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=1000)
        self.xgb_model = XGBClassifier(
            random_state=16,
            eval_metric='mlogloss',
            colsample_bytree=0.8,
            gamma=1,
            learning_rate=0.01,
            max_depth=7,
            n_estimators=2000,
            subsample=0.8
        )
        self.gb_model = GradientBoostingClassifier(random_state=16, n_estimators=1000, learning_rate=0.5, max_depth=4)
        # self.gaussian_model = GaussianNB()

    def fit(self, X_train, y_train):
        label_mapping = {-1: 0, 0: 1, 1: 2}
        y_train_ = np.array([label_mapping[label] for label in y_train])

        self.rf_model.fit(X_train, y_train)
        self.xgb_model.fit(X_train, y_train_)
        self.gb_model.fit(X_train, y_train)
        # self.gaussian_model.fit(X_train, y_train)

    def predict(self, X_test):
        # Get predictions from the models
        rf_pred = self.rf_model.predict(X_test)
        xgb_pred = self.xgb_model.predict(X_test)
        gb_pred = self.gb_model.predict(X_test)
        # gaussian_pred = self.gaussian_model.predict(X_test)

        label_mapping = {0: -1, 1: 0, 2: 1}
        xgb_pred_mapped = np.vectorize(label_mapping.get)(xgb_pred)

        all_preds = np.vstack((rf_pred, xgb_pred_mapped, gb_pred)).T  # Shape: (num_samples, num_models)

        # Calculate majority vote for each sample
        result = np.apply_along_axis(
            lambda x: np.bincount(x + 1).argmax() - 1, axis=1, arr=all_preds
        )

        return result

    def score(self, y_pred, y_test):
        return np.mean(y_pred == y_test)
        

In [8]:
elo_model = ELO_Insights_model()
elo_model.fit(X, y)


In [10]:
team_data = pd.read_csv('Datasets/2024-2025_team_data.csv')
final_standings = {team:0 for team in team_data['Team']}
team_data.head()

fixtures_25 = pd.read_csv('Datasets/fixtures-2025.csv')

for i, row in fixtures_25.iterrows():
    home_team = row['home']
    away_team = row['away']
    home_ELO = team_data[team_data['Team'] == home_team]['ELO'].values[0]
    home_XG = team_data[team_data['Team'] == home_team]['XG'].values[0]
    home_XGA = team_data[team_data['Team'] == home_team]['XGA'].values[0]
    home_win_percentage = team_data[team_data['Team'] == home_team]['Win Percentage'].values[0]
    home_draw_percentage = team_data[team_data['Team'] == home_team]['Draw Percentage'].values[0]
    home_team_form = team_data[team_data['Team'] == home_team]['Team Form'].values[0]
    home_team_cumulative_points = team_data[team_data['Team'] == home_team]['Team Cumulative Points'].values[0]
    home_team_statistic = team_data[team_data['Team'] == home_team]['Team Form Statistics'].values[0]

    away_ELO = team_data[team_data['Team'] == away_team]['ELO'].values[0]
    away_XG = team_data[team_data['Team'] == away_team]['XG'].values[0]
    away_XGA = team_data[team_data['Team'] == away_team]['XGA'].values[0]
    away_win_percentage = team_data[team_data['Team'] == away_team]['Win Percentage'].values[0]
    away_draw_percentage = team_data[team_data['Team'] == away_team]['Draw Percentage'].values[0]
    away_team_form = team_data[team_data['Team'] == away_team]['Team Form'].values[0]
    away_team_cumulative_points = team_data[team_data['Team'] == away_team]['Team Cumulative Points'].values[0]
    away_team_statistic = team_data[team_data['Team'] == away_team]['Team Form Statistics'].values[0]
    
    
    entry = [
        home_ELO,                    # Corresponds to 'Home Team ELO'
        away_ELO,                    # Corresponds to 'Away Team ELO'
        home_XG,                     # Corresponds to 'Home Team XG'
        away_XG,                     # Corresponds to 'Away Team XG'
        home_XGA,                    # Corresponds to 'Home Team XGA'
        away_XGA,                    # Corresponds to 'Away Team XGA'
        home_win_percentage,         # Corresponds to 'Home Team Win Percentage'
        home_draw_percentage,        # Corresponds to 'Home Team Draw Percentage'
        away_win_percentage,         # Corresponds to 'Away Team Win Percentage'
        away_draw_percentage,        # Corresponds to 'Away Team Draw Percentage'
        home_team_form,              # Corresponds to 'Home Team Form'
        away_team_form,              # Corresponds to 'Away Team Form'
        home_team_cumulative_points, # Corresponds to 'Home Team Cumulative Points'
        away_team_cumulative_points,  # Corresponds to 'Away Team Cumulative Points'
        home_team_statistic,         # Corresponds to 'Home Team Form Statistics'
        away_team_statistic          # Corresponds to 'Away Team Form Statistics

    ]
    entry_df = pd.DataFrame([entry], columns=X_train.columns)

    predicted_result = elo_model.predict(entry_df)


    if predicted_result[0] == 1:
        final_standings[home_team] += 3

    elif predicted_result[0] == 0:
        final_standings[home_team] += 1
        final_standings[away_team] += 1

    elif predicted_result[0] == -1:
        final_standings[away_team] += 3

print('Final Standings for 2024-2025 season')
final_standings = dict(sorted(final_standings.items(), key=lambda item: item[1], reverse=True))
for team in final_standings:
    print(f'{team}: {final_standings[team]}')


Final Standings for 2024-2025 season
Tottenham: 78
Chelsea: 74
Arsenal: 70
Liverpool: 70
Manchester Utd: 66
Manchester City: 66
Newcastle Utd: 65
Aston Villa: 59
Crystal Palace: 48
Leicester City: 45
Fulham: 40
Everton: 34
Nottingham Forest: 34
Ipswich Town: 32
West Ham: 31
Southampton: 31
Brighton: 29
Wolves: 26
Brentford: 19
Bournemouth: 17


In [None]:
team_data = pd.read_csv('Datasets/2023-2024_team_data.csv')
final_standings = {team:0 for team in team_data['Team']}
team_data.head()

fixtures_24 = dataset[dataset['Season'] == '2023-2024']

for i, row in fixtures_24.iterrows():
    home_team = row['Home Team']
    away_team = row['Away Team']
    home_ELO = team_data[team_data['Team'] == home_team]['ELO'].values[0]
    home_XG = team_data[team_data['Team'] == home_team]['XG'].values[0]
    home_XGA = team_data[team_data['Team'] == home_team]['XGA'].values[0]
    home_win_percentage = team_data[team_data['Team'] == home_team]['Win Percentage'].values[0]
    home_draw_percentage = team_data[team_data['Team'] == home_team]['Draw Percentage'].values[0]
    home_team_form = team_data[team_data['Team'] == home_team]['Team Form'].values[0]
    home_team_cumulative_points = team_data[team_data['Team'] == home_team]['Team Cumulative Points'].values[0]
    home_team_statistic = team_data[team_data['Team'] == home_team]['Team Form Statistics'].values[0]

    away_ELO = team_data[team_data['Team'] == away_team]['ELO'].values[0]
    away_XG = team_data[team_data['Team'] == away_team]['XG'].values[0]
    away_XGA = team_data[team_data['Team'] == away_team]['XGA'].values[0]
    away_win_percentage = team_data[team_data['Team'] == away_team]['Win Percentage'].values[0]
    away_draw_percentage = team_data[team_data['Team'] == away_team]['Draw Percentage'].values[0]
    away_team_form = team_data[team_data['Team'] == away_team]['Team Form'].values[0]
    away_team_cumulative_points = team_data[team_data['Team'] == away_team]['Team Cumulative Points'].values[0]
    away_team_statistic = team_data[team_data['Team'] == away_team]['Team Form Statistics'].values[0]
    
    
    entry = [
        home_ELO,                    # Corresponds to 'Home Team ELO'
        away_ELO,                    # Corresponds to 'Away Team ELO'
        home_XG,                     # Corresponds to 'Home Team XG'
        away_XG,                     # Corresponds to 'Away Team XG'
        home_XGA,                    # Corresponds to 'Home Team XGA'
        away_XGA,                    # Corresponds to 'Away Team XGA'
        home_win_percentage,         # Corresponds to 'Home Team Win Percentage'
        home_draw_percentage,        # Corresponds to 'Home Team Draw Percentage'
        away_win_percentage,         # Corresponds to 'Away Team Win Percentage'
        away_draw_percentage,        # Corresponds to 'Away Team Draw Percentage'
        home_team_form,              # Corresponds to 'Home Team Form'
        away_team_form,              # Corresponds to 'Away Team Form'
        home_team_cumulative_points, # Corresponds to 'Home Team Cumulative Points'
        away_team_cumulative_points,  # Corresponds to 'Away Team Cumulative Points'
        home_team_statistic,         # Corresponds to 'Home Team Form Statistics'
        away_team_statistic          # Corresponds to 'Away Team Form Statistics

    ]
    entry_df = pd.DataFrame([entry], columns=X_train.columns)

    predicted_result = elo_model.predict(entry_df)


    if predicted_result[0] == 1:
        final_standings[home_team] += 3

    elif predicted_result[0] == 0:
        final_standings[home_team] += 1
        final_standings[away_team] += 1

    elif predicted_result[0] == -1:
        final_standings[away_team] += 3

print('Final Standings for 2023-2024 season')
final_standings = dict(sorted(final_standings.items(), key=lambda item: item[1], reverse=True))
for team in final_standings:
    print(f'{team}: {final_standings[team]}')


Final Standings for 2023-2024 season
Liverpool: 72
Manchester City: 67
Aston Villa: 65
Manchester Utd: 63
Newcastle Utd: 62
Arsenal: 60
Burnley: 52
Everton: 51
Tottenham: 50
Brighton: 50
Sheffield Utd: 50
West Ham: 38
Chelsea: 38
Brentford: 36
Crystal Palace: 33
Luton Town: 32
Nott'ham Forest: 29
Fulham: 27
Wolves: 27
Bournemouth: 17


In [None]:
team_data = pd.read_csv('Datasets/2022-2023_team_data.csv')
final_standings = {team:0 for team in team_data['Team']}
team_data.head()

fixtures_23 = dataset[dataset['Season'] == '2022-2023']

for i, row in fixtures_23.iterrows():

    home_team = row['Home Team']
    away_team = row['Away Team']

    home_ELO = team_data[team_data['Team'] == home_team]['ELO'].values[0]
    home_XG = team_data[team_data['Team'] == home_team]['XG'].values[0]
    home_XGA = team_data[team_data['Team'] == home_team]['XGA'].values[0]
    home_win_percentage = team_data[team_data['Team'] == home_team]['Win Percentage'].values[0]
    home_draw_percentage = team_data[team_data['Team'] == home_team]['Draw Percentage'].values[0]
    home_team_form = team_data[team_data['Team'] == home_team]['Team Form'].values[0]
    home_team_cumulative_points = team_data[team_data['Team'] == home_team]['Team Cumulative Points'].values[0]
    home_team_statistic = team_data[team_data['Team'] == home_team]['Team Form Statistics'].values[0]

    away_ELO = team_data[team_data['Team'] == away_team]['ELO'].values[0]
    away_XG = team_data[team_data['Team'] == away_team]['XG'].values[0]
    away_XGA = team_data[team_data['Team'] == away_team]['XGA'].values[0]
    away_win_percentage = team_data[team_data['Team'] == away_team]['Win Percentage'].values[0]
    away_draw_percentage = team_data[team_data['Team'] == away_team]['Draw Percentage'].values[0]
    away_team_form = team_data[team_data['Team'] == away_team]['Team Form'].values[0]
    away_team_cumulative_points = team_data[team_data['Team'] == away_team]['Team Cumulative Points'].values[0]
    away_team_statistic = team_data[team_data['Team'] == away_team]['Team Form Statistics'].values[0]
    
    entry = [
        home_ELO,                    # Corresponds to 'Home Team ELO'
        away_ELO,                    # Corresponds to 'Away Team ELO'
        home_XG,                     # Corresponds to 'Home Team XG'
        away_XG,                     # Corresponds to 'Away Team XG'
        home_XGA,                    # Corresponds to 'Home Team XGA'
        away_XGA,                    # Corresponds to 'Away Team XGA'
        home_win_percentage,         # Corresponds to 'Home Team Win Percentage'
        home_draw_percentage,        # Corresponds to 'Home Team Draw Percentage'
        away_win_percentage,         # Corresponds to 'Away Team Win Percentage'
        away_draw_percentage,        # Corresponds to 'Away Team Draw Percentage'
        home_team_form,              # Corresponds to 'Home Team Form'
        away_team_form,              # Corresponds to 'Away Team Form'
        home_team_cumulative_points, # Corresponds to 'Home Team Cumulative Points'
        away_team_cumulative_points,  # Corresponds to 'Away Team Cumulative Points'
        home_team_statistic,         # Corresponds to 'Home Team Form Statistics'
        away_team_statistic          # Corresponds to 'Away Team Form Statistics

    ]
    entry_df = pd.DataFrame([entry], columns=X_train.columns)

    predicted_result = elo_model.predict(entry_df)

    if predicted_result[0] == 1:
        final_standings[home_team] += 3

    elif predicted_result[0] == 0:
        final_standings[home_team] += 1
        final_standings[away_team] += 1

    elif predicted_result[0] == -1:
        final_standings[away_team] += 3

print('Final Standings for 2022-2023 Season')
final_standings = dict(sorted(final_standings.items(), key=lambda item: item[1], reverse=True))
for team in final_standings:
    print(f'{team}: {final_standings[team]}')


Final Standings for 2022-2023 Season
Tottenham: 80
Arsenal: 71
Liverpool: 68
Manchester City: 68
Chelsea: 54
Fulham: 51
Brighton: 49
Crystal Palace: 45
Bournemouth: 45
Newcastle Utd: 42
Leicester City: 39
Leeds United: 39
Manchester Utd: 38
Everton: 37
West Ham: 36
Aston Villa: 36
Nott'ham Forest: 35
Wolves: 33
Brentford: 22
Southampton: 21
