In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA

In [2]:
# Load the feature dataset used to train the match-outcome model.
dataset = pd.read_csv('Datasets/updated_final_feature_dataset.csv')

# Columns that are not model inputs: the target ('Winner'), the goal counts,
# and the season / team identifiers. Team names are dropped outright rather
# than label-encoded.
non_feature_columns = ['Winner', 'Home Goals', 'Away Goals', 'Season', 'Home Team', 'Away Team']

y = dataset['Winner']
X = dataset.drop(columns=non_feature_columns)

# Fixed seed so the 80/20 split is reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Gradient-boosted classifier for the match outcome; the seed pins the
# stochastic parts of training so refits are repeatable.
gb_model = GradientBoostingClassifier(
    n_estimators=1000,
    learning_rate=0.5,
    max_depth=4,
    random_state=16,
)

# NOTE(review): the model is fitted on the full X / y, not on X_train, so the
# held-out split cannot be used to score this particular fit — confirm that
# is intended.
gb_model.fit(X, y)

In [4]:
# Simulate the 2024-2025 season: predict every fixture with the fitted model,
# then award 3 points for a predicted win and 1 point each for a predicted draw.
team_data = pd.read_csv('Datasets/2024-2025_team_data.csv')
fixtures_25 = pd.read_csv('Datasets/fixtures-2025.csv')

# One row per team, keyed by name, so each fixture becomes a label lookup
# instead of sixteen boolean-mask scans of team_data. Keeping the first row of
# any duplicated team mirrors the previous `.values[0]` behaviour.
team_stats = team_data.drop_duplicates(subset='Team', keep='first').set_index('Team')

# Fail up front with the offending names instead of an opaque IndexError
# halfway through the fixture list.
fixture_teams = set(fixtures_25['home']) | set(fixtures_25['away'])
missing_teams = fixture_teams - set(team_stats.index)
if missing_teams:
    raise KeyError(f'No 2024-2025 team data for: {sorted(map(str, missing_teams))}')

# Per-fixture stats for each side, row-aligned with fixtures_25.
home_stats = team_stats.loc[fixtures_25['home'].tolist()].reset_index(drop=True)
away_stats = team_stats.loc[fixtures_25['away'].tolist()].reset_index(drop=True)

# (side, team_data column) for every model input, in the positional order that
# is labelled with X_train.columns below.
# NOTE(review): this order is assumed to match the training columns — confirm
# against X_train.columns; a mismatch would silently mislabel features.
feature_sources = [
    (home_stats, 'ELO'),                     # 'Home Team ELO'
    (away_stats, 'ELO'),                     # 'Away Team ELO'
    (home_stats, 'XG'),                      # 'Home Team XG'
    (away_stats, 'XG'),                      # 'Away Team XG'
    (home_stats, 'XGA'),                     # 'Home Team XGA'
    (away_stats, 'XGA'),                     # 'Away Team XGA'
    (home_stats, 'Win Percentage'),          # 'Home Team Win Percentage'
    (home_stats, 'Draw Percentage'),         # 'Home Team Draw Percentage'
    (away_stats, 'Win Percentage'),          # 'Away Team Win Percentage'
    (away_stats, 'Draw Percentage'),         # 'Away Team Draw Percentage'
    (home_stats, 'Team Form'),               # 'Home Team Form'
    (away_stats, 'Team Form'),               # 'Away Team Form'
    (home_stats, 'Team Cumulative Points'),  # 'Home Team Cumulative Points'
    (away_stats, 'Team Cumulative Points'),  # 'Away Team Cumulative Points'
    (home_stats, 'Team Form Statistics'),    # 'Home Team Form Statistics'
    (away_stats, 'Team Form Statistics'),    # 'Away Team Form Statistics'
]
features = pd.concat([side[column] for side, column in feature_sources], axis=1)
features.columns = X_train.columns  # raises ValueError if the column counts differ

# One batched predict call replaces one call per fixture. The estimator rejects
# zero-row input, so an empty fixture list simply yields no predictions.
predictions = gb_model.predict(features) if len(features) else []

final_standings = {team: 0 for team in team_data['Team']}
for home_team, away_team, result in zip(fixtures_25['home'], fixtures_25['away'], predictions):
    if result == 1:        # predicted home win
        final_standings[home_team] += 3
    elif result == 0:      # predicted draw
        final_standings[home_team] += 1
        final_standings[away_team] += 1
    elif result == -1:     # predicted away win
        final_standings[away_team] += 3
    else:
        # Previously an unrecognised label awarded no points to anyone, which
        # silently corrupted the table.
        raise ValueError(f'Unexpected predicted label {result!r} for {home_team} vs {away_team}')

print('Final Standings for 2024-2025 season')
# Stable sort: teams level on points keep their team_data order.
final_standings = dict(sorted(final_standings.items(), key=lambda item: item[1], reverse=True))
for team, points in final_standings.items():
    print(f'{team}: {points}')


Final Standings for 2024-2025 season
Ipswich Town: 74
Leicester City: 69
Chelsea: 62
Newcastle Utd: 59
Manchester Utd: 57
Manchester City: 57
Tottenham: 56
Arsenal: 56
Liverpool: 52
Crystal Palace: 51
Everton: 42
Fulham: 39
Nottingham Forest: 38
Aston Villa: 36
Brighton: 36
Southampton: 31
West Ham: 29
Wolves: 26
Brentford: 25
Bournemouth: 16


In [5]:
# Replay the 2023-2024 season: predict every fixture with the fitted model,
# then award 3 points for a predicted win and 1 point each for a predicted draw.
# NOTE(review): these fixtures are rows of `dataset`, the same frame gb_model
# was fitted on, so this is not a held-out forecast — confirm that is intended.
team_data = pd.read_csv('Datasets/2023-2024_team_data.csv')
fixtures_24 = dataset[dataset['Season'] == '2023-2024']

# One row per team, keyed by name, so each fixture becomes a label lookup
# instead of sixteen boolean-mask scans of team_data. Keeping the first row of
# any duplicated team mirrors the previous `.values[0]` behaviour.
team_stats = team_data.drop_duplicates(subset='Team', keep='first').set_index('Team')

# Fail up front with the offending names instead of an opaque IndexError
# halfway through the fixture list.
fixture_teams = set(fixtures_24['Home Team']) | set(fixtures_24['Away Team'])
missing_teams = fixture_teams - set(team_stats.index)
if missing_teams:
    raise KeyError(f'No 2023-2024 team data for: {sorted(map(str, missing_teams))}')

# Per-fixture stats for each side, row-aligned with fixtures_24.
home_stats = team_stats.loc[fixtures_24['Home Team'].tolist()].reset_index(drop=True)
away_stats = team_stats.loc[fixtures_24['Away Team'].tolist()].reset_index(drop=True)

# (side, team_data column) for every model input, in the positional order that
# is labelled with X_train.columns below.
# NOTE(review): this order is assumed to match the training columns — confirm
# against X_train.columns; a mismatch would silently mislabel features.
feature_sources = [
    (home_stats, 'ELO'),                     # 'Home Team ELO'
    (away_stats, 'ELO'),                     # 'Away Team ELO'
    (home_stats, 'XG'),                      # 'Home Team XG'
    (away_stats, 'XG'),                      # 'Away Team XG'
    (home_stats, 'XGA'),                     # 'Home Team XGA'
    (away_stats, 'XGA'),                     # 'Away Team XGA'
    (home_stats, 'Win Percentage'),          # 'Home Team Win Percentage'
    (home_stats, 'Draw Percentage'),         # 'Home Team Draw Percentage'
    (away_stats, 'Win Percentage'),          # 'Away Team Win Percentage'
    (away_stats, 'Draw Percentage'),         # 'Away Team Draw Percentage'
    (home_stats, 'Team Form'),               # 'Home Team Form'
    (away_stats, 'Team Form'),               # 'Away Team Form'
    (home_stats, 'Team Cumulative Points'),  # 'Home Team Cumulative Points'
    (away_stats, 'Team Cumulative Points'),  # 'Away Team Cumulative Points'
    (home_stats, 'Team Form Statistics'),    # 'Home Team Form Statistics'
    (away_stats, 'Team Form Statistics'),    # 'Away Team Form Statistics'
]
features = pd.concat([side[column] for side, column in feature_sources], axis=1)
features.columns = X_train.columns  # raises ValueError if the column counts differ

# One batched predict call replaces one call per fixture. The estimator rejects
# zero-row input, so an empty fixture list simply yields no predictions.
predictions = gb_model.predict(features) if len(features) else []

final_standings = {team: 0 for team in team_data['Team']}
for home_team, away_team, result in zip(fixtures_24['Home Team'], fixtures_24['Away Team'], predictions):
    if result == 1:        # predicted home win
        final_standings[home_team] += 3
    elif result == 0:      # predicted draw
        final_standings[home_team] += 1
        final_standings[away_team] += 1
    elif result == -1:     # predicted away win
        final_standings[away_team] += 3
    else:
        # Previously an unrecognised label awarded no points to anyone, which
        # silently corrupted the table.
        raise ValueError(f'Unexpected predicted label {result!r} for {home_team} vs {away_team}')

print('Final Standings for 2023-2024 season')
# Stable sort: teams level on points keep their team_data order.
final_standings = dict(sorted(final_standings.items(), key=lambda item: item[1], reverse=True))
for team, points in final_standings.items():
    print(f'{team}: {points}')


Final Standings for 2023-2024 season
Aston Villa: 82
Luton Town: 80
Manchester City: 58
Sheffield Utd: 56
Liverpool: 54
Arsenal: 53
Manchester Utd: 51
Newcastle Utd: 50
Brentford: 47
Tottenham: 46
Brighton: 46
Everton: 45
Burnley: 43
West Ham: 41
Chelsea: 36
Nott'ham Forest: 30
Crystal Palace: 28
Wolves: 28
Fulham: 20
Bournemouth: 18


In [6]:
# Replay the 2022-2023 season: predict every fixture with the fitted model,
# then award 3 points for a predicted win and 1 point each for a predicted draw.
# NOTE(review): these fixtures are rows of `dataset`, the same frame gb_model
# was fitted on, so this is not a held-out forecast — confirm that is intended.
team_data = pd.read_csv('Datasets/2022-2023_team_data.csv')
fixtures_23 = dataset[dataset['Season'] == '2022-2023']

# One row per team, keyed by name, so each fixture becomes a label lookup
# instead of sixteen boolean-mask scans of team_data. Keeping the first row of
# any duplicated team mirrors the previous `.values[0]` behaviour.
team_stats = team_data.drop_duplicates(subset='Team', keep='first').set_index('Team')

# Fail up front with the offending names instead of an opaque IndexError
# halfway through the fixture list.
fixture_teams = set(fixtures_23['Home Team']) | set(fixtures_23['Away Team'])
missing_teams = fixture_teams - set(team_stats.index)
if missing_teams:
    raise KeyError(f'No 2022-2023 team data for: {sorted(map(str, missing_teams))}')

# Per-fixture stats for each side, row-aligned with fixtures_23.
home_stats = team_stats.loc[fixtures_23['Home Team'].tolist()].reset_index(drop=True)
away_stats = team_stats.loc[fixtures_23['Away Team'].tolist()].reset_index(drop=True)

# (side, team_data column) for every model input, in the positional order that
# is labelled with X_train.columns below.
# NOTE(review): this order is assumed to match the training columns — confirm
# against X_train.columns; a mismatch would silently mislabel features.
feature_sources = [
    (home_stats, 'ELO'),                     # 'Home Team ELO'
    (away_stats, 'ELO'),                     # 'Away Team ELO'
    (home_stats, 'XG'),                      # 'Home Team XG'
    (away_stats, 'XG'),                      # 'Away Team XG'
    (home_stats, 'XGA'),                     # 'Home Team XGA'
    (away_stats, 'XGA'),                     # 'Away Team XGA'
    (home_stats, 'Win Percentage'),          # 'Home Team Win Percentage'
    (home_stats, 'Draw Percentage'),         # 'Home Team Draw Percentage'
    (away_stats, 'Win Percentage'),          # 'Away Team Win Percentage'
    (away_stats, 'Draw Percentage'),         # 'Away Team Draw Percentage'
    (home_stats, 'Team Form'),               # 'Home Team Form'
    (away_stats, 'Team Form'),               # 'Away Team Form'
    (home_stats, 'Team Cumulative Points'),  # 'Home Team Cumulative Points'
    (away_stats, 'Team Cumulative Points'),  # 'Away Team Cumulative Points'
    (home_stats, 'Team Form Statistics'),    # 'Home Team Form Statistics'
    (away_stats, 'Team Form Statistics'),    # 'Away Team Form Statistics'
]
features = pd.concat([side[column] for side, column in feature_sources], axis=1)
features.columns = X_train.columns  # raises ValueError if the column counts differ

# One batched predict call replaces one call per fixture. The estimator rejects
# zero-row input, so an empty fixture list simply yields no predictions.
predictions = gb_model.predict(features) if len(features) else []

final_standings = {team: 0 for team in team_data['Team']}
for home_team, away_team, result in zip(fixtures_23['Home Team'], fixtures_23['Away Team'], predictions):
    if result == 1:        # predicted home win
        final_standings[home_team] += 3
    elif result == 0:      # predicted draw
        final_standings[home_team] += 1
        final_standings[away_team] += 1
    elif result == -1:     # predicted away win
        final_standings[away_team] += 3
    else:
        # Previously an unrecognised label awarded no points to anyone, which
        # silently corrupted the table.
        raise ValueError(f'Unexpected predicted label {result!r} for {home_team} vs {away_team}')

print('Final Standings for 2022-2023 Season')
# Stable sort: teams level on points keep their team_data order.
final_standings = dict(sorted(final_standings.items(), key=lambda item: item[1], reverse=True))
for team, points in final_standings.items():
    print(f'{team}: {points}')


Final Standings for 2022-2023 Season
Tottenham: 79
Nott'ham Forest: 74
Bournemouth: 65
Liverpool: 63
Manchester City: 63
Arsenal: 60
Fulham: 51
Brighton: 49
Crystal Palace: 46
Newcastle Utd: 45
Leeds United: 45
Chelsea: 41
Leicester City: 39
Everton: 39
Aston Villa: 36
Manchester Utd: 35
Wolves: 28
West Ham: 26
Southampton: 24
Brentford: 20
