In [44]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA

In [45]:
# Load the feature dataset from CSV.
dataset = pd.read_csv('Datasets/updated_final_feature_dataset.csv')

# Target: the 'Winner' column.
y = dataset['Winner']

# Model inputs: every column except the target, the goal columns, and the
# season / team-name identifier columns (team names are dropped, not encoded).
X = dataset.drop(
    columns=['Winner', 'Home Goals', 'Away Goals', 'Season', 'Home Team', 'Away Team']
)

# 80/20 hold-out split; the fixed random_state keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [46]:
# Re-code the outcome labels from {-1, 0, 1} to {0, 1, 2}
# (presumably because XGBClassifier expects class labels starting at 0 — confirm).
label_mapping = {-1: 0, 0: 1, 1: 2}

# Apply the mapping to every label in y (the full target column, not only y_train).
y_ = np.array([label_mapping[raw_label] for raw_label in y])

# Hyper-parameters for the gradient-boosted classifier, gathered in one place.
xgb_params = {
    'n_estimators': 2000,
    'learning_rate': 0.01,
    'max_depth': 7,
    'gamma': 1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'eval_metric': 'mlogloss',
    'random_state': 16,
}
xgb_model = XGBClassifier(**xgb_params)

# NOTE(review): the model is fit on all of X / y_, so the rows held out in
# X_test / y_test are part of the training data — confirm this is intended.
xgb_model.fit(X, y_)

In [47]:
# Simulate a 2023-2024 league table: every 2023-2024 fixture in `dataset` is
# classified by `xgb_model` from the two sides' rows in the season team-data
# file, and points are awarded from the predicted class.
# NOTE(review): near-identical logic exists elsewhere in this notebook for a
# different season; consider extracting a shared, parameterised function.
team_data = pd.read_csv('Datasets/2023-2024_team_data.csv')
final_standings = {team: 0 for team in team_data['Team']}

fixtures_24 = dataset[dataset['Season'] == '2023-2024']

# Index the team statistics by team name once instead of re-scanning the whole
# frame for every single value. Keeping the first row per team matches a
# "first match wins" lookup if a team were listed more than once.
team_stats = team_data.drop_duplicates(subset='Team').set_index('Team')

# (side, team-data column) for each model feature, in the positional order the
# values are assigned to X_train.columns.
feature_spec = [
    ('home', 'ELO'),                     # Corresponds to 'Home Team ELO'
    ('away', 'ELO'),                     # Corresponds to 'Away Team ELO'
    ('home', 'XG'),                      # Corresponds to 'Home Team XG'
    ('away', 'XG'),                      # Corresponds to 'Away Team XG'
    ('home', 'XGA'),                     # Corresponds to 'Home Team XGA'
    ('away', 'XGA'),                     # Corresponds to 'Away Team XGA'
    ('home', 'Win Percentage'),          # Corresponds to 'Home Team Win Percentage'
    ('home', 'Draw Percentage'),         # Corresponds to 'Home Team Draw Percentage'
    ('away', 'Win Percentage'),          # Corresponds to 'Away Team Win Percentage'
    ('away', 'Draw Percentage'),         # Corresponds to 'Away Team Draw Percentage'
    ('home', 'Team Form'),               # Corresponds to 'Home Team Form'
    ('away', 'Team Form'),               # Corresponds to 'Away Team Form'
    ('home', 'Team Cumulative Points'),  # Corresponds to 'Home Team Cumulative Points'
    ('away', 'Team Cumulative Points'),  # Corresponds to 'Away Team Cumulative Points'
    ('home', 'Team Form Statistics'),    # Corresponds to 'Home Team Form Statistics'
    ('away', 'Team Form Statistics'),    # Corresponds to 'Away Team Form Statistics'
]
if len(feature_spec) != len(X_train.columns):
    raise ValueError(
        f'Expected {len(feature_spec)} feature columns, '
        f'but X_train has {len(X_train.columns)}'
    )

home_teams = fixtures_24['Home Team'].tolist()
away_teams = fixtures_24['Away Team'].tolist()

# Fail early, naming the offending teams, when a fixture references a team
# that has no row in the team-data file.
unknown_teams = sorted(
    (set(home_teams) | set(away_teams)) - set(team_stats.index), key=str
)
if unknown_teams:
    raise KeyError(f'Teams missing from the 2023-2024 team data: {unknown_teams}')

# One row per fixture, aligned with home_teams / away_teams.
side_stats = {
    'home': team_stats.loc[home_teams],
    'away': team_stats.loc[away_teams],
}
entry_df = pd.DataFrame({
    feature_name: side_stats[side][stat].to_numpy()
    for feature_name, (side, stat) in zip(X_train.columns, feature_spec)
})

# Classify all fixtures in one batch; skip the model call when there are none.
predicted_results = xgb_model.predict(entry_df) if len(entry_df) else []

for home_team, away_team, outcome in zip(home_teams, away_teams, predicted_results):
    if outcome == 2:
        # Class 2: three points to the home side.
        final_standings[home_team] += 3
    elif outcome == 1:
        # Class 1: one point to each side.
        final_standings[home_team] += 1
        final_standings[away_team] += 1
    elif outcome == 0:
        # Class 0: three points to the away side.
        final_standings[away_team] += 3

print('Final Standings for 2023-2024 season')
# Highest points first; the stable sort keeps the original order for ties.
final_standings = dict(sorted(final_standings.items(), key=lambda item: item[1], reverse=True))
for team, points in final_standings.items():
    print(f'{team}: {points}')


Final Standings for 2023-2024 season
Manchester City: 75
Liverpool: 74
Manchester Utd: 64
Arsenal: 62
Newcastle Utd: 62
Aston Villa: 60
Burnley: 57
Tottenham: 53
Brighton: 50
Everton: 49
Sheffield Utd: 47
Chelsea: 40
Brentford: 36
West Ham: 34
Luton Town: 32
Crystal Palace: 30
Fulham: 29
Nott'ham Forest: 29
Wolves: 27
Bournemouth: 15


In [51]:
# Simulate a 2022-2023 league table: every 2022-2023 fixture in `dataset` is
# classified by `xgb_model` from the two sides' rows in the season team-data
# file, and points are awarded from the predicted class.
# NOTE(review): near-identical logic exists elsewhere in this notebook for a
# different season; consider extracting a shared, parameterised function.
team_data = pd.read_csv('Datasets/2022-2023_team_data.csv')
final_standings = {team: 0 for team in team_data['Team']}

fixtures_23 = dataset[dataset['Season'] == '2022-2023']

# Index the team statistics by team name once instead of re-scanning the whole
# frame for every single value. Keeping the first row per team matches a
# "first match wins" lookup if a team were listed more than once.
team_stats = team_data.drop_duplicates(subset='Team').set_index('Team')

# (side, team-data column) for each model feature, in the positional order the
# values are assigned to X_train.columns.
feature_spec = [
    ('home', 'ELO'),                     # Corresponds to 'Home Team ELO'
    ('away', 'ELO'),                     # Corresponds to 'Away Team ELO'
    ('home', 'XG'),                      # Corresponds to 'Home Team XG'
    ('away', 'XG'),                      # Corresponds to 'Away Team XG'
    ('home', 'XGA'),                     # Corresponds to 'Home Team XGA'
    ('away', 'XGA'),                     # Corresponds to 'Away Team XGA'
    ('home', 'Win Percentage'),          # Corresponds to 'Home Team Win Percentage'
    ('home', 'Draw Percentage'),         # Corresponds to 'Home Team Draw Percentage'
    ('away', 'Win Percentage'),          # Corresponds to 'Away Team Win Percentage'
    ('away', 'Draw Percentage'),         # Corresponds to 'Away Team Draw Percentage'
    ('home', 'Team Form'),               # Corresponds to 'Home Team Form'
    ('away', 'Team Form'),               # Corresponds to 'Away Team Form'
    ('home', 'Team Cumulative Points'),  # Corresponds to 'Home Team Cumulative Points'
    ('away', 'Team Cumulative Points'),  # Corresponds to 'Away Team Cumulative Points'
    ('home', 'Team Form Statistics'),    # Corresponds to 'Home Team Form Statistics'
    ('away', 'Team Form Statistics'),    # Corresponds to 'Away Team Form Statistics'
]
if len(feature_spec) != len(X_train.columns):
    raise ValueError(
        f'Expected {len(feature_spec)} feature columns, '
        f'but X_train has {len(X_train.columns)}'
    )

home_teams = fixtures_23['Home Team'].tolist()
away_teams = fixtures_23['Away Team'].tolist()

# Fail early, naming the offending teams, when a fixture references a team
# that has no row in the team-data file.
unknown_teams = sorted(
    (set(home_teams) | set(away_teams)) - set(team_stats.index), key=str
)
if unknown_teams:
    raise KeyError(f'Teams missing from the 2022-2023 team data: {unknown_teams}')

# One row per fixture, aligned with home_teams / away_teams.
side_stats = {
    'home': team_stats.loc[home_teams],
    'away': team_stats.loc[away_teams],
}
entry_df = pd.DataFrame({
    feature_name: side_stats[side][stat].to_numpy()
    for feature_name, (side, stat) in zip(X_train.columns, feature_spec)
})

# Classify all fixtures in one batch; skip the model call when there are none.
predicted_results = xgb_model.predict(entry_df) if len(entry_df) else []

for home_team, away_team, outcome in zip(home_teams, away_teams, predicted_results):
    if outcome == 2:
        # Class 2: three points to the home side.
        final_standings[home_team] += 3
    elif outcome == 1:
        # Class 1: one point to each side.
        final_standings[home_team] += 1
        final_standings[away_team] += 1
    elif outcome == 0:
        # Class 0: three points to the away side.
        final_standings[away_team] += 3

print('Final Standings for 2022-2023 Season')
# Highest points first; the stable sort keeps the original order for ties.
final_standings = dict(sorted(final_standings.items(), key=lambda item: item[1], reverse=True))
for team, points in final_standings.items():
    print(f'{team}: {points}')


Final Standings for 2022-2023 Season
Tottenham: 80
Arsenal: 68
Liverpool: 68
Manchester City: 68
Chelsea: 54
Fulham: 51
Leicester City: 45
Bournemouth: 45
Brighton: 45
Crystal Palace: 43
Newcastle Utd: 42
Manchester Utd: 41
Leeds United: 41
Everton: 37
Nott'ham Forest: 35
Wolves: 34
Aston Villa: 32
West Ham: 31
Brentford: 24
Southampton: 21
