In [None]:
pip install xgboost

import pandas as pd
import numpy as np

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the per-match dataset. Judging by the file name it holds match
# statistics already joined with team ratings -- TODO confirm the schema.
df = pd.read_csv('../data/all_games_stats_with_team_ratings.csv')

# Exploratory peeks: first rows, (rows, columns) shape, and the column index.
df.head()

df.shape

df.columns

def label_result(home_goals, away_goals):
    """Return the outcome label of a match given both teams' goal counts.

    Args:
        home_goals: Goals scored by the home team.
        away_goals: Goals scored by the away team.

    Returns:
        'Home Win' when the home side scored more, 'Away Win' when the away
        side scored more, and 'Draw' in every other case (including inputs
        for which neither comparison holds, such as NaN).
    """
    home_ahead = home_goals > away_goals
    away_ahead = home_goals < away_goals

    # Neither side strictly ahead -> treated as a draw.
    if not (home_ahead or away_ahead):
        return 'Draw'

    return 'Home Win' if home_ahead else 'Away Win'

# Derive the three-way target label from the final score of every match.
df['Result'] = df.apply(lambda row: label_result(row['Home_Goals'], row['Away_Goals']), axis=1)

# Eyeball the labels next to the scores they were computed from.
df.loc[:, ['Season', 'Home_Team_Name', 'Away_Team_Name', 'Home_Goals', 'Away_Goals', 'Result']].head()

# Per-match statistics that are removed for both sides. The goal columns
# directly determine 'Result', and the remaining names look like in-match
# statistics, so keeping any of them would leak the outcome into the features.
# The list is generated per side so Home and Away stay symmetric: the original
# hand-written list dropped 'Away_shots_outsidebox' but left
# 'Home_shots_outsidebox' in the feature set.
in_match_stats = [
    'shots_on_goal',
    'shots_off_goal',
    'total_shots',
    'blocked_shots',
    'shots_insidebox',
    'shots_outsidebox',
    'corner_kicks',
    'offsides',
    'ball_possession',
    'yellow_cards',
    'red_cards',
    'fouls',
    'goalkeeper_saves',
    'total_passes',
    'passes_accurate',
    'passes_%',
    'expected_goals',
]
columns_to_drop = ['Season', 'Home_Goals', 'Away_Goals'] + [
    f'{side}_{stat}' for side in ('Home', 'Away') for stat in in_match_stats
]

# errors='ignore' keeps the cell re-runnable and tolerates a dataset that
# lacks one of the statistics (e.g. if 'Home_shots_outsidebox' is absent).
df.drop(columns=columns_to_drop, errors='ignore', inplace=True)

df.columns

# Month feature taken from characters 1-2 of the stringified date.
# NOTE(review): for an ISO date such as '2021-08-14' this slice yields '02'
# (part of the year), not the month -- confirm the 'Match_Date' format and
# adjust the slice (or parse with pd.to_datetime) if needed.
df['Month'] = df['Match_Date'].astype(str).str[1:3]

# The raw date is no longer needed once 'Month' has been extracted.
df.drop(['Match_Date'], axis=1, inplace=True)

df.head()

def convert_monetary(value):
    """Convert a monetary string such as '€1.5M' into a plain number.

    The first character of the string is treated as the currency symbol and
    an optional trailing magnitude suffix scales the amount:
    'K' -> thousands, 'M' -> millions, 'B' -> billions (case-insensitive).

    Args:
        value: The raw cell value. Usually a string like '€250K', '€12.5M',
            '€1.2B' or '€0'; may also be None/NaN/empty for missing data.

    Returns:
        The amount as a float (the int 0 for the literal '€0'), or ``np.nan``
        when the value is missing (None, NaN, falsy, or blank string).

    Raises:
        ValueError: If the numeric part of the string cannot be parsed.
    """
    # Falsy inputs (None, '', 0) are treated as missing, as before.
    if not value:
        return np.nan

    # pandas represents missing cells as float NaN, which has no .strip();
    # float() maps NaN to NaN and passes already-numeric values through.
    if not isinstance(value, str):
        return float(value)

    text = value.strip()
    if not text:
        return np.nan

    if text == '€0':
        return 0

    multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9}
    multiplier = multipliers.get(text[-1].upper())

    if multiplier is None:
        # No magnitude suffix: keep every character after the currency
        # symbol (slicing off the last one would drop a digit).
        return float(text[1:]) * 1

    return float(text[1:-1]) * multiplier

# Inspect the raw monetary strings before converting them.
df['Home_Transfer budget'].unique()

df['Home_Club worth'].unique()

# Turn the monetary strings into numbers for BOTH sides. Previously only the
# Home columns were converted while the Away columns were left as raw strings
# and one-hot encoded below, so the same quantity was a numeric feature for
# one team and a set of per-value dummy columns for the other.
monetary_columns = [
    'Home_Transfer budget',
    'Home_Club worth',
    'Away_Transfer budget',
    'Away_Club worth',
]
for column in monetary_columns:
    df[column] = df[column].apply(convert_monetary)

df.head()

# One-hot encode the remaining categorical columns (team names, the tactical
# attributes of both sides, and the month). The monetary columns are numeric
# now and are therefore intentionally not listed here.
categorical_columns = [
    'Home_Team_Name', 'Away_Team_Name',
    'Home_Speed', 'Home_Dribbling', 'Home_Passing', 'Home_Positioning',
    'Home_Crossing', 'Home_Shooting', 'Home_Aggression', 'Home_Pressure',
    'Home_Team width', 'Home_Defender line',
    'Away_Speed', 'Away_Dribbling', 'Away_Passing', 'Away_Positioning',
    'Away_Crossing', 'Away_Shooting', 'Away_Aggression', 'Away_Pressure',
    'Away_Team width', 'Away_Defender line',
    'Month',
]
df_encoded = pd.get_dummies(df, columns=categorical_columns)

# Integer codes for the three outcome labels used as the training target.
class_mapping = {'Away Win': 0, 'Draw': 1, 'Home Win': 2}

# Separate the target from the features and encode it as integers.
y_labels = df_encoded['Result']
y = y_labels.map(class_mapping)
X = df_encoded.drop(columns=['Result'])

# Hold out 20% of the matches for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Three-class gradient-boosted classifier that predicts the class directly.
model = xgb.XGBClassifier(num_class=3, objective='multi:softmax')
model.fit(X_train, y_train)

# Score the held-out matches and report plain accuracy.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')