In [1]:
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
WEEK = 10

In [3]:
# load in model
model = joblib.load('new_model.pkl')

In [4]:
df = pd.read_csv('new_data.csv')

In [5]:
df

Unnamed: 0,Wk,Day,Date,Time,Home,xG,Score,xG.1,Away,Attendance,Venue,Referee,Season
0,1.0,Fri,2017-08-11,19:45,Arsenal,2.5,4–3,1.5,Leicester City,59387.0,Emirates Stadium,Mike Dean,2017-2018
1,1.0,Sat,2017-08-12,12:30,Watford,2.1,3–3,2.6,Liverpool,20407.0,Vicarage Road Stadium,Anthony Taylor,2017-2018
2,1.0,Sat,2017-08-12,15:00,Crystal Palace,1.1,0–3,1.5,Huddersfield,25448.0,Selhurst Park,Jonathan Moss,2017-2018
3,1.0,Sat,2017-08-12,15:00,West Brom,1.3,1–0,0.5,Bournemouth,25011.0,The Hawthorns,Robert Madley,2017-2018
4,1.0,Sat,2017-08-12,15:00,Chelsea,1.5,2–3,0.6,Burnley,41616.0,Stamford Bridge,Craig Pawson,2017-2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3035,38.0,Sun,2025-05-25,16:00,Fulham,,,,Manchester City,,Craven Cottage,,2024-2025
3036,38.0,Sun,2025-05-25,16:00,Nott'ham Forest,,,,Chelsea,,The City Ground,,2024-2025
3037,38.0,Sun,2025-05-25,16:00,Manchester Utd,,,,Aston Villa,,Old Trafford,,2024-2025
3038,38.0,Sun,2025-05-25,16:00,Wolves,,,,Brentford,,Molineux Stadium,,2024-2025


In [6]:
# create a variable called current day
current_day = pd.to_datetime('today').date()

# convert 'Date' column to datetime
df['Date'] = pd.to_datetime(df['Date']).dt.date

# create a df to store our latest data on teams playing before the next round of games
model_df = df[df['Date'] < current_day]

# remove nan rows
model_df = model_df.dropna()

In [7]:
model_df

Unnamed: 0,Wk,Day,Date,Time,Home,xG,Score,xG.1,Away,Attendance,Venue,Referee,Season
0,1.0,Fri,2017-08-11,19:45,Arsenal,2.5,4–3,1.5,Leicester City,59387.0,Emirates Stadium,Mike Dean,2017-2018
1,1.0,Sat,2017-08-12,12:30,Watford,2.1,3–3,2.6,Liverpool,20407.0,Vicarage Road Stadium,Anthony Taylor,2017-2018
2,1.0,Sat,2017-08-12,15:00,Crystal Palace,1.1,0–3,1.5,Huddersfield,25448.0,Selhurst Park,Jonathan Moss,2017-2018
3,1.0,Sat,2017-08-12,15:00,West Brom,1.3,1–0,0.5,Bournemouth,25011.0,The Hawthorns,Robert Madley,2017-2018
4,1.0,Sat,2017-08-12,15:00,Chelsea,1.5,2–3,0.6,Burnley,41616.0,Stamford Bridge,Craig Pawson,2017-2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2740,9.0,Fri,2024-10-25,20:00,Leicester City,0.8,1–3,1.7,Nott'ham Forest,31879.0,King Power Stadium,Craig Pawson,2024-2025
2741,9.0,Sat,2024-10-26,15:00,Brentford,4.0,4–3,1.3,Ipswich Town,17109.0,Gtech Community Stadium,Lewis Smith,2024-2025
2742,9.0,Sat,2024-10-26,15:00,Brighton,1.3,2–2,1.3,Wolves,31840.0,The American Express Stadium,Michael Oliver,2024-2025
2745,9.0,Sat,2024-10-26,17:30,Everton,0.8,1–1,1.2,Fulham,38742.0,Goodison Park,John Brooks,2024-2025


In [8]:
model_df['Attendance'] = model_df['Attendance'].fillna(0) # setting null values to 0 because the NaN are from Covid-19 games with no attendance

model_df['Attendance'] = model_df['Attendance'].astype(int)

In [9]:
model_df[['home_goals', 'away_goals']] = model_df['Score'].str.split('–', expand=True).astype(int)

# rename xG and xG.1 columns to home_xG and away_xG
model_df.rename(columns={'xG': 'home_xg', 'xG.1': 'away_xg'}, inplace=True)

In [10]:
def get_result(row):
    if row['home_goals'] > row['away_goals']:
        return 'Home Win'
    elif row['home_goals'] < row['away_goals']:
        return 'Away Win'
    else:
        return 'Draw'
    
model_df['Result'] = model_df.apply(get_result, axis=1)

In [11]:
model_df['Day'] = pd.to_datetime(model_df['Date']).dt.day_name()

model_df['Season'] = model_df['Season'].apply(lambda x: x.split('-')[1])

In [12]:
model_df = pd.get_dummies(model_df, columns=['Day'])

model_df.reset_index(drop=True, inplace=True)

In [13]:
for x in model_df.Home.unique():
    temp_df = model_df[(model_df['Home'] == x) | (model_df['Away'] == x)]
    temp_df = temp_df.sort_values(['Date'])

    temp_df['goal_value_to_calculate'] = temp_df.apply(lambda y: y['home_goals'] if y['Home'] == x else y['away_goals'], axis=1)
    temp_df['rolling_avg_goals'] = temp_df['goal_value_to_calculate'].rolling(window = 5, closed = 'left', min_periods = 1).mean()

    for index, row in temp_df.iterrows():
        if row['Home'] == x:
            model_df.at[index, 'home_rolling_avg_goals'] = row['rolling_avg_goals']
        else:
            model_df.at[index, 'away_rolling_avg_goals'] = row['rolling_avg_goals']

In [14]:
for x in model_df.Home.unique():
    temp_df = model_df[(model_df['Home'] == x) | (model_df['Away'] == x)]
    temp_df = temp_df.sort_values(['Date'])

    temp_df['xg_value_to_calculate'] = temp_df.apply(lambda y: y['home_xg'] if y['Home'] == x else y['away_xg'], axis=1)
    temp_df['rolling_avg_xG'] = temp_df['xg_value_to_calculate'].rolling(window = 5, closed = 'left', min_periods = 1).mean()

    for index, row in temp_df.iterrows():
        if row['Home'] == x:
            model_df.at[index, 'home_rolling_avg_xG'] = row['rolling_avg_xG']
        else:
            model_df.at[index, 'away_rolling_avg_xG'] = row['rolling_avg_xG']

In [15]:
model_df = model_df.dropna(subset=['home_rolling_avg_goals', 'away_rolling_avg_goals', 'home_rolling_avg_xG', 'away_rolling_avg_xG'])

In [16]:
# create a new column for the central moving average of the home goals
model_df['home_goals_cma'] = model_df['home_goals'].expanding().mean()
model_df['home_xg_cma'] = model_df['home_xg'].expanding().mean()

# away goals
model_df['away_goals_cma'] = model_df['away_goals'].expanding().mean()
model_df['away_xg_cma'] = model_df['away_xg'].expanding().mean()

# create a new column by dividing the home goals by central moving average
model_df['home_goals_div_cma'] = model_df['home_goals'] / model_df['home_goals_cma']
model_df['home_xg_div_cma'] = model_df['home_xg'] / model_df['home_xg_cma']

# away goals
model_df['away_goals_div_cma'] = model_df['away_goals'] / model_df['away_goals_cma']
model_df['away_xg_div_cma'] = model_df['away_xg'] / model_df['away_xg_cma']

In [17]:
# set Wk column to integer
model_df['Wk'] = model_df['Wk'].astype(int)

# create a new column called quarter. divide the season into 4 quarters using the Wk column and assign a value between 1 and 4
model_df['quarter'] = np.where(model_df['Wk'].astype(int) <= 9, 1,
                np.where(model_df['Wk'].astype(int) <= 18, 2,
                np.where(model_df['Wk'].astype(int) <= 27, 3, 4)))

# using home_goals_div_cma, take the mean of the column for each quarter. put into new column called home_goals_seasonality
model_df['home_goals_seasonality'] = model_df.groupby('quarter')['home_goals_div_cma'].transform('mean')
model_df['home_xg_seasonality'] = model_df.groupby('quarter')['home_xg_div_cma'].transform('mean')

# away goals
model_df['away_goals_seasonality'] = model_df.groupby('quarter')['away_goals_div_cma'].transform('mean')
model_df['away_xg_seasonality'] = model_df.groupby('quarter')['away_xg_div_cma'].transform('mean')

# create a new column called home_goals_deseasonalised by dividing home_goals by home_goals_seasonality
model_df['home_goals_deseasonalised'] = model_df['home_goals'] / model_df['home_goals_seasonality']
model_df['home_xg_deseasonalised'] = model_df['home_xg'] / model_df['home_xg_seasonality']

# away goals
model_df['away_goals_deseasonalised'] = model_df['away_goals'] / model_df['away_goals_seasonality']
model_df['away_xg_deseasonalised'] = model_df['away_xg'] / model_df['away_xg_seasonality']

In [18]:
model_df['Season'] = model_df['Season'].astype(int)
# set home goals and away goals to integer
model_df['home_goals'] = model_df['home_goals'].astype(int)
model_df['away_goals'] = model_df['away_goals'].astype(int)

In [19]:
# Ensure 'Result' is categorical or integer-based
model_df['Result'] = model_df['Result'].astype('category')  # or use int depending on your encoding

# Ensure 'home_goals' and 'away_goals' are integers
model_df['home_goals'] = pd.to_numeric(model_df['home_goals'], errors='coerce').fillna(0).astype(int)
model_df['away_goals'] = pd.to_numeric(model_df['away_goals'], errors='coerce').fillna(0).astype(int)

In [20]:
model_df = pd.get_dummies(model_df, columns=['Home', 'Away', 'Venue'])

In [21]:
features = [column for column in model_df.drop(columns=[
    'Date', 'Time', 'home_xg', 'away_xg', 'home_goals', 'Score', 'Referee', 'Attendance',
    'away_goals', 'Result', 'Season', 'quarter', 'home_goals_cma', 'home_xg_cma', 
    'away_goals_cma', 'away_xg_cma', 'home_goals_div_cma', 'home_xg_div_cma', 
    'away_goals_div_cma', 'away_xg_div_cma'])]

# New games

In [22]:
# create a new dataframe with the teams and the features

teams = df['Home'].unique()

teams_df = pd.DataFrame(teams, columns=['Team'])

# assign the latest 'home_rolling_avg_goals', 'away_rolling_avg_goals' etc. to the teams_df
for team in teams:
    home_rolling_avg_goals = model_df[model_df['Home_' + team] == 1]['home_rolling_avg_goals'].iloc[-1]
    away_rolling_avg_goals = model_df[model_df['Away_' + team] == 1]['away_rolling_avg_goals'].iloc[-1]
    home_rolling_avg_xG = model_df[model_df['Home_' + team] == 1]['home_rolling_avg_xG'].iloc[-1]
    away_rolling_avg_xG = model_df[model_df['Away_' + team] == 1]['away_rolling_avg_xG'].iloc[-1]
    home_goals_seasonality = model_df[model_df['Home_' + team] == 1]['home_goals_seasonality'].iloc[-1]
    home_xg_seasonality = model_df[model_df['Home_' + team] == 1]['home_xg_seasonality'].iloc[-1]
    away_goals_seasonality = model_df[model_df['Away_' + team] == 1]['away_goals_seasonality'].iloc[-1]
    away_xg_seasonality = model_df[model_df['Away_' + team] == 1]['away_xg_seasonality'].iloc[-1]
    home_goals_deseasonalised = model_df[model_df['Home_' + team] == 1]['home_goals_deseasonalised'].iloc[-1]
    home_xg_deseasonalised = model_df[model_df['Home_' + team] == 1]['home_xg_deseasonalised'].iloc[-1]
    away_goals_deseasonalised = model_df[model_df['Away_' + team] == 1]['away_goals_deseasonalised'].iloc[-1]
    away_xg_deseasonalised = model_df[model_df['Away_' + team] == 1]['away_xg_deseasonalised'].iloc[-1]

    teams_df.loc[teams_df['Team'] == team, 'home_rolling_avg_goals'] = home_rolling_avg_goals
    teams_df.loc[teams_df['Team'] == team, 'away_rolling_avg_goals'] = away_rolling_avg_goals
    teams_df.loc[teams_df['Team'] == team, 'home_rolling_avg_xG'] = home_rolling_avg_xG
    teams_df.loc[teams_df['Team'] == team, 'away_rolling_avg_xG'] = away_rolling_avg_xG
    teams_df.loc[teams_df['Team'] == team, 'home_goals_seasonality'] = home_goals_seasonality
    teams_df.loc[teams_df['Team'] == team, 'home_xg_seasonality'] = home_xg_seasonality
    teams_df.loc[teams_df['Team'] == team, 'away_goals_seasonality'] = away_goals_seasonality
    teams_df.loc[teams_df['Team'] == team, 'away_xg_seasonality'] = away_xg_seasonality
    teams_df.loc[teams_df['Team'] == team, 'home_goals_deseasonalised'] = home_goals_deseasonalised
    teams_df.loc[teams_df['Team'] == team, 'home_xg_deseasonalised'] = home_xg_deseasonalised
    teams_df.loc[teams_df['Team'] == team, 'away_goals_deseasonalised'] = away_goals_deseasonalised
    teams_df.loc[teams_df['Team'] == team, 'away_xg_deseasonalised'] = away_xg_deseasonalised


# create a copy of original df and filter for games this weekend
weekend_df = df.copy()

# filter for Wk 9, season 2024-2025
weekend_df = weekend_df[(weekend_df['Wk'] == WEEK) & (weekend_df['Season'] == '2024-2025')]

# drop columns with null values
weekend_df = weekend_df.drop(columns=['xG', 'xG.1', 'Attendance', 'Referee', 'Score'])


# using Home and Away columns, assign the rolling averages and seasonality values to the weekend_df
for index, row in weekend_df.iterrows():
    home_team = row['Home']
    away_team = row['Away']

    home_rolling_avg_goals = teams_df[teams_df['Team'] == home_team]['home_rolling_avg_goals'].iloc[0]
    away_rolling_avg_goals = teams_df[teams_df['Team'] == away_team]['away_rolling_avg_goals'].iloc[0]
    home_rolling_avg_xG = teams_df[teams_df['Team'] == home_team]['home_rolling_avg_xG'].iloc[0]
    away_rolling_avg_xG = teams_df[teams_df['Team'] == away_team]['away_rolling_avg_xG'].iloc[0]
    home_goals_seasonality = teams_df[teams_df['Team'] == home_team]['home_goals_seasonality'].iloc[0]
    home_xg_seasonality = teams_df[teams_df['Team'] == home_team]['home_xg_seasonality'].iloc[0]
    away_goals_seasonality = teams_df[teams_df['Team'] == away_team]['away_goals_seasonality'].iloc[0]
    away_xg_seasonality = teams_df[teams_df['Team'] == away_team]['away_xg_seasonality'].iloc[0]
    home_goals_deseasonalised = teams_df[teams_df['Team'] == home_team]['home_goals_deseasonalised'].iloc[0]
    home_xg_deseasonalised = teams_df[teams_df['Team'] == home_team]['home_xg_deseasonalised'].iloc[0]
    away_goals_deseasonalised = teams_df[teams_df['Team'] == away_team]['away_goals_deseasonalised'].iloc[0]
    away_xg_deseasonalised = teams_df[teams_df['Team'] == away_team]['away_xg_deseasonalised'].iloc[0]

    weekend_df.at[index, 'home_rolling_avg_goals'] = home_rolling_avg_goals
    weekend_df.at[index, 'away_rolling_avg_goals'] = away_rolling_avg_goals
    weekend_df.at[index, 'home_rolling_avg_xG'] = home_rolling_avg_xG
    weekend_df.at[index, 'away_rolling_avg_xG'] = away_rolling_avg_xG
    weekend_df.at[index, 'home_goals_seasonality'] = home_goals_seasonality
    weekend_df.at[index, 'home_xg_seasonality'] = home_xg_seasonality

    weekend_df.at[index, 'away_goals_seasonality'] = away_goals_seasonality
    weekend_df.at[index, 'away_xg_seasonality'] = away_xg_seasonality
    weekend_df.at[index, 'home_goals_deseasonalised'] = home_goals_deseasonalised
    weekend_df.at[index, 'home_xg_deseasonalised'] = home_xg_deseasonalised
    weekend_df.at[index, 'away_goals_deseasonalised'] = away_goals_deseasonalised
    weekend_df.at[index, 'away_xg_deseasonalised'] = away_xg_deseasonalised


display(weekend_df)

Unnamed: 0,Wk,Day,Date,Time,Home,Away,Venue,Season,home_rolling_avg_goals,away_rolling_avg_goals,home_rolling_avg_xG,away_rolling_avg_xG,home_goals_seasonality,home_xg_seasonality,away_goals_seasonality,away_xg_seasonality,home_goals_deseasonalised,home_xg_deseasonalised,away_goals_deseasonalised,away_xg_deseasonalised
2750,10.0,Sat,2024-11-02,12:30,Newcastle Utd,Arsenal,St James' Park,2024-2025,1.2,2.2,1.7,2.14,1.03694,1.034738,1.065545,1.068097,0.0,1.932857,0.0,0.655371
2751,10.0,Sat,2024-11-02,15:00,Southampton,Everton,St Mary's Stadium,2024-2025,0.8,1.4,1.24,1.08,1.03694,1.034738,1.065545,1.068097,1.928752,2.0295,1.876974,1.591616
2752,10.0,Sat,2024-11-02,15:00,Liverpool,Brighton,Anfield,2024-2025,1.8,1.6,1.72,1.44,1.03694,1.034738,1.065545,1.068097,1.928752,1.836214,0.938487,1.029869
2753,10.0,Sat,2024-11-02,15:00,Nott'ham Forest,West Ham,The City Ground,2024-2025,1.0,1.4,0.9,1.4,1.03694,1.034738,1.065545,1.068097,0.964376,1.642928,0.938487,0.748996
2754,10.0,Sat,2024-11-02,15:00,Ipswich Town,Leicester City,Portman Road Stadium,2024-2025,1.0,1.4,0.82,0.68,1.03694,1.034738,1.065545,1.068097,0.0,1.256357,2.815461,2.902359
2755,10.0,Sat,2024-11-02,15:00,Bournemouth,Manchester City,Vitality Stadium,2024-2025,1.2,2.2,1.72,1.94,1.03694,1.034738,1.065545,1.068097,1.928752,1.739571,1.876974,1.497992
2756,10.0,Sat,2024-11-02,17:30,Wolves,Crystal Palace,Molineux Stadium,2024-2025,1.4,0.8,0.8,1.1,1.03694,1.034738,1.065545,1.068097,0.964376,0.773143,0.0,0.936245
2757,10.0,Sun,2024-11-03,14:00,Tottenham,Aston Villa,Tottenham Hotspur Stadium,2024-2025,1.8,2.0,2.22,1.3,1.03694,1.034738,1.065545,1.068097,3.857504,1.836214,2.815461,1.497992
2758,10.0,Sun,2024-11-03,16:30,Manchester Utd,Chelsea,Old Trafford,2024-2025,0.6,2.0,1.44,2.38,1.03694,1.034738,1.065545,1.068097,1.928752,1.256357,0.938487,0.936245
2759,10.0,Mon,2024-11-04,20:00,Fulham,Brentford,Craven Cottage,2024-2025,1.6,2.2,1.7,1.84,1.03694,1.034738,1.065545,1.068097,0.964376,1.739571,0.938487,0.84262


In [23]:
teams_df

Unnamed: 0,Team,home_rolling_avg_goals,away_rolling_avg_goals,home_rolling_avg_xG,away_rolling_avg_xG,home_goals_seasonality,home_xg_seasonality,away_goals_seasonality,away_xg_seasonality,home_goals_deseasonalised,home_xg_deseasonalised,away_goals_deseasonalised,away_xg_deseasonalised
0,Arsenal,2.0,2.2,1.86,2.14,1.03694,1.034738,1.065545,1.068097,1.928752,0.869786,0.0,0.655371
1,Watford,0.6,0.6,0.78,0.72,1.07499,1.082672,1.066751,1.067687,0.930241,1.293096,0.937426,0.561962
2,Crystal Palace,0.8,0.8,1.24,1.1,1.03694,1.034738,1.065545,1.068097,0.0,0.579857,0.0,0.936245
3,West Brom,1.0,0.8,0.88,0.84,1.07499,1.082672,1.066751,1.067687,0.930241,1.016004,0.937426,0.936604
4,Chelsea,3.0,2.0,2.24,2.38,1.03694,1.034738,1.065545,1.068097,0.964376,2.222785,0.938487,0.936245
5,Everton,1.4,1.4,1.06,1.08,1.03694,1.034738,1.065545,1.068097,0.964376,0.773143,1.876974,1.591616
6,Southampton,0.8,0.6,1.24,1.14,1.03694,1.034738,1.065545,1.068097,1.928752,2.0295,0.938487,0.561747
7,Brighton,1.6,1.6,1.32,1.44,1.03694,1.034738,1.065545,1.068097,1.928752,1.256357,0.938487,1.029869
8,Newcastle Utd,1.2,1.4,1.7,1.6,1.03694,1.034738,1.065545,1.068097,0.0,1.932857,0.0,1.966114
9,Manchester Utd,0.6,0.8,1.44,1.6,1.03694,1.034738,1.065545,1.068097,1.928752,1.256357,0.0,0.561747


In [24]:
weekend_df['Day'] = pd.to_datetime(weekend_df['Date']).dt.day_name()

# store weekend_df in new variable for final results

results_df = weekend_df.loc[:, ['Date', 'Home', 'Away', 'Day', 'Venue']]

# Ensure 'weekend_df' has the same structure as 'model_df'
weekend_df = pd.get_dummies(weekend_df, columns=['Home', 'Away', 'Day','Venue'], drop_first=False)

# Add missing columns with default values
for column in model_df.columns:
    if column not in weekend_df.columns:
        if model_df[column].dtype == 'bool':
            weekend_df[column] = False
        elif model_df[column].dtype == 'float64':
            weekend_df[column] = 0.0
        elif model_df[column].dtype == 'int64':
            weekend_df[column] = 0
        else:
            weekend_df[column] = None

# Ensure the order of columns matches
weekend_df = weekend_df[model_df.columns]

  weekend_df[column] = False
  weekend_df[column] = False
  weekend_df[column] = False
  weekend_df[column] = False
  weekend_df[column] = False
  weekend_df[column] = False
  weekend_df[column] = False


In [25]:
weekend_df

Unnamed: 0,Wk,Date,Time,home_xg,Score,away_xg,Attendance,Referee,Season,home_goals,...,Venue_The American Express Stadium,Venue_The City Ground,Venue_The Hawthorns,Venue_The John Smith's Stadium,Venue_Tottenham Hotspur Stadium,Venue_Turf Moor,Venue_Vicarage Road Stadium,Venue_Villa Park,Venue_Vitality Stadium,Venue_Wembley Stadium
2750,10.0,2024-11-02,12:30,0.0,,0.0,0,,2024-2025,0,...,False,False,False,False,False,False,False,False,False,False
2751,10.0,2024-11-02,15:00,0.0,,0.0,0,,2024-2025,0,...,False,False,False,False,False,False,False,False,False,False
2752,10.0,2024-11-02,15:00,0.0,,0.0,0,,2024-2025,0,...,False,False,False,False,False,False,False,False,False,False
2753,10.0,2024-11-02,15:00,0.0,,0.0,0,,2024-2025,0,...,False,True,False,False,False,False,False,False,False,False
2754,10.0,2024-11-02,15:00,0.0,,0.0,0,,2024-2025,0,...,False,False,False,False,False,False,False,False,False,False
2755,10.0,2024-11-02,15:00,0.0,,0.0,0,,2024-2025,0,...,False,False,False,False,False,False,False,False,True,False
2756,10.0,2024-11-02,17:30,0.0,,0.0,0,,2024-2025,0,...,False,False,False,False,False,False,False,False,False,False
2757,10.0,2024-11-03,14:00,0.0,,0.0,0,,2024-2025,0,...,False,False,False,False,True,False,False,False,False,False
2758,10.0,2024-11-03,16:30,0.0,,0.0,0,,2024-2025,0,...,False,False,False,False,False,False,False,False,False,False
2759,10.0,2024-11-04,20:00,0.0,,0.0,0,,2024-2025,0,...,False,False,False,False,False,False,False,False,False,False


In [26]:
features = [column for column in weekend_df.drop(columns=[
    'Date', 'Time', 'home_xg', 'away_xg', 'home_goals', 'Score', 'Attendance', 'Referee',
    'away_goals', 'Result', 'Season', 'quarter', 'home_goals_cma', 'home_xg_cma', 
    'away_goals_cma', 'away_xg_cma', 'home_goals_div_cma', 'home_xg_div_cma', 
    'away_goals_div_cma', 'away_xg_div_cma'])]

In [27]:
# add the weekend_df Result column to the results_df
results_df['results'] = model.predict(weekend_df[features])


In [28]:
results_df.to_csv(f'prediction_wk{WEEK}_2025.csv', index=False)

In [30]:
display(results_df)

Unnamed: 0,Date,Home,Away,Day,Venue,results
2750,2024-11-02,Newcastle Utd,Arsenal,Saturday,St James' Park,Draw
2751,2024-11-02,Southampton,Everton,Saturday,St Mary's Stadium,Draw
2752,2024-11-02,Liverpool,Brighton,Saturday,Anfield,Home Win
2753,2024-11-02,Nott'ham Forest,West Ham,Saturday,The City Ground,Draw
2754,2024-11-02,Ipswich Town,Leicester City,Saturday,Portman Road Stadium,Away Win
2755,2024-11-02,Bournemouth,Manchester City,Saturday,Vitality Stadium,Draw
2756,2024-11-02,Wolves,Crystal Palace,Saturday,Molineux Stadium,Home Win
2757,2024-11-03,Tottenham,Aston Villa,Sunday,Tottenham Hotspur Stadium,Home Win
2758,2024-11-03,Manchester Utd,Chelsea,Sunday,Old Trafford,Home Win
2759,2024-11-04,Fulham,Brentford,Monday,Craven Cottage,Draw


In [29]:
features

['Wk',
 'Day_Friday',
 'Day_Monday',
 'Day_Saturday',
 'Day_Sunday',
 'Day_Thursday',
 'Day_Tuesday',
 'Day_Wednesday',
 'home_rolling_avg_goals',
 'away_rolling_avg_goals',
 'home_rolling_avg_xG',
 'away_rolling_avg_xG',
 'home_goals_seasonality',
 'home_xg_seasonality',
 'away_goals_seasonality',
 'away_xg_seasonality',
 'home_goals_deseasonalised',
 'home_xg_deseasonalised',
 'away_goals_deseasonalised',
 'away_xg_deseasonalised',
 'Home_Arsenal',
 'Home_Aston Villa',
 'Home_Bournemouth',
 'Home_Brentford',
 'Home_Brighton',
 'Home_Burnley',
 'Home_Cardiff City',
 'Home_Chelsea',
 'Home_Crystal Palace',
 'Home_Everton',
 'Home_Fulham',
 'Home_Huddersfield',
 'Home_Ipswich Town',
 'Home_Leeds United',
 'Home_Leicester City',
 'Home_Liverpool',
 'Home_Luton Town',
 'Home_Manchester City',
 'Home_Manchester Utd',
 'Home_Newcastle Utd',
 'Home_Norwich City',
 "Home_Nott'ham Forest",
 'Home_Sheffield Utd',
 'Home_Southampton',
 'Home_Stoke City',
 'Home_Swansea City',
 'Home_Tottenham',
