# Imports

In [None]:
from collections import defaultdict, deque

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder

# Useful functions

In [None]:
def add_rolling_averages(df: pd.DataFrame, n: int = 5) -> pd.DataFrame:
    """Add each team's rolling average of goals scored before the match.

    Rows are processed in their existing order. For every row, the home
    and away team each receive the mean of the goals they scored in
    their most recent ``n`` earlier rows (home and away appearances are
    pooled per team). A team with no earlier rows gets ``0.0``.

    Args:
        df: Match table with ``HomeTeam``, ``AwayTeam``, ``FTHG`` and
            ``FTAG`` columns. The frame is modified in place.
        n: Maximum number of earlier matches per team to average over.
            Must be at least 1.

    Returns:
        The same ``df`` object, with ``HomeGoalsAvg`` and
        ``AwayGoalsAvg`` columns added.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        # n == 0 would divide by zero and a negative n would slice the
        # wrong end of the history, so reject both up front.
        raise ValueError(f"n must be a positive integer, got {n!r}")

    # One bounded window per team: the deque discards the oldest score
    # automatically, so only the last n goals tallies are ever kept.
    recent_goals = defaultdict(lambda: deque(maxlen=n))

    home_avgs = []
    away_avgs = []

    matches = zip(df['HomeTeam'], df['AwayTeam'], df['FTHG'], df['FTAG'])
    for home, away, fthg, ftag in matches:
        home_window = recent_goals[home]
        away_window = recent_goals[away]

        # Averages are taken before this row's goals are recorded, so a
        # row's own score never contributes to its own features.
        home_avgs.append(
            sum(home_window) / len(home_window) if home_window else 0.0)
        away_avgs.append(
            sum(away_window) / len(away_window) if away_window else 0.0)

        home_window.append(fthg)
        away_window.append(ftag)

    df['HomeGoalsAvg'] = home_avgs
    df['AwayGoalsAvg'] = away_avgs
    return df

# Set Up

In [88]:
df_2023 = pd.read_csv('../data/EPL_2022_2023.csv')

# Keep only the columns used below. The explicit copy makes df_2023 an
# independent frame, so the column assignments that follow do not raise
# SettingWithCopyWarning on pandas versions without copy-on-write.
df_2023 = df_2023[
    ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR', 'HS', 'AS']
].copy()

# Turn team names into numeric IDs for the model.
# NOTE(review): OrdinalEncoder fits each column separately, so a team
# has the same HomeID and AwayID only when both columns contain the
# same set of teams - confirm for this data set.
team_encoder = OrdinalEncoder()
df_2023[['HomeID', 'AwayID']] = team_encoder.fit_transform(
    df_2023[['HomeTeam', 'AwayTeam']])

# NOTE(review): HS/AS look like shot counts from the match itself; if
# so, this feature is not known before kick-off - confirm.
df_2023['ShotDiff'] = df_2023['HS'] - df_2023['AS']

# Rolling averages follow row order.
# NOTE(review): presumably the CSV is sorted chronologically - verify.
df_2023 = add_rolling_averages(df_2023, n=5)

# Training

In [89]:
# Label: the FTR column. Inputs: encoded team IDs, the shot difference
# and both rolling goal averages.
y = df_2023['FTR']
X = df_2023[['HomeID', 'AwayID', 'ShotDiff', 'HomeGoalsAvg', 'AwayGoalsAvg']]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=10,
    test_size=0.2,
)

In [90]:
# Fit a seeded random forest on the training rows; fit() returns the
# estimator itself, so construction and training can be chained.
model = RandomForestClassifier(random_state=10).fit(X_train, y_train)

# Predict the held-out rows and report the share predicted correctly.
preds = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, preds))

Accuracy: 0.5657894736842105
