In [1]:
from typing import Tuple, List
import numpy as np
import pandas as pd
import pandasql as pdsql
import matplotlib.pyplot as plt
import sys
import math

In [2]:
first_season: str = "2019-20"
last_season: str = "2023-24"

# premier, la_liga, serie_a, bundesliga, the_championship
LIGA = "premier"

full_df: pd.DataFrame = pd.DataFrame()
for season in range(int(first_season[:4]), int(last_season[:4]) + 1):
    season_str: str = f"{season:02d}-{season + 1 - 2000:02d}"
    season_df: pd.DataFrame = pd.read_csv(f"Datasets/{LIGA}/{season_str}.csv")
    season_df["Season"] = season_str
    full_df = pd.concat([full_df, season_df])

full_df = full_df[["HomeTeam", "AwayTeam", "FTHG", "FTAG", "HST", "AST", "HC", "AC", "B365>2.5", "B365<2.5", "Season"]]
full_df.reset_index(drop=True, inplace=True)
full_df.head(20)

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,HST,AST,HC,AC,B365>2.5,B365<2.5,Season
0,Man United,Leicester,2,1,6,4,2,5,,,2018-19
1,Bournemouth,Cardiff,2,0,4,1,7,4,,,2018-19
2,Fulham,Crystal Palace,0,2,6,9,5,5,,,2018-19
3,Huddersfield,Chelsea,0,3,1,4,2,5,,,2018-19
4,Newcastle,Tottenham,1,2,2,5,3,5,,,2018-19
5,Watford,Brighton,2,0,5,0,8,2,,,2018-19
6,Wolves,Everton,2,2,4,5,3,6,,,2018-19
7,Arsenal,Man City,0,2,3,8,2,9,,,2018-19
8,Liverpool,West Ham,4,0,8,2,5,4,,,2018-19
9,Southampton,Burnley,0,0,3,6,8,5,,,2018-19


In [3]:
df_teams = pdsql.sqldf("SELECT DISTINCT HomeTeam as TEAM FROM full_df")
df_teams["RATING_H_DEF"] = df_teams["RATING_H_OFF"] = df_teams["RATING_A_DEF"] = df_teams["RATING_A_OFF"] = 0.0
df_teams.head(20)

Unnamed: 0,TEAM,RATING_H_DEF,RATING_H_OFF,RATING_A_DEF,RATING_A_OFF
0,Man United,0.0,0.0,0.0,0.0
1,Bournemouth,0.0,0.0,0.0,0.0
2,Fulham,0.0,0.0,0.0,0.0
3,Huddersfield,0.0,0.0,0.0,0.0
4,Newcastle,0.0,0.0,0.0,0.0
5,Watford,0.0,0.0,0.0,0.0
6,Wolves,0.0,0.0,0.0,0.0
7,Arsenal,0.0,0.0,0.0,0.0
8,Liverpool,0.0,0.0,0.0,0.0
9,Southampton,0.0,0.0,0.0,0.0


In [4]:

from typing import Tuple

# Assume these constants are defined as they are part of the formulas but not included in the function.
# They should be defined somewhere outside of this function in the actual code.
LAMBDA = 0.441  # governs the impact of the most recent match on a team’s ratings
PHI_1 = 0.518   # govern the impact of a home match on a team’s away ratings
PHI_2 = 0.552   # govern the impact away match on a team’s home ratings respectively

ALPHA = -2.3
BETA_1 = 0.0081
BETA_2 = 3.8815

# Assuming a Ratings class exists that can take four float arguments.
class Ratings:
    def __init__(self, defensive_home, offensive_home, defensive_away, offensive_away):
        self.defensive_home = defensive_home
        self.offensive_home = offensive_home
        self.defensive_away = defensive_away
        self.offensive_away = offensive_away

def calculate_ratings(
  home_home_defensive: float,
  home_home_offensive: float,
  home_away_defensive: float,
  home_away_offensive: float,
  away_home_defensive: float,
  away_home_offensive: float,
  away_away_defensive: float,
  away_away_offensive: float,
  shots_for: int,
  shots_against: int,
  corners_for: int,
  corners_against: int
  ) -> Tuple[Ratings, Ratings]:
    # Calculate performance metrics based on shots and corners
    Sa = shots_for + corners_for
    Sh = shots_against + corners_against

    # Update home team's home offensive rating
    new_home_home_offensive = max(home_home_offensive + LAMBDA * PHI_1 * (Sa - (home_home_offensive + home_away_defensive) / 2), 0)

    # Update home team's away offensive rating
    new_home_away_offensive = max(home_away_offensive + LAMBDA * (1 - PHI_1) * (Sa - (home_away_offensive + away_away_defensive) / 2), 0)

    # Update home team's home defensive rating
    new_home_home_defensive = max(home_home_defensive + LAMBDA * PHI_1 * (Sh - (away_home_offensive + home_home_defensive) / 2), 0)

    # Update home team's away defensive rating
    new_home_away_defensive = max(home_away_defensive + LAMBDA * (1 - PHI_1) * (Sh - (away_away_offensive + home_away_defensive) / 2), 0)

    # Update away team's away offensive rating
    new_away_away_offensive = max(away_away_offensive + LAMBDA * PHI_2 * (Sh - (away_away_offensive + home_away_defensive) / 2), 0)

    # Update away team's home offensive rating
    new_away_home_offensive = max(away_home_offensive + LAMBDA * (1 - PHI_2) * (Sh - (away_home_offensive + home_home_defensive) / 2), 0)

    # Update away team's away defensive rating
    new_away_away_defensive = max(away_away_defensive + LAMBDA * PHI_2 * (Sa - (home_home_offensive + away_away_defensive) / 2), 0)

    # Update away team's home defensive rating
    new_away_home_defensive = max(away_home_defensive + LAMBDA * (1 - PHI_2) * (Sa - (home_away_offensive + away_home_defensive) / 2), 0)

    # Return the updated ratings in two Ratings objects
    return (
        Ratings(new_home_home_defensive, new_home_home_offensive, new_home_away_defensive, new_home_away_offensive),
        Ratings(new_away_away_defensive, new_away_away_offensive, new_away_home_defensive, new_away_home_offensive)
    )

def get_ratings(home_team_name: str, away_team_name: str) -> Tuple[Ratings, Ratings]:
    # Get the ratings for the home and away teams
    home_home_defensive = df_teams[df_teams["TEAM"] == home_team_name]["RATING_H_DEF"].values[0]
    home_home_offensive = df_teams[df_teams["TEAM"] == home_team_name]["RATING_H_OFF"].values[0]
    home_away_defensive = df_teams[df_teams["TEAM"] == home_team_name]["RATING_A_DEF"].values[0]
    home_away_offensive = df_teams[df_teams["TEAM"] == home_team_name]["RATING_A_OFF"].values[0]

    away_away_defensive = df_teams[df_teams["TEAM"] == away_team_name]["RATING_A_DEF"].values[0]
    away_away_offensive = df_teams[df_teams["TEAM"] == away_team_name]["RATING_A_OFF"].values[0]
    away_home_defensive = df_teams[df_teams["TEAM"] == away_team_name]["RATING_H_DEF"].values[0]
    away_home_offensive = df_teams[df_teams["TEAM"] == away_team_name]["RATING_H_OFF"].values[0]

    return (
        Ratings(home_home_defensive, home_home_offensive, home_away_defensive, home_away_offensive),
        Ratings(away_away_defensive, away_away_offensive, away_home_defensive, away_home_offensive)
    )

In [5]:
for index, row in full_df.iterrows():
  bookie_implied_odds = 1 / row["B365>2.5"]
  home_team_ratings, away_team_ratings = get_ratings(row["HomeTeam"], row["AwayTeam"])

  K = ALPHA + BETA_1 * (home_team_ratings.offensive_home + home_team_ratings.defensive_home + away_team_ratings.offensive_away + away_team_ratings.defensive_away) + BETA_2 * bookie_implied_odds
  p_over = np.exp(K) / (1 + np.exp(K))
  full_df.loc[index, "P>2.5"] = p_over
  full_df.loc[index, "P<2.5"] = 1 - p_over

  new_home_team_ratings, new_away_team_ratings = calculate_ratings(
    home_team_ratings.defensive_home,
    home_team_ratings.offensive_home,
    home_team_ratings.defensive_away,
    home_team_ratings.offensive_away,
    away_team_ratings.defensive_home,
    away_team_ratings.offensive_home,
    away_team_ratings.defensive_away,
    away_team_ratings.offensive_away,
    row["HST"],
    row["AST"],
    row["HC"],
    row["AC"]
  )

  full_df.loc[index, "RATING_H_DEF"] = new_home_team_ratings.defensive_home
  full_df.loc[index, "RATING_H_OFF"] = new_home_team_ratings.offensive_home
  full_df.loc[index, "RATING_A_DEF"] = new_home_team_ratings.defensive_away
  full_df.loc[index, "RATING_A_OFF"] = new_home_team_ratings.offensive_away




  df_teams.loc[df_teams["TEAM"] == row["HomeTeam"], "RATING_H_DEF"] = new_home_team_ratings.defensive_home
  df_teams.loc[df_teams["TEAM"] == row["HomeTeam"], "RATING_H_OFF"] = new_home_team_ratings.offensive_home
  df_teams.loc[df_teams["TEAM"] == row["HomeTeam"], "RATING_A_DEF"] = new_home_team_ratings.defensive_away
  df_teams.loc[df_teams["TEAM"] == row["HomeTeam"], "RATING_A_OFF"] = new_home_team_ratings.offensive_away

  df_teams.loc[df_teams["TEAM"] == row["AwayTeam"], "RATING_A_DEF"] = new_away_team_ratings.defensive_away
  df_teams.loc[df_teams["TEAM"] == row["AwayTeam"], "RATING_A_OFF"] = new_away_team_ratings.offensive_away
  df_teams.loc[df_teams["TEAM"] == row["AwayTeam"], "RATING_H_DEF"] = new_away_team_ratings.defensive_home
  df_teams.loc[df_teams["TEAM"] == row["AwayTeam"], "RATING_H_OFF"] = new_away_team_ratings.offensive_home

In [6]:
df_teams.head(20)

Unnamed: 0,TEAM,RATING_H_DEF,RATING_H_OFF,RATING_A_DEF,RATING_A_OFF
0,Man United,12.062927,11.31292,11.279517,12.857494
1,Bournemouth,12.05323,9.870525,11.976338,11.807742
2,Fulham,8.728769,10.322211,9.592687,9.481876
3,Huddersfield,10.24947,4.691452,10.577117,4.547908
4,Newcastle,7.885651,11.660762,6.616956,11.243302
5,Watford,13.085222,4.850697,13.867215,5.156264
6,Wolves,9.849562,7.166337,9.833594,8.298554
7,Arsenal,0.0,20.534928,0.0,16.433847
8,Liverpool,5.420162,18.799075,6.046368,17.53819
9,Southampton,9.772199,8.10761,9.811193,7.724144


In [7]:
full_df.tail(20)

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,HST,AST,HC,AC,B365>2.5,B365<2.5,Season,P>2.5,P<2.5,RATING_H_DEF,RATING_H_OFF,RATING_A_DEF,RATING_A_OFF
2049,Tottenham,West Ham,1,2,7,5,8,4,1.5,2.5,2023-24,0.650441,0.349559,11.082165,13.119467,11.314862,14.263263
2050,Crystal Palace,Liverpool,1,2,4,2,6,5,1.65,2.2,2023-24,0.599283,0.400717,7.325602,8.791663,7.371429,7.836131
2051,Brighton,Burnley,1,1,11,3,9,1,1.57,2.35,2023-24,0.625077,0.374923,4.688859,18.43377,4.823218,15.927315
2052,Man United,Bournemouth,0,3,3,4,10,4,1.5,2.5,2023-24,0.661935,0.338065,10.563522,13.734506,9.89871,14.900668
2053,Sheffield United,Brentford,1,0,4,4,3,4,1.95,1.95,2023-24,0.494425,0.505575,16.406634,0.0,16.394897,2.422032
2054,Wolves,Nott'm Forest,1,1,4,2,4,3,2.06,1.84,2023-24,0.461267,0.538733,9.506305,7.21358,9.736932,8.489575
2055,Aston Villa,Arsenal,1,0,3,5,3,3,1.7,2.1,2023-24,0.575644,0.424356,5.723595,12.230243,4.046185,12.492145
2056,Everton,Chelsea,2,0,5,4,4,8,1.75,2.05,2023-24,0.55513,0.44487,8.252991,8.218415,8.100064,8.980443
2057,Fulham,West Ham,5,0,8,5,3,6,1.9,2.0,2023-24,0.508006,0.491994,7.204507,11.416258,8.437619,10.40665
2058,Luton,Man City,1,2,2,6,6,3,1.36,3.1,2023-24,0.707899,0.292101,10.7771,5.564076,9.063267,5.879565


In [8]:
# write the ratings and match data to csv files
df_teams.to_csv(f"Datasets/{LIGA}/ratings.csv", index=False)
full_df.to_csv(f"Datasets/{LIGA}/matches_with_ratings.csv", index=False)