In [None]:
from typing import Tuple, List
import numpy as np
import pandas as pd
import pandasql as pdsql
import matplotlib.pyplot as plt
import sys
import math

In [None]:
# Inclusive range of seasons to load, written as "YYYY-YY".
first_season: str = "2019-20"
last_season: str = "2023-24"

# League sub-folder under Datasets/. Options noted by the author:
# premier, la_liga, serie_a, bundesliga, the_championship
LIGA = "bundesliga"

# Columns kept from the raw match files (plus the "Season" label added below).
MATCH_COLUMNS: List[str] = [
    "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HST", "AST", "HC", "AC",
    "B365>2.5", "B365<2.5", "Season",
]

# Read each season into its own frame and concatenate once at the end.
# Growing a DataFrame with pd.concat inside the loop re-copies every
# previously loaded row on each iteration.
season_frames: List[pd.DataFrame] = []
for season in range(int(first_season[:4]), int(last_season[:4]) + 1):
    # "% 100" yields the two-digit end year for any century (e.g. 1999 -> "1999-00"),
    # whereas subtracting 2000 only works for seasons starting in 2000 or later.
    season_str: str = f"{season}-{(season + 1) % 100:02d}"
    season_df: pd.DataFrame = pd.read_csv(f"Datasets/{LIGA}/{season_str}.csv")
    season_df["Season"] = season_str
    season_frames.append(season_df)

# ignore_index=True gives the same fresh 0..n-1 index the original obtained
# via reset_index(drop=True), without mutating the frame in place.
full_df: pd.DataFrame = pd.concat(season_frames, ignore_index=True)[MATCH_COLUMNS]
full_df.head(20)

In [None]:
# One row per distinct HomeTeam value found in full_df, exposed as column TEAM.
df_teams = pdsql.sqldf("SELECT DISTINCT HomeTeam as TEAM FROM full_df")

# Every team starts with all four ratings at zero. The columns are created in
# this order: home defence, home offence, away defence, away offence.
for rating_column in ("RATING_H_DEF", "RATING_H_OFF", "RATING_A_DEF", "RATING_A_OFF"):
    df_teams[rating_column] = 0.0

df_teams.head(20)

In [None]:

from typing import Tuple

# Rating-update parameters used by calculate_ratings.
# PHI_1 weights the home team's home-rating updates and (1 - PHI_1) its away-rating updates;
# PHI_2 weights the away team's away-rating updates and (1 - PHI_2) its home-rating updates.
LAMBDA = 0.441  # governs the impact of the most recent match on a team's ratings
PHI_1 = 0.518   # governs how a home match is split between a team's home and away ratings
PHI_2 = 0.552   # governs how an away match is split between a team's away and home ratings

# Coefficients of the logistic model in get_probabilities:
#   K = ALPHA + BETA_1 * (sum of four ratings) + BETA_2 * bookie_implied_odds
ALPHA = -2.3
BETA_1 = 0.0081
BETA_2 = 3.8815

class Ratings:
    """Plain container for one team's four ratings.

    Attributes:
        defensive_home: defensive rating for matches played at home.
        offensive_home: offensive rating for matches played at home.
        defensive_away: defensive rating for matches played away.
        offensive_away: offensive rating for matches played away.
    """

    def __init__(self, defensive_home: float, offensive_home: float,
                 defensive_away: float, offensive_away: float):
        # Home-venue pair first, then the away-venue pair.
        self.defensive_home, self.offensive_home = defensive_home, offensive_home
        self.defensive_away, self.offensive_away = defensive_away, offensive_away

def calculate_ratings(
    home_home_defensive: float,
    home_home_offensive: float,
    home_away_defensive: float,
    home_away_offensive: float,
    away_home_defensive: float,
    away_home_offensive: float,
    away_away_defensive: float,
    away_away_offensive: float,
    shots_for: int,
    shots_against: int,
    corners_for: int,
    corners_against: int
) -> Tuple[Ratings, Ratings]:
    """Update both teams' ratings after a single match.

    Parameter names read ``<team>_<venue>_<kind>``: e.g. ``away_home_defensive``
    is the *away* team's defensive rating for its *home* matches.

    Each side's match performance is shots plus corners. The "surprise" for a
    side is its performance minus the expectation, where the expectation is
    the average of the attacking side's offensive rating and the opposing
    side's defensive rating *for the venue this match is played at* (home
    ratings for the home team, away ratings for the away team). Each rating
    is then moved by ``LAMBDA * weight * surprise`` and floored at zero, with
    weight ``PHI_1`` / ``1 - PHI_1`` for the home team's home / away ratings
    and ``PHI_2`` / ``1 - PHI_2`` for the away team's away / home ratings.

    NOTE(review): this appears to follow the GAP rating scheme (Wheatcroft,
    2020); confirm the expectation terms against the reference the notebook
    is based on.

    Args:
        home_home_defensive, home_home_offensive: home team's home ratings.
        home_away_defensive, home_away_offensive: home team's away ratings.
        away_home_defensive, away_home_offensive: away team's home ratings.
        away_away_defensive, away_away_offensive: away team's away ratings.
        shots_for: home team's shots (the notebook passes the HST column).
        shots_against: away team's shots (the notebook passes the AST column).
        corners_for: home team's corners (HC column).
        corners_against: away team's corners (AC column).

    Returns:
        ``(home_team_ratings, away_team_ratings)``. In BOTH objects the
        attributes mean what their names say: ``defensive_home`` is that
        team's home defensive rating, ``offensive_away`` its away offensive
        rating, and so on.
    """
    home_performance = shots_for + corners_for
    away_performance = shots_against + corners_against

    # How far each side over- or under-performed relative to what the ratings
    # relevant to this fixture (home team at home, away team away) predicted.
    home_surprise = home_performance - (home_home_offensive + away_away_defensive) / 2
    away_surprise = away_performance - (away_away_offensive + home_home_defensive) / 2

    # Offensive ratings react to the team's own surprise; defensive ratings
    # react to the opponent's surprise (a higher value means more conceded).
    new_home_ratings = Ratings(
        max(home_home_defensive + LAMBDA * PHI_1 * away_surprise, 0),
        max(home_home_offensive + LAMBDA * PHI_1 * home_surprise, 0),
        max(home_away_defensive + LAMBDA * (1 - PHI_1) * away_surprise, 0),
        max(home_away_offensive + LAMBDA * (1 - PHI_1) * home_surprise, 0),
    )

    # Ratings takes (defensive_home, offensive_home, defensive_away,
    # offensive_away); the away team's *home* ratings therefore come first.
    new_away_ratings = Ratings(
        max(away_home_defensive + LAMBDA * (1 - PHI_2) * home_surprise, 0),
        max(away_home_offensive + LAMBDA * (1 - PHI_2) * away_surprise, 0),
        max(away_away_defensive + LAMBDA * PHI_2 * home_surprise, 0),
        max(away_away_offensive + LAMBDA * PHI_2 * away_surprise, 0),
    )

    return (new_home_ratings, new_away_ratings)


def _lookup_team_ratings(team_name: str) -> Ratings:
    """Read one team's current ratings from ``df_teams`` with a single row lookup."""
    rating_columns = ["RATING_H_DEF", "RATING_H_OFF", "RATING_A_DEF", "RATING_A_OFF"]
    # [0] raises IndexError when no row matches, as the original `.values[0]` did.
    h_def, h_off, a_def, a_off = df_teams.loc[df_teams["TEAM"] == team_name, rating_columns].to_numpy()[0]
    return Ratings(h_def, h_off, a_def, a_off)


def get_ratings(home_team_name: str, away_team_name: str) -> Tuple[Ratings, Ratings]:
    """Fetch the current ratings of both teams from the module-level ``df_teams``.

    Args:
        home_team_name: value of ``df_teams["TEAM"]`` for the home side.
        away_team_name: value of ``df_teams["TEAM"]`` for the away side.

    Returns:
        ``(home_team_ratings, away_team_ratings)``. Both objects use the
        attribute names literally (``*_home`` = that team's home ratings,
        ``*_away`` = that team's away ratings), which is what the scoring
        loop and ``get_probabilities`` assume when they read them.

    Raises:
        IndexError: if either team name has no row in ``df_teams``.
    """
    return (_lookup_team_ratings(home_team_name), _lookup_team_ratings(away_team_name))

def get_probabilities(home_team_rating: Ratings, away_team_rating: Ratings) -> Tuple[float, float]:
    """Return ``(p_over, p_under)`` from a logistic model on ratings and bookmaker odds.

    The linear predictor is
    ``K = ALPHA + BETA_1 * rating_sum + BETA_2 * bookie_implied_odds`` where
    ``rating_sum`` adds the home team's home offensive and defensive ratings
    to the away team's away offensive and defensive ratings.

    Args:
        home_team_rating: ratings of the home team.
        away_team_rating: ratings of the away team.

    Returns:
        ``(p_over, p_under)`` with ``p_under == 1 - p_over``.

    Raises:
        NameError: if ``bookie_implied_odds`` has not been assigned at module
            scope before the call.
    """
    # Use the arguments, not the loop's similarly named globals
    # (home_team_ratings / away_team_ratings), so the result depends on what
    # the caller actually passed in.
    rating_sum = (
        home_team_rating.offensive_home + home_team_rating.defensive_home
        + away_team_rating.offensive_away + away_team_rating.defensive_away
    )

    # NOTE: bookie_implied_odds is still read from module scope because the
    # two-argument signature is what existing callers use.
    K = ALPHA + BETA_1 * rating_sum + BETA_2 * bookie_implied_odds

    # 1 / (1 + exp(-K)) is the same sigmoid as exp(K) / (1 + exp(K)) but does
    # not turn into inf / inf = NaN when K is large.
    p_over = 1.0 / (1.0 + np.exp(-K))
    p_under = 1 - p_over
    return (p_over, p_under)


In [None]:
# Walk through the matches in frame order. For each match: score it with the
# ratings as they stand before the match, then update both teams' ratings.
for index, row in full_df.iterrows():
    home_name, away_name = row["HomeTeam"], row["AwayTeam"]

    # Keep this exact name: get_probabilities reads bookie_implied_odds from
    # module scope. Presumably B365>2.5 holds decimal odds, making this the
    # bookmaker-implied probability of over 2.5 goals (verify against the data source).
    bookie_implied_odds = 1 / row["B365>2.5"]

    home_team_ratings, away_team_ratings = get_ratings(home_name, away_name)
    p_over, p_under = get_probabilities(home_team_ratings, away_team_ratings)
    for column, value in (("P>2.5", p_over), ("P<2.5", p_under)):
        full_df.loc[index, column] = value

    new_home_team_ratings, new_away_team_ratings = calculate_ratings(
        home_home_defensive=home_team_ratings.defensive_home,
        home_home_offensive=home_team_ratings.offensive_home,
        home_away_defensive=home_team_ratings.defensive_away,
        home_away_offensive=home_team_ratings.offensive_away,
        away_home_defensive=away_team_ratings.defensive_home,
        away_home_offensive=away_team_ratings.offensive_home,
        away_away_defensive=away_team_ratings.defensive_away,
        away_away_offensive=away_team_ratings.offensive_away,
        shots_for=row["HST"],
        shots_against=row["AST"],
        corners_for=row["HC"],
        corners_against=row["AC"],
    )

    # Post-match ratings of the home team, keyed by their column name.
    home_updates = {
        "RATING_H_DEF": new_home_team_ratings.defensive_home,
        "RATING_H_OFF": new_home_team_ratings.offensive_home,
        "RATING_A_DEF": new_home_team_ratings.defensive_away,
        "RATING_A_OFF": new_home_team_ratings.offensive_away,
    }
    # Post-match ratings of the away team (away columns first, as before).
    away_updates = {
        "RATING_A_DEF": new_away_team_ratings.defensive_away,
        "RATING_A_OFF": new_away_team_ratings.offensive_away,
        "RATING_H_DEF": new_away_team_ratings.defensive_home,
        "RATING_H_OFF": new_away_team_ratings.offensive_home,
    }

    # The match row records the home team's post-match ratings only.
    for column, value in home_updates.items():
        full_df.loc[index, column] = value

    # Persist both teams' new ratings so the next match sees them.
    is_home_team = df_teams["TEAM"] == home_name
    for column, value in home_updates.items():
        df_teams.loc[is_home_team, column] = value

    is_away_team = df_teams["TEAM"] == away_name
    for column, value in away_updates.items():
        df_teams.loc[is_away_team, column] = value

In [None]:
# Show the first 20 teams with their ratings after all matches have been processed.
df_teams.head(20)

In [None]:
# Show the last 20 match rows, including the probability and rating columns added by the loop.
full_df.tail(20)

In [None]:
# Persist the team ratings and the enriched match table next to the league's
# input files; the row index is not written.
for frame, csv_path in (
    (df_teams, f"Datasets/{LIGA}/ratings.csv"),
    (full_df, f"Datasets/{LIGA}/matches_with_ratings.csv"),
):
    frame.to_csv(csv_path, index=False)