[Reference](https://ofelipebandeira.medium.com/can-we-use-chess-to-predict-soccer-b7b22b75a92a)

# Step 1: Initial ratings calibration

In [1]:
def elo_predict(c, d, omega, teams, ratings_dict):
  '''
  Computes the Elo expected scores of a single match.

  Inputs:
    c, d, omega: int or float
      Free variables of the formula: c is the base of the exponentiation,
      d divides the rating gap and omega is subtracted from the gap in
      favour of the home team
    teams: list
      Names of both teams, home team first and away team second
    ratings_dict: dict
      Maps each team name to its current Elo rating

  Outputs:
    expected_home, expected_away: float
      Expected Elo outcome (E_H and E_A) of each team; they add up to 1
    rating_difference: float
      Home rating minus away rating (omega is NOT included), later used
      as the input of the logistic regression
  '''
  home_team, away_team = teams[0], teams[1]
  home_elo = ratings_dict[home_team]
  away_elo = ratings_dict[away_team]

  # E_H = 1 / (1 + c^((R_A - R_H - omega) / d))
  power = (away_elo - home_elo - omega)/d
  home_expectation = 1/(1 + c**power)

  return home_expectation, 1 - home_expectation, home_elo - away_elo

def elo_update(k0, expected_home, expected_away, teams, goals, outcomes, ratings_dict):
  '''
  Applies the Elo rating update of one finished match to both teams.

  Inputs:
    k0: int or float
      Base scaling factor; it is multiplied by (1 + absolute goal difference)
    expected_home, expected_away: float
      Expected outcomes (E_H and E_A) of the home and away teams
    teams: list
      Names of both teams (home team first, away team second)
    goals: list
      Goals scored by each team ([home_goals, away_goals])
    outcomes: list
      Actual outcomes of both teams ([home_outcome, away_outcome]),
      typically 1 for a win, 0.5 for a draw and 0 for a loss
    ratings_dict: dict
      Maps each team name to its current Elo rating

  Outputs:
    ratings_dict: dict
      The same dictionary object that was passed in, modified in place
      with the new ratings of the two teams involved
  '''
  home_team, away_team = teams[0], teams[1]

  # Everything is read before anything is written
  old_home = ratings_dict[home_team]
  old_away = ratings_dict[away_team]
  home_result, away_result = outcomes[0], outcomes[1]
  margin = abs(goals[0] - goals[1])

  # Wider score lines move the ratings further
  k = k0*(1+margin)

  ratings_dict[home_team] = old_home + k * (home_result - expected_home)
  ratings_dict[away_team] = old_away + k * (away_result - expected_away)

  return ratings_dict

In [2]:
def determine_elo_outcome(row):
  '''
  Translates the result code of a match (row['Res']) into Elo outcomes
  (S_H and S_A in the formula), returned as [home_outcome, away_outcome]:
  'H' gives [1, 0], 'D' gives [0.5, 0.5] and any other value gives [0, 1]
  '''
  result = row['Res']
  home_outcome = 1 if result == 'H' else 0.5 if result == 'D' else 0

  # Both outcomes always add up to 1
  return [home_outcome, 1 - home_outcome]

In [4]:
def run_elo_calibration(df, calibration_seasons, c=10, d=400, omega=100, k0=10):
  '''
  Builds the initial Elo ratings by replaying every match of the
  calibration seasons in the order in which they appear in df.

  Inputs:
    df: pandas.DataFrame
      Match data; the columns 'Season', 'Home', 'Away', 'HG', 'AG' and
      'Res' are read
    calibration_seasons: list
      Seasons (values of the 'Season' column) to replay, in order
    c, d: int or float, optional (default: 10 and 400)
      Free variables for the Elo prediction formula
    omega: int or float (default=100)
      Free variable representing the advantage of the home team
    k0: int or float, optional (default=10)
      Base scaling factor of the rating update

  Outputs:
    ratings_dict: dict
      Elo rating of every team after the last calibration match
  '''
  ratings_dict = create_elo_dict(df)

  for season in calibration_seasons:
    season_matches = df[df['Season'] == season]

    # Promoted teams take over the ratings of the teams they replace
    ratings_dict = adjust_teams_interseason(ratings_dict, season_matches)

    for _, match in season_matches.iterrows():
      fixture = [match['Home'], match['Away']]
      score = [match['HG'], match['AG']]
      actual = determine_elo_outcome(match)

      # The rating difference (third returned value) is not needed here
      exp_home, exp_away = elo_predict(c, d, omega, fixture, ratings_dict)[:2]

      ratings_dict = elo_update(k0, exp_home, exp_away, fixture, score, actual, ratings_dict)

  return ratings_dict

In [6]:
def adjust_teams_interseason(ratings_dict, elo_calibration_df):
  '''
  Implements the process in which promoted teams take the Elo ratings
  of demoted teams in between seasons.

  Inputs:
    ratings_dict: dict
      Maps each team of the previous season to its Elo rating
    elo_calibration_df: pandas.DataFrame
      Matches of the upcoming season; only the 'Home' column is read to
      list the teams taking part in it

  Outputs:
    ratings_dict: dict
      The same dictionary object, modified in place: every paired demoted
      team is removed and its rating is stored under a promoted team

  Notes:
    Any number of promoted/demoted teams is supported. Teams are paired in
    alphabetical order so that the result is reproducible between runs.
    If the two groups differ in size, only the first min(len) pairs are
    swapped: surplus demoted teams keep their entry and surplus promoted
    teams receive no rating.
  '''
  # Lists all teams in previous and upcoming seasons
  old_season_teams = set(ratings_dict)
  new_season_teams = set(elo_calibration_df['Home'].unique())

  # Sorted (by name) so the pairing does not depend on set iteration order
  demoted_teams = sorted(old_season_teams - new_season_teams, key=str)
  promoted_teams = sorted(new_season_teams - old_season_teams, key=str)

  # Inserts each new team in the dictionary and removes the old one;
  # zip stops at the shorter list, so no index can go out of range
  for promoted, demoted in zip(promoted_teams, demoted_teams):
    ratings_dict[promoted] = ratings_dict.pop(demoted)

  return ratings_dict

def create_elo_dict(df, first_season=2012, initial_rating=1000):
  '''
  Creates the very first ratings dictionary, in which every team of the
  first season starts with the same rating.

  Inputs:
    df: pandas.DataFrame
      Match data; the columns 'Season' and 'Home' are read
    first_season: optional (default=2012)
      Value of the 'Season' column whose home teams seed the dictionary
    initial_rating: int or float, optional (default=1000)
      Rating given to every team

  Outputs:
    ratings_dict: dict
      Maps every team that appears in 'Home' during first_season to
      initial_rating (empty if the season has no matches)
  '''
  teams = df[df['Season'] == first_season]['Home'].unique()

  return dict.fromkeys(teams, initial_rating)

# Calibrates the initial Elo ratings on the 2012-2014 seasons.
# NOTE: `df` is not defined anywhere in this section; it must already hold
# the match data when this cell runs.
calibration_seasons = [2012, 2013, 2014]
ratings_dict = run_elo_calibration(df, calibration_seasons)

# Step 2: Calibrating the logistic regression

In [7]:
def run_logit_calibration(df, logit_seasons, ratings_dict, c=10, d=400, omega=100, k0=10):
  '''
  Runs the logistic regression calibration process for Elo ratings.

  This function keeps updating the Elo ratings over multiple seasons while
  collecting data (pre-match rating differences and results) to prepare for
  training a logistic regression. The logistic regression is later used to
  make outcome predictions based on rating differences.

  Inputs:
    df: pandas.DataFrame
      Dataset containing match data, including columns for 'Season', 'Home', 'Away', 'HG', 'AG', 'Res', etc.
    logit_seasons: list
      List of seasons (or years) to be used for the logistic regression calibration process
    ratings_dict: dict
      Initial Elo ratings dictionary with teams as keys and their ratings as values.
      It is modified in place by the rating updates
    c, d: int or float, optional (default: 10 and 400)
      Free variables for the Elo prediction formula
    omega: int or float (default=100)
      Free variable representing the advantage of the home team
    k0: int or float, optional (default=10)
      Scaling factor used to determine the influence of recent matches on team ratings

  Outputs:
    ratings_dict: dict
      Updated Elo ratings dictionary after calibration
    logit_df: pandas.DataFrame
      One row per match with the columns 'season', 'rating_diff' (home minus
      away Elo rating BEFORE the match was played) and 'outcome' (the value
      of the 'Res' column), indexed 0..n-1
  '''
  # Rows are gathered in a plain list and turned into a DataFrame once at
  # the end; growing a DataFrame one row at a time is needlessly slow
  records = []

  # Loops through the specified seasons for logistic calibration
  for season in logit_seasons:
    # Filters data for the current season
    season_df = df[df['Season'] == season]

    # Adjusts team ratings for inter-season changes
    ratings_dict = adjust_teams_interseason(ratings_dict, season_df)

    # Iterates over each match in the current season
    for _, row in season_df.iterrows():
      # Extracts team names and match information
      teams = [row['Home'], row['Away']]
      goals = [row['HG'], row['AG']]

      # Determines the match outcomes in Elo terms
      elo_outcomes = determine_elo_outcome(row)

      # Calculates expected outcomes and rating difference using the Elo prediction formula
      expected_home, expected_away, rating_difference = elo_predict(c, d, omega, teams, ratings_dict)

      # Updates Elo ratings based on the match results
      ratings_dict = elo_update(k0, expected_home, expected_away, teams, goals, elo_outcomes, ratings_dict)

      # Stores the pre-match rating difference together with the result
      records.append({'season': season, 'rating_diff': rating_difference, 'outcome': row['Res']})

  # Passing `columns` keeps the expected layout even when there are no rows
  logit_df = pd.DataFrame(records, columns=['season', 'rating_diff', 'outcome'])

  # Returns the updated ratings and the logistic regression dataset
  return ratings_dict, logit_df

# Continues updating the ratings over the 2015-2018 seasons while collecting
# the (rating difference, result) pairs used to train the logistic regression.
# `ratings_dict` is rebound to the returned ratings; `df` is not defined
# anywhere in this section and must already hold the match data.
logit_seasons = [2015, 2016, 2017, 2018]
ratings_dict, logit_df = run_logit_calibration(df, logit_seasons, ratings_dict, c=10, d=400, omega=100, k0=10)

In [8]:
def fit_logistic_regression(logit_df, max_past_seasons = 15, report = True):
  '''
  Fits a logistic regression that predicts the match result from the Elo
  rating difference.

  Inputs:
    logit_df: pandas.DataFrame
      Training data with the columns 'season', 'rating_diff' and 'outcome'
    max_past_seasons: int, optional (default=15)
      Only the most recent max_past_seasons seasons found in logit_df are used
    report: bool, optional (default=True)
      Whether to print hold-out metrics and plot a confusion matrix

  Outputs:
    model: LogisticRegression
      Model fitted on ALL the rows of the selected seasons. Its classes are
      the label-encoded values of 'outcome'

  Notes:
    The reported metrics come from a separate model trained on a random 80%
    of the rows and scored on the remaining 20%. Scoring the returned model
    instead would evaluate it on rows it was trained on and overstate its
    performance.
  '''
  # Prunes the dataframe, if needed
  most_recent_seasons = sorted(logit_df['season'].unique(), reverse=True)[:max_past_seasons]
  filtered_df = logit_df[logit_df['season'].isin(most_recent_seasons)].copy()

  # Adjust outcome columns from str to int
  label_encoder = LabelEncoder()
  filtered_df['outcome_encoded'] = label_encoder.fit_transform(filtered_df['outcome'])

  # Isolates independent and dependent variables
  X = filtered_df[['rating_diff']].values
  y = filtered_df['outcome_encoded'].values

  # The model handed back to the caller is fitted on the whole dataset
  model = LogisticRegression(solver='lbfgs')
  model.fit(X, y)

  # report the model performance
  if report:
    # The split is only needed for reporting, so it is only done here
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Evaluation model that has never seen the test rows
    eval_model = LogisticRegression(solver='lbfgs')
    eval_model.fit(X_train, y_train)

    # Generate predictions on the test data
    y_pred = eval_model.predict(X_test)
    y_prob = eval_model.predict_proba(X_test)

    # Explicit labels keep the matrix/probability layout aligned with the
    # encoder classes even if some class is absent from the test split
    encoded_labels = list(range(len(label_encoder.classes_)))

    # Compute key metrics
    cm = confusion_matrix(y_test, y_pred, labels=encoded_labels)
    recall = recall_score(y_test, y_pred, average='weighted')
    loss = log_loss(y_test, y_prob, labels=eval_model.classes_)
    balanced_acc = balanced_accuracy_score(y_test, y_pred)

    print(f'Recall (weighted): {recall}')
    print(f'Balanced accuracy: {balanced_acc}')
    print(f'Log loss: {loss}')
    print()

    # Display the confusion matrix
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_encoder.classes_)
    disp.plot(cmap="Blues")

  return model

# Step 3: Running the system

In [9]:
def run_elo_predictions(df, logit_df, seasons, ratings_dict, plot_title,
                        c=10, d=400, omega=100, k0=10, max_past_seasons=15,
                        report_ml=False):
    '''
    Runs an Elo + logistic regression pipeline to predict match outcomes.

    For every season the logistic regression is refitted on logit_df before
    the first match. Then, match by match, the result is predicted from the
    pre-match rating difference, compared with the actual result, and the
    Elo ratings and the training data are updated with that match.

    Inputs:
      df: pandas.DataFrame
        Dataset with match data: 'Season', 'Home', 'Away', 'HG', 'AG', 'Res', etc.
      logit_df: pandas.DataFrame
        Historical data with the columns 'season', 'rating_diff' and
        'outcome' used to train the model. One row per predicted match is
        appended to it IN PLACE.
      seasons: list
        Seasons (or years) to include in the evaluation loop.
      ratings_dict: dict
        Current Elo ratings for all teams. Modified IN PLACE.
      plot_title: str
        Currently unused; kept so that existing calls keep working.
      c, d: Elo parameters
      omega: Home advantage parameter
      k0: Elo update factor
      max_past_seasons: int
        How many seasons back to include when training logistic regression
      report_ml: bool
        Whether to print model performance before each season. The report
        for the last season in `seasons` is always printed.

    Outputs:
      num_predictions: int
        Number of matches that were predicted
      num_correct: int
        Number of those predictions that matched the actual result
    '''
    num_predictions = 0
    num_correct = 0

    for season in seasons:
        if season == seasons[-1]:
            print('\nLogistic regression performance at FINAL SEASON')
            logistic_regression = fit_logistic_regression(logit_df, max_past_seasons, report=True)
        else:
            if report_ml:
                print(f'Logistic regression performance PRE SEASON {season}')
            logistic_regression = fit_logistic_regression(logit_df, max_past_seasons, report=report_ml)

        season_df = df[df['Season'] == season]
        ratings_dict = adjust_teams_interseason(ratings_dict, season_df)

        for _, row in season_df.iterrows():
            teams = [row['Home'], row['Away']]
            goals = [row['HG'], row['AG']]
            elo_outcomes = determine_elo_outcome(row)

            # The prediction only uses ratings from before this match
            expected_home, expected_away, rating_difference = elo_predict(c, d, omega, teams, ratings_dict)
            yhat = logistic_regression.predict([[rating_difference]])[0]

            # NOTE(review): assumes the encoder inside fit_logistic_regression
            # mapped 'A', 'D', 'H' to 0, 1, 2, which requires all three
            # results to be present in the training data - confirm
            prediction = 'A' if yhat == 0 else 'D' if yhat == 1 else 'H'
            actual = row['Res']

            num_predictions += 1
            num_correct += int(prediction == actual)

            # Update Elo ratings and training data
            ratings_dict = elo_update(k0, expected_home, expected_away, teams, goals, elo_outcomes, ratings_dict)
            logit_df.loc[len(logit_df)] = {'season': season, 'rating_diff': rating_difference, 'outcome': actual}

    # Raw counts only; any statistical analysis is left to the caller
    return num_predictions, num_correct

# Evaluating results

In [10]:
def fit_pymc(samples, success):
  '''
  Creates a PyMC model to estimate the accuracy of guesses
  made with Elo ratings over a given period of time.

  Inputs:
    samples: int
      Number of predictions made (the n of the binomial likelihood)
    success: int
      Number of correct predictions (the observed value)

  Outputs:
    mean: float
      Posterior mean of the accuracy p
    [lower, upper]: list
      Bounds of the 95% highest density interval of p
  '''
  with pm.Model():
    p = pm.Uniform('p', lower=0, upper=1) # Prior
    pm.Binomial('x', n=samples, p=p, observed=success) # Likelihood

    inference = pm.sample(progressbar=False, chains = 4, draws = 2000)

  # The summary is computed once and restricted to `p`, so the first (and
  # only) row is guaranteed to be the accuracy parameter
  summary = az.summary(inference, var_names=['p'], hdi_prob = 0.95)

  # Stores key variables
  mean = summary['mean'].values[0]
  lower = summary['hdi_2.5%'].values[0]
  upper = summary['hdi_97.5%'].values[0]

  return mean, [lower, upper]