# 05 - Elo Rating Construction

Goals:
- Define an initial Elo rating and a K-factor
- Give the home team a rating advantage
- Apply the Elo update formula match by match
- Save the pre-match Elo ratings to a CSV file




In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Elo parameters (module-level constants read by the cells below)
BASE_RATING = 1500      # rating given to a team the first time it appears
K_FACTOR = 25           # default K: scales how far one match moves a rating
HOME_ADVANTAGE = 80     # Elo points added to the home rating when computing the expectation

# Load the match data: the loop below reads one row per match with
# home_team, away_team and result columns (i.e. the wide-format file).
df = pd.read_csv("../data/processed/matches_wide_22_23.csv")
df.head()

In [None]:
def expected_home_win(r_home, r_away, home_advantage=HOME_ADVANTAGE):
    """Return the Elo expected score of the home team.

    Parameters
    ----------
    r_home : float
        Elo rating of the home team before the match.
    r_away : float
        Elo rating of the away team before the match.
    home_advantage : float
        Elo points added to the home rating before the expectation is
        computed.

    Returns
    -------
    float
        ``1 / (1 + 10 ** (gap / 400))`` where ``gap`` is the away rating
        minus the advantage-adjusted home rating.
    """
    boosted_home = r_home + home_advantage
    # Negative gap means the (boosted) home side is the stronger one.
    gap = r_away - boosted_home
    odds_against = 10 ** (gap / 400)
    return 1 / (1 + odds_against)

def update_elo(rating, score, expected_score, k=K_FACTOR):
    """Return a team's Elo rating after a match.

    Parameters
    ----------
    rating : float
        Team Elo before the match.
    score : float
        Actual result from the team's perspective:
        win = 1.0, draw = 0.5, loss = 0.0.
    expected_score : float
        Expected score for the team (between 0 and 1).
    k : float
        K-factor controlling how fast the rating changes.

    Returns
    -------
    float
        ``rating + k * (score - expected_score)``.
    """
    # Positive when the team did better than expected, negative otherwise.
    adjustment = k * (score - expected_score)
    return rating + adjustment


In [None]:
# Walk the matches chronologically; match_id breaks ties within a date.
df = df.sort_values(["date", "match_id"]).copy()

# Home-team score for each result code ('H' home win, 'D' draw, 'A' away win)
home_score_by_result = {"H": 1.0, "D": 0.5, "A": 0.0}

# Latest known rating per team, refreshed after each match
ratings = {}

# Pre-match ratings, one entry per row of the sorted dataframe
elo_home_before = []
elo_away_before = []

for home, away, result in zip(df["home_team"], df["away_team"], df["result"]):
    # A team seen for the first time starts at the base rating.
    home_rating = ratings.get(home, BASE_RATING)
    away_rating = ratings.get(away, BASE_RATING)

    # Record the ratings as they stood before this match.
    elo_home_before.append(home_rating)
    elo_away_before.append(away_rating)

    # Expected score of the home side (home advantage included).
    home_expected = expected_home_win(home_rating, away_rating)

    # Actual score of the home side; anything but H/D/A is rejected.
    home_score = home_score_by_result.get(result)
    if home_score is None:
        raise ValueError(f"Unknown result value: {result}")

    # The away side gets the complementary score and expectation.
    ratings[home] = update_elo(home_rating, home_score, home_expected)
    ratings[away] = update_elo(away_rating, 1 - home_score, 1 - home_expected)

# Attach the pre-match ratings (lists are in the sorted row order).
df["elo_home_before"] = elo_home_before
df["elo_away_before"] = elo_away_before

# Rating gap seen from the home team's side
df["elo_diff_home"] = df["elo_home_before"] - df["elo_away_before"]

preview_columns = [
    "match_id", "date", "home_team", "away_team", "result",
    "elo_home_before", "elo_away_before", "elo_diff_home",
]
df[preview_columns].head(20)

In [None]:
# Keep only the match identifiers, the result and the pre-match Elo columns
elo_minimal = df[[
    "match_id",
    "date",
    "home_team",
    "away_team",
    "result",
    "elo_home_before",
    "elo_away_before",
    "elo_diff_home",
]].copy()

# Single source of truth for the output location, so the file written and
# the path reported below cannot drift apart (the message used to name
# "elo_ratings.csv" while the file saved was "elo_rating.csv").
output_path = Path("../data/processed/elo_rating.csv")

# Save the minimal Elo dataset (row index is not written)
elo_minimal.to_csv(output_path, index=False)

print("Saved minimal Elo dataset to:", output_path)

# Preview
elo_minimal.head()