In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from datetime import datetime, timedelta

In [2]:
matches = pd.read_csv("../data/all_years_nc_time_coordinations/matches.csv")

## We'll use Brier score to evaluate performance of ELO ratings. First we'll calculate Brier score when using rank difference to calculate expected probabilities as a baseline.

In [3]:
def logistic_function(x):
    return 1 / (1 + np.exp(-x))


class ATPRankingPredictions:
    def __init__(self, matches_df):
        self.matches_df = matches_df

    def calculate_winner_probabilities(self):
        expected_probabilities = []
        
        for index, row in self.matches_df.iterrows():
            winner_rank = row['winner_rank']
            loser_rank = row['loser_rank']
            
            # Skip rows with NaN values in ranks
            if pd.isna(winner_rank):
                winner_rank = 2000
            if pd.isna(loser_rank):
                loser_rank = 2000
                 
            # Calculate rank difference
            rank_difference = loser_rank - winner_rank
            
            # Ensure rank difference is not too large to avoid overflow in logistic function
            scaled_rank_difference = rank_difference / 100.0  # Dla 100 jest najlepszy wynik
            expected_winner = logistic_function(scaled_rank_difference)
            
            expected_probabilities.append(expected_winner)
        
        return expected_probabilities
    
    def calculate_prediction_accuracy(self):
        correct_predictions = 0 
        valid_count = 0
        for index, row in self.matches_df.iterrows():
            winner_rank = row['winner_rank']
            loser_rank = row['loser_rank']
            if winner_rank < 100 or loser_rank < 100:
                valid_count += 1
                if winner_rank < loser_rank:
                    correct_predictions+=1
                
        return (correct_predictions/valid_count) * 100
    
    def calculate_brier_score(self):
        total_brier_score = 0
        
        for expected_winner_probability in self.calculate_winner_probabilities():
            
            # Calculate Brier score for this match
            total_brier_score += (1 - expected_winner_probability) ** 2
        
        return total_brier_score / len(self.matches_df)

In [4]:
atp_rank_predictor = ATPRankingPredictions(matches)

prediction_accuracy = atp_rank_predictor.calculate_prediction_accuracy()
print(f"Prediction accuracy: {prediction_accuracy}")

brier_score_rank = atp_rank_predictor.calculate_brier_score()
print(f"Brier Score (Rank-based Prediction with Logistic Normalization): {brier_score_rank}")

Prediction accuracy: 63.33883801231046
Brier Score (Rank-based Prediction with Logistic Normalization): 0.23236372254563928


In [5]:
def create_player_ranks(matches_df):
    player_ranks = {}

    for index, row in matches_df.iterrows():
        winner_id = row['winner_id']
        loser_id = row['loser_id']
        winner_rank = row['winner_rank']
        loser_rank = row['loser_rank']
        
        # Add the winner's rank if not already in the dictionary
        if winner_id not in player_ranks:
            player_ranks[winner_id] = winner_rank
        
        # Add the loser's rank if not already in the dictionary
        if loser_id not in player_ranks:
            player_ranks[loser_id] = loser_rank

    return player_ranks


In [6]:
player_ranks = create_player_ranks(matches)

In [7]:
from collections import defaultdict

In [8]:
class EloRatingsPredictor:
    def __init__(self, matches_df):
        self.matches_df = matches_df
        self.current_ratings = self.initialize_ratings()
    
    def initialize_ratings(self):
        players = set(self.matches_df['winner_id']).union(set(self.matches_df['loser_id']))
        ratings = {player: 1500 for player in players}
        return ratings
    
    @staticmethod
    def k_factor(matches_count, player_id):
       return 250 / ((matches_count.get(player_id, 0) + 5) ** 0.4)
    
    @staticmethod
    def win_probability(rating_a, rating_b):
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))
    
    def update_ratings(self, tourney_level, matches_count, winner_id, loser_id):
        # Updates current ratings and return current ratings for winner and loser before update
        rating_winner = self.current_ratings[winner_id]
        rating_loser = self.current_ratings[loser_id]
                
        winner_probability = self.win_probability(rating_winner, rating_loser)
        loser_probability = 1 - winner_probability
        
        k_winner = self.k_factor(matches_count, winner_id)
        k_loser = self.k_factor(matches_count, loser_id)
        k_level = 1.1 if tourney_level == "Grand Slam" else 1
        
        self.current_ratings[winner_id] = rating_winner + (k_winner * k_level) * (1 - winner_probability)
        self.current_ratings[loser_id] = rating_loser + (k_loser * k_level) * (0 - loser_probability)
        return rating_winner, rating_loser
    
    def insert_elo_ratings_to_df(self):
        matches_count = defaultdict(int)
        elo_winners = []
        elo_losers = [] 
        for index, row in self.matches_df.iterrows():
            matches_count[row['winner_id']] +=1
            matches_count[row['loser_id']] +=1
            rating_winner, rating_loser = self.update_ratings(row['tourney_level'], matches_count, row['winner_id'], row['loser_id'])
            elo_winners.append(rating_winner)
            elo_losers.append(rating_loser)
        
        self.matches_df['elo_winner'] = elo_winners
        self.matches_df['elo_loser'] = elo_losers
        
    def calculate_prediction_accuracy(self):
        correct_predictions = 0 
        start_score_index = 2000

        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                rating_winner = row['elo_winner']
                rating_loser = row['elo_loser']
                expected_winner = self.win_probability(rating_winner, rating_loser)
                if expected_winner > 0.5:
                    correct_predictions+=1
                
        return (correct_predictions / (len(self.matches_df) - start_score_index)) * 100
    
    # Step 4: Calculate Brier Score
    def calculate_brier_score(self):
        total_brier_score = 0
        start_score_index = 2000
        
        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                rating_winner = row['elo_winner']
                rating_loser = row['elo_loser']
                expected_winner = self.win_probability(rating_winner, rating_loser)
                
                # print(rating_winner, rating_loser, row['winner_name'], row['loser_name'], row['Date'])
                # print(expected_winner)
                total_brier_score += (expected_winner - 1) ** 2
            
        return total_brier_score / (len(self.matches_df) - start_score_index)



### Basic ELO

In [9]:
class BasicEloRatingsPredictor:
    def __init__(self, matches_df):
        self.matches_df = matches_df
        self.current_ratings = self.initialize_ratings()
    
    def initialize_ratings(self):
        players = set(self.matches_df['winner_id']).union(set(self.matches_df['loser_id']))
        ratings = {player: 1500 for player in players}
        return ratings
    
    @staticmethod
    def k_factor():
       return 32
    
    @staticmethod
    def win_probability(rating_a, rating_b):
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))
    
    def update_ratings(self, winner_id, loser_id):
        rating_winner = self.current_ratings[winner_id]
        rating_loser = self.current_ratings[loser_id]
                
        winner_probability = self.win_probability(rating_winner, rating_loser)
        loser_probability = 1 - winner_probability
        
        k_winner = self.k_factor()
        k_loser = self.k_factor()
        
        self.current_ratings[winner_id] = rating_winner + k_winner * (1 - winner_probability)
        self.current_ratings[loser_id] = rating_loser + k_loser * (0 - loser_probability)
        return rating_winner, rating_loser
    
    def insert_elo_ratings_to_df(self):
        elo_winners = []
        elo_losers = [] 
        for index, row in self.matches_df.iterrows():
          
            rating_winner, rating_loser = self.update_ratings(row['winner_id'], row['loser_id'])
            elo_winners.append(rating_winner)
            elo_losers.append(rating_loser)
        
        self.matches_df['elo_winner'] = elo_winners
        self.matches_df['elo_loser'] = elo_losers
        
    def calculate_prediction_accuracy(self):
        correct_predictions = 0 
        start_score_index = 2000

        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                rating_winner = row['elo_winner']
                rating_loser = row['elo_loser']
                expected_winner = self.win_probability(rating_winner, rating_loser)
                if expected_winner > 0.5:
                    correct_predictions+=1
                
        return (correct_predictions / (len(self.matches_df) - start_score_index)) * 100
    
    # Step 4: Calculate Brier Score
    def calculate_brier_score(self):
        total_brier_score = 0
        start_score_index = 2000
        
        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                rating_winner = row['elo_winner']
                rating_loser = row['elo_loser']
                expected_winner = self.win_probability(rating_winner, rating_loser)
                total_brier_score += (expected_winner - 1) ** 2
            
        return total_brier_score / (len(self.matches_df) - start_score_index)

In [10]:
basic_elo_ratings_predictor = BasicEloRatingsPredictor(matches)
basic_elo_ratings_predictor.insert_elo_ratings_to_df()

basic_elo_prediction_accuracy = basic_elo_ratings_predictor.calculate_prediction_accuracy()
print(f"Prediction accuracy: {basic_elo_prediction_accuracy}")
basic_elo_brier_score = basic_elo_ratings_predictor.calculate_brier_score()
print(f"Brier Score: {basic_elo_brier_score}")

Prediction accuracy: 63.14920320369911
Brier Score: 0.22338078814608028


### Added match count k factor

In [11]:
class EloRatingsPredictor:
    def __init__(self, matches_df):
        self.matches_df = matches_df
        self.current_ratings = self.initialize_ratings()
    
    def initialize_ratings(self):
        players = set(self.matches_df['winner_id']).union(set(self.matches_df['loser_id']))
        ratings = {player: 1500 for player in players}
        return ratings
    
    @staticmethod
    def k_factor(matches_count, player_id):
       return 250 / ((matches_count.get(player_id, 0) + 5) ** 0.4)
    
    @staticmethod
    def win_probability(rating_a, rating_b):
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))
    
    def update_ratings(self, tourney_level, matches_count, winner_id, loser_id):
        # Updates current ratings and return current ratings for winner and loser before update
        rating_winner = self.current_ratings[winner_id]
        rating_loser = self.current_ratings[loser_id]
                
        winner_probability = self.win_probability(rating_winner, rating_loser)
        loser_probability = 1 - winner_probability
        
        k_winner = self.k_factor(matches_count, winner_id)
        k_loser = self.k_factor(matches_count, loser_id)
        k_level = 1.1 if tourney_level == "Grand Slam" else 1
        
        self.current_ratings[winner_id] = rating_winner + (k_winner * k_level) * (1 - winner_probability)
        self.current_ratings[loser_id] = rating_loser + (k_loser * k_level) * (0 - loser_probability)
        return rating_winner, rating_loser
    
    def insert_elo_ratings_to_df(self):
        matches_count = defaultdict(int)
        elo_winners = []
        elo_losers = [] 
        for index, row in self.matches_df.iterrows():
            matches_count[row['winner_id']] +=1
            matches_count[row['loser_id']] +=1
            rating_winner, rating_loser = self.update_ratings(row['tournament_level'], matches_count, row['winner_id'], row['loser_id'])
            elo_winners.append(rating_winner)
            elo_losers.append(rating_loser)
        
        self.matches_df['elo_winner'] = elo_winners
        self.matches_df['elo_loser'] = elo_losers
        
    def calculate_prediction_accuracy(self):
        correct_predictions = 0 
        start_score_index = 2000

        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                rating_winner = row['elo_winner']
                rating_loser = row['elo_loser']
                expected_winner = self.win_probability(rating_winner, rating_loser)
                if expected_winner > 0.5:
                    correct_predictions+=1
                
        return (correct_predictions / (len(self.matches_df) - start_score_index)) * 100
    
    # Step 4: Calculate Brier Score
    def calculate_brier_score(self):
        total_brier_score = 0
        start_score_index = 2000
        
        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                rating_winner = row['elo_winner']
                rating_loser = row['elo_loser']
                expected_winner = self.win_probability(rating_winner, rating_loser)
                total_brier_score += (expected_winner - 1) ** 2
            
        return total_brier_score / (len(self.matches_df) - start_score_index)

In [12]:
elo_ratings_predictor = EloRatingsPredictor(matches)
elo_ratings_predictor.insert_elo_ratings_to_df()

elo_prediction_accuracy = elo_ratings_predictor.calculate_prediction_accuracy()
print(f"Prediction accuracy: {elo_prediction_accuracy}")
elo_brier_score = elo_ratings_predictor.calculate_brier_score()
print(f"Brier Score: {elo_brier_score}")

Prediction accuracy: 63.94187102633969
Brier Score: 0.22488452826500183


### Considering only matches from last year for match count

In [13]:
class EloRatingsPredictor:
    def __init__(self, matches_df):
        self.matches_df = matches_df
        self.current_ratings = self.initialize_ratings()
    
    def initialize_ratings(self):
        players = set(self.matches_df['winner_id']).union(set(self.matches_df['loser_id']))
        ratings = {player: 1500 for player in players}
        return ratings
    
    @staticmethod
    def k_factor(matches_history, player_id):
        total_matches = len(matches_history[player_id])
        return 250 / ((total_matches + 5) ** 0.4)
    
    @staticmethod
    def win_probability(rating_a, rating_b):
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))
    
    @staticmethod
    def update_matches_history(matches_history, player_id, current_date):
        # Track match count within the past year
        if isinstance(current_date, str):
            current_date = datetime.strptime(current_date, "%Y-%m-%d")
        
        one_year_ago = current_date - timedelta(days=365)
    
        # Remove outdated matches
        matches_history[player_id] = [date for date in matches_history[player_id] if date > one_year_ago]
    
        # Add current match
        matches_history[player_id].append(current_date)

    
    def update_ratings(self, tourney_level, matches_history, winner_id, loser_id, current_date):
        # Updates current ratings and return current ratings for winner and loser before update
        rating_winner = self.current_ratings[winner_id]
        rating_loser = self.current_ratings[loser_id]
                
        winner_probability = self.win_probability(rating_winner, rating_loser)
        loser_probability = 1 - winner_probability
        
        self.update_matches_history(matches_history, winner_id, current_date)
        self.update_matches_history(matches_history, loser_id, current_date)
        
        k_winner = self.k_factor(matches_history, winner_id)
        k_loser = self.k_factor(matches_history, loser_id)
        k_level = 1.1 if tourney_level == "Grand Slam" else 1
        
        self.current_ratings[winner_id] = rating_winner + (k_winner * k_level) * (1 - winner_probability)
        self.current_ratings[loser_id] = rating_loser + (k_loser * k_level) * (0 - loser_probability)
        return rating_winner, rating_loser
    
    def insert_elo_ratings_to_df(self):
        matches_history = defaultdict(lambda: defaultdict(list))
            
        elo_winners = []
        elo_losers = [] 
        for index, row in self.matches_df.iterrows():
            rating_winner, rating_loser = self.update_ratings(row['tournament_level'], matches_history, row['winner_id'], row['loser_id'], row['Date'])
            elo_winners.append(rating_winner)
            elo_losers.append(rating_loser)
        
        self.matches_df['elo_winner'] = elo_winners
        self.matches_df['elo_loser'] = elo_losers
        
    def calculate_prediction_accuracy(self):
        correct_predictions = 0 
        start_score_index = 2000

        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                rating_winner = row['elo_winner']
                rating_loser = row['elo_loser']
                expected_winner = self.win_probability(rating_winner, rating_loser)
                if expected_winner > 0.5:
                    correct_predictions+=1
                
        return (correct_predictions / (len(self.matches_df) - start_score_index)) * 100
    
    # Step 4: Calculate Brier Score
    def calculate_brier_score(self):
        total_brier_score = 0
        start_score_index = 2000
        
        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                rating_winner = row['elo_winner']
                rating_loser = row['elo_loser']
                expected_winner = self.win_probability(rating_winner, rating_loser)
                total_brier_score += (expected_winner - 1) ** 2
            
        return total_brier_score / (len(self.matches_df) - start_score_index)

In [14]:
elo_ratings_predictor = EloRatingsPredictor(matches)
elo_ratings_predictor.insert_elo_ratings_to_df()

elo_prediction_accuracy = elo_ratings_predictor.calculate_prediction_accuracy()
print(f"Prediction accuracy: {elo_prediction_accuracy}")
elo_brier_score = elo_ratings_predictor.calculate_brier_score()
print(f"Brier Score: {elo_brier_score}")

Prediction accuracy: 63.94187102633969
Brier Score: 0.22488452826500183


### With momentum

### With starting elo rating based on atp rank

In [15]:
class EloRatingsPredictor:
    def __init__(self, matches_df):
        self.matches_df = matches_df
        self.current_ratings = self.initialize_ratings()
    
    def initialize_ratings(self):
        players = set(self.matches_df['winner_id']).union(set(self.matches_df['loser_id']))
        ratings = {}

        for player in players:
            atp_rank = player_ranks.get(player, 1000)
            
            # Initialize rating based on ATP rank
            if atp_rank <= 10:
                initial_rating = 2000
            elif atp_rank <= 50:
                initial_rating = 1900
            elif atp_rank <= 100:
                initial_rating = 1800
            elif atp_rank <= 500:
                initial_rating = 1600
            else:
                initial_rating = 1500

            ratings[player] = initial_rating
        return ratings
    
    
    @staticmethod
    def k_factor(matches_count, player_id):
       return 250 / ((matches_count.get(player_id, 0) + 5) ** 0.4)
    
    @staticmethod
    def win_probability(rating_a, rating_b):
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))
    
    def update_ratings(self, tourney_level, matches_count, winner_id, loser_id):
        # Updates current ratings and return current ratings for winner and loser before update
        rating_winner = self.current_ratings[winner_id]
        rating_loser = self.current_ratings[loser_id]
                
        winner_probability = self.win_probability(rating_winner, rating_loser)
        loser_probability = 1 - winner_probability
        
        k_winner = self.k_factor(matches_count, winner_id)
        k_loser = self.k_factor(matches_count, loser_id)
        k_level = 1.1 if tourney_level == "Grand Slam" else 1
        
        self.current_ratings[winner_id] = rating_winner + (k_winner * k_level) * (1 - winner_probability)
        self.current_ratings[loser_id] = rating_loser + (k_loser * k_level) * (0 - loser_probability)
        return rating_winner, rating_loser
    
    def insert_elo_ratings_to_df(self):
        matches_count = defaultdict(int)
        elo_winners = []
        elo_losers = [] 
        for index, row in self.matches_df.iterrows():
            matches_count[row['winner_id']] +=1
            matches_count[row['loser_id']] +=1
            rating_winner, rating_loser = self.update_ratings(row['tournament_level'], matches_count, row['winner_id'], row['loser_id'])
            elo_winners.append(rating_winner)
            elo_losers.append(rating_loser)
        
        self.matches_df['elo_winner'] = elo_winners
        self.matches_df['elo_loser'] = elo_losers
        
    def calculate_prediction_accuracy(self):
        correct_predictions = 0 
        start_score_index = 2000

        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                rating_winner = row['elo_winner']
                rating_loser = row['elo_loser']
                expected_winner = self.win_probability(rating_winner, rating_loser)
                if expected_winner > 0.5:
                    correct_predictions+=1
                
        return (correct_predictions / (len(self.matches_df) - start_score_index)) * 100
    
    # Step 4: Calculate Brier Score
    def calculate_brier_score(self):
        total_brier_score = 0
        start_score_index = 2000
        
        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                rating_winner = row['elo_winner']
                rating_loser = row['elo_loser']
                expected_winner = self.win_probability(rating_winner, rating_loser)
                total_brier_score += (expected_winner - 1) ** 2
            
        return total_brier_score / (len(self.matches_df) - start_score_index)

In [16]:
elo_ratings_predictor = EloRatingsPredictor(matches)
elo_ratings_predictor.insert_elo_ratings_to_df()

elo_prediction_accuracy = elo_ratings_predictor.calculate_prediction_accuracy()
print(f"Prediction accuracy: {elo_prediction_accuracy}")
elo_brier_score = elo_ratings_predictor.calculate_brier_score()
print(f"Brier Score: {elo_brier_score}")

Prediction accuracy: 64.50334406737677
Brier Score: 0.22298859148994396


### All additions

In [17]:
from collections import defaultdict
from datetime import datetime, timedelta

class ComplexEloRatingsPredictor:
    def __init__(self, matches_df):
        self.matches_df = matches_df
        self.current_ratings = self.initialize_ratings()
        self.momentum = defaultdict(lambda: 1.0)

    def initialize_ratings(self):
        players = set(self.matches_df['winner_id']).union(set(self.matches_df['loser_id']))
        surface_ratings = {}

        for player in players:
            atp_rank = player_ranks.get(player, 1000)
            
            # Initialize rating based on ATP rank
            if atp_rank <= 10:
                initial_rating = 2000
            elif atp_rank <= 50:
                initial_rating = 1900
            elif atp_rank <= 100:
                initial_rating = 1800
            elif atp_rank <= 500:
                initial_rating = 1600
            else:
                initial_rating = 1500

            # # Adjust surface-specific initial ratings based on surface win rates
            surface_ratings[player] = {
                'General': initial_rating,
                'Hard': initial_rating,
                'Clay': initial_rating,
                'Grass': initial_rating
            }
        
        return surface_ratings

    def blended_rating(self, overall_rating, surface_rating, num_surface_matches, base_weight=0.2):
        # Increase weight of surface rating if the player has many matches on that surface
        weight = base_weight if num_surface_matches < 10 else 0.4
        return (weight * surface_rating) + ((1 - weight) * overall_rating)

    def dynamic_k_factor(self, matches_by_date, player_id):
        # Calculate base K-factor based on matches count
        total_matches = len(matches_by_date[player_id])
        base_k = 210 / ((total_matches + 5) ** 0.5)
        
        # Apply momentum factor to K
        k = base_k * self.momentum[player_id]
        return k

    @staticmethod
    def update_matches_history(matches_history, player_id, surface, current_date):
        # Track match count within the past year
        if isinstance(current_date, str):
            current_date = datetime.strptime(current_date, "%Y-%m-%d")
        
        one_year_ago = current_date - timedelta(days=365)
    
        # Remove outdated matches
        matches_history[surface][player_id] = [date for date in matches_history[surface][player_id] if date > one_year_ago]
    
        # Add current match
        matches_history[surface][player_id].append(current_date)

    def update_momentum(self, winner_id, loser_id):
        # Increase momentum for winner, up to a max (e.g., 1.5)
        self.momentum[winner_id] = min(self.momentum[winner_id] + 0.06, 1.3)
        
        # Decrease momentum for loser, down to a minimum (e.g., 0.7)
        self.momentum[loser_id] = max(self.momentum[loser_id] - 0.06, 0.7)
        
        # Slightly decay momentum for all players after each match
        for player in self.momentum:
            if player != winner_id and player != loser_id:
                self.momentum[player] = 1.0 + (self.momentum[player] - 1.0) * 0.99  # Decay toward 1.0

    def win_probability(self, rating_a, rating_b):
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

    def update_ratings(self, tourney_level, matches_history, winner_id, loser_id, surface, current_date):
        general_rating_winner = self.current_ratings[winner_id]['General']
        general_rating_loser = self.current_ratings[loser_id]['General']
        surface_rating_winner = self.current_ratings[winner_id][surface]
        surface_rating_loser = self.current_ratings[loser_id][surface]

        general_winner_probability = self.win_probability(general_rating_winner, general_rating_loser)
        general_loser_probability = 1 - general_winner_probability
        surface_winner_probability = self.win_probability(surface_rating_winner, surface_rating_loser)
        surface_loser_probability = 1 - surface_winner_probability


        # Update match counts
        self.update_matches_history(matches_history, winner_id, 'General', current_date)
        self.update_matches_history(matches_history, loser_id, 'General', current_date)
        self.update_matches_history(matches_history, winner_id, surface, current_date)
        self.update_matches_history(matches_history, loser_id, surface, current_date)

        # Tournament level K-factor adjustment
        k_level = 1.2 if tourney_level == "Grand Slam" else (1.05 if tourney_level == "Masters" else (1.0 if tourney_level == "ATP 500" else 0.9))
        
        # Calculate K-factors with momentum applied
        k_general_winner = self.dynamic_k_factor(matches_history['General'], winner_id) * k_level
        k_general_loser = self.dynamic_k_factor(matches_history['General'], loser_id) * k_level
        k_surface_winner = self.dynamic_k_factor(matches_history[surface], winner_id) * k_level
        k_surface_loser = self.dynamic_k_factor(matches_history[surface], loser_id) * k_level

        # Update general and surface ratings
        self.current_ratings[winner_id]['General'] += k_general_winner * (1 - general_winner_probability)
        self.current_ratings[loser_id]['General'] += k_general_loser * (0 - general_loser_probability)
        self.current_ratings[winner_id][surface] += k_surface_winner * (1 - surface_winner_probability)
        self.current_ratings[loser_id][surface] += k_surface_loser * (0 - surface_loser_probability)

        # Update momentum based on the match outcome
        self.update_momentum(winner_id, loser_id)

        return general_rating_winner, general_rating_loser, surface_rating_winner, surface_rating_loser

    # Step 5: Process Matches and Calculate Brier Score
    def insert_elo_ratings_to_df(self):
        matches_history = {
            'General': defaultdict(lambda: defaultdict(list)),
            'Hard': defaultdict(lambda: defaultdict(list)),
            'Clay': defaultdict(lambda: defaultdict(list)),
            'Grass': defaultdict(lambda: defaultdict(list))
        }
        elo_winners, elo_losers = [], []
        surface_elo_winners, surface_elo_losers = [], []
        blended_elo_winners, blended_elo_losers = [], []

        for index, row in self.matches_df.iterrows():
            winner_id, loser_id, surface, current_date = row['winner_id'], row['loser_id'], row['Surface'], row['Date']
            general_rating_winner, general_rating_loser, surface_rating_winner, surface_rating_loser = self.update_ratings(
                row['tournament_level'], matches_history, winner_id, loser_id, surface, current_date)
            
            num_surface_matches_winner = len(matches_history[surface][winner_id])
            num_surface_matches_loser = len(matches_history[surface][loser_id])
            
            elo_winners.append(general_rating_winner)
            elo_losers.append(general_rating_loser)
            surface_elo_winners.append(surface_rating_winner)
            surface_elo_losers.append(surface_rating_loser)
            blended_elo_winners.append(self.blended_rating(general_rating_winner, surface_rating_winner, num_surface_matches_winner))
            blended_elo_losers.append(self.blended_rating(general_rating_loser, surface_rating_loser, num_surface_matches_loser))
            
        self.matches_df['elo_winner'] = elo_winners
        self.matches_df['elo_loser'] = elo_losers
        self.matches_df['surface_elo_winner'] = surface_elo_winners
        self.matches_df['surface_elo_loser'] = surface_elo_losers
        self.matches_df['blended_elo_winner'] = blended_elo_winners
        self.matches_df['blended_elo_loser'] = blended_elo_losers


    def calculate_prediction_accuracy(self):
        correct_predictions = 0
        start_score_index = 2000
        valid_count = 0
        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                blended_elo_winner = row['blended_elo_winner']
                blended_elo_loser = row['blended_elo_loser']
                if blended_elo_winner > 1700 or blended_elo_loser > 1700:

                    expected_winner = self.win_probability(blended_elo_winner, blended_elo_loser)
                    valid_count += 1
                    if expected_winner > 0.5:
                        correct_predictions += 1

        return (correct_predictions / valid_count) * 100

    def calculate_brier_score(self):
        total_brier_score = 0
        start_score_index = 2000
        valid_count = 0
        for index, row in self.matches_df.iterrows():
            if index > start_score_index:
                blended_elo_winner = row['blended_elo_winner']
                blended_elo_loser = row['blended_elo_loser']
                if blended_elo_winner > 1700 or blended_elo_loser > 1700:
                    expected_winner = self.win_probability(blended_elo_winner, blended_elo_loser)
                    total_brier_score += (expected_winner - 1) ** 2
                    valid_count +=1 
        return total_brier_score / valid_count if valid_count > 0 else float('nan')


In [18]:
complex_elo_ratings_predictor = ComplexEloRatingsPredictor(matches)
complex_elo_ratings_predictor.insert_elo_ratings_to_df()

elo_prediction_accuracy = complex_elo_ratings_predictor.calculate_prediction_accuracy()
print(f"Prediction accuracy: {elo_prediction_accuracy}")
elo_brier_score = complex_elo_ratings_predictor.calculate_brier_score()
print(f"Brier Score: {elo_brier_score}")

Prediction accuracy: 65.75871905769762
Brier Score: 0.21533314242665436


## GLICKO 2

In [19]:
import pandas as pd
from glicko2 import Player  # Importing the Player class as per your package
from math import sqrt, pi

# Dictionary to store Player objects for each player
players = {}
q = 0.0057565  # Constant in Glicko-2 system

def g(rd):
    """Calculate the g(RD) function."""
    return 1 / sqrt(1 + (3 * (q ** 2) * (rd ** 2)) / (pi ** 2))

def expected_outcome(player1, player2):
    """Calculate the expected probability that player1 will win against player2."""
    g_rd = g(sqrt(player1.rd**2 + player2.rd**2))
    rating_diff = (player1.rating - player2.rating) / 400
    return 1 / (1 + 10 ** (-g_rd * rating_diff))

def get_player(player_id):
    """Retrieve or initialize a Player object."""
    if player_id not in players:
        players[player_id] = Player()  # Initialize new player with default Glicko-2 values
    return players[player_id]

def update_ratings(winner, loser):
    """Update the Glicko-2 ratings for both players after a match."""
    winner_rating = winner.rating
    winner_rd = winner.rd
    loser_rating = loser.rating
    loser_rd = loser.rd

    # Update ratings
    winner.update_player([loser_rating], [loser_rd], [1])  # Winner's outcome is 1
    loser.update_player([winner_rating], [winner_rd], [0])  # Loser's outcome is 0

# Evaluate Glicko-2 with accuracy and Brier score
from sklearn.metrics import precision_score, brier_score_loss

def evaluate_glicko2(matches_df):
    probabilities = []
    actuals = []
    good_pred = 0
    total_pred = 0
    for index, row in matches_df.iterrows():
        winner_id = row['winner_id']
        loser_id = row['loser_id']

        # Retrieve or initialize player objects
        winner = get_player(winner_id)
        loser = get_player(loser_id)
        if index > 2000:
            print(winner.rating)
            print(loser.rating)
    
            # Calculate win probability for the expected winner
            win_prob = expected_outcome(winner, loser)
            print(win_prob)
            probabilities.append(win_prob)
            
            # Make a prediction (1 if win_prob > 0.5, else 0)
            if win_prob > 0.5:
                good_pred+=1
            actuals.append(1)  # Actual outcome is 1 since winner_id is the actual winner
            total_pred +=1

        # Update ratings based on match outcome
        update_ratings(winner, loser)

    # Calculate accuracy
    accuracy = (good_pred / total_pred) * 100
    print(f"Prediction Accuracy: {accuracy:.2f}%")

    # Calculate Brier score
    brier_score = brier_score_loss(actuals, probabilities)
    print(f"Brier Score: {brier_score:.4f}")

# Run evaluation
evaluate_glicko2(matches)


1777.4595760165848
1640.9523740750017
0.6749670976395753
1638.8269645160497
1435.6681045959804
0.7472243236189761
1639.1023040435966
1761.3510628071656
0.3396856811758441
1605.5012427679092
1503.897490792986
0.6317864906669182
1615.4462630843952
1792.2063028969826
0.2793027200120172
1782.3904392297782
1560.178948551959
0.7710764691082348
1817.2269994103058
1752.7692988361493
0.5871145622313791
1673.1160575639722
1507.0396651377018
0.7090042039957473
1571.9248280304619
1752.8745045265307
0.2763101799790913
1743.5240687917435
1475.5256475466426
0.80835339622558
1851.3005713838218
1857.0430097271042
0.49216503656336996
1765.4885143111662
1661.0273764708074
0.6391637514375086
1785.8099821324993
1595.9583866666442
0.7356597020283632
1710.208177533373
1732.562017467563
0.47100470396292354
1616.3616898764049
1631.175026056242
0.48021780806303577
1779.331343741773
1640.3390177238014
0.6790525515176947
1268.6160995302196
1476.982982930831
0.2589660049118559
1646.4277410241602
1906.795886902903


### Zapisujemy elo all additions

In [20]:
matches.to_csv("../data/all_years_nc_tc_elo/matches.csv", index=False)