## Top Coder rating method

This is the Top Coder Rating System. The implementation is based on the algorithm mentioned at the thesis: 

https://people.ksp.sk/~misof/publications/2009thesis.pdf

Simulating Top Coder multi-player on International Informatics Olympiad contests between 2011 and 2022

In [1]:
import math 
from statistics import NormalDist

# Function to calculate the Average Rating  
def AvgRatings(ratings):
    sum = 0
    for nation in ratings:
        sum += ratings[nation]
    
    return sum / len(ratings)

# Function to calculate the Challenge Factor based on ratings, voltalities, and average ratings
def ChallengeFactor(ratings, voltalities, avg):
    sumRatings = 0
    for nation in ratings:
        sumRatings += (ratings[nation] - avg) ** 2
    
    sumVoltalities = 0
    for nation in voltalities:
        sumVoltalities += (voltalities[nation]) ** 2
    
    return math.sqrt((sumRatings / (len(ratings) - 1) ) + (sumVoltalities / len(voltalities)))


# Function to calculate win propability of team 1 with rating1 and voltality1 when facing team 2 with rating2 and voltality2
def WinProbability(rating1, rating2, vol1, vol2):
    return 0.5 * (math.erf((rating1 - rating2) / math.sqrt(2 * (vol1 ** 2 + vol2 ** 2))) + 1)


#Function to Calculate the Expected performance of a contestant based on the Expected rank
def ExpectedPerformance(ExpectedRank, NoOfCoders):
    return -1 * NormalDist().inv_cdf((ExpectedRank - 0.5) / NoOfCoders)

#Function to Calculate the ActualPerformance performance of a contestant based on the Actual rank
def ActualPerformance(ActualRank, NoOfCoders):
     return -1 * NormalDist().inv_cdf((ActualRank - 0.5) / NoOfCoders)
    
#Function to Calculate the weight of rating of contestant based on the no of times the contestant played
def Weight(rating, timePlayed):
    factor = 1.0
    
    #Less weight for players with higher ratings
    if rating > 2500:
        factor = 0.8
    if rating < 2500 and rating > 2000:
        factor = 0.9
        
    return factor * (1 / (1-(0.42 / (timePlayed + 1) + 0.18)) - 1)

#Function to Calculate the cap (maximum update) of rating of contestant based on the no of times the contestant played
def Cap(timePlayed):
    return 150 + 1500 / (timePlayed+2)
    
#Function to Calculate the performance of player at a contest based on Old rating, Competition factor, expected rating, and actual ratings
def Perf(oldR, CF, EP, AP):
    perf = oldR + CF * (AP - EP)
    
    return perf
    
#Function to Calculate the new rating of player at a contest based on Old rating, weight, performance, cap
def NewRating(oldR, weight, perf, cap):
    
    newR = (oldR + weight * perf) / (1+weight)
    
    #if difference between ratings exceed the cap, the rating change does not exceed the cap 
    if abs(newR - oldR) > cap:
        if newR > oldR:
            newR = oldR + cap
        else:
            newR = oldR - cap
        
    return newR 

#Function to Calculate the new voltality of player at a contest based on Old rating, old voltality, newR, oldR, and weight
def newVoltality(timePlayed, OldVol, newR, oldR, weight):
    #first voltality change is from 535 to 385
    if timePlayed == 1:
        return 385
    else:
        return math.sqrt((newR-oldR)**2 / weight + (OldVol)**2 / (weight + 1))

### Loading contests datasets

In [2]:
import pandas as pd
import os.path

#directory of the dataset
filedir = "PerOthers" 
#type of the dataset (For instance C for whole contests, AH for AdHoc problems, and IN for interactive problems)
filetype = "OT" 
#filename {directory}/IOI{year}{type}.csv Ex: PerWholeContest/IOI2022C.csv
filename = "{}/IOI{}{}.csv"

#dictionary of contests data => contests[year] = pandas DataFrame
contests = dict()

#read from firstYear contest through lastYear contest
firstYear = 2011
lastYear = 2022

#list of years where specific categories exist
years = []

#Reading csv files into DataFrames 
for year in range(firstYear, lastYear + 1):
    file = filename.format(filedir, year, filetype)
    
    #check if file exists (some years don't comprise problems of some categories)
    if os.path.isfile(file):
        years.append(year)
        contests[year] = pd.read_csv(file, encoding='unicode_escape')

### Simulate Elo ratings

In [3]:
#dictionary of ratings => ratings[nation] = rating
ratings = dict()
#dictionary of voltalities => voltalities[nation] = voltality
voltalities = dict()
#dictionary of the the number of rounds a contestant participated in => prevPlayed[nation] = noPrevPlayed
prevPlayed = dict()

#dictionary of ratings history over years
#where => ratingsHistory[nation] = {year1: rating_year1, year2: rating_year2...}
#purpose: extracting results 
ratingsHistory = dict()

#simulating the multiplayer rating over years
for year in years:

    noOfNations = len(contests[year])

    #temp dictionaries used for calculating of AverageRating and Competition factor
    tempRatings = dict()
    tempVoltalities = dict()
    
    #intializing nations with no prior rating to TopCoder initial rating
    for i in range(noOfNations):
        nation = contests[year].loc[i,"Country"]

        if nation not in ratings:
            ratings[nation] = 1200 #considering 1200 as intial TopCoder rating
            voltalities[nation] = 535 #considering 535 as intial TopCoder voltality
            prevPlayed[nation] = 1 #no of rounds a contestant participated starts from 1
            ratingsHistory[nation] = {0: 1200} #0 represents initial rating
            
        tempRatings[nation] = ratings[nation]
        tempVoltalities[nation] = voltalities[nation]

        
    #dictionaries for storing new ratings and new voltalities
    newRatings = dict()
    newVol = dict()
    
    
    
    avgRatings = AvgRatings(tempRatings)
    CF = ChallengeFactor(tempRatings, tempVoltalities, avgRatings)


    for i in range(noOfNations):
        team1Nation = contests[year].loc[i,"Country"]
        team1Rank = contests[year].loc[i,"Rank"]
        
        
        #Expected rank = 0.5 + sum of win probabilities between team i and all the other teams
        erank = 0.5
        
        for j in range(noOfNations):
            team2Nation = contests[year].loc[j,"Country"]
            
            if team1Nation == team2Nation:
                continue 
            
            erank += WinProbability(ratings[team1Nation], ratings[team2Nation], voltalities[team1Nation], voltalities[team2Nation])
        
        
        #Getting the new Rating and voltality for team i
        eperf = ExpectedPerformance(erank, noOfNations)
        aperf = ActualPerformance(team1Rank, noOfNations)
        
        cap = Cap(prevPlayed[team1Nation])
        weight = Weight(ratings[team1Nation], prevPlayed[team1Nation])  
        
        perf = Perf(ratings[team1Nation], CF, eperf, aperf)
        
        newRatings[team1Nation] = NewRating(ratings[team1Nation], weight, perf, cap)    
        newVol[team1Nation] = newVoltality(prevPlayed[team1Nation], voltalities[team1Nation], newRatings[team1Nation], ratings[team1Nation], weight)
        
        prevPlayed[team1Nation] += 1
    
    #adding countries to newRatings that didn't participate in a specific contest but had old rating
    for nation in ratings:
        if nation not in newRatings:
            newRatings[nation] = ratings[nation]

            
    #adding countries to newVol that didn't participate in a specific contest but had old voltality        
    for nation in voltalities:
        if nation not in newVol:
            newVol[nation] = voltalities[nation]
    
    
    ratings = newRatings
    voltalities = newVol
    
    #appending ratings after a year's contest to the ratingHistory
    for nation in ratings:
        if nation in contests[year].values:
            ratingsHistory[nation][year] = ratings[nation]

### Extracting results

In [4]:
#Extracting csv file for rating history of each nation
for nation in ratingsHistory:
    nationData = {"Year": [], "Rating": []}
    
    for year in ratingsHistory[nation]:
        nationData["Year"].append(year)
        nationData["Rating"].append(ratingsHistory[nation][year])
    
    nationHistory = pd.DataFrame(nationData)
    
    historyFileName = 'nationsRatingsChanges{}/topCoder/{}.csv'
    nationHistory.to_csv(historyFileName.format(filedir, nation), index=False)
    
    
#Extracting csv file for ratings and ranks for each year contest

#dictionary of contests TrueSkill ratings data => contestsRating[year] = pandas DataFrame
contestsRatings = dict()

for year in years:
    contestRatings = {"Country": [], "Rating": []}
    
    for nation in ratingsHistory:
        if year in ratingsHistory[nation]:
            contestRatings["Country"].append(nation)
            contestRatings["Rating"].append(ratingsHistory[nation][year])
    
    contestRatingsDF = pd.DataFrame(contestRatings)
    
    #sorting the DataFrame AND adding ranking column
    contestRatingsDF.sort_values(["Rating"], ascending=False, inplace=True)
    
    contestRanks = range(1, len(contestRatingsDF.index)+1)
    contestRatingsDF["Rank"] = contestRanks
    
    #Extracting CSV and appending contest DataFrame to contestsRatings
    
    contestsRatings[year] = contestRatingsDF
    contestFileName = 'contestRatings{}/topCoder/{}.csv'
    contestRatingsDF.to_csv(contestFileName.format(filedir, year), index=False)

### Calculating predective accuracies

Calculating the predectivity by comparing the rataings of each contest with the rankings of the following contest.

This is done through dividing the nations into combination of pairs and determine whether each pair is predicted correctly.

In [5]:
#storing predective accuracies for csv extraction 
predectiveAccuracies = {"Year": [], "Predective Accuracy": []}

print('\n\n************** TopCoder Predectivity **************\n')
print('===========================')
print('Year    Predictive Accuracy')
print('===========================')

for year in range(0, len(years) - 1):
    
    noOfCombinations = 0
    truePredections = 0
    
    noOfNationsInNextContest = len(contestsRatings[years[year+1]])
    
    currentContestDF = contestsRatings[years[year]]
    
    for i in range(noOfNationsInNextContest):
        
        team1Nation = contests[years[year + 1]].loc[i,"Country"]
        team1Rank = contests[years[year + 1]].loc[i,"Rank"]
        
        #if a nation participated in the following contest but did not participate in the current contest,
        #this code looks for the nearst avaliable rank for this nation.
        #if not found, the nation's rating is considered as the initial rating of TopCoder (1200).
        if team1Nation not in currentContestDF["Country"].values: 
            team1Rating = 1200
            
            for _year in ratingsHistory[team1Nation]:
                if _year < years[year]:
                    team1Rating = ratingsHistory[team1Nation][_year]
        else:
            team1Rating = ratingsHistory[team1Nation][years[year]]
            
            
        for j in range(i+1, noOfNationsInNextContest):
            
            team2Nation = contests[years[year + 1]].loc[j,"Country"]
            team2Rank = contests[years[year + 1]].loc[j,"Rank"]
            
            if team2Nation not in currentContestDF["Country"].values: 
                team2Rating = 1200
                
                for _year in ratingsHistory[team2Nation]:
                    if _year < years[year]:
                        team2Rating = ratingsHistory[team2Nation][_year]
            else:
                team2Rating = ratingsHistory[team2Nation][years[year]]
            
            if(team1Nation == team2Nation):
                continue
                
            noOfCombinations += 1
            
            if team1Rating > team2Rating and team1Rank < team2Rank:
                truePredections += 1
            elif team1Rating < team2Rating and team1Rank > team2Rank:
                truePredections += 1
            elif team1Rating == team2Rating and team1Rank == team2Rank:
                truePredections += 1
    
    
    predectivity = round((truePredections / noOfCombinations) * 100, 4)
    
    print(f'{years[year+1]}       {predectivity}%')
    
    predectiveAccuracies["Year"].append(years[year + 1])
    predectiveAccuracies["Predective Accuracy"].append(predectivity)


#Extracting accuracies to csv
predectiveDF = pd.DataFrame(predectiveAccuracies)
    
predectiveFileName = 'predectiveAccuracies{}/TopCoder.csv'
predectiveDF.to_csv(predectiveFileName.format(filedir), index=False)



************** TopCoder Predectivity **************

Year    Predictive Accuracy
2012       62.3148%
2013       72.1844%
2016       78.2407%
2019       69.4734%
2020       78.4817%
2021       72.2932%
2022       72.1099%
