# MS TrueSkill Rating Method
#### Script written by Mohamed Mahmoud
Simulating TrueSkill on International Olympiad in Informatics (IOI) contests between 2011 and 2022.

In [1]:
import trueskill as ts

### Loading contests datasets

In [2]:
import pandas as pd
import os.path

#---- input location -------------------------------------------------------
#folder that holds the csv files of the chosen problem category
filedir = "PerDataStructures"
#category suffix used in the file names
#(for instance C for whole contests, AH for AdHoc problems, and IN for interactive problems)
filetype = "DS"
#path template: {directory}/IOI{year}{type}.csv  Ex: PerWholeContest/IOI2022C.csv
filename = "{}/IOI{}{}.csv"

#---- range of contests to look for (both ends included) --------------------
firstYear = 2011
lastYear = 2022

#contests[year] = pandas DataFrame holding that year's results
contests = dict()

#years for which a csv of the chosen category was actually found, in ascending order
years = []

#Load every available contest; a year without a file has no problems of this category
for year in range(firstYear, lastYear + 1):
    file = filename.format(filedir, year, filetype)

    if not os.path.isfile(file):
        continue

    years.append(year)
    contests[year] = pd.read_csv(file, encoding='unicode_escape')

### Simulate TrueSkill ratings

In [3]:
#latest known rating of every nation seen so far => ratings[nation] = ts.Rating
ratings = dict()

#rating timeline of every nation, kept for the result exports
#ratingsHistory[nation] = {0: initial rating, year1: rating after year1, year2: ...}
ratingsHistory = dict()

#Replay the contests in chronological order; each contest is one multiplayer match
for year in years:

    contest = contests[year]

    #one single-member group per contest row, plus the matching finishing position
    ratingGroups = []
    positions = []

    for country, rank in zip(contest["Country"], contest["Rank"]):

        #first appearance: start from the TrueSkill default rating
        if country not in ratings:
            ratings[country] = ts.Rating()
            ratingsHistory[country] = {0: ts.Rating()} #key 0 marks the initial rating

        ratingGroups.append({country: ratings[country]})
        positions.append(rank - 1) #rate() expects ranks starting at 0, the csv starts at 1

    #rate() answers in the same shape it was given => [{"Country1": Rating()}, {"Country2": Rating()}...]
    updatedGroups = ts.rate(ratingGroups, ranks=positions)

    #flatten the list of one-entry dictionaries into a single {nation: Rating} mapping
    refreshed = {
        country: rating
        for group in updatedGroups
        for country, rating in group.items()
    }

    #log this year's rating for every participant
    for country, rating in refreshed.items():
        ratingsHistory[country][year] = rating

    #nations that skipped this contest keep the rating they already had
    for country, rating in ratings.items():
        refreshed.setdefault(country, rating)

    ratings = refreshed

### Extracting results

In [4]:
#Export 1: one csv per nation listing its rating (mu) at every point of its history
#(key 0 = initial rating, then one row per contest the nation took part in)
historyFileName = 'nationsRatingsChanges{}/trueSkill/{}.csv'

#to_csv does not create missing folders, so make sure the output folder exists first
os.makedirs(os.path.dirname(historyFileName.format(filedir, "")), exist_ok=True)

for nation, history in ratingsHistory.items():
    nationHistory = pd.DataFrame({
        "Year": list(history),
        "Rating": [rating.mu for rating in history.values()],
    })

    nationHistory.to_csv(historyFileName.format(filedir, nation), index=False)


#Export 2: one csv per contest year with the rating and rating-based rank of each participant

#dictionary of contests TrueSkill ratings data => contestsRatings[year] = pandas DataFrame
contestsRatings = dict()

contestFileName = 'contestRatings{}/trueSkill/{}.csv'
os.makedirs(os.path.dirname(contestFileName.format(filedir, "")), exist_ok=True)

for year in years:
    #only nations that have an entry for this year, i.e. that took part in the contest
    participants = [
        (nation, history[year].mu)
        for nation, history in ratingsHistory.items()
        if year in history
    ]

    contestRatingsDF = pd.DataFrame({
        "Country": [nation for nation, _ in participants],
        "Rating": [mu for _, mu in participants],
    })

    #highest rating first, then number the rows 1..n to obtain the rank column
    contestRatingsDF.sort_values(["Rating"], ascending=False, inplace=True)
    contestRatingsDF["Rank"] = range(1, len(contestRatingsDF.index) + 1)

    #keep the table for the later cells and write it to disk
    contestsRatings[year] = contestRatingsDF
    contestRatingsDF.to_csv(contestFileName.format(filedir, year), index=False)

### Calculating predictive accuracies

Predictive accuracy is calculated by comparing the ratings obtained after each contest with the rankings of the following contest.

This is done by splitting the nations into all possible pairs and determining whether the outcome of each pair was predicted correctly.

In [5]:
#per-year accuracies, collected here so they can be exported to csv at the end
predectiveAccuracies = {"Year": [], "Predective Accuracy": []}


def _priorRating(nation, referenceYear, ratedNations):
    """Return the rating (mu) a nation held after the contest of referenceYear.

    nation        -- key into ratingsHistory
    referenceYear -- year of the contest whose ratings are used as the prediction
    ratedNations  -- set of nations that received a rating in referenceYear

    If the nation took part in referenceYear, its rating from that year is returned.
    Otherwise the last entry of its history with a key below referenceYear is used
    (key 0 is the initial rating); 25 is returned if there is no such entry.
    """
    history = ratingsHistory[nation]

    if nation in ratedNations:
        return history[referenceYear].mu

    rating = 25
    for pastYear in history:
        if pastYear < referenceYear:
            rating = history[pastYear].mu
    return rating


print('\n\n************** TrueSkill Predectivity **************\n')
print('===========================')
print('Year    Predictive Accuracy')
print('===========================')

#each contest's ratings are used to predict the ranking of the contest that follows it
for currentYear, nextYear in zip(years, years[1:]):

    #set membership is O(1), unlike scanning the Country column for every lookup
    ratedNations = set(contestsRatings[currentYear]["Country"])

    #walk the rows of the following contest itself, so that no row is left out
    #even when its length differs from that year's rating table
    nextContest = contests[nextYear]
    nations = nextContest["Country"].tolist()
    ranks = nextContest["Rank"].tolist()

    #look every rating up once per nation instead of once per pair
    priorRatings = [
        _priorRating(nation, currentYear, ratedNations) for nation in nations
    ]
    entries = list(zip(nations, ranks, priorRatings))

    noOfCombinations = 0
    truePredections = 0

    for i, (team1Nation, team1Rank, team1Rating) in enumerate(entries):
        for team2Nation, team2Rank, team2Rating in entries[i + 1:]:

            #a nation is never compared with itself
            if team1Nation == team2Nation:
                continue

            noOfCombinations += 1

            #correct when the higher rated nation got the better (smaller) rank,
            #or when both the ratings and the ranks are tied
            if team1Rating > team2Rating and team1Rank < team2Rank:
                truePredections += 1
            elif team1Rating < team2Rating and team1Rank > team2Rank:
                truePredections += 1
            elif team1Rating == team2Rating and team1Rank == team2Rank:
                truePredections += 1

    #a contest with no comparable pair has no defined accuracy; avoid dividing by zero
    if noOfCombinations:
        predectivity = round((truePredections / noOfCombinations) * 100, 4)
    else:
        predectivity = float("nan")

    print(f'{nextYear}       {predectivity}%')

    predectiveAccuracies["Year"].append(nextYear)
    predectiveAccuracies["Predective Accuracy"].append(predectivity)


#Extracting accuracies to csv
predectiveDF = pd.DataFrame(predectiveAccuracies)

predectiveFileName = 'predectiveAccuracies{}/trueSkill.csv'
predectivePath = predectiveFileName.format(filedir)

#to_csv does not create missing folders, so make sure the output folder exists first
os.makedirs(os.path.dirname(predectivePath), exist_ok=True)
predectiveDF.to_csv(predectivePath, index=False)



************** TrueSkill Predectivity **************

Year    Predictive Accuracy
2012       61.6667%
2013       79.3898%
2014       74.7967%
2018       76.0492%
2019       84.1754%
2020       76.8778%
2021       77.3749%
2022       79.8752%
