In [205]:
import itertools
import pandas as pd
import statistics

# Turn off pandas' chained-assignment warning for the whole session; the scoring
# cell further down writes with the df[column][row] = value pattern.
pd.options.mode.chained_assignment = None  # default='warn'
    
def calculateKendallTauDistance(listA, listB):
    """Count the item pairs that two rankings place in opposite relative order.

    Args:
        listA: Sequence of hashable items, ordered by rank.
        listB: Sequence holding the same items as ``listA`` in a possibly
            different order.

    Returns:
        The number of unordered pairs of distinct items whose relative order
        differs between the two rankings. Returns 0 when either ranking is
        empty, because there is then nothing to compare.

    Raises:
        ValueError: If both rankings are non-empty but do not contain the same
            set of items.
    """
    if not listA or not listB:
        return 0

    # Record each item's first position once instead of calling list.index()
    # for every pair; setdefault keeps the first occurrence, like list.index().
    positionsA = {}
    for position, item in enumerate(listA):
        positionsA.setdefault(item, position)
    positionsB = {}
    for position, item in enumerate(listB):
        positionsB.setdefault(item, position)

    if positionsA.keys() != positionsB.keys():
        raise ValueError("both rankings must contain the same items")

    # A pair is discordant when its position differences have opposite signs.
    return sum(
        1
        for x, y in itertools.combinations(positionsA, 2)
        if (positionsA[x] - positionsA[y]) * (positionsB[x] - positionsB[y]) < 0
    )

def partialScoring(dropOutOrder, playerPredictionsArray):
    """Score a prediction against a drop-out order that may still be incomplete.

    Both rankings are padded to the length of the prediction. Candidates that
    have not dropped yet are replaced by placeholders, handed out in the same
    sequence on both sides, so undropped candidates never disagree with each
    other. Only pairs involving a dropped candidate can add to the score.

    Args:
        dropOutOrder: Candidates that have dropped so far, in drop order. May
            be empty. Every entry is expected to appear in
            ``playerPredictionsArray``; otherwise the padded rankings hold
            different items and the distance function rejects them.
        playerPredictionsArray: The player's full predicted ordering.

    Returns:
        The Kendall tau distance between the padded rankings, as computed by
        ``calculateKendallTauDistance``.

    Note:
        Placeholders are single characters starting at ``'a'``, so candidate
        names must not be such single characters.
    """
    slotCount = len(playerPredictionsArray)

    # One placeholder per slot: when nobody has dropped yet every slot needs
    # one (allocating slotCount - 1 ran out and raised IndexError).
    placeHolders = [chr(97 + offset) for offset in range(slotCount)]
    predictionFillers = iter(placeHolders)
    dropFillers = iter(placeHolders)

    # Keep dropped candidates where the player put them; mask the rest.
    predictionsWithPlaceHolders = [
        candidate if candidate in dropOutOrder else next(predictionFillers)
        for candidate in playerPredictionsArray
    ]

    # Known drops fill the leading slots; the remaining slots are still open.
    dropsWithPlaceHolders = [
        dropOutOrder[slot] if slot < len(dropOutOrder) else next(dropFillers)
        for slot in range(slotCount)
    ]

    return calculateKendallTauDistance(dropsWithPlaceHolders, predictionsWithPlaceHolders)

# Load the saved submissions, keeping only the alias and prediction columns.
df = pd.read_csv("../2024/leaderboard/data/submissions2024.csv")[['leaderboardAlias', 'prediction']]

# Show the last row of the loaded frame.
display(df.iloc[-1:])

Unnamed: 0,leaderboardAlias,prediction
49,Wisdom of the crowd,"hurd,elder,suarez,hutchinson,burgum,christie,s..."


In [206]:
# Full candidate ordering, passed as the prediction in the call below.
averageOrder = [
    'hurd', 'elder', 'suarez', 'hutchinson', 'burgum', 'christie',
    'scott', 'haley', 'ramaswamy', 'pence', 'desantis', 'trump',
]
# Candidates treated as already dropped for this example.
dropOutOrder = ['desantis', 'hurd', 'elder']

partialScoring(dropOutOrder=dropOutOrder, playerPredictionsArray=averageOrder)

10

In [207]:
# Score every submission after each round so the leaderboard can be tracked over time.
# NOTE(review): rounds are scored against prefixes of averageOrder; dropOutOrder is
# not used in this cell -- confirm that this is the intended reference order.

# Parse each stored prediction once. [:-1] discards the last comma-separated field --
# presumably the stored strings end with a trailing comma; TODO confirm against the CSV.
predictions = [rawPrediction.split(",")[:-1] for rawPrediction in df['prediction']]

# Whole columns are assigned at once instead of df[column][row] = value: that chained
# assignment writes to a temporary under pandas Copy-on-Write and leaves df unchanged.
for roundNumber in range(1, len(averageOrder) + 1):
    dropsSoFar = averageOrder[:roundNumber]
    df['round_' + str(roundNumber) + '_score'] = pd.Series(
        [partialScoring(dropsSoFar, prediction) for prediction in predictions],
        index=df.index,
        dtype='int64',
    )

# score against the complete order (the most recent round)
df['kendallDistance'] = pd.Series(
    [partialScoring(averageOrder, prediction) for prediction in predictions],
    index=df.index,
    dtype='int64',
)

# for every distance column, create a rank column (ties share the lowest rank)
df['rank'] = df['kendallDistance'].rank(method='min')

for roundNumber in range(1, len(averageOrder) + 1):
    df['round_' + str(roundNumber) + '_rank'] = df['round_' + str(roundNumber) + '_score'].rank(method='min')

In [210]:
# Write the scored frame back to the CSV path it was loaded from, omitting the index.
df.to_csv(path_or_buf="../2024/leaderboard/data/submissions2024.csv", index=False)