In [10]:
import itertools
import pandas as pd
import statistics

# Silence pandas' SettingWithCopyWarning for the whole notebook.
pd.set_option('mode.chained_assignment', None)  # default='warn'
    
def calculateSimilarity(predictionA, predictionB, itemCount=None):
    """Return how similarly two predictions rank the same items, as a percentage.

    Each prediction is a comma-separated ranking; the final comma-delimited
    field is discarded (for example the empty string after a trailing comma).
    The score is derived from the normalized Kendall tau distance: the number
    of item pairs that the two rankings order oppositely, divided by the
    number of possible pairs.

    Args:
        predictionA: First ranking, e.g. "a,b,c,".
        predictionB: Second ranking over the same items.
        itemCount: Number of ranked items used for normalization. Defaults to
            the number of distinct items found in the predictions, so the
            score is scaled correctly for rankings of any length.

    Returns:
        float: 100.0 when both rankings agree on every pair, 0.0 when one is
        the exact reverse of the other (with the default ``itemCount``).

    Raises:
        ValueError: If the two predictions do not contain the same items.
    """
    rankingA = predictionA.split(',')[:-1]
    rankingB = predictionB.split(',')[:-1]

    # Map each item to the position of its first occurrence so lookups are
    # O(1) instead of a list scan per pair.
    positionA = {}
    for position, item in enumerate(rankingA):
        positionA.setdefault(item, position)
    positionB = {}
    for position, item in enumerate(rankingB):
        positionB.setdefault(item, position)

    if positionA.keys() != positionB.keys():
        raise ValueError("predictions must rank the same set of items")

    if itemCount is None:
        itemCount = len(positionA)

    maxDiscordantPairs = (itemCount * (itemCount - 1)) / 2
    if maxDiscordantPairs == 0:
        # Fewer than two items: there is no pair that could disagree.
        return 100.0

    # A pair is discordant when the two rankings place it in opposite order,
    # i.e. the position differences have opposite signs.
    distance = sum(
        1
        for x, y in itertools.combinations(positionA, 2)
        if (positionA[x] - positionA[y]) * (positionB[x] - positionB[y]) < 0
    )

    normalizedDistance = distance / maxDiscordantPairs

    return 100 - (normalizedDistance * 100)

In [2]:
# Read the submissions export and keep only the player alias and their
# comma-separated prediction string.
df = pd.read_csv(
    "../2024/leaderboard/data/submissions2024AllColumns.csv"
).loc[:, ['leaderboardAlias', 'prediction']]

In [44]:
player1 = []
player2 = []
similarity = []

# calculateSimilarity is symmetric in its two arguments, so each unordered
# pair is scored once and the cached score is reused for the mirrored row.
pairScores = {}

# Record every ordered pair of distinct players (both (a, b) and (b, a));
# self-pairs are skipped.
for i1 in df.index:
    for i2 in df.index:
        if i1 == i2:
            continue
        pairKey = frozenset((i1, i2))
        if pairKey not in pairScores:
            pairScores[pairKey] = calculateSimilarity(
                df.at[i1, 'prediction'], df.at[i2, 'prediction']
            )
        player1.append(df.at[i1, 'leaderboardAlias'])
        player2.append(df.at[i2, 'leaderboardAlias'])
        similarity.append(pairScores[pairKey])

assert(len(player1) == len(player2) == len(similarity))

pairs = pd.DataFrame({'player1':player1,'player2':player2,'similarity':similarity})

display(pairs.head(1))

In [58]:
# convert the long list of pairs into a player-by-player similarity matrix
matrix = pairs.pivot_table(columns='player2', index='player1', values='similarity').reset_index()
# self-pairs were never generated, so the diagonal is NaN; fill it (and any
# other missing pair) with 100
matrix = matrix.fillna(100)

# check matrix shape (still includes the 'player1' name column at this point)
print(matrix.shape)

# save player name column, then drop it from matrix since it's non-numeric and can't go into the t-SNE algorithm.
# .copy() makes player_coordinates an independent frame, so adding columns to it
# later does not rely on the chained-assignment warning being disabled.
player_coordinates = matrix[['player1']].copy()
matrix = matrix.drop(columns=['player1'])

(78, 79)


In [65]:
# t-SNE: embed each player's row of similarity scores into two dimensions
from sklearn.manifold import TSNE
# t-SNE is stochastic; fix the seed so the saved coordinates are reproducible
m = TSNE(learning_rate=50, random_state=42)

tsne_features = m.fit_transform(matrix)

# sanity check: each player should have one set of tsne_features values
assert(len(tsne_features) == len(matrix))

# for each player, apply their (x,y) coordinates; .assign builds a new frame
# instead of writing into what may be a slice of `matrix`
player_coordinates = player_coordinates.assign(
    x=tsne_features[:,0],
    y=tsne_features[:,1],
)

# looks like this
display(player_coordinates.head(1))

import seaborn as sns
import matplotlib.pyplot as plt
ax = sns.scatterplot(x="x", y="y", data=player_coordinates)
ax.set_title("t-SNE embedding of player prediction similarity")
plt.show()

In [74]:
# Write the per-player (x, y) coordinates to disk, without the index column.
tsne_output_path = "../2024/leaderboard/data/tsne.csv"
player_coordinates.to_csv(tsne_output_path, index=False)