In [1]:
# Import necessary libraries
import json 
import pandas as pd
import numpy as np
from scipy.stats import spearmanr

In [2]:
with open("out10x.json", 'r') as fdata:
    data = json.load(fdata)

Convert the shepard scores into [0-1] so that 0 is better so it is comparable to other scores

In [3]:
for dataset, res in data.items():
    for key in res.keys():
        if "sheppard" in key: 
            res[key] = 1 - (res[key] + 1) / 2

In [4]:
df = pd.DataFrame(data)

Unnamed: 0,har_0,har_1,har_2,har_3,har_4,har_5,har_6,har_7,har_8,har_9,...,bank_0,bank_1,bank_2,bank_3,bank_4,bank_5,bank_6,bank_7,bank_8,bank_9
MDS_raw,841279200.0,840946600.0,840226000.0,840905700.0,840257200.0,841020500.0,840554400.0,841145700.0,841238800.0,841065100.0,...,1847327000.0,1849817000.0,1850775000.0,1849195000.0,1851677000.0,1849693000.0,1849616000.0,1849191000.0,1850667000.0,1849287000.0
MDS_norm,8.839321,8.837573,8.833786,8.837359,8.833951,8.837962,8.835513,8.83862,8.839109,8.838196,...,8.412015,8.417683,8.419862,8.416268,8.421915,8.417401,8.417227,8.416259,8.419618,8.416478
MDS_scalenorm,0.1892851,0.1903003,0.1924827,0.1904233,0.1923883,0.1900667,0.1914946,0.1896953,0.1893978,0.189938,...,0.3577325,0.3560605,0.3554084,0.3564782,0.3548071,0.3561435,0.3561868,0.3564742,0.3554869,0.3564161
MDS_kruskal,0.1459952,0.1473284,0.1497061,0.1475579,0.1494295,0.1472933,0.1487018,0.1462131,0.1458909,0.14721,...,0.3201178,0.3158464,0.3151969,0.3174094,0.3124974,0.3162028,0.3156323,0.3167153,0.3143835,0.3168455
MDS_sheppard,0.02613944,0.02727817,0.02834992,0.02724958,0.02820437,0.02719717,0.02792554,0.02664692,0.02604321,0.02680282,...,0.1878468,0.1816295,0.1796214,0.1832193,0.1776279,0.1821342,0.1820708,0.1831138,0.1795811,0.1833311
TSNE_raw,53555110000.0,49907180000.0,53664280000.0,48069850000.0,46141030000.0,49957730000.0,52239270000.0,39877360000.0,44399040000.0,50887230000.0,...,713483400000.0,649817200000.0,699240900000.0,729311400000.0,727867400000.0,684775100000.0,615291400000.0,593512100000.0,643087500000.0,563697400000.0
TSNE_norm,70.52601,68.0817,70.59785,66.81673,65.46248,68.11617,69.65421,60.85721,64.21487,68.74692,...,165.318,157.7698,163.6597,167.1417,166.9761,161.9579,153.5213,150.7797,156.9507,146.9438
TSNE_scalenorm,0.272679,0.2629721,0.2699534,0.2612807,0.2699339,0.2788021,0.2697914,0.2651101,0.2701522,0.264312,...,0.4441064,0.4365312,0.4285409,0.4434324,0.4441963,0.4413623,0.4246376,0.4228715,0.4437075,0.4338714
TSNE_kruskal,0.2390122,0.2282536,0.2280894,0.2291657,0.2433236,0.2438441,0.2365527,0.2450593,0.2444778,0.226877,...,0.4091811,0.4033116,0.3945089,0.4074875,0.4106017,0.4107749,0.3911567,0.3886871,0.4081659,0.398497
TSNE_sheppard,0.06566688,0.06154144,0.06086442,0.06211031,0.0679198,0.07421404,0.06404612,0.07161049,0.06923739,0.06016597,...,0.239033,0.2396694,0.2339613,0.2358974,0.2426208,0.2443682,0.234997,0.2303145,0.2364051,0.2351116


In [5]:
data = df.transpose()
data = data.drop([col for col in data.columns if "UMAP" in col],axis=1)
data

In [6]:
data[[col for col in data.columns if "raw" in col]].head()

Unnamed: 0,MDS_raw,TSNE_raw,RANDOM_raw
har_0,841279200.0,53555110000.0,3226825.0
har_1,840946600.0,49907180000.0,3229896.0
har_2,840226000.0,53664280000.0,3263362.0
har_3,840905700.0,48069850000.0,3184251.0
har_4,840257200.0,46141030000.0,3210462.0


In [7]:
metric_names = ["_raw", "_norm", "_scalenorm", "_kruskal", "_sheppard"]

In [8]:
# Calculate how often metrics agree with “correct order”
res = dict()
for mname in metric_names: 
    metric = data[(col for col in data.columns if mname in col)]
    name_lookup = ["MDS", "TSNE", "RANDOM"]
    orderCount = {
        ("MDS", "TSNE", "RANDOM"): 0,
        ("MDS", "RANDOM", "TSNE"): 0,
        ("TSNE", "MDS", "RANDOM"): 0,
        ("TSNE", "RANDOM", "MDS"): 0,
        ("RANDOM", "MDS", "TSNE"): 0,
        ("RANDOM", "TSNE", "MDS"): 0,                    
    }

    for ind,row in metric.iterrows():
        tup_row = sorted([(row[i], name_lookup[i]) for i in range(3)])
        order = tuple([tup[1] for tup in tup_row])
        orderCount[order] += 1

    totalvals = sum(orderCount.values())
    for key in orderCount.keys():
        orderCount[key] /= totalvals
        
    res[mname] = orderCount

  tup_row = sorted([(row[i], name_lookup[i]) for i in range(3)])
  tup_row = sorted([(row[i], name_lookup[i]) for i in range(3)])
  tup_row = sorted([(row[i], name_lookup[i]) for i in range(3)])
  tup_row = sorted([(row[i], name_lookup[i]) for i in range(3)])
  tup_row = sorted([(row[i], name_lookup[i]) for i in range(3)])


In [9]:
res

{'_raw': {('MDS', 'TSNE', 'RANDOM'): 0.0,
  ('MDS', 'RANDOM', 'TSNE'): 0.08333333333333333,
  ('TSNE', 'MDS', 'RANDOM'): 0.125,
  ('TSNE', 'RANDOM', 'MDS'): 0.0,
  ('RANDOM', 'MDS', 'TSNE'): 0.7916666666666666,
  ('RANDOM', 'TSNE', 'MDS'): 0.0},
 '_norm': {('MDS', 'TSNE', 'RANDOM'): 0.0,
  ('MDS', 'RANDOM', 'TSNE'): 0.08333333333333333,
  ('TSNE', 'MDS', 'RANDOM'): 0.125,
  ('TSNE', 'RANDOM', 'MDS'): 0.0,
  ('RANDOM', 'MDS', 'TSNE'): 0.7916666666666666,
  ('RANDOM', 'TSNE', 'MDS'): 0.0},
 '_scalenorm': {('MDS', 'TSNE', 'RANDOM'): 0.9083333333333333,
  ('MDS', 'RANDOM', 'TSNE'): 0.08333333333333333,
  ('TSNE', 'MDS', 'RANDOM'): 0.008333333333333333,
  ('TSNE', 'RANDOM', 'MDS'): 0.0,
  ('RANDOM', 'MDS', 'TSNE'): 0.0,
  ('RANDOM', 'TSNE', 'MDS'): 0.0},
 '_kruskal': {('MDS', 'TSNE', 'RANDOM'): 0.8208333333333333,
  ('MDS', 'RANDOM', 'TSNE'): 0.17916666666666667,
  ('TSNE', 'MDS', 'RANDOM'): 0.0,
  ('TSNE', 'RANDOM', 'MDS'): 0.0,
  ('RANDOM', 'MDS', 'TSNE'): 0.0,
  ('RANDOM', 'TSNE', 'MDS')

In [10]:
stab = " & " + " & ".join(mname.replace("_","") for mname in metric_names)
stab += "\\hline \\\\ \n"
stab

' & raw & norm & scalenorm & kruskal & sheppard\\hline \\\\ \n'

In [11]:
# Convert results to latex table
stab = " & " + " & ".join(mname.replace("_","") for mname in metric_names)
stab += "\\hline \\\\ \n"
for row in res['_raw'].keys():
    stab += ",".join(el for el in row)
    stab += " & "
    for metric in res.keys(): 
        stab += f"{round(res[metric][row], 3)}"
        if metric != "sheppard": 
            stab += " & "
    stab += "\\\\ \n"
    

In [12]:
print(stab)

 & raw & norm & scalenorm & kruskal & sheppard\hline \\ 
MDS,TSNE,RANDOM & 0.0 & 0.0 & 0.908 & 0.821 & 0.917\\ 
MDS,RANDOM,TSNE & 0.083 & 0.083 & 0.083 & 0.179 & 0.0\\ 
TSNE,MDS,RANDOM & 0.125 & 0.125 & 0.008 & 0.0 & 0.083\\ 
TSNE,RANDOM,MDS & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\ 
RANDOM,MDS,TSNE & 0.792 & 0.792 & 0.0 & 0.0 & 0.0\\ 
RANDOM,TSNE,MDS & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\ 



In [12]:
# Determine correct ranking for all metrics in each trial
rankings = pd.DataFrame(index=data.index, columns=metric_names)
for index, row in data.iterrows():
    for metric in metric_names:
        values = row.filter(like=metric)
        
        if metric == "_sheppard":
            rankings.loc[index, metric] = ', '.join(values.sort_values(
                ascending=False).index.str.replace(metric, ''))
        else:
            rankings.loc[index, metric] = ', '.join(values.sort_values(
                ascending=True).index.str.replace(metric, ''))

In [51]:
# Calculate the correlation between the rankings produced by the stress metrics
corr_matrix, _ = spearmanr(rankings)
corr_matrix = np.triu(corr_matrix)

corr_res = pd.DataFrame(
    corr_matrix, index=rankings.keys(), columns=rankings.keys())

In [54]:
# Convert results to latex table
stab = " & " + " & ".join(mname.replace("_", "") for mname in metric_names)
stab += "\\\\ \\hline\n"
for row in corr_res:
    stab += row.replace("_", "")
    stab += " & "
    for metric in corr_res.keys():
        stab += f"{round(corr_res[metric][row], 3)}"
        if metric != "_sheppard":
            stab += " & "
    stab += "\\\\ \n"

In [55]:
print(stab)

 & raw & norm & scalenorm & kruskal & sheppard\\ \hline
raw & 1.0 & 1.0 & -0.145 & -0.379 & 0.328\\ 
norm & 0.0 & 1.0 & -0.145 & -0.379 & 0.328\\ 
scalenorm & 0.0 & 0.0 & 1.0 & 0.591 & 0.174\\ 
kruskal & 0.0 & 0.0 & 0.0 & 1.0 & -0.252\\ 
sheppard & 0.0 & 0.0 & 0.0 & 0.0 & 1.0\\ 

