# Begin

### Import Statements

In [1]:
import pandas as pd
import itertools
from scipy.stats import spearmanr
import numpy as np

### Load DataFrames

* Min KLs / Scale Normalized KL Divergence (SNKL)

In [2]:
min_kls = pd.read_csv('csv_files/min_kls.csv', index_col=[0, 1, 2])
min_kls.columns.name = 'Algorithm'

* Y-normalized KL Divergence

In [3]:
y_normalized_kls = pd.read_csv('csv_files/y_normalized_kls.csv', index_col=[0, 1, 2])

* KL at different scales

In [4]:
kl_at_scales = pd.read_csv('csv_files/kl_at_1_and_10.csv', index_col=[0, 1, 2])
kl_at_scales.columns.name = 'Algorithm'
kl_at_scales = kl_at_scales.unstack().reorder_levels(["Scale", "Algorithm"], axis=1).sort_index(axis=1)

* KL at infinite scale

In [5]:
kl_at_infty = pd.read_csv('csv_files/kl_at_infty.csv', index_col=[0, 1])
kl_at_infty.columns.name = 'Algorithm'

* Forced Scale KL Divergence (X and Y normalized KL)

In [6]:
fskl = pd.read_csv('csv_files/zadu_kls.csv', index_col=[0,1])

* KL with Gaussian kernel for Q

In [7]:
gaussian_kl = pd.read_csv('csv_files/gaussian_kl_at_1.csv', index_col=[0, 1, 2])
gaussian_kl = gaussian_kl.loc[:, :, 1]

* ~~Dropping ORL results~~

In [8]:
# for df in [min_kls, y_normalized_kls, kl_at_infty, kl_at_scales, zadu_kls, gaussian_kl]:
#     df.drop('orl', level='Dataset', inplace=True)

### Common Functions

In [9]:
def get_kl_stats(df):
    """
    Calculate statistics (min, max, mean, standard deviation, median) for each algorithm in `df`
    """
    kl_stats = df.agg([pd.Series.min, pd.Series.max, pd.Series.mean, pd.Series.std, pd.Series.median]).T

    if kl_stats.index.nlevels == 2:
        kl_stats.index.rename('Statistic', level=1, inplace=True)
    else:
        kl_stats = kl_stats.T

    colors = {
        "min": "background-color: #2F2D2E; color: white",
        "max": "background-color: #808080; color: white",
        "mean": "background-color: #536878; color: white",
        "std": "background-color: #493D31; color: white",
        "median": "background-color: #323F48: color: white",
    }


    def make_pretty(styler):
        styler.set_caption('Statistics')
        styler.apply(lambda row : [colors.get(row.name[1], "")] * len(row), axis=1)
        styler.map_index(lambda stat : colors.get(stat, ""), axis=0, level=1)
        styler.set_table_styles(
        [{'selector': 'td, th', 'props': [('border', '1px solid black')]}]
    )
        return styler

    # Apply the Styler
    return kl_stats.style.pipe(make_pretty)

In [10]:
def percentage_per_order(df: pd.DataFrame, drop_UMAP: bool):
    """
    Find the percentages of instances where the metric in `df` follows a certain order (e.g. MDS >= TSNE >= RANDOM)
    """
    results = {}

    if drop_UMAP:
        permutations = itertools.permutations(['TSNE', 'MDS', 'RANDOM'])
    else:
        permutations = itertools.permutations(['TSNE', 'UMAP', 'MDS', 'RANDOM'])


    for perm in permutations:
        condition = pd.Series(True, index=df.index)
        for i in range(1, len(perm)):
            condition &= df[perm[i-1]] <= df[perm[i]]

        percentage = (condition).mean() * 100
        # results[" < ".join(perm)] = f"{percentage:.3}%"
        results[" < ".join(perm)] = percentage


    return results

In [11]:
def ranks_for_spearman(df: pd.DataFrame, drop_UMAP: bool):
    """
    Gives each entry a rank depending on the order of least KL between the algorithms
    """
    if drop_UMAP:
        permutations = itertools.permutations(['TSNE', 'MDS', 'RANDOM'])
    else:
        permutations = reversed(list(itertools.permutations(df.columns)))

    ranked_df = pd.DataFrame(100, index=df.index, columns=["Rank"])

    for rank, perm in enumerate(permutations):
        condition = pd.Series(True, index=df.index)
        for i in range(1, len(perm)):
            condition &= df[perm[i-1]] <= df[perm[i]]

        ranked_df[condition] = rank

    return ranked_df
        

# Analysis: Min KLs (Scale Normalized KL Divergence)

### Intro

In [12]:
min_kls.info()
min_kls.head(15)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 480 entries, ('auto-mpg', 'Run 0', 'x') to ('wine', 'Run 9', 'y')
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   RANDOM  480 non-null    float64
 1   MDS     480 non-null    float64
 2   UMAP    480 non-null    float64
 3   TSNE    480 non-null    float64
dtypes: float64(4)
memory usage: 17.8+ KB


Unnamed: 0_level_0,Unnamed: 1_level_0,Algorithm,RANDOM,MDS,UMAP,TSNE
Dataset,Run,Coord,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
auto-mpg,Run 0,x,4e-06,6.472563,0.520477,0.191975
auto-mpg,Run 0,y,2.535313,1.837415,1.870127,1.769069
auto-mpg,Run 1,x,6e-06,6.669652,0.43415,0.19602
auto-mpg,Run 1,y,2.535313,1.824199,1.971372,1.777601
auto-mpg,Run 2,x,0.008391,6.520778,0.540085,0.201242
auto-mpg,Run 2,y,2.535313,1.833119,1.942651,1.785405
auto-mpg,Run 3,x,0.229519,6.632549,0.494364,0.199491
auto-mpg,Run 3,y,2.535208,1.82552,1.890303,1.76765
auto-mpg,Run 4,x,0.161434,6.524206,0.578017,0.196217
auto-mpg,Run 4,y,2.535285,1.839745,1.868108,1.765736


### Check within search range

* Since minimum-finding function (`scipy.optimize.minimize_scalar`) was bounded by (0, 300), should check if the minimum actually lies beyond 300 for any embedding

In [13]:
minimum_beyond_bound = ((min_kls.loc[:, :, 'x'] > 50)).any(axis=None)
print("There are some minima beyond 50: ", minimum_beyond_bound)

There are some minima beyond 50:  False


* For which datasets do the graphs need to be drawn for scales greater than 15?

In [14]:
thresh = 15
# set([idx[0] for idx in min_kls.loc[:, :, 'x'][min_kls.loc[:, :, 'x'] > thresh].dropna(how='all').index])
min_kls.loc[:, :, 'x'][min_kls.loc[:, :, 'x'] > thresh].dropna(how='all').style.background_gradient('viridis', vmin=thresh, vmax=250)

Unnamed: 0_level_0,Algorithm,RANDOM,MDS,UMAP,TSNE
Dataset,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
epileptic,Run 2,,15.956142,,
epileptic,Run 6,,15.049216,,
s-curve,Run 0,,45.292447,,
s-curve,Run 1,,45.330158,,
s-curve,Run 2,,45.311639,,
s-curve,Run 3,,45.199879,,
s-curve,Run 4,,44.982382,,
s-curve,Run 5,,44.941177,,
s-curve,Run 6,,45.307728,,
s-curve,Run 7,,45.229118,,


### KL Statistics of Each Algorithm

In [15]:
get_kl_stats(min_kls.groupby('Coord'))

Unnamed: 0_level_0,Coord,x,y
Algorithm,Statistic,Unnamed: 2_level_1,Unnamed: 3_level_1
RANDOM,min,3e-06,1.520969
RANDOM,max,0.692885,4.733988
RANDOM,mean,0.094614,3.416669
RANDOM,std,0.127575,0.894552
RANDOM,median,2.2e-05,3.737903
MDS,min,0.461398,0.338312
MDS,max,45.330346,4.437266
MDS,mean,9.111034,2.349173
MDS,std,13.299098,1.063931
MDS,median,2.562928,2.256778


### When is t-SNE much more performant than MDS, and when are they almost the same in performance?

In [16]:
snkl_scores = min_kls.loc[:, :, 'y']
snkl_scores[snkl_scores['MDS'] > snkl_scores['TSNE'] * 3.5]

Unnamed: 0_level_0,Algorithm,RANDOM,MDS,UMAP,TSNE
Dataset,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
coil20,Run 0,3.792203,1.801468,0.596492,0.304494
coil20,Run 1,3.792203,1.826313,0.652974,0.313066
coil20,Run 2,3.792203,1.656485,0.60768,0.307469
coil20,Run 3,3.792203,1.70383,0.603441,0.292203
coil20,Run 4,3.792202,1.742041,0.614907,0.307159
coil20,Run 5,3.792203,1.694181,0.642245,0.305428
coil20,Run 6,3.792203,1.638056,0.610181,0.30439
coil20,Run 7,3.792192,1.672423,0.619158,0.304634
coil20,Run 8,3.792201,1.710116,0.628453,0.309823
coil20,Run 9,3.792198,1.738658,0.612024,0.307829


# Analysis: Y-Normalized KLs

### Intro

In [17]:
y_normalized_kls.info()
y_normalized_kls.head(15)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 480 entries, ('auto-mpg', 'Run 0', 'x') to ('wine', 'Run 9', 'y')
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   RANDOM  480 non-null    float64
 1   MDS     480 non-null    float64
 2   UMAP    480 non-null    float64
 3   TSNE    480 non-null    float64
dtypes: float64(4)
memory usage: 17.8+ KB


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,RANDOM,MDS,UMAP,TSNE
Dataset,Run,Coord,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
auto-mpg,Run 0,x,0.739438,0.509407,0.050891,0.01638
auto-mpg,Run 0,y,2.542375,2.424599,2.328818,2.355027
auto-mpg,Run 1,x,0.738554,0.512678,0.037625,0.015826
auto-mpg,Run 1,y,2.542868,2.423216,2.365333,2.360946
auto-mpg,Run 2,x,0.738011,0.51011,0.036,0.017893
auto-mpg,Run 2,y,2.542183,2.424133,2.415831,2.354604
auto-mpg,Run 3,x,0.738331,0.517105,0.041797,0.017104
auto-mpg,Run 3,y,2.540973,2.421634,2.335741,2.349177
auto-mpg,Run 4,x,0.730897,0.506407,0.052426,0.017702
auto-mpg,Run 4,y,2.54175,2.425669,2.352903,2.345319


### KL Statistics of Each Algorithm

In [18]:
get_kl_stats(y_normalized_kls.groupby('Coord'))

Unnamed: 0_level_0,Coord,x,y
Unnamed: 0_level_1,Statistic,Unnamed: 2_level_1,Unnamed: 3_level_1
RANDOM,min,0.709515,1.525251
RANDOM,max,0.810239,4.741364
RANDOM,mean,0.730713,3.4238
RANDOM,std,0.016382,0.894338
RANDOM,median,0.725777,3.745498
MDS,min,0.025037,1.376096
MDS,max,0.677647,4.724712
MDS,mean,0.246038,3.32805
MDS,std,0.217256,0.902933
MDS,median,0.172719,3.639568


# Analysis: KL at Scales (1 and 10)

### Intro

In [19]:
kl_at_scales.info()
kl_at_scales.head(15)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 240 entries, ('auto-mpg', 'Run 0') to ('wine', 'Run 9')
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   (1, MDS)      240 non-null    float64
 1   (1, RANDOM)   240 non-null    float64
 2   (1, TSNE)     240 non-null    float64
 3   (1, UMAP)     240 non-null    float64
 4   (10, MDS)     240 non-null    float64
 5   (10, RANDOM)  240 non-null    float64
 6   (10, TSNE)    240 non-null    float64
 7   (10, UMAP)    240 non-null    float64
dtypes: float64(8)
memory usage: 16.7+ KB


Unnamed: 0_level_0,Scale,1,1,1,1,10,10,10,10
Unnamed: 0_level_1,Algorithm,MDS,RANDOM,TSNE,UMAP,MDS,RANDOM,TSNE,UMAP
Dataset,Run,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
auto-mpg,Run 0,2.260036,2.552953,2.056778,1.914384,1.86279,3.178052,2.697596,2.77934
auto-mpg,Run 1,2.259054,2.553946,2.0539,2.028368,1.846315,3.189645,2.678543,2.918564
auto-mpg,Run 2,2.259457,2.552795,2.058033,1.980785,1.857672,3.186284,2.697722,2.857733
auto-mpg,Run 3,2.258952,2.551285,2.041167,1.93675,1.848275,3.195708,2.671838,2.803946
auto-mpg,Run 4,2.260591,2.552873,2.049322,1.901817,1.863742,3.197782,2.697123,2.757661
auto-mpg,Run 5,2.259248,2.553572,2.082053,2.043938,1.852597,3.189174,2.725878,2.950985
auto-mpg,Run 6,2.260012,2.550859,2.08104,1.943119,1.852631,3.180017,2.734143,2.842294
auto-mpg,Run 7,2.259878,2.555118,2.069704,1.967446,1.845652,3.197647,2.697283,2.833012
auto-mpg,Run 8,2.260087,2.55214,2.061277,1.86666,1.858349,3.188546,2.734419,2.728013
auto-mpg,Run 9,2.258823,2.555313,2.06614,2.020646,1.851207,3.223766,2.721533,2.903508


### KL Statistics For Each Algorithm

In [None]:
get_kl_stats(kl_at_scales.stack(level=0).groupby('Scale'))

Unnamed: 0_level_0,Scale,1,10
Algorithm,Statistic,Unnamed: 2_level_1,Unnamed: 3_level_1
MDS,min,1.157711,0.343924
MDS,max,4.635335,4.79378
MDS,mean,2.785069,2.591275
MDS,std,0.991742,1.125679
MDS,median,2.703978,2.482691
RANDOM,min,1.532239,2.144199
RANDOM,max,4.753577,5.408364
RANDOM,mean,3.435087,4.075686
RANDOM,std,0.89491,0.899219
RANDOM,median,3.757944,4.406172


# Analysis: KL at Infinity

In [21]:
kl_at_infty.info()
kl_at_infty.head(15)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 240 entries, ('auto-mpg', 'Run 0') to ('wine', 'Run 9')
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   RANDOM  240 non-null    float64
 1   MDS     240 non-null    float64
 2   UMAP    240 non-null    float64
 3   TSNE    240 non-null    float64
dtypes: float64(4)
memory usage: 9.2+ KB


Unnamed: 0_level_0,Algorithm,RANDOM,MDS,UMAP,TSNE
Dataset,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
auto-mpg,Run 0,4.471531,24.549255,9.733527,25.112246
auto-mpg,Run 1,5.158738,25.123875,4.365509,20.81097
auto-mpg,Run 2,4.495461,24.602004,3.924285,25.112246
auto-mpg,Run 3,5.48939,25.123875,4.395012,25.122034
auto-mpg,Run 4,4.902924,25.123875,3.969273,25.112246
auto-mpg,Run 5,5.050707,24.546348,4.314876,25.112246
auto-mpg,Run 6,4.33018,24.248274,4.079489,25.123875
auto-mpg,Run 7,4.430172,24.543722,4.708786,25.112246
auto-mpg,Run 8,4.516623,25.123875,3.938876,25.122034
auto-mpg,Run 9,4.671378,25.112246,3.975901,25.112246


In [22]:
kl_at_infty.agg([pd.Series.min, pd.Series.max, pd.Series.mean, pd.Series.std, pd.Series.median])


Algorithm,RANDOM,MDS,UMAP,TSNE
min,2.983299,0.805937,1.223326,0.65693
max,8.371012,25.123875,22.593049,25.123875
mean,5.699435,8.410468,4.519085,9.581806
std,1.139825,8.610645,2.868249,8.876871
median,5.978104,3.315857,3.436983,4.339935


# Analysis: ZADU KLs

In [23]:
fskl.info()
fskl.head(15)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 240 entries, ('auto-mpg', 'Run 0') to ('wine', 'Run 9')
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   RANDOM  240 non-null    float64
 1   MDS     240 non-null    float64
 2   UMAP    240 non-null    float64
 3   TSNE    240 non-null    float64
dtypes: float64(4)
memory usage: 9.2+ KB


Unnamed: 0_level_0,Unnamed: 1_level_0,RANDOM,MDS,UMAP,TSNE
Dataset,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
auto-mpg,Run 0,2.542375,2.424598,2.328818,2.355026
auto-mpg,Run 1,2.542868,2.423216,2.365333,2.360946
auto-mpg,Run 2,2.542183,2.424133,2.415831,2.354604
auto-mpg,Run 3,2.540973,2.421634,2.33574,2.349177
auto-mpg,Run 4,2.54175,2.425669,2.352902,2.345319
auto-mpg,Run 5,2.542638,2.423558,2.384354,2.345004
auto-mpg,Run 6,2.540586,2.4214,2.357246,2.359305
auto-mpg,Run 7,2.543478,2.424475,2.363545,2.358625
auto-mpg,Run 8,2.541546,2.422234,2.354257,2.355172
auto-mpg,Run 9,2.543807,2.424791,2.390157,2.358451


In [24]:
fskl.agg([pd.Series.min, pd.Series.max, pd.Series.mean, pd.Series.std, pd.Series.median])

Unnamed: 0,RANDOM,MDS,UMAP,TSNE
min,1.525251,1.376096,1.300795,1.291456
max,4.741364,4.724712,4.706779,4.617412
mean,3.423979,3.328211,3.299031,3.286501
std,0.894012,0.902649,0.91301,0.898783
median,3.745498,3.639568,3.598117,3.591945


##### **The largest difference between KL values across the algorithms**

In [25]:
(fskl.max(axis=1) - fskl.min(axis=1)).max()

0.27426240114634926

In [26]:
(min_kls.loc[:, :, 'y'].max(axis=1) - min_kls.loc[:, :, 'y'].min(axis=1)).max()

3.7282559175504195

* Such a low difference compared to min_kls implies low sensitivity.

# Performance of Each Metric

### Percentages of Each Algorithm Ranking

In [27]:
def get_table(drop_UMAP: bool):
    data = {
        "Min-KL" : percentage_per_order(min_kls.loc[:, :, 'y'], drop_UMAP=drop_UMAP),
        # "Normalized-KL": percentage_per_order(y_normalized_kls.loc[:, :, 'y'], drop_UMAP=drop_UMAP),
        "ZADU KLs" : percentage_per_order(fskl, drop_UMAP=drop_UMAP),
        "KL at Scale = 1": percentage_per_order(kl_at_scales[1], drop_UMAP=drop_UMAP),
        # "KL at Scale = 5": percentage_per_order(kl_at_scales[5], drop_UMAP=drop_UMAP),
        "KL at Scale = 10": percentage_per_order(kl_at_scales[10], drop_UMAP=drop_UMAP),
        # "KL at Scale = 20": percentage_per_order(kl_at_scales[20], drop_UMAP=drop_UMAP),
        "KL at Scale = \u221e": percentage_per_order(kl_at_infty, drop_UMAP=drop_UMAP),
        "Gaussian KL" : percentage_per_order(gaussian_kl, drop_UMAP=drop_UMAP)
    }
    return pd.DataFrame(data).style.background_gradient('Blues', vmin=0, vmax=150).format(lambda x : f"{x:.2f}%")


##### **Comparing t-SNE, MDS, Random**

In [28]:
get_table(drop_UMAP=True)

Unnamed: 0,Min-KL,ZADU KLs,KL at Scale = 1,KL at Scale = 10,KL at Scale = ∞,Gaussian KL
TSNE < MDS < RANDOM,96.67%,82.92%,91.67%,68.33%,28.33%,0.00%
TSNE < RANDOM < MDS,0.00%,0.00%,0.00%,3.75%,0.00%,0.00%
MDS < TSNE < RANDOM,3.33%,17.08%,0.00%,15.42%,26.25%,0.00%
MDS < RANDOM < TSNE,0.00%,0.00%,8.33%,12.50%,16.25%,42.92%
RANDOM < TSNE < MDS,0.00%,0.00%,0.00%,0.00%,15.42%,4.58%
RANDOM < MDS < TSNE,0.00%,0.00%,0.00%,0.00%,13.75%,52.50%


##### **Comparing t-SNE, UMAP, MDS, Random**

In [29]:
get_table(drop_UMAP=False)

Unnamed: 0,Min-KL,ZADU KLs,KL at Scale = 1,KL at Scale = 10,KL at Scale = ∞,Gaussian KL
TSNE < UMAP < MDS < RANDOM,81.25%,38.75%,82.92%,26.67%,9.58%,0.00%
TSNE < UMAP < RANDOM < MDS,0.00%,0.00%,0.00%,3.75%,0.00%,0.00%
TSNE < MDS < UMAP < RANDOM,13.75%,18.33%,0.42%,0.00%,17.08%,0.00%
TSNE < MDS < RANDOM < UMAP,0.00%,0.00%,4.17%,0.00%,0.42%,0.00%
TSNE < RANDOM < UMAP < MDS,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%
TSNE < RANDOM < MDS < UMAP,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%
UMAP < TSNE < MDS < RANDOM,1.67%,25.83%,4.17%,41.67%,1.25%,0.00%
UMAP < TSNE < RANDOM < MDS,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%
UMAP < MDS < TSNE < RANDOM,0.00%,5.83%,0.00%,7.08%,8.75%,0.00%
UMAP < MDS < RANDOM < TSNE,0.00%,0.00%,1.67%,0.00%,7.50%,24.17%


### Agreement on order between metrics

* How often do each pair of metrics give the same algorithm ranking?

In [30]:
def get_agreement_table(drop_UMAP=True):
    ranks = {
        "SNKL" : ranks_for_spearman(min_kls.loc[:, :, 'y'], drop_UMAP=drop_UMAP).values,
        "FSKL" : ranks_for_spearman(fskl, drop_UMAP=drop_UMAP).values,
        "KL at Scale = 1": ranks_for_spearman(kl_at_scales[1], drop_UMAP=drop_UMAP).values,
        "KL at Scale = 10": ranks_for_spearman(kl_at_scales[10], drop_UMAP=drop_UMAP).values,
        "KL at Scale = \u221e" : ranks_for_spearman(kl_at_infty, drop_UMAP=drop_UMAP).values,
    }

    num_observations = min_kls.loc[:, :, 'y'].shape[0]

    agreement = pd.DataFrame(0, index=ranks.keys(), columns=ranks.keys()).astype("Float64")

    for metric_i in ranks.keys():
        for metric_j in ranks.keys():
            num_same = np.sum(ranks[metric_i] == ranks[metric_j])
            agreement.loc[metric_i, metric_j] = num_same / num_observations * 100 # Percentage of times metric_i and metric_j agreed on the order

    return agreement.style.background_gradient('Blues', vmin=20, vmax=140).format(lambda x : f"{x:.2f}%")

In [31]:
get_agreement_table(True)

Unnamed: 0,SNKL,FSKL,KL at Scale = 1,KL at Scale = 10,KL at Scale = ∞
SNKL,100.00%,79.58%,91.67%,68.33%,29.17%
FSKL,79.58%,100.00%,74.58%,67.50%,23.75%
KL at Scale = 1,91.67%,74.58%,100.00%,76.67%,26.67%
KL at Scale = 10,68.33%,67.50%,76.67%,100.00%,25.83%
KL at Scale = ∞,29.17%,23.75%,26.67%,25.83%,100.00%


### Spearman Rank Correlation of Orders

In [32]:
def get_ranks(drop_UMAP: bool):
    dfs = {
        "Min-KL" : min_kls.loc[:, :, 'y'],
        # "Y-Normalized-KL": y_normalized_kls.loc[:, :, 'y'], drop_UMAP=drop_UMAP).values,
        "ZADU KLs" : fskl,
        "KL at Scale = 1": kl_at_scales[1],
        "KL at Scale = 10": kl_at_scales[10],
        "KL at Scale = \u221e" : kl_at_infty,
        "Gaussian KL": gaussian_kl,
    }

    ranks = {name:ranks_for_spearman(df, drop_UMAP=drop_UMAP).values for name, df in dfs.items()}

    correlation = pd.DataFrame(pd.NA, index=ranks.keys(), columns=ranks.keys()).astype("Float64")

    methods = list(ranks.keys())
    for i in range(len(methods)):
        for j in range(i, len(methods)):
            new_var = spearmanr(ranks[methods[i]], ranks[methods[j]])[0]
            correlation.loc[methods[i], methods[j]] = round(new_var, 2)

    return correlation.style.set_caption("Without UMAP" if drop_UMAP else "With UMAP").background_gradient('Blues', vmin=-0, vmax=2).format(lambda x: '-' if pd.isna(x) else x)



#### **Considering only t-SNE, MDS, and Random**

In [33]:
get_ranks(drop_UMAP=True)

Unnamed: 0,Min-KL,ZADU KLs,KL at Scale = 1,KL at Scale = 10,KL at Scale = ∞,Gaussian KL
Min-KL,1.0,-0.08,0.62,0.34,-0.12,-0.21
ZADU KLs,-,1.0,-0.14,0.34,0.11,-0.05
KL at Scale = 1,-,-,1.0,0.56,-0.15,-0.34
KL at Scale = 10,-,-,-,1.0,0.17,-0.42
KL at Scale = ∞,-,-,-,-,1.0,-0.05
Gaussian KL,-,-,-,-,-,1.0


#### **Considering t-SNE, UMAP, MDS, Random**

* Orders are not meaningfully ranked, so correlation analysis is in question

In [34]:
get_ranks(drop_UMAP=False)

Unnamed: 0,Min-KL,ZADU KLs,KL at Scale = 1,KL at Scale = 10,KL at Scale = ∞,Gaussian KL
Min-KL,1.0,0.3,0.16,0.56,0.31,0.51
ZADU KLs,-,1.0,0.24,0.25,0.05,0.12
KL at Scale = 1,-,-,1.0,0.24,0.19,0.19
KL at Scale = 10,-,-,-,1.0,-0.14,0.02
KL at Scale = ∞,-,-,-,-,1.0,0.41
Gaussian KL,-,-,-,-,-,1.0


#### **For Which Datasets is Expected Order Not Observed for Min-KL?**

In [35]:
def incorrect_datasets(df: pd.DataFrame, drop_UMAP=True):
    if drop_UMAP:
        order = ['TSNE', 'MDS', 'RANDOM']
        df = df.drop(columns='UMAP')
    else:
        order = ['TSNE', 'UMAP', 'MDS', 'RANDOM']

    condition = pd.Series(False, index=df.index)
    for i in range(1, len(order)):
        condition |= df[order[i-1]] > df[order[i]]


    datasets = sorted(set(df[condition].index.get_level_values(level=0)))
    max_kl = df[condition].max(axis=None)
    not_preserved_kls = df[condition].style.background_gradient('inferno', vmax=max_kl)

    return datasets, not_preserved_kls

In [36]:

bad_datasets, not_preserved_kls = incorrect_datasets(min_kls.loc[:, :, 'y'], drop_UMAP=True)
print(bad_datasets)
not_preserved_kls

['penguins']


Unnamed: 0_level_0,Algorithm,RANDOM,MDS,TSNE
Dataset,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
penguins,Run 0,2.407916,1.983058,2.037759
penguins,Run 1,2.408258,1.973734,2.048086
penguins,Run 4,2.408258,1.975566,1.995916
penguins,Run 5,2.408258,1.975692,2.020049
penguins,Run 6,2.408258,1.979904,2.028398
penguins,Run 7,2.408258,1.971269,2.015081
penguins,Run 8,2.408258,1.974364,1.985759
penguins,Run 9,2.408258,1.976551,2.020332


In [37]:
not_preserved_kls.data.mean(axis=0)

Algorithm
RANDOM    2.408215
MDS       1.976267
TSNE      2.018922
dtype: float64

#### **For Which Datasets is Expected Order Not Observed for ZADU-KL?**

In [None]:
bad_datasets, not_preserved_kls = incorrect_datasets(fskl, drop_UMAP=True)
print(bad_datasets)
not_preserved_kls

['bank', 'imdb', 's-curve', 'sms', 'swissroll']


Unnamed: 0_level_0,Unnamed: 1_level_0,RANDOM,MDS,TSNE
Dataset,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bank,Run 0,4.097308,3.96677,3.979232
bank,Run 1,4.100539,3.966469,3.980284
bank,Run 2,4.101505,3.964809,3.984689
bank,Run 3,4.098236,3.965542,3.973875
bank,Run 4,4.10203,3.960566,3.975497
bank,Run 5,4.09989,3.964299,3.975215
bank,Run 6,4.100744,3.966768,3.982059
bank,Run 7,4.100205,3.965665,3.979585
bank,Run 8,4.099359,3.961458,3.983299
bank,Run 9,4.100478,3.969084,3.985187


In [39]:
len(not_preserved_kls.index)

41

In [40]:
zadu_bad_datasets = ['bank', 'imdb', 's-curve', 'sms', 'swissroll']

def filter_zadu_bad(df):
    return df.loc[zadu_bad_datasets].drop(columns='UMAP')

zadu_zadu_bad = filter_zadu_bad(fskl)
min_zadu_bad = filter_zadu_bad(min_kls.loc[:, :, 'y'])
pd.concat([zadu_zadu_bad, min_zadu_bad], axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,RANDOM,MDS,TSNE,RANDOM,MDS,TSNE
Dataset,Run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
bank,Run 0,4.097308,3.96677,3.979232,4.092994,2.509956,1.497967
bank,Run 1,4.100539,3.966469,3.980284,4.093197,2.5007,1.496993
bank,Run 2,4.101505,3.964809,3.984689,4.093197,2.478655,1.504374
bank,Run 3,4.098236,3.965542,3.973875,4.093095,2.486565,1.494883
bank,Run 4,4.10203,3.960566,3.975497,4.093197,2.456578,1.481788
bank,Run 5,4.09989,3.964299,3.975215,4.093197,2.495032,1.489624
bank,Run 6,4.100744,3.966768,3.982059,4.093197,2.465141,1.494873
bank,Run 7,4.100205,3.965665,3.979585,4.093197,2.526484,1.496984
bank,Run 8,4.099359,3.961458,3.983299,4.093176,2.49943,1.49021
bank,Run 9,4.100478,3.969084,3.985187,4.093197,2.55137,1.492582


### Spearman Rank Correlation of the Scores

In [None]:
def get_score_corr_df(drop_UMAP=True):
    if drop_UMAP:
        algs = ['MDS', 'TSNE', 'RANDOM']
    else:
        algs = ['MDS', 'TSNE', 'RANDOM', 'UMAP']

    dfs = {
        "SNKL" : min_kls.loc[:, :, 'y'],
        "FSKL" : fskl,
        "KL(1)" : kl_at_scales[1],
        "KL(10)" : kl_at_scales[10],
        "KLINF" : kl_at_infty
    }

    score_corr_df = pd.concat(dfs, axis='columns').stack().loc[:, :, algs].corr('spearman')
    return score_corr_df

score_corr_df = get_score_corr_df(drop_UMAP=True)

def corr_styler(df: pd.DataFrame):
    df = df.round(2)
    mask = np.tril(np.ones(df.shape), k=-1).astype(bool)
    upper_triangled = df.mask(mask, -2)
    return upper_triangled.style.background_gradient('Blues', vmin=0.38, vmax=1.7).format(lambda x : '-' if x == -2 else x)
    

corr_styler(score_corr_df)

Unnamed: 0,SNKL,FSKL,KL(1),KL(10),KLINF
SNKL,1.0,0.51,0.94,0.94,0.45
FSKL,-,1.0,0.54,0.51,0.41
KL(1),-,-,1.0,0.87,0.39
KL(10),-,-,-,1.0,0.55
KLINF,-,-,-,-,1.0


### Y-Normalized KLs vs. ZADU KLs

* What is the maximum deviation of scores between fskl and y_normalized_kls?

In [42]:
((y_normalized_kls.loc[:, :, 'y'] - fskl).abs()).max(axis=0)

RANDOM    0.006199
MDS       0.004925
UMAP      0.005514
TSNE      0.006218
dtype: float64

* The majority of scores don't even deviate by 10<sup>-6</sup>

In [43]:
set(fskl[((y_normalized_kls.loc[:, :, 'y'] - fskl).abs() >= 10 ** -6).all(axis=1)].index.get_level_values(0))

{'hatespeech', 'sentiment', 'sms'}

* Therefore, it appears that there's not much difference between normalizing both the dataset and embedding before calculating KL divergence, and only normalizing the embedding.