In [1]:
from src.knn_methods import KNNClassifier
from src.dataset import SwissRoll
import pandas as pd
import graphtools as gt
import os
import pickle
from omegaconf import DictConfig, OmegaConf
import matplotlib.pyplot as plt


In [2]:
def load_results(path, suffix="_0.pkl"):
    """Collect pickled result tables found under ``path`` into one DataFrame.

    The directory tree rooted at ``path`` is walked with ``os.walk`` and every
    file whose name ends with ``suffix`` is read with ``pd.read_pickle``. The
    tables are concatenated in the order the files are visited, keeping each
    table's own index.

    Parameters
    ----------
    path : str
        Root directory that is searched recursively.
    suffix : str, optional
        Filename ending that selects the files to load (default ``"_0.pkl"``).

    Returns
    -------
    pd.DataFrame
        All matching tables stacked row-wise, or an empty DataFrame when no
        file matches.

    Notes
    -----
    Unpickling can execute arbitrary code, so only point this at result
    directories you trust.
    """
    frames = [
        pd.read_pickle(os.path.join(root, filename))
        for root, _dirs, files in os.walk(path)
        for filename in files
        if filename.endswith(suffix)
    ]
    if not frames:
        return pd.DataFrame()
    # Concatenate once: growing a DataFrame inside the loop re-copies every
    # previously loaded row on each iteration.
    return pd.concat(frames)

In [9]:
# Default swiss roll on 200 points
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_08-16-14"
df = load_results(run_dir)

# Mean of every metric per method, rounded to 3 decimals, printed as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro &  Norm inf &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &           &           &              &              &        &        &          \\
\midrule
Euc            &      0.392 &     0.344 &     2.737 &     2.386 &        0.053 &        0.058 &  0.819 &  0.790 &    0.092 \\
diff-map       &      0.535 &     0.521 &     0.937 &     0.936 &        0.018 &        0.023 &  0.821 &  0.859 &    0.111 \\
euler-heat-geo &      0.905 &     0.883 &     2.713 &     2.632 &        0.053 &        0.064 &  0.806 &  0.924 &    0.102 \\
heat-geo       &      0.815 &     0.755 &   136.689 &   120.584 &        2.666 &        2.914 &  0.828 &  0.926 &    0.099 \\
heat-phate     &      0.786 &     0.765 &    67.313 &    56.410 &        1.313 &        1.364 &  0.818 &  0.901 &    0.096 \\
mar-heat-geo   &      0.818 &     0.765 &     4.526 &     3.833 &        0.088 &    

  print(df[["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [10]:
# Rotated swiss roll on 2000 points 5 width in 10 dimensions.
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_08-21-22"
df = load_results(run_dir)

# Mean of every metric per method, rounded to 3 decimals, printed as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())


\begin{tabular}{lrrrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro &  Norm inf &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &           &           &              &              &        &        &          \\
\midrule
Euc            &      0.266 &     0.266 &     2.391 &     2.169 &        0.005 &        0.006 &  0.337 &  0.396 &    1.898 \\
diff-map       &      0.199 &     0.281 &     0.957 &     0.958 &        0.002 &        0.003 &  0.333 &  0.392 &    5.737 \\
euler-heat-geo &      0.893 &     0.849 &     7.612 &     6.685 &        0.017 &        0.019 &  0.308 &  0.377 &    3.029 \\
heat-geo       &      0.589 &     0.541 &   153.574 &   122.678 &        0.335 &        0.346 &  0.339 &  0.401 &    2.364 \\
heat-phate     &      0.531 &     0.576 &   147.911 &   112.950 &        0.323 &        0.319 &  0.284 &  0.310 &    3.586 \\
mar-heat-geo   &      0.589 &     0.552 &     4.875 &     3.886 &        0.011 &    

  print(df[["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [11]:
# Rotated swiss roll on 2000 points 5 width in 10 dimensions.
# NOTE(review): same description as the In [10] cell, but this loads a
# different run directory -- confirm which settings changed.
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_08-27-29"
df = load_results(run_dir)

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())


\begin{tabular}{lrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &              &              &        &        &          \\
\midrule
Euc            &      0.269 &     0.270 &        0.005 &        0.006 &  0.426 &  0.438 &    1.880 \\
euler-heat-geo &      0.812 &     0.591 &        0.004 &        0.004 &  0.420 &  0.440 &    2.796 \\
heat-geo       &      0.534 &     0.418 &        0.033 &        0.034 &  0.424 &  0.442 &    2.251 \\
heat-phate     &      0.443 &     0.525 &        0.306 &        0.307 &  0.312 &  0.309 &    3.548 \\
mar-heat-geo   &      0.535 &     0.426 &        0.002 &        0.003 &  0.425 &  0.443 &    2.249 \\
phate          &      0.482 &     0.541 &        0.105 &        0.105 &  0.314 &  0.311 &    6.815 \\
shortest-path  &      0.890 &     0.895 &        0.016 &        0.023 &  0.426 &  0.438 &    5.014 \\
\bottomrule
\end{tabular}



  print(df[["Method", "SpearmanR", "PearsonR","Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [12]:
# Rotated swiss roll on 2000 points 5 width in 10 dimensions.
#python test_knn.py -m model=heat_geo,diff_map,euc,euler_heat,heat_phate,phate,shortest_path,heat_geo_mar
#fit_args.order=10 n_seeds=10 name=knn_sampta fit_args.tau_min=10 fit_args.tau_max=10 fit_args.n_tau=1 fit_args.tau=10
#data=swiss_roll_high data.manifold_noise=1.5
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_08-31-53"
df = load_results(run_dir)

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())


\begin{tabular}{lrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method        &            &           &              &              &        &        &          \\
\midrule
Euc           &      0.248 &     0.240 &        0.005 &        0.008 &  0.158 &  0.237 &    1.903 \\
diff-map      &      0.214 &     0.307 &        0.002 &        0.003 &  0.160 &  0.239 &    5.729 \\
heat-geo      &      0.258 &     0.225 &        0.513 &        0.560 &  0.161 &  0.241 &    2.465 \\
heat-phate    &      0.209 &     0.210 &        0.353 &        0.376 &  0.153 &  0.224 &    3.579 \\
mar-heat-geo  &      0.261 &     0.228 &        0.013 &        0.014 &  0.160 &  0.242 &    2.488 \\
phate         &      0.197 &     0.197 &        0.113 &        0.120 &  0.147 &  0.211 &    7.154 \\
shortest-path &      0.242 &     0.227 &        0.006 &        0.008 &  0.158 &  0.237 &    5.479 \\
\bottomrule
\end{tabular}



  print(df[["Method", "SpearmanR", "PearsonR","Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [13]:
#python test_knn.py -m model=heat_geo,diff_map,euc,euler_heat,heat_phate,phate,shortest_path,heat_geo_mar
#fit_args.order=10 n_seeds=10 name=knn_sampta fit_args.tau_min=2 fit_args.tau_max=2 fit_args.n_tau=1 fit_args.tau=2
#data=swiss_roll_high data.manifold_noise=0.05 data.rotate_dim=100
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_08-37-30"
df = load_results(run_dir)

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())


\begin{tabular}{lrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &              &              &        &        &          \\
\midrule
Euc            &      0.269 &     0.270 &        0.005 &        0.006 &  0.426 &  0.438 &    1.905 \\
diff-map       &      0.142 &     0.214 &        0.002 &        0.003 &  0.419 &  0.392 &    5.638 \\
euler-heat-geo &      0.879 &     0.730 &        0.007 &        0.008 &  0.407 &  0.434 &    2.851 \\
heat-geo       &      0.535 &     0.474 &        0.137 &        0.139 &  0.423 &  0.442 &    2.304 \\
heat-phate     &      0.443 &     0.525 &        0.306 &        0.307 &  0.312 &  0.309 &    3.570 \\
mar-heat-geo   &      0.535 &     0.485 &        0.006 &        0.007 &  0.424 &  0.443 &    2.242 \\
phate          &      0.482 &     0.541 &        0.105 &        0.105 &  0.314 &  0.311 &    7.088 \\
shortest-path  &      0.890 &     0.896 &        0

  print(df[["Method", "SpearmanR", "PearsonR","Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [14]:
#python test_knn.py -m model=heat_geo,diff_map,euc,euler_heat,heat_phate,phate,shortest_path,heat_geo_mar fit_args.order=10
#n_seeds=10 name=knn_sampta fit_args.tau_min=2 fit_args.tau_max=2 fit_args.n_tau=1 fit_args.tau=2 data=swiss_roll_high
#data.manifold_noise=0.05 data.rotate_dim=100 model.anisotropy=1
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_08-40-26"
df = load_results(run_dir)

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &              &              &        &        &          \\
\midrule
Euc            &      0.269 &     0.270 &        0.005 &        0.006 &  0.426 &  0.438 &    1.895 \\
diff-map       &      0.142 &     0.214 &        0.002 &        0.003 &  0.419 &  0.392 &    5.582 \\
euler-heat-geo &      0.604 &     0.409 &        0.009 &        0.009 &  0.426 &  0.445 &    2.713 \\
heat-geo       &      0.535 &     0.481 &        0.137 &        0.139 &  0.425 &  0.445 &    2.263 \\
heat-phate     &      0.453 &     0.530 &        0.311 &        0.312 &  0.313 &  0.311 &    3.542 \\
mar-heat-geo   &      0.535 &     0.486 &        0.006 &        0.007 &  0.425 &  0.444 &    2.230 \\
phate          &      0.506 &     0.556 &        0.108 &        0.106 &  0.309 &  0.307 &    7.425 \\
shortest-path  &      0.890 &     0.896 &        0

  print(df[["Method", "SpearmanR", "PearsonR","Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [17]:
# Made sure we are not using the landmarks.
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_08-45-12"
df = load_results(run_dir)

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &              &              &        &        &          \\
\midrule
Euc            &      0.269 &     0.270 &        0.005 &        0.006 &  0.426 &  0.438 &    1.892 \\
diff-map       &      0.142 &     0.214 &        0.002 &        0.003 &  0.419 &  0.392 &    6.146 \\
euler-heat-geo &      0.879 &     0.730 &        0.007 &        0.008 &  0.407 &  0.434 &    2.806 \\
heat-geo       &      0.535 &     0.474 &        0.137 &        0.139 &  0.423 &  0.442 &    2.334 \\
heat-phate     &      0.443 &     0.525 &        0.306 &        0.307 &  0.312 &  0.309 &    3.510 \\
mar-heat-geo   &      0.535 &     0.485 &        0.006 &        0.007 &  0.424 &  0.443 &    2.235 \\
phate          &      0.482 &     0.541 &        0.105 &        0.105 &  0.314 &  0.311 &    7.435 \\
shortest-path  &      0.890 &     0.896 &        0

  print(df[["Method", "SpearmanR", "PearsonR","Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [18]:
# Reproducing initial results.
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_09-09-53"
df = load_results(run_dir)

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &              &              &        &        &          \\
\midrule
Euc            &      0.388 &     0.338 &        0.021 &        0.023 &  0.610 &  0.774 &    0.249 \\
diff-map       &      0.119 &     0.232 &        0.007 &        0.009 &  0.514 &  0.400 &    0.419 \\
euler-heat-geo &      0.947 &     0.779 &        0.012 &        0.012 &  0.595 &  0.771 &    0.377 \\
heat-geo       &      0.754 &     0.553 &        0.268 &        0.268 &  0.603 &  0.772 &    0.328 \\
heat-phate     &      0.760 &     0.747 &        0.718 &        0.690 &  0.595 &  0.771 &    0.326 \\
mar-heat-geo   &      0.839 &     0.554 &        0.017 &        0.018 &  0.602 &  0.772 &    0.334 \\
phate          &      0.700 &     0.694 &        0.257 &        0.249 &  0.594 &  0.770 &    0.330 \\
shortest-path  &      0.944 &     0.928 &        0

  print(df[["Method", "SpearmanR", "PearsonR","Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [19]:
#python test_knn.py -m model=heat_geo,diff_map,euc,euler_heat,heat_phate,phate,shortest_path,heat_geo_mar fit_args.order=30
#n_seeds=10 name=knn_sampta fit_args.tau_min=1 fit_args.tau_max=1 fit_args.n_tau=1 fit_args.tau=1 data=swiss_roll_high
#data.manifold_noise=0.5 data.rotate_dim=100 data.width=1 data.n_points=500
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_09-12-12"
df = load_results(run_dir)

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &              &              &        &        &          \\
\midrule
Euc            &      0.385 &     0.332 &        0.022 &        0.025 &  0.582 &  0.756 &    0.251 \\
diff-map       &      0.123 &     0.225 &        0.007 &        0.009 &  0.509 &  0.415 &    0.377 \\
euler-heat-geo &      0.960 &     0.836 &        0.011 &        0.011 &  0.571 &  0.757 &    0.394 \\
heat-geo       &      0.788 &     0.607 &        0.263 &        0.267 &  0.578 &  0.759 &    0.330 \\
heat-phate     &      0.813 &     0.800 &        0.749 &        0.721 &  0.574 &  0.759 &    0.328 \\
mar-heat-geo   &      0.886 &     0.615 &        0.016 &        0.017 &  0.578 &  0.759 &    0.291 \\
phate          &      0.703 &     0.703 &        0.258 &        0.250 &  0.574 &  0.757 &    0.425 \\
shortest-path  &      0.916 &     0.897 &        0

  print(df[["Method", "SpearmanR", "PearsonR","Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [20]:
#python test_knn.py -m model=heat_geo,diff_map,euc,euler_heat,heat_phate,phate,shortest_path,heat_geo_mar fit_args.order=30
#n_seeds=10 name=knn_sampta fit_args.tau_min=1 fit_args.tau_max=1 fit_args.n_tau=1 fit_args.tau=1 data=swiss_roll_high
#data.manifold_noise=0.5 data.rotate_dim=100 data.width=1 data.n_points=500 model.knn=50
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_09-31-28"
df = load_results(run_dir)

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &              &              &        &        &          \\
\midrule
Euc            &      0.385 &     0.332 &        0.022 &        0.025 &  0.582 &  0.756 &    0.268 \\
diff-map       &      0.414 &     0.489 &        0.007 &        0.009 &  0.573 &  0.756 &    0.424 \\
euler-heat-geo &      0.521 &     0.447 &        0.005 &        0.007 &  0.352 &  0.565 &    0.774 \\
heat-geo       &      0.473 &     0.368 &        0.131 &        0.137 &  0.564 &  0.756 &    0.443 \\
heat-phate     &      0.408 &     0.366 &        0.152 &        0.165 &  0.570 &  0.749 &    0.439 \\
mar-heat-geo   &      0.479 &     0.415 &        0.009 &        0.011 &  0.558 &  0.755 &    0.400 \\
phate          &      0.447 &     0.409 &        0.064 &        0.071 &  0.567 &  0.742 &    0.420 \\
shortest-path  &      0.393 &     0.312 &        0

  print(df[["Method", "SpearmanR", "PearsonR","Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [21]:
#python test_knn.py -m model=heat_geo,diff_map,euc,euler_heat,heat_phate,phate,shortest_path,heat_geo_mar fit_args.order=30
#n_seeds=10 name=knn_sampta fit_args.tau_min=1 fit_args.tau_max=1 fit_args.n_tau=1 fit_args.tau=1 data=swiss_roll_high
#data.manifold_noise=0.05 data.rotate_dim=100 data.width=1 data.n_points=500 model.knn=50
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_09-34-39"
df = load_results(run_dir)

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &              &              &        &        &          \\
\midrule
Euc            &      0.388 &     0.338 &        0.021 &        0.023 &  0.610 &  0.774 &    0.270 \\
diff-map       &      0.437 &     0.499 &        0.007 &        0.009 &  0.592 &  0.770 &    0.367 \\
euler-heat-geo &      0.678 &     0.628 &        0.005 &        0.006 &  0.375 &  0.596 &    0.750 \\
heat-geo       &      0.548 &     0.461 &        0.137 &        0.148 &  0.584 &  0.771 &    0.444 \\
heat-phate     &      0.546 &     0.518 &        0.172 &        0.199 &  0.592 &  0.768 &    0.434 \\
mar-heat-geo   &      0.554 &     0.505 &        0.010 &        0.011 &  0.577 &  0.770 &    0.402 \\
phate          &      0.593 &     0.570 &        0.086 &        0.105 &  0.591 &  0.766 &    0.347 \\
shortest-path  &      0.545 &     0.478 &        0

  print(df[["Method", "SpearmanR", "PearsonR","Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [22]:
#python test_knn.py -m model=heat_geo,diff_map,euc,euler_heat,heat_phate,phate,shortest_path,heat_geo_mar fit_args.order=30
#n_seeds=10 name=knn_sampta fit_args.tau_min=1 fit_args.tau_max=1 fit_args.n_tau=1 fit_args.tau=1 data=swiss_roll_high
#data.manifold_noise=0.05 data.rotate_dim=100 data.width=1 data.n_points=2000 model.knn=50
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/knn_sampta/2023-03-28_09-41-48"
df = load_results(run_dir)

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &              &              &        &        &          \\
\midrule
Euc            &      0.387 &     0.335 &        0.005 &        0.006 &  0.447 &  0.496 &    1.931 \\
diff-map       &      0.137 &     0.259 &        0.002 &        0.002 &  0.343 &  0.454 &    5.724 \\
euler-heat-geo &      0.998 &     0.966 &        0.002 &        0.002 &  0.297 &  0.424 &    8.958 \\
heat-geo       &      0.790 &     0.583 &        0.066 &        0.067 &  0.340 &  0.476 &    4.665 \\
heat-phate     &      0.798 &     0.787 &        0.362 &        0.345 &  0.324 &  0.446 &    6.018 \\
mar-heat-geo   &      0.865 &     0.580 &        0.004 &        0.004 &  0.339 &  0.474 &    4.635 \\
phate          &      0.724 &     0.717 &        0.118 &        0.114 &  0.324 &  0.446 &    7.681 \\
shortest-path  &      0.988 &     0.978 &        0

  print(df[["Method", "SpearmanR", "PearsonR","Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


KNN results

In [2]:
def load_results(path, suffix=".pkl"):
    """Collect pickled result tables found under ``path`` into one DataFrame.

    The directory tree rooted at ``path`` is walked with ``os.walk`` and every
    file whose name ends with ``suffix`` is read with ``pd.read_pickle``. The
    tables are concatenated in the order the files are visited, keeping each
    table's own index.

    Parameters
    ----------
    path : str
        Root directory that is searched recursively.
    suffix : str, optional
        Filename ending that selects the files to load (default ``".pkl"``).

    Returns
    -------
    pd.DataFrame
        All matching tables stacked row-wise, or an empty DataFrame when no
        file matches.

    Notes
    -----
    Unpickling can execute arbitrary code, so only point this at result
    directories you trust.
    """
    frames = [
        pd.read_pickle(os.path.join(root, filename))
        for root, _dirs, files in os.walk(path)
        for filename in files
        if filename.endswith(suffix)
    ]
    if not frames:
        return pd.DataFrame()
    # Concatenate once: growing a DataFrame inside the loop re-copies every
    # previously loaded row on each iteration.
    return pd.concat(frames)

In [8]:
# ROTATION IN 100D manifold noise 0.05, 500 obs, anisotropy=1
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/default/2023-02-22_09-05-43"
df = load_results(run_dir)

In [9]:
# Mean of every metric per method, rounded to 3 decimals, printed as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro &  Norm inf &  Norm Fro N2 &  Norm inf N2 &   P@20 &   P@40 &  time(s) \\
Method         &            &           &           &           &              &              &        &        &          \\
\midrule
Euc            &      0.388 &     0.338 &     2.747 &     2.389 &        0.021 &        0.023 &  0.610 &  0.774 &    0.389 \\
diff-map       &      0.120 &     0.232 &     0.912 &     0.910 &        0.007 &        0.009 &  0.514 &  0.400 &    0.412 \\
euler-heat-geo &      0.653 &     0.381 &     0.791 &     0.642 &        0.006 &        0.006 &  0.605 &  0.772 &    0.473 \\
heat-geo       &      0.653 &     0.381 &     0.791 &     0.642 &        0.006 &        0.006 &  0.605 &  0.772 &    0.440 \\
heat-phate     &      0.827 &     0.819 &    99.561 &    76.224 &        0.777 &        0.739 &  0.594 &  0.770 &    0.352 \\
phate          &      0.719 &     0.712 &    33.559 &    25.982 &        0.262 &    

  print(df[["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [10]:
# ROTATION IN 100D manifold noise 0.05, 500 obs, anisotropy=0
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/default/2023-02-22_09-11-08"
df = load_results(run_dir)

In [13]:
# Disabled variant that also reports the N2 norms and P@k columns:
# print(df[["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "Norm Fro N2", "Norm inf N2", "P@20", "P@40", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())

# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro &  Norm inf &  time(s) \\
Method         &            &           &           &           &          \\
\midrule
Euc            &      0.388 &     0.338 &     2.747 &     2.389 &    0.292 \\
diff-map       &      0.120 &     0.232 &     0.912 &     0.910 &    0.440 \\
euler-heat-geo &      0.813 &     0.537 &     0.712 &     0.582 &    0.504 \\
heat-geo       &      0.813 &     0.537 &     0.712 &     0.582 &    0.476 \\
heat-phate     &      0.819 &     0.810 &    98.623 &    75.556 &    0.425 \\
phate          &      0.700 &     0.694 &    32.917 &    25.629 &    0.745 \\
shortest-path  &      0.545 &     0.478 &     4.174 &     3.802 &    0.729 \\
\bottomrule
\end{tabular}



  print(df[["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [14]:
# NO rotation
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/default/2023-02-22_09-22-59"
df = load_results(run_dir)

In [15]:
# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro &  Norm inf &  time(s) \\
Method         &            &           &           &           &          \\
\midrule
Euc            &      0.388 &     0.338 &     2.747 &     2.389 &    0.293 \\
diff-map       &      0.120 &     0.232 &     0.912 &     0.910 &    0.835 \\
euler-heat-geo &      0.813 &     0.537 &     0.712 &     0.582 &    0.485 \\
heat-geo       &      0.813 &     0.537 &     0.712 &     0.582 &    0.591 \\
heat-phate     &      0.819 &     0.810 &    98.623 &    75.556 &    0.371 \\
phate          &      0.700 &     0.694 &    32.917 &    25.629 &    0.418 \\
shortest-path  &      0.545 &     0.478 &     4.174 &     3.802 &    0.506 \\
\bottomrule
\end{tabular}



  print(df[["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [16]:
# NO rotation
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/runs/default/2023-02-22_10-13-53"
df = load_results(run_dir)

In [17]:
# Show the full results table currently held in `df` for inspection.
df

Unnamed: 0,Method,Seed,# group,SpearmanR,PearsonR,P@5,P@10,P@20,P@30,P@40,P@50,Norm Fro,Norm inf,Norm Fro N2,Norm inf N2,time(s)
0,heat-phate,42,500,0.795621,0.76458,0.3252,0.4594,0.5987,0.699333,0.77595,0.83292,90.598131,73.975319,0.724338,0.694884,0.437432
1,heat-phate,43,500,0.772462,0.755555,0.3176,0.4556,0.6068,0.706333,0.78495,0.8456,91.29293,71.691296,0.722941,0.692218,0.391653
2,heat-phate,44,500,0.7259,0.716764,0.3152,0.4594,0.6034,0.695467,0.76785,0.8202,92.922516,70.354682,0.709174,0.680427,0.370444
3,heat-phate,45,500,0.759879,0.746111,0.342,0.4562,0.5976,0.6942,0.7681,0.82828,91.725708,73.143068,0.714629,0.687269,0.438729
4,heat-phate,46,500,0.78688,0.764143,0.3576,0.4722,0.5973,0.694867,0.7715,0.82944,91.567191,67.914347,0.726147,0.695559,0.374199
5,heat-phate,47,500,0.742331,0.742448,0.3356,0.4576,0.5855,0.6878,0.7725,0.83248,91.222444,68.287805,0.714882,0.688253,0.581641
6,heat-phate,48,500,0.732099,0.740572,0.316,0.4514,0.5994,0.701933,0.7723,0.83288,93.680428,72.63281,0.720055,0.6897,0.393452
7,heat-phate,49,500,0.749913,0.741562,0.3212,0.447,0.5899,0.6912,0.77195,0.83696,89.575597,70.21082,0.711144,0.684183,0.374606
8,heat-phate,50,500,0.773992,0.746077,0.322,0.4494,0.5768,0.6828,0.768,0.82288,94.878405,71.868216,0.719844,0.69097,0.430007
9,heat-phate,51,500,0.759475,0.75265,0.3316,0.4492,0.5911,0.6882,0.75995,0.8182,92.886597,70.859104,0.721251,0.692129,0.373599


In [18]:
# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro &  Norm inf &  time(s) \\
Method     &            &           &           &           &          \\
\midrule
heat-phate &       0.76 &     0.747 &    92.035 &    71.094 &    0.417 \\
\bottomrule
\end{tabular}



  print(df[["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


In [22]:
# Load the results stored under this single-run directory.
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/runs/default/2023-02-22_10-15-59"
df = load_results(run_dir)

In [23]:
# Mean of the selected metrics per method, rounded to 3 decimals, as LaTeX.
summary_columns = ["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "time(s)"]
method_means = df[summary_columns].groupby(["Method"]).mean().round(3)
print(method_means.to_latex())

\begin{tabular}{lrrrrr}
\toprule
{} &  SpearmanR &  PearsonR &  Norm Fro &  Norm inf &  time(s) \\
Method     &            &           &           &           &          \\
\midrule
heat-phate &      0.894 &     0.894 &   111.579 &    86.896 &      0.4 \\
\bottomrule
\end{tabular}



  print(df[["Method", "SpearmanR", "PearsonR", "Norm Fro", "Norm inf", "time(s)" ]].groupby(["Method"]).mean().round(3).to_latex())


Comparing PHATE with the random walk against PHATE with the heat kernel.

In [68]:
# Load the results stored under this multirun directory.
run_dir = "/network/scratch/g/guillaume.huguet/projects/metric_embeddings/logs/experiments/multiruns/default/2023-02-22_11-42-12"
df = load_results(run_dir)

In [69]:
# Diffusion times to report, in column order.
taus = [20, 40, 60, 80, 100]

# Keep only the columns needed for the SpearmanR-versus-tau comparison.
s_df = df[["Method", "tau", "SpearmanR"]]
by_tau_and_method = s_df.groupby(["tau", "Method"])

# Mean and standard deviation of SpearmanR per (tau, Method), rounded to
# 3 decimals, then reshaped to one row per method and one column per tau.
mean = (
    by_tau_and_method.mean()
    .round(3)
    .reset_index()
    .pivot(index="Method", columns="tau", values="SpearmanR")[taus]
)
std = (
    by_tau_and_method.std()
    .round(3)
    .reset_index()
    .pivot(index="Method", columns="tau", values="SpearmanR")[taus]
)

In [70]:
# Format every cell as "mean $\pm$ std" and print the table as LaTeX.
# A raw string keeps the backslash of "\pm" literal; in a plain string "\p" is
# an unrecognized escape sequence, which Python reports with a warning.
pm_separator = r" $\pm$ "
# escape=False so the "$\pm$" markup is emitted as written.
print((mean.astype(str) + pm_separator + std.astype(str)).to_latex(escape=False))

\begin{tabular}{llllll}
\toprule
tau &                20  &                40  &                60  &              80  &                100 \\
Method     &                    &                    &                    &                  &                    \\
\midrule
heat-phate &  0.819 $\pm$ 0.024 &  0.867 $\pm$ 0.032 &  0.883 $\pm$ 0.037 &  0.89 $\pm$ 0.04 &  0.894 $\pm$ 0.041 \\
phate      &  0.681 $\pm$ 0.034 &  0.774 $\pm$ 0.028 &   0.82 $\pm$ 0.027 &  0.85 $\pm$ 0.03 &  0.871 $\pm$ 0.034 \\
\bottomrule
\end{tabular}



  print((mean.astype(str) + " $\pm$ " + std.astype(str)).to_latex(escape=False))
