In [56]:
import os
import glob
import numpy as np
import pandas as pd

import util.util_validation as ut_val
from util.util_logging import load_confusion_matrix

# Show every column when a DataFrame is rendered; the result tables below are wide.
pd.options.display.max_columns = None

In [2]:
# Checkpoint tag; it is used for the "val_<epoch>" folders and the
# "cm_val_epoch_<epoch>.csv" file names read by the cells below.
epoch = "last"

# Parameter filters handed to ut_val.collect_models_dict.
# NOTE(review): presumably runs whose "tag" is listed here are dropped and only
# runs whose "aug" is listed are kept — confirm against collect_models_dict.
exclude_params_dict = {
    "tag": ["CropSize32", "divergent", "toSlow", "lrAdjust1", "lrAdjust2", "noCosine", "bszAdjust1"],
}
keep_params_dict = {
    "aug": ["colorJitter", "grayscale"],
}

models_dict, df_modelNames = ut_val.collect_models_dict(
    epoch=epoch,
    dataset_classifier="",
    exclude_params_dict=exclude_params_dict,
    keep_params_dict=keep_params_dict,
)

# Printed as a markdown table so the list of selected models is readable as plain text.
print(df_modelNames.to_markdown(index=False))

| model_name                                                             |
|:-----------------------------------------------------------------------|
| CE_animals10_diff_-1+4000_allAug                                       |
| CE_animals10_diff_-1+8000_allAug                                       |
| CE_animals10_diff_-1_allAug                                            |
| CE_animals10_diff_4000_allAug                                          |
| SupConHybrid_animals10_diff_-1+4000_cAug                               |
| SupCon_20.0_animals10_diff_-1+4000_cAugSameSAug                        |
| SupCon_5.0_animals10_diff_-1+4000_cAugSameSAug_factor5cAugSameSAug     |
| SupCon_5.0_animals10_diff_-1+4000_cAugSameSAug_fineTuneCAsameSAFactor5 |
| SupCon_animals10_diff_-1+4000_cAugSameSAug                             |
| SupCon_animals10_diff_-1+8000_cAugSameSAug                             |
| SupCon_animals10_diff_-1_allAug                                        |


## Table of Accuracies

In [4]:
# Accuracy table: one row per selected model, one column per dataset for which
# at least one model has a confusion matrix. Each cell shows the two values
# returned by ut_val.compute_accuracies_form_cm as percentages, "first (second)".
# NOTE(review): presumably overall accuracy and balanced accuracy — confirm.
model_names = list(df_modelNames["model_name"].values)
cm_file_name = f"cm_val_epoch_{epoch}.csv"

# Resolve and glob every model folder exactly once, remembering which
# confusion-matrix file belongs to which dataset.
cm_paths_by_model = {}
for mn in model_names:
    root_model, _ = models_dict[mn]
    path_folder, _ = ut_val.get_paths_from_model_checkpoint(root_model)
    found_cm = {}
    for cm_path in glob.glob(os.path.join(path_folder, f"val_{epoch}", "*", "cm", cm_file_name)):
        # Layout is <val dir>/<dataset>/cm/<file>; use os.path instead of
        # splitting on '/' so this also works with OS-native separators.
        dset = os.path.basename(os.path.dirname(os.path.dirname(cm_path)))
        found_cm[dset] = cm_path
    cm_paths_by_model[mn] = found_cm

datasets_cm_all = sorted(set().union(*cm_paths_by_model.values()))

# Display strings and the numeric values used for colouring are collected side
# by side, so the gradient never has to re-parse the formatted text.
acc_dict_val = {"model_name": list(model_names)}
acc_gmap_val = {}
for dset in datasets_cm_all:
    acc_dict_val[dset] = []
    acc_gmap_val[dset] = []

for mn in model_names:
    for dset in datasets_cm_all:
        cm_path = cm_paths_by_model[mn].get(dset)
        if cm_path is None:
            # Model was not evaluated on this dataset: empty cell, no colour.
            acc_dict_val[dset].append("")
            acc_gmap_val[dset].append(np.nan)
            continue
        C_val = load_confusion_matrix(cm_path)
        acc_val, acc_b_val = ut_val.compute_accuracies_form_cm(C_val)
        acc_dict_val[dset].append(f"{acc_val*100:.2f} ({acc_b_val*100:.2f})")
        acc_gmap_val[dset].append(float(acc_val) * 100)

df_acc = pd.DataFrame.from_dict(acc_dict_val)
df_acc_gmap = pd.DataFrame(acc_gmap_val, index=df_acc.index, columns=datasets_cm_all)
# axis=None: one colour scale shared by all dataset columns.
df_acc.style.background_gradient(subset=datasets_cm_all, axis=None, gmap=df_acc_gmap)

Unnamed: 0,model_name,animals10_diff_-1,animals10_diff_4000,animals10_diff_8000
0,CE_animals10_diff_-1+4000_allAug,94.75 (94.04),93.01 (92.18),90.85 (89.70)
1,CE_animals10_diff_-1+8000_allAug,94.08 (93.37),92.27 (91.16),90.60 (89.47)
2,CE_animals10_diff_-1_allAug,94.92 (94.32),61.94 (60.00),51.28 (49.32)
3,CE_animals10_diff_4000_allAug,69.56 (65.98),92.02 (91.08),88.69 (87.40)
4,SupConHybrid_animals10_diff_-1+4000_cAug,57.98 (51.17),57.81 (51.03),
5,SupCon_20.0_animals10_diff_-1+4000_cAugSameSAug,93.37 (92.74),91.92 (91.11),
6,SupCon_5.0_animals10_diff_-1+4000_cAugSameSAug_factor5cAugSameSAug,94.77 (94.11),93.41 (92.95),91.12 (90.36)
7,SupCon_5.0_animals10_diff_-1+4000_cAugSameSAug_fineTuneCAsameSAFactor5,94.40 (93.61),93.24 (92.46),
8,SupCon_animals10_diff_-1+4000_cAugSameSAug,94.96 (94.21),92.93 (92.02),90.58 (89.26)
9,SupCon_animals10_diff_-1+8000_cAugSameSAug,94.58 (93.77),92.65 (91.62),91.14 (89.91)


#### Old version (accuracy table built by globbing `./save/` directly)

In [None]:
# Checkpoint tag used in the "val_<epoch>" folder and CSV file names of the legacy cells.
epoch = "last"

# Datasets that become the columns of the legacy tables.
datasets = [
    "animals10_diff_-1",
    "animals10_diff_4000",
    "animals10_diff_8000",
    "city_classification_original",
    "city_classification_diff",
]

In [3]:
def get_name(cm_path):
    """Derive a short training name from a result-file path.

    The path is split on '/'; component 3 is used as the training dataset and
    component 4 as the model folder (for the glob patterns used in this
    notebook, "./save/*/*/*/val_<epoch>/...", these are the second and third
    wildcard). The model folder is split on '_': its first token is the method
    and its second-to-last token is the tag.

    Args:
        cm_path: Path of a result file below "./save/".

    Returns:
        "untrained" if the training-dataset component is "untrained",
        otherwise "<method>_<train_dataset>_<tag>", where a few known raw tags
        are replaced by readable aliases.

    Raises:
        IndexError: If the path has fewer than five components, or if a
            trained model folder has fewer than two '_'-separated tokens.
    """
    # glob may hand back OS-native separators; normalise so the indices below hold.
    parts = cm_path.replace(os.sep, "/").split("/")
    train_dataset, model_dir = parts[3], parts[4]

    # Decide this before parsing the tag: an untrained run needs no tag, and its
    # folder name may not contain enough '_'-separated tokens to have one.
    if train_dataset == "untrained":
        return "untrained"

    tokens = model_dir.split("_")
    method = tokens[0]
    tag = tokens[-2]

    # Readable aliases for some raw tags; unknown tags are kept unchanged.
    tag_dict = {"try3": "baseline", "4000": "diff4000", "8000": "diff8000", "4000Small": "diff4000Small"}
    tag = tag_dict.get(tag, tag)

    return f"{method}_{train_dataset}_{tag}"

In [4]:
# Group every confusion-matrix file found under ./save by its training name:
# training name -> list of (dataset, path to confusion-matrix CSV).
training_dict = {}
for dataset in datasets:
    cm_pattern = f"./save/*/*/*/val_{epoch}/{dataset}/cm/cm_val_epoch_{epoch}.csv"
    for cm_path in glob.glob(cm_pattern):
        training_dict.setdefault(get_name(cm_path), []).append((dataset, cm_path))

In [5]:
# Column-wise table data: "training" holds the row labels, every dataset gets
# one list that is filled row by row.
acc_dict = {"training": []}
acc_dict.update((dataset, []) for dataset in datasets)

for training, evaluated in training_dict.items():
    acc_dict["training"].append(training)

    # Fill the cells for which a confusion matrix exists.
    for dataset, cm_path in evaluated:
        acc, acc_b = ut_val.compute_accuracies(load_confusion_matrix(cm_path))
        acc_dict[dataset].append(f"{acc*100:.2f} ({acc_b*100:.2f})")

    # Pad every column that did not receive a value for this row.
    n_rows = len(acc_dict["training"])
    for dataset in datasets:
        if len(acc_dict[dataset]) < n_rows:
            acc_dict[dataset].append("")

In [6]:
# Legacy accuracy table, rows ordered alphabetically by training name.
df_acc = (
    pd.DataFrame.from_dict(acc_dict)
    .sort_values(by="training")
    .reset_index(drop=True)
)
df_acc

Unnamed: 0,training,animals10_diff_-1,animals10_diff_4000,animals10_diff_8000,city_classification_original,city_classification_diff
0,SupCE_animals10_baseline,95.30 (94.66),75.57 (72.05),,,
1,SupCE_animals10_diff_-1+4000_diffAug,94.88 (94.21),93.39 (92.60),,,
2,SupCE_animals10_diff_-1+4000_diffAugAllAug,94.75 (94.04),93.01 (92.18),90.85 (89.70),,
3,SupCE_animals10_diff_-1_origAllAug,94.92 (94.32),61.94 (60.00),51.28 (49.32),,
4,SupCE_animals10_diff_-1_small_origSmall,84.19 (85.28),63.22 (63.62),,,
5,SupCE_animals10_diff_4000_4000AllAug,69.56 (65.98),92.02 (91.08),88.69 (87.40),,
6,SupCE_animals10_diff_4000_diff4000,76.80 (73.52),93.28 (92.30),,,
7,SupCE_animals10_diff_4000_small_diff4000Small,61.44 (62.37),74.62 (76.08),,,
8,SupCE_animals10_diff_8000_diff8000,55.16 (52.15),90.74 (89.82),90.91 (89.87),,
9,SupCE_city_classification_diff_cityDiff,,,,78.47 (64.32),82.20 (69.51)


## Table of Cue Conflict Shape Bias Metric

In [72]:
# Cue-conflict shape-bias table: one row per selected model, one column per
# cue-conflict dataset for which at least one model has a "shape_bias.csv".
model_names = list(df_modelNames["model_name"].values)

# Resolve and glob every model folder once, remembering the result file per dataset.
bias_paths_by_model = {}
for mn in model_names:
    root_model, _ = models_dict[mn]
    path_folder, _ = ut_val.get_paths_from_model_checkpoint(root_model)
    cue_conf_pattern = os.path.join(path_folder, f"val_{epoch}", "shapeBiasMetrics", "CueConflict", "*", "shape_bias.csv")
    # Layout is .../CueConflict/<dataset>/shape_bias.csv; use os.path instead of
    # splitting on '/' so this also works with OS-native separators.
    bias_paths_by_model[mn] = {
        os.path.basename(os.path.dirname(cue_conf_path)): cue_conf_path
        for cue_conf_path in glob.glob(cue_conf_pattern)
    }

datasets_bias_all = sorted(set().union(*bias_paths_by_model.values()))

bias_dict = {"model_name": list(model_names)}
for dset in datasets_bias_all:
    bias_dict[dset] = []

for mn in model_names:
    for dset in datasets_bias_all:
        cue_conf_path = bias_paths_by_model[mn].get(dset)
        if cue_conf_path is None:
            # Missing results stay numeric (NaN). An empty string would turn the
            # whole column into object dtype and break the gradient below.
            bias_dict[dset].append(np.nan)
            continue
        shape_bias_table = pd.read_csv(cue_conf_path, index_col=0)
        # First value of the row labelled "shape_bias".
        bias_dict[dset].append(shape_bias_table.loc["shape_bias"].values[0])

df_bias = pd.DataFrame.from_dict(bias_dict)
# NaN cells get no colour and are rendered as empty cells.
df_bias.style.background_gradient(subset=datasets_bias_all).format(na_rep="")

Unnamed: 0,model_name,shape_texture_conflict_animals10_many
0,CE_animals10_diff_-1+4000_allAug,0.631831
1,CE_animals10_diff_-1+8000_allAug,0.626199
2,CE_animals10_diff_-1_allAug,0.628997
3,CE_animals10_diff_4000_allAug,0.588198
4,SupConHybrid_animals10_diff_-1+4000_cAug,0.586778
5,SupCon_20.0_animals10_diff_-1+4000_cAugSameSAug,0.653593
6,SupCon_5.0_animals10_diff_-1+4000_cAugSameSAug_factor5cAugSameSAug,0.675375
7,SupCon_5.0_animals10_diff_-1+4000_cAugSameSAug_fineTuneCAsameSAFactor5,0.652331
8,SupCon_animals10_diff_-1+4000_cAugSameSAug,0.66861
9,SupCon_animals10_diff_-1+8000_cAugSameSAug,0.644133


## Table of Correlation Coefficient Shape Bias Metric

In [60]:
# Correlation-coefficient shape-bias table: one row per selected model and, per
# dataset, four columns giving the number of embedding dimensions reported in
# "pred_dims.csv" for shape / texture / color / remaining, formatted as
# "count (count / sum of all values in the file)".
model_names = list(df_modelNames["model_name"].values)
dim_kinds = ("shape", "texture", "color", "remaining")

# Resolve and glob every model folder once, remembering the result file per dataset.
pred_dims_paths_by_model = {}
for mn in model_names:
    root_model, _ = models_dict[mn]
    path_folder, _ = ut_val.get_paths_from_model_checkpoint(root_model)
    corr_coef_pattern = os.path.join(path_folder, f"val_{epoch}", "shapeBiasMetrics", "CorrelationCoefficient", "*", "pred_dims.csv")
    # Layout is .../CorrelationCoefficient/<dataset>/pred_dims.csv; use os.path
    # instead of splitting on '/' so this also works with OS-native separators.
    pred_dims_paths_by_model[mn] = {
        os.path.basename(os.path.dirname(corr_coef_path)): corr_coef_path
        for corr_coef_path in glob.glob(corr_coef_pattern)
    }

datasets_corr_coef_all = sorted(set().union(*pred_dims_paths_by_model.values()))

dims_dict = {"model_name": list(model_names)}
for dset in datasets_corr_coef_all:
    for kind in dim_kinds:
        dims_dict[f"{dset}_{kind}"] = []

for mn in model_names:
    for dset in datasets_corr_coef_all:
        pred_dims_path = pred_dims_paths_by_model[mn].get(dset)
        if pred_dims_path is None:
            # No result for this model/dataset pair: leave all four cells empty.
            for kind in dim_kinds:
                dims_dict[f"{dset}_{kind}"].append("")
            continue

        pred_dims = pd.read_csv(pred_dims_path, index_col=0)
        # NOTE(review): sums every value in the file; this equals the embedding
        # size only if the file holds a single row of dimension counts — confirm.
        embedding_size = pred_dims.values.sum()

        for kind in dim_kinds:
            column = f"{kind}_dims"
            # Only "color_dims" is optional; the other three columns are required.
            if kind == "color" and column not in pred_dims.columns:
                dims_dict[f"{dset}_{kind}"].append("")
                continue
            n_dims = pred_dims[column].iloc[0]
            dims_dict[f"{dset}_{kind}"].append(f"{n_dims} ({n_dims/embedding_size:.4f})")

df_dims = pd.DataFrame.from_dict(dims_dict)

# Two-level header: dataset on top, kind of dimension below.
multiIndex_tuples = [("", "model_name")]
for dset in datasets_corr_coef_all:
    multiIndex_tuples.extend((dset, f"{kind}_dims") for kind in dim_kinds)
df_dims.columns = pd.MultiIndex.from_tuples(multiIndex_tuples)

# Colour by the leading count of each "count (fraction)" cell; empty cells get no colour.
df_dims.style.background_gradient(subset=datasets_corr_coef_all, axis=None, gmap=df_dims[datasets_corr_coef_all].map(lambda x: np.nan if x == '' else float(x.split(' ')[0])))

Unnamed: 0_level_0,Unnamed: 1_level_0,animals10_diff_4000CJitterShape_animals10_diff_-1PatchSize30CJitterTexture_animals10_diff_-1PixelShuffledColor,animals10_diff_4000CJitterShape_animals10_diff_-1PatchSize30CJitterTexture_animals10_diff_-1PixelShuffledColor,animals10_diff_4000CJitterShape_animals10_diff_-1PatchSize30CJitterTexture_animals10_diff_-1PixelShuffledColor,animals10_diff_4000CJitterShape_animals10_diff_-1PatchSize30CJitterTexture_animals10_diff_-1PixelShuffledColor,animals10_diff_4000CJitterShape_animals10_diff_-1PatchSize30Texture,animals10_diff_4000CJitterShape_animals10_diff_-1PatchSize30Texture,animals10_diff_4000CJitterShape_animals10_diff_-1PatchSize30Texture,animals10_diff_4000CJitterShape_animals10_diff_-1PatchSize30Texture,shape_texture_conflict_animals10_many,shape_texture_conflict_animals10_many,shape_texture_conflict_animals10_many,shape_texture_conflict_animals10_many,stylized_animals10Shape_animals10_diff_-1PatchSize30CJitterTexture_animals10_diff_-1PixelShuffledColor,stylized_animals10Shape_animals10_diff_-1PatchSize30CJitterTexture_animals10_diff_-1PixelShuffledColor,stylized_animals10Shape_animals10_diff_-1PatchSize30CJitterTexture_animals10_diff_-1PixelShuffledColor,stylized_animals10Shape_animals10_diff_-1PatchSize30CJitterTexture_animals10_diff_-1PixelShuffledColor,stylized_animals10Shape_animals10_diff_-1PatchSize30Texture,stylized_animals10Shape_animals10_diff_-1PatchSize30Texture,stylized_animals10Shape_animals10_diff_-1PatchSize30Texture,stylized_animals10Shape_animals10_diff_-1PatchSize30Texture
Unnamed: 0_level_1,model_name,shape_dims,texture_dims,color_dims,remaining_dims,shape_dims,texture_dims,color_dims,remaining_dims,shape_dims,texture_dims,color_dims,remaining_dims,shape_dims,texture_dims,color_dims,remaining_dims,shape_dims,texture_dims,color_dims,remaining_dims
0,CE_animals10_diff_-1+4000_allAug,166 (0.3242),101 (0.1973),65 (0.1270),180 (0.3516),190 (0.3711),116 (0.2266),,206 (0.4023),96 (0.1875),174 (0.3398),,242 (0.4727),107 (0.2090),118 (0.2305),77 (0.1504),210 (0.4102),125 (0.2441),140 (0.2734),,247 (0.4824)
1,CE_animals10_diff_-1+8000_allAug,166 (0.3242),98 (0.1914),66 (0.1289),182 (0.3555),190 (0.3711),114 (0.2227),,208 (0.4062),97 (0.1895),171 (0.3340),,244 (0.4766),108 (0.2109),115 (0.2246),77 (0.1504),212 (0.4141),127 (0.2480),136 (0.2656),,249 (0.4863)
2,CE_animals10_diff_-1_allAug,144 (0.2812),111 (0.2168),68 (0.1328),189 (0.3691),167 (0.3262),128 (0.2500),,217 (0.4238),99 (0.1934),169 (0.3301),,244 (0.4766),105 (0.2051),123 (0.2402),76 (0.1484),208 (0.4062),123 (0.2402),145 (0.2832),,244 (0.4766)
3,CE_animals10_diff_4000_allAug,139 (0.2715),111 (0.2168),70 (0.1367),192 (0.3750),161 (0.3145),130 (0.2539),,221 (0.4316),96 (0.1875),172 (0.3359),,244 (0.4766),98 (0.1914),123 (0.2402),77 (0.1504),214 (0.4180),116 (0.2266),147 (0.2871),,249 (0.4863)
4,SupConHybrid_animals10_diff_-1+4000_cAug,171 (0.3340),93 (0.1816),67 (0.1309),181 (0.3535),196 (0.3828),108 (0.2109),,208 (0.4062),99 (0.1934),168 (0.3281),,245 (0.4785),110 (0.2148),110 (0.2148),79 (0.1543),213 (0.4160),130 (0.2539),131 (0.2559),,251 (0.4902)
5,SupCon_20.0_animals10_diff_-1+4000_cAugSameSAug,172 (0.3359),92 (0.1797),67 (0.1309),181 (0.3535),198 (0.3867),107 (0.2090),,207 (0.4043),104 (0.2031),157 (0.3066),,251 (0.4902),108 (0.2109),110 (0.2148),80 (0.1562),214 (0.4180),128 (0.2500),131 (0.2559),,253 (0.4941)
6,SupCon_5.0_animals10_diff_-1+4000_cAugSameSAug_factor5cAugSameSAug,171 (0.3340),91 (0.1777),68 (0.1328),182 (0.3555),197 (0.3848),105 (0.2051),,210 (0.4102),103 (0.2012),159 (0.3105),,250 (0.4883),106 (0.2070),108 (0.2109),81 (0.1582),217 (0.4238),126 (0.2461),129 (0.2520),,257 (0.5020)
7,SupCon_5.0_animals10_diff_-1+4000_cAugSameSAug_fineTuneCAsameSAFactor5,169 (0.3301),98 (0.1914),66 (0.1289),179 (0.3496),195 (0.3809),113 (0.2207),,204 (0.3984),103 (0.2012),159 (0.3105),,250 (0.4883),109 (0.2129),115 (0.2246),78 (0.1523),210 (0.4102),128 (0.2500),136 (0.2656),,248 (0.4844)
8,SupCon_animals10_diff_-1+4000_cAugSameSAug,165 (0.3223),99 (0.1934),67 (0.1309),181 (0.3535),190 (0.3711),114 (0.2227),,208 (0.4062),103 (0.2012),158 (0.3086),,251 (0.4902),107 (0.2090),115 (0.2246),78 (0.1523),212 (0.4141),126 (0.2461),137 (0.2676),,249 (0.4863)
9,SupCon_animals10_diff_-1+8000_cAugSameSAug,166 (0.3242),96 (0.1875),67 (0.1309),183 (0.3574),191 (0.3730),112 (0.2188),,209 (0.4082),103 (0.2012),158 (0.3086),,251 (0.4902),107 (0.2090),112 (0.2188),79 (0.1543),214 (0.4180),127 (0.2480),134 (0.2617),,251 (0.4902)


## Table of Distances

In [20]:
# Distance table: one row per selected model and, for every (dataset 1,
# dataset 2) combination, three columns with the mean distances read from
# "<dataset 1>_dist_to_<dataset 2>.csv", formatted as
# "mean (mean / mean all-vs-all)".
model_names = list(df_modelNames["model_name"].values)
dist_columns = ["mean_distance_related", "mean_distance_classes", "mean_distance_all_vs_all"]
dist_suffixes = ("related", "class", "all")

# Resolve and glob every model folder once, remembering each distance file by
# (dataset folder, file name).
dist_paths_by_model = {}
datasets_1_dist = set()
datasets_2_dist = set()
for mn in model_names:
    root_model, _ = models_dict[mn]
    path_folder, _ = ut_val.get_paths_from_model_checkpoint(root_model)
    found_dist = {}
    for dist_path in glob.glob(os.path.join(path_folder, f"val_{epoch}", "*", "embeddings", "*_dist_to_*.csv")):
        # Layout is <val dir>/<dataset 1>/embeddings/<file>; use os.path instead
        # of splitting on '/' so this also works with OS-native separators.
        dset1 = os.path.basename(os.path.dirname(os.path.dirname(dist_path)))
        file_name = os.path.basename(dist_path)
        dset2 = os.path.splitext(file_name)[0].split("_dist_to_")[-1]
        datasets_1_dist.add(dset1)
        datasets_2_dist.add(dset2)
        found_dist[(dset1, file_name)] = dist_path
    dist_paths_by_model[mn] = found_dist
datasets_1_dist = sorted(datasets_1_dist)
datasets_2_dist = sorted(datasets_2_dist)

dist_dict = {"model_name": list(model_names)}
for dset1 in datasets_1_dist:
    for dset2 in datasets_2_dist:
        for suffix in dist_suffixes:
            dist_dict[f"{dset1}_to_{dset2}_{suffix}"] = []

for mn in model_names:
    for dset1 in datasets_1_dist:
        for dset2 in datasets_2_dist:
            dsets = f"{dset1}_to_{dset2}"
            dist_path = dist_paths_by_model[mn].get((dset1, f"{dset1}_dist_to_{dset2}.csv"))
            if dist_path is None:
                # No distance file for this combination: leave the three cells empty.
                for suffix in dist_suffixes:
                    dist_dict[f"{dsets}_{suffix}"].append("")
                continue

            dist_table = pd.read_csv(dist_path, index_col=0)
            # The statistics are row labels in the CSV; transpose to select them
            # as columns and take the first row of values.
            mean_rel, mean_class, mean_all = dist_table.T.loc[:, dist_columns].iloc[0]

            dist_dict[f"{dsets}_related"].append(f"{mean_rel:.4f} ({mean_rel/mean_all:.4f})")
            dist_dict[f"{dsets}_class"].append(f"{mean_class:.4f} ({mean_class/mean_all:.4f})")
            # Same 4-decimal format as the other two ratios (used to print "1.0").
            dist_dict[f"{dsets}_all"].append(f"{mean_all:.4f} ({mean_all/mean_all:.4f})")

df_dist = pd.DataFrame.from_dict(dist_dict)

# Two-level header: dataset combination on top, kind of distance below.
multiIndex_tuples = [("", "model_name")]
for dset1 in datasets_1_dist:
    for dset2 in datasets_2_dist:
        dsets = f"{dset1}_to_{dset2}"
        multiIndex_tuples.extend([(dsets, "related"), (dsets, "in class"), (dsets, "all vs. all")])
df_dist.columns = pd.MultiIndex.from_tuples(multiIndex_tuples)
df_dist

Unnamed: 0_level_0,Unnamed: 1_level_0,animals10_diff_-1_to_animals10_diff_4000,animals10_diff_-1_to_animals10_diff_4000,animals10_diff_-1_to_animals10_diff_4000,animals10_diff_-1_to_animals10_diff_8000,animals10_diff_-1_to_animals10_diff_8000,animals10_diff_-1_to_animals10_diff_8000
Unnamed: 0_level_1,model_name,related,in class,all vs. all,related,in class,all vs. all
0,CE_animals10_diff_-1+4000_allAug,0.0138 (0.0652),0.0963 (0.4535),0.2124 (1.0),,,
1,CE_animals10_diff_-1+8000_allAug,0.0152 (0.0772),0.0972 (0.4929),0.1971 (1.0),0.0208 (0.1073),0.0983 (0.5064),0.1942 (1.0)
2,CE_animals10_diff_-1_allAug,0.0991 (0.4283),0.1531 (0.6614),0.2314 (1.0),,,
3,CE_animals10_diff_4000_allAug,0.1391 (0.5245),0.2010 (0.7580),0.2652 (1.0),,,
4,SupConHybrid_animals10_diff_-1+4000_cAug,0.0012 (0.0033),0.3294 (0.9331),0.3530 (1.0),,,
5,SupCon_20.0_animals10_diff_-1+4000_cAugSameSAug,0.0099 (0.0212),0.4068 (0.8705),0.4673 (1.0),,,
6,SupCon_5.0_animals10_diff_-1+4000_cAugSameSAug...,0.0157 (0.0375),0.3231 (0.7738),0.4175 (1.0),,,
7,SupCon_5.0_animals10_diff_-1+4000_cAugSameSAug...,0.0133 (0.0348),0.2826 (0.7396),0.3821 (1.0),,,
8,SupCon_animals10_diff_-1+4000_cAugSameSAug,0.0324 (0.0862),0.2587 (0.6880),0.3760 (1.0),,,
9,SupCon_animals10_diff_-1+8000_cAugSameSAug,0.0332 (0.0906),0.2590 (0.7068),0.3664 (1.0),0.0452 (0.1238),0.2622 (0.7178),0.3653 (1.0)


#### Old version (distance table built by globbing `./save/` directly)

In [7]:
# Checkpoint tag used in the "val_<epoch>" folder names of the legacy distance cells.
epoch = "last"

# Datasets that are paired with each other when searching for distance files.
datasets = [
    "animals10_diff_-1",
    "animals10_diff_4000",
    "animals10_diff_8000",
    "city_classification_original",
    "city_classification_diff",
]

In [8]:
# Group every distance file found under ./save by its training name:
# training name -> list of (dataset_1, dataset_2, path to distance CSV).
training_dict = {}
for dataset_1 in datasets:
    for dataset_2 in datasets:
        dist_pattern = f"./save/*/*/*/val_{epoch}/{dataset_1}/embeddings/{dataset_1}_dist_to_{dataset_2}.csv"
        for cm_path in glob.glob(dist_pattern):
            training_dict.setdefault(get_name(cm_path), []).append((dataset_1, dataset_2, cm_path))

In [11]:
# Column-wise table data: "training" holds the row labels; one further column
# per dataset pair that occurs in at least one training.
dist_dict = {"training": []}
dist_datasets = []
for found in training_dict.values():
    for dataset_1, dataset_2, _ in found:
        dist_dataset = f"{dataset_1}_dist_to_{dataset_2}"
        if dist_dataset in dist_dict:
            continue
        dist_dict[dist_dataset] = []
        dist_datasets.append(dist_dataset)

wanted_means = ["mean_distance_related", "mean_distance_classes", "mean_distance_all_vs_all"]
for training, found in training_dict.items():
    dist_dict["training"].append(training)

    # Fill the cells for which a distance file exists.
    for dataset_1, dataset_2, dist_path in found:
        dists = pd.read_csv(dist_path, index_col=0)
        # The statistics are row labels in the CSV; transpose to select them as columns.
        mean_rel, mean_class, mean_all = dists.T.loc[:, wanted_means].iloc[0]
        # std_rel, std_class, std_all = dists.T.loc[:,["std_distance_related", "std_distance_classes", "std_distance_all_vs_all"]].iloc[0]

        dist_dataset = f"{dataset_1}_dist_to_{dataset_2}"
        dist_dict[dist_dataset].append(f"{mean_rel:.4f} ({mean_rel/mean_all:.4f}) | {mean_class:.4f} ({mean_class/mean_all:.4f}) | {mean_all:.4f} ({mean_all/mean_all})")

    # Pad every column that did not receive a value for this row.
    n_rows = len(dist_dict["training"])
    for dist_dataset in dist_datasets:
        if len(dist_dict[dist_dataset]) < n_rows:
            dist_dict[dist_dataset].append("")

In [12]:
# Legacy distance table, rows ordered alphabetically by training name.
df_dist = (
    pd.DataFrame.from_dict(dist_dict)
    .sort_values(by="training")
    .reset_index(drop=True)
)
df_dist

Unnamed: 0,training,animals10_diff_-1_dist_to_animals10_diff_4000
0,SupCE_animals10_diff_-1+4000_diffAugAllAug,0.0138 (0.0652) | 0.0963 (0.4535) | 0.2124 (1.0)
1,SupCE_animals10_diff_-1_origAllAug,0.0991 (0.4283) | 0.1531 (0.6614) | 0.2314 (1.0)
2,SupCE_animals10_diff_4000_4000AllAug,0.1391 (0.5245) | 0.2010 (0.7580) | 0.2652 (1.0)
3,SupConHybrid_animals10_diff_-1+4000_colorAug,0.0012 (0.0033) | 0.3294 (0.9331) | 0.3530 (1.0)
4,SupCon_animals10_diff_-1+4000_colorAugSameShap...,0.0324 (0.0862) | 0.2587 (0.6880) | 0.3760 (1.0)
5,SupCon_animals10_diff_-1+4000_factor20cAugSame...,0.0099 (0.0212) | 0.4068 (0.8705) | 0.4673 (1.0)
6,SupCon_animals10_diff_-1+4000_factor5cAugSameSAug,0.0157 (0.0375) | 0.3231 (0.7738) | 0.4175 (1.0)
7,SupCon_animals10_diff_-1_baseline,0.2267 (0.4970) | 0.3683 (0.8075) | 0.4561 (1.0)
