In [1]:
import os
import glob
import pandas as pd

import util.util_validation as ut_val
from util.util_logging import load_confusion_matrix

## Table of Accuracies

In [2]:
# Which validation folder to read: the glob patterns below look inside
# "val_<epoch>" directories and for "cm_val_epoch_<epoch>.csv" files.
epoch = "last"

# Evaluation datasets; the accuracy table gets one column per entry,
# in exactly this order.
datasets = [
    "animals10_diff_-1",
    "animals10_diff_4000",
    "animals10_diff_8000",
    "city_classification_original",
    "city_classification_diff",
]

In [3]:
def get_name(cm_path):
    """Derive a short display name for a training run from a result-file path.

    The path is split on '/'. Component 3 is taken as the training dataset
    and component 4 as the run directory, which matches paths of the form
    ``./save/<x>/<train_dataset>/<run_dir>/...`` produced by the glob
    patterns in this notebook. Within the run directory name, the first
    '_'-separated token is the method and the second-to-last token is the
    tag. A few known tags are translated to friendlier labels.

    Parameters
    ----------
    cm_path : str
        '/'-separated path to a result file.

    Returns
    -------
    str
        ``"untrained"`` when the training-dataset component is
        ``"untrained"``, otherwise ``"<method>_<train_dataset>_<tag>"``.

    Raises
    ------
    IndexError
        If the path has too few components, or (for trained runs) the run
        directory name has fewer than two '_'-separated tokens.
    """
    parts = cm_path.split('/')
    train_dataset = parts[3]

    # Untrained runs all share one fixed label, so return before parsing the
    # run directory: its name need not follow the <method>_..._<tag>_<x>
    # convention and must not cause an IndexError here.
    if train_dataset == "untrained":
        return "untrained"

    run_tokens = parts[4].split('_')
    method = run_tokens[0]
    raw_tag = run_tokens[-2]

    # Map terse run tags to readable labels; unknown tags pass through as-is.
    tag_dict = {"try3": "baseline", "4000": "diff4000", "8000": "diff8000", "4000Small": "diff4000Small"}
    tag = tag_dict.get(raw_tag, raw_tag)

    return f"{method}_{train_dataset}_{tag}"

In [4]:
# Group every confusion-matrix CSV found under ./save by the training run it
# belongs to: {run name -> [(evaluation dataset, path to CSV), ...]}.
training_dict = {}
for dataset in datasets:
    pattern = f"./save/*/*/*/val_{epoch}/{dataset}/cm/cm_val_epoch_{epoch}.csv"
    for cm_path in glob.glob(pattern):
        name = get_name(cm_path)
        # setdefault creates the run's list on first sight, then appends.
        training_dict.setdefault(name, []).append((dataset, cm_path))

In [5]:
# Column-oriented table data: a "training" column with the run names plus one
# column per evaluation dataset. Every column must end up with exactly one
# entry per training run so that the rows line up in the DataFrame.
acc_dict = {"training": []}
for dataset in datasets:
    acc_dict[dataset] = []

for training in training_dict:
    # Collect the formatted accuracies of this run per dataset first. Several
    # result files can map to the same run name (e.g. every untrained run is
    # called "untrained"); appending them straight to the columns would make
    # a column longer than "training" and shift all following rows.
    row = {}
    for dataset, cm_path in training_dict[training]:
        cm = load_confusion_matrix(cm_path)
        acc, acc_b = ut_val.compute_accuracies(cm)
        # Cell format: first accuracy in percent, second one in parentheses.
        row.setdefault(dataset, []).append(f"{acc*100:.2f} ({acc_b*100:.2f})")

    acc_dict["training"].append(training)
    for dataset in datasets:
        # Exactly one cell per dataset: "" when the run was not evaluated on
        # it, the single value in the normal case, and all values joined with
        # " / " when several files share this run name.
        acc_dict[dataset].append(" / ".join(row.get(dataset, [])))

In [6]:
# Turn the column lists into a table, order the rows alphabetically by run
# name and renumber the index from 0.
df_acc = (
    pd.DataFrame.from_dict(acc_dict)
    .sort_values(by="training")
    .reset_index(drop=True)
)
# Bare expression so the notebook renders the table as the cell output.
df_acc

Unnamed: 0,training,animals10_diff_-1,animals10_diff_4000,animals10_diff_8000,city_classification_original,city_classification_diff
0,SupCE_animals10_baseline,95.30 (94.66),75.57 (72.05),,,
1,SupCE_animals10_diff_-1+4000_diffAug,94.88 (94.21),93.39 (92.60),,,
2,SupCE_animals10_diff_-1+4000_diffAugAllAug,94.75 (94.04),93.01 (92.18),90.85 (89.70),,
3,SupCE_animals10_diff_-1_origAllAug,94.92 (94.32),61.94 (60.00),51.28 (49.32),,
4,SupCE_animals10_diff_-1_small_origSmall,84.19 (85.28),63.22 (63.62),,,
5,SupCE_animals10_diff_4000_4000AllAug,69.56 (65.98),92.02 (91.08),88.69 (87.40),,
6,SupCE_animals10_diff_4000_diff4000,76.80 (73.52),93.28 (92.30),,,
7,SupCE_animals10_diff_4000_small_diff4000Small,61.44 (62.37),74.62 (76.08),,,
8,SupCE_animals10_diff_8000_diff8000,55.16 (52.15),90.74 (89.82),90.91 (89.87),,
9,SupCE_city_classification_diff_cityDiff,,,,78.47 (64.32),82.20 (69.51)


## Table of Distances

In [7]:
# Settings for the distance table. These repeat the values used for the
# accuracy table so this section can be re-run on its own.
epoch = "last"  # selects the "val_<epoch>" result folders

# Datasets whose embeddings are compared; every ordered pair of these is
# searched for a "<a>_dist_to_<b>.csv" file.
datasets = [
    "animals10_diff_-1",
    "animals10_diff_4000",
    "animals10_diff_8000",
    "city_classification_original",
    "city_classification_diff",
]

In [8]:
# Rebuild training_dict for the distance table. NOTE: this rebinds the name
# used by the accuracy section; entries are now 3-tuples
# (source dataset, target dataset, path to distance CSV).
training_dict = {}
for dataset_1 in datasets:
    for dataset_2 in datasets:
        pattern = f"./save/*/*/*/val_{epoch}/{dataset_1}/embeddings/{dataset_1}_dist_to_{dataset_2}.csv"
        # cm_path holds a distance CSV here, not a confusion matrix; the
        # variable name is kept unchanged.
        for cm_path in glob.glob(pattern):
            name = get_name(cm_path)
            training_dict.setdefault(name, []).append((dataset_1, dataset_2, cm_path))

In [11]:
# Column-oriented table data: a "training" column plus one column per
# "<dataset_1>_dist_to_<dataset_2>" pair for which at least one file exists.
dist_dict = {"training": []}
dist_datasets = []

# First pass: discover the dataset pairs that occur, in first-seen order, so
# the set of columns is known before any row is written.
for training in training_dict:
    for dataset_1, dataset_2, _ in training_dict[training]:
        dist_dataset = f"{dataset_1}_dist_to_{dataset_2}"
        if dist_dataset not in dist_dict:
            dist_dict[dist_dataset] = []
            dist_datasets.append(dist_dataset)

# Second pass: fill exactly one cell per (training run, dataset pair).
for training in training_dict:
    # Gather this run's formatted cells per column first. Several files can
    # map to the same run name (e.g. every untrained run is "untrained");
    # appending them directly would make a column longer than "training" and
    # shift all following rows.
    row = {}
    for dataset_1, dataset_2, dist_path in training_dict[training]:
        dists = pd.read_csv(dist_path, index_col=0)
        # The metric names are row labels of the CSV; after transposing they
        # are columns, and .iloc[0] picks the first data column of the file.
        mean_rel, mean_class, mean_all = dists.T.loc[
            :, ["mean_distance_related", "mean_distance_classes", "mean_distance_all_vs_all"]
        ].iloc[0]

        dist_dataset = f"{dataset_1}_dist_to_{dataset_2}"
        # Each value is followed in parentheses by its ratio to the
        # all-vs-all mean (the last ratio is the reference, printed as-is).
        row.setdefault(dist_dataset, []).append(
            f"{mean_rel:.4f} ({mean_rel/mean_all:.4f}) | {mean_class:.4f} ({mean_class/mean_all:.4f}) | {mean_all:.4f} ({mean_all/mean_all})"
        )

    dist_dict["training"].append(training)
    for dist_dataset in dist_datasets:
        # "" when the run has no file for this pair, the single value in the
        # normal case, all values joined with " / " on name collisions.
        dist_dict[dist_dataset].append(" / ".join(row.get(dist_dataset, [])))

In [12]:
# Turn the column lists into a table, order the rows alphabetically by run
# name and renumber the index from 0.
df_dist = (
    pd.DataFrame.from_dict(dist_dict)
    .sort_values(by="training")
    .reset_index(drop=True)
)
# Bare expression so the notebook renders the table as the cell output.
df_dist

Unnamed: 0,training,animals10_diff_-1_dist_to_animals10_diff_4000
0,SupCE_animals10_diff_-1+4000_diffAugAllAug,0.0138 (0.0652) | 0.0963 (0.4535) | 0.2124 (1.0)
1,SupCE_animals10_diff_-1_origAllAug,0.0991 (0.4283) | 0.1531 (0.6614) | 0.2314 (1.0)
2,SupCE_animals10_diff_4000_4000AllAug,0.1391 (0.5245) | 0.2010 (0.7580) | 0.2652 (1.0)
3,SupConHybrid_animals10_diff_-1+4000_colorAug,0.0012 (0.0033) | 0.3294 (0.9331) | 0.3530 (1.0)
4,SupCon_animals10_diff_-1+4000_colorAugSameShap...,0.0324 (0.0862) | 0.2587 (0.6880) | 0.3760 (1.0)
5,SupCon_animals10_diff_-1+4000_factor20cAugSame...,0.0099 (0.0212) | 0.4068 (0.8705) | 0.4673 (1.0)
6,SupCon_animals10_diff_-1+4000_factor5cAugSameSAug,0.0157 (0.0375) | 0.3231 (0.7738) | 0.4175 (1.0)
7,SupCon_animals10_diff_-1_baseline,0.2267 (0.4970) | 0.3683 (0.8075) | 0.4561 (1.0)
