In [2]:
import os
import glob
import pandas as pd

import util.util_validation as ut_val

## Table of Accuracies

In [78]:
# Which validation run to summarise; interpolated into the "val_{epoch}" directory
# name and the "cm_val_epoch_{epoch}.csv" file name of the glob pattern below.
epoch = "last"

# Evaluation datasets, in the column order of the resulting table.
datasets = [
    "animals10_diff_-1",
    "animals10_diff_4000",
    "animals10_diff_8000",
    "city_classification_original",
    "city_classification_diff",
]

In [79]:
def get_name(cm_path):
    """Derive a short display label for a training run from a result-file path.

    The path is expected to follow the layout matched by the glob patterns in
    this notebook, ``./save/<*>/<train_dataset>/<run_dir>/...``: after splitting
    on ``/``, component 3 is the training dataset and component 4 is the run
    directory. ``<run_dir>`` is split on ``_``; its first token is taken as the
    method and its second-to-last token as the tag.

    Args:
        cm_path: Path of a result file (confusion matrix or distance CSV)
            located below a run directory.

    Returns:
        ``"untrained"`` when the training-dataset component is ``"untrained"``,
        otherwise ``"<method>_<train_dataset>_<tag>"`` with known tags replaced
        by their display names.

    Raises:
        IndexError: If the path has fewer than five components or the run
            directory name contains no underscore.
    """
    # glob joins matched components with the OS separator (a backslash on
    # Windows), which would break the fixed component indices used below.
    # Normalising is a no-op wherever os.sep is already "/".
    parts = cm_path.replace(os.sep, "/").split("/")

    train_dataset = parts[3]
    run_tokens = parts[4].split("_")
    method = run_tokens[0]
    tag = run_tokens[-2]

    # Raw run tags -> labels shown in the tables; unknown tags pass through unchanged.
    tag_dict = {"try3": "baseline", "4000": "diff4000", "8000": "diff8000", "4000Small": "diff4000Small"}
    tag = tag_dict.get(tag, tag)

    if train_dataset == "untrained":
        return "untrained"
    return f"{method}_{train_dataset}_{tag}"

In [80]:
# Group every confusion-matrix file found on disk by the display name of the
# run that produced it: name -> [(evaluation dataset, path to CSV), ...].
training_dict = {}
for dataset in datasets:
    matches = glob.glob(f"./save/*/*/*/val_{epoch}/{dataset}/cm/cm_val_epoch_{epoch}.csv")
    for cm_path in matches:
        name = get_name(cm_path)
        # setdefault creates the list the first time a name is seen.
        training_dict.setdefault(name, []).append((dataset, cm_path))

In [81]:
# Column-oriented accuracy table: a "training" column plus one column per
# evaluation dataset. Every column must end up with exactly one entry per
# training so that the rows line up when the DataFrame is built.
acc_dict = {"training": []}
for dataset in datasets:
    acc_dict[dataset] = []

for training, results in training_dict.items():
    # Assemble the complete row before touching the columns. Appending straight
    # to the columns misaligns the table when several runs share the same
    # display name for one dataset (that column would grow by more than one).
    row = {dataset: [] for dataset in datasets}
    for dataset, cm_path in results:
        cm = ut_val.load_confusion_matrix(cm_path)
        # compute_accuracies returns two values, shown as "first (second)".
        # NOTE(review): acc_b is presumably the balanced accuracy - confirm in util_validation.
        acc, acc_b = ut_val.compute_accuracies(cm)
        row[dataset].append(f"{acc:.2f} ({acc_b:.2f})")

    acc_dict["training"].append(training)
    for dataset in datasets:
        # No result -> "", one result -> the value itself,
        # several results for the same cell -> all of them, separated by " | ".
        acc_dict[dataset].append(" | ".join(row[dataset]))

In [82]:
# Turn the column lists into a table, order the rows by training name and
# renumber the index so that it follows the sorted order.
df_acc = (
    pd.DataFrame.from_dict(acc_dict)
    .sort_values(by="training")
    .reset_index(drop=True)
)
df_acc

Unnamed: 0,training,animals10_diff_-1,animals10_diff_4000,animals10_diff_8000,city_classification_original,city_classification_diff
0,SupCE_animals10_baseline,95.30 (94.66),75.57 (72.05),,,
1,SupCE_animals10_diff_-1+4000_diffAug,94.88 (94.21),93.39 (92.60),,,
2,SupCE_animals10_diff_-1+4000_diffAugAllAug,94.75 (94.04),93.01 (92.18),,,
3,SupCE_animals10_diff_-1_small_origSmall,84.19 (85.28),63.22 (63.62),,,
4,SupCE_animals10_diff_4000_diff4000,76.80 (73.52),93.28 (92.30),,,
5,SupCE_animals10_diff_4000_small_diff4000Small,61.44 (62.37),74.62 (76.08),,,
6,SupCE_animals10_diff_8000_diff8000,55.16 (52.15),90.74 (89.82),90.91 (89.87),,
7,SupCE_city_classification_diff_cityDiff,,,,78.47 (64.32),82.20 (69.51)
8,SupCE_city_classification_original_cityBaseline,,,,92.25 (86.05),64.86 (54.14)
9,SupConHybrid_animals10_diff_-1+4000_colorAug,57.98 (51.17),57.81 (51.03),,,


## Table of Distances

In [None]:
# Configuration for the distance table; the values repeat those of the
# accuracy section so this section can be run without it.
epoch = "last"

# Datasets between which pairwise distance files are looked up.
datasets = [
    "animals10_diff_-1",
    "animals10_diff_4000",
    "animals10_diff_8000",
    "city_classification_original",
    "city_classification_diff",
]

In [91]:
# Group every distance file found on disk by the display name of its run:
# name -> [(source dataset, target dataset, path to CSV), ...].
# NOTE: this rebinds `training_dict`, which the accuracy section fills with
# 2-tuples; re-running the accuracy table cell after this one fails on unpacking.
training_dict = {}
for dataset_1 in datasets:
    for dataset_2 in datasets:
        matches = glob.glob(
            f"./save/*/*/*/val_{epoch}/{dataset_1}/embeddings/{dataset_1}_dist_to_{dataset_2}.csv"
        )
        for cm_path in matches:
            name = get_name(cm_path)
            # setdefault creates the list the first time a name is seen.
            training_dict.setdefault(name, []).append((dataset_1, dataset_2, cm_path))

In [126]:
# Column-oriented distance table: a "training" column plus one column per
# "<dataset_1>_dist_to_<dataset_2>" pair for which at least one file exists.
dist_dict = {"training": []}
dist_datasets = []

# First pass: discover the columns, in order of first appearance.
for training in training_dict:
    for dataset_1, dataset_2, _ in training_dict[training]:
        dist_dataset = f"{dataset_1}_dist_to_{dataset_2}"
        if dist_dataset not in dist_dict:
            dist_dict[dist_dataset] = []
            dist_datasets.append(dist_dataset)

# Second pass: fill exactly one cell per column for every training.
for training, results in training_dict.items():
    # Assemble the complete row before touching the columns. Appending straight
    # to the columns misaligns the table when several runs share the same
    # display name for one dataset pair (that column would grow by more than one).
    row = {column: [] for column in dist_datasets}
    for dataset_1, dataset_2, dist_path in results:
        # The CSV must flatten to exactly three values, unpacked in this order.
        dists = pd.read_csv(dist_path).values.reshape(-1)
        diff_dist, class_dist, all_dist = dists

        dist_dataset = f"{dataset_1}_dist_to_{dataset_2}"
        # Report the first two distances relative to all_dist rather than raw.
        row[dist_dataset].append(f"{diff_dist/all_dist:.4f} ({class_dist/all_dist:.4f})")

    dist_dict["training"].append(training)
    for dist_dataset in dist_datasets:
        # No result -> "", one result -> the value itself,
        # several results for the same cell -> all of them, separated by " | ".
        dist_dict[dist_dataset].append(" | ".join(row[dist_dataset]))

In [127]:
# Turn the column lists into a table, order the rows by training name and
# renumber the index so that it follows the sorted order.
df_dist = (
    pd.DataFrame.from_dict(dist_dict)
    .sort_values(by="training")
    .reset_index(drop=True)
)
df_dist

Unnamed: 0,training,animals10_diff_-1_dist_to_animals10_diff_4000,animals10_diff_-1_dist_to_animals10_diff_8000,animals10_diff_4000_dist_to_animals10_diff_8000,city_classification_original_dist_to_city_classification_diff
0,SupCE_animals10_baseline,0.3218 (0.6070),,,
1,SupCE_animals10_diff_-1+4000_diffAug,0.0412 (0.4400),,,
2,SupCE_animals10_diff_-1+4000_diffAugAllAug,0.0652 (0.4302),,,
3,SupCE_animals10_diff_-1_small_origSmall,0.3150 (0.7226),,,
4,SupCE_animals10_diff_4000_diff4000,0.3616 (0.6459),,,
5,SupCE_animals10_diff_4000_small_diff4000Small,0.2150 (0.7384),,,
6,SupCE_animals10_diff_8000_diff8000,0.4863 (0.7668),0.5693 (0.7865),0.0423 (0.4853),
7,SupCE_city_classification_diff_cityDiff,,,,0.2861 (0.7362)
8,SupCE_city_classification_original_cityBaseline,,,,0.4336 (0.7633)
9,SupConHybrid_animals10_diff_-1+4000_colorAug,0.0033 (0.9124),,,
