In [27]:
import copy
from pprint import pprint

import numpy as np
import yaml
from easydict import EasyDict
from tqdm import tqdm

import wandb

In [28]:
# Weights & Biases API client; used further down to list the project's runs.
api = wandb.Api()

In [29]:
def _read_config(path):
    """Parse the YAML file at ``path`` and wrap the result in an ``EasyDict``."""
    with open(path, "r") as handle:
        return EasyDict(yaml.safe_load(handle))


# Configuration files live next to the notebook's parent directory.
base_config = _read_config("../configs/base_config.yaml")
igs_config = _read_config("../configs/igs_config.yaml")
relative_flatness_config = _read_config("../configs/relative_flatness_config.yaml")

In [30]:
# Pretty-print the loaded base configuration for inspection.
pprint(base_config)

{'batch_size': 256,
 'cifar_dir': '../data/cifar',
 'config_dir': '../configs',
 'learning_rate': 0.05,
 'metrics': ['acc', 'ece', 'squared_euclidean_norm', 'fisher_rao_norm'],
 'models_dir': '../models',
 'momentum': 0.9,
 'num_epochs': 75,
 'seed': [43, 91, 17],
 'sharpness_batch_size': 25000,
 'sharpness_dataset_size': -1,
 'vgg_config': 'vgg_config.json',
 'wandb': {'entity': 'r252_project',
           'experiment_name': '',
           'project': 'VGG19_CIFAR10_FINAL'}}


In [31]:
# Checkpoint variants to report on. Each name is looked up in a run's summary
# under the key `eval_metrics_<name>/<metric>`.
models = ["best_model", "temp_best_model", "model", "temp_model"]

In [32]:
# Metrics to collect and aggregate, in the order they appear as table columns.
# "acc" and "ece" are rescaled to percentages when the LaTeX tables are built.
metrics = ["acc", "ece", "squared_euclidean_norm", "fisher_rao_norm", "relative_flatness", "IGS"]

In [33]:
# Regularisation approaches to compare. Each one is matched against run names
# that start with `VGG19-CIFAR10-baseline-with-<approach>`.
approaches = ["no-regularisation", "augmentation", "dropout_0.5", "weight_decay0.0005"]

In [34]:
# Runs of the W&B project identified by the entity/project pair in the base config.
runs = api.runs(f"{base_config.wandb.entity}/{base_config.wandb.project}")

In [37]:
# results[model][approach][metric] maps a seed (string taken from the run name)
# to the value stored in that run's summary.
results = {
    model: {approach: {metric: {} for metric in metrics} for approach in approaches}
    for model in models
}

for run in tqdm(runs):
    for approach in approaches:
        # Only runs whose name starts with the prefix for this approach are used.
        if not run.name.startswith(f"VGG19-CIFAR10-baseline-with-{approach}"):
            continue

        # The seed is whatever follows the last "-seed-" marker in the run name.
        seed = run.name.split("-seed-")[-1]

        for metric in metrics:
            # Skip metrics this run was not configured to evaluate.
            if metric not in run.config['metrics']:
                continue

            for model in models:
                summary_key = f"eval_metrics_{model}/{metric}"
                if summary_key in run.summary:
                    results[model][approach][metric][seed] = run.summary[summary_key]
                            
                        

100%|██████████| 31/31 [00:00<00:00, 6018.49it/s]


In [38]:
# Pretty-print the raw per-seed values gathered from the run summaries.
pprint(results)

{'best_model': {'augmentation': {'IGS': {'91': 'NaN'},
                                 'acc': {'17': 0.879,
                                         '43': 0.8954,
                                         '91': 0.8935},
                                 'ece': {'17': 0.07430009543895721,
                                         '43': 0.06152530014514923,
                                         '91': 0.06169174239039421},
                                 'fisher_rao_norm': {'17': 3.228570511410916,
                                                     '43': 1.876088123469265,
                                                     '91': 1.852983583958818},
                                 'relative_flatness': {'17': 14.161212314094882,
                                                       '43': 10.83865855152908,
                                                       '91': 11.235430257707776},
                                 'squared_euclidean_norm': {'17': 53.53863951003177,
            

In [39]:
def _summarise_seed_values(values):
    """Collapse the per-seed values of one metric into a ``(mean, std)`` pair.

    Args:
        values: Iterable of the values recorded for each seed. Entries may be
            numbers or strings such as ``'NaN'``, which is how non-numeric
            results show up in the collected run summaries.

    Returns:
        A 2-tuple ``(mean, std)``:
          * ``(nan, nan)`` when no value was recorded at all;
          * ``(0, 0)`` when values were recorded but none of them is numeric
            (the placeholder already used for a lone ``'NaN'`` entry);
          * otherwise the mean and population standard deviation of the
            numeric entries (std is 0 for a single entry).
    """
    values = list(values)
    if not values:
        # Return NaN explicitly rather than calling np.mean([]), which yields
        # the same result but emits a "Mean of empty slice" RuntimeWarning.
        return (float("nan"), float("nan"))

    # String entries (e.g. 'NaN') cannot be averaged: mixing them with floats
    # makes numpy build a string array and fail. Drop them for every seed
    # count, not only when there is exactly one seed.
    numeric = [value for value in values if not isinstance(value, str)]
    if not numeric:
        return (0, 0)

    return (np.mean(numeric), np.std(numeric))


# processed_results[model][approach][metric] -> (mean, std) across seeds.
processed_results = {
    model: {
        approach: {
            metric: _summarise_seed_values(results[model][approach][metric].values())
            for metric in metrics
        }
        for approach in approaches
    }
    for model in models
}

In [40]:
# Display the aggregated (mean, std) pair for every model / approach / metric.
processed_results

{'best_model': {'no-regularisation': {'acc': (0.8555666666666667,
    0.0004988876515698534),
   'ece': (0.11040156086285909, 0.0023397520496003287),
   'squared_euclidean_norm': (49.32138945233735, 4.121913334052008),
   'fisher_rao_norm': (0.7579933050842342, 0.3287841812560869),
   'relative_flatness': (2.224353429644476, 1.002149927487409),
   'IGS': (0, 0)},
  'augmentation': {'acc': (0.8893, 0.007324388484144354),
   'ece': (0.06583904599150021, 0.005983251293930684),
   'squared_euclidean_norm': (51.45655028173201, 1.5720994049288295),
   'fisher_rao_norm': (2.319214072946333, 0.6430812826122064),
   'relative_flatness': (12.078433707777245, 1.481627931006255),
   'IGS': (0, 0)},
  'dropout_0.5': {'acc': (0.8613000000000001, 0.0019442222095223322),
   'ece': (0.10981579124927521, 0.002228018380858644),
   'squared_euclidean_norm': (39.13346427632886, 0.3102168337508363),
   'fisher_rao_norm': (0.7614324797670576, 0.12557730909466713),
   'relative_flatness': (1.1979145049319488,

In [42]:
# Render one LaTeX results table per checkpoint in `models` and print it so it
# can be copied into a document.

# One left-aligned label column plus one right-aligned 2cm column per metric.
# The spec previously hard-coded seven metric columns while the header and the
# rows only have six, which left an empty trailing column in every table.
column_spec = "l" + r">{\raggedleft\arraybackslash}p{2cm}" * len(metrics)

# NOTE: the header cells are hard-coded and must stay in the same order as `metrics`.
prefix = (
    r"""\begin{table}
    \centering
    \begin{small}
    \begin{tabular}{"""
    + column_spec
    + r"""}
    \toprule
    \textbf{Regularizer} & \textbf{Error (\%)} & \textbf{ECE} & \textbf{Squared euclidean norm} & \textbf{Fisher rao norm} & \textbf{Relative flatness} & \textbf{IGS} \\ \midrule"""
)

for model in models:

    rows = []

    for approach in approaches:
        # Underscores and hyphens are replaced by spaces for the row label.
        approach_name = approach.replace("_", " ").replace("-", " ")
        row = f"\n\t\\textit{{{approach_name}}} "
        for metric in metrics:
            mean, std = processed_results[model][approach][metric]
            if metric == "acc":
                # Accuracy is stored as a fraction; the table reports error in percent.
                mean = 100 - mean * 100
                std *= 100
            elif metric == "ece":
                # ECE is stored as a fraction; the table reports it in percent.
                mean *= 100
                std *= 100
            # Raw f-string so that "\pm" is emitted verbatim instead of being
            # parsed as an (invalid) escape sequence.
            row += rf"& {mean:.2f}$_{{\pm {std:.2f}}}$ "
        rows.append(row + r"\\")

    # Separate name for the caption text so the loop variable is not overwritten.
    model_caption = model.replace("_", " ")

    # NOTE(review): every table is emitted with the same \label{tab:measures};
    # pasting more than one into the same document will give duplicate labels.
    table = prefix + "".join(rows) + r"""
    \bottomrule \\
    \end{tabular}
    \end{small}
    \caption{Results for VGG-19 on CIFAR-10 for """ + model_caption + r"""}
    \label{tab:measures}
\end{table}
    """

    print(table)
    print("\n")

\begin{table}
    \centering
    \begin{small}
    \begin{tabular}{l>{\raggedleft\arraybackslash}p{2cm}>{\raggedleft\arraybackslash}p{2cm}>{\raggedleft\arraybackslash}p{2cm}>{\raggedleft\arraybackslash}p{2cm}>{\raggedleft\arraybackslash}p{2cm}>{\raggedleft\arraybackslash}p{2cm}>{\raggedleft\arraybackslash}p{2cm}}
    \toprule
    \textbf{Regularizer} & \textbf{Error (\%)} & \textbf{ECE} & \textbf{Squared euclidean norm} & \textbf{Fisher rao norm} & \textbf{Relative flatness} & \textbf{IGS} \\ \midrule
	\textit{no regularisation} & 14.44$_{\pm 0.05}$ & 11.04$_{\pm 0.23}$ & 49.32$_{\pm 4.12}$ & 0.76$_{\pm 0.33}$ & 2.22$_{\pm 1.00}$ & 0.00$_{\pm 0.00}$ \\
	\textit{augmentation} & 11.07$_{\pm 0.73}$ & 6.58$_{\pm 0.60}$ & 51.46$_{\pm 1.57}$ & 2.32$_{\pm 0.64}$ & 12.08$_{\pm 1.48}$ & 0.00$_{\pm 0.00}$ \\
	\textit{dropout 0.5} & 13.87$_{\pm 0.19}$ & 10.98$_{\pm 0.22}$ & 39.13$_{\pm 0.31}$ & 0.76$_{\pm 0.13}$ & 1.20$_{\pm 0.21}$ & 0.01$_{\pm 0.00}$ \\
	\textit{weight decay0.0005} & 18.22$_{\pm