In [45]:
import json
import os
from collections import defaultdict

import numpy as np

from _baseline_utils import get_search_space
from continual_learning.continual_metrics import compute_continual_metrics

Select the dataset and the number of tasks, then load every experiment run stored for that configuration:

In [46]:
# Name of the dataset to analyse; used as a path component of the results directory.
DATASET_NAME = "cifar10"
# Value substituted into the "results-random@{N_TASKS}" directory name
# (presumably the number of tasks in the continual-learning sequence - confirm).
N_TASKS = 5

In [48]:
# Collect the metrics of every run stored under
# results-random@{N_TASKS}/{DATASET_NAME}/models and aggregate them per baseline.
# Every run lives in a sub-directory named "<baseline>-<seed tag>" that holds a
# history.json file.
results_dirs = os.path.join(
    f"results-random@{N_TASKS}",
    DATASET_NAME,
    "models"
)


def _final_values(training_metrics, key):
    """Return the last recorded `key` value of every entry in `training_metrics`."""
    return [btask[key][-1] for btask in training_metrics.values()]


def _run_metrics(baseline_method, baseline_history):
    """Return the (metric name, values) pairs to record for a single run.

    The "joint" baseline stores its validation history under "joint_training"
    and only contributes accuracy and flatness; every other baseline contributes
    the output of `compute_continual_metrics` plus flatness.
    """
    if baseline_method == "joint":
        training_metrics = baseline_history["joint_training"]["validation"]
        return [
            ("average_accuracy", _final_values(training_metrics, "accuracy")),
            ("flatness", _final_values(training_metrics, "flatness")),
        ]

    continual_metrics = compute_continual_metrics(baseline_history)
    training_metrics = baseline_history["training_metrics"]["validation"]
    pairs = [(metric, continual_metrics[metric]) for metric in continual_metrics]
    pairs.append(("flatness", _final_values(training_metrics, "flatness")))
    return pairs


# Group the experiments by baseline name: {baseline: {metric: [one entry per run]}}
experiments_groups = defaultdict(lambda: defaultdict(list))

# Sorted so the order in which runs are accumulated does not depend on the
# filesystem's directory ordering.
for baseline_model in sorted(os.listdir(results_dirs)):
    # Split on the LAST hyphen: a plain split("-") with two-target unpacking
    # raises ValueError for stray entries and for hyphenated baseline names.
    baseline_method, separator, _seed_tag = baseline_model.rpartition("-")
    is_run_dir = os.path.isdir(os.path.join(results_dirs, baseline_model))
    if not separator or not is_run_dir:
        print(f"Skipping {baseline_model}: not a '<baseline>-<seed>' run directory")
        continue

    # Read the model data from the history
    baseline_history_path = os.path.join(results_dirs, baseline_model, "history.json")
    try:
        with open(baseline_history_path, "r") as f:
            baseline_history = json.load(f)
    except FileNotFoundError:
        # A run without a history is reported and left out of the aggregation.
        print(f"File {baseline_history_path} not found")
        continue

    # Add the data of this run to its baseline group
    for metric, values in _run_metrics(baseline_method, baseline_history):
        experiments_groups[baseline_method][metric].append(values)

# Aggregate the runs of each group: every metric's list of runs is replaced by
# its NaN-ignoring mean and standard deviation computed across runs (axis 0).
baseline_groups = list(experiments_groups.keys())

for baseline_group in baseline_groups:
    group_metrics = experiments_groups[baseline_group]
    # Only existing keys are reassigned, so iterating over items() is safe here.
    for metric, runs in group_metrics.items():
        group_metrics[metric] = {
            "mean": np.nanmean(runs, axis=0),
            "std": np.nanstd(runs, axis=0)
        }


File results-random@5/cifar10/models/ewc-seed43/history.json not found


Print the last value of every metric (mean and standard deviation across runs, scaled to percent) for the selected baseline:

In [49]:
# Baseline whose aggregated metrics are printed below.
BASELINE_METHOD = "ewc"

# Check membership before indexing: experiments_groups is a defaultdict, so
# indexing an unknown baseline would silently insert an empty group and the
# loop would print nothing at all.
if BASELINE_METHOD not in experiments_groups:
    print(
        f"No results for baseline '{BASELINE_METHOD}'; "
        f"available baselines: {sorted(experiments_groups)}"
    )
else:
    for metric, stats in experiments_groups[BASELINE_METHOD].items():
        # [-1] selects the last entry of each aggregated array (presumably the
        # value after the final task - confirm); values are scaled by 100.
        print(metric)
        print("Mean: ", 100 * stats["mean"][-1])
        print("Std: ", 100 * stats["std"][-1])
        print("")


average_accuracy
Mean:  95.98166666666668
Std:  0.0

average_incremental_accuracy
Mean:  95.10244131088257
Std:  0.0

average_forgetting
Mean:  0.9291666559875011
Std:  0.0

backward_transfer
Mean:  -0.929166666666667
Std:  0.0

forward_transfer
Mean:  -0.8208333333333317
Std:  0.0

flatness
Mean:  78.24714774544577
Std:  0.0

