In [1]:
import _config

%cd ..


from tools.post_search import (
    get_archive_best_models, get_base_model_size, get_model_flatness, get_best_archive,
)
from continual_learning.continual_metrics import compute_continual_metrics
from search_space import get_search_space, ModelSample
import matplotlib.pyplot as plt
import numpy as np
import json
import os
import re

# Root directory of the NAS search results. The path is relative, so it is
# resolved against the working directory set by the `%cd ..` above.
BASE_DIR = "nas-results"


/home/fsahli/vvcastro/continual-nas


Define an experiment and retrieve all the seed-values available:

In [16]:
# Dataset name: selects the `efficient-<dataset>` results directory and is
# forwarded to the evaluator script through `--dataset`.
DATASET_NAME = "cifar10"
# Model type: matched against the experiment directory names; the cells below
# compare it against "growing" and "fixed".
MODEL_TYPE = "growing"

# NAS variant forwarded to `get_best_archive`; when it is "single-objective"
# the flatness of each model is looked up separately and appended to the metrics.
NAS_TYPE = "multi-objective"

Define utility functions to retrieve the data:

In [17]:
def get_experiment_best_archives(dataset: str, model_type: str):
    """Load the best archive of every seed available for one experiment setup.

    Scans ``BASE_DIR/efficient-{dataset}`` for entries whose name contains
    ``"{model_type}-seed"``, reads the seed number that follows that marker and
    loads the archive of each seed through ``get_best_archive`` (using the
    notebook-level ``NAS_TYPE``).

    Args:
        dataset: Dataset name used to build the results directory name.
        model_type: Model type used to select the experiment directories.

    Returns:
        A ``(archives, seeds)`` tuple of two parallel lists ordered by
        ascending seed. Seeds for which ``get_best_archive`` raises
        ``FileNotFoundError`` are left out of both lists.
    """
    # Get the experiments matching the dataset and model type
    _basedir = os.path.join(BASE_DIR, f"efficient-{dataset}")

    # Anchor the seed on the "<model_type>-seed" marker instead of taking the
    # first digit run anywhere in the name, so digits appearing earlier in the
    # directory name cannot be mistaken for the seed. Entries without a number
    # after the marker are skipped instead of crashing on a failed match.
    seed_pattern = re.compile(rf"{re.escape(model_type)}-seed\D*(\d+)")
    found_seeds = set()
    for entry in os.listdir(_basedir):
        match = seed_pattern.search(entry)
        if match is not None:
            found_seeds.add(int(match.group(1)))

    # Load the last archive for each experiment. The seeds are de-duplicated
    # (two directories of the same seed must not be counted twice) and sorted,
    # because os.listdir returns entries in arbitrary order.
    last_archives, archive_seeds = [], []
    for seed in sorted(found_seeds):
        try:
            seed_archive = get_best_archive(
                BASE_DIR, dataset, model_type, seed, nas_type=NAS_TYPE,
            )
        except FileNotFoundError:
            # No archive stored for this seed: ignore it.
            continue
        last_archives.append(seed_archive)
        archive_seeds.append(seed)

    return last_archives, archive_seeds

def get_best_models_from_archive(
    archive: list,
    seed: int,
    sort_by: str = "accuracy",
    n_best: int = 50,
):
    """Pick the single best model of an archive together with its metrics.

    Args:
        archive: Records of one archive. It is iterated twice and every record
            is indexed with the ``"sample"`` and ``"metrics"`` keys.
        seed: Seed of the experiment; only used to look up the flatness of the
            models when ``NAS_TYPE`` is ``"single-objective"``.
        sort_by: ``"accuracy"`` ranks the models by metric column 0; any other
            value ranks them by metric column 1.
        n_best: Number of candidates requested from ``get_archive_best_models``
            before the final selection (defaults to the previous fixed value).

    Returns:
        A ``(model, metrics)`` tuple for the candidate with the largest value
        in the selected metric column.
    """
    archive_models = [datum["sample"] for datum in archive]
    archive_metrics = np.stack([datum["metrics"] for datum in archive])

    # Get the best models from the archive
    best_models, best_metrics = get_archive_best_models(
        archive_models,
        archive_metrics,
        n_best=n_best,
    )

    if NAS_TYPE == "single-objective":
        # The flatness is looked up per model and appended as an extra column.
        # NOTE(review): presumably single-objective archives do not store the
        # flatness themselves -- confirm against `get_best_archive`.
        flatnesses = [
            get_model_flatness(BASE_DIR, DATASET_NAME, MODEL_TYPE, seed, model)
            for model in best_models
        ]
        best_metrics = np.concatenate(
            [best_metrics, np.array(flatnesses).reshape(-1, 1)],
            axis=1,
        )

    # Select the row maximising the requested metric column
    sort_idx = 0 if sort_by == "accuracy" else 1
    best_idx = np.argmax(best_metrics[:, sort_idx])
    return best_models[best_idx], best_metrics[best_idx]



Get all the seeds and aggregate to get the final model results:

In [18]:
SORT_BY = "accuracy"

last_archives, seeds = get_experiment_best_archives(DATASET_NAME, MODEL_TYPE)
print(f"Found {len(seeds)} archives")

# Per-seed results of the selected model: metrics, size and the model itself
best_accuracies, best_flatnesses = [], []
bast_params = []
best_models = []

for archive, seed in zip(last_archives, seeds):
    best_model, best_metrics = get_best_models_from_archive(
        archive, seed, sort_by=SORT_BY
    )

    # Metric column 0 is reported as accuracy, column 1 as flatness
    best_accuracies.append(best_metrics[0])
    best_flatnesses.append(best_metrics[1])

    # Model size, then the model itself
    bast_params.append(get_base_model_size(DATASET_NAME, best_model))
    best_models.append(best_model)

# Report mean and std of every aggregated quantity, separated by a rule
summary = (
    ("Accuracy:", best_accuracies),
    ("Flatness:", best_flatnesses),
    ("Model size:", bast_params),
)
for position, (title, values) in enumerate(summary):
    if position > 0:
        print("-" * 50)
    print(title)
    print(f"Mean: {np.mean(values)}")
    print(f"Std: {np.std(values)}")


Found 3 archives


Accuracy:
Mean: 96.51564079214234
Std: 0.22541629431276003
--------------------------------------------------
Flatness:
Mean: 76.0812767184703
Std: 3.37799548459081
--------------------------------------------------
Model size:
Mean: 3.9233333333333333
Std: 0.498820831784542


### Evaluate to get the continual metrics:

In [5]:
import subprocess

def test_model(test_dir: str, model: ModelSample, model_type: str, seed: int, n_tasks: int = 10):
    """Run the model evaluator script on one model and return its encoding.

    The model is encoded with the "mobilenetv3" search space and handed to
    ``scripts-nas/02_model_evaluator.py`` in a subprocess, using the
    notebook-level ``DATASET_NAME`` as dataset.

    Args:
        test_dir: Experiment directory passed to the script.
        model: Model sample to evaluate.
        model_type: ``"fixed"`` selects the fixed search space; ``"growing"``
            is evaluated with the "expandable" architecture, any other value
            with the "fixed" one.
        seed: Random seed passed to the script.
        n_tasks: Number of tasks passed to the script.

    Returns:
        The encoding of ``model`` produced by the search space.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
    """
    # Local import keeps the function self-contained within its cell.
    import sys

    search_space = get_search_space("mobilenetv3", model_type == "fixed")
    model_encoding = search_space.encode(model)
    architecture = "expandable" if model_type == "growing" else "fixed"

    # Launch the script with the interpreter running this kernel: a bare
    # "python" is resolved through PATH and may belong to another environment.
    # sys.executable can be empty when it cannot be determined, hence the
    # fallback to the previous behaviour.
    interpreter = sys.executable or "python"

    # Build the evaluator run params
    params = [
        interpreter,
        "scripts-nas/02_model_evaluator.py",
        "--experiment_dir",
        str(test_dir),
        "--model_encoding",
        *map(str, model_encoding),
        "--dataset",
        str(DATASET_NAME),
        "--n_tasks",
        str(n_tasks),
        "--epochs_per_task",
        "1",
        "--architecture",
        architecture,
        "--random_seed",
        str(seed),
    ]

    # Execute the script; check=True turns a failed run into an exception
    subprocess.run(params, check=True)
    return model_encoding


Evaluate the models on both 5 and 10 tasks:

In [6]:
BEST_MODELS_RESULTS_DIR = os.path.join(
    "nas-tester", f"efficient-{DATASET_NAME}", "best-models"
)

evaluated_experiments = []
for model, seed in zip(best_models, seeds):
    # Run the 5-task evaluation first, then the 10-task one
    encodings = [
        test_model(BEST_MODELS_RESULTS_DIR, model, MODEL_TYPE, seed, n_tasks=task_count)
        for task_count in (5, 10)
    ]
    encoded_model = encodings[0]

    # Experiment identifier: the concatenated encoding followed by the seed
    encoding_text = "".join(str(value) for value in encoded_model)
    evaluated_experiments.append(f"{encoding_text}-{seed}")

Model 1100010101001010100010011001110010101011010109010-44 already trained, skipping...
Model 1100010101001010100010011001110010101011010109010-44 already trained, skipping...
Model 1011001001110001100100001000010010000000000004010-43 already trained, skipping...
Model 1011001001110001100100001000010010000000000004010-43 already trained, skipping...
Model 1010011101110000001111001001111001000100001005010-42 already trained, skipping...
Model 1010011101110000001111001001111001000100001005010-42 already trained, skipping...


Aggregate the results:

In [11]:
N_TASKS = 10

# Directory with the models evaluated on N_TASKS tasks
experiments_base_dir = os.path.join(f"{BEST_MODELS_RESULTS_DIR}-{N_TASKS}", "models")

# Continual metrics of every evaluated experiment
experiment_metrics = []
for experiment_dir in evaluated_experiments:
    history_path = os.path.join(experiments_base_dir, experiment_dir, "history.json")

    # Read the training history file
    with open(history_path, "r") as f:
        model_history = json.load(f)

    # Continual metrics computed from the history
    model_metrics = compute_continual_metrics(model_history)

    # Flatness: mean over the entries of the last validation flatness of each
    validation_history = model_history["training_metrics"]["validation"]
    model_flatness = [task["flatness"][-1] for task in validation_history.values()]
    model_metrics["flatness"] = np.array([np.mean(model_flatness)])

    experiment_metrics.append(model_metrics)


# Stack every metric across experiments (one row per experiment)
metric_names = experiment_metrics[0].keys()
aggregated_metrics = {
    metric: np.array([exp_metrics[metric] for exp_metrics in experiment_metrics])
    for metric in metric_names
}


Get the mean and std of the metrics:

In [12]:
# Report, for every metric, the mean and std across experiments of its last entry
for metric, runs in aggregated_metrics.items():
    print(metric)
    last_mean = np.nanmean(runs, axis=0)[-1]
    print("Mean:", 100 * last_mean)
    last_std = np.nanstd(runs, axis=0)[-1]
    print("Std:", 100 * last_std)
    print("-" * 50)


average_accuracy
Mean: 94.6732254069356
Std: 0.9028246266415635
--------------------------------------------------
average_incremental_accuracy
Mean: 94.46292320887247
Std: 0.8542733600195338
--------------------------------------------------
average_forgetting
Mean: 2.3856570944190025
Std: 0.3408280601027133
--------------------------------------------------
backward_transfer
Mean: -2.3560273649445618
Std: 0.30339583257549346
--------------------------------------------------
forward_transfer
Mean: 0.6043721003381298
Std: 0.2969991704659652
--------------------------------------------------
flatness
Mean: 80.5379482632305
Std: 2.1256914424982973
--------------------------------------------------
