In [19]:
import shutil
from pathlib import Path
from omegaconf import OmegaConf
from typing import Callable


def experiment_results_to_storage(
    path: Path, is_date_dir: bool, is_multirun_dir: bool, target_dir: Path, subdir_id_creator: Callable
) -> None:
    """Copy the final artifacts of every run below ``path`` into ``target_dir``.

    ``path`` is walked two levels deep (``path/<timestamp>/<run>``); entries
    that are not directories are ignored on both levels. For every run the
    Hydra config at ``<run>/.hydra/config.yaml`` is loaded and handed to
    ``subdir_id_creator``, whose return value names the sub-directory of
    ``target_dir`` that receives ``model.safetensors``, ``training_args.bin``,
    ``config.json`` and ``config.yaml``. Each destination path is printed
    before the copy.

    Args:
        path: Directory holding one sub-directory per timestamp.
        is_date_dir: Must be True; other layouts are not implemented.
        is_multirun_dir: Must be True; other layouts are not implemented.
        target_dir: Root directory for the copies. It and any missing parents
            are created on demand.
        subdir_id_creator: Called with the loaded config; must return the
            name of the run's sub-directory relative to ``target_dir``.

    Raises:
        NotImplementedError: If either flag is False.
        FileNotFoundError: If a run directory lacks one of the expected
            files. Raised before anything is written for that run.
    """
    if not is_date_dir or not is_multirun_dir:
        raise NotImplementedError("Only date-level multirun directories are supported.")

    for timestamp_dir in path.iterdir():
        if not timestamp_dir.is_dir():
            continue

        for run_dir in timestamp_dir.iterdir():
            if not run_dir.is_dir():
                continue

            hydra_cfg = run_dir / ".hydra" / "config.yaml"
            files_to_copy = [
                run_dir / "model.safetensors",
                run_dir / "training_args.bin",
                run_dir / "config.json",
                hydra_cfg,
            ]

            # Validate up front so an incomplete run never leaves a
            # half-populated directory behind in the storage location.
            missing = [str(f) for f in files_to_copy if not f.is_file()]
            if missing:
                raise FileNotFoundError(f"Run {run_dir} is missing expected files: {missing}")

            cfg = OmegaConf.load(hydra_cfg)
            target_subdir = target_dir / subdir_id_creator(cfg)
            # parents=True: target_dir itself may not exist yet on a fresh machine.
            target_subdir.mkdir(parents=True, exist_ok=True)

            for f_to_be_copied in files_to_copy:
                destination = target_subdir / f_to_be_copied.name
                print(destination)
                shutil.copy2(f_to_be_copied, destination)


def delete_unused_checkpoints(path: Path, is_date_dir: bool, is_multirun_dir: bool) -> None:
    """Remove every ``checkpoint*`` directory from the runs below ``path``.

    ``path`` is walked two levels deep (``path/<timestamp>/<run>``); entries
    that are not directories are ignored on both levels. Inside each run
    directory, every directory whose name matches ``checkpoint*`` is deleted
    together with all of its contents, and its path is printed beforehand.
    Plain files and symlinks that happen to match the pattern are left alone.

    Args:
        path: Directory holding one sub-directory per timestamp.
        is_date_dir: Must be True; other layouts are not implemented.
        is_multirun_dir: Must be True; other layouts are not implemented.

    Raises:
        NotImplementedError: If either flag is False.
    """
    if not is_date_dir or not is_multirun_dir:
        raise NotImplementedError("Only date-level multirun directories are supported.")

    for timestamp_dir in path.iterdir():
        if not timestamp_dir.is_dir():
            continue

        for run_dir in timestamp_dir.iterdir():
            if not run_dir.is_dir():
                continue

            # Materialize the matches first: the directory is modified while we loop.
            for dir_to_delete in list(run_dir.glob("checkpoint*")):
                # The pattern also matches regular files (e.g. "checkpoint.log");
                # rmtree refuses symlinks, so skip both instead of crashing.
                if dir_to_delete.is_symlink() or not dir_to_delete.is_dir():
                    continue
                print(f"Deleting {dir_to_delete}")
                # rmtree copes with nested sub-directories, unlike unlink()+rmdir().
                shutil.rmtree(dir_to_delete)

## Shortcut models

- Copy the best model of each run into the `experiments` directory.
- Delete the now-unused `checkpoint*` directories from the run folders.

In [17]:
# Date-level Hydra multirun directory to process; it holds one sub-directory per launch timestamp.
path = Path("/root/similaritybench/multirun/2024-03-20")
# Both flags must be True: the helpers raise NotImplementedError for any other layout.
is_date_dir = True
is_multirun_dir = True
# Storage root for the shortcut models; one sub-directory per run is written beneath it.
target_dir = Path("/root/similaritybench/experiments/models/nlp/shortcut")


def shortcut_subdir_id(cfg):
    """Derive the storage sub-directory name of a shortcut run from its config.

    The name has the form ``<dataset>_pre<model seed>_ft<finetuning seed>_scrate<rate>``.
    ``<dataset>`` is ``cfg.dataset.path``, extended by ``__<cfg.dataset.name>``
    when a dataset name is set. ``<rate>`` is ``str(cfg.shortcut_rate)`` with
    every "." removed, so e.g. 0.25 becomes "025" (and 1.0 and 10 both become "10").
    """
    dataset_cfg = cfg.dataset
    if dataset_cfg.name:
        dataset_part = f"{dataset_cfg.path}__{dataset_cfg.name}"
    else:
        dataset_part = dataset_cfg.path

    rate_part = str(cfg.shortcut_rate).replace(".", "")
    model_seed = cfg.model.seed
    finetuning_seed = dataset_cfg.finetuning.trainer.args.seed
    return f"{dataset_part}_pre{model_seed}_ft{finetuning_seed}_scrate{rate_part}"

# Archive each run's final artifacts under target_dir, then remove the checkpoint* directories from the run folders.
experiment_results_to_storage(path, is_date_dir, is_multirun_dir, target_dir, shortcut_subdir_id)
delete_unused_checkpoints(path, is_date_dir, is_multirun_dir)


/root/similaritybench/experiments/models/nlp/shortcut/sst2_pre7_ft7_scrate0/model.safetensors
/root/similaritybench/experiments/models/nlp/shortcut/sst2_pre7_ft7_scrate0/training_args.bin
/root/similaritybench/experiments/models/nlp/shortcut/sst2_pre7_ft7_scrate0/config.json
Deleting /root/similaritybench/multirun/2024-03-20/20-58-32/4/checkpoint-3000
Deleting /root/similaritybench/multirun/2024-03-20/20-58-32/4/checkpoint-2000
/root/similaritybench/experiments/models/nlp/shortcut/sst2_pre7_ft7_scrate025/model.safetensors
/root/similaritybench/experiments/models/nlp/shortcut/sst2_pre7_ft7_scrate025/training_args.bin
/root/similaritybench/experiments/models/nlp/shortcut/sst2_pre7_ft7_scrate025/config.json
Deleting /root/similaritybench/multirun/2024-03-20/20-58-32/3/checkpoint-3000
Deleting /root/similaritybench/multirun/2024-03-20/20-58-32/3/checkpoint-2000
/root/similaritybench/experiments/models/nlp/shortcut/sst2_pre7_ft7_scrate075/model.safetensors
/root/similaritybench/experiments/

## Augmented models

(copied over from Viserion)

In [22]:
# Date-level Hydra multirun directory to process; note it lives directly under /root, not inside the repository.
path = Path("/root/multirun/2024-03-20")
# Both flags must be True: the helpers raise NotImplementedError for any other layout.
is_date_dir = True
is_multirun_dir = True
# Storage root for the augmentation models; one sub-directory per run is written beneath it.
target_dir = Path("/root/similaritybench/experiments/models/nlp/augmentation")


def augmentation_subdir_id(cfg):
    """Derive the storage sub-directory name of an augmentation run from its config.

    The name has the form
    ``<dataset>_pre<model seed>_ft<finetuning seed>_eda_strength<strength>``.
    ``<dataset>`` is ``cfg.dataset.path``, extended by ``__<cfg.dataset.name>``
    when a dataset name is set. ``<strength>`` is
    ``cfg.augmentation.recipe.pct_words_to_swap`` converted to float and
    printed with every "." removed, so 0.25 becomes "025" and 1 becomes "10".
    The augmentation identifier is fixed to "eda".
    """
    dataset_cfg = cfg.dataset
    if dataset_cfg.name:
        dataset_part = f"{dataset_cfg.path}__{dataset_cfg.name}"
    else:
        dataset_part = dataset_cfg.path

    recipe_tag = "eda"
    swap_fraction = float(cfg.augmentation.recipe.pct_words_to_swap)
    strength_part = str(swap_fraction).replace(".", "")
    model_seed = cfg.model.seed
    finetuning_seed = dataset_cfg.finetuning.trainer.args.seed
    return f"{dataset_part}_pre{model_seed}_ft{finetuning_seed}_{recipe_tag}_strength{strength_part}"

# Archive each run's final artifacts under target_dir, then remove the checkpoint* directories from the run folders.
experiment_results_to_storage(path, is_date_dir, is_multirun_dir, target_dir, augmentation_subdir_id)
delete_unused_checkpoints(path, is_date_dir, is_multirun_dir)


/root/similaritybench/experiments/models/nlp/augmentation/sst2_pre7_ft7_eda_strength025/model.safetensors
/root/similaritybench/experiments/models/nlp/augmentation/sst2_pre7_ft7_eda_strength025/training_args.bin
/root/similaritybench/experiments/models/nlp/augmentation/sst2_pre7_ft7_eda_strength025/config.json
/root/similaritybench/experiments/models/nlp/augmentation/sst2_pre7_ft7_eda_strength025/config.yaml
/root/similaritybench/experiments/models/nlp/augmentation/sst2_pre7_ft7_eda_strength075/model.safetensors
/root/similaritybench/experiments/models/nlp/augmentation/sst2_pre7_ft7_eda_strength075/training_args.bin
/root/similaritybench/experiments/models/nlp/augmentation/sst2_pre7_ft7_eda_strength075/config.json
/root/similaritybench/experiments/models/nlp/augmentation/sst2_pre7_ft7_eda_strength075/config.yaml
/root/similaritybench/experiments/models/nlp/augmentation/sst2_pre7_ft7_eda_strength10/model.safetensors
/root/similaritybench/experiments/models/nlp/augmentation/sst2_pre7_ft7