In [1]:
from __future__ import annotations

import logging 
import sys

from collections import defaultdict
from functools import partial
from pathlib import Path

from typing import Union, Callable, Optional
from itertools import repeat
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# Data loading utilities

In [2]:
def nested_defaultdict(default_factory: Callable, depth: int = 1):
    """Creates a nested default dictionary of arbitrary depth with a specified callable as leaf."""
    if not depth:
        return default_factory()
    result = partial(defaultdict, default_factory)
    for _ in repeat(None, depth - 1):
        result = partial(defaultdict, result)
    return result()


# Tensorboard parsing utilities

Parse the losses from tensorboard and process into a dictionary for each experiment.

In [4]:
import tensorboard as tb
import pandas as pd
import re

In [None]:
def safeget(dct: dict, *keys: Union[tuple[str], list[str]]):
    """Retrieves the value of one nested key represented in `keys`"""
    for key in keys:
        try:
            dct = dct[key]
        except KeyError:
            return None
    return dct

def parse_tb_to_df(experiment_id: str) -> pd.DataFrame:
    experiment = tb.data.experimental.ExperimentFromDev(experiment_id)
    return experiment.get_scalars()

def get_version(exp_name: str) -> str:
    match = re.search(r"version_\d+$", exp_name)  # noqa
    if match is not None:
          version = exp_name[match.start() : match.end()]
    else:
        logging.warning(
               f"Experiment name {exp_name} contained no input data version, defaulting to version_1"
        )
        version = "version_1"
    return version


def map_tb_df_to_dict(
        df: pd.DataFrame,
        experiment_name_map: Optional[dict[str, str]] = None,
        tag_name_map: Optional[dict[str, str]] = None,
        include_version: bool = True,
        include_only_tags: Optional[set[str]] = None,
        extrema_functions: Optional[Union[Callable, list[Callable]]] = None,
) -> dict:
    # group_df = nested_defaultdict(list, depth=2)
    depth = 3 if not include_version else 4
    tag_values = nested_defaultdict(list, depth=depth)
    extrema_indices = nested_defaultdict(list, depth=depth)
    time_series_keys = ["step", "value"]
    ver = None
    get_extremum = min
    if callable(extrema_functions):
        get_extremum = extrema_functions
    for exp_name, exp_df in df.groupby(df.run):
        if experiment_name_map is not None and exp_name in experiment_name_map:
            exp_name = experiment_name_map[exp_name]
        if include_version:
            ver = get_version(exp_name)
        for tag_name, tag_df in exp_df.groupby(exp_df.tag):
            if include_only_tags is not None and not any((tag in tag_name for tag in include_only_tags)):
                continue
            tag_name = tag_name.replace("Loss/", "")
            if tag_name_map is not None and tag_name in tag_name_map:
                tag_name = tag_name_map[tag_name]
            store_location_prefix = [
                exp_name if ver is None else exp_name.replace(f"_{ver}", ""),
                tag_name
            ]
            for step_or_value in time_series_keys:
                if ver is not None:
                    store_key = store_location_prefix + [ver, step_or_value]
                    store_location = safeget(tag_values, *store_key)
                    extrema_store_location = safeget(extrema_indices, *store_key)
                else:
                    store_key = store_location_prefix + [step_or_value]
                    store_location = safeget(tag_values, *store_key)
                    extrema_store_location = safeget(extrema_indices, *store_key)
                series = tag_df[step_or_value].tolist()
                store_location.extend(series)
                if step_or_value == 'value':
                    if isinstance(extrema_functions, list):
                        assert include_only_tags is not None
                        assert len(extrema_functions) == len(include_only_tags)
                        get_extremum = extrema_functions[tag_name.index(include_only_tags)]
                    extremum = get_extremum(series)
                    extremum_index = series.index(extremum)
                    extremum_step = tag_df["step"].tolist()[extremum_index]
                    extrema_store_location.append(extremum_step)
                    step_store_key = store_key[:-1] + ['step']
                    step_store_location = safeget(extrema_indices, *step_store_key)
                    step_store_location.append(extremum_index)

            # group_df[exp_name][tag_name] = tag_df
            # loss_values[exp_name][tag_name]["step"] = tag_df.step.tolist()
            # loss_values[exp_name][tag_name]["value"] = tag_df.value.tolist()
    return {
        'series_values': tag_values,
        'extrema': extrema_indices,
    }

Parse tensorboard and find the step at which #ALL_SERVICES/joint_goal_accuracy is maximised. Delete all the other checkpoints.

In [42]:
import re
import shutil

In [12]:
experiment_id = 'SWslQNHDRveX06WKof46ZA'

In [7]:
tb_df = parse_tb_to_df(experiment_id)

INFO:absl:Requested server info: <version: "2.8.0"
plugin_specification {
  upload_plugins: "scalars"
}
>


In [8]:
data = map_tb_df_to_dict(
    tb_df, 
    include_only_tags = {'#ALL_SERVICES/joint_goal_accuracy'},
    extrema_functions = max,

)
series = data['series_values']
extrema = data['extrema']



In [33]:
keyword = 't5_small'
metric = '#ALL_SERVICES/joint_goal_accuracy'

In [34]:
matching_keys = [k for k in data['extrema'].keys() if keyword in k]

In [39]:
for mk in matching_keys:
    metric_data = data['extrema'][mk][metric]
    version_keys = metric_data.keys()
    for vk in version_keys:
        print(mk, vk, metric_data[vk].keys(),"; step", metric_data[vk]['step'], "; value", metric_data[vk]['value'] )

seed_230792_d3st_baseline_t5_small_bs_16_ga_4_wa_500 version_7 dict_keys(['step', 'value']) ; step [12] ; value [2080000]
seed_230792_d3st_baseline_t5_small version_7 dict_keys(['step', 'value']) ; step [15] ; value [2560000]
seed_240463_d3st_baseline_t5_small_bs_16_ga_4_wa_500 version_7 dict_keys(['step', 'value']) ; step [21] ; value [3520000]
seed_240463_d3st_baseline_t5_small_bs_16_ga_{$.gradient_accumulation_steps}_wa_500 version_7 dict_keys(['step', 'value']) ; step [15] ; value [2560000]
seed_240463_d3st_baseline_t5_small version_7 dict_keys(['step', 'value']) ; step [15] ; value [2880000]
seed_240463_d3st_baseline_t5_small_version_7_bs16_ga16_wa125 version_1 dict_keys(['step', 'value']) ; step [14] ; value [2400000]
seed_240463_d3st_baseline_t5_small_version_7_ref version_1 dict_keys(['step', 'value']) ; step [13] ; value [2240000]


In [40]:
models_path = Path('/scratches/neuron/dev/d3st/models')
version =7
pattern = re.compile(r'model\.')

In [41]:
for mk in matching_keys:
    for mdir in models_path.joinpath(mk, f"version_{version}").iterdir():
        if pattern.match(mdir.name):
            max_jga_step = data['extrema'][mk][metric][f"version_{version}"]["value"][0]
            if mdir.name == f"model.{max_jga_step}":
                print(f"keep {mdir}")
            else:
                print(f"discard {mdir}")
                shutil.rmtree(mdir)

discard /scratches/neuron/dev/d3st/models/seed_230792_d3st_baseline_t5_small_bs_16_ga_4_wa_500/version_7/model.160000


NameError: name 'shutil' is not defined