# Setup code

In [None]:
%matplotlib inline
import collections
import copy
import html
import os
import re
import shutil
import json
import glob

from IPython.core.display import display, HTML
import numpy as np 
import pandas as pd

# FIXME(sam): make cluster subpath selectable with a dropdown. Also, automatically infer
# gfs_mount from the hostname (have sensible default for svm/perceptron)

# Sub-path (relative to gfs_mount) that identifies the data for one particular
# cluster on the GFS volume.
cluster_subpath = "cluster-data/cluster-2020-12-21-runs-try1/"
# on svm I think gfs_mount is /scratch/sam/repl-vol/ or something like that
gfs_mount = "/scratch/sam/il-representations-gcp-volume/"  # Google Filestore mount point (local)
# Local directory holding all runs of the selected cluster.
runs_directory = os.path.join(gfs_mount, cluster_subpath)
path_translations = {
    # Maps path prefixes as they were recorded in saved configs (e.g. in
    # 'encoder_path' and 'policy_path') to the equivalent local prefix:
    # the key on the left is replaced with the value on the right.
    "/data/il-representations/": gfs_mount,
    "/root/il-rep/runs/": os.path.join(gfs_mount, cluster_subpath),
}
# FIXME(sam): remove exp_name (I think we can rewrite interp code to not rely on it)
exp_name = "10"
exp_dir = os.path.join(runs_directory, 'chain_runs', exp_name)
# Fail early if the selected experiment directory is not present locally
# (e.g. the volume is not mounted or the paths above are wrong).
assert os.path.exists(exp_dir)

# Preparing a table of results

In [None]:
def get_parent_relpath(sample_parent_file, local_root_dir):
    """Return the directory containing `sample_parent_file`, expressed
    relative to `local_root_dir`.

    `sample_parent_file` is a path to a 'parent' artefact such as a saved
    encoder or policy, as recorded in a run's config. Such paths may have
    been written on a different machine (e.g. inside the Ray docker
    container on GCP, where '/root/il-rep/runs' corresponds to
    'cluster-data/' in the GFS volume), so every matching prefix listed in
    the module-level `path_translations` mapping is first rewritten to its
    local equivalent."""
    translated = sample_parent_file
    for old_prefix, new_prefix in path_translations.items():
        if not translated.startswith(old_prefix):
            continue
        translated = new_prefix + translated[len(old_prefix):]

    containing_dir = os.path.dirname(os.path.abspath(translated))
    return os.path.relpath(containing_dir, local_root_dir)

class SubexperimentRun:
    """A SubexperimentRun gathers the information associated with one run
    (single execution) of a particular Sacred sub-experiment, i.e. of the
    'repl', 'il_train', or 'il_test' experiments.

    Attributes set by the constructor:

    - ident: path of the run directory relative to the experiment root;
      used as a unique identifier.
    - subexp_dir, experiment_dir_root: absolute versions of the
      constructor arguments.
    - mode: 'repl', 'il_train' or 'il_test' (second-last path component).
    - config: dict loaded from the run's config.json.
    - progress_path, eval_json_path: paths to progress.csv / eval.json in
      the run directory, or None when the file does not exist.
    - parent_ident: `ident` of the run that produced this run's encoder
      (il_train) or policy (il_test), or None if there is no parent."""
    def __init__(self, subexp_dir, experiment_dir_root):
        # Subexperiment dir is used as a unique identifier.
        # We strip out the leading 'experiment_dir_root' to shorten identifiers.
        subexp_dir = os.path.abspath(subexp_dir)
        experiment_dir_root = os.path.abspath(experiment_dir_root)
        self.ident = os.path.relpath(subexp_dir, experiment_dir_root)
        self.subexp_dir = subexp_dir
        self.experiment_dir_root = experiment_dir_root

        # usually paths are like, e.g., 'chain_runs/repl/42' or
        # 'chain_runs/il_train/13'; if we take the second last component,
        # we should get the mode
        self.mode = os.path.split(os.path.split(subexp_dir)[0])[1]
        # (the message must use self.mode: a bare `mode` is undefined here
        # and would turn a failed check into a NameError)
        assert self.mode in {'repl', 'il_train', 'il_test'}, \
            (self.mode, subexp_dir)

        # Load experiment config
        config_path = os.path.join(subexp_dir, 'config.json')
        with open(config_path, 'r') as fp:
            self.config = json.load(fp)

        # Store a path to relevant progress.csv file (only for il_train/repl)
        progress_path = os.path.join(subexp_dir, "progress.csv")
        self.progress_path = \
            progress_path if os.path.exists(progress_path) else None

        # Store a path to relevant eval.json file
        eval_json_path = os.path.join(subexp_dir, "eval.json")
        self.eval_json_path = \
            eval_json_path if os.path.exists(eval_json_path) else None

        # Infer the .ident attribute for the parent experiment
        # (if it exists)
        encoder_path = self.config.get('encoder_path')
        if self.mode == 'il_train' and encoder_path is not None:
            encoder_relpath = get_parent_relpath(
                encoder_path, experiment_dir_root)
            # The relpath is going to be something like
            # "chain_runs/10/repl/5/checkpoints/representation_encoder".
            # We heuristically remove the last two parts.
            # (this definitely breaks on Windows…)
            encoder_relpath = '/'.join(encoder_relpath.split('/')[:-2])
            self.parent_ident = encoder_relpath
        elif self.mode == 'il_test':
            policy_relpath = get_parent_relpath(
                self.config['policy_path'], experiment_dir_root)
            self.parent_ident = policy_relpath
        else:
            # "repl" runs and "il_train" runs without an encoder_path
            # have no parents
            assert self.mode == 'repl' \
              or (self.mode == 'il_train' and encoder_path is None), \
               (self.mode, encoder_path)
            self.parent_ident = None

        # HACK: adding a use_repl key so that we can see whether il_train runs used repL
        if self.mode == 'il_train':
            self.config['use_repl'] = self.parent_ident is not None

    def get_merged_config(self, index):
        """Get a 'merged' config dictionary for this subexperiment and
        all of its parents. The dict will have a format like this:

        {"benchmark": {…}, "il_train": {…}, "il_test": {…}, "repl": {…}}

        Note that some keys might not be present (e.g. if this is a `repl` run,
        it will not have the `il_train` key; if this is an `il_train` run with
        no parent, then the `repl` key will be absent).

        `index` is the SubexperimentIndex used to look up parent runs; it
        is only consulted when this run has a parent."""
        # shallow copy, so that moving keys below does not alter self.config
        config = {self.mode: dict(self.config)}
        extract_keys = ('env_cfg', 'venv_opts', 'env_data')
        for extract_key in extract_keys:
            if extract_key in config[self.mode]:
                # move shared ingredient configs to the top level because
                # those ingredient names are shared between experiments
                config[extract_key] = config[self.mode].pop(extract_key)
        parent = self.get_parent(index)
        if parent is not None:
            # TODO: merge this properly, erroring on incompatible duplicate
            # keys. I think Cody has code for this.
            # (for now, the parent's values win on duplicate top-level keys)
            config.update(parent.get_merged_config(index))
        return config

    def get_parent(self, index):
        """Return the parent SubexperimentRun looked up in `index`, or None
        if this run has no parent."""
        if self.parent_ident is None:
            return None
        return index.get_subexp(self.parent_ident)

    def __repr__(self):
        return f'{type(self).__name__}(ident={self.ident!r})'

    def __hash__(self):
        return hash(self.ident)

    def __eq__(self, other):
        if not isinstance(other, SubexperimentRun):
            return NotImplemented
        return self.ident == other.ident

class SubexperimentIndex:
    """Registry of subexperiments keyed by their identifier. Supports
    lookup by identifier and a simple linear search by attribute values."""
    def __init__(self):
        self.subexp_by_ident = {}

    def add_subexp(self, subexp):
        """Register `subexp` under its `.ident`; raises ValueError if that
        identifier has already been registered."""
        key = subexp.ident
        if key in self.subexp_by_ident:
            raise ValueError("duplicate subexperiment:", subexp)
        self.subexp_by_ident[key] = subexp

    def get_subexp(self, ident):
        """Return the subexperiment registered as `ident` (KeyError if
        there is none)."""
        return self.subexp_by_ident[ident]

    def search(self, **attrs):
        """Return a list of all subexperiments whose attributes match every
        name/value pair given in 'attrs' (all of them if 'attrs' is empty)."""
        wanted = attrs.items()
        return [
            candidate
            for candidate in self.subexp_by_ident.values()
            if not any(getattr(candidate, name) != expected
                       for name, expected in wanted)
        ]

def get_experiment_directories(root_dir, skip_skopt=True):
    """Walk `root_dir` (following symlinks) and return a sorted list of
    absolute paths of run directories, i.e. directories whose path ends in
    'il_test/<digits>', 'il_train/<digits>' or 'repl/<digits>'.

    Directories named 'grid_search' or '_sources' are never descended into,
    and the walk does not descend below a directory in which run
    directories were found. If `skip_skopt` is true, any directory whose
    'grid_search' subdirectory contains an entry starting with
    'search-alg-' is skipped entirely (a message is printed)."""
    run_dir_re = re.compile(r'^.*/(il_test|il_train|repl)/\d+$')
    pruned_re = re.compile(r'^.*/(grid_search|_sources)$')
    found = set()
    for current, subdirs, _files in os.walk(root_dir, followlinks=True, topdown=True):
        # emptying `subdirs` in place tells os.walk not to descend further
        if pruned_re.match(current):
            subdirs.clear()
            continue

        # skopt directories are recognised by the contents of grid_search/
        if skip_skopt and 'grid_search' in subdirs:
            gs_entries = os.listdir(os.path.join(current, 'grid_search'))
            if any(name.startswith('search-alg-') for name in gs_entries):
                print("skipping skopt directory in", current)
                subdirs.clear()
                continue

        candidates = (os.path.abspath(os.path.join(current, name))
                      for name in subdirs)
        matches = [path for path in candidates
                   if run_dir_re.match(path) is not None]
        if matches:
            found.update(matches)
            subdirs.clear()  # run directories found here; stop descending
    return sorted(found)

# Build an index of every sub-experiment run directory found beneath a root
# directory.
def load_all_subexperiments(root_dir, skip_skopt=True):
    """Find all experiment run subdirectories under `root_dir`, create a
    SubexperimentRun for each, and return them in a SubexperimentIndex."""
    print("Searching for experiment directories (might take a minute or two)")
    run_dirs = get_experiment_directories(root_dir, skip_skopt=skip_skopt)

    print("Loading experiments (might take another minute or two)")
    index = SubexperimentIndex()
    for run_dir in run_dirs:
        index.add_subexp(SubexperimentRun(run_dir, root_dir))
    return index

In [None]:
# Walk runs_directory and index every sub-experiment run found beneath it
# (skopt directories are skipped).
subexp_index = load_all_subexperiments(runs_directory, skip_skopt=True)
print('Discovered', len(subexp_index.subexp_by_ident), 'subexperiments')

# All runs whose mode is 'il_test'.
test_expts = subexp_index.search(mode='il_test')

# Sanity check: show the merged config of one il_test run as the cell output.
# NOTE: indexing with [1] raises IndexError if fewer than two il_test runs
# were found.
test_expts[1].get_merged_config(subexp_index)

## Print a table of il_test results

Shows a separate set of il_test results for each benchmark setting.

In [None]:
def flatten_dict(d):
    """Collapse a nested structure of dicts and lists into one single-level
    dict whose keys look like 'keys/separated/like/this'.

    `d` must be a dict or a list. List elements are keyed by their index
    as a string ('0', '1', …). Empty nested dicts/lists contribute no
    keys at all."""
    if isinstance(d, dict):
        pairs = d.items()
    else:
        assert isinstance(d, list), type(d)
        # treat a list like a dict of the form {'0': <first elem>, '1': <second elem>, …}
        pairs = ((str(pos), item) for pos, item in enumerate(d))

    flat = {}
    for name, item in pairs:
        if not isinstance(item, (dict, list)):
            flat[name] = item
            continue
        # recurse into containers and prefix their keys with our own
        for child_name, child_item in flatten_dict(item).items():
            flat[f'{name}/{child_name}'] = child_item
    return flat

def combine_dicts_multiset(dicts):
    """Tally the values observed for each key across a series of dicts.

    Returns a plain dict mapping every key seen in `dicts` to a
    `collections.Counter` recording how many times each value occurred
    for that key."""
    tallies = {}
    for mapping in dicts:
        for name, observed in mapping.items():
            tallies.setdefault(name, collections.Counter())[observed] += 1
    return tallies

def remove_inapplicable_keys(flat_dict):
    """Return a copy of a flattened config dict without the keys that
    cannot make a difference for this config. Totally heuristic, so more
    rules might have to be added later on.

    Dropped keys:
    - 'env_cfg/<benchmark>…' and 'env_data/<benchmark>…' keys for each of
      the benchmarks 'magical' and 'dm_control' that is not the one named
      by 'env_cfg/benchmark_name' (e.g. dm_control keys are dropped from
      magical experiments);
    - 'repl/…' keys when 'il_train/use_repl' is exactly False."""
    active_benchmark = flat_dict.get('env_cfg/benchmark_name')

    dead_prefixes = []
    for benchmark_name in ('magical', 'dm_control'):
        if active_benchmark != benchmark_name:
            dead_prefixes.append('env_cfg/' + benchmark_name)
            dead_prefixes.append('env_data/' + benchmark_name)
    if flat_dict.get('il_train/use_repl') is False:
        dead_prefixes.append('repl/')
    dead_prefixes = tuple(dead_prefixes)

    return {
        name: value
        for name, value in flat_dict.items()
        if not name.startswith(dead_prefixes)
    }

def simplify_config_dicts(hierarchical_dicts,
                          base_thresh=0.75,
                          remove_seeds=True,
                          prohibited_base_keys=('env_cfg/task_name', 'env_cfg/benchmark_name', ),
                          force_remove_keys=('il_test/policy_path', 'il_train/encoder_path')):
    """Flatten and simplify a list of (nested) config dicts.

    Returns `(base_config, dicts)`, with one output dict per input dict:

    0. Output dicts are totally flat ('keys/like/this').
    1. Keys for which only a single value is observed, as well as all
       `force_remove_keys`, are dropped from the output dicts.
    2. If one value of a key is shared by more than a fraction
       `base_thresh` of the dicts, that key/value goes into the
       _base config_ (unless the key is in `prohibited_base_keys`).
       Output dicts only contain such a key if their value differs from
       the base config one.
    3. If `remove_seeds` is true, keys whose last component is 'seed' are
       removed.

    This makes it more clear which values are actually changing."""
    flat = [dict(flatten_dict(nested)) for nested in hierarchical_dicts]

    # drop seed entries, if requested
    if remove_seeds:
        for cfg in flat:
            seed_keys = [name for name in cfg if name.split('/')[-1] == 'seed']
            for name in seed_keys:
                del cfg[name]

    # pad every dict with None so that all dicts share the same key set
    all_keys = set()
    for cfg in flat:
        all_keys |= cfg.keys()
    for cfg in flat:
        for absent in all_keys - cfg.keys():
            cfg[absent] = None

    # drop keys that do not apply to a given config
    flat = [remove_inapplicable_keys(cfg) for cfg in flat]

    # classify keys: dropped outright, promoted to the base config, or kept
    min_count = len(flat) * base_thresh
    base_config = {}
    dropped = set()
    for name, tally in combine_dicts_multiset(flat).items():
        if len(tally) == 1 or name in force_remove_keys:
            # only one value was ever observed (or removal is forced)
            dropped.add(name)
            continue
        if name in prohibited_base_keys:
            continue
        [(dominant, dominant_count)] = tally.most_common(1)
        if dominant_count > min_count:
            # most dicts agree on this value
            base_config[name] = dominant

    def keep(name, value):
        # keep an entry unless it is dropped or merely repeats the base config
        if name in dropped:
            return False
        return not (name in base_config and base_config[name] == value)

    trimmed = [
        {name: value for name, value in cfg.items() if keep(name, value)}
        for cfg in flat
    ]
    return base_config, trimmed

In [None]:
# Merge each il_test run's config with the configs of its parents, then
# reduce the merged configs to the keys that actually differ between runs.
all_configs = [subexp.get_merged_config(subexp_index) for subexp in test_expts]
base_config, flat_configs = simplify_config_dicts(all_configs)
flat_config_tups = [tuple(sorted(d.items())) for d in flat_configs]

# Group runs by their benchmark settings (the 'env_*' / 'venv_*' keys).
subexp_by_benchmark = {}
for flat_cfg, subexp in zip(flat_config_tups, test_expts):
    bench_key = tuple((k, v) for k, v in flat_cfg if k.startswith(('env_', 'venv_')))
    subexp_by_benchmark.setdefault(bench_key, []).append((flat_cfg, subexp))

display(HTML('<p><strong>Base config</strong></p>'))
display(HTML('<p>Unless specified otherwise, all config dicts include these keys:</p>'))
print(base_config)

# Set to True to show the full (simplified) config in the first column
# instead of just the exp_ident.
show_full_config = False
cell_style = 'border-collapse: collapse;'


def _test_row_ident(cfg, subexps):
    """Return the exp_ident for a table row, as a string. The key is absent
    from `cfg` when simplify_config_dicts dropped it or moved it to the base
    config; in that case fall back to the raw config of the first run."""
    fallback = subexps[0].config.get('exp_ident')
    return str(dict(cfg).get('il_test/exp_ident', fallback))


for idx, (bench_key, cfgs_subexps) in enumerate(subexp_by_benchmark.items(), start=1):
    # print out benchmark details
    display(HTML(f'<p><strong>Results for benchmark config &#35;{idx}</strong></p>'))
    display(HTML('<p>Config:</p>'))
    # config values can be None/int/bool, and html.escape() only accepts
    # strings, hence the str()
    rows = [f'<tr><th>{html.escape(key)}</th><td>{html.escape(str(value))}</td></tr>'
            for key, value in bench_key]
    display(HTML(f'<table>{"".join(rows)}</table>'))
    display(HTML('<p>Runs:</p>'))

    # cluster subexperiments by config (ignoring the benchmark settings,
    # which are identical within this group)
    by_cfg = {}
    for tup_cfg, subexp in cfgs_subexps:
        tup_cfg = tuple(item for item in tup_cfg if item not in bench_key)
        by_cfg.setdefault(tup_cfg, []).append(subexp)

    # is this a magical run?
    is_magical = True  # ('env_cfg/benchmark_name', 'magical') in bench_key

    # load all eval.json files and figure out what columns we need
    stats_dicts = {}
    columns = set()
    for _, subexp in cfgs_subexps:
        if subexp.eval_json_path:
            with open(subexp.eval_json_path, 'r') as fp:
                eval_dict = json.load(fp)
            if is_magical:
                # one column per test env, named by the first two
                # '-'-separated parts of the env name
                stats_dict = {
                    '-'.join(env_dict['test_env'].split('-')[:2]): env_dict['mean_score']
                    for env_dict in eval_dict['full_data']
                }
                stats_dict['Average on all envs'] = eval_dict['return_mean']
            else:
                stats_dict = {'return_mean': eval_dict['return_mean']}
            stats_dicts[subexp] = stats_dict
            columns |= stats_dict.keys()
        else:
            stats_dicts[subexp] = {}
    columns = sorted(columns)

    # now produce a table with one row per config
    table_parts = ['<table>']                                         # begin table
    table_parts.append('<tr>')                                        # begin header row
    table_parts.append(f'<th style="{cell_style}">Config</th>')
    table_parts.extend(f'<th style="{cell_style}">{html.escape(col_name)}</th>' for col_name in columns)
    table_parts.append('</tr>')                                       # end header row

    for cfg, subexps in sorted(by_cfg.items(), key=lambda cfg_se: _test_row_ident(*cfg_se)):
        table_parts.append('<tr>')                                    # begin row

        # cell containing config
        if show_full_config:
            kv_cfg = ', '.join(f'{key}={value!r}' for key, value in cfg)
            table_parts.append(f'<td style="max-width: 600px; {cell_style}">{html.escape(kv_cfg)}</td>')
        else:
            exp_ident = _test_row_ident(cfg, subexps)
            table_parts.append(f'<td style="{cell_style}">{html.escape(exp_ident)}</td>')

        # cells containing data: mean ± std (number of runs) per column
        for column in columns:
            column_values = [stats_dicts[subexp][column] for subexp in subexps
                             if column in stats_dicts[subexp]]
            if not column_values:
                table_parts.append(f'<td style="{cell_style}">-</td>')
            else:
                mean = np.mean(column_values)
                std = np.std(column_values)
                n = len(column_values)
                table_parts.append(f'<td style="{cell_style}">{mean:.3g}±{std:.1g} ({n})</td>')

        table_parts.append('</tr>')                                   # end row
    table_parts.append('</table>')                                    # end table
    display(HTML(''.join(table_parts)))

# Print a table of il_train Area Under Loss Curve results

In [None]:
from numpy import trapz
import csv
import pandas as pd

# Number of roughly equal segments each loss curve is divided into; an
# area-under-curve value is reported at each segment boundary.
num_split = 6

# Number of leading loss values excluded from every area-under-curve
# computation.
start_count = 3

def calculate_auc(y, dx=1):
    """Return the area under the curve `y`, integrated with the trapezoidal
    rule assuming a constant spacing `dx` between consecutive samples."""
    # NumPy 2.0 deprecated `trapz` in favour of `trapezoid`; prefer the new
    # name when it exists and fall back to the old one on older NumPy.
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    return integrate(y, dx=dx)
    
# Merge each il_train run's config with the configs of its parents, then
# reduce the merged configs to the keys that actually differ between runs.
train_expts = subexp_index.search(mode='il_train')
all_configs = [subexp.get_merged_config(subexp_index) for subexp in train_expts]
base_config, flat_configs = simplify_config_dicts(all_configs)
flat_config_tups = [tuple(sorted(d.items())) for d in flat_configs]

# Group runs by their benchmark settings (the 'env_*' / 'venv_*' keys).
subexp_by_benchmark = {}
for flat_cfg, subexp in zip(flat_config_tups, train_expts):
    bench_key = tuple((k, v) for k, v in flat_cfg if k.startswith(('env_', 'venv_')))
    subexp_by_benchmark.setdefault(bench_key, []).append((flat_cfg, subexp))

display(HTML('<p><strong>Base config</strong></p>'))
display(HTML('<p>Unless specified otherwise, all config dicts include these keys:</p>'))
print(base_config)

# Set to True to show the full (simplified) config in the first column
# instead of just the exp_ident.
show_full_config = False
cell_style = 'border-collapse: collapse;'


def _train_row_ident(cfg, subexps):
    """Return the exp_ident for a table row, as a string. The key is absent
    from `cfg` when simplify_config_dicts dropped it or moved it to the base
    config; in that case fall back to the raw config of the first run."""
    fallback = subexps[0].config.get('exp_ident')
    return str(dict(cfg).get('il_train/exp_ident', fallback))


for idx, (bench_key, cfgs_subexps) in enumerate(subexp_by_benchmark.items(), start=1):
    # print out benchmark details
    display(HTML(f'<p><strong>Results for benchmark config &#35;{idx}</strong></p>'))
    display(HTML('<p>Config:</p>'))
    # config values can be None/int/bool, and html.escape() only accepts
    # strings, hence the str()
    rows = [f'<tr><th>{html.escape(key)}</th><td>{html.escape(str(value))}</td></tr>'
            for key, value in bench_key]
    display(HTML(f'<table>{"".join(rows)}</table>'))
    display(HTML('<p>Runs:</p>'))

    # cluster subexperiments by config (ignoring the benchmark settings,
    # which are identical within this group)
    by_cfg = {}
    for tup_cfg, subexp in cfgs_subexps:
        tup_cfg = tuple(item for item in tup_cfg if item not in bench_key)
        by_cfg.setdefault(tup_cfg, []).append(subexp)

    # load all progress files and figure out what columns we need
    stats_dicts = {}
    columns = set()
    for _, subexp in cfgs_subexps:
        if subexp.progress_path:
            df = pd.read_csv(subexp.progress_path)
            losses = df['loss']
            n_rows = len(losses)
            # at least 1, so that range() below does not get a zero step
            # when the file has fewer rows than num_split
            step_length = max(1, n_rows // num_split)
            stats_dict = {}
            # area under the loss curve from start_count up to each
            # intermediate boundary ...
            for step in range(step_length, n_rows, step_length):
                label = f"step {step:02d}"
                stats_dict[label] = calculate_auc(losses[start_count:step])
            # ... and up to the end of the curve
            stats_dict[f"step {n_rows}"] = calculate_auc(losses[start_count:n_rows])
            stats_dicts[subexp] = stats_dict
            columns |= stats_dict.keys()
        else:
            stats_dicts[subexp] = {}
    columns = sorted(columns)

    # now produce a table with one row per config
    table_parts = ['<table>']                                         # begin table
    table_parts.append('<tr>')                                        # begin header row
    table_parts.append(f'<th style="{cell_style}">Config</th>')
    table_parts.extend(f'<th style="{cell_style}">{html.escape(col_name)}</th>' for col_name in columns)
    table_parts.append('</tr>')                                       # end header row

    for cfg, subexps in sorted(by_cfg.items(), key=lambda cfg_se: _train_row_ident(*cfg_se)):
        table_parts.append('<tr>')                                    # begin row

        # cell containing config
        if show_full_config:
            kv_cfg = ', '.join(f'{key}={value!r}' for key, value in cfg)
            table_parts.append(f'<td style="max-width: 600px; {cell_style}">{html.escape(kv_cfg)}</td>')
        else:
            exp_ident = _train_row_ident(cfg, subexps)
            table_parts.append(f'<td style="{cell_style}">{html.escape(exp_ident)}</td>')

        # cells containing data: mean ± std (number of runs) per column
        for column in columns:
            column_values = [stats_dicts[subexp][column] for subexp in subexps
                             if column in stats_dicts[subexp]]
            if not column_values:
                table_parts.append(f'<td style="{cell_style}">-</td>')
            else:
                mean = np.mean(column_values)
                std = np.std(column_values)
                n = len(column_values)
                table_parts.append(f'<td style="{cell_style}">{mean:.3g}±{std:.1g} ({n})</td>')

        table_parts.append('</tr>')                                   # end row
    table_parts.append('</table>')                                    # end table
    display(HTML(''.join(table_parts)))


# Prepare files for analyzing with Viskit

If you want to see a trial object's information, you can do print(trial).

In [None]:
def prepare_files(experiments, exp_index, out_dir):
    """
    Export runs into `out_dir` in a layout that Viskit can read.

    Each experiment in `experiments` that has a progress.csv gets its own
    subdirectory of `out_dir`, named after the experiment's identifier with
    '/' replaced by '-'. For instance, a run directory like this:

    il_train
    │   ├── 1
    │   │   ├── ...
    │   │   ├── config.json
    │   │   └── progress.csv
    │   └── _sources
    …

    yields output like this (for an identifier 'chain_runs/il_train/1'):

    out_dir
    ├── chain_runs-il_train-1
    │   ├── params.json   (flattened merged config of the run and its parents)
    │   └── progress.csv  (copied from the run directory)
    …

    Every params.json contains the same set of keys; keys that a run does
    not have are written as null. Experiments without a progress.csv are
    skipped (a message is printed). `exp_index` is the index used to look
    up parent runs when merging configs.

    After you run this, you can execute viskit with: python viskit/frontend.py path/to/out_dir/
    """
    # merged (nested) configs of the runs that do have a progress.csv
    kept = []
    merged = []
    for run in experiments:
        if not run.progress_path:
            print("Skipping experiment", run.ident, "because it has no progress.csv")
            continue
        merged.append(run.get_merged_config(exp_index))
        kept.append(run)

    flat_configs = [dict(flatten_dict(cfg)) for cfg in merged]

    # pad every dict with None so that all dicts share the same key set
    every_key = set()
    for cfg in flat_configs:
        every_key |= cfg.keys()
    for cfg in flat_configs:
        for absent in every_key - cfg.keys():
            cfg[absent] = None

    # write one output directory per run
    for cfg, run in zip(flat_configs, kept):
        target_dir = os.path.join(out_dir, run.ident.replace('/', '-'))
        os.makedirs(target_dir, exist_ok=True)

        with open(os.path.join(target_dir, 'params.json'), 'w') as fp:
            json.dump(cfg, fp)

        shutil.copyfile(run.progress_path,
                        os.path.join(target_dir, 'progress.csv'))

# Export all repl runs and all il_train runs into separate Viskit input
# directories (paths are relative to the current working directory).
prepare_files(subexp_index.search(mode='repl'), subexp_index, 'viskit-repl')
prepare_files(subexp_index.search(mode='il_train'), subexp_index, 'viskit-il-train')

# Interpret encoders

Save the encoder interpretation videos. Each sub_exp might take one or two minutes to save.

In [None]:
import glob
import subprocess
from pathlib import Path

# Merge each il_train run's config with the configs of its parents, then
# reduce the merged configs to the keys that actually differ between runs.
train_expts = subexp_index.search(mode='il_train')
all_configs = [subexp.get_merged_config(subexp_index) for subexp in train_expts]
base_config, flat_configs = simplify_config_dicts(all_configs)
flat_config_tups = [tuple(sorted(d.items())) for d in flat_configs]

# Group runs by their benchmark settings (the 'env_*' / 'venv_*' keys).
subexp_by_benchmark = {}
for flat_cfg, subexp in zip(flat_config_tups, train_expts):
    bench_key = tuple((k, v) for k, v in flat_cfg if k.startswith(('env_', 'venv_')))
    subexp_by_benchmark.setdefault(bench_key, []).append((flat_cfg, subexp))

# Create a folder to save videos, named after the cluster
# (second component of cluster_subpath)
video_log_dir = f"runs/{cluster_subpath.split('/')[1]}"
Path(f"./{video_log_dir}").mkdir(parents=True, exist_ok=True)
interp_algo = 'saliency'

for idx, (bench_key, cfgs_subexps) in enumerate(subexp_by_benchmark.items(), start=1):
    # cluster subexperiments by config (ignoring the benchmark settings,
    # which are identical within this group)
    by_cfg = {}
    for tup_cfg, subexp in cfgs_subexps:
        tup_cfg = tuple(item for item in tup_cfg if item not in bench_key)
        by_cfg.setdefault(tup_cfg, []).append(subexp)

    for tup_cfg, subexps in by_cfg.items():
        # only the first run of each config is interpreted
        exp = subexps[0]
        # .get(): il_train runs that did not use repL may have no encoder
        encoder_path = exp.config.get('encoder_path')
        if not encoder_path:
            continue

        # translate the recorded path to its local equivalent
        for prefix, replacement in path_translations.items():
            if encoder_path.startswith(prefix):
                encoder_path = replacement + encoder_path[len(prefix):]

        task_name = exp.config['env_cfg']['task_name']
        exp_ident = exp.config['exp_ident']
        # Build the argument list directly instead of splitting a command
        # string, so that values containing whitespace (e.g. paths) stay
        # intact.
        command = [
            "python", "../src/il_representations/scripts/interpret.py", "with",
            f"log_dir={video_log_dir}",
            f"env_cfg.benchmark_name={exp.config['env_cfg']['benchmark_name']}",
            f"env_cfg.task_name={task_name}",
            "save_video=True",
            f"chosen_algo={interp_algo}",
            f"encoder_path={encoder_path}",
            f"filename={task_name}_{exp_ident}",
        ]

        print(f"Generating videos for exp {exp_ident} on {task_name}...")
        # stdout is captured rather than echoed into the notebook
        result = subprocess.run(command, stdout=subprocess.PIPE)
        output = result.stdout
        if result.returncode != 0:
            # report failures instead of silently ignoring them, but keep
            # going with the remaining experiments
            print(f"interpret.py exited with status {result.returncode} "
                  f"for exp {exp_ident} on {task_name}")