In [50]:
# Generates MML commands for the task fingerprinting knowledge transfer experiments (original version)
# This notebook generates the `output_XXX.txt` files next to it, that may be submitted to the DKFZ cluster to run experiments

import dataclasses
import os
from pathlib import Path
from typing import Dict, Union
# The notebook API of mml is required; turn a failed import into an actionable hint.
try:
    import mml.api.notebooks as nb
except ImportError as err:
    # chain explicitly so the original import failure is reported as the direct cause
    raise RuntimeError('For the original reproduction you need to install an older version of mml, please refer to the README for precise instructions.') from err
# initialise the mml notebook API before any other `nb.*` call
nb.init()

In [51]:
# The task names are spelled out here (taken from mml_tf.tasks) so that mml_tf does not have to be installed.
# The order of the entries is kept exactly as in the original definition.
train_tasks = (
    # lapgyn4
    'lapgyn4_anatomical_structures',
    'lapgyn4_surgical_actions',
    'lapgyn4_instrument_count',
    'lapgyn4_anatomical_actions',
    # various medical tasks
    'sklin2_skin_lesions',
    'identify_nbi_infframes',
    'laryngeal_tissues',
    'nerthus_bowel_cleansing_quality',
    # natural image / digit tasks
    'stanford_dogs_image_categorization',
    'svhn',
    'caltech101_object_classification',
    'caltech256_object_classification',
    'cifar10_object_classification',
    'cifar100_object_classification',
    'mnist_digit_classification',
    'emnist_digit_classification',
    # hyperkvasir
    'hyperkvasir_anatomical-landmarks',
    'hyperkvasir_pathological-findings',
    'hyperkvasir_quality-of-mucosal-views',
    'hyperkvasir_therapeutic-interventions',
    # cholec80
    'cholec80_grasper_presence',
    'cholec80_bipolar_presence',
    'cholec80_hook_presence',
    'cholec80_scissors_presence',
    'cholec80_clipper_presence',
    'cholec80_irrigator_presence',
    'cholec80_specimenbag_presence',
    # derm7pt
    'derm7pt_skin_lesions',
)
test_tasks = (
    'idle_action_recognition',
    'barretts_esophagus_diagnosis',
    'brain_tumor_classification',
    'mednode_melanoma_classification',
    'brain_tumor_type_classification',
    # chexpert
    'chexpert_enlarged_cardiomediastinum',
    'chexpert_cardiomegaly',
    'chexpert_lung_opacity',
    'chexpert_lung_lesion',
    'chexpert_edema',
    'chexpert_consolidation',
    'chexpert_pneumonia',
    'chexpert_atelectasis',
    'chexpert_pneumothorax',
    'chexpert_pleural_effusion',
    'chexpert_pleural_other',
    'chexpert_fracture',
    'chexpert_support_devices',
    # this task was originally contained, but might be omitted
    'covid-19-chest-ct-image-augmentation_raw',
    'pneumonia_classification',
    'ph2-melanocytic-lesions-classification',
    'covid_xray_classification',
    'isic20_melanoma_classification',
    'deep_drid_dr_level',
    'shenzen_chest_xray_tuberculosis',
    'crawled_covid_ct_classification',
    'deep_drid_quality',
    'deep_drid_clarity',
    'deep_drid_field',
    'deep_drid_artifact',
    # kvasir capsule
    'kvasir_capsule_anatomy',
    'kvasir_capsule_content',
    'kvasir_capsule_pathologies',
    'breast_cancer_classification_v2',
    'eye_condition_classification',
    # mura
    'mura_xr_wrist',
    'mura_xr_shoulder',
    'mura_xr_humerus',
    'mura_xr_hand',
    'mura_xr_forearm',
    'mura_xr_finger',
    'mura_xr_elbow',
    'bean_plant_disease_classification',
    'aptos19_blindness_detection',
)
# train tasks first, test tasks afterwards
all_tasks = train_tasks + test_tasks
# Fetch the task infos for every task once; later cells read `task_infos.num_samples` and
# `task_infos.num_classes` (indexed by task name) from it. No dimensions are requested here.
task_infos = nb.get_task_infos(
    task_list=all_tasks,
    dims=None,
)

In [1]:
# account name used to log in to the cluster when retrieving results
user_id = 'USERNAME'  # replace with your AD account


def get_retrieve_for_proj(proj):
    """Return the rsync shell command that fetches the results of one project from the cluster.

    Usage: ``print(get_retrieve_for_proj('my_project'))`` and run the printed line in a shell.

    The remote root is read from the environment variable ``MML_CLUSTER_RESULTS_PATH`` and the
    local root from ``MML_RESULTS_PATH``; an unset variable ends up as the text ``None`` in the
    command. The sub-folders PARAMETERS, hpo, runs, FIMS and FC_TUNED are excluded from the sync.

    :param proj: name of the project folder (identical on the cluster and on the local system)
    :return: the rsync command as a single string
    """
    skipped_folders = ('PARAMETERS', 'hpo', 'runs', 'FIMS', 'FC_TUNED')
    exclude_flags = ' '.join(f'--exclude={folder}' for folder in skipped_folders)
    remote_root = os.getenv('MML_CLUSTER_RESULTS_PATH')
    local_root = os.getenv('MML_RESULTS_PATH')
    return f"rsync -rtvu --stats {exclude_flags} {user_id}@odcf-worker01:{remote_root}/{proj}/ {local_root}/{proj}"

# choose the number of epochs of a job: at most `max_epoch` epochs, and roughly at most `max_steps` steps
# (plus finishing the epoch)
def optimize_epochs(target_task: str, batch_size: int = 300, max_steps: int = 4000, max_epoch: int = 40) -> int:
    """Compute the epoch count for training on ``target_task`` under a step budget.

    The number of samples is read from the global ``task_infos.num_samples`` and scaled by 0.8
    (presumably the training share of the data -- TODO confirm). The result is the number of
    epochs that fit into ``max_steps`` plus one, capped at ``max_epoch``.

    :param target_task: name of the task, used as key into ``task_infos.num_samples``
    :param batch_size: number of samples per step
    :param max_steps: step budget
    :param max_epoch: upper bound for the returned value
    :return: number of epochs to train
    """
    used_samples = int(task_infos.num_samples[target_task] * 0.8)
    steps_per_epoch = used_samples // batch_size + 1
    epochs_within_budget = max_steps // steps_per_epoch + 1
    return min(max_epoch, epochs_within_budget)

In [53]:
# Requirements that are prepended to every cluster submission; add your own variant here in case you
# have other gpu requirements.
# NOTE(review): 'e230-dgxa100-2' and 'e230-dgxa100-4' are listed twice below -- kept as in the original.
base_reqs = nb.LSFSubmissionRequirements(
    special_requirements=[],
    undesired_hosts=[
        'e230-dgx2-2',
        'e230-dgxa100-2',
        'e230-dgxa100-4',
        'e230-dgxa100-1',
        'e230-dgxa100-2',
        'e230-dgxa100-3',
        'e230-dgxa100-4',
        'lsf22-gpu08',
        'lsf22-gpu01',
        'lsf22-gpu02',
        'lsf22-gpu03',
        'lsf22-gpu04',
        'lsf22-gpu05',
        'lsf22-gpu06',
        'lsf22-gpu07',
    ],
    num_gpus=1,
    vram_per_gpu=11.0,
    queue='gpu-lowprio',
    mail='EMAIL.ADDRESS@dkfz-heidelberg.de',
    script_name='mml.sh',
    job_group='/USERNAME/pami_rerun',
)
# alternatively you may use this local setup
# base_reqs = pp_reqs = aa_reqs = def_reqs = arch_reqs = tl_reqs = multi_reqs = nb.DefaultRequirements()

# All other requirement sets are copies of the base requirements with a few fields replaced.
# preprocessing: different queue
pp_reqs = dataclasses.replace(base_reqs, queue='gpu')
# auto augmentation: own submission script and more VRAM
aa_reqs = dataclasses.replace(base_reqs, vram_per_gpu=13.0, script_name='aa.sh')
# default training: requests 'tensorcore'
def_reqs = dataclasses.replace(base_reqs, special_requirements=['tensorcore'])
# same as default training, but with larger VRAM requests
tl_reqs = dataclasses.replace(base_reqs, vram_per_gpu=24.0, special_requirements=['tensorcore'])
multi_reqs = dataclasses.replace(base_reqs, vram_per_gpu=14.0, special_requirements=['tensorcore'])

In [54]:
# Overview of the MML projects in use: maps a short experiment key to the project name.
# Projects used while working on the train tasks.
projects_train = dict(
    base='pami2_base_02',
    fed_hpo='pami2_fed_hpo_03',
    dist_results='pami2_dist_02',
    new_fed='pami2_new_fed_03',
    raw_baseline='pami2_raw_03',
    pretrain='pami2_pretrain_10',
    raw_shrunk='pami2_raw_shrunk_10',
    transfer='pami2_transfer_20',
    multi_task='test_multi_balanced_10',
    multi_shrunk='test_multi_balanced_shrunk_10',
    arch_search='pami2_arch_search_02',
    arch_infer='pami2_arch_infer_02',
    aa_search='pami2_aa_search_01',
    aa_infer='pami2_aa_infer_02',
)
# Projects used while working on the test tasks.
projects_test = dict(
    # the first six entries are shared with train (!) since stuff is only computed once anyway
    base='pami2_base_02',
    fed_hpo='pami2_fed_hpo_03',
    dist_results='pami2_dist_02',
    new_fed='pami2_new_fed_03',
    raw_baseline='pami2_raw_03',
    raw_shrunk='pami2_raw_shrunk_10',
    # aa search was misplaced by accident, this is resolved within exp 3
    aa_search='pami2_t_aa_search_01',
    # test specific projects
    pretrain='pami2_t_pretrain_01',
    transfer='pami2_t_transfer_20',
    multi_task='test_multi_balanced_test_split_10',
    multi_shrunk='test_multi_balanced_shrunk_test_split_10',
    arch_search='pami2_t_arch_search_02',
    arch_infer='pami2_t_arch_infer_02',
    aa_infer='pami2_t_aa_infer_02',
)

In [55]:
# Two phases are distinguished: the training period (setting of all hyperparameters) and the later
# evaluation on the test tasks. Every setting below is selected by `phase`.
phase = 'train'  # set to 'test' later on

# note that experiments have to be run multiple times to ensure significance
rerun = {
    'train': 3,
    'test': 3,
}[phase]
# project names to use in this phase
projects = {
    'train': projects_train,
    'test': projects_test,
}[phase]
# these will become the targets
target_tasks = {
    'train': train_tasks,
    'test': test_tasks,
}[phase]
# these will become the possible sources
source_tasks = {
    'train': train_tasks,
    'test': train_tasks + test_tasks,
}[phase]
# name of the task file, see mml_tf/configs/tasks
task_file = {
    'train': 'pami_train',
    'test': 'pami',
}[phase]

In [56]:
# Tasks that share images must not be inspected together; each group lists such tasks.
# NOTE(review): 'chexpert_enlarged_cardiomediastinum' is a test task but is not part of the 'chexpert'
# group -- confirm whether that is intended.
# NOTE(review): 'lapgyn4_instrument_count' is grouped with cholec80, presumably because it shares
# images with it -- confirm.
task_groups = {
    'chexpert': [f'chexpert_{finding}' for finding in (
        'cardiomegaly', 'lung_opacity', 'lung_lesion', 'edema', 'consolidation', 'pneumonia',
        'atelectasis', 'pneumothorax', 'pleural_effusion', 'pleural_other', 'fracture',
        'support_devices',
    )],
    'cholec80': [f'cholec80_{tool}_presence' for tool in (
        'grasper', 'bipolar', 'hook', 'scissors', 'clipper', 'irrigator', 'specimenbag',
    )] + ['lapgyn4_instrument_count'],
    'deep_drid': [f'deep_drid_{label}' for label in (
        'dr_level', 'quality', 'clarity', 'field', 'artifact',
    )],
}


# returns valid source tasks respecting phase and task_groups
def get_valid_sources(target_task):
    """List the source tasks that may be combined with ``target_task``.

    Reads the globals ``source_tasks`` and ``task_groups``. If the target belongs to a task group,
    every member of the first matching group is excluded; otherwise only the target itself.

    :param target_task: name of the target task
    :return: list of admissible source task names, in the order of ``source_tasks``
    """
    # name of the first group containing the target (None if there is no such group)
    matching_group = next((name for name, members in task_groups.items() if target_task in members), None)
    excluded = task_groups[matching_group] if matching_group else [target_task]
    return [candidate for candidate in source_tasks if candidate not in excluded]


def shrinkable(task_name):
    """Tell whether the task shrinking operation makes sense for ``task_name``.

    Reads the global ``task_infos``: a task qualifies if it has fewer than 40 classes and more
    than 1000 samples.

    :param task_name: name of the task
    :return: True if both restrictions are met, False otherwise
    """
    has_few_classes = task_infos.num_classes[task_name] < 40
    # the sample count is only looked up if the class restriction holds
    return has_few_classes and task_infos.num_samples[task_name] > 1000

In [57]:
# Preparation steps, one job per step, all within the base project:
#   step one:   plain task creation ('create')
#   step two:   plain task preprocessing ('pp'), done sequentially in a single job (best option!)
#   step three: shrinking preprocessed tasks ('info' on the shrinkable task file)
# Preprocessing could also be parallelised with one 'pp' job per task (same options plus
# 'pivot.name': <task> for every task in all_tasks), but this causes problems for some tasks!
prep_cmds = [
    nb.MMLJobDescription(prefix_req=pp_reqs,
                         config_options={'mode': step_mode, 'tasks': step_tasks, 'proj': projects['base']})
    for step_mode, step_tasks in (
        ('create', 'pami'),
        ('pp', 'pami'),
        ('info', 'pami_shrinkable_800'),
    )
]
nb.write_out_commands(cmd_list=prep_cmds, suffix='prep')

Submit output_prep.txt commands to set up everything. 

In [58]:
# OPTIONALLY: compute dimensions (used for Fig. 3) and some additional experiments not shown in the paper
# this runs locally, hence the default (non-cluster) requirements
loc_reqs = nb.DefaultRequirements()
dim_cmds = [
    nb.MMLJobDescription(
        prefix_req=loc_reqs,
        config_options={
            'mode': 'dim',
            'tasks': 'pami_shrink_mix',
            'proj': projects["base"],
            'mode.inv_mle': True,
        },
    ),
]
nb.write_out_commands(cmd_list=dim_cmds, suffix='dim')

Submit output_dim.txt commands to compute dimensions. 

In [59]:
# baselines
# these are the default options for all tasks, they should not be modified without justification
def get_default_config(target_task: str, shrunk: bool = False) -> Dict[str, Union[str, bool, int]]:
    """Return a fresh dict with the default job options for training on ``target_task``.

    Reads the globals ``task_file`` and ``phase``. The returned dict is newly created on every
    call, so callers may update it freely.

    :param target_task: name of the task to train on; becomes the value of 'pivot.name'
    :param shrunk: if True a fixed number of 40 epochs is used, otherwise the epoch count is
        computed by ``optimize_epochs`` for the given task
    :return: mapping of config option names to their values
    """
    if shrunk:
        epochs = 40
    else:
        epochs = optimize_epochs(target_task=target_task, batch_size=300, max_steps=4000, max_epoch=40)
    # the pivot task is taken from the function argument, not from a loop variable of the calling cell
    default_options = {'mode': 'opt', 'tasks': task_file, 'pivot.name': target_task,
                       'mode.store_parameters': False, 'sampling.balanced': True,
                       'sampling.batch_size': 300, 'callbacks': 'none', 'lr_scheduler': 'step',
                       'trainer.max_epochs': epochs, 'augmentations': 'baseline256', 'sampling.enable_caching': True}
    # during test we don't need to validate every epoch
    if phase != 'train':
        default_options.update({'+trainer.check_val_every_n_epoch': epochs})
    return default_options


# Baseline runs: every task is trained `rerun` times (the run index doubles as seed) and its
# parameters are stored; shrinkable tasks additionally get a run on the shrunk task file.
base_cmds = []
for ix in range(rerun):
    for t in all_tasks:
        opts = {**get_default_config(t),
                'proj': f'{projects["raw_baseline"]}_{ix}',
                'seed': ix,
                'mode.store_parameters': True}
        base_cmds.append(nb.MMLJobDescription(prefix_req=def_reqs, config_options=opts))
        if not shrinkable(t):
            continue
        # NOTE(review): in contrast to the run above no 'seed' is set here and the default config is
        # requested without shrunk=True -- confirm this is intended.
        shrink_opts = {**get_default_config(t),
                       'proj': f'{projects["raw_shrunk"]}_{ix}',
                       'tasks': f'{task_file}_shrink'}
        base_cmds.append(nb.MMLJobDescription(prefix_req=def_reqs, config_options=shrink_opts))
nb.write_out_commands(cmd_list=base_cmds, suffix='base')

Submit the commands in output_base.txt to run the baseline experiments.

In [61]:
#################################
# EXPERIMENT 1: MODEL TRANSFER  #
#################################
# timm architecture id -> VRAM requested per GPU for a job with that architecture
model_transfer_arch_reqs = dict(
    tf_efficientnet_b2=23.0,
    tf_efficientnet_b2_ap=24.0,
    tf_efficientnet_b2_ns=24.0,
    tf_efficientnet_cc_b0_4e=22.0,
    swsl_resnet50=20.0,
    ssl_resnext50_32x4d=24.0,
    regnetx_032=20.5,
    regnety_032=22.0,
    rexnet_100=20.5,
    ecaresnet50d=24.0,
    cspdarknet53=23.0,
    mixnet_l=25.0,
    cspresnext50=24.0,
    cspresnet50=18.0,
    ese_vovnet39b=25.0,
    resnest50d=25.5,
    hrnet_w18=24.0,
    skresnet34=16.5,
    mobilenetv3_large_100=13.5,
    res2net50_26w_4s=24.5,
)
# train every architecture on every possible source task, `rerun` times
arch_cmds = []
for ix in range(rerun):
    for t in source_tasks:
        for arch, vram in model_transfer_arch_reqs.items():
            opts = {**get_default_config(t),
                    'proj': f'{projects["arch_search"]}_{ix}',
                    'arch.classification.id': arch,
                    'seed': ix}
            # the following goes back to a rare occurrence of incompatible singleton batches with some batch_norms
            # avoid this by minimally wiggling the batch size
            if t == 'mura_xr_wrist' and arch in ['rexnet_100', 'resnest50d', 'skresnet34']:
                opts['sampling.batch_size'] = 301
            # default requirements, but with the VRAM demand of this architecture
            arch_reqs = dataclasses.replace(def_reqs, vram_per_gpu=vram)
            arch_cmds.append(nb.MMLJobDescription(config_options=opts, prefix_req=arch_reqs))
nb.write_out_commands(cmd_list=arch_cmds, suffix='arch', max_cmds=2000)
# evaluate every architecture on the shrunk version of each suitable target task, `rerun` times
arch_shrunk_cmds = list()
# only tasks with more than 1000 samples pass the filters below, so the shrunk task file always applies
mod_task_file = f'{task_file}_shrink'
for ix in range(rerun):
    for t in target_tasks:
        # skip tasks with more than 40 classes
        if task_infos.num_classes[t] > 40:
            continue
        # skip tasks with at most 1000 samples
        if task_infos.num_samples[t] <= 1000:
            continue
        for arch, vram in model_transfer_arch_reqs.items():
            opts = get_default_config(t, shrunk=True)
            opts.update({'proj': f'{projects["arch_infer"]}_{ix}', 'tasks': mod_task_file,
                         'arch.classification.id': arch, 'seed': ix})
            # default requirements, but with the VRAM demand of this architecture
            arch_reqs = dataclasses.replace(def_reqs, vram_per_gpu=vram)
            arch_shrunk_cmds.append(nb.MMLJobDescription(prefix_req=arch_reqs,
                                                         config_options=opts))
nb.write_out_commands(cmd_list=arch_shrunk_cmds, suffix='arch_shrunk', max_cmds=2000)

In [63]:
####################################
# EXPERIMENT 2: TRANSFER LEARNING  #
####################################
# one transfer learning job per (run, target, valid source); models are reused from the baseline
# project of the same run index
trans_cmds = []
for ix in range(rerun):
    for t in target_tasks:
        # only small tasks are used as targets (tasks with more than 40 classes are skipped)
        if task_infos.num_classes[t] > 40:
            continue
        # targets with more than 1000 samples are taken from the shrunk task file
        mod_task_file = f'{task_file}' if task_infos.num_samples[t] <= 1000 else f'{task_file}_shrink'
        for s in get_valid_sources(t):
            opts = {**get_default_config(t, shrunk=True),
                    'proj': f'{projects["transfer"]}_{ix}',
                    'tasks': mod_task_file,
                    'mode': 'tl',
                    'reuse.models': f'{projects["raw_baseline"]}_{ix}',
                    'mode.pretrain_task': s,
                    'seed': ix}
            # this default option is not passed on to transfer learning jobs
            opts.pop('mode.store_parameters')
            trans_cmds.append(nb.MMLJobDescription(config_options=opts, prefix_req=def_reqs))
nb.write_out_commands(cmd_list=trans_cmds, suffix='trans', max_cmds=2000)

In [64]:
######################################
# EXPERIMENT 3: AUG POLICY TRANSFER  #
######################################
# Step 1: training the auto augmentation pipeline for each potential source (`rerun` times each)
aa_cmds = []
for s in source_tasks:
    for ix in range(rerun):
        opts = {
            'mode': 'aa',
            'proj': f'{projects["aa_search"]}_{ix}',
            'tasks': task_file,
            'pivot.name': s,
            # epoch budget differs from the default training: batch size 120, up to 100 epochs / 10000 steps
            'trainer.max_epochs': optimize_epochs(s, batch_size=120, max_epoch=100, max_steps=10000),
            'arch.pretrained': True,
            'seed': ix,
        }
        aa_cmds.append(nb.MMLJobDescription(config_options=opts, prefix_req=aa_reqs))
nb.write_out_commands(cmd_list=aa_cmds, suffix='aa')
# Step 2: evaluating the augmentation pipeline of every valid source on every target
policy_cmds = []
for ix in range(rerun):
    for t in target_tasks:
        # only small tasks are used as targets (tasks with more than 40 classes are skipped)
        if task_infos.num_classes[t] > 40:
            continue
        # targets with more than 1000 samples are taken from the shrunk task file
        mod_task_file = f'{task_file}' if task_infos.num_samples[t] <= 1000 else f'{task_file}_shrink'
        for s in get_valid_sources(t):
            # resolving two accidents from above: the policy of a source is stored in the train
            # project if the source is a train task or has 'breast' in its name, else in the test project
            if s in train_tasks or 'breast' in s:
                reuse_aa_proj = f'{projects_train["aa_search"]}_{ix}'
            else:
                reuse_aa_proj = f'{projects_test["aa_search"]}_{ix}'
            opts = {**get_default_config(t, shrunk=True),
                    'proj': f'{projects["aa_infer"]}_{ix}',
                    'tasks': mod_task_file,
                    'reuse.aa': reuse_aa_proj,
                    'augmentations': 'load_aa_from',
                    'augmentations.source': s,
                    'seed': ix}
            policy_cmds.append(nb.MMLJobDescription(config_options=opts, prefix_req=def_reqs))
nb.write_out_commands(cmd_list=policy_cmds, suffix='policy', max_cmds=2000)

In [65]:
######################################
# EXPERIMENT 4: MULTI-TASK LEARNING  #
######################################

# one two-task job per (run, target, valid source) on the full-size target
multi_cmds = []
for ix in range(rerun):
    for t in target_tasks:
        for s in get_valid_sources(t):
            opts = {**get_default_config(t),
                    'proj': f'{projects["multi_task"]}_{ix}',
                    'mode': 'multi',
                    'sampling.balanced': True,
                    'mode.num_tasks': 2,
                    'mode.possible_tasks': [s],
                    # sample number is 1.6 times the number of samples of the target
                    'sampling.sample_num': int(1.6 * task_infos.num_samples[t]),
                    'loss.auto_activate_weighing': False,
                    'seed': ix}
            # this default option is not passed on to multi-task jobs
            opts.pop('mode.store_parameters')
            multi_cmds.append(nb.MMLJobDescription(config_options=opts, prefix_req=def_reqs))
nb.write_out_commands(cmd_list=multi_cmds, suffix='multi', max_cmds=2000)

# the same two-task jobs, but on the shrunk version of each suitable target
multi_shrunk_cmds = list()
# only tasks with more than 1000 samples pass the filters below, so the shrunk task file always applies
mod_task_file = f'{task_file}_shrink'
for ix in range(rerun):
    for t in target_tasks:
        # unshrinkable
        if task_infos.num_classes[t] > 40:
            continue
        # already covered above
        if task_infos.num_samples[t] <= 1000:
            continue
        for s in get_valid_sources(t):
            opts = get_default_config(t, shrunk=True)
            opts.update(
                {
                    'proj': f'{projects["multi_shrunk"]}_{ix}',
                    'mode': 'multi',
                    'sampling.balanced': True,
                    'mode.num_tasks': 2,
                    'mode.possible_tasks': [s],
                    'tasks': mod_task_file,
                    # sample number is 1.6 times the number of samples of the target, capped at 1600
                    'sampling.sample_num': min(int(1.6 * task_infos.num_samples[t]), 1600),
                    'loss.auto_activate_weighing': False,
                    'seed': ix,
                })
            # this default option is not passed on to multi-task jobs
            del opts['mode.store_parameters']
            multi_shrunk_cmds.append(nb.MMLJobDescription(prefix_req=def_reqs, config_options=opts))
nb.write_out_commands(cmd_list=multi_shrunk_cmds, suffix='multi_shrunk', max_cmds=2000)

In [66]:
# Collect one rsync command per run index and result project and store them next to the notebook.
# NOTE(review): 'pretrain' is synced although no cell above writes commands for that project -- confirm.
sync_cmds = [
    get_retrieve_for_proj(f'{projects[proj_id]}_{ix}')
    for ix in range(rerun)
    for proj_id in ('multi_task', 'aa_infer', 'transfer', 'arch_search', 'raw_shrunk', 'pretrain',
                    'raw_baseline', 'arch_infer', 'multi_shrunk')
]
# the file is written to the current working directory, one command per line
with Path(os.path.abspath(''), 'output_sync.txt').open('w') as file:
    file.write('\n'.join(sync_cmds))
print(f'Stored {len(sync_cmds)} commands at output_sync.txt.')