In [1]:
#hide
#default_exp tools.rerun
from nbdev.showdoc import *
from dsblocks.utils.nbdev_utils import nbdev_setup, TestRunner

nbdev_setup ()
tst = TestRunner (targets=['dummy'])

# Re-run previous experiments with new parameters

> Utility that allows you to:
> - Perform more runs on previous experiments, each run having a different random seed. 
> - Increase the number of epochs used in previous experiments, when NN models are used.

In [2]:
#export
import argparse
import sys

from hpsearch.config.hpconfig import get_experiment_manager
import hpsearch.config.hp_defaults as dflt

In [3]:
#for tests
import pytest
import pandas as pd

from hpsearch.examples.complex_dummy_experiment_manager import generate_data, init_em
import hpsearch.utils.experiment_utils as ut

## rerun

In [4]:
# export
def rerun (experiments=None, folder=None, epochs=None, runs=None, unfinished=False, 
           verbose=None, debug=False, em_attrs=None, store=False, from_dict=False, 
           range_exp=None, min_iterations=None, manager_path=dflt.manager_path):
    """
    Re-run previously registered experiments through the experiment manager.

    Parameters
    ----------
    experiments : sequence of int, optional
        Experiment numbers to re-run. The sequence is never modified in place.
    folder : str, optional
        When given, forwarded to `em.set_path_experiments (folder=folder)`.
    epochs : int, optional
        When given, the experiments are re-run with `{em.name_epoch: int(epochs)}` 
        as parameters, `prev_epoch=True` and `check_experiment_matches=False`.
        Otherwise no parameters are overridden and `check_experiment_matches=True`.
    runs : int, optional
        Forwarded to `em.rerun_experiment` as `nruns`.
    unfinished : bool
        When True, `check_finished=True` and `use_previous_best=False` are 
        forwarded to `em.rerun_experiment`.
    verbose : int, optional
        When given, forwarded to `em.set_verbose`.
    debug : bool
        The manager is called with `use_process=not debug`.
    em_attrs : dict, optional
        Each key/value pair is set as an attribute of the experiment manager.
    store : bool
        When True, `use_last_result=True` is forwarded to `em.rerun_experiment`.
    from_dict : bool
        Only honoured when `store` is True: adds `use_last_result_from_dict=True`.
    range_exp : pair of int, optional
        `(start, stop)`; experiments `start, ..., stop-1` are appended to 
        `experiments` (the upper bound is excluded).
    min_iterations : int, optional
        Only honoured when `store` is True: forwarded as `min_iterations`.
    manager_path : str
        Forwarded to `get_experiment_manager`.
    """
    em_args = dict(use_process=not debug)
    if range_exp is not None:
        assert len(range_exp) == 2, 'range_exp must contain exactly two values: (start, stop)'
        start, stop = range_exp
        # Build a new list instead of using `+=`: this supports the default 
        # `experiments=None` and avoids extending the caller's list in place.
        explicit = [] if experiments is None else list(experiments)
        experiments = explicit + list(range(start, stop))
    
    em = get_experiment_manager (manager_path=manager_path)
    if folder is not None:
        em.set_path_experiments (folder=folder)
    if verbose is not None:
        em.set_verbose (verbose)
    if em_attrs is not None:
        for name, value in em_attrs.items():
            setattr (em, name, value)
    
    if epochs is not None:
        parameters = {em.name_epoch: int(epochs)}
        em_args.update (prev_epoch=True)
        check_experiment_matches=False
    else:
        check_experiment_matches=True
        parameters = {}
    if unfinished:
        em_args.update (check_finished=True, use_previous_best=False)
    if store:
        em_args.update (use_last_result=True)
        # the two options below are only meaningful when storing results
        if from_dict:
            em_args.update (use_last_result_from_dict=True)
        if min_iterations is not None:
            em_args.update (min_iterations=min_iterations)
    
    em.rerun_experiment (experiments=experiments, nruns=runs, 
                         parameters=parameters, check_experiment_matches=check_experiment_matches,
                         **em_args)

## parse_arguments_and_run

In [5]:
#export
def parse_args (args):
    """
    Parse the command-line options of the rerun tool.

    Parameters
    ----------
    args : list of str
        Command-line tokens, without the program name.

    Returns
    -------
    argparse.Namespace
        Parsed options: `debug`, `experiments`, `range_exp`, `epochs`, 
        `unfinished`, `runs`, `store`, `from_dict`, `min_iterations`, 
        `folder`, `verbose` and `path`.

    Raises
    ------
    SystemExit
        Raised by argparse on invalid input, e.g. when `--range-exp` does 
        not receive exactly two integers.
    """
    parser = argparse.ArgumentParser(description='run experiment')
    parser.add_argument('-d', '--debug', action= "store_true", help="debug mode: the experiment manager is called with use_process=False")
    parser.add_argument('-e', '--experiments', type=int, nargs='+', default=[],  help="experiment numbers")
    # `rerun` requires exactly two bounds, so argparse enforces the count 
    # here instead of failing later on an assertion.
    parser.add_argument('--range-exp', type=int, nargs=2, default=None, metavar=('START', 'STOP'), 
                        help='include experiments START, ..., STOP-1 (STOP is excluded)')
    parser.add_argument('--epochs', type=int, default=None,  help="number of epochs")
    parser.add_argument('-u', '--unfinished', action= "store_true", help="the experiment manager is called with check_finished=True and use_previous_best=False")
    parser.add_argument('--runs', type=int, default=None,  help="number of runs")
    parser.add_argument('-s', '--store', action= "store_true",  help="store the result from experiments that were not saved in csv file.")
    parser.add_argument('-f', '--from-dict', action= "store_true",  help="when storing the result from experiments that were not saved in csv file, we use the dictionary of results typically named dict_results.pk")
    parser.add_argument('--min-iterations', type=int, default=None,  help="number of iterations to be present in model history in order to consider the experiment good enough for storage.")
    parser.add_argument('--folder', type=str, default=None, help='name of experiments folder')
    parser.add_argument('-v', '--verbose', type=int, default=None, help='verbosity level: 0, 1, 2')
    parser.add_argument('-p', '--path', default=dflt.manager_path, type=str, help='manager path used to load the experiment manager')
    pars = parser.parse_args(args)
    
    return pars

def parse_arguments_and_run (args, em_attrs = None):
    """
    Parse `args` with `parse_args` and call `rerun` with the result.

    The parsed `path` option is handed to `rerun` under the name 
    `manager_path`; every other option keeps its name. `em_attrs` is 
    forwarded to `rerun` unchanged.
    """
    options = dict(vars(parse_args(args)))
    # `rerun` names this option `manager_path`, the CLI names it `path`
    options['manager_path'] = options.pop('path')
    rerun (em_attrs=em_attrs, **options)

def main():
    """Console entry point: run the tool on the process arguments, minus the program name."""
    cli_args = sys.argv[1:]
    parse_arguments_and_run (cli_args)

### usage: performing more runs on previous experiments

In [6]:
#export tests.tools.test_rerun
def test_parse_arguments_and_run_more_runs ():
    """
    Re-run experiments 4 and 3, first without `--runs` and then with `--runs 10`.

    The experiment table is expected to keep its (9,25) shape after the first 
    call and to grow to (9,45), with ten `<i>_validation_accuracy` columns, 
    after the second one.
    """
    def count_validation_columns (table, n_runs):
        # number of `<i>_validation_accuracy` columns present for i < n_runs
        expected_names = [f'{i}_validation_accuracy' for i in range(n_runs)]
        return table.columns.isin(expected_names).sum()

    em = generate_data ('parse_arguments_and_run_more_runs', 
                        folder='new_folder')
    
    initial_table = em.get_experiment_data ()
    assert initial_table.shape==(9,25)

    # no --runs option: the table must keep its shape
    same_runs_args = ['-e', '4', '3', '--verbose', '1', '-p', em.manager_path]
    parse_arguments_and_run (same_runs_args)
    em.raise_error_if_run=True
    table_same_runs = em.get_experiment_data ()
    assert count_validation_columns (table_same_runs, 5)==5
    assert table_same_runs.shape==(9,25)

    # ten runs requested: new score columns are expected
    more_runs_args = ['-e', '4', '3', '--runs', '10', '-p', em.manager_path]
    em.raise_error_if_run=False
    parse_arguments_and_run (more_runs_args)
    table_more_runs = em.get_experiment_data ()
    assert table_more_runs.shape==(9,45)
    assert count_validation_columns (table_more_runs, 10)==10
    
    em.remove_previous_experiments ()

In [7]:
tst.run (test_parse_arguments_and_run_more_runs, tag='dummy', debug=False)

running test_parse_arguments_and_run_more_runs


ValueError: self.root != self.root_folder

### Increasing the number of epochs used in previous experiments

In [None]:
#export tests.tools.test_rerun
def test_parse_arguments_and_run_more_epochs ():
    """
    Re-run previous experiments with `--epochs 7` and check that each re-run 
    adds one row to the experiment table.

    The `desired_*` attributes are injected into the experiment manager through 
    `em_attrs`; their meaning is defined by the manager created by `init_em`.
    """
    em = init_em ('parse_arguments_and_run_more_epochs')

    def create_baseline_experiments ():
        # the five experiments that are later re-run with a different number of epochs
        for parameters in (
            {'offset':0.1, 'rate': 0.05, 'epochs': 5},
            {'offset':0.05, 'rate': 0.03, 'epochs': 6},
            {'offset':0.1, 'rate': 0.05, 'epochs': 9},
            {'offset':0.05, 'rate': 0.03, 'epochs': 10},
            {'offset':0.1, 'rate': 0.05, 'epochs': 11},
        ):
            _ = em.create_experiment_and_run (parameters=parameters)

    # NOTE(review): `display` is not imported in this cell; it is a builtin 
    # inside IPython/Jupyter -- confirm it is in scope in the exported test module.

    # get reference result
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 7})
    df = em.get_experiment_data ()
    display (df)
    em.remove_previous_experiments()

    # first 5 experiments
    create_baseline_experiments ()
    df = em.get_experiment_data ()
    display (df)
    assert df.shape==(5,8)

    # more epochs
    args = ['-e', '3', '--epochs', '7', '-d', '-p', em.manager_path]
    parse_arguments_and_run (
        args, 
        em_attrs={'desired_path_results_previous_experiment':'test_parse_arguments_and_run_more_epochs/experiments/00001/0',
                 'desired_epochs': 1, 'desired_current_epoch': 7}
    )

    df = em.get_experiment_data ()
    print (df.shape)
    assert df.shape==(6,8)
    
    args = ['-e', '4', '--epochs', '7', '-d', '-p', em.manager_path]
    # the keyword accepted by parse_arguments_and_run is `em_attrs`; 
    # passing `em_args` raised a TypeError before reaching the experiment manager
    parse_arguments_and_run (
        args, 
        em_attrs={'desired_path_results_previous_experiment':'test_parse_arguments_and_run_more_epochs/experiments/00000/0',
                 'desired_epochs': 2, 'desired_current_epoch': 7}
    )

    df = em.get_experiment_data ()
    print (df.shape)
    assert df.shape==(7,8)
    
    # *****************************************
    # same scenario, re-running both experiments in a single call
    # *****************************************
    em.remove_previous_experiments ()
    em.desired_path_results_previous_experiment, em.desired_epochs, em.desired_current_epoch = None, None, None
    # first 5 experiments
    create_baseline_experiments ()
    
    args = ['-e', '4', '3', '--epochs', '7', '-d', '-p', em.manager_path]
    parse_arguments_and_run (args)
    df = em.get_experiment_data ()
    print (df.shape)
    assert df.shape==(7,8)
    
    em.remove_previous_experiments ()

In [None]:
tst.run (test_parse_arguments_and_run_more_epochs, tag='dummy')

### Storing results from interrupted experiments

In [None]:
#export tests.tools.test_rerun
def test_parse_arguments_and_run_store ():
    """
    Check that `--store --from-dict` restores score columns that are missing 
    from the stored experiment table.

    The table files are overwritten with damaged copies in three scenarios 
    (all scores blanked, some scores blanked, some rows dropped). After each 
    one the tool is run and the score columns are compared with the original 
    table. A last scenario blanks whole rows and expects a ValueError.
    """
    def store_from_dict ():
        # CLI call shared by the three recoverable scenarios
        parse_arguments_and_run ('--range-exp 0 9 --store --from-dict --runs 5 '
                                 f'--min-iterations 1 -p {em.manager_path}'.split())

    def assert_scores_recovered (reference, recovered, score_columns):
        # TODO: see why finished is False after storing; until then the 
        # `<i>_finished` columns are forced to 1.0 before comparing
        expected = reference[score_columns].astype('float')
        actual = recovered[score_columns].astype('float')
        actual[[f'{run}_finished' for run in range(5)]]=1.0
        pd.testing.assert_frame_equal(expected,actual)

    em = generate_data ('test_parse_arguments_and_run_store', 
                        folder='new_folder')

    table = em.get_experiment_data ()
    assert table.shape==(9,25)
    accuracy_columns = ut.get_scores_columns (table, class_ids=range(5), suffix_results='_validation_accuracy')
    accuracy_columns += ut.get_scores_columns (table, class_ids=range(5), suffix_results='_test_accuracy')

    # *************************************************
    # Scenario 1: every score is missing, as if the 
    # experiments had been interrupted with Ctrl-C 
    # before their results were saved
    # *************************************************
    reference = table.copy()
    score_columns = ut.get_scores_columns (reference)
    table[score_columns] = None
    exp_path = em.path_experiments
    table.to_csv (exp_path/'experiments_data.csv')
    table.to_pickle (exp_path/'experiments_data.pk')
    damaged = em.get_experiment_data ()
    assert (reference[score_columns]!=damaged[score_columns]).all().all()

    store_from_dict ()
    assert_scores_recovered (reference, em.get_experiment_data (), score_columns)

    # Scenario 2: only some rows lost their scores
    table = reference.copy()
    table.loc[1,score_columns] = None
    table.loc[3:,score_columns] = None
    table.to_csv (f'{exp_path}/experiments_data.csv')
    table.to_pickle (f'{exp_path}/experiments_data.pk')
    damaged = em.get_experiment_data ()
    # TODO: see why (reference[score_columns]==damaged[score_columns]).sum().sum() 
    # is 30 here instead of 20

    store_from_dict ()
    assert_scores_recovered (reference, em.get_experiment_data (), score_columns)

    # *************************************************
    # Scenario 3: experiments_data.csv and 
    # experiments_data.pk were overwritten by accident 
    # with an old file, so rows 3..8 are gone
    # *************************************************
    table = reference.copy()
    table.loc[1,score_columns] = None
    table = table.drop (index=range(3,9))
    table.to_csv (f'{exp_path}/experiments_data.csv')
    table.to_pickle (f'{exp_path}/experiments_data.pk')
    damaged = em.get_experiment_data ()
    assert damaged.shape==(3,25)

    assert damaged.isna().sum().sum()==15

    store_from_dict ()
    assert_scores_recovered (reference, em.get_experiment_data (), score_columns)
    
    # *************************************************
    # Scenario 4 (unrealistic): rows exist but their 
    # parameters are blank. Each row of an existing csv 
    # must either carry its parameters or be absent, so 
    # an error is expected.
    # *************************************************
    reference = table.copy()
    table.iloc[1,:] = None
    table.iloc[3:,:] = None

    exp_path = em.path_experiments

    table.to_csv (exp_path/'experiments_data.csv')
    table.to_pickle (exp_path/'experiments_data.pk')

    with pytest.raises (ValueError):
        parse_arguments_and_run (
            f'--range-exp 0 9 --store --from-dict --runs 5 -p {em.manager_path}'.split()
        )
        
    em.remove_previous_experiments ()

In [None]:
tst.run (test_parse_arguments_and_run_store, tag='dummy')