In [1]:
#hide
#default_exp tools.rerun
from nbdev.showdoc import *
from dsblocks.utils.nbdev_utils import nbdev_setup, TestRunner

# Notebook-level setup helper imported from dsblocks.utils.nbdev_utils.
nbdev_setup ()
# Test runner used by the `tst.run (...)` cells below; those cells pass tag='dummy',
# which matches the single target configured here.
tst = TestRunner (targets=['dummy'])

# Re-run previous experiments with new parameters

> Utility that allows you to:
> - Perform more runs on previous experiments, each run having a different random seed. 
> - Increase the number of epochs used in previous experiments, when NN models are used.

In [2]:
#export
import argparse
import sys

from hpsearch.config.hpconfig import get_experiment_manager
import hpsearch.config.hp_defaults as dflt

In [3]:
#for tests
import pytest
import pandas as pd

from hpsearch.examples.complex_dummy_experiment_manager import generate_data, init_em
import hpsearch.utils.experiment_utils as ut

## rerun

In [4]:
# export
def rerun (experiments=None, root=None, epochs=None, runs=None, unfinished=False, 
           verbose=None, debug=False, em_attrs=None, store=False, from_dict=False, 
           range_exp=None, min_iterations=None, manager_path=dflt.manager_path):
    """
    Re-run previous experiments with new settings.

    Loads the experiment manager found at `manager_path` and delegates to its 
    `rerun_experiment` method.

    Parameters
    ----------
    experiments : list of int or None
        Experiment numbers to re-run. Passed through unchanged unless `range_exp` 
        is given, in which case a new list is built (the caller's list is never 
        modified).
    root : str or None
        Forwarded as `root_folder`.
    epochs : int or None
        If given, `{em.name_epoch: int(epochs)}` is forwarded as `parameters`, 
        `prev_epoch=True` is requested, and `check_experiment_matches` is False.
    runs : int or None
        Forwarded as `nruns`.
    unfinished : bool
        If True, adds `check_finished=True, use_previous_best=False`.
    verbose : int or None
        If given, applied with `em.set_verbose`.
    debug : bool
        Forwarded as `use_process=not debug`.
    em_attrs : dict or None
        Attributes set on the experiment manager (via `setattr`) before re-running.
    store : bool
        If True, adds `use_last_result=True`. `from_dict` and `min_iterations` are 
        only taken into account when `store` is True.
    from_dict : bool
        With `store`, adds `use_last_result_from_dict=True`.
    range_exp : sequence of two ints or None
        `(start, stop)`; experiments `start, ..., stop-1` are appended to `experiments`.
    min_iterations : int or None
        With `store`, forwarded as `min_iterations`.
    manager_path : str
        Forwarded to `get_experiment_manager`.
    """
    if range_exp is not None:
        assert len(range_exp) == 2, 'range_exp must contain exactly two values: start and stop'
        # Build a new list: supports the default experiments=None and avoids 
        # extending the caller's list in place.
        selected = [] if experiments is None else list(experiments)
        experiments = selected + list(range(range_exp[0], range_exp[1]))
    
    # root_folder is passed explicitly in the call below; repeating it here would 
    # raise "got multiple values for keyword argument 'root_folder'".
    em_args = dict(use_process=not debug)
    
    em = get_experiment_manager (manager_path=manager_path)
    if verbose is not None:
        em.set_verbose (verbose)
    if em_attrs is not None:
        for k in em_attrs:
            setattr (em, k, em_attrs[k])
    
    if epochs is not None:
        parameters = {em.name_epoch: int(epochs)}
        em_args.update (prev_epoch=True)
        check_experiment_matches=False
    else:
        check_experiment_matches=True
        parameters = {}
    if unfinished:
        em_args.update (check_finished=True, use_previous_best=False)
    if store:
        em_args.update (use_last_result=True)
        if from_dict:
            em_args.update (use_last_result_from_dict=True)
        if min_iterations is not None:
            em_args.update (min_iterations=min_iterations)
    
    em.rerun_experiment (experiments=experiments, nruns=runs, root_folder=root, 
                         parameters=parameters, check_experiment_matches=check_experiment_matches,
                         **em_args)

## parse_arguments_and_run

In [5]:
#export
def parse_args (args):
    """
    Parse the command-line options of the rerun tool.

    Parameters
    ----------
    args : list of str
        Command-line tokens, without the program name.

    Returns
    -------
    argparse.Namespace
        Parsed options. `range_exp` is either None or a list with exactly 
        two integers.
    """
    parser = argparse.ArgumentParser(description='run experiment')
    parser.add_argument('-d', '--debug', action= "store_true")
    parser.add_argument('-e', '--experiments', type=int, nargs='+', default=[],  help="experiment numbers")
    # `rerun` requires exactly two values here, so let argparse enforce it and 
    # report a usage error instead of failing later with an assertion.
    parser.add_argument('--range-exp', type=int, nargs=2, default=None, metavar=('START', 'STOP'),
                        help='include experiments START, ..., STOP-1')
    parser.add_argument('--epochs', type=int, default=None,  help="number of epochs")
    parser.add_argument('-u', '--unfinished', action= "store_true")
    parser.add_argument('--runs', type=int, default=None,  help="number of runs")
    parser.add_argument('-s', '--store', action= "store_true",  help="store the result from experiments that were not saved in csv file.")
    parser.add_argument('-f', '--from-dict', action= "store_true",  help="when storing the result from experiments that were not saved in csv file, we use the dictionary of results typically named dict_results.pk")
    parser.add_argument('--min-iterations', type=int, default=None,  help="number of iterations to be present in model history in order to consider the experiment good enough for storage.")
    parser.add_argument('--root', type=str, default=None, help='name of root folder')
    parser.add_argument('-v', '--verbose', type=int, default=None, help='verbosity level: 0, 1, 2')
    parser.add_argument('-p', '--path', default=dflt.manager_path, type=str)
    pars = parser.parse_args(args)
    
    return pars

def parse_arguments_and_run (args, em_attrs = None):
    """
    Parse `args` with `parse_args` and call `rerun` with the resulting options.

    Parameters
    ----------
    args : list of str
        Command-line tokens, without the program name.
    em_attrs : dict or None
        Forwarded unchanged to `rerun`.
    """
    options = vars (parse_args (args))
    # The CLI stores the manager location under `path`, while `rerun` 
    # expects the keyword `manager_path`.
    options['manager_path'] = options.pop ('path')
    rerun (em_attrs=em_attrs, **options)

def main():
    """Console entry point: run the tool on the arguments found in `sys.argv`."""
    cli_args = sys.argv[1:]  # drop the program name
    parse_arguments_and_run (cli_args)

### Usage: performing more runs on previous experiments

In [6]:
#export tests.tools.test_rerun
def test_parse_arguments_and_run_more_runs ():
    """
    Re-run experiments 4 and 3, first without `--runs` and then with `--runs 10`.

    The experiment table is expected to keep its (9, 25) shape after the first 
    call and to grow to (9, 45), with ten `*_validation_accuracy` columns, after 
    the second one.
    """
    em = generate_data ('parse_arguments_and_run_more_runs', root_folder='newroot')
    
    results = em.get_experiment_data (folder_experiments='newroot')
    assert results.shape==(9,25)

    # Re-run without asking for more runs: the table must not grow.
    cli_args = ['-e', '4', '3', '--root', 'newroot', '--verbose', '1', '-p', em.manager_path]
    parse_arguments_and_run (cli_args)
    em.raise_error_if_run=True
    results = em.get_experiment_data (folder_experiments='newroot')
    accuracy_columns = [f'{run}_validation_accuracy' for run in range(5)]
    assert results.columns.isin(accuracy_columns).sum()==5
    assert results.shape==(9,25)

    # Ask for 10 runs: new per-run columns must appear.
    cli_args = ['-e', '4', '3', '--runs', '10', '--root', 'newroot', '-p', em.manager_path]
    em.raise_error_if_run=False
    parse_arguments_and_run (cli_args)
    results = em.get_experiment_data (folder_experiments='newroot')
    assert results.shape==(9,45)
    accuracy_columns = [f'{run}_validation_accuracy' for run in range(10)]
    assert results.columns.isin(accuracy_columns).sum()==10
    
    em.remove_previous_experiments ()

In [7]:
# Run the test above through the notebook's TestRunner; tag='dummy' matches the runner's targets.
tst.run (test_parse_arguments_and_run_more_runs, tag='dummy', debug=False)

running test_parse_arguments_and_run_more_runs


requiring experiment number to be 4
running experiment 4 with parameters:
{'epochs': 15, 'offset': 0.3, 'rate': 0.03, 'noise': 0.1}
other_parameters:
{'verbose': 0, 'root_folder': 'newroot', 'log_message': 'fixed rate, multiple epochs values', 'script_path': '/home/jcidatascience/jaume/workspace/remote/temp/hpsearch/hpsearch/examples/dummy_experiment_manager.py', 'lineno': 222, 'experiment_number': 4, 'suffix_results': '_validation_accuracy', 'path_results_big': 'test_parse_arguments_and_run_more_runs/newroot/experiments/00004/4', 'git_hash': "b'f6611fd4061fbf7a3875484767a45fb3daff931f\\n'", 'use_process': True}
experiment script: /tmp/ipykernel_26319/766293011.py, line: 37
doing run 0 out of 1

script: /home/jcidatascience/jaume/workspace/remote/temp/hpsearch/hpsearch/examples/dummy_experiment_manager.py, line number: 222
script: /home/jcidatascience/jaume/workspace/remote/temp/hpsearch/hpsearch/examples/dummy_experiment_manager.py, line number: 222
script: /home/jcidatascience/jaume/

### Increasing the number of epochs used in previous experiments

In [8]:
#export tests.tools.test_rerun
def test_parse_arguments_and_run_more_epochs ():
    """
    Re-run previous experiments with `--epochs 7`.

    Five base experiments are created; re-running one of them with a new number 
    of epochs is expected to add one row to the experiment table. The 
    `desired_*` values passed through `em_attrs` are set as attributes on the 
    manager loaded by `rerun` (presumably the dummy manager checks them against 
    what it actually does -- confirm in complex_dummy_experiment_manager).
    """
    em = init_em ('parse_arguments_and_run_more_epochs')

    # get reference result
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 7})
    df = em.get_experiment_data ()
    display (df)
    em.remove_previous_experiments()

    # five base experiments
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 5})
    _ = em.create_experiment_and_run (parameters={'offset':0.05, 'rate': 0.03, 'epochs': 6})
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 9})
    _ = em.create_experiment_and_run (parameters={'offset':0.05, 'rate': 0.03, 'epochs': 10})
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 11})
    df = em.get_experiment_data ()
    display (df)
    assert df.shape==(5,8)

    # more epochs, experiment 3
    args = ['-e', '3', '--epochs', '7', '-d', '-p', em.manager_path]
    parse_arguments_and_run (
        args, 
        em_attrs={'desired_path_results_previous_experiment':'test_parse_arguments_and_run_more_epochs/experiments/00001/0',
                 'desired_epochs': 1, 'desired_current_epoch': 7}
    )

    df = em.get_experiment_data ()
    print (df.shape)
    assert df.shape==(6,8)
    
    # more epochs, experiment 4
    args = ['-e', '4', '--epochs', '7', '-d', '-p', em.manager_path]
    # The keyword must be `em_attrs`: `parse_arguments_and_run` has no `em_args` parameter.
    parse_arguments_and_run (
        args, 
        em_attrs={'desired_path_results_previous_experiment':'test_parse_arguments_and_run_more_epochs/experiments/00000/0',
                 'desired_epochs': 2, 'desired_current_epoch': 7}
    )

    df = em.get_experiment_data ()
    print (df.shape)
    assert df.shape==(7,8)
    
    # *****************************************
    # Same scenario, re-running both experiments 
    # in a single call and without em_attrs
    # *****************************************
    em.remove_previous_experiments ()
    em.desired_path_results_previous_experiment, em.desired_epochs, em.desired_current_epoch = None, None, None
    # five base experiments
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 5})
    _ = em.create_experiment_and_run (parameters={'offset':0.05, 'rate': 0.03, 'epochs': 6})
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 9})
    _ = em.create_experiment_and_run (parameters={'offset':0.05, 'rate': 0.03, 'epochs': 10})
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 11})
    
    args = ['-e', '4', '3', '--epochs', '7', '-d', '-p', em.manager_path]
    parse_arguments_and_run (args)
    df = em.get_experiment_data ()
    print (df.shape)
    assert df.shape==(7,8)
    
    em.remove_previous_experiments ()

In [9]:
# Run the test above through the notebook's TestRunner; tag='dummy' matches the runner's targets.
tst.run (test_parse_arguments_and_run_more_epochs, tag='dummy')

running test_parse_arguments_and_run_more_epochs
fitting model with 7 epochs
epoch 0: accuracy: 0.15000000000000002
epoch 1: accuracy: 0.2
epoch 2: accuracy: 0.25
epoch 3: accuracy: 0.3
epoch 4: accuracy: 0.35
epoch 5: accuracy: 0.39999999999999997
epoch 6: accuracy: 0.44999999999999996


Unnamed: 0,offset,rate,epochs,0_validation_accuracy,0_test_accuracy,time_0,date,0_finished
0,0.1,0.05,7.0,0.45,0.55,0.001686,12:05:03.645461,True


fitting model with 5 epochs
epoch 0: accuracy: 0.15000000000000002
epoch 1: accuracy: 0.2
epoch 2: accuracy: 0.25
epoch 3: accuracy: 0.3
epoch 4: accuracy: 0.35
fitting model with 6 epochs
epoch 0: accuracy: 0.08
epoch 1: accuracy: 0.11
epoch 2: accuracy: 0.14
epoch 3: accuracy: 0.17
epoch 4: accuracy: 0.2
epoch 5: accuracy: 0.23
fitting model with 9 epochs
epoch 0: accuracy: 0.15000000000000002
epoch 1: accuracy: 0.2
epoch 2: accuracy: 0.25
epoch 3: accuracy: 0.3
epoch 4: accuracy: 0.35
epoch 5: accuracy: 0.39999999999999997
epoch 6: accuracy: 0.44999999999999996
epoch 7: accuracy: 0.49999999999999994
epoch 8: accuracy: 0.5499999999999999
fitting model with 10 epochs
epoch 0: accuracy: 0.08
epoch 1: accuracy: 0.11
epoch 2: accuracy: 0.14
epoch 3: accuracy: 0.17
epoch 4: accuracy: 0.2
epoch 5: accuracy: 0.23
epoch 6: accuracy: 0.26
epoch 7: accuracy: 0.29000000000000004
epoch 8: accuracy: 0.32000000000000006
epoch 9: accuracy: 0.3500000000000001
fitting model with 11 epochs
epoch 0: ac

Unnamed: 0,offset,rate,epochs,0_validation_accuracy,0_test_accuracy,time_0,date,0_finished
0,0.1,0.05,5.0,0.35,0.45,0.001655,12:05:03.691009,True
1,0.05,0.03,6.0,0.23,0.33,0.001519,12:05:03.721775,True
2,0.1,0.05,9.0,0.55,0.65,0.002227,12:05:03.759225,True
3,0.05,0.03,,0.35,0.25,0.002223,12:05:03.798742,True
4,0.1,0.05,11.0,0.65,0.55,0.002157,12:05:03.841399,True


reading model from test_parse_arguments_and_run_more_epochs/experiments/00001/0/model_weights.pk
fitting model with 1 epochs
epoch 0: accuracy: 0.26
(6, 8)
reading model from test_parse_arguments_and_run_more_epochs/experiments/00000/0/model_weights.pk
fitting model with 2 epochs
epoch 0: accuracy: 0.39999999999999997
epoch 1: accuracy: 0.44999999999999996
(7, 8)
fitting model with 5 epochs
epoch 0: accuracy: 0.15000000000000002
epoch 1: accuracy: 0.2
epoch 2: accuracy: 0.25
epoch 3: accuracy: 0.3
epoch 4: accuracy: 0.35
fitting model with 6 epochs
epoch 0: accuracy: 0.08
epoch 1: accuracy: 0.11
epoch 2: accuracy: 0.14
epoch 3: accuracy: 0.17
epoch 4: accuracy: 0.2
epoch 5: accuracy: 0.23
fitting model with 9 epochs
epoch 0: accuracy: 0.15000000000000002
epoch 1: accuracy: 0.2
epoch 2: accuracy: 0.25
epoch 3: accuracy: 0.3
epoch 4: accuracy: 0.35
epoch 5: accuracy: 0.39999999999999997
epoch 6: accuracy: 0.44999999999999996
epoch 7: accuracy: 0.49999999999999994
epoch 8: accuracy: 0.549

### Storing results from interrupted experiments

In [25]:
#export tests.tools.test_rerun
def test_parse_arguments_and_run_store ():
    """
    Check that `--store --from-dict` restores results missing from the saved table.

    Each scenario damages `experiments_data.csv` / `experiments_data.pk` in a 
    different way, runs the tool with `--store --from-dict`, and compares the 
    score columns of the resulting table with the original ones. The last 
    scenario checks that a table containing rows without parameters raises 
    `ValueError`.
    """
    path_experiments = 'test_parse_arguments_and_run_store'
    em = generate_data (path_experiments, 
                        root_folder='newroot')

    df = em.get_experiment_data (folder_experiments='newroot')
    assert df.shape==(9,25)
    # NOTE(review): this value of `columns` is never used; it is reassigned
    # a few lines below before its first use.
    columns = ut.get_scores_columns (df, class_ids=range(5), suffix_results='_validation_accuracy')
    columns += ut.get_scores_columns (df, class_ids=range(5), suffix_results='_test_accuracy')

    # *************************************************
    # The following simulates the case where 
    # many experiments were probably not saved
    # because they were interrupted with Ctrl-C
    # *************************************************
    df_orig = df.copy()
    columns = ut.get_scores_columns (df_orig)
    # Blank every score column and overwrite both saved copies of the table.
    df[columns] = None
    path = em.get_path_experiments (folder='newroot')
    df.to_csv (f'{path}/experiments_data.csv')
    df.to_pickle (f'{path}/experiments_data.pk')
    df_overwritten = em.get_experiment_data (folder_experiments='newroot')
    assert (df_orig[columns]!=df_overwritten[columns]).all().all()

    parse_arguments_and_run ('--root newroot --range-exp 0 9 --store --from-dict --runs 5 '
                             f'--min-iterations 1 -p {em.manager_path}'.split())

    df_new = em.get_experiment_data (folder_experiments='newroot')
    # TODO: see why finished is False after storing
    #assert (df_orig[columns]==df_new[columns]).all().all()
    x, y = df_orig[columns].astype('float'), df_new[columns].astype('float')
    # Force the `*_finished` columns to 1.0 before comparing (see the TODO above).
    y[[f'{x}_finished' for x in range(5)]]=1.0
    pd.testing.assert_frame_equal(x,y)

    # Second variant: only the scores of row 1 and of rows 3 onwards are blanked.
    df = df_orig.copy()
    df.loc[1,columns] = None
    df.loc[3:,columns] = None
    df.to_csv (f'{path}/experiments_data.csv')
    df.to_pickle (f'{path}/experiments_data.pk')
    df_overwritten = em.get_experiment_data (folder_experiments='newroot')
    #assert (df_orig[columns]==df_overwritten[columns]).sum().sum() == 20
    # TODO: see why is the following true, instead of the previous:
    #assert (df_orig[columns]==df_overwritten[columns]).sum().sum() == 30

    parse_arguments_and_run ('--root newroot --range-exp 0 9 --store --from-dict --runs 5 '
                             f'--min-iterations 1 -p {em.manager_path}'.split())

    df_new = em.get_experiment_data (folder_experiments='newroot')
    #assert (df_orig[columns]==df_new[columns]).all().all()
    x, y = df_orig[columns].astype('float'), df_new[columns].astype('float')
    y[[f'{x}_finished' for x in range(5)]]=1.0
    pd.testing.assert_frame_equal(x,y)

    # *************************************************
    # The following simulates the case where 
    # many experiments were not saved because
    # experiments_data.csv and experiments_data.pk
    # were overwritten by accident with an old file
    # *************************************************
    df = df_orig.copy()
    df.loc[1,columns] = None
    # Remove rows 3..8 entirely, leaving a 3-row table.
    df = df.drop (index=range(3,9))
    df.to_csv (f'{path}/experiments_data.csv')
    df.to_pickle (f'{path}/experiments_data.pk')
    df_overwritten = em.get_experiment_data (folder_experiments='newroot')
    assert df_overwritten.shape==(3,25)

    assert df_overwritten.isna().sum().sum()==15

    parse_arguments_and_run ('--root newroot --range-exp 0 9 --store --from-dict --runs 5 '
                             f'--min-iterations 1 -p {em.manager_path}'.split())

    df_new = em.get_experiment_data (folder_experiments='newroot')
    #assert (df_orig[columns]==df_new[columns]).all().all()
    x, y = df_orig[columns].astype('float'), df_new[columns].astype('float')
    y[[f'{x}_finished' for x in range(5)]]=1.0
    pd.testing.assert_frame_equal(x,y)
    
    # *************************************************
    # The following unrealistic scenario produces an error
    # For each row of an existing csv, we need to either have 
    # the parameters of that row, or not have the row at all.
    # *************************************************
    # `df` is still the 3-row table built in the previous scenario.
    df_orig = df.copy()
    # Blank the whole of row 1, parameters included.
    df.iloc[1,:] = None
    # NOTE(review): with only three rows left, this slice looks empty -- confirm intent.
    df.iloc[3:,:] = None

    path = em.get_path_experiments (folder='newroot')

    df.to_csv (f'{path}/experiments_data.csv')
    df.to_pickle (f'{path}/experiments_data.pk')

    with pytest.raises (ValueError):
        parse_arguments_and_run (
            f'--root newroot --range-exp 0 9 --store --from-dict --runs 5 -p {em.manager_path}'.split()
        )
        
    em.remove_previous_experiments ()

In [26]:
# Run the test above through the notebook's TestRunner; tag='dummy' matches the runner's targets.
tst.run (test_parse_arguments_and_run_store, tag='dummy')

running test_parse_arguments_and_run_store
storing result from test_test_parse_arguments_and_run_store/newroot/experiments/00000/0 with iterations 5
storing result from test_test_parse_arguments_and_run_store/newroot/experiments/00000/0 with iterations 5
storing result from test_test_parse_arguments_and_run_store/newroot/experiments/00000/1 with iterations 5
storing result from test_test_parse_arguments_and_run_store/newroot/experiments/00000/1 with iterations 5
storing result from test_test_parse_arguments_and_run_store/newroot/experiments/00000/2 with iterations 5
storing result from test_test_parse_arguments_and_run_store/newroot/experiments/00000/2 with iterations 5
storing result from test_test_parse_arguments_and_run_store/newroot/experiments/00000/3 with iterations 5
storing result from test_test_parse_arguments_and_run_store/newroot/experiments/00000/3 with iterations 5
storing result from test_test_parse_arguments_and_run_store/newroot/experiments/00000/4 with iterations 5
sto