In [1]:
#hide
#default_exp utils.experiment_utils
from nbdev.showdoc import *
from dsblocks.utils.nbdev_utils import nbdev_setup, TestRunner

# Notebook setup helper imported from dsblocks (presumably prepares the nbdev environment -- confirm).
nbdev_setup ()
# Runner used by every `tst.run (...)` cell of this notebook; those cells pass tag='dummy'.
# NOTE(review): presumably only tests whose tag is listed in `targets` are executed -- confirm.
tst = TestRunner (targets=['dummy'])
# Alternative runner with an empty list of targets, kept here to be toggled by hand.
#tst = TestRunner (targets=[])

# Experiment Utils

> Helper functions for querying and retrieving results from past experiments

In [2]:
#export
import pandas as pd
import numpy as np
import pickle
import os
import sys
import time
from sklearn.model_selection import ParameterGrid
import warnings

from hpsearch.config import hp_defaults as dflt
from hpsearch.config.hpconfig import get_experiment_manager
# Module-level side effect: every warning category is silenced from this point on.
warnings.filterwarnings('ignore')

In [3]:
#for tests
import pytest
import pandas as pd
from IPython.display import display

from dsblocks.utils.nbdev_utils import md
from hpsearch.examples.dummy_experiment_manager import (DummyExperimentManager, 
                                                        run_multiple_experiments)
from hpsearch.examples.complex_dummy_experiment_manager import generate_data, init_em
from hpsearch.config import hp_defaults as dflt

In [4]:
# exports tests.utils.test_experiment_utils
def generate_data_exp_utils (name_folder):
    """
    Build a DummyExperimentManager with freshly generated experiments.

    The manager stores its data under `test_{name_folder}/debug`. Previous experiments 
    are removed first; then the same offset x epochs grid is run twice with 5 runs each, 
    the second time passing rate=0.0001 as well.

    Returns the experiment manager.
    """
    path_experiments = f'test_{name_folder}/debug'
    em = DummyExperimentManager (path_experiments=path_experiments, 
                                 manager_path=f'{path_experiments}/managers',
                                 verbose=0)
    em.remove_previous_experiments (parent=True)
    # First pass: no explicit rate. Second pass: same grid with an explicit rate.
    for extra_arguments in ({}, dict(rate=0.0001)):
        grid = dict(offset=[0.1, 0.3, 0.6], epochs=[5, 10, 100])
        run_multiple_experiments(em=em, nruns=5, noise=0.1, verbose=False,
                                 parameters_multiple_values=grid, **extra_arguments)
    return em

## Dataframe I/O

In [None]:
#export
def read_df (path, name='experiments_data'):
    """
    Load the dataframe stored under `path`.

    The pickle file `{path}/{name}.pk` is tried first; if it cannot be read, the CSV file 
    `{path}/{name}.csv` is read instead, using its first column as index.

    Returns the loaded DataFrame, or None when neither file can be read.
    """
    # Both paths are built from their parts. Deriving the pickle path with 
    # str.replace('csv', 'pk') would also rewrite any 'csv' contained in `path` or `name`.
    path_pickle = f'{path}/{name}.pk'
    path_csv = f'{path}/{name}.csv'
    try:
        return pd.read_pickle (path_pickle)
    except Exception:
        # Best effort: fall back to the CSV copy below.
        pass
    try:
        return pd.read_csv (path_csv, index_col=0)
    except Exception:
        return None

def write_df (df, path, name='experiments_data'):
    """
    Store `df` under `path` twice: as pickle (`{name}.pk`) and as CSV (`{name}.csv`).
    """
    # Paths are built from their parts so that a 'csv' substring inside `path` or `name` 
    # is never rewritten (which str.replace('csv', 'pk') would do).
    df.to_pickle (f'{path}/{name}.pk')
    df.to_csv (f'{path}/{name}.csv')
    
def write_binary_df_if_not_exists (df, path, name='experiments_data'):
    """
    Store `df` as pickle in `{path}/{name}.pk`, unless that file already exists.
    """
    path_pickle = f'{path}/{name}.pk'
    # Honour the "if not exists" contract stated by the name: never overwrite.
    if not os.path.exists (path_pickle):
        df.to_pickle (path_pickle)

## get_experiment_data

In [5]:
#export
def get_experiment_data (experiments=None):
    """
    Returns data stored from previous experiments in the form of a DataFrame. 
    
    Thin wrapper: the call is forwarded, together with `experiments`, to 
    `hpsearch.config.hpconfig.get_experiment_data`.
    """
    # Imported at call time, inside the function, instead of at module import time.
    from hpsearch.config import hpconfig
    return hpconfig.get_experiment_data (experiments=experiments)

### Usage example

In [6]:
#exports tests.utils.test_experiment_utils
def test_get_experiment_data ():
    """
    The module-level `get_experiment_data` must return the same dataframe 
    as the experiment manager that generated the data.
    """
    em = generate_data ('get_experiment_data')
    
    obtained = get_experiment_data ()
    expected = em.get_experiment_data ()
    pd.testing.assert_frame_equal (obtained, expected)
    
    # clean-up
    em.remove_previous_experiments (parent=True)

In [7]:
# Run the test defined above through the notebook-level TestRunner `tst`, tagged 'dummy'.
tst.run (test_get_experiment_data, tag='dummy')

running test_get_experiment_data


NameError: name 'dflt' is not defined

## Get experiment parameters and scores

### get_parameters_columns

In [8]:
#export
def get_parameters_columns (experiment_data, only_not_null=False):
    """
    List the full column keys of the parameters found in `experiment_data`.

    Parameters
    ----------
    experiment_data : DataFrame
        Its columns under `dflt.parameters_col` are the ones listed.
    only_not_null : bool
        If True, parameters whose column is null in every row are left out.

    Returns
    -------
    list of tuples `(dflt.parameters_col, *sub_key)`, in column order.
    """
    parameters = [(dflt.parameters_col, *x) for x in experiment_data[dflt.parameters_col].columns]
    if only_not_null:
        # Filter with a plain boolean mask: going through np.array would turn every 
        # element of the column keys into a string and the keys themselves into lists.
        has_some_value = experiment_data.loc[:, parameters].notnull().any(axis=0).values
        parameters = [column for column, keep in zip(parameters, has_some_value) if keep]
    return parameters

### get_experiment_parameters

In [None]:
#export
def get_experiment_parameters (experiment_data, only_not_null=False):
    """
    Return the part of `experiment_data` made of the columns given by `get_parameters_columns`.
    """
    parameter_columns = get_parameters_columns (experiment_data, only_not_null=only_not_null)
    return experiment_data[parameter_columns]

### get_scores_columns

In [None]:
#export
def get_scores_columns (experiment_data=None, score_name=None, run_number=None):
    """
    Determine the columns that provide evaluation scores.

    Parameters
    ----------
    experiment_data : DataFrame
        Required. Its columns under `dflt.scores_col` are the candidates.
    score_name : str or list of str, optional
        Restrict the result to these scores. All the scores are used if None.
    run_number : int, list, range, tuple, set or array of int, optional
        Restrict the result to these run numbers. All the runs are used if None.

    Returns
    -------
    list of tuples `(dflt.scores_col, score, run)`, following the column order of 
    `experiment_data` (grouped by the order of `score_name` when it is given).

    Raises
    ------
    ValueError if `experiment_data` is None.
    """
    # The dataframe is indexed in every branch below, so it can never be omitted.
    if experiment_data is None:
        raise ValueError ('experiment_data should be different than None')
    if score_name is not None and not isinstance(score_name, list):
        score_name = [score_name]
    if run_number is not None and not isinstance(run_number, list):
        if isinstance(run_number, (range, tuple, set, np.ndarray)): run_number = list(run_number)
        else: run_number = [run_number]

    if score_name is None:
        # The run filter is applied here as well; it must not depend on score_name being given.
        return [(dflt.scores_col, *x) for x in experiment_data[dflt.scores_col].columns
                if run_number is None or x[1] in run_number]

    scores_columns = []
    for score in score_name:
        runs = experiment_data[dflt.scores_col, score].columns
        if run_number is not None:
            # Keep the column order (a set intersection gives no ordering guarantee).
            runs = [c for c in runs if c in run_number]
        scores_columns.extend ((dflt.scores_col, score, c) for c in runs)
    return scores_columns

### get_experiment_scores

In [None]:
#export
def get_experiment_scores (experiment_data = None, score_name=None, run_number=None, remove_score_name=False):
    """
    Return the part of `experiment_data` made of the columns given by `get_scores_columns`.

    If `remove_score_name` is True, the columns of the result are replaced by 
    their third level only.
    """
    selected_columns = get_scores_columns (experiment_data, score_name=score_name, run_number=run_number)
    scores = experiment_data[selected_columns]
    if remove_score_name:
        scores.columns = scores.columns.get_level_values(2)
    return scores

### Usage examples

In [None]:
#exports tests.utils.test_experiment_utils
def test_get_parameters_and_scores ():
    """
    Usage examples and checks for get_parameters_columns, get_experiment_parameters, 
    get_scores_columns and get_experiment_scores on generated experiment data.

    The `offset` column is overwritten and restored along the way, so the order 
    of the statements matters.
    """
    path_experiments = 'test_get_parameters_and_scores'
    em = generate_data (path_experiments)
    df = em.get_experiment_data ()

    # ************************************************************
    # get_parameters_columns
    # ************************************************************
    expected_result = [(dflt.parameters_col, x, '') for x in ['epochs', 'noise', 'offset', 'rate']]
    assert get_parameters_columns (df) == expected_result

    mi_offset = (dflt.parameters_col, 'offset', '')
    # saved so that the column can be restored after being blanked
    offset = df[mi_offset].values.copy()
    md ('- We can take only those which have at least some value that is not None.')
    df.loc[:, mi_offset] = None
    expected_result = [(dflt.parameters_col, x, '') for x in ['epochs', 'noise', 'rate']]
    assert get_parameters_columns (df, only_not_null=True) == expected_result

    md ('- If only some elements are None for a given parameter, we still include it.')
    df.loc[:, mi_offset] = offset
    df.loc[2, mi_offset] = None
    expected_result = [(dflt.parameters_col, x, '') for x in ['epochs', 'noise', 'offset', 'rate']]
    assert get_parameters_columns (df, only_not_null=True)==expected_result
    df.loc[:, mi_offset] = offset

    # ************************************************************
    # get_experiment_parameters
    # ************************************************************
    md ('- Same as get_parameters_columns, but returning dataframe of parameter values.')
    result = get_experiment_parameters (df)
    assert result.shape == (9, 4)
    expected_result = [(dflt.parameters_col, x, '') for x in ['epochs', 'noise', 'offset', 'rate']]
    assert result.columns.tolist() == expected_result

    # ************************************************************
    # get_scores_columns
    # ************************************************************
    md ('- Retrieve all columns that have scores, for all runs')
    expected_result = [(dflt.scores_col, x, y) for x in ['test_accuracy', 'validation_accuracy']
                       for y in range(5)]
    assert get_scores_columns (df) == expected_result

    md ('- Retrieve all columns for given score name, for all runs')
    # built from dflt.scores_col, like every other expectation of this test
    expected_result = [(dflt.scores_col, 'test_accuracy', y) for y in range(5)]
    assert get_scores_columns (df, score_name='test_accuracy') == expected_result

    md ('- Retrieve all columns for given score name, for given runs')
    expected_result = [(dflt.scores_col, x, y) for x in ['test_accuracy'] 
                       for y in [2, 4]]
    assert get_scores_columns (df, score_name='test_accuracy', run_number=[2, 4]) == expected_result

    # ************************************************************
    # get_experiment_scores
    # ************************************************************
    md ('- Same, but returning dataframe with selected scores values:')
    result = get_experiment_scores (df)
    display (result)
    assert result.shape==(9,10)

    result = get_experiment_scores (df, score_name='test_accuracy')
    display (result)
    assert result.shape==(9,5)

    result = get_experiment_scores (df, score_name='test_accuracy', run_number=[2,4])
    display (result)
    assert result.shape==(9,2)

    md ('- We can remove the metric name and only keep the run number in each column:')
    result = get_experiment_scores (df, score_name='test_accuracy', run_number=[2,4], remove_score_name=True)
    display (result)
    assert result.shape==(9,2)
    
    # ************************************************************
    # get_scores_columns, first usage example: we do not indicate the name of the score
    # ************************************************************
    expected_result = [(dflt.scores_col, x, y) for x in ['test_accuracy', 'validation_accuracy'] 
                       for y in range(5)]
    assert get_scores_columns (df)==expected_result
    
    # ************************************************************
    # get_scores_columns, second usage: we indicate the name of the score
    # ************************************************************
    result = get_scores_columns (df, run_number=range(5), score_name='validation_accuracy')
    expected_result = [(dflt.scores_col, 'validation_accuracy', y) for y in range(5)]
    assert result == expected_result
    em.remove_previous_experiments (parent=True)

In [None]:
# Run the test defined above through the notebook-level TestRunner `tst`, tagged 'dummy'.
tst.run (test_get_parameters_and_scores, tag='dummy')

## get_scores_names

In [None]:
#export
def isnull (x):
    """Tell whether the scalar `x` is None or a missing value such as NaN."""
    # pd.isnull, unlike np.isnan, does not raise on non-numeric scalars such as strings.
    return x is None or pd.isnull(x)

def get_scores_names (experiment_data=None, run_number=None, experiment=None, only_valid=True):
    """ 
    Determine the names of the scores included in experiment data. 
    
    If run_number is provided, we provide the scores stored for that run number (or 
    list / range of run numbers). If, in addition to this, experiment is provided, 
    and only_valid=True, we provide only the scores that are not null for the given 
    experiment number.

    Returns the sorted list of score names; the list is empty when no score column 
    passes the filters.
    """
    
    if run_number is None:
        scores_names = experiment_data[dflt.scores_col].columns.get_level_values(0).unique().tolist()
    else:
        if not isinstance(run_number, list):
            if isinstance(run_number, range): run_number=list(run_number)
            else: run_number = [run_number]
        scores_columns = [(dflt.scores_col, *c) for c in experiment_data[dflt.scores_col].columns 
                          if c[1] in run_number]
        if (experiment is not None) and only_valid:
            scores_columns = [c for c in scores_columns 
                              if not isnull(experiment_data.loc[experiment, c])]
        # Names are collected directly from the tuples: pd.MultiIndex.from_tuples 
        # raises when the list of remaining columns is empty.
        scores_names = list(dict.fromkeys(c[1] for c in scores_columns))
    return sorted(scores_names)

### Usage

In [None]:
#exports tests.utils.test_experiment_utils
def test_get_scores_names ():
    """
    get_scores_names: all the names, the names for one run of one experiment, 
    and the names left when one of the scores of that run is NaN.
    """
    em = generate_data_exp_utils ('get_scores_names')
    experiment_data = em.get_experiment_data ()
    
    all_names = get_scores_names (experiment_data)
    print (all_names)
    assert all_names == ['test_accuracy', 'validation_accuracy']
    
    names_for_run = get_scores_names (experiment_data, run_number=3, experiment=7)
    print(names_for_run)
    assert list(np.sort(names_for_run))==['test_accuracy', 'validation_accuracy']

    # test when only some scores are valid
    data_with_missing_score = experiment_data.copy()
    data_with_missing_score.loc[7, (dflt.scores_col, 'test_accuracy', 3)]=np.nan
    valid_names = get_scores_names (data_with_missing_score, run_number=3, experiment=7)
    print (valid_names)
    assert valid_names==['validation_accuracy']
    
    em.remove_previous_experiments (parent=True)

In [None]:
# Run the test defined above through the notebook-level TestRunner `tst`, tagged 'dummy'.
tst.run (test_get_scores_names, tag='dummy')

## get_monitored_training_metrics

In [None]:
#export
def get_monitored_training_metrics (experiment, run_number=0, history_file_name='model_history.pk', 
                                    path_results=None):
    """
    List the keys of the pickled history stored for one run of an experiment.

    Parameters
    ----------
    experiment, run_number : 
        Passed to `hpsearch.config.hpconfig.get_path_results` to locate the results 
        folder. Only used when `path_results` is None.
    history_file_name : str
        Name of the pickle file inside the results folder.
    path_results : str, optional
        Folder holding the history file.

    Returns
    -------
    list with the keys of the loaded history object, or an empty list when the 
    history file does not exist.
    """
    if path_results is None:
        from hpsearch.config.hpconfig import get_path_results
        path_results = get_path_results(experiment, run_number)
    path_history = f'{path_results}/{history_file_name}'
    if not os.path.exists(path_history):
        return []
    # NOTE: unpickling can execute arbitrary code; only load history files from trusted sources.
    # The context manager closes the file handle, which a bare open() inside pickle.load would leak.
    with open(path_history, 'rb') as history_file:
        history = pickle.load(history_file)
    return list(history.keys())

### Usage

In [None]:
#exports tests.utils.test_experiment_utils
def test_get_monitored_training_metrics ():
    """The history of experiment 0 must expose the three monitored metrics, in order."""
    em = generate_data_exp_utils ('get_monitored_training_metrics')
    
    metrics = get_monitored_training_metrics (0)
    print (metrics)
    expected_metrics = ['validation_accuracy', 'test_accuracy', 'accuracy']
    assert metrics==expected_metrics
    
    em.remove_previous_experiments (parent=True)

In [None]:
# Run the test defined above through the notebook-level TestRunner `tst`, tagged 'dummy'.
tst.run (test_get_monitored_training_metrics, tag='dummy')

## get_runs_with_results

In [None]:
#export
def get_runs_with_results (experiment_data = None, score_name=None, run_number=None):
    """
    Gets the list of run_number for whom there are results in experiment_data.

    A score column counts as having results only when none of its rows is null. 
    The value returned is the third level (run number) of each such column, 
    among those selected by `get_scores_columns`.
    """
    assert experiment_data is not None, 'experiment_data must be introduced'
    score_columns = get_scores_columns (experiment_data, score_name=score_name, run_number=run_number)
    # one boolean per score column: True when every experiment has a value in it
    is_complete = experiment_data.loc[:, score_columns].notnull().all(axis=0)
    complete_columns = is_complete.index[is_complete.values]
    return complete_columns.get_level_values(2).tolist()

### Usage

In [None]:
#exports tests.utils.test_experiment_utils
def test_get_runs_with_results ():
    """get_runs_with_results for one score, with and without restricting the run numbers."""
    em = generate_data ('get_runs_with_results')
    experiment_data = em.get_experiment_data ()
    
    # we need to introduce experiment_data df, and score_name
    all_runs = get_runs_with_results (experiment_data, score_name='validation_accuracy')
    display (all_runs)
    assert all_runs==[0,1,2,3,4]
    
    # we can also restrict to certain run_number
    some_runs = get_runs_with_results (experiment_data, score_name='validation_accuracy', 
                                       run_number=[0,2])
    display (some_runs)
    assert some_runs==[0,2]
    em.remove_previous_experiments (parent=True)

In [None]:
# Run the test defined above through the notebook-level TestRunner `tst`, tagged 'dummy'.
tst.run (test_get_runs_with_results, tag='dummy')

## get_parameters_unique

In [None]:
#export
def get_parameters_unique(df):
    """
    Keep only the parameters that take more than one distinct value across rows.

    When `df` has a single row (or none), all the parameter columns are kept.

    Parameters
    ----------
    df : DataFrame with 3-level columns, the parameters being under `dflt.parameters_col`.

    Returns
    -------
    columns : MultiIndex with the full keys of the parameter columns that are kept 
        (empty, with 3 levels, if no parameter varies).
    df_all : new DataFrame with the kept parameter columns followed by every 
        column of `df` that is not a parameter.
    """
    assert df.columns.nlevels == 3
    df_all = df
    df = df[dflt.parameters_col]
    if df.shape[0] > 1:
        parameters = [k for k in df.columns if len(df[k].unique()) > 1]
    else:
        parameters = df.columns.tolist()
    if len(parameters) > 0:
        columns = pd.MultiIndex.from_tuples([(dflt.parameters_col, *c) for c in parameters])
    else:
        # from_tuples cannot infer the number of levels from an empty list and raises
        columns = pd.MultiIndex.from_arrays([[], [], []])
    df_parameters = df[parameters]
    df_parameters.columns = columns
    all_cols = df_all.columns.get_level_values(0).unique()
    no_par_cols = all_cols [all_cols != dflt.parameters_col]
    df_no_par = df_all[no_par_cols]
    df_all = pd.concat([df_parameters, df_no_par], axis=1)
    return columns, df_all

### Usage

In [None]:
#exports tests.utils.test_experiment_utils
def test_get_parameters_unique ():
    """get_parameters_unique must drop `noise`, the only parameter with a single value."""
    em = generate_data_exp_utils ('get_parameters_unique')
    experiment_data = em.get_experiment_data ()
    
    # keeps only those parameters with more than one value,
    # removing 'noise' in this case, since it has the same value in all rows
    kept_columns, reduced_data = get_parameters_unique (experiment_data)
    assert reduced_data.shape==(18,28)
    expected_columns = [(dflt.parameters_col, name, '') for name in ['epochs', 'offset', 'rate']]
    assert kept_columns.tolist() == expected_columns
    
    em.remove_previous_experiments (parent=True)

In [None]:
# Run the test defined above through the notebook-level TestRunner `tst`, tagged 'dummy'.
tst.run (test_get_parameters_unique, tag='dummy')

## compact_parameters

In [None]:
#export
def compact_parameters (df, number_characters=1):
    """
    Shorten the second level of the columns of `df`.

    Each name is split by '_'; the first `number_characters` characters of each 
    chunk are kept, the first one in upper case, and the chunks are joined back 
    (e.g. 'test_accuracy' -> 'TeAc' with number_characters=2).

    The columns of `df` are replaced in place; `df` must have 2 or 3 column levels.

    Returns
    -------
    df : the same dataframe object, with the new columns.
    dict_rename : dict mapping each original name to its short version.
    """
    par_or = df.columns.get_level_values(1)
    # y[:1] instead of y[0]: the empty chunks produced by leading, trailing or doubled 
    # underscores contribute nothing instead of raising IndexError.
    par_new = [''.join(y[:1].upper()+y[1:number_characters] for y in x.split('_')) for x in par_or]
    dict_rename = dict(zip(par_or, par_new))
    new_levels = [df.columns.get_level_values(0), par_new]
    if df.columns.nlevels==3:
        new_levels.append (df.columns.get_level_values(2))
    df.columns = pd.MultiIndex.from_arrays (new_levels)
    
    return df, dict_rename

### Usage

In [None]:
#exports tests.utils.test_experiment_utils
def test_compact_parameters ():
    """compact_parameters with two characters per chunk: new column keys and renaming dictionary."""
    em = generate_data_exp_utils ('compact_parameters')
    experiment_data = em.get_experiment_data ()
    
    compacted, renaming = compact_parameters (experiment_data, number_characters=2)
    display (compacted.head())

    # 4 parameters, then 3 run_info fields and 2 scores, each of the latter for runs 0..4
    runs = range(5)
    expected_columns = (
        [('parameters', short_name, '') for short_name in ['Ep', 'No', 'Of', 'Ra']]
        + [('run_info', short_name, run) for short_name in ['Da', 'Fi', 'Ti'] for run in runs]
        + [('scores', short_name, run) for short_name in ['TeAc', 'VaAc'] for run in runs]
    )
    assert compacted.columns.tolist() == expected_columns

    expected_renaming = {'epochs': 'Ep', 'noise': 'No', 'offset': 'Of', 'rate': 'Ra', 'date': 'Da', 
                         'finished': 'Fi', 'time': 'Ti', 'test_accuracy': 'TeAc', 
                         'validation_accuracy': 'VaAc'}
    assert renaming==expected_renaming
    
    em.remove_previous_experiments (parent=True)

In [None]:
# Run the test defined above through the notebook-level TestRunner `tst`, tagged 'dummy'.
tst.run (test_compact_parameters, tag='dummy')

## replace_with_default_values

In [None]:
#export
def replace_with_default_values (df, parameters={}):
    """
    Fill the null cells of `df` with the default value of the corresponding parameter.

    For every null cell, the non-null parameters of its row are passed to 
    `hpsearch.config.hpconfig.get_default_parameters`, and the cell receives the 
    default found under the second level of its column key (None if there is none).

    `df` is modified in place and also returned. The `parameters` argument is not 
    used; it is kept so that existing calls remain valid.
    """
    from hpsearch.config.hpconfig import get_default_parameters

    def is_missing (value):
        # pd.isnull returns an array for list-like values; those count as present.
        flag = pd.isnull (value)
        return bool (flag) if isinstance (flag, (bool, np.bool_)) else False
    
    parameters_names = get_parameters_columns (df)
    
    for k in df.columns:
        experiments = df.index[df[k].isna().to_numpy()]
        for experiment in experiments:
            row = df.loc[experiment, parameters_names]
            # Null entries are filtered explicitly: assigning None to them and testing 
            # `is not None` afterwards lets NaN through when the row is numeric.
            known_parameters = {c[1]: value for c, value in row.items() if not is_missing (value)}
            defaults = get_default_parameters(known_parameters)
            df.loc[experiment, k] = defaults.get(k[1])
    return df

### Usage

In [None]:
#exports tests.utils.test_experiment_utils
def test_replace_with_default_values ():
    """After replacing with defaults, the `epochs` column must hold the values of the grid, twice."""
    em = generate_data_exp_utils ('replace_with_default_values')
    
    experiment_data = replace_with_default_values (em.get_experiment_data ())
    mi_epoch = (dflt.parameters_col, 'epochs', '')
    expected_epochs = ([5.]*3 + [10.]*3 + [100.]*3)*2
    assert (experiment_data[mi_epoch].values == expected_epochs).all()
    
    em.remove_previous_experiments (parent=True)

In [None]:
# Run the test defined above through the notebook-level TestRunner `tst`, tagged 'dummy'.
tst.run (test_replace_with_default_values, tag='dummy')

## remove_defaults

In [None]:
#export
def remove_defaults (parameters):
    """
    Delete from `parameters` the entries whose value equals their default.

    The defaults are given by `hpsearch.config.hpconfig.get_default_parameters (parameters)`. 
    The dictionary is modified in place and also returned.
    """
    from hpsearch.config.hpconfig import get_default_parameters
    
    defaults = get_default_parameters(parameters)
    for name, default_value in defaults.items():
        if name in parameters and (parameters[name] == default_value):
            parameters.pop (name)
    return parameters

### Usage example

In [None]:
#exports tests.utils.test_experiment_utils
def test_remove_defaults ():
    """remove_defaults must only drop the parameters whose value is the default one."""
    em = init_em ('remove_defaults')
    _, _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05})
    
    # (parameters given, parameters expected once the defaults are removed)
    cases = [
        ({'offset':0.1, 'rate': 0.05}, {'offset':0.1, 'rate': 0.05}),
        ({'offset':0.1, 'rate': 0.01, 'epochs': 10}, {'offset':0.1}),
        ({'offset':0.5, 'rate': 0.000001, 'epochs': 10}, {'rate': 0.000001, 'epochs': 10}),
        ({'offset':0.5, 'rate': 0.000001, 'epochs': 100}, {'rate': 0.000001}),
    ]
    for given, expected in cases:
        parameters = remove_defaults (given)
        assert parameters==expected
    
    em.remove_previous_experiments (parent=True)

In [None]:
# Run the test defined above through the notebook-level TestRunner `tst`, tagged 'dummy'.
tst.run (test_remove_defaults, tag='dummy')

## find_rows_with_parameters_dict

In [35]:
#export
def find_rows_with_parameters_dict (experiment_data, parameters_dict, create_if_not_exists=True, 
                                    exact_match=True, ignore_keys=[], precision = 1e-10):
    """
    Finds rows that match parameters. 
    
    If the dataframe doesn't have any parameter with that name, a new column 
    is created (in `experiment_data` itself) and changed_dataframe is set to True.

    Parameters
    ----------
    experiment_data : DataFrame with the parameters in columns `(dflt.parameters_col, name, '')`.
    parameters_dict : dict from parameter name to the value looked for. A value of None 
        matches the rows where the parameter is null.
    create_if_not_exists : if False, a parameter without column raises ValueError 
        instead of creating the column.
    exact_match : if True, the parameters of `experiment_data` not present in 
        `parameters_dict` (nor in `ignore_keys`) must be null in the matching rows.
    ignore_keys : names of parameters that take no part in the comparison. Never modified.
    precision : tolerance used when the value looked for is a float with decimal part.

    Returns
    -------
    matching_rows : list with the index labels of the matching rows.
    changed_dataframe : True if some column was added to `experiment_data`.
    matching_all_condition : boolean Series, indexed like `experiment_data`.

    Raises
    ------
    ValueError if a parameter has no column and `create_if_not_exists` is False.
    """
    float_types = (float, np.floating)
    changed_dataframe = False
    # The mask shares the index of experiment_data, so that the rows returned are 
    # index labels even when the index is not 0..n-1 (e.g. for a subset of experiments).
    matching_all_condition = pd.Series(np.ones(experiment_data.shape[0], dtype=bool), 
                                       index=experiment_data.index)
    existing_keys = [par for par in parameters_dict.keys() if par not in ignore_keys]
    for parameter in existing_keys:
        mi_parameter = (dflt.parameters_col, parameter, '')
        if mi_parameter not in experiment_data.columns:
            if create_if_not_exists:
                experiment_data[mi_parameter] = None
                changed_dataframe = True
            else:
                raise ValueError ('parameter %s not found in experiment_data' %parameter)
        value = parameters_dict[parameter]
        column = experiment_data[mi_parameter]
        if value is None:
            matching_condition = column.isnull()
        elif column.isnull().all():
            # nothing stored for this parameter: no row can match a non-null value
            matching_condition = ~column.isnull()
        elif isinstance(value, float_types) and value != np.floor(value):
            # Floats with decimal part are compared with a tolerance, and only 
            # against the cells that hold a float.
            is_close = [isinstance(v, float_types) and bool(np.abs(v-value) < precision) 
                        for v in column]
            matching_condition = pd.Series(is_close, index=column.index, dtype=bool)
        else:
            matching_condition = column==value

        matching_all_condition = matching_all_condition & matching_condition.values
            
    if exact_match:
        rest_parameters = experiment_data[dflt.parameters_col].columns.get_level_values(0)
        rest_parameters = [par for par in rest_parameters 
                           if par not in parameters_dict.keys() and par not in ignore_keys]
        for parameter in rest_parameters:
            mi_parameter = (dflt.parameters_col, parameter, '')
            matching_condition = experiment_data[mi_parameter].isnull()
            matching_all_condition = matching_all_condition & matching_condition.values
    
    matching_rows = matching_all_condition.index[matching_all_condition.values].tolist()
    
    return matching_rows, changed_dataframe, matching_all_condition

### Usage example

In [36]:
#exports tests.utils.test_experiment_utils
def test_find_rows_with_parameters_dict ():
    """
    find_rows_with_parameters_dict: exact and non-exact matching, ignored keys, 
    float precision, and creation of a column for an unknown parameter.

    The dataframe is modified along the way, so the order of the checks matters.
    """
    em = generate_data_exp_utils ('find_rows_with_parameters_dict')
    df = em.get_experiment_data ()
    rows_with_rate = [9, 10, 11, 12, 13, 14, 15, 16, 17]

    # exact match: the rest of parameters would need to be null
    rows, changed, _ = find_rows_with_parameters_dict (df, dict (rate=0.0001))
    assert rows==[]
    assert not changed
    
    rows, changed, _ = find_rows_with_parameters_dict (df, dict (rate=0.0001), exact_match=False)
    assert rows == rows_with_rate
    
    rows, changed, _ = find_rows_with_parameters_dict (df, dict (rate=0.0001, epochs=5, offset=0.6), 
                                                       exact_match=False, ignore_keys=['epochs'])
    assert rows==[11, 14, 17]

    # a rate slightly different is out with the default precision...
    mi_rate = (dflt.parameters_col, 'rate', '')
    df.loc[16, mi_rate]=0.00011
    rows, changed, _ = find_rows_with_parameters_dict (df, dict (rate=0.0001), exact_match=False)
    assert rows==[9, 10, 11, 12, 13, 14, 15, 17]

    # ... and in with a coarser one
    rows, changed, _ = find_rows_with_parameters_dict (df, dict (rate=0.0001), exact_match=False, 
                                                       precision = 0.0001)
    assert rows==rows_with_rate

    # unknown parameter: a new column is added to the dataframe
    rows, changed, _ = find_rows_with_parameters_dict (df, dict (new_par=4), exact_match=False)
    assert changed
    assert df.shape == (18, 30)
    assert rows==[]
    assert (dflt.parameters_col, 'new_par', '') in df.columns
    
    em.remove_previous_experiments (parent=True)

In [37]:
# Run the test defined above through the notebook-level TestRunner `tst`, tagged 'dummy'.
tst.run (test_find_rows_with_parameters_dict, tag='dummy')

running test_find_rows_with_parameters_dict


## summarize_results

In [38]:
#export
def summarize_results(intersection=False, 
                      experiments=None, 
                      score_name=None, 
                      min_results=0, 
                      run_number=None, 
                      parameters=None,
                      include_parameters=True,
                      include_num_results=True,
                      other_columns=None,
                      data=None,
                      ascending=False,
                      sort_key='mean',
                      #stats = ['mean','median','rank','min','max','std'],
                      stats = ['mean','median','min','max','std']):
    """
    Obtains summary scores for the desired list of experiments. 
    
    Parameters
    ----------
    intersection : if True, only the score columns where all the selected experiments 
        have a value are used for the statistics.
    experiments : index labels of the experiments to consider. Only applied when `data` is None.
    score_name : score or list of scores to summarize (all of them if None). The first 
        one is the one used for sorting by a statistic.
    min_results : if greater than 0, only experiments with at least this number of 
        results (the minimum across scores) are kept.
    run_number : run numbers to consider, as accepted by `get_scores_columns`.
    parameters : dict of parameter values; only the experiments matching them 
        (non-exact match) are kept. Only applied when `data` is None.
    include_parameters : include the parameter columns that are not entirely null.
    include_num_results : include the column with the number of results.
    other_columns : list of additional columns to include, or 'all' to include every column.
    data : dataframe to summarize (a copy is used). If None, it is obtained with 
        `get_experiment_data ()`.
    ascending : sorting order.
    sort_key : name of a statistic in `stats`, of a parameter or of a score, to sort by. 
        No sorting is done if None or if not found.
    stats : aggregations computed across the runs of each score.

    Returns
    -------
    DataFrame with the selected columns plus one `(dflt.stats_col, score, stat)` column 
    per score and statistic, with sorted columns.
    """
    
    if data is None:
        experiment_data = get_experiment_data ()
        if experiments is not None:
            experiment_data = experiment_data.loc[experiments,:]
        if parameters is not None:
            _, _, matching = find_rows_with_parameters_dict (experiment_data, parameters, 
                                                             create_if_not_exists=False, 
                                                             exact_match=False)
            # The boolean mask is applied by position, so that the selection is right 
            # whatever the index of experiment_data is after the filter by `experiments`.
            experiment_data = experiment_data[np.asarray(matching, dtype=bool)]
    else:
        experiment_data = data.copy()
        
    # Determine the columns that provide evaluation scores. 
    result_columns = get_scores_columns (experiment_data, score_name=score_name, run_number=run_number)
    
    # Determine num_results and select those with minimum number of runs
    # NOTE(review): groupby (axis=1) is reported as deprecated by recent pandas versions; 
    # confirm against the pandas version in use before upgrading.
    num_results = (~experiment_data.loc[:,result_columns].isnull()).groupby(axis=1, level=1).sum()
    num_results.columns = pd.MultiIndex.from_product ([[dflt.stats_col], num_results.columns.tolist(), 
                                                       ['num_results']])
    experiment_data = pd.concat([experiment_data, num_results], axis=1)
    num_results_columns = experiment_data.columns[
        experiment_data.columns.get_level_values(2) == 'num_results'
    ]
    # the number of results of an experiment is the one of its score with fewer results
    min_num_results = experiment_data[num_results_columns].min(axis=1)
    experiment_data = experiment_data.drop (columns=num_results_columns)
    num_results_column = (dflt.num_results_col, 'num_results', '')
    experiment_data[num_results_column] = min_num_results
    if min_results > 0:
        number_before = experiment_data.shape[0]
        experiment_data = experiment_data[min_num_results>=min_results]
        print (f'{experiment_data.shape[0]} out of {number_before} experiments have {min_results} runs '
               'completed')
    
    # Take only those run_number where all experiments provide some score
    if intersection:
        number_before = len(result_columns)
        all_have_results = ~experiment_data.loc[:,result_columns].isnull().any(axis=0)
        # Plain filtering keeps the column keys as tuples with their original types; 
        # an np.array round trip turns them into lists of strings that match no column.
        result_columns = [column for column, complete in zip(result_columns, all_have_results.values) 
                          if complete]
        print (f'{len(result_columns)} out of {number_before} runs for whom all the '
                'selected experiments have completed')
        
    # the minimum is taken over the experiments that are left after the filters above
    print (f'total data examined: {experiment_data.shape[0]} experiments '
           f'with at least {experiment_data[num_results_column].min()} runs done for each one')
        
    # TODO: make it work across different metrics
    #scores = experiment_data.loc[:, result_columns]
    #scores[scores.isna()]=np.nan
    #scores = -scores.values
    #rank = np.argsort(scores,axis=0)
    #rank = np.argsort(rank,axis=0).astype(np.float32)
    #rank[experiment_data.loc[:,result_columns].isnull()]=np.nan
    
    if other_columns != 'all':
        if include_parameters:
            columns_to_include = get_parameters_columns(experiment_data, True)
        else:
            columns_to_include = []
        if include_num_results:
            columns_to_include.append (num_results_column)
        if other_columns is not None: 
            columns_to_include.extend(other_columns)
    else:
        columns_to_include = experiment_data.columns.tolist()

    # one block of columns per statistic, aggregating the runs of each score
    stat_df_all = []
    stats_columns=[]
    for stat in stats:
        stat_df = experiment_data.loc[:,result_columns].groupby (level=1, axis=1).agg(stat)
        #stat_df = experiment_data.loc[:,result_columns].agg(stat, axis=1, level=1)
        stat_df.columns = pd.MultiIndex.from_product (
            [[dflt.stats_col], stat_df.columns.tolist(), [stat]])
        stats_columns.extend (stat_df.columns.tolist())
        stat_df_all.append (stat_df)
    experiment_data = pd.concat ([experiment_data]+stat_df_all, axis=1)

    # score used when sorting by a statistic: the first one
    if score_name is None: 
        score_name = experiment_data[dflt.scores_col].columns.get_level_values(0).unique()
        score_name = score_name[0]
    elif isinstance (score_name, list):
        score_name = score_name[0]
    summary = experiment_data.loc[:,columns_to_include+stats_columns]
    sort_column = None
    if sort_key is not None: 
        top_level = summary.columns.get_level_values(0)
        if sort_key in stats: 
            sort_column = (dflt.stats_col, score_name, sort_key)
        elif (dflt.parameters_col in top_level and 
              sort_key in summary[dflt.parameters_col].columns.get_level_values(0)):
            sort_column = (dflt.parameters_col, sort_key, '')
        elif (dflt.scores_col in top_level and 
              sort_key in summary[dflt.scores_col].columns.get_level_values(0)):
            run_number = summary[dflt.scores_col].columns.get_level_values(1)[0]
            sort_column = (dflt.scores_col, sort_key, run_number)
    if sort_column is not None:
        summary = summary.sort_values(by=sort_column,ascending=ascending)
    summary = summary[summary.columns.sort_values()]
        
    return summary

### Usage example

In [39]:
#exports tests.utils.test_experiment_utils
def test_summarize_results ():
    """Check `summarize_results` on experiments run with 3, 5 and 2 repetitions.

    Covers, in order: the default call, restricting the score, restricting
    the stats, filtering by minimum number of results, and filtering by
    experiment number.
    """
    em = init_em ('summarize_results')
    # (parameters, number of repetitions) for each experiment; 10 runs in total
    planned_runs = [
        (dict(offset=0.1, rate=0.01), 3),
        (dict(offset=0.2, rate=0.001), 5),
        (dict(offset=0.3, rate=0.02), 2),
    ]
    for parameters, nruns in planned_runs:
        em.run_multiple_repetitions (parameters=parameters, nruns=nruns)

    score = 'validation_accuracy'
    # Three-level column key of the column holding the number of results
    num_results_key = (dflt.num_results_col, 'num_results', '')

    md ('\n\n')
    result = summarize_results ()
    display (result)
    assert result[num_results_key].sum() == 10
    assert result.shape == (3, 13)

    md ('\n\n')
    md ('- We can restrict the metric to be the indicated one:')
    result = summarize_results (score_name=score)
    display (result)
    assert result[num_results_key].sum() == 10
    assert result.shape == (3, 8)
    assert {'mean', 'median', 'min', 'max', 'std'} == set(result[dflt.stats_col, score].columns)

    md ('\n\n')
    md ('- We can also restrict the stats to be provided:')
    result = summarize_results (score_name=score, stats=['mean', 'min', 'max'])
    assert result.shape == (3, 6)
    assert {'mean', 'min', 'max'} == set(result[dflt.stats_col, score].columns)

    md ('\n\n')
    md ('- We can filter those results that have less than X runs: ')
    result = summarize_results (score_name=score, min_results=5)
    display (result)
    # Only the experiment with 5 repetitions reaches the threshold
    assert result[num_results_key].sum() == 5
    assert result.shape == (1, 8)

    md ('\n\n')
    md ('- We can filter by experiment number and/or number of results, and retrieve the original dataframe,'
        'plus new columns with stats: ')
    result = summarize_results (score_name=score, experiments=[0,2])
    display (result)
    assert result.shape == (2, 8)
    assert all(result.index == [2, 0])
    second_level_names = result.columns.get_level_values(1).unique().tolist()
    assert sorted(second_level_names) == sorted(['offset', 'rate', 'num_results', 'validation_accuracy'])
    assert result['stats', score].columns.tolist() == ['max', 'mean', 'median', 'min', 'std']
    em.remove_previous_experiments (parent=True)

In [40]:
# Execute the usage example through the notebook's TestRunner instance `tst`,
# tagged 'dummy' (the runner was created with targets=['dummy']).
tst.run (test_summarize_results, tag='dummy')

running test_summarize_results
fitting model with 10 epochs
epoch 0: accuracy: 0.11
epoch 1: accuracy: 0.12
epoch 2: accuracy: 0.13
epoch 3: accuracy: 0.14
epoch 4: accuracy: 0.15000000000000002
epoch 5: accuracy: 0.16000000000000003
epoch 6: accuracy: 0.17000000000000004
epoch 7: accuracy: 0.18000000000000005
epoch 8: accuracy: 0.19000000000000006
epoch 9: accuracy: 0.20000000000000007
fitting model with 10 epochs
epoch 0: accuracy: 0.11
epoch 1: accuracy: 0.12
epoch 2: accuracy: 0.13
epoch 3: accuracy: 0.14
epoch 4: accuracy: 0.15000000000000002
epoch 5: accuracy: 0.16000000000000003
epoch 6: accuracy: 0.17000000000000004
epoch 7: accuracy: 0.18000000000000005
epoch 8: accuracy: 0.19000000000000006
epoch 9: accuracy: 0.20000000000000007
fitting model with 10 epochs
epoch 0: accuracy: 0.11
epoch 1: accuracy: 0.12
epoch 2: accuracy: 0.13
epoch 3: accuracy: 0.14
epoch 4: accuracy: 0.15000000000000002
epoch 5: accuracy: 0.16000000000000003
epoch 6: accuracy: 0.17000000000000004
epoch 7: 





- We need to indicate the metric to be retrieved, otherwise it will count as many results as num_results*num_metrics: 

total data examined: 3 experiments with at least 6 runs done for each one


Unnamed: 0,offset,rate,num_results,mean
2,0.3,0.02,6,0.633333
1,0.2,0.001,15,0.44
0,0.1,,9,0.433333






- The metric is indicated with `_` at the beginning: 

total data examined: 3 experiments with at least 2 runs done for each one


Unnamed: 0,offset,rate,num_results,mean
2,0.3,0.02,2,0.5
1,0.2,0.001,5,0.21
0,0.1,,3,0.2






- We can filter those results that have less than X runs: 

1 out of 3 experiments have 5 runs completed
total data examined: 1 experiments with at least 5 runs done for each one


Unnamed: 0,offset,rate,num_results,mean
1,0.2,0.001,5,0.21






- We can filter by experiment number and/or number of results, and retrieve the original dataframe,plus new columns with stats: 

total data examined: 2 experiments with at least 2 runs done for each one


Unnamed: 0,offset,0_validation_accuracy,0_test_accuracy,time_0,date,0_finished,1_validation_accuracy,1_test_accuracy,time_1,1_finished,...,time_4,4_finished,num_results,mean,min,max,std,median,rank,good
0,0.1,0.2,0.1,0.002003,15:54:43.969089,True,0.2,0.1,0.001967,True,...,,,3,0.2,0.2,0.2,0.0,0.2,0.666667,3
2,0.3,0.5,0.4,0.002099,15:54:44.225272,True,0.5,0.4,0.002191,True,...,,,2,0.5,0.5,0.5,0.0,0.5,0.0,2


## query

In [41]:
#export
def query (path_experiments=None, 
              folder_experiments=None,
              experiments=None,  
              parameters_fixed={},
              parameters_variable={},
              parameters_all=[],
              exact_match=True,
              query_other_parameters=False,
              em=None,
              **kwargs):
    """Select the experiments matching the given parameters and summarize them.

    Parameters
    ----------
    path_experiments : str, optional
        Folder holding `other_parameters.csv`. Defaults to
        `em.path_experiments`. Only read when `query_other_parameters` is True.
    folder_experiments : optional
        Not used in this function body.
    experiments : optional
        Row labels applied with `.loc` after the parameter-based selection.
    parameters_fixed : dict
        Parameter values that every selected experiment must match.
    parameters_variable : dict
        Maps parameter name to a list of candidate values. It is expanded with
        `ParameterGrid`, and each combination is merged with
        `parameters_fixed` (fixed values win on a name clash) and searched for.
    parameters_all : list
        Forwarded to `find_rows_with_parameters_dict` as `ignore_keys`.
    exact_match : bool
        Forwarded to `find_rows_with_parameters_dict`.
    query_other_parameters : bool
        If True, the data is read from `other_parameters.csv` and the selected
        rows are returned without being summarized.
    em : optional
        Experiment manager; defaults to `get_experiment_manager ()`.
    **kwargs
        Forwarded to `summarize_results`.

    Returns
    -------
    The selected rows when `query_other_parameters` is True, otherwise the
    output of `summarize_results` on the selected rows.

    Notes
    -----
    - Parameters whose value is None bypass `remove_defaults` and are added
      back afterwards; the usage example uses None to ask for rows where the
      parameter holds its default value.
    - Unknown parameter names are only reported with `print`; the query still
      proceeds.
    - Rows are selected by position with `.iloc`, so
      `find_rows_with_parameters_dict` is presumably returning positional row
      numbers. NOTE(review): confirm.
    - The `{}` / `[]` defaults are shared between calls; they are not
      modified in this function body.
    """
    if em is None:
        em = get_experiment_manager ()
    if path_experiments is None:
        path_experiments = em.path_experiments

    if not query_other_parameters:
        experiment_data = em.get_experiment_data ()
    else:
        experiment_data = pd.read_csv(f'{path_experiments}/other_parameters.csv', index_col=0)

    # Requested parameter names, expressed as three-level column keys, that
    # are not present among the columns of the data.
    available_columns = set(experiment_data.columns)
    invalid_fixed = {
        (dflt.parameters_col, name, '') for name in parameters_fixed.keys()
    } - available_columns
    invalid_variable = {
        (dflt.parameters_col, name, '') for name in parameters_variable.keys()
    } - available_columns
    non_valid_pars = invalid_fixed | invalid_variable

    if non_valid_pars:
        print (f'\n**The following query parameters are not valid: {list(non_valid_pars)}**')
        print (f'\nValid parameters:\n{sorted(get_parameters_columns(experiment_data))}\n')

    experiment_numbers = []
    for grid_point in list(ParameterGrid(parameters_variable)):
        # Fixed values are applied last, so they override the grid values.
        merged = {**grid_point, **parameters_fixed}

        # Only the non-None values go through `remove_defaults`.
        explicit_none = {}
        with_value = {}
        for key, value in merged.items():
            if value is None:
                explicit_none[key] = value
            else:
                with_value[key] = value

        parameters = remove_defaults (with_value)
        parameters.update(explicit_none)

        matching_rows, _, _ = find_rows_with_parameters_dict (
            experiment_data, parameters, ignore_keys=parameters_all, exact_match=exact_match
        )
        experiment_numbers.extend(matching_rows)

    experiment_data = experiment_data.iloc[experiment_numbers]

    if experiments is not None:
        experiment_data = experiment_data.loc[experiments]

    if query_other_parameters:
        return experiment_data

    return summarize_results (data=experiment_data, **kwargs)

### Usage example

In [48]:
#exports tests.utils.test_experiment_utils
def test_query ():
    """Check `query` on the data produced by `generate_data_exp_utils`.

    Covers exact vs. non-exact matching on a fixed parameter, restricting
    parameters to lists of values, and using None inside such a list.
    """
    em = generate_data_exp_utils ('query')

    def par (parameter):
        # Three-level column key of a parameter column
        return (dflt.parameters_col, parameter, '')

    def stat (statv):
        # Three-level column key of a statistic computed on 'test_accuracy'
        return (dflt.stats_col, 'test_accuracy', statv)

    result = query (parameters_fixed=dict (rate=0.0001))
    assert result.empty

    md ('the dataframe only has mean. Results are sorted by mean score')
    result = query (parameters_fixed=dict (rate=0.0001), exact_match=False)
    assert result.shape[0] == 9
    assert (result[par('rate')] == 0.0001).all()
    assert len(result[par('offset')].unique()) == 3
    # The first three rows must have strictly decreasing mean score
    mean_scores = result[stat('mean')]
    assert mean_scores.iloc[0] > mean_scores.iloc[1] > mean_scores.iloc[2]

    display (result)
    md ('The second output d contains a field "stats" which is a dataframe. Results are sorted by mean score')
    stat_names = result['stats','validation_accuracy'].columns.tolist()
    assert stat_names == ['max', 'mean', 'median', 'min', 'std']
    assert result.shape == (9, 15)

    md ('We can request parameter be in specific list of values')   
    result = query (parameters_fixed=dict(rate=0.0001), exact_match=False,
                    parameters_variable=dict(epochs=[5,10], offset=[0.1, 0.3]))
    assert sorted(result[par('epochs')].unique()) == [5,10]
    assert sorted(result[par('offset')].unique()) == [0.1, 0.3]
    assert result.shape == (4, 15)
    display (result)

    md ('If we want a value that is the default, we need to indicate None')
    result = query (parameters_fixed=dict(rate=0.0001), exact_match=False,
                    parameters_variable=dict(epochs=[10, None], offset=[0.1, 0.3]))
    assert result.shape == (4, 15)
    epochs = result[par('epochs')]
    # Half of the selected rows have no epochs value, the other half have 10
    assert epochs.isna().sum() == 2
    assert (epochs == 10).sum() == 2
    display (result)

    em.remove_previous_experiments (parent=True)

In [49]:
# Execute the usage example through the notebook's TestRunner instance `tst`,
# tagged 'dummy' (the runner was created with targets=['dummy']).
tst.run (test_query, tag='dummy')

running test_query
total data examined: 0 experiments with at least nan runs done for each one


the dataframe only has mean. Results are sorted by mean score

total data examined: 9 experiments with at least 15 runs done for each one


Unnamed: 0,epochs,offset,rate,noise,num_results,mean
11,5.0,0.6,0.0001,0.1,15,0.731242
14,10.0,0.6,0.0001,0.1,15,0.693155
17,,0.6,0.0001,0.1,15,0.679728
10,5.0,0.3,0.0001,0.1,15,0.548471
13,10.0,0.3,0.0001,0.1,15,0.505715
16,,0.3,0.0001,0.1,15,0.481352
9,5.0,0.1,0.0001,0.1,15,0.432128
12,10.0,0.1,0.0001,0.1,15,0.386496
15,,0.1,0.0001,0.1,15,0.38006


The second output d contains a field "stats" which is a dataframe. Results are sorted by mean score

Unnamed: 0,epochs,offset,rate,noise,num_results,mean,median,rank,min,max,std
11,5.0,0.6,0.0001,0.1,15,0.731242,0.717476,1.066667,0.412303,1.0,0.216686
14,10.0,0.6,0.0001,0.1,15,0.693155,0.581141,2.533333,0.288745,1.0,0.260617
17,,0.6,0.0001,0.1,15,0.679728,0.670236,3.666667,0.312746,1.0,0.263702
10,5.0,0.3,0.0001,0.1,15,0.548471,0.426426,2.733333,0.095076,1.0,0.346302
13,10.0,0.3,0.0001,0.1,15,0.505715,0.315252,4.2,0.126844,1.0,0.367088
16,,0.3,0.0001,0.1,15,0.481352,0.298419,5.466667,0.065535,1.0,0.390327
9,5.0,0.1,0.0001,0.1,15,0.432128,0.23686,4.0,0.0,1.0,0.425926
12,10.0,0.1,0.0001,0.1,15,0.386496,0.119425,5.533333,0.0,1.0,0.451545
15,,0.1,0.0001,0.1,15,0.38006,0.152559,6.8,0.0,1.0,0.457517


We can request parameter be in specific list of values

total data examined: 4 experiments with at least 15 runs done for each one


Unnamed: 0,epochs,offset,rate,noise,num_results,mean
10,5.0,0.3,0.0001,0.1,15,0.548471
13,10.0,0.3,0.0001,0.1,15,0.505715
9,5.0,0.1,0.0001,0.1,15,0.432128
12,10.0,0.1,0.0001,0.1,15,0.386496


If we want a value that is the default, we need to indicate None

total data examined: 4 experiments with at least 15 runs done for each one


Unnamed: 0,epochs,offset,rate,noise,num_results,mean
13,10.0,0.3,0.0001,0.1,15,0.505715
16,,0.3,0.0001,0.1,15,0.481352
12,10.0,0.1,0.0001,0.1,15,0.386496
15,,0.1,0.0001,0.1,15,0.38006


## summary

In [50]:
#export
def summary (df, experiments = None, score=None, compact=True):
    """Put the parameter columns of `df` side by side with its scores.

    Parameters
    ----------
    df : DataFrame
        Experiment data, e.g. as returned by `em.get_experiment_data()`.
    experiments : optional
        Row labels used to select rows with `.loc`. All rows are kept if None.
    score : optional
        Forwarded to `get_experiment_scores` as `score_name`.
    compact : bool
        If True, `df` is replaced by the second value returned by
        `get_parameters_unique` (presumably dropping parameters that do not
        vary across experiments. TODO confirm).

    Returns
    -------
    DataFrame
        The parameter columns, labelled with level 1 of their original column
        labels, concatenated column-wise with the output of
        `get_experiment_scores`.
    """
    selected = df.loc[experiments] if experiments is not None else df
    if compact:
        _, selected = get_parameters_unique(selected)

    pars = selected[get_parameters_columns(selected, True)]
    # Keep only the second level of the column labels
    pars.columns = pars.columns.get_level_values(level=1)

    scores = get_experiment_scores (selected, score_name=score, remove_score_name=True)
    return pd.concat([pars, scores], axis=1)

### Usage example

In [59]:
#exports tests.utils.test_experiment_utils
def test_summary ():
    """Check `summary` on experiments run with 3, 5 and 2 repetitions."""
    em = init_em ('summary')
    # (parameters, number of repetitions) for each experiment
    planned_runs = [
        (dict(offset=0.1, rate=0.01), 3),
        (dict(offset=0.2, rate=0.001), 5),
        (dict(offset=0.3, rate=0.02), 2),
    ]
    for parameters, nruns in planned_runs:
        em.run_multiple_repetitions (parameters=parameters, nruns=nruns)

    result = summary (em.get_experiment_data(), score='validation_accuracy')
    display (result)
    # Two parameter columns followed by one column per run number (0..4)
    expected_columns = ['offset', 'rate', 0, 1, 2, 3, 4]
    assert result.columns.tolist() == expected_columns
    assert result.shape == (3, 7)
    em.remove_previous_experiments (parent=True)

In [60]:
# Execute the usage example through the notebook's TestRunner instance `tst`,
# tagged 'dummy' (the runner was created with targets=['dummy']).
tst.run (test_summary, tag='dummy')

running test_summary
fitting model with 10 epochs
epoch 0: accuracy: 0.11
epoch 1: accuracy: 0.12
epoch 2: accuracy: 0.13
epoch 3: accuracy: 0.14
epoch 4: accuracy: 0.15000000000000002
epoch 5: accuracy: 0.16000000000000003
epoch 6: accuracy: 0.17000000000000004
epoch 7: accuracy: 0.18000000000000005
epoch 8: accuracy: 0.19000000000000006
epoch 9: accuracy: 0.20000000000000007
fitting model with 10 epochs
epoch 0: accuracy: 0.11
epoch 1: accuracy: 0.12
epoch 2: accuracy: 0.13
epoch 3: accuracy: 0.14
epoch 4: accuracy: 0.15000000000000002
epoch 5: accuracy: 0.16000000000000003
epoch 6: accuracy: 0.17000000000000004
epoch 7: accuracy: 0.18000000000000005
epoch 8: accuracy: 0.19000000000000006
epoch 9: accuracy: 0.20000000000000007
fitting model with 10 epochs
epoch 0: accuracy: 0.11
epoch 1: accuracy: 0.12
epoch 2: accuracy: 0.13
epoch 3: accuracy: 0.14
epoch 4: accuracy: 0.15000000000000002
epoch 5: accuracy: 0.16000000000000003
epoch 6: accuracy: 0.17000000000000004
epoch 7: accuracy: 

Unnamed: 0,offset,rate,0,1,2,3,4
0,0.1,,0.2,0.2,0.2,,
1,0.2,0.001,0.21,0.21,0.21,0.21,0.21
2,0.3,0.02,0.5,0.5,,,
