In [1]:
#hide
#default_exp tools.query
from nbdev.showdoc import *
from dsblocks.utils.nbdev_utils import nbdev_setup, TestRunner

# Notebook-level setup provided by dsblocks (details live in dsblocks.utils.nbdev_utils).
nbdev_setup ()
# Runner used by the test cells of this notebook; every test here is run with tag='dummy'.
# NOTE(review): presumably only tests whose tag is listed in `targets` are executed — confirm in dsblocks.
tst = TestRunner (targets=['dummy'])

# Query

> Shows results for queried experiments.

In [2]:
#export
import warnings
warnings.filterwarnings('ignore')

import argparse
import sys
sys.path.append('.')
from collections import namedtuple
from IPython.display import display
import pandas as pd

# hpsearch api
import hpsearch.utils.experiment_utils as ut
import hpsearch.config.hp_defaults as dflt

In [3]:
#for tests
import pytest
import os

from hpsearch.examples.dummy_experiment_manager import generate_data

## query

In [4]:
# export
def query (pv=None, pf=None, pall=None, pexact=False, folder=None,
           metric=None, experiments=None, runs=None, op=None, stats=None,
           results=0, other_parameters=False):
    """
    Query stored experiment results through `ut.query`.

    Thin adapter that maps the short keyword names used by this tool onto
    the keyword arguments of `ut.query`.

    Parameters
    ----------
    pv : dict, optional
        Forwarded as `parameters_variable`. Defaults to an empty dict.
    pf : dict, optional
        Forwarded as `parameters_fixed`. Defaults to an empty dict.
    pall : list, optional
        Forwarded as `parameters_all`. Defaults to an empty list.
    pexact : bool
        Forwarded as `exact_match`.
    folder : optional
        Forwarded as `folder_experiments`.
    metric : str
        Metric name; forwarded as `suffix_results` with a leading '_'.
        Required in practice even though the signature default is None.
    experiments : optional
        Forwarded as `experiments`.
    runs : optional
        Forwarded as `classes`.
    op : str, optional
        Ascending order is requested from `ut.query` only when this is 'min'.
    stats : list, optional
        Forwarded as `stats`. Defaults to ['mean'].
    results : int
        Forwarded as `min_results`.
    other_parameters : bool
        Forwarded as `query_other_parameters`; also selects what is returned.

    Returns
    -------
    When `other_parameters` is falsy, the 'stats' entry of the second element
    returned by `ut.query`; otherwise the unmodified return value of `ut.query`.

    Raises
    ------
    TypeError
        If `metric` is None.
    """
    # Build fresh containers on every call: the originals were shared default
    # objects passed straight into `ut.query`.
    pv = {} if pv is None else pv
    pf = {} if pf is None else pf
    pall = [] if pall is None else pall
    stats = ['mean'] if stats is None else stats

    # '_' + None would fail anyway; fail with a message that names the argument.
    if metric is None:
        raise TypeError ('query() requires `metric` to be a string, got None')

    result_query = ut.query(folder_experiments=folder, suffix_results='_'+metric, experiments=experiments,
                        classes=runs, parameters_fixed=pf, parameters_variable=pv, parameters_all = pall, exact_match=pexact,
                        ascending=op=='min', stats=stats, min_results=results, query_other_parameters=other_parameters)

    if not other_parameters:
        # Keep only the statistics part of the second returned element.
        result_query = result_query[1]
        result_query = result_query['stats']

    return result_query

## do_query_and_show

In [5]:
#export
def do_query_and_show (pall=None, best=None, compact=0, exact=False, experiments=None, pf=None, last=None,
                       metric=None, op=None, other_parameters=False, input_range=None, results=0,
                       folder=None, round=2, runs=None, show=False, stats=None, pv=None,
                       sort=None, display_all_columns=False, col_width=None,
                       manager_path=dflt.manager_path):
    """
    Run a query against the experiment manager's results, print and display it.

    Parameters
    ----------
    pall, pf, pv, exact, experiments, runs, stats, results, other_parameters :
        Passed to `query` as `pall`, `pf`, `pv`, `pexact`, `experiments`, `runs`,
        `stats`, `results` and `other_parameters`. `pall` defaults to [],
        `pf` / `pv` to {} and `stats` to ['mean'].
    folder, metric, op : optional
        When given, override the experiment manager's experiments path,
        `key_score` and `op` before querying.
    sort : str, optional
        Column to sort by; ascending only when the manager's `op` is 'min'.
    experiments, last, best, input_range : optional
        Together they build the list of experiment numbers to keep:
        `experiments` as given, the `last` highest index values, the first
        `best` rows of the (possibly sorted) table, and
        `range(input_range[0], input_range[1])` (upper bound excluded).
        No filtering is applied when the resulting list is empty.
    round : int, optional
        Number of digits the `stats` columns are rounded to; None or 0 disables it.
    compact : int
        When > 0, columns are shortened with `ut.compact_parameters` and the
        new => old column mapping is printed.
    display_all_columns : bool
        When True, also display the full table before it is reduced by
        `ut.get_parameters_unique`.
    col_width : int, optional
        When given, sets pandas' 'max_colwidth' option (global, not restored).
    show : bool
        When True, plot histories of the selected experiments with plot_visdom.
    manager_path :
        Passed to `get_experiment_manager`.

    Returns
    -------
    The second element returned by `ut.get_parameters_unique` (after optional
    compaction), which is also the table that is displayed.
    """
    from hpsearch.config.hpconfig import get_experiment_manager

    # Fresh containers per call instead of shared mutable defaults.
    pall = [] if pall is None else pall
    pf = {} if pf is None else pf
    pv = {} if pv is None else pv
    stats = ['mean'] if stats is None else stats

    em = get_experiment_manager (manager_path=manager_path)
    if folder is not None: em.set_path_experiments (folder=folder)
    if metric is not None: em.key_score = metric
    if op is not None: em.op = op

    df = query (pv=pv, pf=pf, pall=pall, pexact=exact, folder=em.folder,
               metric=em.key_score, experiments=experiments, runs=runs, op=em.op, stats=stats,
               results=results, other_parameters=other_parameters)
    df = ut.replace_with_default_values (df)
    if sort is not None:
        assert sort in df.columns, f'sort must be a column in dataframe ({df.columns})'
        df = df.sort_values(by=sort, ascending=(em.op=='min'))

    # Work on a copy: the original code extended the caller's `experiments`
    # list in place with `+=`, leaking the additions back to the caller.
    selected = [] if experiments is None else list(experiments)
    if last is not None:
        selected += range(df.index.max()-last+1, df.index.max()+1)
    if best is not None:
        selected += list(df.index[:best])
    if input_range is not None:
        assert len(input_range) == 2
        selected += range(input_range[0], input_range[1])
    if len(selected) > 0:
        # Set membership keeps the row order of df while avoiding a list scan per row.
        wanted = set(selected)
        df = df.loc[[x for x in df.index if x in wanted]]

    if col_width is not None:
        pd.set_option('max_colwidth', col_width)

    if (round is not None) and (round != 0):
        # `round` here is the parameter (number of digits), not the builtin.
        df[stats] = df[stats].round(round)
    if display_all_columns:
        display (df)

    print (f'experiments: {list(df.index)}')
    print (f'min experiment #: {df.index.min()}, max experiment #: {df.index.max()}')

    print ('result of query:')
    _, df2 = ut.get_parameters_unique(df)
    if compact > 0:
        prev_cols = df2.columns.copy()
        df2, _ = ut.compact_parameters (df2, compact)
        for k, kor in zip(df2.columns, prev_cols):
            print (f'{k} => {kor}')
    display (df2)

    if show:
        # Imported under a distinct alias so it does not shadow the `pv` parameter.
        import hpsearch.visualization.plot_visdom as plot_visdom
        plot_visdom.plot_multiple_histories(df.index, folder=em.folder,metrics=em.key_score, parameters=None)
    return df2

### Run simple query

Run the query without any conditions, retrieving all the data.

In [6]:
#export tests.tools.test_query
def test_do_query_and_show ():
    """Query with default arguments; check the files left on disk and the returned table."""
    path_results = 'do_query_and_show'
    manager = generate_data (path_results)

    result = do_query_and_show (manager_path=manager.manager_path)

    # Files written under the managers folder.
    managers_content = sorted(os.listdir (f'test_{path_results}/debug/managers'))
    assert managers_content==['fields', 'info', 'logs.txt', 'whole']
    whole_content = sorted(os.listdir (f'test_{path_results}/debug/managers/whole'))
    assert whole_content==['DummyExperimentManager-default.pk', 'last.pk']

    # Expected values, column by column, in the order the rows are returned.
    expected_epochs = [15,30,5,15,30,15,5,30,5]
    expected_offset = [.6,.6,.6,.3,.3,.1,.3,.1,.1]
    expected_mean = [0.97, 0.89, 0.81, 0.8 , 0.65, 0.55, 0.46, 0.44, 0.19]
    assert (result.epochs == expected_epochs).all()
    assert (result.offset == expected_offset).all()
    assert (result['mean'] == expected_mean).all()

    manager.remove_previous_experiments (parent=True)

In [7]:
# Execute the test in the notebook through the TestRunner, under the 'dummy' tag.
tst.run (test_do_query_and_show, tag='dummy')

running test_do_query_and_show
total data examined: 9 experiments with at least 5 runs done for each one
experiments: [5, 8, 2, 4, 7, 3, 1, 6, 0]
min experiment #: 0, max experiment #: 8
result of query:


Unnamed: 0,epochs,offset,mean
5,15.0,0.6,0.97
8,30.0,0.6,0.89
2,5.0,0.6,0.81
4,15.0,0.3,0.8
7,30.0,0.3,0.65
3,15.0,0.1,0.55
1,5.0,0.3,0.46
6,30.0,0.1,0.44
0,5.0,0.1,0.19


### Change the metric that we want to show

In [8]:
#export tests.tools.test_query
def test_do_query_and_show_change_metric ():
    """Smoke test: query overriding the metric with 'test_accuracy'."""
    manager = generate_data ('do_query_and_show_change_metric')

    do_query_and_show (manager_path=manager.manager_path, metric='test_accuracy')

    # Remove the data generated for this test.
    manager.remove_previous_experiments (parent=True)

In [9]:
# Execute the test in the notebook through the TestRunner, under the 'dummy' tag.
tst.run (test_do_query_and_show_change_metric, tag='dummy')

running test_do_query_and_show_change_metric
total data examined: 9 experiments with at least 5 runs done for each one
experiments: [5, 8, 2, 4, 3, 7, 1, 0, 6]
min experiment #: 0, max experiment #: 8
result of query:


Unnamed: 0,epochs,offset,mean
5,15.0,0.6,0.93
8,30.0,0.6,0.86
2,5.0,0.6,0.83
4,15.0,0.3,0.61
3,15.0,0.1,0.61
7,30.0,0.3,0.58
1,5.0,0.3,0.52
0,5.0,0.1,0.3
6,30.0,0.1,0.29


### Run query with conditions

In [10]:
#export tests.tools.test_query
def test_do_query_and_show_with_conditions ():
    """Smoke test: query restricted to experiments whose fixed parameter epochs is 15."""
    manager = generate_data ('do_query_and_show_with_conditions')

    fixed_parameters = {'epochs':15}
    do_query_and_show (metric='validation_accuracy', op='max', pf=fixed_parameters,
                       manager_path=manager.manager_path)

    # Remove the data generated for this test.
    manager.remove_previous_experiments (parent=True)

In [11]:
# Execute the test in the notebook through the TestRunner, under the 'dummy' tag.
tst.run (test_do_query_and_show_with_conditions, tag='dummy')

running test_do_query_and_show_with_conditions
total data examined: 3 experiments with at least 5 runs done for each one
experiments: [5, 4, 3]
min experiment #: 3, max experiment #: 5
result of query:


Unnamed: 0,offset,mean
5,0.6,0.97
4,0.3,0.8
3,0.1,0.55


### Run query that sorts by maximum

In [12]:
#export tests.tools.test_query
def test_do_query_and_show_sort_maximum ():
    """Smoke test: request mean/min/max statistics and sort the table by the 'max' column."""
    manager = generate_data ('do_query_and_show_sort_maximum')

    requested_stats = ['mean', 'min', 'max']
    do_query_and_show (metric='validation_accuracy', op='max', sort='max',
                       stats=requested_stats, manager_path=manager.manager_path)

    # Remove the data generated for this test.
    manager.remove_previous_experiments (parent=True)

In [13]:
# Execute the test in the notebook through the TestRunner, under the 'dummy' tag.
tst.run (test_do_query_and_show_sort_maximum, tag='dummy')

running test_do_query_and_show_sort_maximum
total data examined: 9 experiments with at least 5 runs done for each one
experiments: [5, 8, 4, 2, 7, 3, 6, 1, 0]
min experiment #: 0, max experiment #: 8
result of query:


Unnamed: 0,epochs,offset,mean,min,max
5,15.0,0.6,0.97,0.86,1.0
8,30.0,0.6,0.89,0.83,1.0
4,15.0,0.3,0.8,0.7,0.87
2,5.0,0.6,0.81,0.76,0.86
7,30.0,0.3,0.65,0.57,0.75
3,15.0,0.1,0.55,0.48,0.63
6,30.0,0.1,0.44,0.36,0.52
1,5.0,0.3,0.46,0.4,0.49
0,5.0,0.1,0.19,0.12,0.28


## parse_args

In [14]:
#export
def parse_args(args):
    """
    Parse the command-line arguments of the query tool.

    Parameters
    ----------
    args : list of str
        Argument strings, e.g. `sys.argv[1:]`.

    Returns
    -------
    argparse.Namespace
        Parsed arguments. `v`, `f` and `a` hold the evaluated values of the
        `-v`, `-f` and `-a` strings; `always` holds `dict(<--always text>)`,
        which is also merged into `f`. `range` is either None or a list of
        exactly two integers.

    Notes
    -----
    `-v`, `-f`, `-a` and `--always` are evaluated with `eval`, so they accept
    any Python expression (e.g. `-f dict(epochs=15)`). Invalid arguments make
    argparse exit the program (SystemExit).
    """
    default_always = ''

    parser = argparse.ArgumentParser(description='show metrics in visdom browser')
    # Datasets
    parser.add_argument('-m','--metric', type=str, default=None, help="metrics scores")
    parser.add_argument('--stats', type=str, nargs='+', default=['mean'],  help="statistics for multiple runs")
    parser.add_argument('--experiments', type=int, nargs='+', default=None,  help="experiment numbers")
    parser.add_argument('--folder', type=str, default=None)
    parser.add_argument('-v', type=str, default='{}', help='variable parameters')
    parser.add_argument('-f', type=str, default='{}', help='fixed parameters')
    parser.add_argument('-a', type=str, default='[]', help='all parameters')
    parser.add_argument('-e', '--exact', action= "store_true", help='exact match')
    parser.add_argument('--last', type=int, default=None, help='include these last experiments')
    parser.add_argument('--best', type=int, default=None, help='include these best experiments')
    # Exactly two values are required: the consumer of this option asserts
    # len(range) == 2, so reject anything else at parse time.
    parser.add_argument('--range', type=int, nargs=2, default=None, metavar=('START', 'STOP'),
                        help='include this range of experiments')
    parser.add_argument('-c', '--compact', type=int, default=0, help='compact parameters to this number of characters')
    parser.add_argument('--results', type=int, default=0, help='min number of results to consider')
    parser.add_argument('-s', '--show', action= "store_true")
    parser.add_argument('--other', action= "store_true")
    parser.add_argument('--always', type=str, default = default_always)
    parser.add_argument('--op', default=None, type=str)
    parser.add_argument('--round', default=2, type=int, help='round scores to this number of digits')
    parser.add_argument('--runs', default=None, type=int, nargs='+', help='query restricted to run number provided')
    parser.add_argument('--sort', default=None, type=str)
    parser.add_argument('--width', default=None, type=int, help='max column width')
    parser.add_argument('-p', '--path', default=dflt.manager_path, type=str)
    pars = parser.parse_args(args)

    # SECURITY NOTE(review): eval executes arbitrary code taken from the command line.
    # It is kept because call forms such as `dict(epochs=15)` are supported input,
    # which ast.literal_eval would reject. Do not feed untrusted text to this tool.
    pars.v = eval(pars.v)
    pars.f = eval(pars.f)
    pars.a = eval(pars.a)
    # --always is given as bare keyword arguments, e.g. "epochs=15, offset=0.1".
    pars.always = eval('dict(%s)' %pars.always)
    pars.f.update(pars.always)

    print (f'dictionary of query terms={pars.f}')

    return pars

## parse_arguments_and_query

In [15]:
#export
def parse_arguments_and_query (args):
    """Parse `args` with `parse_args` and run `do_query_and_show` with the parsed values."""
    pars = parse_args(args)

    # Map the parsed command-line fields onto do_query_and_show keyword names.
    query_kwargs = dict(
        pall=pars.a,
        best=pars.best,
        compact=pars.compact,
        exact=pars.exact,
        experiments=pars.experiments,
        pf=pars.f,
        last=pars.last,
        metric=pars.metric,
        op=pars.op,
        other_parameters=pars.other,
        input_range=pars.range,
        results=pars.results,
        folder=pars.folder,
        round=pars.round,
        runs=pars.runs,
        show=pars.show,
        stats=pars.stats,
        pv=pars.v,
        sort=pars.sort,
        col_width=pars.width,
        manager_path=pars.path,
    )
    do_query_and_show (**query_kwargs)

def main():
    """Run the query tool on the arguments in `sys.argv`, program name excluded."""
    cli_args = sys.argv[1:]
    parse_arguments_and_query (cli_args)

### Change the metric that we want to show

In [18]:
#export tests.tools.test_query
def test_parse_arguments_and_query_change_metric ():
    """Smoke test: command-line style query that overrides the metric."""
    manager = generate_data ('parse_arguments_and_query_change_metric')

    command = f'--metric test_accuracy -p {manager.manager_path}'
    arguments = command.split()
    parse_arguments_and_query (arguments)

    # Remove the data generated for this test.
    manager.remove_previous_experiments (parent=True)

In [19]:
# Execute the test in the notebook through the TestRunner, under the 'dummy' tag.
tst.run (test_parse_arguments_and_query_change_metric, tag='dummy')

running test_parse_arguments_and_query_change_metric
dictionary of query terms={}
total data examined: 9 experiments with at least 5 runs done for each one
experiments: [5, 8, 2, 4, 3, 7, 1, 0, 6]
min experiment #: 0, max experiment #: 8
result of query:


Unnamed: 0,epochs,offset,mean
5,15.0,0.6,0.93
8,30.0,0.6,0.86
2,5.0,0.6,0.83
4,15.0,0.3,0.61
3,15.0,0.1,0.61
7,30.0,0.3,0.58
1,5.0,0.3,0.52
0,5.0,0.1,0.3
6,30.0,0.1,0.29


### Run query with conditions

In [20]:
#export tests.tools.test_query
def test_parse_arguments_and_query_with_conditions ():
    """Smoke test: command-line style query with a fixed-parameter condition (epochs=15)."""
    manager = generate_data ('parse_arguments_and_query_with_conditions')

    command = f'--metric validation_accuracy --op max -f dict(epochs=15) -p {manager.manager_path}'
    arguments = command.split()
    parse_arguments_and_query (arguments)

    # Remove the data generated for this test.
    manager.remove_previous_experiments (parent=True)

In [21]:
# Execute the test in the notebook through the TestRunner, under the 'dummy' tag.
tst.run (test_parse_arguments_and_query_with_conditions, tag='dummy')

running test_parse_arguments_and_query_with_conditions
dictionary of query terms={'epochs': 15}
total data examined: 3 experiments with at least 5 runs done for each one
experiments: [5, 4, 3]
min experiment #: 3, max experiment #: 5
result of query:


Unnamed: 0,offset,mean
5,0.6,0.97
4,0.3,0.8
3,0.1,0.55
