In [41]:
from mlscorecheck.check import (aggregate_problems,
                                prepare_for_mos,
                                expand_for_mos,
                                accumulate_dicts,
                                aggregate_dicts,
                                _resolve_p_n)
from mlscorecheck.core import (determine_fold_configurations, check_aggregation,
                                copy_problem_structure,
                                solution_structure)
from mlscorecheck.utils import calculate_scores, generate_problems_with_folds, calculate_all_scores

In [42]:
# Specs that name a dataset ('dataset' key) instead of giving explicit p/n counts.
# b1/b3/b5 add 'n_folds'/'n_repeats', b2-b5 add accuracy bounds, b4/b5 add bounds on tp.
datasetb0 = {'dataset': 'common_datasets.ecoli1'}
datasetb1 = {'dataset': 'common_datasets.ecoli1', 'n_folds': 3, 'n_repeats': 5}
datasetb2 = {'dataset': 'common_datasets.ecoli1', 'score_bounds': {'acc': (0.8, 1.0)}}
datasetb3 = {'dataset': 'common_datasets.ecoli1', 'n_folds': 3, 'n_repeats': 5, 'score_bounds': {'acc': (0.8, 1.0)}}
datasetb4 = {'dataset': 'common_datasets.ecoli1', 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}
datasetb5 = {'dataset': 'common_datasets.ecoli1', 'n_folds': 3, 'n_repeats': 5, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}

In [43]:
# Resolve a named dataset: the output shows the spec returned with 'p' and 'n' counts added.
_resolve_p_n(datasetb2)

{'dataset': 'common_datasets.ecoli1',
 'score_bounds': {'acc': (0.8, 1.0)},
 'p': 77,
 'n': 259}

In [44]:
# Specs with explicit p/n counts; the odd-numbered ones add 'n_folds'/'n_repeats',
# a2-a5 add score bounds and a4/a5 add bounds on tp.
dataseta0 = {'p': 10, 'n': 20}
dataseta1 = {'p': 20, 'n': 10, 'n_folds': 3, 'n_repeats': 5}
dataseta2 = {'p': 10, 'n': 20, 'score_bounds': {'acc': (0.8, 1.0)}}
# NOTE(review): 'fold_score_bounds' is not read by any function defined in the visible cells.
dataseta3 = {'p': 20, 'n': 10, 'n_folds': 3, 'n_repeats': 5, 'score_bounds': {'acc': (0.8, 1.0)}, 'fold_score_bounds': {'acc': (0.75, 0.9)}}
dataseta4 = {'p': 10, 'n': 20, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}
dataseta5 = {'p': 20, 'n': 10, 'n_folds': 3, 'n_repeats': 5, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}

# Specs that list their folds explicitly under the 'folds' key, with bounds attached
# per fold (c1, c3), per fold plus dataset level (c2), or not at all (c0).
datasetc0 = {'folds': [{'p': 10, 'n': 20},
                        {'p': 20, 'n': 30}]}
datasetc1 = {'folds': [{'p': 10, 'n': 20, 'score_bounds': {'acc': (0.8, 1.0)}},
                        {'p': 20, 'n': 30, 'score_bounds': {'acc': (0.7, 0.9)}}]}
datasetc2 = {'folds': [{'p': 10, 'n': 20, 'tptn_bounds': {'tp': (5, 10)}},
                        {'p': 20, 'n': 30, 'tptn_bounds': {'tp': (2, 20)}}],
                'score_bounds': {'acc': (0.75, 0.85)}}
datasetc3 = {'folds': [{'p': 10, 'n': 20, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}},
                        {'p': 20, 'n': 30, 'score_bounds': {'acc': (0.7, 0.9)}, 'tptn_bounds': {'tp': (2, 20)}}]}


In [45]:
def _create_folds(dataset):
    results = {}
    if 'folds' not in dataset:
        if 'p' not in dataset:
            dataset = resolve_pn(dataset)
        results['folds'] = determine_fold_configurations(dataset['p'],
                                                            dataset['n'],
                                                            dataset.get('n_folds', 1),
                                                            dataset.get('n_repeats', 1))
        results['p'] = dataset['p'] * dataset.get('n_repeats', 1)
        results['n'] = dataset['n'] * dataset.get('n_repeats', 1)
        
        if len(results['folds']) == 1 and 'tptn_bounds' in dataset:
            results['folds'][0]['tptn_bounds'] = dataset['tptn_bounds']
            results['tptn_bounds'] = dataset['tptn_bounds']
        if 'score_bounds' in dataset:
            for fold in results['folds']:
                fold['score_bounds'] = dataset['score_bounds']
            results['score_bounds'] = dataset['score_bounds']
    else:
        results['folds'] = dataset['folds']
        results['p'] = sum(fold['p'] for fold in dataset['folds'])
        results['n'] = sum(fold['n'] for fold in dataset['folds'])
    return results

In [46]:
def _expand_datasets(datasets):
    """
    Apply ``_create_folds`` to one dataset specification or to each of several.

    Args:
        datasets (dict|iterable(dict)): a single specification or a collection of them

    Returns:
        dict|list(dict): one expanded dict for a dict input, otherwise a list
        with one expanded dict per item
    """
    if not isinstance(datasets, dict):
        return [_create_folds(item) for item in datasets]
    return _create_folds(datasets)

In [47]:
def score_bounds_to_tptn_bounds(p, n, score_bounds):
    """
    Unfinished stub: only looks up the 'acc' entry of ``score_bounds``.

    Args:
        p: not used yet
        n: not used yet
        score_bounds (dict): must contain the key 'acc'

    Returns:
        None

    Raises:
        KeyError: if ``score_bounds`` has no 'acc' entry
    """
    _acc_bounds = score_bounds['acc']  # value is not used for anything yet
    return None

In [48]:
def prepare_problem(datasets, strategy):
    """
    Expand the dataset specification(s) and reshape them for a given strategy.

    Args:
        datasets (dict|list(dict)): one dataset specification or a list of them
        strategy (str|tuple): 'som' or 'mos' for a single dataset; one of
            ('som', 'som'), ('mos', 'som') or ('mos', 'mos') for a list

    Returns:
        For a single dataset: with 'som' a tuple of (p/n totals, score bounds,
        tp/tn bounds); with 'mos' the list of folds. For a list: with
        ('som', 'som') a tuple of (summed p/n totals, score bounds, tp/tn
        bounds); with ('mos', 'som') one dict per dataset holding 'p', 'n',
        'score_bounds' and 'tptn_bounds'; with ('mos', 'mos') one fold list
        per dataset. Any other combination yields None.

    Raises:
        ValueError: if the strategy is ('som', 'mos')
    """
    # NOTE(review): _extract_bounds is not defined in the visible cells - confirm where it comes from
    expanded = _expand_datasets(datasets)

    if strategy == ('som', 'mos'):
        raise ValueError(f'strategy {strategy} is not applicable')

    if isinstance(expanded, dict):
        if strategy == 'som':
            totals = {'p': expanded['p'], 'n': expanded['n']}
            return (totals,
                    _extract_bounds(expanded, 'score_bounds'),
                    _extract_bounds(expanded, 'tptn_bounds'))
        if strategy == 'mos':
            return expanded['folds']
        return None

    if isinstance(expanded, list):
        if strategy == ('som', 'som'):
            totals = {'p': sum(item['p'] for item in expanded),
                      'n': sum(item['n'] for item in expanded)}
            return (totals,
                    _extract_bounds(expanded, 'score_bounds'),
                    _extract_bounds(expanded, 'tptn_bounds'))
        if strategy == ('mos', 'som'):
            return [{'p': item['p'],
                     'n': item['n'],
                     'score_bounds': item.get('score_bounds'),
                     'tptn_bounds': item.get('tptn_bounds')}
                    for item in expanded]
        if strategy == ('mos', 'mos'):
            return [item['folds'] for item in expanded]

    return None

In [49]:
# Expand one count-based spec (generated folds) and one explicit-folds spec in a single call.
_expand_datasets([dataseta3, datasetc3])

[{'folds': [{'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 6, 'n': 4, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 6, 'n': 4, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 6, 'n': 4, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 6, 'n': 4, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 6, 'n': 4, 'score_bounds': {'acc': (0.8, 1.0)}}],
  'p': 100,
  'n': 50,
  'score_bounds': {'acc': (0.8, 1.0)}},
 {'folds': [{'p': 10,
    'n': 20,
    'score_bounds': {'

In [50]:
def calculate_base_scores_(tp, tn, p, n):
    """
    Compute accuracy, sensitivity, specificity and balanced accuracy.

    Args:
        tp: number of true positives
        tn: number of true negatives
        p: number of positives
        n: number of negatives

    Returns:
        dict: the four inputs under 'tp', 'tn', 'p', 'n' plus the scores
        'acc', 'sens', 'spec' and 'bacc'

    Raises:
        ZeroDivisionError: if ``p``, ``n`` or ``p + n`` is zero
    """
    accuracy = (tp + tn)/(p + n)
    sensitivity = tp/p
    specificity = tn/n

    scores = {'tp': tp, 'tn': tn, 'p': p, 'n': n}
    scores['acc'] = accuracy
    scores['sens'] = sensitivity
    scores['spec'] = specificity
    # balanced accuracy is the mean of sensitivity and specificity
    scores['bacc'] = (sensitivity + specificity)/2.0
    return scores

def calculate_scores_(problems, strategy):
    """
    Unfinished draft: computes the base scores of every fold of every problem.

    The per-problem structure is built but not yet collected or returned, and
    ``strategy`` is not used yet.

    Args:
        problems (iterable(dict)): each item has a 'folds' list whose entries
            carry 'tp', 'tn', 'p' and 'n'
        strategy: currently ignored

    Returns:
        None
    """
    results = []  # TODO: nothing is appended here yet
    for problem in problems:
        tmp = {'folds': []}
        for fold in problem['folds']:
            # Use the helper defined next to this function; the earlier spelling
            # ``calculate_base_scores`` (no trailing underscore) is not defined in the visible cells.
            tmp['folds'].append(calculate_base_scores_(tp=fold['tp'], tn=fold['tn'], p=fold['p'], n=fold['n']))
        tmp['p'] = 0  # placeholder left by the draft; no totals are accumulated yet
            

In [51]:
# Generate two problems; `folds` holds the per-fold p/n/tp/tn figures shown in the next cell's output.
folds, problems = generate_problems_with_folds(n_problems=2)

In [52]:
folds

[[{'p': 60, 'n': 68, 'tp': 59, 'tn': 10},
  {'p': 60, 'n': 68, 'tp': 43, 'tn': 62},
  {'p': 60, 'n': 67, 'tp': 19, 'tn': 13},
  {'p': 60, 'n': 67, 'tp': 34, 'tn': 6},
  {'p': 60, 'n': 67, 'tp': 15, 'tn': 12}],
 [{'p': 193, 'n': 68, 'tp': 94, 'tn': 52},
  {'p': 193, 'n': 68, 'tp': 46, 'tn': 28},
  {'p': 193, 'n': 67, 'tp': 38, 'tn': 66},
  {'p': 193, 'n': 67, 'tp': 39, 'tn': 33},
  {'p': 192, 'n': 68, 'tp': 125, 'tn': 14}]]

In [53]:
# Expand the generated problems into the folds/p/n structure that is later passed to check_aggregation.
_expand_datasets(problems)

[{'folds': [{'p': 60, 'n': 68},
   {'p': 60, 'n': 68},
   {'p': 60, 'n': 67},
   {'p': 60, 'n': 67},
   {'p': 60, 'n': 67}],
  'p': 300,
  'n': 337},
 {'folds': [{'p': 193, 'n': 68},
   {'p': 193, 'n': 68},
   {'p': 193, 'n': 67},
   {'p': 193, 'n': 67},
   {'p': 192, 'n': 68}],
  'p': 964,
  'n': 338}]

In [54]:
# Uses calculate_scores as imported from mlscorecheck.utils; a later cell in this notebook
# rebinds that name to a 4-argument helper, after which this call form no longer works.
calculate_scores(folds, strategy=('mos', 'mos'))

{'acc': 0.4193352897988215,
 'sens': 0.4608727331606217,
 'spec': 0.4381474978050922,
 'bacc': 0.44951011548285696}

In [55]:
# Aggregation strategy shared by the scoring and consistency-check cells below.
strategy = ('som', 'som')

In [56]:
# Reference scores computed from the known fold figures; these are what check_aggregation is given below.
scores = calculate_scores(folds, strategy=strategy)

In [57]:
scores

{'acc': 0.4167096441464673,
 'sens': 0.4050632911392405,
 'spec': 0.43851851851851853,
 'npv': 0.2824427480916031,
 'ppv': 0.574635241301908,
 'bacc': 0.4217909048288795,
 'f1p': 0.4751740139211137,
 'fm': 0.4824558446597393}

In [58]:
# Check the reference scores against the expanded problems with tolerance eps.
# The call prints the CBC solver log; res[1] (next cell) holds per-fold tp/tn values.
# NOTE(review): presumably res[1] is one feasible configuration found by the solver - confirm.
res = check_aggregation(scores, eps=0.0001, 
                    problems=_expand_datasets(problems),
                    strategy=strategy)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/6312afd6abb34969b51ef87518d55166-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/6312afd6abb34969b51ef87518d55166-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 13 COLUMNS
At line 175 RHS
At line 184 BOUNDS
At line 205 ENDATA
Problem MODEL has 8 rows, 20 columns and 120 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at root node changed objective from 0 to -1.79769e+308
Probing was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Gomory was tried 0 times

In [59]:
res[1]

[{'p': 300,
  'n': 337,
  'folds': [{'p': 60, 'n': 68, 'tn': 68.0, 'tp': 0.0},
   {'p': 60, 'n': 68, 'tn': 0.0, 'tp': 0.0},
   {'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0},
   {'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0},
   {'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0}]},
 {'p': 964,
  'n': 338,
  'folds': [{'p': 193, 'n': 68, 'tn': 0.0, 'tp': 0.0},
   {'p': 193, 'n': 68, 'tn': 26.0, 'tp': 0.0},
   {'p': 193, 'n': 67, 'tn': 67.0, 'tp': 127.0},
   {'p': 193, 'n': 67, 'tn': 67.0, 'tp': 193.0},
   {'p': 192, 'n': 68, 'tn': 68.0, 'tp': 192.0}]}]

In [61]:
def calculate_scores(tp, tn, p, n):
    """
    Compute all scores for a single tp/tn/p/n configuration.

    Thin wrapper that packs the four figures into a dict and hands it to
    ``calculate_all_scores``.

    NOTE(review): this definition rebinds ``calculate_scores``, which the
    import cell also brings in from ``mlscorecheck.utils``; cells that call
    ``calculate_scores(folds, strategy=...)`` only work before this cell runs.

    Args:
        tp: number of true positives
        tn: number of true negatives
        p: number of positives
        n: number of negatives

    Returns:
        whatever ``calculate_all_scores`` returns for the packed figures
    """
    # earlier hand-written variant, kept for reference:
    #   {'acc': (tp + tn) * (1.0 / (p + n)),
    #    'sens': tp * (1.0/p),
    #    'spec': tn * (1.0/n),
    #    'bacc': (tp * (1.0/p) + tn * (1.0/n))*0.5}
    figures = {'tp': tp, 'tn': tn, 'p': p, 'n': n}
    return calculate_all_scores(figures)

In [63]:
def calculate_all_scores_(problems, strategy):
    """
    Annotate folds and problems with scores in place, then aggregate overall.

    Every fold dict receives the scores computed from its own tp/tn/p/n, every
    problem dict receives the entries produced by aggregating its folds with
    ``strategy[1]``, and the problems are finally aggregated with
    ``strategy[0]``.

    NOTE(review): relies on ``calculate_scores_`` returning a mapping of
    scores; the version visible in this notebook returns nothing - confirm
    which implementation is meant.

    Args:
        problems (iterable(dict)): problems with a 'folds' list whose entries
            carry 'tp', 'tn', 'p' and 'n'; modified in place
        strategy (sequence): (problem-level mode, fold-level mode)

    Returns:
        the result of ``calculate_scores_(problems, strategy[0])``
    """
    for problem in problems:
        # fold level: attach each fold's own scores to the fold dict
        for fold in problem['folds']:
            fold_scores = calculate_scores(tp=fold['tp'],
                                           tn=fold['tn'],
                                           p=fold['p'],
                                           n=fold['n'])
            for name in fold_scores:
                fold[name] = fold_scores[name]

        # problem level: aggregate over the annotated folds
        problem_scores = calculate_scores_(problem['folds'], strategy[1])
        for name in problem_scores:
            problem[name] = problem_scores[name]

    return calculate_scores_(problems, strategy[0])
            

In [64]:
res[1]

[{'p': 300,
  'n': 337,
  'folds': [{'p': 60, 'n': 68, 'tn': 68.0, 'tp': 0.0},
   {'p': 60, 'n': 68, 'tn': 0.0, 'tp': 0.0},
   {'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0},
   {'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0},
   {'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0}]},
 {'p': 964,
  'n': 338,
  'folds': [{'p': 193, 'n': 68, 'tn': 0.0, 'tp': 0.0},
   {'p': 193, 'n': 68, 'tn': 26.0, 'tp': 0.0},
   {'p': 193, 'n': 67, 'tn': 67.0, 'tp': 127.0},
   {'p': 193, 'n': 67, 'tn': 67.0, 'tp': 193.0},
   {'p': 192, 'n': 68, 'tn': 68.0, 'tp': 192.0}]}]

In [65]:
# NOTE(review): calculate_scores_2 is not defined in any visible cell (execution counts 60 and 62
# are missing), so this cell fails on a fresh kernel - restore its definition or call the intended helper.
# Judging by the next display of res[1], the call adds scores to the fold dicts in place.
calculate_scores_2(res[1], strategy)

{'p': 300, 'n': 337, 'folds': [{'p': 60, 'n': 68, 'tn': 68.0, 'tp': 0.0}, {'p': 60, 'n': 68, 'tn': 0.0, 'tp': 0.0}, {'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0}, {'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0}, {'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0}]}
{'p': 60, 'n': 68, 'tn': 68.0, 'tp': 0.0}
{'p': 60, 'n': 68, 'tn': 0.0, 'tp': 0.0}
{'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0}
{'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0}
{'p': 60, 'n': 67, 'tn': 0.0, 'tp': 0.0}
{'p': 964, 'n': 338, 'folds': [{'p': 193, 'n': 68, 'tn': 0.0, 'tp': 0.0}, {'p': 193, 'n': 68, 'tn': 26.0, 'tp': 0.0}, {'p': 193, 'n': 67, 'tn': 67.0, 'tp': 127.0}, {'p': 193, 'n': 67, 'tn': 67.0, 'tp': 193.0}, {'p': 192, 'n': 68, 'tn': 68.0, 'tp': 192.0}]}
{'p': 193, 'n': 68, 'tn': 0.0, 'tp': 0.0}
{'p': 193, 'n': 68, 'tn': 26.0, 'tp': 0.0}
{'p': 193, 'n': 67, 'tn': 67.0, 'tp': 127.0}
{'p': 193, 'n': 67, 'tn': 67.0, 'tp': 193.0}
{'p': 192, 'n': 68, 'tn': 68.0, 'tp': 192.0}


{'acc': 0.4167096441464673,
 'sens': 0.4050632911392405,
 'spec': 0.43851851851851853,
 'npv': 0.2824427480916031,
 'ppv': 0.574635241301908,
 'bacc': 0.4217909048288795,
 'f1p': 0.4751740139211137,
 'fm': 0.4824558446597393,
 'tp': 512.0,
 'tn': 296.0,
 'p': 1264,
 'n': 675}

In [66]:
res[1]

[{'p': 300,
  'n': 337,
  'folds': [{'p': 60,
    'n': 68,
    'tn': 68.0,
    'tp': 0.0,
    'acc': 0.53125,
    'sens': 0.0,
    'spec': 1.0,
    'npv': 0.53125,
    'ppv': None,
    'bacc': 0.5,
    'f1p': 0.0,
    'fm': None},
   {'p': 60,
    'n': 68,
    'tn': 0.0,
    'tp': 0.0,
    'acc': 0.0,
    'sens': 0.0,
    'spec': 0.0,
    'npv': 0.0,
    'ppv': 0.0,
    'bacc': 0.0,
    'f1p': 0.0,
    'fm': 0.0},
   {'p': 60,
    'n': 67,
    'tn': 0.0,
    'tp': 0.0,
    'acc': 0.0,
    'sens': 0.0,
    'spec': 0.0,
    'npv': 0.0,
    'ppv': 0.0,
    'bacc': 0.0,
    'f1p': 0.0,
    'fm': 0.0},
   {'p': 60,
    'n': 67,
    'tn': 0.0,
    'tp': 0.0,
    'acc': 0.0,
    'sens': 0.0,
    'spec': 0.0,
    'npv': 0.0,
    'ppv': 0.0,
    'bacc': 0.0,
    'f1p': 0.0,
    'fm': 0.0},
   {'p': 60,
    'n': 67,
    'tn': 0.0,
    'tp': 0.0,
    'acc': 0.0,
    'sens': 0.0,
    'spec': 0.0,
    'npv': 0.0,
    'ppv': 0.0,
    'bacc': 0.0,
    'f1p': 0.0,
    'fm': 0.0}],
  'tp': 0.0,
  'tn':

In [None]:
# Rebuild a per-problem, per-fold list of dicts from the solver variables.
# NOTE(review): `expanded` is not defined in any visible cell, and `res` must expose
# .variables() here, unlike the indexable `res` returned by check_aggregation above - confirm
# which objects this cell expects.
configuration = [[{} for fold in range(len(problem['folds']))] for problem in expanded]
for variable in res.variables():
    # the name is split into <key>_<problem index>_<fold index>; the keys seen in the output are 'tp'/'tn'
    tokens = variable.name.split('_')
    pid = int(tokens[1])
    fid = int(tokens[2])
    configuration[pid][fid][tokens[0]] = variable.varValue
# copy the fixed p/n counts of every fold next to the solved values
for idx, problem in enumerate(expanded):
    for jdx, fold in enumerate(problem['folds']):
        configuration[idx][jdx]['p'] = fold['p']
        configuration[idx][jdx]['n'] = fold['n']

In [None]:
configuration

[[{'tn': 199.0, 'tp': 0.0, 'p': 168, 'n': 199},
  {'tn': 0.0, 'tp': 0.0, 'p': 168, 'n': 199},
  {'tn': 0.0, 'tp': 0.0, 'p': 168, 'n': 199},
  {'tn': 0.0, 'tp': 0.0, 'p': 168, 'n': 199},
  {'tn': 0.0, 'tp': 0.0, 'p': 168, 'n': 199}],
 [{'tn': 0.0, 'tp': 68.0, 'p': 137, 'n': 138},
  {'tn': 13.0, 'tp': 137.0, 'p': 137, 'n': 138},
  {'tn': 138.0, 'tp': 137.0, 'p': 137, 'n': 138},
  {'tn': 138.0, 'tp': 137.0, 'p': 137, 'n': 138},
  {'tn': 138.0, 'tp': 136.0, 'p': 136, 'n': 138}],
 [{'tn': 93.0, 'tp': 121.0, 'p': 121, 'n': 93},
  {'tn': 92.0, 'tp': 121.0, 'p': 121, 'n': 92},
  {'tn': 92.0, 'tp': 121.0, 'p': 121, 'n': 92},
  {'tn': 92.0, 'tp': 121.0, 'p': 121, 'n': 92},
  {'tn': 93.0, 'tp': 120.0, 'p': 120, 'n': 93}]]

In [None]:
res.status

1

In [None]:
# Recompute the scores from the reconstructed configuration to compare with `scores`.
# NOTE(review): this needs the calculate_scores imported from mlscorecheck.utils; the 4-argument
# def of the same name earlier in this notebook shadows it when cells run top to bottom.
calculate_scores(configuration, strategy=strategy)

{'acc': 0.5396491228070175,
 'sens': 0.5728383458646616,
 'spec': 0.5067536096879367,
 'npv': 0.5448172258387581,
 'ppv': 0.5351185250219491,
 'bacc': 0.5397959777762992,
 'f1p': 0.5533363595097595,
 'fm': 0.5536573043996718}

In [None]:
scores

{'acc': 0.5396491228070175,
 'sens': 0.5728383458646616,
 'spec': 0.5067536096879367,
 'npv': 0.5448172258387581,
 'ppv': 0.5351185250219491,
 'bacc': 0.5397959777762992,
 'f1p': 0.5533363595097595,
 'fm': 0.5536573043996718}

In [None]:
configuration

[[{'tn': 199.0, 'tp': 0.0, 'p': 168, 'n': 199},
  {'tn': 0.0, 'tp': 0.0, 'p': 168, 'n': 199},
  {'tn': 0.0, 'tp': 0.0, 'p': 168, 'n': 199},
  {'tn': 0.0, 'tp': 0.0, 'p': 168, 'n': 199},
  {'tn': 0.0, 'tp': 0.0, 'p': 168, 'n': 199}],
 [{'tn': 0.0, 'tp': 68.0, 'p': 137, 'n': 138},
  {'tn': 13.0, 'tp': 137.0, 'p': 137, 'n': 138},
  {'tn': 138.0, 'tp': 137.0, 'p': 137, 'n': 138},
  {'tn': 138.0, 'tp': 137.0, 'p': 137, 'n': 138},
  {'tn': 138.0, 'tp': 136.0, 'p': 136, 'n': 138}],
 [{'tn': 93.0, 'tp': 121.0, 'p': 121, 'n': 93},
  {'tn': 92.0, 'tp': 121.0, 'p': 121, 'n': 92},
  {'tn': 92.0, 'tp': 121.0, 'p': 121, 'n': 92},
  {'tn': 92.0, 'tp': 121.0, 'p': 121, 'n': 92},
  {'tn': 93.0, 'tp': 120.0, 'p': 120, 'n': 93}]]

In [None]:
# Same expansion as demonstrated earlier: one count-based spec and one explicit-folds spec.
_expand_datasets([dataseta3, datasetc3])

[{'folds': [{'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 6, 'n': 4, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 6, 'n': 4, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 6, 'n': 4, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 6, 'n': 4, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 7, 'n': 3, 'score_bounds': {'acc': (0.8, 1.0)}},
   {'p': 6, 'n': 4, 'score_bounds': {'acc': (0.8, 1.0)}}],
  'p': 100,
  'n': 50,
  'score_bounds': {'acc': (0.8, 1.0)}},
 {'folds': [{'p': 10,
    'n': 20,
    'score_bounds': {'

In [None]:
# Single-dataset case: a dict goes in and one expanded dict comes out; with a single fold
# both the tp bounds and the score bounds are copied onto that fold.
_expand_datasets(dataseta4)

{'folds': [{'p': 10,
   'n': 20,
   'tptn_bounds': {'tp': (5, 10)},
   'score_bounds': {'acc': (0.8, 1.0)}}],
 'p': 10,
 'n': 20,
 'tptn_bounds': {'tp': (5, 10)},
 'score_bounds': {'acc': (0.8, 1.0)}}

In [None]:
def _standardize(datasets, strategy):
    if isinstance(datasets, dict):
        if strategy[0] == 'som':
            if 'fold_configuration' not in datasets:
                result = {'p': datasets.get('p') * datasets.get('n_repeats', 1),
                            'n': datasets.get('n') * datasets.get('n_repeats', 1)}
                if 'score_bounds' in datasets:
                    result['score_bounds'] = datasets['score_bounds']
            else:
                result = _standardize(datasets['fold_configuration'], ['som', 'som'])
            return result
        elif strategy[0] == 'mos':
            if 'fold_configuration' not in datasets:
                if 'n_folds' in datasets or 'n_repeats' in datasets:
                    return determine_fold_configurations(datasets['p'],
                                                            datasets['n'],
                                                            datasets.get('n_folds', 1),
                                                            datasets.get('n_repeats', 1))
                else:
                    return [datasets]
            else:
                return datasets['fold_configuration']
    else:
        datasets = [_standardize(dataset, strategy[1:]) for dataset in datasets]
        if strategy[0] == 'mos':
            return datasets
        else:
            return {'p': sum(dataset['p'] for dataset in datasets),
                    'n': sum(dataset['n'] for dataset in datasets)}
    

In [None]:
# NOTE(review): datasetc2 stores its folds under 'folds', while _standardize looks for
# 'fold_configuration', so the output shows it passed through wrapped in a list rather than
# unpacked; the generated folds of dataseta3 also come back without its score bounds.
_standardize([dataseta3, datasetc2], ['mos', 'mos'])

[[{'p': 7, 'n': 3},
  {'p': 7, 'n': 3},
  {'p': 6, 'n': 4},
  {'p': 7, 'n': 3},
  {'p': 7, 'n': 3},
  {'p': 6, 'n': 4},
  {'p': 7, 'n': 3},
  {'p': 7, 'n': 3},
  {'p': 6, 'n': 4},
  {'p': 7, 'n': 3},
  {'p': 7, 'n': 3},
  {'p': 6, 'n': 4},
  {'p': 7, 'n': 3},
  {'p': 7, 'n': 3},
  {'p': 6, 'n': 4}],
 [{'folds': [{'p': 10, 'n': 20, 'tptn_bounds': {'tp': (5, 10)}},
    {'p': 20, 'n': 30, 'tptn_bounds': {'tp': (2, 20)}}],
   'score_bounds': {'acc': (0.75, 0.85)}}]]

In [None]:
# Per the output: values under shared keys are summed (recursively for nested dicts), and keys
# present in only one argument ('b', nested '2') do not appear in the result.
accumulate_dicts({'a': 1, 'b': 2, 'c': 5, 'd': {'1': 2}},
                    {'a': 4, 'c': 5, 'd': {'1': 3, '2': 4}})

{'a': 5, 'c': 10, 'd': {'1': 5}}

In [None]:
# Same summation over a list of dicts: per the output only keys common to all items survive.
aggregate_dicts([{'a': 1, 'b': 2, 'c': 5, 'd': {'1': 2}},
                    {'a': 4, 'c': 5, 'd': {'1': 3, '2': 4}},
                    {'a': 1, 'b': 2, 'c': 5, 'd': {'1': 2}}])

{'a': 6, 'c': 15, 'd': {'1': 7}}

In [None]:
# depth=0 on a single count-based spec: the output lists one entry per generated fold,
# each carrying the dataset's score and tp bounds.
print(prepare_for_mos({'p': 10, 'n': 20, 'n_repeats': 5, 'n_folds': 3, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, depth=0))

[{'p': 4, 'n': 6, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 4, 'n': 6, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 4, 'n': 6, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 4, 'n': 6, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 3, 'n': 7, 'score_bounds': {'acc': (

In [None]:
# Same call with a named dataset: the fold counts in the output derive from the dataset's resolved p and n.
print(prepare_for_mos({'dataset': 'common_datasets.ecoli1', 'n_repeats': 5, 'n_folds': 3, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, depth=0))

[{'p': 26, 'n': 86, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 26, 'n': 86, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 25, 'n': 87, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 26, 'n': 86, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 26, 'n': 86, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 25, 'n': 87, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 26, 'n': 86, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 26, 'n': 86, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 25, 'n': 87, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 26, 'n': 86, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 26, 'n': 86, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 25, 'n': 87, '

In [None]:
# An explicit 'fold_configuration' at depth=0 comes back unchanged as the list of folds (see output).
print(prepare_for_mos({'fold_configuration': [{'p': 10, 'n': 5, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, 
                                            {'p': 5, 'n': 20, 'score_bounds': {'acc': (0.9, 1.0)}, 'tptn_bounds': {'tp': (8, 20)}}]}, depth=0))

[{'p': 10, 'n': 5, 'score_bounds': {'acc': (0.8, 1.0)}, 'tptn_bounds': {'tp': (5, 10)}}, {'p': 5, 'n': 20, 'score_bounds': {'acc': (0.9, 1.0)}, 'tptn_bounds': {'tp': (8, 20)}}]


In [None]:
# Two explicit folds with their own score and tp bounds, reused by the prepare_for_mos calls below.
fold_configuration = [{'p': 10, 'n': 5, 'score_bounds': {'acc': (0.8, 1.0)}, 
                                        'tptn_bounds': {'tp': (5, 10)}}, 
                        {'p': 5, 'n': 20, 'score_bounds': {'acc': (0.9, 1.0)}, 
                                            'tptn_bounds': {'tp': (8, 20)}}]

In [None]:
# depth=0 on a list: per the output every dataset is collapsed to a single dict. For the
# fold_configuration entry the counts and tp bounds appear summed and the accuracy bounds averaged.
prepare_for_mos([{"p": 10, "n": 20},
                {"dataset": "common_datasets.ecoli1"},
                {"fold_configuration": fold_configuration}], depth=0)

[{'p': 10, 'n': 20},
 {'p': 77, 'n': 259},
 {'p': 15,
  'n': 25,
  'score_bounds': {'acc': (0.8500000000000001, 1.0)},
  'tptn_bounds': {'tp': (13, 30)}}]

In [None]:
# As above, with a repeated k-fold first dataset: in the output its counts are multiplied by the
# number of repeats and its tp bounds are multiplied by the number of folds (15).
prepare_for_mos([{"p": 10, "n": 20, "n_repeats": 5, "n_folds": 3, 
                    "score_bounds": {'acc': (0.7, 1.0)}, "tptn_bounds": {'tp': (5, 10)}},
                {"dataset": "common_datasets.ecoli1"},
                {"fold_configuration": fold_configuration}], depth=0)

[{'p': 50,
  'n': 100,
  'score_bounds': {'acc': (0.6999999999999998, 1.0)},
  'tptn_bounds': {'tp': (75, 150)}},
 {'p': 77, 'n': 259},
 {'p': 15,
  'n': 25,
  'score_bounds': {'acc': (0.8500000000000001, 1.0)},
  'tptn_bounds': {'tp': (13, 30)}}]

In [None]:
# depth=1 on a list: per the output each dataset is kept as its own list of folds instead of being collapsed.
prepare_for_mos([{"p": 10, "n": 20, "n_repeats": 5, "n_folds": 3, 
                    "score_bounds": {'acc': (0.7, 1.0)}},
                {"dataset": "common_datasets.ecoli1"},
                {"fold_configuration": fold_configuration}], depth=1)

[[{'p': 4, 'n': 6, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 4, 'n': 6, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 4, 'n': 6, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 4, 'n': 6, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 4, 'n': 6, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.7, 1.0)}},
  {'p': 3, 'n': 7, 'score_bounds': {'acc': (0.7, 1.0)}}],
 [{'p': 77, 'n': 259}],
 [{'p': 10,
   'n': 5,
   'score_bounds': {'acc': (0.8, 1.0)},
   'tptn_bounds': {'tp': (5, 10)}},
  {'p': 5,
   'n': 20