In [9]:
from mlscorecheck.aggregated import (fold_partitioning_generator,
                                        generate_evaluations_with_all_kfolds,
                                        _check_specification_and_determine_p_n,
                                        random_identifier)
from mlscorecheck.core import logger
import copy

In [23]:
def kfolds_generator(evaluation: dict,
                        available_scores: list,
                        repeat_idx=0):
    p, n = _check_specification_and_determine_p_n(evaluation.get('dataset'),
                                                    evaluation.get('folding'))

    p_zero = False
    n_zero = False

    if 'sens' not in available_scores and 'bacc' not in available_scores:
        p_zero = True
        logger.info('sens and bacc not among the reported scores, p=0 folds are also considered')
    if 'spec' not in available_scores and 'bacc' not in available_scores:
        n_zero = True
        logger.info('spec and bacc not among the reported scores, n=0 folds are also considered')

    if evaluation['dataset'].get('dataset_name') is not None:
        evaluation['dataset']['identifier'] = \
            f'{evaluation["dataset"]["dataset_name"]}_{random_identifier(3)}'
    else:
        evaluation['dataset']['identifier'] = random_identifier(6)

    for jdx, (ps, ns) in enumerate(fold_partitioning_generator(p=p,
                                                n=n,
                                                k=evaluation['folding'].get('n_folds', 1),
                                                p_non_zero=not p_zero,
                                                n_non_zero=not n_zero)):
        yield [{'p': p_,
                'n': n_,
                'identifier': f"{evaluation['dataset']['identifier']}_f{idx}_k{jdx}_r{repeat_idx}"}
                for idx, (p_, n_) in enumerate(zip(ps, ns))]

import itertools

def repeated_kfolds_generator(evaluation: dict,
                                available_scores: list):
    n_repeats = evaluation['folding'].get('n_repeats', 1)

    generators = [kfolds_generator(evaluation, available_scores, idx) for idx in range(n_repeats)]

    print(generators)

    for folds in itertools.product(*generators):
        yield {'dataset': copy.deepcopy(evaluation['dataset']),
                'folding': {
                    'folds': [fold for fold_list in folds for fold in fold_list]},
                'fold_score_bounds': copy.deepcopy(evaluation.get('fold_score_bounds')),
                'aggregation': evaluation.get('aggregation')}

def experiment_kfolds_generator(experiment: dict,
                                available_scores: list):
    generators = [repeated_kfolds_generator(evaluation, available_scores)
                    for evaluation in experiment['evaluations']]
    for evaluations in itertools.product(*generators):
        yield {'evaluations': list(evaluations),
                'dataset_score_bounds': experiment.get('dataset_score_bounds'),
                'aggregation': experiment['aggregation']}

In [21]:
list(kfolds_generator(evaluation={'dataset': {'p': 5, 'n': 10}, 'folding': {'n_folds': 2}},
                        available_scores=['acc']))

2023-10-16 14:30:33,762:INFO:sens and bacc not among the reported scores, p=0 folds are also considered
2023-10-16 14:30:33,764:INFO:spec and bacc not among the reported scores, n=0 folds are also considered


[[{'p': 1, 'n': 7, 'identifier': 'bskpdi_f0_k0_r0'},
  {'p': 4, 'n': 3, 'identifier': 'bskpdi_f1_k0_r0'}],
 [{'p': 2, 'n': 6, 'identifier': 'bskpdi_f0_k1_r0'},
  {'p': 3, 'n': 4, 'identifier': 'bskpdi_f1_k1_r0'}],
 [{'p': 3, 'n': 5, 'identifier': 'bskpdi_f0_k2_r0'},
  {'p': 2, 'n': 5, 'identifier': 'bskpdi_f1_k2_r0'}],
 [{'p': 4, 'n': 4, 'identifier': 'bskpdi_f0_k3_r0'},
  {'p': 1, 'n': 6, 'identifier': 'bskpdi_f1_k3_r0'}]]

In [22]:
list(kfolds_generator_repeated(evaluation={'dataset': {'p': 5, 'n': 10},
                                            'folding': {'n_folds': 2, 'n_repeats': 2}},
                        available_scores=['acc']))

2023-10-16 14:30:34,387:INFO:sens and bacc not among the reported scores, p=0 folds are also considered
2023-10-16 14:30:34,390:INFO:spec and bacc not among the reported scores, n=0 folds are also considered
2023-10-16 14:30:34,392:INFO:sens and bacc not among the reported scores, p=0 folds are also considered
2023-10-16 14:30:34,393:INFO:spec and bacc not among the reported scores, n=0 folds are also considered


[<generator object kfolds_generator at 0x7fd6fbdb0040>, <generator object kfolds_generator at 0x7fd6fbdb06d0>]


[{'dataset': {'p': 5, 'n': 10, 'identifier': 'jbnjxl'},
  'folding': {'folds': [{'p': 1, 'n': 7, 'identifier': 'kbggql_f0_k0_r0'},
    {'p': 4, 'n': 3, 'identifier': 'kbggql_f1_k0_r0'},
    {'p': 1, 'n': 7, 'identifier': 'jbnjxl_f0_k0_r1'},
    {'p': 4, 'n': 3, 'identifier': 'jbnjxl_f1_k0_r1'}]},
  'fold_score_bounds': None,
  'aggregation': None},
 {'dataset': {'p': 5, 'n': 10, 'identifier': 'jbnjxl'},
  'folding': {'folds': [{'p': 1, 'n': 7, 'identifier': 'kbggql_f0_k0_r0'},
    {'p': 4, 'n': 3, 'identifier': 'kbggql_f1_k0_r0'},
    {'p': 2, 'n': 6, 'identifier': 'jbnjxl_f0_k1_r1'},
    {'p': 3, 'n': 4, 'identifier': 'jbnjxl_f1_k1_r1'}]},
  'fold_score_bounds': None,
  'aggregation': None},
 {'dataset': {'p': 5, 'n': 10, 'identifier': 'jbnjxl'},
  'folding': {'folds': [{'p': 1, 'n': 7, 'identifier': 'kbggql_f0_k0_r0'},
    {'p': 4, 'n': 3, 'identifier': 'kbggql_f1_k0_r0'},
    {'p': 3, 'n': 5, 'identifier': 'jbnjxl_f0_k2_r1'},
    {'p': 2, 'n': 5, 'identifier': 'jbnjxl_f1_k2_r1'}]},


In [24]:
experiment={'evaluations': [{'dataset': {'p': 5, 'n': 10},
                            'folding': {'n_folds': 2, 'n_repeats': 2}},
                            {'dataset': {'p': 4, 'n': 15},
                            'folding': {'n_folds': 3, 'n_repeats': 2}}],
            'aggregation': 'mor'}

In [25]:
list(experiment_kfolds_generator(experiment=experiment,
                                    available_scores=['acc']))

2023-10-16 14:43:39,026:INFO:sens and bacc not among the reported scores, p=0 folds are also considered
2023-10-16 14:43:39,028:INFO:spec and bacc not among the reported scores, n=0 folds are also considered
2023-10-16 14:43:39,029:INFO:sens and bacc not among the reported scores, p=0 folds are also considered
2023-10-16 14:43:39,030:INFO:spec and bacc not among the reported scores, n=0 folds are also considered
2023-10-16 14:43:39,033:INFO:sens and bacc not among the reported scores, p=0 folds are also considered
2023-10-16 14:43:39,034:INFO:spec and bacc not among the reported scores, n=0 folds are also considered
2023-10-16 14:43:39,036:INFO:sens and bacc not among the reported scores, p=0 folds are also considered
2023-10-16 14:43:39,038:INFO:spec and bacc not among the reported scores, n=0 folds are also considered


[<generator object kfolds_generator at 0x7fd6fbde3b50>, <generator object kfolds_generator at 0x7fd6fbde3ae0>]
[<generator object kfolds_generator at 0x7fd6fbde3b50>, <generator object kfolds_generator at 0x7fd6fbde3ae0>]


[{'evaluations': [{'dataset': {'p': 5, 'n': 10, 'identifier': 'kriclb'},
    'folding': {'folds': [{'p': 1, 'n': 7, 'identifier': 'arzudt_f0_k0_r0'},
      {'p': 4, 'n': 3, 'identifier': 'arzudt_f1_k0_r0'},
      {'p': 1, 'n': 7, 'identifier': 'kriclb_f0_k0_r1'},
      {'p': 4, 'n': 3, 'identifier': 'kriclb_f1_k0_r1'}]},
    'fold_score_bounds': None,
    'aggregation': None},
   {'dataset': {'p': 4, 'n': 15, 'identifier': 'estcna'},
    'folding': {'folds': [{'p': 0, 'n': 7, 'identifier': 'kkvnsh_f0_k0_r0'},
      {'p': 1, 'n': 5, 'identifier': 'kkvnsh_f1_k0_r0'},
      {'p': 3, 'n': 3, 'identifier': 'kkvnsh_f2_k0_r0'},
      {'p': 0, 'n': 7, 'identifier': 'estcna_f0_k0_r1'},
      {'p': 1, 'n': 5, 'identifier': 'estcna_f1_k0_r1'},
      {'p': 3, 'n': 3, 'identifier': 'estcna_f2_k0_r1'}]},
    'fold_score_bounds': None,
    'aggregation': None}],
  'dataset_score_bounds': None,
  'aggregation': 'mor'},
 {'evaluations': [{'dataset': {'p': 5, 'n': 10, 'identifier': 'kriclb'},
    'foldi