In [11]:
import numpy as np

from mlscorecheck.check import check_n_datasets_mos_unknown_folds_mos_scores, estimate_n_experiments
from mlscorecheck.aggregated import generate_experiment, experiment_kfolds_generator

In [12]:
def generate_test_case(random_seed: int,
                        rounding_decimals: int,
                        score_subset: list,
                        max_n_experiments: int = 1000) -> tuple[dict, dict]:
    """
    Generate one random test case

    Experiments are drawn repeatedly from ``generate_experiment`` with a
    single random state (so a given seed always yields the same test case)
    until one is found that has more than one evaluation and for which
    ``estimate_n_experiments`` does not exceed ``max_n_experiments``.

    Args:
        random_seed (int): the random seed to use
        rounding_decimals (int): the number of decimals to round to
        score_subset (list): the list of scores to be used
        max_n_experiments (int): the largest value of
                                    ``estimate_n_experiments`` that is still
                                    accepted (candidates above it are
                                    discarded and redrawn)

    Returns:
        dict, dict: the experiment specification and the scores (restricted
        to the keys listed in ``score_subset``)
    """
    # Forwarded unchanged to generate_experiment for every candidate draw.
    evaluation_params = {'max_p': 25,
                        'max_n': 200,
                        'max_folds': 4,
                        'max_repeats': 1,
                        'aggregation': 'mos',
                        'no_folds': True,
                        'no_name': True}

    random_state = np.random.RandomState(random_seed)

    # Rejection sampling: keep drawing until a candidate is acceptable.
    # NOTE: there is no upper bound on the number of draws.
    while True:
        experiment, scores = generate_experiment(random_state=random_state,
                                                    rounding_decimals=rounding_decimals,
                                                    evaluation_params=evaluation_params,
                                                    max_evaluations=2,
                                                    aggregation='mos',
                                                    return_scores=True)

        # The estimate is computed from all generated scores, before the
        # restriction to score_subset is applied.
        n_experiments = estimate_n_experiments(experiment['evaluations'],
                                                list(scores.keys()))

        rejected = (n_experiments > max_n_experiments
                    or len(experiment['evaluations']) == 1)
        if not rejected:
            break

    scores = {key: value for key, value in scores.items() if key in score_subset}
    return experiment, scores


In [13]:
# Draw one reproducible example (fixed seed), keeping only the
# accuracy, sensitivity and specificity scores.
experiment, scores = generate_test_case(
    random_seed=1,
    rounding_decimals=3,
    score_subset=['acc', 'sens', 'spec'],
)

In [14]:
# Two evaluations, each given by the class counts of its dataset and the
# number of folds/repeats of its folding.
evaluations = [
    {'dataset': {'p': 13, 'n': 73},
     'folding': {'n_folds': 4, 'n_repeats': 1}},
    {'dataset': {'p': 7, 'n': 26},
     'folding': {'n_folds': 3, 'n_repeats': 1}},
]
evaluation0, evaluation1 = evaluations

# Reported scores to be tested; eps below matches their 3-decimal precision.
scores = {
    'acc': 0.357,
    'sens': 0.323,
    'spec': 0.362,
    'bacc': 0.343,
}

result = check_n_datasets_mos_unknown_folds_mos_scores(
    evaluations=evaluations,
    scores=scores,
    eps=1e-3,
)

# The cell's displayed value: the inconsistency flag of the check
# (False in the recorded run).
result['inconsistency']

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/d653504ba0ce4cb080ccc1652a531bfa-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/d653504ba0ce4cb080ccc1652a531bfa-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 13 COLUMNS
At line 127 RHS
At line 136 BOUNDS
At line 152 ENDATA
Problem MODEL has 8 rows, 15 columns and 84 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0004I processed model has 4 rows, 10 columns (10 integer (2 of which binary)) and 30 elements
Cbc0031I 3 added rows had average density of 7.3333333
Cbc0013I At root node, 19 cuts changed objective from 0 to 0 in 66 passes
Cbc0014I Cut generator 0 (Probing) - 28 row cuts average 2.4 elements, 1 column cuts (1 active)  in 0.001 secon

False

In [15]:
# Same two evaluations as in the previous check: class counts of each
# dataset plus the number of folds/repeats of its folding.
evaluations = [
    {'dataset': {'p': 13, 'n': 73},
     'folding': {'n_folds': 4, 'n_repeats': 1}},
    {'dataset': {'p': 7, 'n': 26},
     'folding': {'n_folds': 3, 'n_repeats': 1}},
]
evaluation0, evaluation1 = evaluations

# Identical scores except for 'bacc', which is set to 0.9 here.
scores = {
    'acc': 0.357,
    'sens': 0.323,
    'spec': 0.362,
    'bacc': 0.9,
}

result = check_n_datasets_mos_unknown_folds_mos_scores(
    evaluations=evaluations,
    scores=scores,
    eps=1e-3,
)

# The cell's displayed value: the inconsistency flag of the check
# (True in the recorded run).
result['inconsistency']

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/b57e9c77b7c44317981e7b2f1217c021-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/b57e9c77b7c44317981e7b2f1217c021-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 13 COLUMNS
At line 127 RHS
At line 136 BOUNDS
At line 152 ENDATA
Problem MODEL has 8 rows, 15 columns and 84 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Problem is infeasible - 0.00 seconds
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/15598fafca034066803c91

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/74ba6b3661e94eeead35589e53b3f54b-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/74ba6b3661e94eeead35589e53b3f54b-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 13 COLUMNS
At line 127 RHS
At line 136 BOUNDS
At line 152 ENDATA
Problem MODEL has 8 rows, 15 columns and 84 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Problem is infeasible - 0.00 seconds
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/27fb1c91f55e4b8c8f6fad

True