In [47]:
import numpy as np

from mlscorecheck.check import (check_multiple_datasets_mos_scores)

from mlscorecheck.utils import (generate_problems_with_folds,
                                calculate_scores)

In [48]:
# Number of decimal places; the tolerance below is derived from it.
k = 4
# Tolerance handed to the consistency checks as `eps` (here 1e-4).
eps = 10 ** -k

In [49]:
# Generate five dataset specifications with one fold each, together with the
# matching tp/tn figures. The fixed random_seed is presumably what keeps the
# generated numbers reproducible -- confirm against the helper's documentation.
folds, problems = generate_problems_with_folds(
    n_problems=5,
    n_folds=1,
    random_seed=5,
)

In [50]:
# Display the generated per-dataset counts (p, n, tp, tn).
folds

[{'p': 869, 'n': 208, 'tp': 702, 'tn': 119},
 {'p': 402, 'n': 75, 'tp': 9, 'tn': 63},
 {'p': 413, 'n': 626, 'tp': 159, 'tn': 520},
 {'p': 206, 'n': 913, 'tp': 114, 'tn': 438},
 {'p': 82, 'n': 29, 'tp': 45, 'tn': 14}]

In [51]:
# Display the dataset specifications (p, n, n_folds, n_repeats), one per entry of `folds`.
problems

[{'p': 869, 'n': 208, 'n_folds': 1, 'n_repeats': 1},
 {'p': 402, 'n': 75, 'n_folds': 1, 'n_repeats': 1},
 {'p': 413, 'n': 626, 'n_folds': 1, 'n_repeats': 1},
 {'p': 206, 'n': 913, 'n_folds': 1, 'n_repeats': 1},
 {'p': 82, 'n': 29, 'n_folds': 1, 'n_repeats': 1}]

In [52]:
# Aggregate scores over all generated datasets, rounded to k decimals.
# Using k (instead of a literal 4) keeps the rounding in sync with the
# tolerance eps = 10**(-k) that is passed to the consistency checks.
scores = calculate_scores(folds, strategy='mos', rounding_decimals=k)

In [53]:
# Display the rounded aggregate scores (acc, sens, spec, bacc) that are checked below.
scores

{'acc': 0.5183, 'sens': 0.4635, 'spec': 0.6411, 'bacc': 0.5523}

In [54]:
# Run the consistency check of the rounded scores against the dataset
# specifications. With return_details=True the call returns a details dict
# next to the boolean flag (both are inspected in the following cells).
flag, details = check_multiple_datasets_mos_scores(
    scores=scores,
    eps=eps,
    datasets=problems,
    return_details=True,
)

[None, None, None, None, None]
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/0be0b794b79943fb9451fa8c81694322-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/0be0b794b79943fb9451fa8c81694322-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 13 COLUMNS
At line 95 RHS
At line 104 BOUNDS
At line 115 ENDATA
Problem MODEL has 8 rows, 10 columns and 60 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0004I processed model has 8 rows, 10 columns (10 integer (0 of which binary)) and 60 elements
Cutoff increment increased from 1e-05 to 0.9999
Cbc0012I Integer solution of 0 found by DiveCoefficient after 0 iterations and 0 nodes (0.00 seconds)
Cbc0001I Search completed - best objective 0, took 0 iterati

In [55]:
# The scores were computed from the generated folds themselves, so the check
# is expected to succeed; the message makes a failure self-explanatory.
assert flag, "scores computed from the generated folds should be reported as consistent"

In [56]:
# Display the overall consistency flag and the per-dataset tp/tn configuration returned by the check.
details

{'overall_consistency': True,
 'configuration': [{'p': 869, 'n': 208, 'tn': 173.0, 'tp': 0.0},
  {'p': 402, 'n': 75, 'tn': 75.0, 'tp': 402.0},
  {'p': 413, 'n': 626, 'tn': 0.0, 'tp': 131.0},
  {'p': 206, 'n': 913, 'tn': 341.0, 'tp': 0.0},
  {'p': 82, 'n': 29, 'tn': 29.0, 'tp': 82.0}]}

In [57]:
# Recompute the unrounded scores from the returned configuration so they can be
# compared by eye with the rounded `scores` that were checked.
calculate_scores(details['configuration'], strategy='mos')

{'acc': 0.5182901054258326,
 'sens': 0.4634382566585956,
 'spec': 0.6410449490268768,
 'bacc': 0.5522416028427362}

In [58]:
# Score every generated dataset on its own; scores_only=False keeps the raw
# p/n/tp/tn figures next to the scores in each result dict.
folding_scores = [
    calculate_scores(fold, strategy='mos', scores_only=False)
    for fold in folds
]

def score_ranges(folding_scores):
    """Compute the observed (min, max) range of every score.

    Args:
        folding_scores: iterable of dicts mapping a score name to its value.

    Returns:
        dict: score name -> (smallest value, largest value) over all input
        dicts that contain that name, keyed in order of first appearance.
        An empty input gives an empty dict.
    """
    bounds = {}
    for fold_scores in folding_scores:
        for name, score in fold_scores.items():
            # Start from (+inf, -inf) so the first observed value always wins.
            lowest, highest = bounds.get(name, (np.inf, -np.inf))
            bounds[name] = (min(lowest, score), max(highest, score))
    return bounds

# Min/max of every per-dataset figure; the 'acc' range is reused further down as a score bound.
score_rang = score_ranges(folding_scores)

score_rang

{'acc': (0.1509433962264151, 0.7623026926648097),
 'sens': (0.022388059701492536, 0.807825086306099),
 'spec': (0.47973713033953996, 0.84),
 'npv': (0.13815789473684212, 0.8264150943396227),
 'ppv': (0.1935483870967742, 0.8874841972187105),
 'bacc': (0.43119402985074623, 0.6899702354607418),
 'f1p': (0.0425531914893617, 0.8457831325301205),
 'fm': (0.09795347226725089, 0.8467183700697085),
 'p': (82, 869),
 'n': (29, 913),
 'tp': (9, 702),
 'tn': (14, 520)}

In [64]:
# Attach extra constraints to every dataset specification.
# This mutates the dicts in `problems` in place, so the listing of `problems`
# displayed earlier in the notebook no longer reflects their content.
for problem in problems:
    # Bound each dataset's accuracy by the min/max accuracy observed over the datasets.
    problem['score_bounds'] = {'acc': score_rang['acc']}
    # NOTE(review): the lower bound of 100 is larger than p or n of some of the
    # datasets listed above (p=82, n=29, n=75). Presumably no tp/tn value can
    # satisfy it there, and the stored solver log of the next check reports the
    # problem as infeasible -- confirm that this is intended.
    problem['tptn_bounds'] = {'tp': (100, 10000), 'tn': (100, 10000)}

In [65]:
# Repeat the consistency check, now with the score and tp/tn bounds attached
# to each dataset specification.
# NOTE(review): the outcome of this run is never asserted or displayed; the
# stored solver log reports the problem as infeasible.
flag, details = check_multiple_datasets_mos_scores(
    scores=scores,
    eps=eps,
    datasets=problems,
    return_details=True,
)

[(0.1509433962264151, 0.7623026926648097), (0.1509433962264151, 0.7623026926648097), (0.1509433962264151, 0.7623026926648097), (0.1509433962264151, 0.7623026926648097), (0.1509433962264151, 0.7623026926648097)]
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/de78c5c65adb43dbab4a6823f18838f7-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/de78c5c65adb43dbab4a6823f18838f7-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 43 COLUMNS
At line 165 RHS
At line 204 BOUNDS
At line 215 ENDATA
Problem MODEL has 38 rows, 10 columns and 100 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Problem is infeasible - 0.00 seconds
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00



In [28]:
# Overwrite the balanced accuracy in place -- presumably to make the score set
# inconsistent, given the `assert not flag` at the end of the notebook.
# NOTE(review): this cell's stored execution count (28) is lower than that of
# the preceding cell (65), so the stored outputs were not produced by a
# top-to-bottom run; re-run the notebook from a fresh kernel.
scores['bacc'] = 0.9

In [29]:
# Run the consistency check again on the modified scores.
# NOTE(review): the stored output of this cell is a NameError for
# `check_multiple_dataset_mos_scores`, a name that does not occur in this
# cell -- the output looks stale and the cell should be re-run.
flag, details = check_multiple_datasets_mos_scores(
    scores=scores,
    eps=eps,
    datasets=problems,
    return_details=True,
)

NameError: name 'check_multiple_dataset_mos_scores' is not defined

In [None]:
# After bacc was overwritten the check is expected to report an inconsistency;
# the message makes a failure self-explanatory.
# NOTE(review): this cell has never been executed (no execution count), and the
# check run before the bacc change is already reported as infeasible in the
# stored log, so this assertion may pass for reasons unrelated to bacc -- confirm.
assert not flag, "scores with the overwritten bacc should be reported as inconsistent"