In [59]:
import numpy as np

from mlscorecheck.check import (check_kfold_mor_scores)

from mlscorecheck.utils import (generate_problems_with_folds,
                                calculate_scores)

In [60]:
# k is the number of decimals the scores are rounded to (passed as
# rounding_decimals below); eps is the matching tolerance handed to the check.
k = 4
eps = 10 ** -k

In [61]:
# Generate a problem specification together with its per-fold figures.
# The seed is fixed so that re-running the notebook yields the same folds.
foldings, problem = generate_problems_with_folds(
    n_repeats=2,
    random_seed=5,
)

In [62]:
foldings

[{'p': 174, 'n': 42, 'tp': 119, 'tn': 17},
 {'p': 174, 'n': 42, 'tp': 74, 'tn': 9},
 {'p': 174, 'n': 41, 'tp': 156, 'tn': 31},
 {'p': 174, 'n': 41, 'tp': 8, 'tn': 13},
 {'p': 173, 'n': 42, 'tp': 144, 'tn': 40},
 {'p': 174, 'n': 42, 'tp': 81, 'tn': 28},
 {'p': 174, 'n': 42, 'tp': 45, 'tn': 14},
 {'p': 174, 'n': 41, 'tp': 66, 'tn': 31},
 {'p': 174, 'n': 41, 'tp': 87, 'tn': 19},
 {'p': 173, 'n': 42, 'tp': 122, 'tn': 10}]

In [63]:
problem

{'p': 869, 'n': 208, 'n_folds': 5, 'n_repeats': 2}

In [64]:
# Compute the reference scores from the generated folds, rounded to k decimals.
scores = calculate_scores(foldings, strategy='mor', rounding_decimals=k)

In [65]:
scores

{'acc': 0.5173, 'sens': 0.5193, 'spec': 0.5102, 'bacc': 0.5147}

In [66]:
# Check the scores against the problem specification with tolerance eps.
# return_details=True additionally yields the details dict inspected below.
flag, details = check_kfold_mor_scores(
    scores=scores,
    eps=eps,
    dataset=problem,
    return_details=True,
)

[None, None, None, None, None, None, None, None, None, None]
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/95c03643b90f4675ac3c6fdcfc41a2ee-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/95c03643b90f4675ac3c6fdcfc41a2ee-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 13 COLUMNS
At line 175 RHS
At line 184 BOUNDS
At line 205 ENDATA
Problem MODEL has 8 rows, 20 columns and 120 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0004I processed model has 8 rows, 7 columns (7 integer (0 of which binary)) and 42 elements
Cutoff increment increased from 1e-05 to 0.9999
Cbc0012I Integer solution of 0 found by DiveCoefficient after 16 iterations and 0 nodes (0.02 seconds)
Cbc0031I 3 added rows had ave

In [67]:
# The scores were computed from the generated folds themselves, so the check
# above is expected to succeed.
assert flag

In [68]:
details['configuration']

[{'p': 174, 'n': 42, 'tn': 42.0, 'tp': 0.0},
 {'p': 174, 'n': 42, 'tn': 0.0, 'tp': 50.0},
 {'p': 174, 'n': 41, 'tn': 10.0, 'tp': 174.0},
 {'p': 174, 'n': 41, 'tn': 41.0, 'tp': 174.0},
 {'p': 173, 'n': 42, 'tn': 0.0, 'tp': 173.0},
 {'p': 174, 'n': 42, 'tn': 0.0, 'tp': 0.0},
 {'p': 174, 'n': 42, 'tn': 36.0, 'tp': 0.0},
 {'p': 174, 'n': 41, 'tn': 41.0, 'tp': 174.0},
 {'p': 174, 'n': 41, 'tn': 41.0, 'tp': 86.0},
 {'p': 173, 'n': 42, 'tn': 0.0, 'tp': 71.0}]

In [69]:
calculate_scores(details['configuration'], strategy='mor')

{'acc': 0.5173987941429802,
 'sens': 0.5192013819679756,
 'spec': 0.5101045296167248,
 'bacc': 0.51465295579235}

In [86]:
# Evaluate every fold on its own.
# NOTE(review): scores_only=False presumably keeps the raw counts next to the
# scores in each result — confirm against calculate_scores.
folding_scores = [
    calculate_scores(fold, strategy='mor', scores_only=False)
    for fold in foldings
]

def score_ranges(folding_scores):
    """Collect the smallest and largest value seen for every key.

    Args:
        folding_scores: iterable of dicts mapping a key to a comparable value.

    Returns:
        dict: key -> (minimum, maximum) over all dicts containing that key,
        with keys in order of first appearance; empty if there is no input.
    """
    lows, highs = {}, {}
    for fold_scores in folding_scores:
        for name in fold_scores:
            score = fold_scores[name]
            # Seed with +/-inf on first sight so the first real value always wins.
            lows[name] = min(lows.setdefault(name, np.inf), score)
            highs[name] = max(highs.setdefault(name, -np.inf), score)

    return {name: (low, highs[name]) for name, low in lows.items()}

score_rang = score_ranges(folding_scores)

score_rang

{(0.04597701149425287, 0.896551724137931),
 (0.07262569832402235, 0.6326530612244898),
 (0.0761904761904762, 0.9176470588235294),
 (0.09767441860465116, 0.8697674418604651),
 (0.10107973914385379, 0.917901184294705),
 (0.1815250911129801, 0.8923754472887421),
 (0.21428571428571427, 0.9523809523809523),
 (0.2222222222222222, 0.9863013698630136),
 (8, 156),
 (9, 40),
 (41, 42),
 (173, 174)}

In [83]:
score_rang

{(0.04597701149425287, 0.896551724137931),
 (0.07262569832402235, 0.6326530612244898),
 (0.0761904761904762, 0.9176470588235294),
 (0.09767441860465116, 0.8697674418604651),
 (0.10107973914385379, 0.917901184294705),
 (0.1815250911129801, 0.8923754472887421),
 (0.21428571428571427, 0.9523809523809523),
 (0.2222222222222222, 0.9863013698630136),
 (8, 156),
 (9, 40),
 (41, 42),
 (173, 174)}

In [84]:
# Attach bounds to the problem specification before re-running the check:
# a range for the 'acc' score and ranges for the 'tp' / 'tn' counts.
# NOTE(review): every generated fold has n equal to 41 or 42, so if these
# bounds are applied per fold the tn >= 50 floor cannot be met — confirm that
# an infeasible configuration is what this cell is meant to demonstrate.
problem['score_bounds'] = {'acc': (0.05, 1)}
problem['tptn_bounds'] = {'tp': (50, 10000), 'tn': (50, 10000)}

In [85]:
# Re-run the check now that the problem specification carries the bounds.
flag, details = check_kfold_mor_scores(
    scores=scores,
    eps=eps,
    dataset=problem,
    return_details=True,
)

[(0.05, 1), (0.05, 1), (0.05, 1), (0.05, 1), (0.05, 1), (0.05, 1), (0.05, 1), (0.05, 1), (0.05, 1), (0.05, 1)]
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/f52a7ea3f72647a0ad59aab54078bfab-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/f52a7ea3f72647a0ad59aab54078bfab-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 73 COLUMNS
At line 315 RHS
At line 384 BOUNDS
At line 405 ENDATA
Problem MODEL has 68 rows, 20 columns and 200 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Problem is infeasible - 0.00 seconds
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00



In [79]:
# NOTE(review): the solver log captured for the preceding check reports
# "Problem is infeasible", and this cell's execution count predates that run.
# If flag is False for an infeasible problem, this assertion fails on a fresh
# Restart & Run All — confirm whether `assert not flag` is intended here.
assert flag

In [38]:
# Overwrite the 'bacc' score with 0.9 (the computed value shown earlier was
# 0.5147) so that it no longer agrees with the other scores.
scores['bacc'] = 0.9

In [39]:
# Run the check again on the modified scores.
flag, details = check_kfold_mor_scores(
    scores=scores,
    eps=eps,
    dataset=problem,
    return_details=True,
)

[(0.5, 1), (0.5, 1), (0.5, 1), (0.5, 1), (0.5, 1), (0.5, 1), (0.5, 1), (0.5, 1), (0.5, 1), (0.5, 1)]
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/fc3215d22692423f8547b59dbe64b25b-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/fc3215d22692423f8547b59dbe64b25b-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 33 COLUMNS
At line 235 RHS
At line 264 BOUNDS
At line 285 ENDATA
Problem MODEL has 28 rows, 20 columns and 160 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Problem is infeasible - 0.00 seconds
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00



In [40]:
# With 'bacc' overwritten, the check is expected to fail.
assert not flag