In [54]:
import re

import pandas as pd

from mlscorecheck.individual import calculate_scores
from mlscorecheck.scores import score_functions_with_solutions

In [55]:
# Per-fold counts in the usual confusion-matrix notation: p/n are the numbers
# of positive/negative samples, tp/tn the true positives/true negatives.
folds = [
    {'p': 100, 'n': 201, 'tp': 78, 'tn': 189},
    {'p': 100, 'n': 200, 'tp': 65, 'tn': 191},
    {'p': 100, 'n': 200, 'tp': 81, 'tn': 160},
    {'p': 101, 'n': 200, 'tp': 75, 'tn': 164},
    {'p': 101, 'n': 200, 'tp': 72, 'tn': 171},
]

In [56]:
# Tabulate the folds with the running fold number as the first column,
# then emit the table as LaTeX source.
tmp = (
    pd.DataFrame(folds)
    .rename_axis('fold')
    .reset_index()
)
print(tmp.to_latex(index=False))

\begin{tabular}{rrrrr}
\toprule
fold & p & n & tp & tn \\
\midrule
0 & 100 & 201 & 78 & 189 \\
1 & 100 & 200 & 65 & 191 \\
2 & 100 & 200 & 81 & 160 \\
3 & 101 & 200 & 75 & 164 \\
4 & 101 & 200 & 72 & 171 \\
\bottomrule
\end{tabular}



In [57]:
# Beta parameters handed to calculate_scores together with the counts
# (presumably consumed by the F-beta style scores).
beta_params = {'beta_positive': 2, 'beta_negative': 2}

# mos_scores: evaluate every fold separately, then average each score
# over the folds.
fold_scores = [calculate_scores(fold | beta_params) for fold in folds]
mos_scores = dict.fromkeys(score_functions_with_solutions, 0.0)
for scores in fold_scores:
    for name, value in scores.items():
        mos_scores[name] += value
mos_scores = {name: total / len(folds) for name, total in mos_scores.items()}

# som_scores: pool the counts of all folds first, then evaluate the scores
# once on the pooled counts.
totals = dict.fromkeys(('p', 'n', 'tp', 'tn'), 0)
for fold in folds:
    for name, count in fold.items():
        totals[name] += count
som_scores = calculate_scores(totals | beta_params)



In [58]:
# One row per score, one column per aggregation; the column labels are
# supplied as row labels before transposing.
tmp = pd.DataFrame([mos_scores, som_scores], index=['MoS', 'SoM']).T

In [59]:
# Show floats with thousands separators and four decimals.
pd.set_option('display.float_format', '{:,.4f}'.format)

In [60]:
import numpy as np

In [61]:
# Round both aggregate columns to four decimals, value by value.
for column in ('MoS', 'SoM'):
    tmp[column] = tmp[column].map(lambda value: np.round(value, 4))

In [62]:
# Leave out the fbp, fbn and p4 rows and turn the score names (the row
# labels) into a regular 'score' column.
tmp = (
    tmp
    .drop(index=['fbp', 'fbn', 'p4'])
    .rename_axis('score')
    .reset_index()
)
latex = tmp.to_latex(index=False)

In [81]:
# Order the rows alphabetically by score name.
tmp = tmp.sort_values(by='score')

In [82]:
# Lay the long score table out as four side-by-side column groups: three
# groups of five rows each, with the remaining rows in the last group.
row_bounds = [(0, 5), (5, 10), (10, 15), (15, None)]
tmp2 = pd.concat(
    [tmp.iloc[start:stop].reset_index(drop=True) for start, stop in row_bounds],
    axis=1,
)

In [83]:
# Render the side-by-side score table as LaTeX source, without the row index.
latex = tmp2.to_latex(index=False)

In [84]:
# Swap the plain-text score names for their LaTeX math-mode symbols.
latex = latex.replace('f1p', '$f^1_+$').replace('f1n', '$f^1_-$')
# Trim zero padding beyond four decimals (e.g. 28.017400 -> 28.0174). Anchoring
# on the decimal point keeps genuine zeros intact: a blanket replace('00', '')
# would turn 0.800100 into 0.81.
latex = re.sub(r'(\.\d{4})0+\b', r'\1', latex)

In [85]:
# print() rather than a bare expression, so the LaTeX source is shown with
# real line breaks instead of as an escaped string repr.
print(latex)

\begin{tabular}{lrrlrrlrrlrr}
\toprule
score & MoS & SoM & score & MoS & SoM & score & MoS & SoM & score & MoS & SoM \\
\midrule
acc & 0.8290 & 0.8290 & $f^1_+$ & 0.7443 & 0.7427 & lrn & 0.2975 & 0.2985 & ppv & 0.7606 & 0.7465 \\
bacc & 0.8066 & 0.8066 & fm & 0.7471 & 0.7428 & lrp & 8.1202 & 5.8713 & pt & 0.2795 & 0.2921 \\
bm & 0.6131 & 0.6132 & gm & 0.8021 & 0.8038 & mcc & 0.6215 & 0.6147 & sens & 0.7391 & 0.7390 \\
dor & 28.0174 & 19.6671 & ji & 0.5945 & 0.5908 & mk & 0.6312 & 0.6163 & spec & 0.8741 & 0.8741 \\
$f^1_-$ & 0.8709 & 0.8719 & kappa & 0.6165 & 0.6147 & npv & 0.8706 & 0.8698 & upm & 0.8025 & 0.8022 \\
\bottomrule
\end{tabular}



In [86]:
from mlscorecheck.check import check_1_dataset_known_folds_mos_scores

In [87]:
# Display the fold definitions again for reference before running the check.
folds

[{'p': 100, 'n': 201, 'tp': 78, 'tn': 189},
 {'p': 100, 'n': 200, 'tp': 65, 'tn': 191},
 {'p': 100, 'n': 200, 'tp': 81, 'tn': 160},
 {'p': 101, 'n': 200, 'tp': 75, 'tn': 164},
 {'p': 101, 'n': 200, 'tp': 72, 'tn': 171}]

In [97]:
# Round every averaged score to four decimals.
mos_scores = {name: np.round(score, 4) for name, score in mos_scores.items()}

In [98]:
# Keep only accuracy, sensitivity and specificity for the consistency check.
selected_scores = ('acc', 'sens', 'spec')
mos_scores_test = {
    name: score
    for name, score in mos_scores.items()
    if name in selected_scores
}

In [99]:
# Only the fold sizes (p, n) are passed to the check, not the tp/tn counts.
fold_sizes = [{'p': fold['p'], 'n': fold['n']} for fold in folds]

# The dataset totals equal the summed fold sizes (502 positives, 1001
# negatives); eps presumably reflects the four-decimal rounding of the scores.
check_1_dataset_known_folds_mos_scores(
    dataset={'p': 502, 'n': 1001},
    folding={'folds': fold_sizes},
    scores=mos_scores_test,
    eps=1e-4,
)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/c8e3067943154da192f05a118b77bcf2-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/c8e3067943154da192f05a118b77bcf2-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 11 COLUMNS
At line 73 RHS
At line 80 BOUNDS
At line 92 ENDATA
Problem MODEL has 6 rows, 11 columns and 40 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0004I processed model has 3 rows, 6 columns (6 integer (0 of which binary)) and 12 elements
Cbc0012I Integer solution of 0 found by DiveCoefficient after 8 iterations and 0 nodes (0.00 seconds)
Cbc0031I 1 added rows had average density of 3
Cbc0013I At root node, 1 cuts changed objective from 0 to 0 in 5 passes
Cbc0014I Cut generator 0

{'inconsistency': False,
 'lp_scores': {'acc': 0.8290786267995571,
  'sens': 0.7391881188118812,
  'spec': 0.874,
  'bacc': 0.8065940594059405},
 'lp_status': 'feasible',
 'lp_configuration_scores_match': True,
 'lp_configuration_bounds_match': True,
 'lp_configuration': {'evaluations': [{'folds': {'folds': [{'fold': {'p': 100,
        'n': 201,
        'identifier': 'eihnh',
        'tp': 100.0,
        'tn': 201.0},
       'scores': {'acc': 1.0, 'sens': 1.0, 'spec': 1.0, 'bacc': 1.0},
       'score_bounds': None,
       'bounds_flag': True},
      {'fold': {'p': 100,
        'n': 200,
        'identifier': 'youur',
        'tp': 100.0,
        'tn': 200.0},
       'scores': {'acc': 1.0, 'sens': 1.0, 'spec': 1.0, 'bacc': 1.0},
       'score_bounds': None,
       'bounds_flag': True},
      {'fold': {'p': 100,
        'n': 200,
        'identifier': 'dqjok',
        'tp': 29.0,
        'tn': 200.0},
       'scores': {'acc': 0.7633333333333334,
        'sens': 0.29,
        'spec': 1.0,

In [104]:
# Independent shallow copy, so the original scores stay untouched when one
# entry is altered.
mos_scores_test2 = dict(mos_scores_test)

In [105]:
# Show the rounded reference scores that the modified copy starts from.
mos_scores_test

{'acc': 0.829, 'sens': 0.7391, 'spec': 0.8741}

In [106]:
# Overwrite the accuracy in the copy with a slightly different value (0.830).
mos_scores_test2.update(acc=0.830)

In [107]:
# Only the fold sizes (p, n) are passed to the check, not the tp/tn counts.
fold_sizes = [{'p': fold['p'], 'n': fold['n']} for fold in folds]

# Same dataset, folding and eps as for the unmodified scores; only the score
# dictionary differs (the copy with the altered accuracy).
check_1_dataset_known_folds_mos_scores(
    dataset={'p': 502, 'n': 1001},
    folding={'folds': fold_sizes},
    scores=mos_scores_test2,
    eps=1e-4,
)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/372bf5d3d71047de925be4ab45c58162-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/372bf5d3d71047de925be4ab45c58162-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 11 COLUMNS
At line 73 RHS
At line 80 BOUNDS
At line 92 ENDATA
Problem MODEL has 6 rows, 11 columns and 40 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Problem is infeasible - 0.00 seconds
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00



{'inconsistency': True,
 'lp_status': 'infeasible',
 'lp_configuration': {'evaluations': [{'folds': {'folds': [{'fold': {'p': 100,
        'n': 201,
        'identifier': 'yxsjr',
        'tp': 0.0,
        'tn': 201.0},
       'scores': {'acc': 0.6677740863787376,
        'sens': 0.0,
        'spec': 1.0,
        'bacc': 0.5},
       'score_bounds': None,
       'bounds_flag': True},
      {'fold': {'p': 100,
        'n': 200,
        'identifier': 'mjzul',
        'tp': 69.897337,
        'tn': 200.0},
       'scores': {'acc': 0.89965779,
        'sens': 0.6989733699999999,
        'spec': 1.0,
        'bacc': 0.849486685},
       'score_bounds': None,
       'bounds_flag': True},
      {'fold': {'p': 100,
        'n': 200,
        'identifier': 'vynmu',
        'tp': 100.0,
        'tn': 200.0},
       'scores': {'acc': 1.0, 'sens': 1.0, 'spec': 1.0, 'bacc': 1.0},
       'score_bounds': None,
       'bounds_flag': True},
      {'fold': {'p': 101,
        'n': 200,
        'identifie