In [5]:
import pulp as pl

from mlscorecheck.core import add_accuracy_mor, generate_structure_1, generate_structure_group
from mlscorecheck.core import consistency_1, consistency_grouped
from mlscorecheck.utils import (calculate_scores_mor)
from mlscorecheck.datasets import lookup_dataset


In [6]:
lookup_dataset('common_datasets.ADA')

{'p': 1029, 'n': 3118}

In [7]:
import common_datasets.binary_classification as binclas

In [8]:
# Keep only the three summary columns used below: the dataset name, 'n' and 'n_minority'.
# The following cell treats 'n' as the total sample count and 'n_minority' as the positive count.
cols = binclas.get_summary_pdf()[['name', 'n', 'n_minority']]

In [9]:
# Turn (total, minority) counts into (p, n) counts:
# 'n_minority' becomes 'p', and the new 'n' is the remainder total - p.
cols = (
    cols
    .rename(columns={'n_minority': 'p', 'n': 'total'})
    .assign(n=lambda frame: frame['total'] - frame['p'])
    .drop(columns='total')
)

In [10]:
cols.to_dict('records')

[{'name': 'abalone19', 'p': 32, 'n': 4142},
 {'name': 'abalone9_18', 'p': 42, 'n': 689},
 {'name': 'abalone-17_vs_7-8-9-10', 'p': 58, 'n': 2280},
 {'name': 'abalone-19_vs_10-11-12-13', 'p': 32, 'n': 1590},
 {'name': 'abalone-20_vs_8_9_10', 'p': 26, 'n': 1890},
 {'name': 'abalone-22_vs_8', 'p': 14, 'n': 567},
 {'name': 'abalone-3_vs_11', 'p': 15, 'n': 487},
 {'name': 'ADA', 'p': 1029, 'n': 3118},
 {'name': 'appendicitis', 'p': 21, 'n': 85},
 {'name': 'australian', 'p': 307, 'n': 383},
 {'name': 'bupa', 'p': 145, 'n': 200},
 {'name': 'car_good', 'p': 69, 'n': 1659},
 {'name': 'car-vgood', 'p': 65, 'n': 1663},
 {'name': 'cleveland-0_vs_4', 'p': 13, 'n': 164},
 {'name': 'CM1', 'p': 49, 'n': 449},
 {'name': 'crx', 'p': 296, 'n': 357},
 {'name': 'dermatology-6', 'p': 20, 'n': 338},
 {'name': 'ecoli1', 'p': 77, 'n': 259},
 {'name': 'ecoli2', 'p': 52, 'n': 284},
 {'name': 'ecoli3', 'p': 35, 'n': 301},
 {'name': 'ecoli4', 'p': 20, 'n': 316},
 {'name': 'ecoli_0_1_3_7_vs_2_6', 'p': 7, 'n': 274},


In [11]:
# Two hand-written problem descriptions that differ only in their p/n totals and
# fold count; every score is bounded to [0.8, 1.0] in both. Each problem gets its
# own score_bounds dict. `scores` holds the score values to go with them.
problems = [
    {
        'p': p_total,
        'n': n_total,
        'n_repeats': 2,
        'n_folds': fold_count,
        'score_bounds': {score: (0.8, 1.0) for score in ('acc', 'spec', 'sens', 'bacc')},
    }
    for p_total, n_total, fold_count in ((10, 20, 3), (15, 25, 5))
]
scores = dict(acc=0.95, sens=0.92, spec=0.98, bacc=0.93)

In [3]:
# NOTE(review): generate_folding_problem is not among the names imported in the
# import cell at the top of this notebook. Unless it is imported elsewhere, this
# cell raises NameError on a fresh kernel — confirm its module and import it there.
# NOTE(review): the generated folds look random; if the function accepts a seed,
# pass one so the outputs below are reproducible — TODO confirm.
folds, problem = generate_folding_problem(n_repeats=2)

In [4]:
problem

{'p': 151, 'n': 456, 'n_folds': 5, 'n_repeats': 2}

In [5]:
folds

[{'p': 31, 'n': 91, 'tp': 8, 'tn': 66},
 {'p': 30, 'n': 92, 'tp': 28, 'tn': 60},
 {'p': 30, 'n': 91, 'tp': 5, 'tn': 72},
 {'p': 30, 'n': 91, 'tp': 29, 'tn': 52},
 {'p': 30, 'n': 91, 'tp': 24, 'tn': 39},
 {'p': 31, 'n': 91, 'tp': 30, 'tn': 79},
 {'p': 30, 'n': 92, 'tp': 24, 'tn': 12},
 {'p': 30, 'n': 91, 'tp': 25, 'tn': 51},
 {'p': 30, 'n': 91, 'tp': 19, 'tn': 3},
 {'p': 30, 'n': 91, 'tp': 1, 'tn': 63}]

In [6]:
scores = calculate_scores_mor(folds)

In [7]:
# Check whether `scores` is achievable for `problem`. With return_details=True the
# call returns a (flag, details) pair; the details hold a per-fold tp/tn configuration.
# NOTE(review): eps is presumably the tolerance allowed on each score — confirm.
# NOTE(review): the call prints the full CBC solver log; consider silencing it if the
# library offers a switch for that.
flag, configuration = consistency_1(problem,
            scores=scores,
            eps=1e-4,
            return_details=True)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/8ff1e166067e4104b4d0928d607dd8e8-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/8ff1e166067e4104b4d0928d607dd8e8-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 13 COLUMNS
At line 175 RHS
At line 184 BOUNDS
At line 205 ENDATA
Problem MODEL has 8 rows, 20 columns and 120 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0004I processed model has 8 rows, 7 columns (7 integer (0 of which binary)) and 42 elements
Cutoff increment increased from 1e-05 to 0.9999
Cbc0012I Integer solution of 0 found by DiveCoefficient after 9 iterations and 0 nodes (0.01 seconds)
Cbc0031I 4 added rows had average density of 2.75
Cbc0013I At root node, 4 cuts changed obj

In [8]:
configuration

{'overall_consistency': True,
 'configuration': [{'p': 31, 'n': 91, 'tn': 4.0, 'tp': 0.0},
  {'p': 30, 'n': 92, 'tn': 0.0, 'tp': 0.0},
  {'p': 30, 'n': 91, 'tn': 0.0, 'tp': 30.0},
  {'p': 30, 'n': 91, 'tn': 0.0, 'tp': 30.0},
  {'p': 30, 'n': 91, 'tn': 65.0, 'tp': 30.0},
  {'p': 31, 'n': 91, 'tn': 91.0, 'tp': 7.0},
  {'p': 30, 'n': 92, 'tn': 64.0, 'tp': 5.0},
  {'p': 30, 'n': 91, 'tn': 91.0, 'tp': 30.0},
  {'p': 30, 'n': 91, 'tn': 91.0, 'tp': 30.0},
  {'p': 30, 'n': 91, 'tn': 91.0, 'tp': 30.0}]}

In [9]:
calculate_scores_mor(configuration['configuration'])

{'acc': 0.5682631079799485,
 'sens': 0.639247311827957,
 'spec': 0.5453893932154802,
 'bacc': 0.5923183525217185}

In [10]:
scores

{'acc': 0.5681682698821298,
 'sens': 0.639247311827957,
 'spec': 0.5452938365981844,
 'bacc': 0.5922705742130706}

In [32]:
# Generate two folding problems (1 and 2 repeats, 3 folds each) and collect the
# generated folds and the matching problem descriptions in parallel lists.
folds, problems = [], []
for repeat_count in (1, 2):
    generated_folds, generated_problem = generate_folding_problem(n_repeats=repeat_count, n_folds=3)
    folds.append(generated_folds)
    problems.append(generated_problem)

In [33]:
folds

[[{'p': 154, 'n': 98, 'tp': 115, 'tn': 21},
  {'p': 154, 'n': 98, 'tp': 92, 'tn': 81},
  {'p': 154, 'n': 98, 'tp': 29, 'tn': 17}],
 [{'p': 146, 'n': 177, 'tp': 144, 'tn': 91},
  {'p': 145, 'n': 178, 'tp': 12, 'tn': 83},
  {'p': 145, 'n': 178, 'tp': 134, 'tn': 82},
  {'p': 146, 'n': 177, 'tp': 33, 'tn': 11},
  {'p': 145, 'n': 178, 'tp': 100, 'tn': 142},
  {'p': 145, 'n': 178, 'tp': 37, 'tn': 10}]]

In [34]:
problems

[{'p': 462, 'n': 294, 'n_folds': 3, 'n_repeats': 1},
 {'p': 436, 'n': 533, 'n_folds': 3, 'n_repeats': 2}]

In [35]:
# NOTE(review): calculate_score_mor_grouped is not imported in the import cell at the
# top of this notebook (only calculate_scores_mor is, with a plural "scores").
# Confirm the exact name and module, and add the import there.
scores = calculate_score_mor_grouped(folds)

In [36]:
scores

{'acc': 0.4615685455468737,
 'sens': 0.5190823308324488,
 'spec': 0.3988117909513834,
 'bacc': 0.45894706089191617}

In [37]:
folds

[[{'p': 154, 'n': 98, 'tp': 115, 'tn': 21},
  {'p': 154, 'n': 98, 'tp': 92, 'tn': 81},
  {'p': 154, 'n': 98, 'tp': 29, 'tn': 17}],
 [{'p': 146, 'n': 177, 'tp': 144, 'tn': 91},
  {'p': 145, 'n': 178, 'tp': 12, 'tn': 83},
  {'p': 145, 'n': 178, 'tp': 134, 'tn': 82},
  {'p': 146, 'n': 177, 'tp': 33, 'tn': 11},
  {'p': 145, 'n': 178, 'tp': 100, 'tn': 142},
  {'p': 145, 'n': 178, 'tp': 37, 'tn': 10}]]

In [38]:
# Grouped counterpart of the single-problem check: `problems` is the list of problem
# descriptions built above and `scores` the scores computed from the grouped folds.
# With return_details=True the call returns a (flag, details) pair.
# NOTE(review): the lists and the constraint dictionary printed by this call look like
# leftover debug output from the library — confirm and remove at the source.
flag, configuration = consistency_grouped(problems,
            scores=scores,
            eps=1e-4,
            return_details=True)

[154, 154, 154, 146, 145, 145, 146, 145, 145]
[98, 98, 98, 177, 178, 178, 177, 178, 178]
[tp_0_0, tp_0_1, tp_0_2, tp_1_0, tp_1_1, tp_1_2, tp_1_3, tp_1_4, tp_1_5]
[tn_0_0, tn_0_1, tn_0_2, tn_1_0, tn_1_1, tn_1_2, tn_1_3, tn_1_4, tn_1_5]
[None, None]
[[0, 1, 2], [3, 4, 5, 6, 7, 8]]
OrderedDict([('_C1', 0.0006613756613756613*tn_0_0 + 0.0006613756613756613*tn_0_1 + 0.0006613756613756613*tn_0_2 + 0.00025799793601651185*tn_1_0 + 0.00025799793601651185*tn_1_1 + 0.00025799793601651185*tn_1_2 + 0.00025799793601651185*tn_1_3 + 0.00025799793601651185*tn_1_4 + 0.00025799793601651185*tn_1_5 + 0.0006613756613756613*tp_0_0 + 0.0006613756613756613*tp_0_1 + 0.0006613756613756613*tp_0_2 + 0.00025799793601651185*tp_1_0 + 0.00025799793601651185*tp_1_1 + 0.00025799793601651185*tp_1_2 + 0.00025799793601651185*tp_1_3 + 0.00025799793601651185*tp_1_4 + 0.00025799793601651185*tp_1_5 + -0.4616685455468737 <= 0), ('_C2', -0.0006613756613756613*tn_0_0 + -0.0006613756613756613*tn_0_1 + -0.0006613756613756613*tn_0_2 

In [39]:
flag

True

In [40]:
folds

[[{'p': 154, 'n': 98, 'tp': 115, 'tn': 21},
  {'p': 154, 'n': 98, 'tp': 92, 'tn': 81},
  {'p': 154, 'n': 98, 'tp': 29, 'tn': 17}],
 [{'p': 146, 'n': 177, 'tp': 144, 'tn': 91},
  {'p': 145, 'n': 178, 'tp': 12, 'tn': 83},
  {'p': 145, 'n': 178, 'tp': 134, 'tn': 82},
  {'p': 146, 'n': 177, 'tp': 33, 'tn': 11},
  {'p': 145, 'n': 178, 'tp': 100, 'tn': 142},
  {'p': 145, 'n': 178, 'tp': 37, 'tn': 10}]]

In [41]:
configuration['configuration']

[[{'tn': 78.0, 'tp': 0.0, 'p': 154, 'n': 98},
  {'tn': 0.0, 'tp': 154.0, 'p': 154, 'n': 98},
  {'tn': 0.0, 'tp': 18.0, 'p': 154, 'n': 98}],
 [{'tn': 0.0, 'tp': 76.0, 'p': 146, 'n': 177},
  {'tn': 0.0, 'tp': 69.0, 'p': 145, 'n': 178},
  {'tn': 152.0, 'tp': 145.0, 'p': 145, 'n': 178},
  {'tn': 60.0, 'tp': 0.0, 'p': 146, 'n': 177},
  {'tn': 178.0, 'tp': 145.0, 'p': 145, 'n': 178},
  {'tn': 178.0, 'tp': 145.0, 'p': 145, 'n': 178}]]

In [42]:
calculate_score_mor_grouped(configuration['configuration'])

{'acc': 0.46152554589087097,
 'sens': 0.5191813539947692,
 'spec': 0.39872936415091653,
 'bacc': 0.4589553590728428}

In [43]:
calculate_score_mor_grouped(folds)

{'acc': 0.4615685455468737,
 'sens': 0.5190823308324488,
 'spec': 0.3988117909513834,
 'bacc': 0.45894706089191617}

In [44]:
# Inspect the structure generated for a single problem
# (2 repeats x 3 folds, with score bounds and tp/tn bounds).
generate_structure_1(
    {
        'p': 10,
        'n': 20,
        'n_repeats': 2,
        'n_folds': 3,
        'score_bounds': {'acc': (0.9, 1.0)},
        'tptn_bounds': {'tp': (5, 10), 'tn': (15, 20)},
    }
)

([4, 3, 3, 4, 3, 3],
 [6, 7, 7, 6, 7, 7],
 [tp_0, tp_1, tp_2, tp_3, tp_4, tp_5],
 [tn_0, tn_1, tn_2, tn_3, tn_4, tn_5],
 [{'acc': (0.9, 1.0)},
  {'acc': (0.9, 1.0)},
  {'acc': (0.9, 1.0)},
  {'acc': (0.9, 1.0)},
  {'acc': (0.9, 1.0)},
  {'acc': (0.9, 1.0)}],
 [[0], [1], [2], [3], [4], [5]],
 [{'tp': (5, 10), 'tn': (15, 20)},
  {'tp': (5, 10), 'tn': (15, 20)},
  {'tp': (5, 10), 'tn': (15, 20)},
  {'tp': (5, 10), 'tn': (15, 20)},
  {'tp': (5, 10), 'tn': (15, 20)},
  {'tp': (5, 10), 'tn': (15, 20)}])

In [45]:
# Same problem description as in the single-problem case, passed as a one-element
# list to the grouped structure generator.
generate_structure_group(
    [
        {
            'p': 10,
            'n': 20,
            'n_repeats': 2,
            'n_folds': 3,
            'score_bounds': {'acc': (0.9, 1.0)},
            'tptn_bounds': {'tp': (5, 10), 'tn': (15, 20)},
        }
    ]
)

([4, 3, 3, 4, 3, 3],
 [6, 7, 7, 6, 7, 7],
 [tp_0_0, tp_0_1, tp_0_2, tp_0_3, tp_0_4, tp_0_5],
 [tn_0_0, tn_0_1, tn_0_2, tn_0_3, tn_0_4, tn_0_5],
 [{'acc': (0.9, 1.0)}],
 [[0, 1, 2, 3, 4, 5]])

In [46]:
problem = pl.LpProblem("feasibility")

In [47]:
# Positive / negative counts for six entries. They are used below as the upper bounds
# of the tp_i / tn_i variables and are passed to add_accuracy_mor, where the entries
# are grouped as [0, 1, 2], [3, 4] and [5].
p = [5, 5, 5, 4, 4, 3]
n = [10, 10, 10, 20, 20, 30]

In [48]:
# One bounded integer decision variable per entry: 0 <= tp_i <= p[i] and 0 <= tn_i <= n[i].
tps = [
    pl.LpVariable(f"tp_{idx}", lowBound=0, upBound=upper, cat=pl.LpInteger)
    for idx, upper in enumerate(p)
]
tns = [
    pl.LpVariable(f"tn_{idx}", lowBound=0, upBound=upper, cat=pl.LpInteger)
    for idx, upper in enumerate(n)
]

In [49]:
# Adding a bare expression to an LpProblem sets its objective; it shows up as
# "MINIMIZE 1*tp_0" when the problem is printed. The problem is named "feasibility",
# so the objective is presumably just a placeholder — confirm.
problem += tps[0]

In [50]:
# Hand check of a coefficient: 1 / ((n + p) * entries in the group * number of groups)
# with n=10, p=5, 3 entries in the first group and 3 groups. The result equals the
# tp_0 / tn_0 coefficient of constraint _C1 in the problem printed by add_accuracy_mor.
1.0 / ((10 + 5) * 3 * 3)

0.007407407407407408

In [51]:
# Hand check of a coefficient: 1 / ((n + p) * entries in the group) with n=10, p=5 and
# 3 entries in the first group. The result equals the coefficients of constraints
# _C3 / _C4 in the problem printed by add_accuracy_mor.
1.0 / ((10 + 5)*3)

0.022222222222222223

In [52]:
# Add the accuracy constraints to `problem` and display the result.
# In the printed problem, _C1/_C2 bound the overall accuracy expression to
# [acc - eps, acc + eps] = [0.9499, 0.9501]. From _C3 onward each group in `groups`
# gets its own lower/upper bound, taken from the matching entry of acc_bounds
# (e.g. group [0, 1, 2] -> (0.5, 1.0)).
add_accuracy_mor(problem=problem,
                    tps=tps,
                    tns=tns,
                    p=p,
                    n=n,
                    acc=0.95,
                    eps=0.0001,
                    acc_bounds=[(0.5, 1.0), (0.6, 1.0), (0.7, 1.0)],
                    groups=[[0, 1, 2], [3, 4], [5]])

feasibility:
MINIMIZE
1*tp_0 + 0
SUBJECT TO
_C1: 0.00740740740741 tn_0 + 0.00740740740741 tn_1 + 0.00740740740741 tn_2
 + 0.00694444444444 tn_3 + 0.00694444444444 tn_4 + 0.010101010101 tn_5
 + 0.00740740740741 tp_0 + 0.00740740740741 tp_1 + 0.00740740740741 tp_2
 + 0.00694444444444 tp_3 + 0.00694444444444 tp_4 + 0.010101010101 tp_5
 <= 0.9501

_C2: - 0.00740740740741 tn_0 - 0.00740740740741 tn_1 - 0.00740740740741 tn_2
 - 0.00694444444444 tn_3 - 0.00694444444444 tn_4 - 0.010101010101 tn_5
 - 0.00740740740741 tp_0 - 0.00740740740741 tp_1 - 0.00740740740741 tp_2
 - 0.00694444444444 tp_3 - 0.00694444444444 tp_4 - 0.010101010101 tp_5
 <= -0.9499

_C3: 0.0222222222222 tn_0 + 0.0222222222222 tn_1 + 0.0222222222222 tn_2
 + 0.0222222222222 tp_0 + 0.0222222222222 tp_1 + 0.0222222222222 tp_2 >= 0.5

_C4: 0.0222222222222 tn_0 + 0.0222222222222 tn_1 + 0.0222222222222 tn_2
 + 0.0222222222222 tp_0 + 0.0222222222222 tp_1 + 0.0222222222222 tp_2 <= 1

_C5: 0.0208333333333 tn_3 + 0.0208333333333 tn_4 + 