In [33]:
import numpy as np

In [34]:
import common_datasets.binary_classification as bin_clas
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold

from mlscorecheck.core import consistency_aggregated_integer_programming_mos

In [35]:
# Load the appendicitis dataset and unpack the feature matrix and the labels.
dataset = bin_clas.load_appendicitis()
X, y = dataset['data'], dataset['target']

In [36]:
# Class counts of the whole dataset. Summing the labels counts the positives
# only if the labels are encoded as 0/1 (the fold loop below treats 1 as positive).
p = sum(dataset['target'])
total = len(dataset['target'])
n = total - p

In [37]:
# Per-fold confusion-matrix counts of the genuine evaluation.
tps = []
tns = []
fps = []
fns = []

# Per-fold counts after the deliberate perturbation of fp/fn (the "degenerate" case).
tps_degen = []
tns_degen = []
fps_degen = []
fns_degen = []

In [38]:
# Seeded generator; it drives the random perturbation of the fold counts.
random_state = np.random.RandomState(seed=5)

In [39]:
# Cross-validation layout: number of folds per repeat, number of repeats.
n_splits, n_repeats = 5, 1

In [40]:
# Seeded repeated stratified k-fold splitter, so the fold assignment is reproducible.
validator = RepeatedStratifiedKFold(
    n_splits=n_splits,
    n_repeats=n_repeats,
    random_state=5,
)

In [41]:
# Train and evaluate a random forest on every fold, recording two sets of
# confusion-matrix counts per fold:
#   * the genuine counts (tps/tns/fps/fns), and
#   * "degenerate" counts (…_degen) in which fn and fp are each randomly reduced
#     by 0 or 1, so that they no longer describe a real evaluation of the fold.
# The stratified splitter needs only X and y; no groups argument is passed.
for train, test in validator.split(X, y):
    X_train, X_test = X[train], X[test]
    y_train, y_test = y[train], y[test]

    # Seed the forest as well: the splitter and the perturbation are already
    # seeded, and an unseeded classifier would make the scores change on every
    # fresh run of the notebook.
    classifier = RandomForestClassifier(random_state=5)
    classifier.fit(X_train, y_train)
    pred = classifier.predict(X_test)

    # Confusion-matrix entries; label 1 is treated as positive, 0 as negative.
    correct = pred == y_test
    tp = np.sum(correct & (y_test == 1))
    tn = np.sum(correct & (y_test == 0))
    fp = np.sum(~correct & (y_test == 0))
    fn = np.sum(~correct & (y_test == 1))

    tps.append(tp)
    tns.append(tn)
    fps.append(fp)
    fns.append(fn)

    # Perturb fn first, then fp (the draw order matters for reproducibility).
    # randint(0, 2) yields 0 or 1, so a count that is already 0 may become -1.
    fn_degen = fn - random_state.randint(0, 2)
    fp_degen = fp - random_state.randint(0, 2)

    tps_degen.append(tp)
    tns_degen.append(tn)
    fps_degen.append(fp_degen)
    fns_degen.append(fn_degen)

In [42]:
# Arrange the flat per-fold counts as (repeats, folds): the splitter yields
# n_splits folds per repeat, so each row collects the folds of one repeat.
# The row width is taken from n_splits rather than a hard-coded 5, so this
# cell stays correct when the cross-validation configuration changes.
tps = np.array(tps).reshape((-1, n_splits))
tns = np.array(tns).reshape((-1, n_splits))
fps = np.array(fps).reshape((-1, n_splits))
fns = np.array(fns).reshape((-1, n_splits))

tps_degen = np.array(tps_degen).reshape((-1, n_splits))
tns_degen = np.array(tns_degen).reshape((-1, n_splits))
fps_degen = np.array(fps_degen).reshape((-1, n_splits))
fns_degen = np.array(fns_degen).reshape((-1, n_splits))

In [43]:
# Collapse the fold axis: one total count per row (i.e. per repeat).
tps = tps.sum(axis=1)
tns = tns.sum(axis=1)
fps = fps.sum(axis=1)
fns = fns.sum(axis=1)

# Same aggregation for the perturbed counts.
tps_degen = tps_degen.sum(axis=1)
tns_degen = tns_degen.sum(axis=1)
fps_degen = fps_degen.sum(axis=1)
fns_degen = fns_degen.sum(axis=1)

In [44]:
# Scores computed from the aggregated genuine counts.
sens = tps / (tps + fns)
spec = tns / (tns + fps)
acc = (tps + tns) / (tps + tns + fps + fns)

# The same scores computed from the perturbed counts.
sens_degen = tps_degen / (tps_degen + fns_degen)
spec_degen = tns_degen / (tns_degen + fps_degen)
acc_degen = (tps_degen + tns_degen) / (tps_degen + tns_degen + fps_degen + fns_degen)

In [45]:
# Average every score array down to a single scalar.
acc = acc.mean()
sens = sens.mean()
spec = spec.mean()

acc_degen = acc_degen.mean()
sens_degen = sens_degen.mean()
spec_degen = spec_degen.mean()

In [46]:
# Number of decimals the scores are rounded to, and the matching
# tolerance (10 to the power of minus that many decimals).
rounding = 4
eps = 10 ** -rounding

In [47]:
# Round the genuine scores to `rounding` decimals.
acc, sens, spec = (np.round(score, rounding) for score in (acc, sens, spec))

# Round the perturbed scores the same way.
acc_degen, sens_degen, spec_degen = (
    np.round(score, rounding) for score in (acc_degen, sens_degen, spec_degen)
)

In [48]:
# Consistency check for the genuine, rounded scores: p and n are the dataset's
# positive/negative totals repeated once per repeat, eps is the rounding tolerance.
# The call is the cell's last expression, so its return value is displayed.
# NOTE(review): np.sum(1.0 - y) produces a float count, and the p / n values
# computed earlier in the notebook are not used here - confirm this is intended.
consistency_aggregated_integer_programming_mos(p=np.repeat(np.sum(y), n_repeats),
                                               n=np.repeat(np.sum(1.0 - y), n_repeats),
                                               acc=acc,
                                               sens=sens,
                                               spec=spec,
                                               eps=eps)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/3558ca9a51e14b02b8492e5b24b578d4-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/3558ca9a51e14b02b8492e5b24b578d4-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 11 COLUMNS
At line 25 RHS
At line 32 BOUNDS
At line 35 ENDATA
Problem MODEL has 6 rows, 2 columns and 8 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 12.9969 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at root node changed objective from 13 to -1.79769e+308
Probing was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Gomory was tried 0 time

True

In [49]:
# Same consistency check, but fed with the scores derived from the randomly
# perturbed fp/fn counts; the class totals and the tolerance are unchanged.
# The call is the cell's last expression, so its return value is displayed.
consistency_aggregated_integer_programming_mos(p=np.repeat(np.sum(y), n_repeats),
                                               n=np.repeat(np.sum(1.0 - y), n_repeats),
                                               acc=acc_degen,
                                               sens=sens_degen,
                                               spec=spec_degen,
                                               eps=eps)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/3f9f26bbc87040e6a44ca6d2412ad7f9-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/3f9f26bbc87040e6a44ca6d2412ad7f9-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 11 COLUMNS
At line 25 RHS
At line 32 BOUNDS
At line 35 ENDATA
Problem MODEL has 6 rows, 2 columns and 8 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Problem is infeasible - 0.00 seconds
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00



False

In [None]:
# A Multivariate Multiscale Fuzzy Entropy Algorithm with Application to Uterine EMG Complexity Analysis

In [50]:
# Hard-coded accuracy, sensitivity and specificity for a separate check.
# NOTE: this overwrites acc_degen / sens_degen / spec_degen and eps from the
# cross-validation experiment earlier in the notebook.
acc_degen, sens_degen, spec_degen = 0.93, 0.9, 0.97

# Tolerance for this check; presumably chosen because the scores above are
# given to two decimals - TODO confirm.
eps = 1e-2

In [51]:
# Consistency check for the hard-coded scores, with 38 positives and 262
# negatives in a single evaluation (each value wrapped in a length-1 array).
# The call is the cell's last expression, so its return value is displayed.
# NOTE(review): the class counts are magic numbers - presumably taken from the
# paper named in the comment cell above; confirm and cite the source.
consistency_aggregated_integer_programming_mos(p=np.repeat(38, 1),
                                               n=np.repeat(262, 1),
                                               acc=np.array([acc_degen]),
                                               sens=np.array([sens_degen]),
                                               spec=np.array([spec_degen]),
                                               eps=eps)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/3dbaa3e73c4c411d935e36f0bb622d89-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/3dbaa3e73c4c411d935e36f0bb622d89-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 11 COLUMNS
At line 25 RHS
At line 32 BOUNDS
At line 35 ENDATA
Problem MODEL has 6 rows, 2 columns and 8 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Problem is infeasible - 0.00 seconds
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00



False