In [2]:
import numpy as np
from sklearn.model_selection import StratifiedKFold

In [3]:
# Toy label vector: 22 samples over five classes (3, 3, 5, 6 and 5 samples for labels 0-4).
# NOTE(review): no later cell visible here reads this global; the functions below build their own `y`.
y = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4])

In [4]:
# Class label -> number of samples with that label; multiclass_stratified_folds expands
# this into a label vector with np.repeat.
classes = {0: 3, 1: 5, 2: 4, 3: 10}

In [5]:
def multiclass_stratified_folds(classes, n_folds):
    """
    Distribute per-class sample counts over stratified folds.

    A label vector is built from ``classes`` and split with ``StratifiedKFold``;
    for every split the labels of the test part are counted.

    Args:
        classes (dict): class label (non-negative int) -> number of samples
        n_folds (int): the number of folds

    Returns:
        list(dict): one dict per fold, mapping each label index in
            ``0 .. max(label)`` to the number of test samples carrying it
    """
    y = np.hstack([np.repeat(label, count) for label, count in classes.items()])

    # Pin the histogram length: without ``minlength`` a fold whose test part
    # lacks the highest label(s) would get fewer keys than the other folds.
    n_labels = int(y.max()) + 1

    folds = []
    # The feature matrix is only a placeholder, stratification uses the labels.
    for _, test in StratifiedKFold(n_splits=n_folds).split(y.reshape(-1, 1), y):
        histogram = np.bincount(y[test], minlength=n_labels)
        folds.append({idx: int(count) for idx, count in enumerate(histogram)})

    return folds

In [6]:
multiclass_stratified_folds(classes, 3)

[{0: 1, 1: 2, 2: 1, 3: 4}, {0: 1, 1: 2, 2: 1, 3: 3}, {0: 1, 1: 1, 2: 2, 3: 3}]

In [7]:
def transform_multiclass_fold_to_binary(fold):
    """
    Turn the class counts of one fold into one-vs-rest binary problems.

    Args:
        fold (dict): class -> number of samples of that class in the fold

    Returns:
        list(dict): one ``{'p': ..., 'n': ...}`` dict per class, in the
            iteration order of ``fold``; ``p`` is the class count and ``n``
            is the number of all remaining samples in the fold
    """
    fold_size = sum(fold.values())

    binary_problems = []
    for class_count in fold.values():
        binary_problems.append({'p': class_count, 'n': fold_size - class_count})

    return binary_problems

In [8]:
transform_multiclass_fold_to_binary(multiclass_stratified_folds(classes, 3)[0])

[{'p': 1, 'n': 7}, {'p': 2, 'n': 6}, {'p': 1, 'n': 7}, {'p': 4, 'n': 4}]

In [9]:
def sample_multiclass_fold(fold, random_state=None):
    """
    Draw a random square count matrix whose row sums match the fold.

    Row ``k`` is a single multinomial draw that spreads the ``fold[k]``
    samples uniformly over the ``len(fold)`` columns.

    Args:
        fold (dict): row index (``0 .. len(fold) - 1``) -> number of samples
        random_state (None|int|np.random.RandomState): a ready generator is
            used as is, anything else seeds a new ``RandomState``

    Returns:
        np.ndarray: integer matrix of shape ``(len(fold), len(fold))``
    """
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    n_classes = len(fold)
    uniform = np.ones(n_classes) / n_classes
    matrix = np.zeros(shape=(n_classes, n_classes), dtype=int)

    for row, n_items in fold.items():
        matrix[row, :] = rng.multinomial(n_items, uniform, size=1)[0]

    # sanity check: every row must add up to the requested class count
    row_totals = np.sum(matrix, axis=1)
    requested = np.array(list(fold.values()))
    assert np.all(row_totals == requested)

    return matrix


In [10]:
fold = multiclass_stratified_folds(classes, 3)[0]

In [11]:
confusion_fold = sample_multiclass_fold(fold, random_state=42)

In [12]:
from mlscorecheck.core import safe_call

In [58]:
def multiclass_score_macro(confusion_matrix, score_function):
    """
    Macro-average a binary score over the classes of a confusion matrix.

    Each class in turn is taken as the positive class of a one-vs-rest
    problem, the score is evaluated on it through ``safe_call`` and the
    per-class values are averaged with equal weights.

    Args:
        confusion_matrix (np.ndarray): 2D count matrix, expected to be square;
            row sums are used as ``p`` and the diagonal as ``tp``
        score_function (callable): the score, evaluated via ``safe_call`` on a
            dict with the keys ``p``, ``n``, ``tp`` and ``tn``

    Returns:
        float: the unweighted mean of the per-class scores
    """
    row_totals = np.sum(confusion_matrix, axis=1)
    grand_total = np.sum(row_totals)

    def one_vs_rest(k):
        # true negatives: the four corner blocks that remain once row k and
        # column k are removed
        corners = (confusion_matrix[0:k, 0:k],
                   confusion_matrix[k+1:, k+1:],
                   confusion_matrix[:k, k+1:],
                   confusion_matrix[k+1:, :k])
        return {'p': row_totals[k],
                'n': grand_total - row_totals[k],
                'tp': confusion_matrix[k, k],
                'tn': sum(np.sum(block) for block in corners)}

    per_class = [safe_call(score_function, one_vs_rest(k))
                 for k in range(len(row_totals))]

    return np.mean(per_class)

def multiclass_score_micro(confusion_matrix, score_function):
    """
    Micro-average a binary score over the classes of a confusion matrix.

    The ``p``, ``n``, ``tp`` and ``tn`` counts of all one-vs-rest problems are
    added up first and the score is evaluated once on the pooled counts.

    Args:
        confusion_matrix (np.ndarray): 2D count matrix, expected to be square;
            row sums are used as ``p`` and the diagonal as ``tp``
        score_function (callable): the score, evaluated via ``safe_call`` on a
            dict with the keys ``tp``, ``tn``, ``p`` and ``n``

    Returns:
        the value ``safe_call`` returns for the pooled counts
    """
    row_totals = np.sum(confusion_matrix, axis=1)
    grand_total = np.sum(row_totals)
    class_ids = range(len(row_totals))

    def true_negatives(k):
        # everything outside row k and column k
        return (np.sum(confusion_matrix[0:k, 0:k])
                + np.sum(confusion_matrix[k+1:, k+1:])
                + np.sum(confusion_matrix[:k, k+1:])
                + np.sum(confusion_matrix[k+1:, :k]))

    pooled = {'tp': sum(confusion_matrix[k, k] for k in class_ids),
              'tn': sum(true_negatives(k) for k in class_ids),
              'p': sum(row_totals[k] for k in class_ids),
              'n': sum(grand_total - row_totals[k] for k in class_ids)}

    return safe_call(score_function, pooled)

def multiclass_score_weighted(confusion_matrix, score_function):
    """
    Support-weighted average of a binary score over the classes.

    Each class in turn is taken as the positive class of a one-vs-rest
    problem; its score is weighted by the share of the class (row sum divided
    by the total count) and the weighted scores are added up.

    Args:
        confusion_matrix (np.ndarray): 2D count matrix, expected to be square;
            row sums are used as ``p`` and the diagonal as ``tp``
        score_function (callable): the score, evaluated via ``safe_call`` on a
            dict with the keys ``p``, ``n``, ``tp`` and ``tn``

    Returns:
        float: the weighted sum of the per-class scores
    """
    row_totals = np.sum(confusion_matrix, axis=1)
    grand_total = np.sum(row_totals)

    weighted_terms = []
    for k, class_size in enumerate(row_totals):
        # true negatives: everything outside row k and column k
        rejected = (np.sum(confusion_matrix[0:k, 0:k])
                    + np.sum(confusion_matrix[k+1:, k+1:])
                    + np.sum(confusion_matrix[:k, k+1:])
                    + np.sum(confusion_matrix[k+1:, :k]))
        value = safe_call(score_function, {'p': row_totals[k],
                                           'n': grand_total - row_totals[k],
                                           'tp': confusion_matrix[k, k],
                                           'tn': rejected})
        weighted_terms.append(value * class_size / grand_total)

    return np.sum(weighted_terms)

def multiclass_scores(confusion_matrix, score_function, average):
    """
    Evaluate a binary score on a multiclass confusion matrix.

    Args:
        confusion_matrix (np.ndarray): the confusion matrix
        score_function (callable): the binary score to aggregate
        average (str): the averaging mode, ``'micro'``, ``'macro'`` or
            ``'weighted'``

    Returns:
        the value returned by the averaging function selected by ``average``

    Raises:
        ValueError: if ``average`` is none of the supported modes
    """
    if average == 'micro':
        averager = multiclass_score_micro
    elif average == 'macro':
        averager = multiclass_score_macro
    elif average == 'weighted':
        averager = multiclass_score_weighted
    else:
        raise ValueError(f'averaging {average} is not supported')

    return averager(confusion_matrix, score_function)

def multiclass_accuracy(confusion_matrix, average='weighted'):
    """
    Multiclass accuracy aggregated from one-vs-rest binary problems.

    Args:
        confusion_matrix (np.ndarray): the confusion matrix
        average (str): ``'micro'``, ``'macro'`` or ``'weighted'``; defaults to
            ``'weighted'`` so that calls omitting the argument keep working

    Returns:
        the value returned by ``multiclass_scores``
    """
    # the dispatcher is named ``multiclass_scores``; ``multiclass_score`` does not exist
    return multiclass_scores(confusion_matrix, accuracy_standardized, average)

def multiclass_sensitivity(confusion_matrix, average='weighted'):
    """
    Multiclass sensitivity aggregated from one-vs-rest binary problems.

    Args:
        confusion_matrix (np.ndarray): the confusion matrix
        average (str): ``'micro'``, ``'macro'`` or ``'weighted'``; defaults to
            ``'weighted'`` so that calls omitting the argument keep working

    Returns:
        the value returned by ``multiclass_scores``
    """
    # the dispatcher is named ``multiclass_scores``; ``multiclass_score`` does not exist
    return multiclass_scores(confusion_matrix, sensitivity_standardized, average)

def multiclass_specificity(confusion_matrix, average='weighted'):
    """
    Multiclass specificity aggregated from one-vs-rest binary problems.

    Args:
        confusion_matrix (np.ndarray): the confusion matrix
        average (str): ``'micro'``, ``'macro'`` or ``'weighted'``; defaults to
            ``'weighted'`` so that calls omitting the argument keep working

    Returns:
        the value returned by ``multiclass_scores``
    """
    # the dispatcher is named ``multiclass_scores``; ``multiclass_score`` does not exist
    return multiclass_scores(confusion_matrix, specificity_standardized, average)

def multiclass_balanced_accuracy(confusion_matrix, average='weighted'):
    """
    Multiclass balanced accuracy aggregated from one-vs-rest binary problems.

    Args:
        confusion_matrix (np.ndarray): the confusion matrix
        average (str): ``'micro'``, ``'macro'`` or ``'weighted'``; defaults to
            ``'weighted'`` so that calls omitting the argument keep working

    Returns:
        the value returned by ``multiclass_scores``
    """
    # the dispatcher is named ``multiclass_scores``; ``multiclass_score`` does not exist
    return multiclass_scores(confusion_matrix, balanced_accuracy_standardized, average)

def multiclass_positive_predictive_value(confusion_matrix, average='weighted'):
    """
    Multiclass positive predictive value aggregated from one-vs-rest binary problems.

    Args:
        confusion_matrix (np.ndarray): the confusion matrix
        average (str): ``'micro'``, ``'macro'`` or ``'weighted'``; defaults to
            ``'weighted'`` so that calls omitting the argument keep working

    Returns:
        the value returned by ``multiclass_scores``
    """
    # the dispatcher is named ``multiclass_scores``; ``multiclass_score`` does not exist
    return multiclass_scores(confusion_matrix, positive_predictive_value_standardized, average)

def multiclass_negative_predictive_value(confusion_matrix, average='weighted'):
    """
    Multiclass negative predictive value aggregated from one-vs-rest binary problems.

    Args:
        confusion_matrix (np.ndarray): the confusion matrix
        average (str): ``'micro'``, ``'macro'`` or ``'weighted'``; defaults to
            ``'weighted'`` so that calls omitting the argument keep working

    Returns:
        the value returned by ``multiclass_scores``
    """
    # the dispatcher is named ``multiclass_scores``; ``multiclass_score`` does not exist
    return multiclass_scores(confusion_matrix, negative_predictive_value_standardized, average)

In [59]:
from mlscorecheck.scores import (
    accuracy_standardized,
    balanced_accuracy_standardized,
    negative_predictive_value_standardized,
    positive_predictive_value_standardized,
    sensitivity_standardized,
    specificity_standardized,
)

In [60]:
multiclass_accuracy(confusion_fold)

0.640625

In [61]:
from sklearn.metrics import precision_recall_fscore_support

In [62]:
# Seeded generator: the unseeded global RNG gave different labels (and therefore
# different scores in the comparison cells that follow) on every kernel restart.
y_true, y_pred = np.random.RandomState(42).randint(5, size=(2, 20))

In [63]:
precision_recall_fscore_support(y_true, y_pred, average='weighted')

(0.4041666666666667, 0.25, 0.2638888888888889, None)

In [64]:
# Tally the (true label, predicted label) pairs: rows are indexed by the true
# label and columns by the predicted label.
confusion_matrix = np.zeros(shape=(5, 5))
for true_label, predicted_label in zip(y_true, y_pred, strict=False):
    confusion_matrix[true_label, predicted_label] += 1

In [65]:
multiclass_sensitivity(confusion_matrix)

0.25

In [66]:
multiclass_positive_predictive_value(confusion_matrix)

0.4041666666666667

In [40]:
multiclass_accuracy(confusion_matrix)

0.66

In [41]:
multiclass_specificity(confusion_matrix)

0.7875

In [33]:
multiclass_positive_predictive_value(confusion_matrix)

0.18

In [None]:
# NOTE(review): the original cell called np.sum() without an argument, which raises
# TypeError and stops a Restart & Run All. Summing the diagonal of the confusion
# matrix (the correctly predicted samples) is an assumed completion - confirm.
tp = np.sum(np.diag(confusion_matrix))

In [45]:
def accuracy_multiclass(confusion_matrix):
    """
    Mean one-vs-rest accuracy over the classes of a confusion matrix.

    For every class ``k`` the accuracy ``(tp + tn) / (p + n)`` of the binary
    problem "class k against the rest" is computed and the per-class values
    are averaged with equal weights.

    Args:
        confusion_matrix (np.ndarray): 2D count matrix, expected to be square;
            row sums are used as ``p`` and the diagonal as ``tp``

    Returns:
        float: the mean of the per-class accuracies
    """
    row_totals = np.sum(confusion_matrix, axis=1)
    grand_total = np.sum(row_totals)

    per_class = []
    for k in range(len(row_totals)):
        positives = row_totals[k]
        negatives = grand_total - row_totals[k]
        hits = confusion_matrix[k, k]
        # true negatives: the four corner blocks left after dropping row k and column k
        corners = (confusion_matrix[0:k, 0:k],
                   confusion_matrix[k+1:, k+1:],
                   confusion_matrix[:k, k+1:],
                   confusion_matrix[k+1:, :k])
        rejections = sum(np.sum(block) for block in corners)
        per_class.append((hits + rejections) / (positives + negatives))

    return np.mean(per_class)

In [46]:
accuracy_multiclass(confusion_fold)

0.6875

In [47]:
from mlscorecheck.check import check_1_dataset_known_folds_mos_scores

In [50]:
binary_folds = transform_multiclass_fold_to_binary(fold)

In [52]:
# Going by the outputs recorded above, dataset p/n are the totals over the binary
# folds (p: 1+2+1+4 = 8, n: 7+6+7+4 = 24).
# NOTE(review): the 'acc' of 0.6873 is 2e-4 away from the 0.6875 printed for
# accuracy_multiclass(confusion_fold), i.e. more than eps=1e-4 - confirm that
# feeding a slightly-off score is intentional.
check_1_dataset_known_folds_mos_scores(dataset={'p': 8, 'n': 24}, folding={'folds': binary_folds}, eps=1e-4, scores={'acc': 0.6873})

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/gykovacs/anaconda3/envs/mlscorecheck/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/a065da42bbc34847a634d09fae57c662-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/a065da42bbc34847a634d09fae57c662-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 7 COLUMNS
At line 41 RHS
At line 44 BOUNDS
At line 54 ENDATA
Problem MODEL has 2 rows, 9 columns and 16 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0000I Cut generators found to be infeasible! (or unbounded)
Pre-processing says infeasible or unbounded
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00



{'inconsistency': True,
 'lp_status': 'infeasible',
 'lp_configuration': {'evaluations': [{'folds': {'folds': [{'fold': {'p': 1,
        'n': 7,
        'identifier': 'fkrkv',
        'tp': 0.0,
        'tn': 7.0},
       'scores': {'acc': 0.875},
       'score_bounds': None,
       'bounds_flag': True},
      {'fold': {'p': 2, 'n': 6, 'identifier': 'wpxvc', 'tp': 0.0, 'tn': 6.0},
       'scores': {'acc': 0.75},
       'score_bounds': None,
       'bounds_flag': True},
      {'fold': {'p': 1, 'n': 7, 'identifier': 'ngfid', 'tp': 0.0, 'tn': 7.0},
       'scores': {'acc': 0.875},
       'score_bounds': None,
       'bounds_flag': True},
      {'fold': {'p': 4,
        'n': 4,
        'identifier': 'yvtsr',
        'tp': 0.0,
        'tn': 1.990368},
       'scores': {'acc': 0.248796},
       'score_bounds': None,
       'bounds_flag': True}],
     'bounds_flag': True},
    'scores': {'acc': 0.687199},
    'score_bounds': None,
    'bounds_flag': {'folds': [{'fold': {'p': 1,
        'n': 