In [1]:
import sys
import numpy as np
import pandas as pd
sys.path.append("../")
from expected_cost import ec, other_metrics, utils
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef, balanced_accuracy_score
# from sklearn.metrics import class_likelihood_ratios  # does not work here (TODO: check the installed scikit-learn version)

## Tests

In [2]:
# Directory with the experiment outputs (relative to the working directory)
DATA_PATH = "outputs/data/"
# Read the pickled table of augmented logits
df_augmented_logits = pd.read_pickle(DATA_PATH + 'augmented_logits.pickle')
# Reference labels and system scores that every metric below is computed from
targets, scores = (
    df_augmented_logits[column].values for column in ('labels', 'logit_scores')
)

In [3]:
# Settings shared by EC and by the metrics derived from EC
# Number of classes
C = 2
# Beta parameter of the F-score
beta = 1
# Prior of the class of interest: entry 1 of the priors that
# utils.get_binary_data_priors returns for the targets
p0 = utils.get_binary_data_priors(targets)[1]
# Prior vector: p0 in the first position, the remaining mass (1 - p0)
# shared equally by the other C - 1 classes
# NOTE(review): p0 is read from index 1 but stored at index 0 -- confirm this
# is the class ordering expected by whatever consumes data_priors
data_priors = np.full(C, (1 - p0) / (C - 1))
data_priors[0] = p0
# Uniform priors: every class gets 1 / C
unif_priors = np.full(C, 1 / C)
# Standard 0-1 cost matrix for C classes
costs_01 = ec.cost_matrix.zero_one_costs(C)

In [4]:
# Decision threshold (also passed as `pt` to the net-benefit functions)
# NOTE(review): the scores come from a column named 'logit_scores'; confirm
# that 0.5 is the intended threshold for that scale
thr = 0.5
# Hard decisions: 1 when the score is strictly above the threshold, else 0
decisions = np.array([int(bool(score > thr)) for score in scores])
# Counts returned by utils.get_counts_from_binary_data, unpacked in the order
# N0, N1, K00, K11, K01, K10
N0, N1, K00, K11, K01, K10 = utils.get_counts_from_binary_data(targets, decisions)
# Total count, taken as the sum of N0 and N1
K = N0 + N1

Fscore

In [6]:
# 1 - F-score obtained three ways: from the counts, from EC, and from
# scikit-learn's f1_score
# NOTE(review): f_score is called without beta while the EC variant receives
# it -- presumably f_score defaults to beta = 1; confirm
fs1 = 1 - other_metrics.f_score(K10, K01, N0, N1)
fs2 = other_metrics.one_minus_fscore_from_EC(targets, decisions, beta)
fs3 = 1 - f1_score(targets, decisions)
# The three values must agree once rounded to two decimals
fs_rounded = [np.round(value, 2) for value in (fs1, fs2, fs3)]
assert fs_rounded[0] == fs_rounded[1] == fs_rounded[2]
print(fs2)

[0.68108652 0.31891348] 0.6810865191146881 0.6810865191146881
0.0454322862120153


Accuracy

In [None]:
# Accuracy obtained three ways: from the counts, from EC, and from
# scikit-learn's accuracy_score
acc1 = other_metrics.accuracy(K00, K11, K)
acc2 = other_metrics.accuracy_from_EC(targets, decisions)
acc3 = accuracy_score(targets, decisions)
# The three values must agree once rounded to two decimals
acc_rounded = [np.round(value, 2) for value in (acc1, acc2, acc3)]
assert acc_rounded[0] == acc_rounded[1] == acc_rounded[2]
print(acc2)

Balanced accuracy

In [None]:
# Balanced accuracy obtained three ways: from the counts, from EC, and from
# scikit-learn's balanced_accuracy_score
bal_acc1 = other_metrics.bal_accuracy(C, K00, K11, N0, N1)
bal_acc2 = other_metrics.bal_accuracy_from_EC(targets, decisions)
bal_acc3 = balanced_accuracy_score(targets, decisions)
# The three values must agree once rounded to two decimals
bal_acc_rounded = [np.round(value, 2) for value in (bal_acc1, bal_acc2, bal_acc3)]
assert bal_acc_rounded[0] == bal_acc_rounded[1] == bal_acc_rounded[2]
print(bal_acc2)

MCC

In [None]:
# Matthews correlation coefficient obtained three ways: from the counts,
# from EC, and from scikit-learn's matthews_corrcoef
mcc1 = other_metrics.mc_coeff(K10, K01, N0, N1)
mcc2 = other_metrics.mccoeff_from_EC(targets, decisions)
mcc3 = matthews_corrcoef(targets, decisions)
# The three MCC values must agree once rounded to two decimals
assert np.round(mcc1,2) == np.round(mcc2,2) == np.round(mcc3,2)
print(mcc2)

LR+

In [None]:
# LR+ obtained two ways: from the counts and from EC
lrp1 = other_metrics.lr_plus(K10, K01, N0, N1)
lrp2 = other_metrics.lrplus_from_EC(targets, decisions)
# scikit-learn reference, disabled in this notebook:
#lrp3 = class_likelihood_ratios(targets, decisions)
# Both values must agree once rounded to two decimals
lrp_rounded = [np.round(value, 2) for value in (lrp1, lrp2)]
assert lrp_rounded[0] == lrp_rounded[1]
print(lrp2)

Net benefit

In [None]:
# Net benefit is not implemented in scikit-learn; see the discussion at
# https://github.com/scikit-learn/scikit-learn/issues/22136
# The two expected_cost implementations are therefore only displayed side by
# side, using the decision threshold as `pt`.
nb1 = other_metrics.nb(K01, K11, K, pt=thr)
nb2 = other_metrics.nb_from_EC(targets, decisions, pt=thr)
(nb1, nb2)