wrote tests for cat labels for binary and multiclass
AtrCheema committed Sep 3, 2022
1 parent c6b0d0b commit 1c0846c
Showing 3 changed files with 164 additions and 29 deletions.
File renamed without changes.
77 changes: 68 additions & 9 deletions SeqMetrics/_cls.py
@@ -1,4 +1,5 @@

import numbers
import warnings
from typing import Union

@@ -61,16 +62,29 @@ class ClassificationMetrics(Metrics):
>>> metrics.cross_entropy()
... 0.71355817782
Working with categorical values is seamless
>>> true = np.array(['a', 'b', 'b', 'b'])
>>> pred = np.array(['a', 'a', 'a', 'a'])
>>> metrics = ClassificationMetrics(true, pred)
>>> accuracy = metrics.accuracy()
same goes for multiclass categorical labels
>>> t = np.array(['car', 'truck', 'truck', 'car', 'bike', 'truck'])
>>> p = np.array(['car', 'car', 'bike', 'car', 'bike', 'truck'])
>>> metrics = ClassificationMetrics(t, p, multiclass=True)
>>> print(metrics.calculate_all())
"""
# todo add very major error and major error

def __init__(
self,
true,
predicted,
multiclass=False,
multiclass:bool=False,
*args,
**kwargs):
**kwargs
):

self.multiclass = multiclass

@@ -81,6 +95,9 @@ def __init__(
self.is_categorical = True
assert self.predicted.dtype.kind in ['S', 'U']

self.true_cls, self.true_encoded = self._encode(self.true)
self.pred_cls, self.pred_encoded = self._encode(self.predicted)

self.true_labels = self._true_labels()
self.true_logits = self._true_logits()
self.pred_labels = self._pred_labels()
@@ -176,9 +193,14 @@ def cross_entropy(self, epsilon=1e-12)->float:
scalar
"""
predictions = np.clip(self.predicted, epsilon, 1. - epsilon)
n = predictions.shape[0]
ce = -np.sum(self.true * np.log(predictions + 1e-9)) / n
if self.is_categorical:
predictions = np.clip(self.pred_encoded, epsilon, 1. - epsilon)
n = predictions.shape[0]
ce = -np.sum(self.true_encoded * np.log(predictions + 1e-9)) / n
else:
predictions = np.clip(self.predicted, epsilon, 1. - epsilon)
n = predictions.shape[0]
ce = -np.sum(self.true * np.log(predictions + 1e-9)) / n
return ce
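
For context, the cross entropy computed above is the mean negative log-likelihood of the true labels under the clipped predictions; a minimal NumPy sketch with illustrative arrays (not taken from the library):

import numpy as np

# illustrative binary example: true labels and predicted probabilities
true = np.array([1, 0, 0, 1])
pred = np.array([0.9, 0.2, 0.1, 0.8])

epsilon = 1e-12
predictions = np.clip(pred, epsilon, 1. - epsilon)
n = predictions.shape[0]
ce = -np.sum(true * np.log(predictions + 1e-9)) / n  # roughly 0.082 for these values
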

# def hinge_loss(self):
@@ -189,10 +211,11 @@ def cross_entropy(self, epsilon=1e-12)->float:

def accuracy(self, normalize:bool=True)->float:
"""
calculates accuracy
Parameters
----------
normalize
normalize : bool
Returns
-------
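
The accuracy body itself is collapsed in this hunk; assuming the standard definition, it is the fraction of predictions that match the true labels (or the raw count when normalize=False), as in this sketch using the binary categorical example from the class docstring:

import numpy as np

true = np.array(['a', 'b', 'b', 'b'])
pred = np.array(['a', 'a', 'a', 'a'])

matches = np.sum(true == pred)   # 1 correct prediction out of 4
accuracy = matches / len(true)   # 0.25 with normalize=True; normalize=False would return the count instead
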
@@ -281,7 +304,6 @@ def _confusion_matrix(self, normalize=None):

return np.nan_to_num(cm)


def _tp(self):
return np.diag(self.cm)

@@ -301,6 +323,32 @@ def _tn(self):

return TN

@staticmethod
def _is_scalar_nan(x):
# same as sklearn function
return bool(isinstance(x, numbers.Real) and np.isnan(x))

def _encode(self, x:np.ndarray)->tuple:
"""encodes a categorical array into numerical values"""
classes, encoded = np.unique(x, return_inverse=True)

# following lines are taken from sklearn
# np.unique will have duplicate missing values at the end of `uniques`
# here we clip the nans and remove it from uniques
if classes.size and self._is_scalar_nan(classes[-1]):
nan_idx = np.searchsorted(classes, np.nan)
classes = classes[:nan_idx + 1]

encoded[encoded > nan_idx] = nan_idx

return classes, encoded
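
To make the encoding concrete, np.unique with return_inverse=True yields the sorted unique classes plus, for every element, the index of its class; a small sketch using the multiclass array from the class docstring:

import numpy as np

x = np.array(['car', 'truck', 'truck', 'car', 'bike', 'truck'])
classes, encoded = np.unique(x, return_inverse=True)
# classes -> array(['bike', 'car', 'truck'], dtype='<U5')
# encoded -> array([1, 2, 2, 1, 0, 2])
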

def _decode_true(self):
raise NotImplementedError

def _decode_prediction(self):
raise NotImplementedError

def precision(self, average=None):
"""
Returns precision score, also called positive predictive value.
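
The rest of the method is collapsed here; conventionally, per-class precision is TP / (TP + FP), i.e. the confusion-matrix diagonal divided by the column sums when rows are true labels and columns are predictions. A sketch under that assumption, using the confusion matrix of the docstring's multiclass example (classes bike, car, truck):

import numpy as np

cm = np.array([[1, 0, 0],
               [0, 2, 0],
               [1, 1, 1]])
tp = np.diag(cm)
precision_per_class = tp / cm.sum(axis=0)   # [0.5, 0.66666667, 1.0]
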
@@ -407,7 +455,7 @@ def specificity(self, average=None):
return _spcificity.mean()

else:
return np.average(_spcificity, weights= self._tn() + self._fp())
return np.average(_spcificity, weights= TN + FP)

return _spcificity
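
For reference, specificity (the true negative rate) is TN / (TN + FP) per class; the weighted average above weights each class by its negative support, TN + FP. A minimal sketch with illustrative counts:

import numpy as np

TN = np.array([4, 3, 5])          # illustrative per-class true negatives
FP = np.array([1, 1, 0])          # illustrative per-class false positives
specificity = TN / (TN + FP)      # [0.8, 0.75, 1.0]
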

@@ -447,7 +495,8 @@ def balanced_accuracy(self, average=None)->float:
return score

def f1_score(self, average=None)->Union[np.ndarray, float]:
"""calculates f1 score
"""calculates f1 score according to following formula
f1_score = 2 * (precision * recall) / (precision + recall)
Parameters
----------
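
As a quick check of the formula, take class 'a' from the binary categorical example in the class docstring: one of the four 'a' predictions is correct (precision 0.25) and the single true 'a' is found (recall 1.0):

precision, recall = 0.25, 1.0
f1 = 2 * (precision * recall) / (precision + recall)   # 0.4
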
@@ -501,6 +550,8 @@ def false_positive_rate(self):
TP = self._tp()
fpr = TP / (TP + self._tn())

fpr = np.nan_to_num(fpr)

return fpr

def false_discovery_rate(self):
@@ -511,6 +562,9 @@ def false_discovery_rate(self):
FP = self._fp()

fdr = FP / (self._tp() + FP)

fdr = np.nan_to_num(fdr)

return fdr

def false_negative_rate(self):
@@ -520,6 +574,9 @@ def false_negative_rate(self):
"""
FN = self._fn()
fnr = FN / (FN + self._tp())

fnr = np.nan_to_num(fnr)

return fnr

def negative_predictive_value(self):
@@ -529,6 +586,8 @@ def negative_predictive_value(self):
"""
TN = self._tn()
npv = TN / (TN + self._fn())

npv = np.nan_to_num(npv)
return npv


116 changes: 96 additions & 20 deletions tests/test_cls.py
@@ -10,7 +10,6 @@
from SeqMetrics import ClassificationMetrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, balanced_accuracy_score
from sklearn.preprocessing import LabelBinarizer



@@ -21,15 +20,33 @@ class TestBinaryBooleanLabels(unittest.TestCase):
metrics = ClassificationMetrics(t, p)

def test_f1_score(self):
self.assertEqual(self.metrics.f1_score(average="weighted"),
f1_score(self.t, self.p, average="weighted"))
for avg in [None, "macro", "weighted", "micro"]:
np.testing.assert_almost_equal(self.metrics.f1_score(average=avg),
f1_score(self.t, self.p, average=avg))
return

def test_precision(self):
for avg in [None, "macro", "weighted", "micro"]:
np.testing.assert_almost_equal(self.metrics.precision(average=avg),
precision_score(self.t, self.p, average=avg))
return

def test_recall(self):
for avg in [None, "macro", "weighted", "micro"]:
np.testing.assert_almost_equal(self.metrics.recall(average=avg),
recall_score(self.t, self.p, average=avg))
return

def test_accuracy(self):
val_score = self.metrics.accuracy()
self.assertAlmostEqual(val_score, 0.25)
return

def test_balance_accuracy(self):
val_score = self.metrics.balanced_accuracy()
self.assertAlmostEqual(val_score, balanced_accuracy_score(self.t, self.p))
return

def test_confusion_metrics(self):
cm = self.metrics.confusion_matrix()
np.testing.assert_array_equal(cm, confusion_matrix(self.t, self.p))
@@ -40,6 +57,7 @@ def test_class_all(self):
assert len(all_metrics) == 13
return


class TestBinaryNumericalLabels(unittest.TestCase):
"""binary classification when the arrays are nuerical values"""

@@ -126,8 +144,21 @@ class TestBinaryCategoricalLabels(unittest.TestCase):
metrics = ClassificationMetrics(t, p)

def test_f1_score(self):
self.assertEqual(self.metrics.f1_score(average="weighted"),
f1_score(self.t, self.p, average="weighted"))
for avg in [None, "macro", "weighted", "micro"]:
np.testing.assert_almost_equal(self.metrics.f1_score(average=avg),
f1_score(self.t, self.p, average=avg))
return

def test_precision(self):
for avg in [None, "macro", "weighted", "micro"]:
np.testing.assert_almost_equal(self.metrics.precision(average=avg),
precision_score(self.t, self.p, average=avg))
return

def test_recall(self):
for avg in [None, "macro", "weighted", "micro"]:
np.testing.assert_almost_equal(self.metrics.recall(average=avg),
recall_score(self.t, self.p, average=avg))
return

def test_accuracy(self):
@@ -140,7 +171,15 @@ def test_confusion_metrics(self):
np.testing.assert_array_equal(cm, confusion_matrix(self.t, self.p))
return

def test_balance_accuracy(self):
val_score = self.metrics.balanced_accuracy()
self.assertAlmostEqual(val_score, balanced_accuracy_score(self.t, self.p))
return

def test_class_all(self):
all_metrics = self.metrics.calculate_all()
assert len(all_metrics) == len(self.metrics.all_methods)
return


class TestBinaryLogits(unittest.TestCase):
@@ -221,7 +260,7 @@ def test_confusion_matrix(self):
return

def test_precision(self):
for average in ['macro', 'weighted', None]:
for average in ['macro', 'weighted', 'micro', None]:

act_precision = precision_score(self.true, self.pred, average=average)
calc_precision = self.metrics.precision(average=average)
Expand All @@ -230,7 +269,7 @@ def test_precision(self):
return

def test_recall(self):
for average in ['macro', 'weighted', None]:
for average in ['macro', 'weighted', 'micro', None]:

act_recall = recall_score(self.true, self.pred, average=average)
calc_recall = self.metrics.recall(average=average)
Expand All @@ -239,7 +278,7 @@ def test_recall(self):
return

def test_f1_score(self):
for average in ['macro', 'weighted', None]:
for average in ['macro', 'weighted', 'micro', None]:
act_f1_score = f1_score(self.true, self.pred, average=average)
calc_f1_score = self.metrics.f1_score(average=average)
np.testing.assert_almost_equal(act_f1_score, calc_f1_score)
@@ -252,20 +291,57 @@ def test_balanced_accuracy(self):

return

# class TestMulticlassCategoricalLabels(unittest.TestCase):
# true = np.random.randint(1, 4, 100)
# pred = np.random.randint(1, 4, 100)
# metrics = ClassificationMetrics(true, pred, multiclass=True)
class TestMulticlassCategoricalLabels(unittest.TestCase):
t = np.array(['car', 'truck', 'truck', 'car', 'bike', 'truck'])
p = np.array(['car', 'car', 'bike', 'car', 'bike', 'truck'])
metrics = ClassificationMetrics(t, p, multiclass=True)

def test_all(self):
self.metrics.calculate_all()
return

def test_f1_score(self):
for avg in [None, "macro", "weighted", "micro"]:
np.testing.assert_almost_equal(self.metrics.f1_score(average=avg),
f1_score(self.t, self.p, average=avg))
return

def test_precision(self):
for avg in [None, "macro", "weighted", "micro"]:
np.testing.assert_almost_equal(self.metrics.precision(average=avg),
precision_score(self.t, self.p, average=avg))
return


def test_recall(self):
for avg in [None, "macro", "weighted", "micro"]:
np.testing.assert_almost_equal(self.metrics.recall(average=avg),
recall_score(self.t, self.p, average=avg))
return


def test_accuracy(self):
val_score = self.metrics.accuracy()
self.assertAlmostEqual(val_score, accuracy_score(self.t, self.p))
return


def test_confusion_metrics(self):
cm = self.metrics.confusion_matrix()
np.testing.assert_array_equal(cm, confusion_matrix(self.t, self.p))
return


def test_balance_accuracy(self):
val_score = self.metrics.balanced_accuracy()
self.assertAlmostEqual(val_score, balanced_accuracy_score(self.t, self.p))
return

# def test_all(self):
# self.metrics.calculate_all()
# return

# def test_accuracy(self):
# acc = self.metrics.accuracy()
# acc2 = accuracy_score(self.true, self.pred)
# self.assertAlmostEqual(acc, acc2)
# return
def test_class_all(self):
all_metrics = self.metrics.calculate_all()
assert len(all_metrics) == len(self.metrics.all_methods)
return


class TestMulticlassLogits(unittest.TestCase):
