Skip to content

Commit

Permalink
Robust ROCAUC for binary classification. fix #1040 (#1056)
Browse files Browse the repository at this point in the history
This PR unifies the way ROCAUC treats binary classifiers that expose predict_proba and
decision_function methods, adding a `binary` meta-parameter and updating the docstrings accordingly.
  • Loading branch information
VladSkripniuk committed Apr 16, 2020
1 parent 4737f0f commit 779487c
Show file tree
Hide file tree
Showing 14 changed files with 212 additions and 89 deletions.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
155 changes: 103 additions & 52 deletions tests/test_classifier/test_rocauc.py
Expand Up @@ -26,8 +26,8 @@
from tests.base import VisualTestCase

from yellowbrick.classifier.rocauc import *
from yellowbrick.exceptions import ModelError
from yellowbrick.datasets import load_occupancy
from yellowbrick.exceptions import ModelError, YellowbrickValueError

from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
Expand All @@ -41,11 +41,11 @@
except ImportError:
pd = None


##########################################################################
## Fixtures
##########################################################################


class FakeClassifier(BaseEstimator, ClassifierMixin):
"""
A fake classifier for testing noops on the visualizer.
Expand Down Expand Up @@ -124,6 +124,29 @@ def test_binary_probability_decision(self):
visualizer.finalize()
self.assert_images_similar(visualizer, tol=0.1, windows_tol=10)

def test_binary_probability_decision_single_curve(self):
    """
    Test ROCAUC on a binary classifier that has both decision_function and
    predict_proba when per_class=False, so only a single curve is produced.
    """
    # Fit the visualizer with all aggregate curves disabled
    oz = ROCAUC(
        AdaBoostClassifier(), micro=False, macro=False, per_class=False
    )
    oz.fit(self.binary.X.train, self.binary.y.train)

    # The score method should return a valid value in the unit interval
    score = oz.score(self.binary.X.test, self.binary.y.test)
    assert 0 <= score <= 1

    # Exactly one curve's worth of data should have been recorded
    for curve_data in (oz.fpr, oz.tpr, oz.roc_auc):
        assert len(curve_data) == 1

    # Compare the rendered figure against the baseline image
    oz.finalize()
    self.assert_images_similar(oz, tol=0.1, windows_tol=10)

def test_binary_decision(self):
"""
Test ROCAUC with a binary classifier with a decision_function
Expand All @@ -150,12 +173,38 @@ def test_binary_decision(self):
visualizer.finalize()
self.assert_images_similar(visualizer, tol=10)

def test_binary_decision_per_class(self):
    """
    Test ROCAUC on a binary classifier that only provides a
    decision_function, drawing one curve per class (micro/macro disabled).
    """
    # Fit the visualizer with per-class curves enabled only
    oz = ROCAUC(
        LinearSVC(random_state=42), micro=False, macro=False, per_class=True
    )
    oz.fit(self.binary.X.train, self.binary.y.train)

    # The score method should return a valid value in the unit interval
    score = oz.score(self.binary.X.test, self.binary.y.test)
    assert 0 <= score <= 1

    # One curve per binary class should have been recorded
    for curve_data in (oz.fpr, oz.tpr, oz.roc_auc):
        assert len(curve_data) == 2

    # Compare the rendered figure against the baseline image
    # NOTE: increased tolerance for both AppVeyor and Travis CI tests
    oz.finalize()
    self.assert_images_similar(oz, tol=10)

def test_binary_micro_error(self):
"""
Test ROCAUC to see if _binary_decision with micro = True raises an error
"""
# Create visualizer with a linear model to force a binary decision
visualizer = ROCAUC(LinearSVC(random_state=42), micro=True)
visualizer = ROCAUC(LinearSVC(random_state=42), micro=True, per_class=False)
visualizer.fit(self.binary.X.train, self.binary.y.train)

# Ensure score raises error (micro curves aren't defined for binary decisions)
Expand All @@ -167,25 +216,13 @@ def test_binary_macro_error(self):
Test ROCAUC to see if _binary_decision with macro = True raises an error
"""
# Create visualizer with a linear model to force a binary decision
visualizer = ROCAUC(LinearSVC(random_state=42), macro=True)
visualizer = ROCAUC(LinearSVC(random_state=42), macro=True, per_class=False)
visualizer.fit(self.binary.X.train, self.binary.y.train)

# Ensure score raises error (macro curves aren't defined for binary decisions)
with pytest.raises(ModelError):
visualizer.score(self.binary.X.test, self.binary.y.test)

def test_binary_per_class_error(self):
    """
    Test ROCAUC to see if _binary_decision with per_class = True raises an error
    """
    # A linear model forces the binary decision-function code path
    oz = ROCAUC(LinearSVC(random_state=42), per_class=True)
    oz.fit(self.binary.X.train, self.binary.y.train)

    # Per-class curves are not defined for binary decision scores,
    # so scoring must raise a ModelError
    with pytest.raises(ModelError):
        oz.score(self.binary.X.test, self.binary.y.test)

def test_multiclass_rocauc(self):
"""
Test ROCAUC with a multiclass classifier
Expand All @@ -207,6 +244,42 @@ def test_multiclass_rocauc(self):
visualizer.finalize()
self.assert_images_similar(visualizer, tol=0.1, windows_tol=10)

def test_rocauc_no_classes(self):
    """
    Test ROCAUC without per-class curves
    """
    # Fit the visualizer with per-class curves suppressed
    oz = ROCAUC(GaussianNB(), per_class=False)
    oz.fit(self.multiclass.X.train, self.multiclass.y.train)

    # The returned score should be the micro average
    score = oz.score(self.multiclass.X.test, self.multiclass.y.test)
    assert score == pytest.approx(0.77303, abs=1e-4)

    # Per-class data is still computed even when not drawn
    for label in (0, 1):
        for curve_data in (oz.fpr, oz.tpr, oz.roc_auc):
            assert label in curve_data

    # Compare the rendered figure against the baseline image
    oz.finalize()
    self.assert_images_similar(oz, tol=0.1, windows_tol=10)

def test_rocauc_no_curves(self):
    """
    Test ROCAUC with no curves specified at all
    """
    # Disabling every curve type is an invalid configuration
    oz = ROCAUC(GaussianNB(), per_class=False, macro=False, micro=False)
    oz.fit(self.multiclass.X.train, self.multiclass.y.train)

    # Scoring must fail because there is nothing to draw
    with pytest.raises(YellowbrickValueError, match="no curves will be drawn"):
        oz.score(self.multiclass.X.test, self.multiclass.y.test)

def test_rocauc_quickmethod(self):
"""
Test the ROCAUC quick method
Expand Down Expand Up @@ -305,42 +378,6 @@ def test_rocauc_no_macro_no_micro(self):
visualizer.finalize()
self.assert_images_similar(visualizer, tol=0.1, windows_tol=10)

def test_rocauc_no_classes(self):
    """
    Test ROCAUC without per-class curves
    """
    # Fit the visualizer with per-class curves suppressed
    oz = ROCAUC(LogisticRegression(), per_class=False)
    oz.fit(self.binary.X.train, self.binary.y.train)

    # The returned score should be the micro average
    score = oz.score(self.binary.X.test, self.binary.y.test)
    assert score == pytest.approx(0.8661, abs=1e-4)

    # Per-class data is still computed even when not drawn
    for label in (0, 1):
        for curve_data in (oz.fpr, oz.tpr, oz.roc_auc):
            assert label in curve_data

    # Compare the rendered figure against the baseline image
    oz.finalize()
    self.assert_images_similar(oz, tol=0.1, windows_tol=10)

def test_rocauc_no_curves(self):
    """
    Test ROCAUC with no curves specified at all
    """
    # Disabling every curve type is an invalid configuration
    oz = ROCAUC(LogisticRegression(), per_class=False, macro=False, micro=False)
    oz.fit(self.binary.X.train, self.binary.y.train)

    # Scoring must fail because there is nothing to draw
    with pytest.raises(YellowbrickValueError, match="no curves will be drawn"):
        oz.score(self.binary.X.test, self.binary.y.test)

def test_rocauc_label_encoded(self):
"""
Test ROCAUC with a target specifying a list of classes as strings
Expand Down Expand Up @@ -489,3 +526,17 @@ def test_with_fitted(self):
oz = ROCAUC(model, classes=classes, is_fitted=False)
oz.fit(X, y)
mockfit.assert_called_once_with(X, y)

def test_binary_meta_param(self):
    """
    Test the binary meta param with ROCAUC
    """
    # binary=False leaves every curve flag enabled (the defaults)
    oz = ROCAUC(GaussianNB(), binary=False)
    for flag in (oz.micro, oz.macro, oz.per_class):
        assert flag is True

    # binary=True disables micro, macro and per-class curves
    oz = ROCAUC(GaussianNB(), binary=True)
    for flag in (oz.micro, oz.macro, oz.per_class):
        assert flag is False

0 comments on commit 779487c

Please sign in to comment.