Ready: Feature 139 confusion matrix #144

Merged
Changes from 2 commits
Binary file added docs/examples/images/confusionMatrix_3_0.png
59 changes: 59 additions & 0 deletions docs/examples/methods.rst
@@ -432,6 +432,65 @@ heatmap in order for easy interpretation and detection.
.. image:: images/examples_32_0.png


Confusion Matrix Visualizer
~~~~~~~~~~~~~~~~~~~~~~~~~~~

The ``ConfusionMatrix`` visualizer is a ``ScoreVisualizer`` that takes a
fitted scikit-learn classifier and a set of test X and y values and
returns a report showing how each test value's predicted class compares
to its actual class. Data scientists use confusion matrices to
understand which classes are most easily confused. A confusion matrix
provides information similar to a ``ClassificationReport``, but rather
than top-level scores it offers deeper insight into the classification
of individual data points.

Below are a few examples of using the ``ConfusionMatrix`` visualizer;
more information can be found in the ``sklearn.metrics.confusion_matrix``
documentation.
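
Before those examples, for reference, here is a minimal sketch of the
underlying scikit-learn call that the visualizer's ``score`` method wraps;
the ``y_true`` and ``y_pred`` arrays below are hypothetical:

.. code:: python

    from sklearn.metrics import confusion_matrix

    # Rows are actual classes, columns are predicted classes
    y_true = [0, 1, 2, 2]
    y_pred = [0, 2, 2, 2]
    confusion_matrix(y_true, y_pred)
    # array([[1, 0, 0],
    #        [0, 0, 1],
    #        [0, 0, 2]])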

.. code:: python

    # First, do our imports
    import yellowbrick

    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression

    from yellowbrick.classifier import ConfusionMatrix

.. code:: python

    # We'll use the handwritten digits dataset from scikit-learn.
    # Each sample in this dataset is an 8x8 pixel image of a handwritten digit.
    # digits.data flattens each image's 64 pixels into a single feature array.
    digits = load_digits()
    X = digits.data
    y = digits.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=11)

    model = LogisticRegression()

    # The ConfusionMatrix visualizer takes a model
    cm = ConfusionMatrix(model, classes=[0,1,2,3,4,5,6,7,8,9])

    # fit() fits the passed model. This is unnecessary if you pass the visualizer a pre-fitted model
    cm.fit(X_train, y_train)

    # To create the ConfusionMatrix, we need some test data. score() runs predict() on the data
    # and then creates the confusion_matrix from scikit-learn.
    cm.score(X_test, y_test)

    # How did we do?
    cm.poof()



.. image:: images/confusionMatrix_3_0.png
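
The tests added in this PR also exercise a raw-count mode and a zoomed-in
view over a subset of classes. A minimal sketch of that usage, assuming the
fitted ``cm`` and the train/test split from above (``percent=False`` and the
class subset are taken from ``test_raw_count_mode`` and ``test_zoomed_in``
in tests/test_classifier.py below):

.. code:: python

    # Show raw counts rather than percentages (keyword taken from this PR's tests)
    cm.score(X_test, y_test, percent=False)
    cm.poof()

    # Zoom in on a subset of classes, as in test_zoomed_in
    cm_zoom = ConfusionMatrix(LogisticRegression(), classes=[0, 1, 2])
    cm_zoom.fit(X_train, y_train)
    cm_zoom.score(X_test, y_test)
    cm_zoom.poof()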



ROCAUC
~~~~~~

313 changes: 313 additions & 0 deletions examples/nealhumphrey/confusionMatrix.ipynb


1,059 changes: 1,059 additions & 0 deletions examples/nealhumphrey/data/default_features_1059_tracks.txt


1,059 changes: 1,059 additions & 0 deletions examples/nealhumphrey/data/default_plus_chromatic_features_1059_tracks.txt


55 changes: 55 additions & 0 deletions tests/test_classifier.py
@@ -22,6 +22,9 @@

from sklearn.svm import LinearSVC
from sklearn.metrics import *
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

##########################################################################
## Data
@@ -68,3 +71,55 @@ def test_class_report(self):
        model.fit(X,y)
        visualizer = ClassificationReport(model, classes=["A", "B"])
        visualizer.score(X,y)

class ConfusionMatrixTests(VisualTestCase):
    def __init__(self, *args, **kwargs):
        super(ConfusionMatrixTests, self).__init__(*args, **kwargs)
        # Use the same data for all the tests
        self.digits = load_digits()

        X = self.digits.data
        y = self.digits.target

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=11)
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test

    def test_confusion_matrix(self):
        model = LogisticRegression()
        cm = ConfusionMatrix(model, classes=[0,1,2,3,4,5,6,7,8,9])
        cm.fit(self.X_train, self.y_train)
        cm.score(self.X_test, self.y_test)

    def test_no_classes_provided(self):
        model = LogisticRegression()
        cm = ConfusionMatrix(model)
        cm.fit(self.X_train, self.y_train)
        cm.score(self.X_test, self.y_test)

    def test_raw_count_mode(self):
        model = LogisticRegression()
        cm = ConfusionMatrix(model)
        cm.fit(self.X_train, self.y_train)
        cm.score(self.X_test, self.y_test, percent=False)

    def test_zoomed_in(self):
        model = LogisticRegression()
        cm = ConfusionMatrix(model, classes=[0,1,2])
        cm.fit(self.X_train, self.y_train)
        cm.score(self.X_test, self.y_test)

    def test_extra_classes(self):
        model = LogisticRegression()
        cm = ConfusionMatrix(model, classes=[0,1,2,11])
        cm.fit(self.X_train, self.y_train)
        cm.score(self.X_test, self.y_test)
        # Class 11 never appears in the digits data, so its count should be zero
        self.assertTrue(cm.selected_class_counts[3] == 0)

    def test_one_class(self):
        model = LogisticRegression()
        cm = ConfusionMatrix(model, classes=[0])
        cm.fit(self.X_train, self.y_train)
        cm.score(self.X_test, self.y_test)
26 changes: 26 additions & 0 deletions tests/test_utils.py
@@ -326,6 +326,32 @@ def test_classifier_visualizer(self):
        model = ScoreVisualizer(RandomForestClassifier())
        self.assertTrue(is_classifier(model))


class DivSafeTests(unittest.TestCase):

    def test_div_1d_by_scalar(self):
        result = div_safe([-1, 0, 1], 0)
        self.assertTrue(result.all() == 0)

    def test_div_1d_by_1d(self):
        result = div_safe([-1, 0, 1], [0, 0, 0])
        self.assertTrue(result.all() == 0)

    def test_div_2d_by_1d(self):
        numerator = np.array([[-1, 0, 1, 2], [1, -1, 0, 3]])
        denominator = [0, 0, 0, 0]
        # Should not raise; the 1d denominator broadcasts across each row
        result = div_safe(numerator, denominator)

    def test_invalid_dimensions(self):
        numerator = np.array([[-1, 0, 1, 2], [1, -1, 0, 3]])
        denominator = [0, 0]
        with self.assertRaises(ValueError):
            result = div_safe(numerator, denominator)

    def test_div_scalar_by_scalar(self):
        with self.assertRaises(ValueError):
            result = div_safe(5, 0)
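
For context, a minimal sketch of the behavior these tests imply for
``div_safe``: element-wise division that returns 0 wherever the denominator
is 0, and raises ``ValueError`` for scalar-only inputs or incompatible
shapes. This is a hypothetical re-implementation inferred from the tests,
not yellowbrick's actual ``div_safe``:

.. code:: python

    import numpy as np

    def div_safe_sketch(numerator, denominator):
        # Hypothetical re-implementation inferred from DivSafeTests
        numerator = np.asarray(numerator)
        denominator = np.asarray(denominator)
        # Scalar / scalar is rejected, per test_div_scalar_by_scalar
        if numerator.ndim == 0 and denominator.ndim == 0:
            raise ValueError("div_safe requires at least one array argument")
        # Incompatible shapes raise ValueError via NumPy broadcasting,
        # per test_invalid_dimensions
        with np.errstate(divide='ignore', invalid='ignore'):
            result = np.true_divide(numerator, denominator)
        # Replace inf (x/0) and nan (0/0) with 0, per the 1d tests
        result[~np.isfinite(result)] = 0
        return result

Under these assumptions, ``div_safe_sketch([-1, 0, 1], 0)`` returns
``array([0., 0., 0.])``, matching ``test_div_1d_by_scalar``.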

##########################################################################
## Decorator Tests
##########################################################################