Skip to content

Commit

Permalink
Ready: Feature 139 confusion matrix (#144)
Browse files Browse the repository at this point in the history
* Initial version of ConfusionMatrix visualizer using the matplotlib pcolormesh.

- Allows for percent or raw count representation of the predictions
- Implements heatmap with white=0, green=100%, and yellow-orange-red heatmap for everything else
- Allows zooming in on confusion matrix using passed list of classes, with accurate %-of-all-true calculations
- Tested for moderately large class numbers (30+)
- Diagonal line indicates accurate predictions
- Documentation added to docs/examples/methods.rst for one example matrix

Suggested future improvements:
- Resize font based on image size + class count
- Allow custom color coding, including custom colors for _over and _under values (e.g. zero and 100%)
- Vary text font color based on background color
- While this branch currently adds an example to methods.rst, examples/confusionMatrix.ipynb has additional examples that use different combinations of the passed parameters. That notebook should probably also be exported as rst and added to the docs, but there was no obvious place to put it, so I am excluding it for now.

Note this commit squashes all previous commits on this branch

* Moved example notebook, and expanded numpy_div0 to div_safe

* Fixes one incorrect automatic merge issue and re-runs the two example notebooks to make sure they work.
  • Loading branch information
NealHumphrey committed Mar 23, 2017
1 parent 45268fc commit 1320a9a
Show file tree
Hide file tree
Showing 7 changed files with 2,189 additions and 40 deletions.
43 changes: 17 additions & 26 deletions examples/nealhumphrey/confusionMatrix.ipynb

Large diffs are not rendered by default.

1,059 changes: 1,059 additions & 0 deletions examples/nealhumphrey/data/default_features_1059_tracks.txt

Large diffs are not rendered by default.

1,059 changes: 1,059 additions & 0 deletions examples/nealhumphrey/data/default_plus_chromatic_features_1059_tracks.txt

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions examples/nealhumphrey/light-dark-text.ipynb

Large diffs are not rendered by default.

26 changes: 26 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,32 @@ def test_classifier_visualizer(self):
model = ScoreVisualizer(RandomForestClassifier())
self.assertTrue(is_classifier(model))


class DivSafeTests(unittest.TestCase):
    """
    Tests for div_safe, which divides array-likes and substitutes 0 for
    any non-finite quotient (inf, -inf, NaN).
    """

    def test_div_1d_by_scalar(self):
        """
        Dividing a 1D list by a zero scalar yields all zeros
        """
        result = div_safe([-1, 0, 1], 0)
        # Compare element-wise first: `result.all() == 0` would also pass
        # when only some of the elements are zero.
        self.assertTrue(np.all(result == 0))

    def test_div_1d_by_1d(self):
        """
        Dividing a 1D list by a 1D list of zeros yields all zeros
        """
        result = div_safe([-1, 0, 1], [0, 0, 0])
        self.assertTrue(np.all(result == 0))

    def test_div_2d_by_1d(self):
        """
        Dividing a 2D array by a 1D list of zeros yields a 2D array of zeros
        """
        numerator = np.array([[-1, 0, 1, 2], [1, -1, 0, 3]])
        denominator = [0, 0, 0, 0]
        result = div_safe(numerator, denominator)
        self.assertEqual(result.shape, numerator.shape)
        self.assertTrue(np.all(result == 0))

    def test_invalid_dimensions(self):
        """
        Operands whose shapes cannot be divided raise a ValueError
        """
        numerator = np.array([[-1, 0, 1, 2], [1, -1, 0, 3]])
        denominator = [0, 0]
        with self.assertRaises(ValueError):
            div_safe(numerator, denominator)

    def test_div_scalar_by_scalar(self):
        """
        A scalar numerator is rejected with a ValueError
        """
        with self.assertRaises(ValueError):
            div_safe(5, 0)

##########################################################################
## Decorator Tests
##########################################################################
Expand Down
6 changes: 3 additions & 3 deletions yellowbrick/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from .base import Visualizer, ScoreVisualizer, MultiModelMixin
from .style.palettes import color_sequence, color_palette, LINE_COLOR
from .style import find_text_color
from .utils import numpy_div0
from .utils import div_safe


##########################################################################
Expand Down Expand Up @@ -202,8 +202,8 @@ def draw(self, percent=True):

if percent == True:
#Convert confusion matrix to percent of each row, i.e. the predicted as a percent of true in each class
#numpy_div0 function returns 0 instead of NAN.
self._confusion_matrix_display = numpy_div0(
#div_safe function returns 0 instead of NAN.
self._confusion_matrix_display = div_safe(
self.confusion_matrix,
self.selected_class_counts
)
Expand Down
28 changes: 21 additions & 7 deletions yellowbrick/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,18 +158,32 @@ def is_dataframe(obj):


#From here: http://stackoverflow.com/questions/26248654/numpy-return-0-with-divide-by-zero
def div_safe(numerator, denominator):
    """
    Ufunc-extension that returns 0 instead of nan or inf when dividing
    numpy arrays.

    Parameters
    ----------
    numerator : array-like
        The values to be divided. Must not be a scalar (or 0-d array).

    denominator : scalar or array-like
        The divisor; must be validly divisible into the numerator.

    Returns
    -------
    result : numpy.ndarray
        The element-wise true division of numerator by denominator, with
        every non-finite entry (-inf, inf, NaN) replaced by 0.

    Raises
    ------
    ValueError
        If the numerator is a scalar, or if numpy cannot divide the
        operands (e.g. their shapes do not match).

    Examples
    --------
    >>> div_safe([-1, 0, 1], 0)
    array([0., 0., 0.])
    """
    # First handle scalars. np.isscalar does not recognise 0-d arrays, which
    # would otherwise fail below with a TypeError on item assignment, so the
    # dimensionality is checked as well.
    if np.isscalar(numerator) or np.ndim(numerator) == 0:
        raise ValueError("div_safe should only be used with an array-like numerator")

    # Then numpy arrays: silence the divide-by-zero / invalid-value warnings,
    # since those entries are overwritten immediately afterwards. A
    # ValueError raised by numpy for incompatible operands propagates as is.
    with np.errstate(divide='ignore', invalid='ignore'):
        result = np.true_divide(numerator, denominator)

    result[~np.isfinite(result)] = 0  # -inf inf NaN
    return result


# Backward-compatible alias for the function's previous name.
numpy_div0 = div_safe

##########################################################################
## Decorators
Expand Down

0 comments on commit 1320a9a

Please sign in to comment.