Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addresses Issue #615 : Adding new parameters "Colorbar" and "Heatmap" in PCA visualizer #884

Merged
merged 21 commits into from
Jul 15, 2019
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
64 changes: 63 additions & 1 deletion tests/test_features/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,20 @@
import numpy as np

from unittest import mock
from tests.base import VisualTestCase
from ..fixtures import Dataset
from tests.base import VisualTestCase


from yellowbrick.features.pca import *
from yellowbrick.exceptions import YellowbrickError

from sklearn.datasets import make_classification

try:
# Only available in Matplotlib >= 2.0.2
from mpl_toolkits.axes_grid1 import make_axes_locatable
except ImportError:
make_axes_locatable = None

##########################################################################
## Fixtures
Expand Down Expand Up @@ -212,4 +218,60 @@ def test_alpha_param(self, mock_sca):
assert "alpha" in scatter_kwargs
assert scatter_kwargs["alpha"] == 0.3
assert pca_array.shape == (self.dataset.X.shape[0], 2)

def test_colorbar(self):
    """
    Render a scaled 2-D PCA projection with the colorbar enabled and
    compare the resulting image.
    """
    visualizer = PCADecomposition(
        scale=True,
        proj_dim=2,
        random_state=7382,
        color=self.dataset.y,
        colorbar=True
    ).fit(self.dataset.X)
    visualizer.transform(self.dataset.X)

    # Image comparison check
    self.assert_images_similar(visualizer)

def test_heatmap(self):
    """
    Render a scaled 2-D PCA projection with the heatmap enabled, finalize
    the figure and compare the resulting image.
    """
    visualizer = PCADecomposition(
        scale=True,
        proj_dim=2,
        random_state=7382,
        color=self.dataset.y,
        heatmap=True
    ).fit(self.dataset.X)
    visualizer.transform(self.dataset.X)
    visualizer.finalize()

    # Image comparison check
    self.assert_images_similar(visualizer)

def test_colorbar_heatmap(self):
    """
    Render a scaled 2-D PCA projection with both the colorbar and the
    heatmap enabled, finalize the figure and compare the resulting image.
    """
    visualizer = PCADecomposition(
        scale=True,
        proj_dim=2,
        random_state=7382,
        color=self.dataset.y,
        colorbar=True,
        heatmap=True
    ).fit(self.dataset.X)
    visualizer.transform(self.dataset.X)
    visualizer.finalize()

    # Image comparison check
    self.assert_images_similar(visualizer)

def test_3d_colorbar_heatmap_enabled_error(self):
    """
    Assert an exception is raised when either colorbar or heatmap is
    enabled together with a 3-dimensional projection.
    """
    # Each option is checked on its own, colorbar first and then heatmap
    for option in ("colorbar", "heatmap"):
        with pytest.raises(YellowbrickValueError):
            PCADecomposition(proj_dim=3, **{option: True})


@pytest.mark.skipif(
    make_axes_locatable is not None, reason="requires matplotlib <= 2.0.1"
)
def test_matplotlib_version_error():
    """
    Assert an exception is raised when colorbar and heatmap are requested
    but ``make_axes_locatable`` could not be imported.

    The test is skipped whenever ``make_axes_locatable`` is available.
    """
    options = {"colorbar": True, "heatmap": True}
    with pytest.raises(YellowbrickValueError):
        PCADecomposition(**options)
72 changes: 63 additions & 9 deletions yellowbrick/features/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
##########################################################################

# NOTE: must import mplot3d to load the 3D projection
import mpl_toolkits.mplot3d # noqa
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d # noqa
from mpl_toolkits.axes_grid1 import make_axes_locatable

from yellowbrick.features.base import MultiFeatureVisualizer
from yellowbrick.style import palettes
Expand Down Expand Up @@ -105,6 +107,8 @@ def __init__(self,
colormap=palettes.DEFAULT_SEQUENCE,
alpha=0.75,
random_state=None,
colorbar=False,
heatmap=False,
**kwargs):
super(PCADecomposition, self).__init__(ax=ax,
features=features,
Expand All @@ -124,9 +128,46 @@ def __init__(self,
('pca', PCA(self.proj_dim, random_state=random_state))]
)
self.alpha = alpha

# Visual Parameters
self.color = color
self.colormap = colormap
self.colorbar = colorbar
self.heatmap = heatmap

self.uax, self.lax = None, None

if self.proj_dim == 3 and (self.heatmap or self.colorbar):
raise YellowbrickValueError("heatmap and colorbar are not compatible with 3d projections")

if self.heatmap or self.colorbar:
self._layout()

def _layout(self):
    """
    Creates the layout for the colorbar and the heatmap by appending new
    axes below the main axes: ``self.uax`` for the colorbar (when
    ``self.colorbar`` is set) and ``self.lax`` for the heatmap (when
    ``self.heatmap`` is set).

    When neither ``self.heatmap`` nor ``self.colorbar`` is enabled, the main
    axes are only accessed and no additional axes are created.

    Raises
    ------
    YellowbrickValueError
        If ``make_axes_locatable`` is ``None``, i.e. the installed matplotlib
        does not provide it (matplotlib 2.0.2 or greater is needed).
    """
    # Nothing extra to lay out: just touch the axes and return.
    # Presumably accessing ``self.ax`` makes the visualizer create its axes
    # if they do not exist yet -- confirm against the base visualizer.
    if not (self.heatmap or self.colorbar):
        self.ax
        return

    # Ensure matplotlib version compatibility. Either option can reach this
    # point on its own, so the message names both of them.
    # NOTE(review): this guard only fires if the module-level import of
    # make_axes_locatable falls back to None on ImportError -- confirm.
    if make_axes_locatable is None:
        raise YellowbrickValueError(
            "heatmap and colorbar require matplotlib 2.0.2 or greater; "
            "please upgrade matplotlib or set heatmap=False and "
            "colorbar=False on the visualizer"
        )

    # Create the new axes for the colorbar and heatmap, stacked underneath
    # the main axes in that order.
    divider = make_axes_locatable(self.ax)
    if self.colorbar:
        self.uax = divider.append_axes("bottom", size="20%", pad=0.7)
    if self.heatmap:
        self.lax = divider.append_axes("bottom", size="100%", pad=0.1)

def fit(self, X, y=None, **kwargs):
"""
Expand All @@ -152,15 +193,22 @@ def fit(self, X, y=None, **kwargs):
return self

def transform(self, X, y=None, **kwargs):
    """
    Project ``X`` with ``self.pca_transformer``, store the projection on
    ``self.pca_features_``, draw it and return it.

    Parameters
    ----------
    X : the data handed to ``self.pca_transformer.transform``.
    y : unused.
    kwargs : unused.

    Returns
    -------
    The value stored in ``self.pca_features_``.
    """
    projected = self.pca_transformer.transform(X)
    self.pca_features_ = projected

    # Drawing relies on pca_features_ being set, so it has to come second
    self.draw()
    return self.pca_features_

def draw(self, **kwargs):

X = self.pca_features_
if self.proj_dim == 2:
self.ax.scatter(X[:, 0], X[:, 1], c=self.color, cmap=self.colormap,
alpha=self.alpha)
im = self.ax.scatter(X[:, 0], X[:, 1], c=self.color, cmap=self.colormap, alpha=self.alpha,
vmin= self.pca_components_.min(), vmax = self.pca_components_.max(), **kwargs)
if self.colorbar:
plt.colorbar(im, cax=self.uax, orientation='horizontal',
ticks=[self.pca_components_.min(), 0, self.pca_components_.max()])
if self.heatmap:
self.lax.imshow(self.pca_components_, interpolation='none', cmap=self.colormap)
if self.proj_features:
x_vector = self.pca_components_[0]
y_vector = self.pca_components_[1]
Expand Down Expand Up @@ -209,11 +257,15 @@ def draw(self, **kwargs):
def finalize(self, **kwargs):
    """
    Set the title and the axis labels of the principal component plot and,
    when the heatmap is enabled, label the ticks of the heatmap axes.

    Parameters
    ----------
    kwargs : unused.
    """
    # Title and labels of the main axes
    self.ax.set_title('Principal Component Plot')
    self.ax.set_xlabel('Principal Component 1', linespacing=1)
    self.ax.set_ylabel('Principal Component 2', linespacing=1.2)

    # Plain truthiness, the same way ``draw`` and ``_layout`` test the flag,
    # so any truthy value that produced a heatmap also gets its tick labels.
    if self.heatmap:
        # Ticks start at -0.5 and the labels are left-aligned
        self.lax.set_xticks(np.arange(-.5, len(self.features_)))
        self.lax.set_xticklabels(
            self.features_, rotation=90, ha='left', fontsize=12
        )
        self.lax.set_yticks(np.arange(0.5, 2))
        self.lax.set_yticklabels(
            ['First PC', 'Second PC'], va='bottom', fontsize=12
        )

    if self.proj_dim == 3:
        self.ax.set_zlabel('Principal Component 3', linespacing=1.2)

##########################################################################
## Quick Method
Expand All @@ -222,7 +274,8 @@ def finalize(self, **kwargs):
def pca_decomposition(X, y=None, ax=None, features=None, scale=True,
proj_dim=2, proj_features=False, color=None,
colormap=palettes.DEFAULT_SEQUENCE, alpha=0.75,
random_state=None, **kwargs):
random_state=None, colorbar=False, heatmap=False,
percygautam marked this conversation as resolved.
Show resolved Hide resolved
**kwargs):
"""Produce a two or three dimensional principal component plot of the data array ``X``
projected onto it's largest sequential principal components. It is common practice to scale the
data array ``X`` before applying a PC decomposition. Variable scaling can be controlled using
Expand Down Expand Up @@ -290,7 +343,8 @@ def pca_decomposition(X, y=None, ax=None, features=None, scale=True,
visualizer = PCADecomposition(
ax=ax, features=features, scale=scale, proj_dim=proj_dim,
proj_features=proj_features, color=color, colormap=colormap,
alpha=alpha, random_state=random_state, **kwargs
alpha=alpha, random_state=random_state, colorbar=colorbar,
heatmap=heatmap, **kwargs
)

# Fit and transform the visualizer (calls draw)
Expand Down