Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addressing issue #589: Adding alpha params to PCA #811

Merged
merged 5 commits into from
Apr 14, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 26 additions & 0 deletions tests/test_features/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@
import pytest
import numpy as np

from unittest import mock
from tests.dataset import Dataset
from tests.base import VisualTestCase

from yellowbrick.features.pca import *
from yellowbrick.exceptions import YellowbrickError

from sklearn.datasets import make_classification


Expand Down Expand Up @@ -188,3 +191,26 @@ def test_scale_true_3d_execption(self):
with pytest.raises(ValueError, match=e):
pca = PCADecomposition(**params)
pca.fit(X)

@mock.patch('yellowbrick.features.pca.plt.sca', autospec=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am unsure about this test, I will wait on a review from @rebeccabilbro

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the test looks good! Patching pyplot's set current axes method (sca ) ensures that when we make the mock on line 207 (which is used to called scatter and retrieve the alpha), matplotlib doesn't complain about it.

def test_alpha_param(self, mock_sca):
"""
Test that the user can supply an alpha param on instantiation
"""
# Instantiate a prediction error plot, provide custom alpha
params = {'alpha': 0.3, 'proj_dim': 2, 'random_state': 9932}
visualizer = PCADecomposition(**params).fit(self.dataset.X)
pca_array = visualizer.transform(self.dataset.X)
alpha=0.3
assert visualizer.alpha == alpha
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0.3 doesn't need to be assigned to alpha. You should test directly assert visualizer.alpha == 0.3


visualizer.ax = mock.MagicMock()
visualizer.fit(self.dataset.X)
visualizer.transform(self.dataset.X)

# Test that alpha was passed to internal matplotlib scatterplot
_, scatter_kwargs = visualizer.ax.scatter.call_args
assert "alpha" in scatter_kwargs
assert scatter_kwargs["alpha"] == 0.3
assert pca_array.shape == (self.dataset.X.shape[0], 2)

20 changes: 15 additions & 5 deletions yellowbrick/features/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ class PCADecomposition(MultiFeatureVisualizer):
Optional string or matplotlib cmap to colorize lines.
Use either color to colorize the lines on a per class basis or
colormap to color them on a continuous scale.

alpha : float, default: 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

alpha should be set to 0.75

Specify a transparency where 1 is completely opaque and 0 is completely
transparent. This property makes densely clustered points more visible.

random_state : int, RandomState instance or None, optional (default None)
If input data is larger than 500x500 and the number of components to
Expand Down Expand Up @@ -99,6 +103,7 @@ def __init__(self,
proj_features=False,
color=None,
colormap=palettes.DEFAULT_SEQUENCE,
alpha = 1,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

set alpha to 0.75

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tiny thing, but the spacing is off here. We shouldn't have spaces before and after the equals sign for keyword arguments.

random_state=None,
**kwargs):
super(PCADecomposition, self).__init__(ax=ax,
Expand All @@ -118,7 +123,7 @@ def __init__(self,
[('scale', StandardScaler(with_std=self.scale)),
('pca', PCA(self.proj_dim, random_state=random_state))]
)

self.alpha=alpha
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another spacing thing here (should be self.alpha = alpha)

# Visual Parameters
self.color = color
self.colormap = colormap
Expand Down Expand Up @@ -154,7 +159,8 @@ def transform(self, X, y=None, **kwargs):
def draw(self, **kwargs):
X = self.pca_features_
if self.proj_dim == 2:
self.ax.scatter(X[:, 0], X[:, 1], c=self.color, cmap=self.colormap)
self.ax.scatter(X[:, 0], X[:, 1], c=self.color, cmap=self.colormap,
alpha=self.alpha)
if self.proj_features:
x_vector = self.pca_components_[0]
y_vector = self.pca_components_[1]
Expand All @@ -177,7 +183,7 @@ def draw(self, **kwargs):
self.fig = plt.figure()
self.ax = self.fig.add_subplot(111, projection='3d')
self.ax.scatter(X[:, 0], X[:, 1], X[:, 2],
c=self.color, cmap=self.colormap)
c=self.color, cmap=self.colormap, alpha=self.alpha)
if self.proj_features:
x_vector = self.pca_components_[0]
y_vector = self.pca_components_[1]
Expand Down Expand Up @@ -215,7 +221,7 @@ def finalize(self, **kwargs):

def pca_decomposition(X, y=None, ax=None, features=None, scale=True,
proj_dim=2, proj_features=False, color=None,
colormap=palettes.DEFAULT_SEQUENCE,
colormap=palettes.DEFAULT_SEQUENCE, alpha=1,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

set alpha = 0.75

random_state=None, **kwargs):
"""Produce a two or three dimensional principal component plot of the data array ``X``
projected onto it's largest sequential principal components. It is common practice to scale the
Expand Down Expand Up @@ -256,6 +262,10 @@ def pca_decomposition(X, y=None, ax=None, features=None, scale=True,
Optional string or matplotlib cmap to colorize lines.
Use either color to colorize the lines on a per class basis or
colormap to color them on a continuous scale.

alpha : float, default: 1
Specify a transparency where 1 is completely opaque and 0 is completely
transparent. This property makes densely clustered points more visible.

random_state : int, RandomState instance or None, optional (default None)
If input data is larger than 500x500 and the number of components to
Expand All @@ -280,7 +290,7 @@ def pca_decomposition(X, y=None, ax=None, features=None, scale=True,
visualizer = PCADecomposition(
ax=ax, features=features, scale=scale, proj_dim=proj_dim,
proj_features=proj_features, color=color, colormap=colormap,
random_state=random_state, **kwargs
alpha=alpha, random_state=random_state,**kwargs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the spaces were modified here? We should reinsert the space after the last comma and before **kwargs

)

# Fit and transform the visualizer (calls draw)
Expand Down