-
-
Notifications
You must be signed in to change notification settings - Fork 553
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Addressing issue #589: Adding alpha params to PCA #811
Changes from 2 commits
bf856c0
e14c595
6a9bb56
b578076
ea5f737
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,10 +21,13 @@ | |
import pytest | ||
import numpy as np | ||
|
||
from unittest import mock | ||
from tests.dataset import Dataset | ||
from tests.base import VisualTestCase | ||
|
||
from yellowbrick.features.pca import * | ||
from yellowbrick.exceptions import YellowbrickError | ||
|
||
from sklearn.datasets import make_classification | ||
|
||
|
||
|
@@ -188,3 +191,26 @@ def test_scale_true_3d_execption(self): | |
with pytest.raises(ValueError, match=e): | ||
pca = PCADecomposition(**params) | ||
pca.fit(X) | ||
|
||
@mock.patch('yellowbrick.features.pca.plt.sca', autospec=True) | ||
def test_alpha_param(self, mock_sca): | ||
""" | ||
Test that the user can supply an alpha param on instantiation | ||
""" | ||
# Instantiate a prediction error plot, provide custom alpha | ||
params = {'alpha': 0.3, 'proj_dim': 2, 'random_state': 9932} | ||
visualizer = PCADecomposition(**params).fit(self.dataset.X) | ||
pca_array = visualizer.transform(self.dataset.X) | ||
alpha=0.3 | ||
assert visualizer.alpha == alpha | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 0.3 doesn't need to be assigned to alpha. You should test directly |
||
|
||
visualizer.ax = mock.MagicMock() | ||
visualizer.fit(self.dataset.X) | ||
visualizer.transform(self.dataset.X) | ||
|
||
# Test that alpha was passed to internal matplotlib scatterplot | ||
_, scatter_kwargs = visualizer.ax.scatter.call_args | ||
assert "alpha" in scatter_kwargs | ||
assert scatter_kwargs["alpha"] == 0.3 | ||
assert pca_array.shape == (self.dataset.X.shape[0], 2) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,6 +69,10 @@ class PCADecomposition(MultiFeatureVisualizer): | |
Optional string or matplotlib cmap to colorize lines. | ||
Use either color to colorize the lines on a per class basis or | ||
colormap to color them on a continuous scale. | ||
|
||
alpha : float, default: 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. alpha should be set to 0.75 |
||
Specify a transparency where 1 is completely opaque and 0 is completely | ||
transparent. This property makes densely clustered points more visible. | ||
|
||
random_state : int, RandomState instance or None, optional (default None) | ||
If input data is larger than 500x500 and the number of components to | ||
|
@@ -99,6 +103,7 @@ def __init__(self, | |
proj_features=False, | ||
color=None, | ||
colormap=palettes.DEFAULT_SEQUENCE, | ||
alpha = 1, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. set alpha to 0.75 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tiny thing, but the spacing is off here. We shouldn't have spaces before and after the equals sign for keyword arguments. |
||
random_state=None, | ||
**kwargs): | ||
super(PCADecomposition, self).__init__(ax=ax, | ||
|
@@ -118,7 +123,7 @@ def __init__(self, | |
[('scale', StandardScaler(with_std=self.scale)), | ||
('pca', PCA(self.proj_dim, random_state=random_state))] | ||
) | ||
|
||
self.alpha=alpha | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another spacing thing here (should be |
||
# Visual Parameters | ||
self.color = color | ||
self.colormap = colormap | ||
|
@@ -154,7 +159,8 @@ def transform(self, X, y=None, **kwargs): | |
def draw(self, **kwargs): | ||
X = self.pca_features_ | ||
if self.proj_dim == 2: | ||
self.ax.scatter(X[:, 0], X[:, 1], c=self.color, cmap=self.colormap) | ||
self.ax.scatter(X[:, 0], X[:, 1], c=self.color, cmap=self.colormap, | ||
alpha=self.alpha) | ||
if self.proj_features: | ||
x_vector = self.pca_components_[0] | ||
y_vector = self.pca_components_[1] | ||
|
@@ -177,7 +183,7 @@ def draw(self, **kwargs): | |
self.fig = plt.figure() | ||
self.ax = self.fig.add_subplot(111, projection='3d') | ||
self.ax.scatter(X[:, 0], X[:, 1], X[:, 2], | ||
c=self.color, cmap=self.colormap) | ||
c=self.color, cmap=self.colormap, alpha=self.alpha) | ||
if self.proj_features: | ||
x_vector = self.pca_components_[0] | ||
y_vector = self.pca_components_[1] | ||
|
@@ -215,7 +221,7 @@ def finalize(self, **kwargs): | |
|
||
def pca_decomposition(X, y=None, ax=None, features=None, scale=True, | ||
proj_dim=2, proj_features=False, color=None, | ||
colormap=palettes.DEFAULT_SEQUENCE, | ||
colormap=palettes.DEFAULT_SEQUENCE, alpha=1, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. set alpha = 0.75 |
||
random_state=None, **kwargs): | ||
"""Produce a two or three dimensional principal component plot of the data array ``X`` | ||
projected onto it's largest sequential principal components. It is common practice to scale the | ||
|
@@ -256,6 +262,10 @@ def pca_decomposition(X, y=None, ax=None, features=None, scale=True, | |
Optional string or matplotlib cmap to colorize lines. | ||
Use either color to colorize the lines on a per class basis or | ||
colormap to color them on a continuous scale. | ||
|
||
alpha : float, default: 1 | ||
Specify a transparency where 1 is completely opaque and 0 is completely | ||
transparent. This property makes densely clustered points more visible. | ||
|
||
random_state : int, RandomState instance or None, optional (default None) | ||
If input data is larger than 500x500 and the number of components to | ||
|
@@ -280,7 +290,7 @@ def pca_decomposition(X, y=None, ax=None, features=None, scale=True, | |
visualizer = PCADecomposition( | ||
ax=ax, features=features, scale=scale, proj_dim=proj_dim, | ||
proj_features=proj_features, color=color, colormap=colormap, | ||
random_state=random_state, **kwargs | ||
alpha=alpha, random_state=random_state,**kwargs | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like the spaces were modified here? We should reinsert the space after the last comma and before |
||
) | ||
|
||
# Fit and transform the visualizer (calls draw) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am unsure about this test, I will wait on a review from @rebeccabilbro
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the test looks good! Patching pyplot's set current axes method (
sca
) ensures that when we make the mock on line 207 (which is used to calledscatter
and retrieve thealpha
), matplotlib doesn't complain about it.