DistrictDataLabs · rebeccabilbro · Apr 14, 2019 · Apr 11, 2019 · Apr 11, 2019 · Apr 12, 2019
diff --git a/tests/test_features/test_pca.py b/tests/test_features/test_pca.py
@@ -21,10 +21,13 @@
 import pytest
 import numpy as np
 
+from unittest import mock
 from tests.dataset import Dataset
 from tests.base import VisualTestCase
+
 from yellowbrick.features.pca import *
 from yellowbrick.exceptions import YellowbrickError
+
 from sklearn.datasets import make_classification
 
 
@@ -188,3 +191,26 @@ def test_scale_true_3d_execption(self):
         with pytest.raises(ValueError, match=e):
             pca = PCADecomposition(**params)
             pca.fit(X)
+
+    @mock.patch('yellowbrick.features.pca.plt.sca', autospec=True)
+    def test_alpha_param(self, mock_sca):
+        """
+        Test that the user can supply an alpha param on instantiation
+        """
+        # Instantiate a PCA decomposition visualizer, provide custom alpha
+        params = {'alpha': 0.3, 'proj_dim': 2, 'random_state': 9932}
+        visualizer = PCADecomposition(**params).fit(self.dataset.X)
+        pca_array = visualizer.transform(self.dataset.X)
+        alpha=0.3
+        assert visualizer.alpha == alpha
+
+        visualizer.ax = mock.MagicMock()
+        visualizer.fit(self.dataset.X)
+        visualizer.transform(self.dataset.X)
+
+        # Test that alpha was passed to internal matplotlib scatterplot
+        _, scatter_kwargs = visualizer.ax.scatter.call_args
+        assert "alpha" in scatter_kwargs
+        assert scatter_kwargs["alpha"] == 0.3
+        assert pca_array.shape == (self.dataset.X.shape[0], 2)
+
diff --git a/yellowbrick/features/pca.py b/yellowbrick/features/pca.py
@@ -69,6 +69,10 @@ class PCADecomposition(MultiFeatureVisualizer):
         Optional string or matplotlib cmap to colorize lines.
         Use either color to colorize the lines on a per class basis or
         colormap to color them on a continuous scale.
+
+    alpha : float, default: 1
+        Specify a transparency where 1 is completely opaque and 0 is completely
+        transparent. Values below 1 make densely clustered points more visible.
 
     random_state : int, RandomState instance or None, optional (default None)
         If input data is larger than 500x500 and the number of components to
@@ -99,6 +103,7 @@ def __init__(self,
                  proj_features=False,
                  color=None,
                  colormap=palettes.DEFAULT_SEQUENCE,
+                 alpha = 1,
                  random_state=None,
                  **kwargs):
         super(PCADecomposition, self).__init__(ax=ax,
@@ -118,7 +123,7 @@ def __init__(self,
             [('scale', StandardScaler(with_std=self.scale)),
              ('pca', PCA(self.proj_dim, random_state=random_state))]
         )
-
+        self.alpha=alpha
         # Visual Parameters
         self.color = color
         self.colormap = colormap
@@ -154,7 +159,8 @@ def transform(self, X, y=None, **kwargs):
     def draw(self, **kwargs):
         X = self.pca_features_
         if self.proj_dim == 2:
-            self.ax.scatter(X[:, 0], X[:, 1], c=self.color, cmap=self.colormap)
+            self.ax.scatter(X[:, 0], X[:, 1], c=self.color, cmap=self.colormap, 
+                            alpha=self.alpha)
             if self.proj_features:
                 x_vector = self.pca_components_[0]
                 y_vector = self.pca_components_[1]
@@ -177,7 +183,7 @@ def draw(self, **kwargs):
             self.fig = plt.figure()
             self.ax = self.fig.add_subplot(111, projection='3d')
             self.ax.scatter(X[:, 0], X[:, 1], X[:, 2],
-                            c=self.color, cmap=self.colormap)
+                            c=self.color, cmap=self.colormap, alpha=self.alpha)
             if self.proj_features:
                 x_vector = self.pca_components_[0]
                 y_vector = self.pca_components_[1]
@@ -215,7 +221,7 @@ def finalize(self, **kwargs):
 
 def pca_decomposition(X, y=None, ax=None, features=None, scale=True,
                       proj_dim=2, proj_features=False, color=None,
-                      colormap=palettes.DEFAULT_SEQUENCE,
+                      colormap=palettes.DEFAULT_SEQUENCE, alpha=1,
                       random_state=None, **kwargs):
     """Produce a two or three dimensional principal component plot of the data array ``X``
     projected onto it's largest sequential principal components. It is common practice to scale the
@@ -256,6 +262,10 @@ def pca_decomposition(X, y=None, ax=None, features=None, scale=True,
         Optional string or matplotlib cmap to colorize lines.
         Use either color to colorize the lines on a per class basis or
         colormap to color them on a continuous scale.
+
+    alpha : float, default: 1
+        Specify a transparency where 1 is completely opaque and 0 is completely
+        transparent. Values below 1 make densely clustered points more visible.
 
     random_state : int, RandomState instance or None, optional (default None)
         If input data is larger than 500x500 and the number of components to
@@ -280,7 +290,7 @@ def pca_decomposition(X, y=None, ax=None, features=None, scale=True,
     visualizer = PCADecomposition(
         ax=ax, features=features, scale=scale, proj_dim=proj_dim,
         proj_features=proj_features, color=color, colormap=colormap,
-        random_state=random_state, **kwargs
+        alpha=alpha, random_state=random_state,**kwargs
     )
 
     # Fit and transform the visualizer (calls draw)