Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addressing issue #590: Adding train alpha and test alpha to residual #806

Merged
merged 7 commits into from
Apr 8, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 11 additions & 8 deletions tests/test_regressor/test_residuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def test_residuals_plot(self):
visualizer.score(self.data.X.test, self.data.y.test)
visualizer.finalize()

self.assert_images_similar(visualizer, tol=1, remove_legend=True)
self.assert_images_similar(visualizer, tol=10, remove_legend=True)
naresh-bachwani marked this conversation as resolved.
Show resolved Hide resolved

@pytest.mark.xfail(
sys.platform == 'win32', reason="images not close on windows (RMSE=32)"
Expand All @@ -258,7 +258,7 @@ def test_residuals_plot_no_histogram(self):
visualizer.score(self.data.X.test, self.data.y.test)
visualizer.finalize()

self.assert_images_similar(visualizer, tol=1, remove_legend=True)
self.assert_images_similar(visualizer, tol=10, remove_legend=True)

@pytest.mark.skipif(MPL_VERS_MAJ >= 2, reason="test requires mpl earlier than 2.0.2")
def test_hist_matplotlib_version(self, mock_toolkit):
Expand Down Expand Up @@ -300,7 +300,7 @@ def test_residuals_quick_method(self):
model, self.data.X.train, self.data.y.train, ax=ax, random_state=23
)

self.assert_images_similar(ax=ax, tol=1, remove_legend=True)
self.assert_images_similar(ax=ax, tol=10, remove_legend=True)

@pytest.mark.xfail(
sys.platform == 'win32', reason="images not close on windows (RMSE=32)"
Expand Down Expand Up @@ -334,7 +334,7 @@ def test_residuals_plot_pandas(self):
visualizer.score(X_test, y_test)
visualizer.finalize()

self.assert_images_similar(visualizer, tol=1, remove_legend=True)
self.assert_images_similar(visualizer, tol=10, remove_legend=True)

def test_score(self):
"""
Expand All @@ -356,11 +356,14 @@ def test_alpha_param(self, mock_sca):
"""
# Instantiate a residuals plot, provide custom train and test alphas
visualizer = ResidualsPlot(
Ridge(random_state=8893), alpha=0.3, hist=False
Ridge(random_state=8893), train_alpha=0.3,test_alpha=0.75, hist=False
)

alpha = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great job of adding this here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

'train_point': 0.3,
'test_point':0.75
}
# Test param gets set correctly
assert visualizer.alpha == 0.3
assert visualizer.alpha == alpha

visualizer.ax = mock.MagicMock()
visualizer.fit(self.data.X.train, self.data.y.train)
Expand All @@ -369,4 +372,4 @@ def test_alpha_param(self, mock_sca):
# Test that alpha was passed to internal matplotlib scatterplot
_, scatter_kwargs = visualizer.ax.scatter.call_args
assert "alpha" in scatter_kwargs
assert scatter_kwargs["alpha"] == 0.3
assert scatter_kwargs["alpha"] == 0.75
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Awesome, this shows a good understanding that alpha is 0.3 after visualizer.fit and then changes to 0.75 after visualizer.score.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

48 changes: 34 additions & 14 deletions yellowbrick/regressor/residuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def score(self, X, y=None, **kwargs):
return self.score_

def draw(self, y, y_pred):

"""
Parameters
----------
Expand Down Expand Up @@ -370,9 +371,15 @@ class ResidualsPlot(RegressionScoreVisualizer):
line_color : color, default: dark grey
Defines the color of the zero error line, can be any matplotlib color.

alpha : float, default: 0.75
Specify a transparency where 1 is completely opaque and 0 is completely
transparent. This property makes densely clustered points more visible.
train_alpha : float, default: 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any particular reason you didn't keep the alphas set to the default of 0.75?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No particular reason for that. It's more of a personal choice. It can be set as per the yellowbrick standards.

Specify a transparency for training data, where 1 is completely opaque
and 0 is completely transparent. This property makes densely clustered
points more visible.

test_alpha : float, default: 1
Specify a transparency for test data, where 1 is completely opaque
and 0 is completely transparent. This property makes densely clustered
points more visible.

kwargs : dict
Keyword arguments that are passed to the base class and may influence
Expand All @@ -396,8 +403,8 @@ class ResidualsPlot(RegressionScoreVisualizer):
The residuals histogram feature requires matplotlib 2.0.2 or greater.
"""
def __init__(self, model, ax=None, hist=True, train_color='b',
test_color='g', line_color=LINE_COLOR, alpha=0.75,
**kwargs):
test_color='g', line_color=LINE_COLOR, train_alpha=1,
test_alpha=1,**kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again here, we might want to set alphas to 0.75


super(ResidualsPlot, self).__init__(model, ax=ax, **kwargs)

Expand All @@ -422,7 +429,10 @@ def __init__(self, model, ax=None, hist=True, train_color='b',
# Store labels and colors for the legend ordered by call
self._labels, self._colors = [], []

self.alpha = alpha
self.alpha = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a minor suggestion and if you don't want to change it, it's ok. Maybe we could change the variable name to self.alphas to note that the variable contains multiple alphas

'train_point': train_alpha,
'test_point':test_alpha
naresh-bachwani marked this conversation as resolved.
Show resolved Hide resolved
}

@memoized
def hax(self):
Expand Down Expand Up @@ -496,7 +506,7 @@ def score(self, X, y=None, train=False, **kwargs):
y_pred = self.predict(X)
scores = y_pred - y
self.draw(y_pred, scores, train=train)

return score

def draw(self, y_pred, residuals, train=False, **kwargs):
Expand Down Expand Up @@ -528,17 +538,19 @@ def draw(self, y_pred, residuals, train=False, **kwargs):
if train:
color = self.colors['train_point']
label = "Train $R^2 = {:0.3f}$".format(self.train_score_)
alpha = self.alpha['train_point']
else:
color = self.colors['test_point']
label = "Test $R^2 = {:0.3f}$".format(self.test_score_)

alpha = self.alpha['test_point']

# Update the legend information
self._labels.append(label)
self._colors.append(color)

# Draw the residuals scatter plot
self.ax.scatter(
y_pred, residuals, c=color, alpha=self.alpha, label=label
y_pred, residuals, c=color, alpha=alpha, label=label
)

# Add residuals histogram
Expand Down Expand Up @@ -593,7 +605,8 @@ def residuals_plot(model,
test_color='g',
line_color=LINE_COLOR,
random_state=None,
alpha=0.75,
train_alpha=1,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The train and test alphas should both default to 0.75.

test_alpha=1,
**kwargs):
"""Quick method:

Expand Down Expand Up @@ -648,9 +661,15 @@ def residuals_plot(model,
random_state : int, RandomState instance or None, optional
Passed to the train_test_split function.

alpha : float, default: 0.75
Specify a transparency where 1 is completely opaque and 0 is completely
transparent. This property makes densely clustered points more visible.
train_alpha : float, default: 1
Specify a transparency for training data, where 1 is completely opaque
and 0 is completely transparent. This property makes densely clustered
points more visible.

test_alpha : float, default: 1
Specify a transparency for test data, where 1 is completely opaque and
0 is completely transparent. This property makes densely clustered
points more visible.

kwargs : dict
Keyword arguments that are passed to the base class and may influence
Expand All @@ -662,9 +681,10 @@ def residuals_plot(model,
Returns the axes that the residuals plot was drawn on.
"""
# Instantiate the visualizer

visualizer = ResidualsPlot(
model=model, ax=ax, hist=hist, train_color=train_color,
test_color=test_color, line_color=line_color, alpha=alpha,
test_color=test_color, line_color=line_color, train_alpha=train_alpha,test_alpha=test_alpha,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The 3rd line is a bit long... you could shorten it as such

 visualizer = ResidualsPlot(
        model=model, ax=ax, hist=hist, train_color=train_color,
        test_color=test_color, line_color=line_color,
        train_alpha=train_alpha,test_alpha=test_alpha,
        **kwargs
)

**kwargs
)

Expand Down