-
-
Notifications
You must be signed in to change notification settings - Fork 554
/
test_radviz.py
224 lines (188 loc) · 6.85 KB
/
test_radviz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# tests.test_features.test_radviz
# Test the RadViz feature analysis visualizers
#
# Author: Benjamin Bengfort
# Created: Fri Oct 07 12:19:19 2016 -0400
#
# Copyright (C) 2016 The scikit-yb developers
# For license information, see LICENSE.txt
#
# ID: test_radviz.py [01d5996] benjamin@bengfort.com $
"""
Test the RadViz feature analysis visualizers
"""
##########################################################################
## Imports
##########################################################################
import sys
import pytest
import numpy.testing as npt
from tests.base import IS_WINDOWS_OR_CONDA, VisualTestCase
from ..fixtures import Dataset
from sklearn.datasets import make_classification
from yellowbrick.datasets import load_occupancy
from yellowbrick.features.radviz import *
try:
import pandas as pd
except ImportError:
pd = None
##########################################################################
## Fixtures
##########################################################################
@pytest.fixture(scope="class")
def dataset(request):
    """
    Attach a synthetic three-class classification dataset to the test class

    The data is generated with a fixed random state and wrapped in a
    ``Dataset`` before being stored as the ``dataset`` attribute of the
    requesting class (``request.cls``). Nothing is returned; tests read the
    data through ``self.dataset``.
    """
    # Generator settings gathered in one place; the per-feature scale factors
    # range from 1.0 to 100.0, so the columns differ widely in magnitude.
    generator_options = dict(
        n_samples=200,
        n_features=5,
        n_informative=4,
        n_redundant=0,
        n_classes=3,
        n_clusters_per_class=1,
        random_state=451,
        flip_y=0,
        class_sep=3,
        scale=np.array([1.0, 2.0, 100.0, 20.0, 1.0]),
    )
    features, target = make_classification(**generator_options)

    request.cls.dataset = Dataset(features, target)
##########################################################################
## RadViz Tests
##########################################################################
@pytest.mark.usefixtures("dataset")
class TestRadViz(VisualTestCase):
    """
    Unit and image-comparison tests for the RadViz visualizer

    The ``dataset`` fixture is requested for the whole class; the tests on
    synthetic data read it through ``self.dataset``.
    """

    def test_normalize_x(self):
        """
        RadViz.normalize reproduces a precomputed reference matrix
        """
        # Raw input: six rows by five columns
        raw = np.array(
            [
                [2.318, 2.727, 4.260, 7.212, 4.792],
                [2.315, 2.726, 4.295, 7.140, 4.783],
                [2.315, 2.724, 4.260, 7.135, 4.779],
                [2.110, 3.609, 4.330, 7.985, 5.595],
                [2.110, 3.626, 4.330, 8.203, 5.621],
                [2.110, 3.620, 4.470, 8.210, 5.612],
            ]
        )

        # Reference output: in every column the smallest raw entry maps to 0.0
        # and the largest to 1.0
        expected = np.array(
            [
                [1.0, 0.00332594, 0.0, 0.07162791, 0.01543943],
                [0.98557692, 0.00221729, 0.16666667, 0.00465116, 0.00475059],
                [0.98557692, 0.0, 0.0, 0.0, 0.0],
                [0.0, 0.98115299, 0.33333333, 0.79069767, 0.96912114],
                [0.0, 1.0, 0.33333333, 0.99348837, 1.0],
                [0.0, 0.99334812, 1.0, 1.0, 0.98931116],
            ]
        )

        npt.assert_array_almost_equal(RadViz.normalize(raw), expected)

    def test_radviz(self):
        """
        Image similarity for a default RadViz on the synthetic dataset
        """
        viz = RadViz()
        features, target = self.dataset.X, self.dataset.y
        viz.fit_transform(features, target)
        viz.finalize()

        self.assert_images_similar(viz, tol=0.25)

    def test_radviz_alpha(self):
        """
        Image similarity for a RadViz drawn with alpha=0.5
        """
        viz = RadViz(alpha=0.5)
        features, target = self.dataset.X, self.dataset.y
        viz.fit_transform(features, target)
        viz.finalize()

        self.assert_images_similar(viz, tol=0.25)

    @pytest.mark.xfail(
        IS_WINDOWS_OR_CONDA,
        reason="font rendering different in OS and/or Python; see #892",
    )
    @pytest.mark.skipif(pd is None, reason="test requires pandas")
    def test_integrated_radviz_with_pandas(self):
        """
        RadViz on the occupancy dataset supplied as pandas objects
        """
        occupancy = load_occupancy(return_dataset=True)
        frame, series = occupancy.to_pandas()

        # Guard against the loader silently handing back another container
        assert isinstance(frame, pd.DataFrame)
        assert isinstance(series, pd.Series)

        # Fit, finalize and compare the rendered figure
        viz = RadViz()
        viz.fit_transform(frame, series)
        viz.finalize()

        self.assert_images_similar(viz, tol=0.1)

    @pytest.mark.xfail(sys.platform == "win32", reason="images not close on windows")
    def test_integrated_radviz_with_numpy(self):
        """
        RadViz on the occupancy dataset supplied as numpy arrays
        """
        occupancy = load_occupancy(return_dataset=True)
        matrix, target = occupancy.to_numpy()

        # Guard against the loader silently handing back another container
        assert isinstance(matrix, np.ndarray)
        assert isinstance(target, np.ndarray)

        # Fit, finalize and compare the rendered figure
        viz = RadViz()
        viz.fit_transform(matrix, target)
        viz.finalize()

        self.assert_images_similar(viz, tol=0.1)

    @pytest.mark.xfail(sys.platform == "win32", reason="images not close on windows")
    @pytest.mark.skipif(pd is None, reason="test requires pandas")
    def test_integrated_radviz_pandas_classes_features(self):
        """
        RadViz with explicit classes and features on pandas input
        """
        # The occupancy data comes from the dataset loader, not the class fixture
        occupancy = load_occupancy(return_dataset=True)
        frame, series = occupancy.to_pandas()
        assert isinstance(frame, pd.DataFrame)
        assert isinstance(series, pd.Series)

        # Class names ordered by the value each one maps to in the metadata
        label_map = occupancy.meta["labels"]
        class_names = sorted(label_map, key=lambda name: label_map[name])

        # Restrict the frame to three named columns and cast the target to int
        wanted = ["temperature", "relative humidity", "light"]
        frame = frame[wanted]
        series = series.astype(int)

        # Fit, finalize and compare the rendered figure
        viz = RadViz(features=wanted, classes=class_names)
        viz.fit_transform(frame, series)
        viz.finalize()

        self.assert_images_similar(viz, tol=0.1)

    @pytest.mark.xfail(sys.platform == "win32", reason="images not close on windows")
    def test_integrated_radviz_numpy_classes_features(self):
        """
        RadViz with explicit classes and features on numpy input
        """
        # The occupancy data comes from the dataset loader, not the class fixture
        occupancy = load_occupancy(return_dataset=True)
        matrix, target = occupancy.to_numpy()
        assert isinstance(matrix, np.ndarray)
        assert isinstance(target, np.ndarray)

        # Class names ordered by the value each one maps to in the metadata
        label_map = occupancy.meta["labels"]
        class_names = sorted(label_map, key=lambda name: label_map[name])

        # Keep the first three feature names and the matching first three columns
        wanted = occupancy.meta["features"][:3]
        matrix = matrix[:, :3]
        target = target.astype(int)

        # Fit, finalize and compare the rendered figure
        viz = RadViz(features=wanted, classes=class_names)
        viz.fit_transform(matrix, target)
        viz.finalize()

        self.assert_images_similar(viz, tol=0.1)

    def test_radviz_quick_method(self):
        """
        The radviz quick method with an explicit list of colors
        """
        # show=False is passed so the call hands back the visualizer for comparison
        # NOTE(review): presumably this also suppresses display - confirm
        quick_options = {"colors": ["cyan", "magenta", "yellow"], "show": False}
        viz = radviz(*self.dataset, **quick_options)

        self.assert_images_similar(viz)