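"""Tests for the command-line interfaces of rsmtool, rsmeval, rsmcompare, rsmpredict, and rsmsummarize."""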
import os
import shlex
import subprocess
import sys
from pathlib import Path
from tempfile import TemporaryDirectory
from nose.tools import assert_raises, eq_, ok_
from rsmtool.test_utils import (check_file_output,
check_generated_output,
check_report,
collect_warning_messages_from_report,
rsmtool_test_dir)
class TestToolCLI:
@classmethod
def setUpClass(cls):
cls.temporary_directories = []
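        # paths to the configuration files and expected outputs for each tool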
cls.expected_json_dir = Path(rsmtool_test_dir) / 'data' / 'output'
common_dir = Path(rsmtool_test_dir) / 'data' / 'experiments'
cls.rsmtool_config_file = common_dir / 'lr' / 'lr.json'
cls.rsmeval_config_file = common_dir / 'lr-eval' / 'lr_evaluation.json'
cls.rsmcompare_config_file = common_dir / 'lr-self-compare' / 'rsmcompare.json'
cls.rsmpredict_config_file = common_dir / 'lr-predict' / 'rsmpredict.json'
cls.rsmsummarize_config_file = common_dir / 'lr-self-summary' / 'rsmsummarize.json'
cls.expected_rsmtool_output_dir = common_dir / 'lr' / 'output'
cls.expected_rsmeval_output_dir = common_dir / 'lr-eval' / 'output'
cls.expected_rsmcompare_output_dir = common_dir / 'lr-self-compare' / 'output'
cls.expected_rsmpredict_output_dir = common_dir / 'lr-predict' / 'output'
cls.expected_rsmsummarize_output_dir = common_dir / 'lr-self-summary' / 'output'
@classmethod
def tearDownClass(cls):
for tempdir in cls.temporary_directories:
tempdir.cleanup()
def validate_run_output(self, name, experiment_dir):
"""
A helper method that validates that the output of the "run"
subcommand for the ``name`` tool as stored in ``experiment_dir``
is as expected.
This is heavily inspired by ``rsmtool.test_utils.check_run_*()``
functions.
Parameters
----------
name : str
The name of the tool being tested.
experiment_dir : str
Path to rsmtool output directory.
"""
expected_output_dir = getattr(self, f"expected_{name}_output_dir")
# all tools except rsmcompare need to have their output files validated
if name in ['rsmtool', 'rsmeval', 'rsmsummarize', 'rsmpredict']:
# rsmpredict has its own set of files and it puts them right at the root
# of the output directory rather than under the "output" subdirectory
if name == 'rsmpredict':
output_dir = Path(experiment_dir)
output_files = [output_dir / 'predictions_with_metadata.csv']
else:
output_dir = Path(experiment_dir) / 'output'
output_files = list(output_dir.glob('*.csv'))
for output_file in output_files:
output_filename = output_file.name
expected_output_file = expected_output_dir / output_filename
if expected_output_file.exists():
check_file_output(str(output_file), str(expected_output_file))
# we need to do an extra check for rsmtool
if name == 'rsmtool':
check_generated_output(list(map(str, output_files)), 'lr', 'rsmtool')
# there's no report for rsmpredict
if name != 'rsmpredict':
report_dir = Path(experiment_dir) / 'report'
html_report = list(report_dir.glob('*_report.html'))[0]
check_report(str(html_report))
# rsmcompare only has a report and we want it to be warning-free
else:
report_dir = Path(experiment_dir)
html_report = list(report_dir.glob('*_report.html'))[0]
check_report(str(html_report), raise_warnings=False)
warning_msgs = collect_warning_messages_from_report(html_report)
warning_msgs = [msg for msg in warning_msgs if 'DeprecationWarning' not in msg]
eq_(len(warning_msgs), 0)
def validate_generate_output(self, name, output, subgroups=False):
"""
A helper method that validates that the ``output`` of the ``name`` tool
as output by the "generate" subcommand is as expected.
Parameters
----------
name : str
The name of the tool being tested.
output : str
The output of the "generate" subcommand from ``name`` tool
subgroups : bool, optional
If ``True``, the ``--subgroups`` was added to the "generate" command
for ``name``.
Defaults to ``False``.
"""
# load the appropriate expected json file and check that its contents
# match what was printed to stdout with our generate command
if subgroups:
expected_json_file = (self.expected_json_dir /
f"autogenerated_{name}_config_groups.json")
else:
expected_json_file = (self.expected_json_dir /
f"autogenerated_{name}_config.json")
with expected_json_file.open('r', encoding='utf-8') as expectedfh:
expected_output = expectedfh.read().strip()
eq_(output, expected_output)
def check_tool_cmd(self, context, subcmd, output_dir=None, working_dir=None):
"""
A helper method to test that the ``cmd`` invocation for ``context`` works
as expected.
Parameters
----------
context : str
Name of the tool being tested.
subcmd : str
The tool command-line invocation that is being tested.
output_dir : None, optional
Directory containing the output for "run" subcommands.
Will be ``None`` for "generate" subcommands.
working_dir : None, optional
If we want the "run" subcommand to be run in a specific
working directory.
"""
# if the BINPATH environment variable is defined
# use that to construct the command instead of just
# the name; this is needed for the CI builds where
# we do not always activate the conda environment
binpath = os.environ.get('BINPATH', None)
if binpath is not None:
cmd = f"{binpath}/{context} {subcmd}"
else:
cmd = f"{context} {subcmd}"
# run different checks depending on the given command type
cmd_type = 'generate' if ' generate' in cmd else 'run'
if cmd_type == 'run':
# for run subcommands, we can ignore the messages printed to stdout
proc = subprocess.run(shlex.split(cmd, posix='win' not in sys.platform),
check=True,
cwd=working_dir,
stderr=subprocess.PIPE,
stdout=subprocess.DEVNULL,
encoding='utf-8')
            # then check that the command ran successfully
ok_(proc.returncode == 0)
# and, finally, that the output was as expected
self.validate_run_output(context, output_dir)
else:
# for generate subcommands, we ignore the warnings printed to stderr
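            # determine whether the "--subgroups" option was included in the command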
subgroups = "--subgroups" in cmd
proc = subprocess.run(shlex.split(cmd, posix='win' not in sys.platform),
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
encoding='utf-8')
ok_(proc.returncode == 0)
self.validate_generate_output(context,
proc.stdout.strip(),
subgroups=subgroups)
def test_default_subcommand_is_run(self):
# test that the default subcommand for all contexts is "run"
# this applies to all tools
for context in ['rsmtool', 'rsmeval', 'rsmcompare', 'rsmpredict', 'rsmsummarize']:
            # create a temporary directory
tempdir = TemporaryDirectory()
self.temporary_directories.append(tempdir)
# and test the default subcommand
config_file = getattr(self, f"{context}_config_file")
subcmd = f"{config_file} {tempdir.name}"
yield self.check_tool_cmd, context, subcmd, tempdir.name, None
def test_run_without_output_directory(self):
# test that "run" subcommand works without an output directory
# this applies to all tools except rsmpredict
for context in ['rsmtool', 'rsmeval', 'rsmcompare', 'rsmsummarize']:
            # create a temporary directory
tempdir = TemporaryDirectory()
self.temporary_directories.append(tempdir)
# and test the run subcommand without an output directory
config_file = getattr(self, f"{context}_config_file")
subcmd = f"run {config_file}"
# we call check_tool_cmd with a working directory here to simulate
# the usage of the current working directory when the output directory
# is not specified
yield self.check_tool_cmd, context, subcmd, tempdir.name, tempdir.name
def check_run_bad_overwrite(self, cmd):
"""
A helper method that checks that the overwriting error is raised properly.
"""
with assert_raises(subprocess.CalledProcessError) as e:
_ = subprocess.run(shlex.split(cmd, posix='win' not in sys.platform),
check=True,
stderr=subprocess.DEVNULL,
stdout=subprocess.DEVNULL)
ok_('already contains' in e.msg)
ok_('OSError' in e.msg)
def test_run_bad_overwrite(self):
# test that the "run" command fails to overwrite when "-f" is not specified
# this applies to all tools except rsmpredict and rsmcompare
for context in ['rsmtool', 'rsmeval', 'rsmsummarize']:
tempdir = TemporaryDirectory()
self.temporary_directories.append(tempdir)
# make it look like we ran the tool in this directory already
os.makedirs(f"{tempdir.name}/output")
fake_file = Path(tempdir.name) / "output" / "foo.csv"
fake_file.touch()
config_file = getattr(self, f"{context}_config_file")
# if the BINPATH environment variable is defined
# use that to construct the command instead of just
# the name; this is needed for the CI builds where
# we do not always activate the conda environment
binpath = os.environ.get('BINPATH', None)
if binpath is not None:
cmd = f"{binpath}/{context} {config_file} {tempdir.name}"
else:
cmd = f"{context} {config_file} {tempdir.name}"
yield self.check_run_bad_overwrite, cmd
def test_run_good_overwrite(self):
# test that the "run" command does overwrite when "-f" is specified
# this applies to all tools except rsmpredict and rsmcompare
for context in ['rsmtool', 'rsmeval', 'rsmsummarize']:
tempdir = TemporaryDirectory()
self.temporary_directories.append(tempdir)
            # make it look like we ran the tool in this directory already
os.makedirs(f"{tempdir.name}/output")
fake_file = Path(tempdir.name) / "output" / "foo.csv"
fake_file.touch()
config_file = getattr(self, f"{context}_config_file")
subcmd = f"{config_file} {tempdir.name} -f"
yield self.check_tool_cmd, context, subcmd, tempdir.name, None
def test_rsmpredict_run_features_file(self):
"""
test that rsmpredict "run" command works with ``--features``.
"""
tempdir = TemporaryDirectory()
self.temporary_directories.append(tempdir)
subcmd = (f"{self.rsmpredict_config_file} {tempdir.name} "
f"--features {tempdir.name}/preprocessed_features.csv")
self.check_tool_cmd("rsmpredict", subcmd, tempdir.name)
# check the features file separately
file1 = Path(tempdir.name) / "preprocessed_features.csv"
file2 = self.expected_rsmpredict_output_dir / "preprocessed_features.csv"
check_file_output(str(file1), str(file2))
def test_generate(self):
# test that the "generate" subcommand for all tools works as expected
# in batch mode
for context in ['rsmtool', 'rsmeval', 'rsmcompare', 'rsmpredict', 'rsmsummarize']:
yield self.check_tool_cmd, context, "generate", None, None
def test_generate_with_groups(self):
# test that the "generate --subgroups" subcommand for all tools works
# as expected in batch mode
# this applies to all tools except rsmpredict and rsmsummarize
for context in ['rsmtool', 'rsmeval', 'rsmcompare']:
yield self.check_tool_cmd, context, "generate --subgroups", None, None