From 010c1e3402c746b7e03112f58f4406112e57b8e9 Mon Sep 17 00:00:00 2001 From: Denis Date: Mon, 18 Nov 2019 18:56:22 +0300 Subject: [PATCH] Add README.md for performance tests Minor changes in TestResults.add() --- sdc/tests/tests_perf/README.md | 59 ++++++++++++++++++++ sdc/tests/tests_perf/test_perf_series_str.py | 4 +- sdc/tests/tests_perf/test_perf_unicode.py | 4 +- sdc/tests/tests_perf/test_perf_utils.py | 9 +-- 4 files changed, 68 insertions(+), 8 deletions(-) create mode 100644 sdc/tests/tests_perf/README.md diff --git a/sdc/tests/tests_perf/README.md b/sdc/tests/tests_perf/README.md new file mode 100644 index 000000000..eb86adb39 --- /dev/null +++ b/sdc/tests/tests_perf/README.md @@ -0,0 +1,59 @@ +### Performance testing +based on Python unit testing framework where typical test suite looks like: +``` +class TestSuite(unittest.TestCase): + # how many times function will be executed for more accurate measurements + iter_number = 5 + + @classmethod + def setUpClass(cls): + """ + 1. Initialize object `TestResults` to work with results + 2. Define some testing attributes, e.g. 
list of data length + """ + cls.test_results = TestResults() + cls.total_data_length = [10**5, 10**6] + + @classmethod + def tearDownClass(cls): + """Manipulate result through object `TestResults`""" + cls.test_results.print() + + def test_series_smth(self): + """Test series.smth""" + pyfunc = series_smth + hpat_func = sdc.jit(pyfunc) + for data_length in self.total_data_length: + data = gen_some_data(data_length) + test_data = pd.Series(data) + + # calculate compilation time of `hpat_func` based on `pyfunc` + compile_results = calc_compilation(pyfunc, test_data, iter_number=self.iter_number) + # Warming up + hpat_func(test_data) + + # calculate execution and boxing/unboxing times of `hpat_func` + exec_times, boxing_times = get_times(hpat_func, test_data, iter_number=self.iter_number) + + # add these times to the results for further processing + self.test_results.add('test_series_smth', 'JIT', test_data.size, exec_times, + boxing_times, compile_results=compile_results) + + # calculate execution times of `pyfunc` + exec_times, _ = get_times(pyfunc, test_data, iter_number=self.iter_number) + + # add these times to the results for further processing + self.test_results.add('test_series_smth', 'Reference', test_data.size, exec_times) +``` + +##### Extras: +1. `test_perf_utils.py` contains utils for the development of the performance tests, +which can be extended if it is required. The utils use extra Python modules `xlrd` and `openpyxl` +which should be installed for correct work. +2. `__init__.py` defines all the test suites. + +##### How to run performance testing: +all:
+`python -m sdc.runtests sdc.tests.tests_perf`
+a single one:
+`python -m sdc.runtests sdc.tests.tests_perf.test_perf_series_str.TestSeriesStringMethods.test_series_str_len` diff --git a/sdc/tests/tests_perf/test_perf_series_str.py b/sdc/tests/tests_perf/test_perf_series_str.py index ac4d4afc2..01e40d1ce 100644 --- a/sdc/tests/tests_perf/test_perf_series_str.py +++ b/sdc/tests/tests_perf/test_perf_series_str.py @@ -190,10 +190,10 @@ def _test_series_str(self, pyfunc, name, input_data=None): hpat_func(test_data) exec_times, boxing_times = get_times(hpat_func, test_data, iter_number=self.iter_number) - self.test_results.add(name, 'JIT', test_data.size, data_width, exec_times, + self.test_results.add(name, 'JIT', test_data.size, exec_times, data_width, boxing_times, compile_results=compile_results, num_threads=self.num_threads) exec_times, _ = get_times(pyfunc, test_data, iter_number=self.iter_number) - self.test_results.add(name, 'Reference', test_data.size, data_width, exec_times, + self.test_results.add(name, 'Reference', test_data.size, exec_times, data_width, num_threads=self.num_threads) def test_series_str_len(self): diff --git a/sdc/tests/tests_perf/test_perf_unicode.py b/sdc/tests/tests_perf/test_perf_unicode.py index 417ee6323..12829ef6e 100644 --- a/sdc/tests/tests_perf/test_perf_unicode.py +++ b/sdc/tests/tests_perf/test_perf_unicode.py @@ -111,8 +111,8 @@ def _test_unicode(self, pyfunc, name): for data_size in self.total_data_size_bytes: for data_width in self.width: test_data = perf_data_gen(test_global_input_data_unicode_kind4, data_width, data_size) - self.test_results.add(name, 'JIT', len(test_data), data_width, hpat_func(test_data)) - self.test_results.add(name, 'Reference', len(test_data), data_width, pyfunc(test_data)) + self.test_results.add(name, 'JIT', len(test_data), hpat_func(test_data), data_width) + self.test_results.add(name, 'Reference', len(test_data), pyfunc(test_data), data_width) def test_unicode_split(self): self._test_unicode(usecase_split, 'unicode_split') diff --git 
a/sdc/tests/tests_perf/test_perf_utils.py b/sdc/tests/tests_perf/test_perf_utils.py index f7f370f72..7356504e6 100644 --- a/sdc/tests/tests_perf/test_perf_utils.py +++ b/sdc/tests/tests_perf/test_perf_utils.py @@ -168,17 +168,18 @@ def grouped_data(self): columns = ['median', 'min', 'max', 'compile', 'boxing'] return test_results_data.groupby(self.index)[columns].first().sort_values(self.index) - def add(self, test_name, test_type, data_size, data_width, test_results, + def add(self, test_name, test_type, data_size, test_results, data_width=None, boxing_results=None, compile_results=None, num_threads=config.NUMBA_NUM_THREADS): """ Add performance testing timing results into global storage test_name: Name of test (1st column in grouped result) - test_type: Type of test (2nd column in grouped result) - test_data_width: Scalability attribute for input data (3rd column in grouped result) + test_type: Type of test (3rd column in grouped result) + data_size: Size of input data (4th column in grouped result) test_results: List of timing results of the experiment + data_width: Scalability attribute for str input data (5th column in grouped result) boxing_results: List of timing results of the overhead (boxing/unboxing) compilation_time: Timing result of compilation - num_threads: Value from NUMBA_NUM_THREADS + num_threads: Value from NUMBA_NUM_THREADS (2nd column in grouped result) """ local_results = pandas.DataFrame({'name': test_name, 'N': num_threads,