From 010c1e3402c746b7e03112f58f4406112e57b8e9 Mon Sep 17 00:00:00 2001 From: Denis Date: Mon, 18 Nov 2019 18:56:22 +0300 Subject: [PATCH] Add README.md for performance tests Minor changes in TestResults.add() --- sdc/tests/tests_perf/README.md | 59 ++++++++++++++++++++ sdc/tests/tests_perf/test_perf_series_str.py | 4 +- sdc/tests/tests_perf/test_perf_unicode.py | 4 +- sdc/tests/tests_perf/test_perf_utils.py | 9 +-- 4 files changed, 68 insertions(+), 8 deletions(-) create mode 100644 sdc/tests/tests_perf/README.md diff --git a/sdc/tests/tests_perf/README.md b/sdc/tests/tests_perf/README.md new file mode 100644 index 000000000..eb86adb39 --- /dev/null +++ b/sdc/tests/tests_perf/README.md @@ -0,0 +1,59 @@ +### Performance testing +based on Python unit testing framework where typical test suite looks like: +``` +class TestSuite(unittest.TestCase): + # how many times function will be executed for more accurate measurements + iter_number = 5 + + @classmethod + def setUpClass(cls): + """ + 1. Initialize object `TestResults` to work with results + 2. Define some testing attributes, e.g. 
list of data length + """ + cls.test_results = TestResults() + cls.total_data_length = [10**5, 10**6] + + @classmethod + def tearDownClass(cls): + """Manipulate result through object `TestResults`""" + cls.test_results.print() + + def test_series_smth(self): + """Test series.smth""" + pyfunc = series_smth + hpat_func = sdc.jit(pyfunc) + for data_length in self.total_data_length: + data = gen_some_data(data_length) + test_data = pd.Series(data) + + # calculate compilation time of `hpat_func` based on `pyfunc` + compile_results = calc_compilation(pyfunc, test_data, iter_number=self.iter_number) + # Warming up + hpat_func(test_data) + + # calculate execution and boxing/unboxing times of `hpat_func` + exec_times, boxing_times = get_times(hpat_func, test_data, iter_number=self.iter_number) + + # add these times to the results for further processing + self.test_results.add('test_series_smth', 'JIT', test_data.size, exec_times, + boxing_times, compile_results=compile_results) + + # calculate execution times of `pyfunc` + exec_times, _ = get_times(pyfunc, test_data, iter_number=self.iter_number) + + # add these times to the results for further processing + self.test_results.add('test_series_smth', 'Reference', test_data.size, exec_times) +``` + +##### Extras: +1. `test_perf_utils.py` contains utils for the development of the performance tests, +which can be extended if it is required. The utils use extra Python modules `xlrd` and `openpyxl` +which should be installed for correct work. +2. `__init__.py` defines all the test suites. + +##### How to run performance testing: +all:
+`python -m sdc.runtests sdc.tests.tests_perf`
+a single one:
+`python -m sdc.runtests sdc.tests.tests_perf.test_perf_series_str.TestSeriesStringMethods.test_series_str_len` diff --git a/sdc/tests/tests_perf/test_perf_series_str.py b/sdc/tests/tests_perf/test_perf_series_str.py index ac4d4afc2..01e40d1ce 100644 --- a/sdc/tests/tests_perf/test_perf_series_str.py +++ b/sdc/tests/tests_perf/test_perf_series_str.py @@ -190,10 +190,10 @@ def _test_series_str(self, pyfunc, name, input_data=None): hpat_func(test_data) exec_times, boxing_times = get_times(hpat_func, test_data, iter_number=self.iter_number) - self.test_results.add(name, 'JIT', test_data.size, data_width, exec_times, + self.test_results.add(name, 'JIT', test_data.size, exec_times, data_width, boxing_times, compile_results=compile_results, num_threads=self.num_threads) exec_times, _ = get_times(pyfunc, test_data, iter_number=self.iter_number) - self.test_results.add(name, 'Reference', test_data.size, data_width, exec_times, + self.test_results.add(name, 'Reference', test_data.size, exec_times, data_width, num_threads=self.num_threads) def test_series_str_len(self): diff --git a/sdc/tests/tests_perf/test_perf_unicode.py b/sdc/tests/tests_perf/test_perf_unicode.py index 417ee6323..12829ef6e 100644 --- a/sdc/tests/tests_perf/test_perf_unicode.py +++ b/sdc/tests/tests_perf/test_perf_unicode.py @@ -111,8 +111,8 @@ def _test_unicode(self, pyfunc, name): for data_size in self.total_data_size_bytes: for data_width in self.width: test_data = perf_data_gen(test_global_input_data_unicode_kind4, data_width, data_size) - self.test_results.add(name, 'JIT', len(test_data), data_width, hpat_func(test_data)) - self.test_results.add(name, 'Reference', len(test_data), data_width, pyfunc(test_data)) + self.test_results.add(name, 'JIT', len(test_data), hpat_func(test_data), data_width) + self.test_results.add(name, 'Reference', len(test_data), pyfunc(test_data), data_width) def test_unicode_split(self): self._test_unicode(usecase_split, 'unicode_split') diff --git 
a/sdc/tests/tests_perf/test_perf_utils.py b/sdc/tests/tests_perf/test_perf_utils.py index f7f370f72..7356504e6 100644 --- a/sdc/tests/tests_perf/test_perf_utils.py +++ b/sdc/tests/tests_perf/test_perf_utils.py @@ -168,17 +168,18 @@ def grouped_data(self): columns = ['median', 'min', 'max', 'compile', 'boxing'] return test_results_data.groupby(self.index)[columns].first().sort_values(self.index) - def add(self, test_name, test_type, data_size, data_width, test_results, + def add(self, test_name, test_type, data_size, test_results, data_width=None, boxing_results=None, compile_results=None, num_threads=config.NUMBA_NUM_THREADS): """ Add performance testing timing results into global storage test_name: Name of test (1st column in grouped result) - test_type: Type of test (2nd column in grouped result) - test_data_width: Scalability attribute for input data (3rd column in grouped result) + test_type: Type of test (3rd column in grouped result) + data_size: Size of input data (4th column in grouped result) test_results: List of timing results of the experiment + data_width: Scalability attribute for str input data (5th column in grouped result) boxing_results: List of timing results of the overhead (boxing/unboxing) compilation_time: Timing result of compilation - num_threads: Value from NUMBA_NUM_THREADS + num_threads: Value from NUMBA_NUM_THREADS (2nd column in grouped result) """ local_results = pandas.DataFrame({'name': test_name, 'N': num_threads,