grass.gunittest: Set max time (timeout) per test (#2172)

This sets max execution time (timeout) per test file. After that, the test is ended. The subprocess timeout mechanism is used for that. Sub-processes of the test process may (likely will) keep running, but the testing continues. Test which times out is counted as a failure. Additional value is passed around in the code to indicate timeout, but it is not treated as a separate type of result. However, it is clearly visible in the outputs. The terminology (such as timeout versus max seconds per test) and the interface (config and messages) is similar to pytest. The value for timeout comes from the test for t.rast.algebra which takes 3.5 mins. This also changes the subprocess code to use higher-level subprocess.run instead of subprocess.Popen. It still uses subprocess.PIPE instead of capture_output for Python 3.6 compatibility (we still have Ubuntu 18.04 CI now on main).
OSGeo · Feb 7, 2022 · 89fe85c · 89fe85c
1 parent 9b3fb6f
commit 89fe85c
Show file tree

Hide file tree

Showing 4 changed files with 103 additions and 35 deletions.
diff --git a/.gunittest.cfg b/.gunittest.cfg
@@ -37,3 +37,10 @@ exclude =
     vector/v.out.lidar/testsuite/test_v_out_lidar.py
     vector/v.what/testsuite/test_vwhat_layers.py
     vector/v.what/testsuite/test_vwhat_ncspm.py
+
+# Maximum time for execution of one test file (not a test function)
+# after which test is terminated (which may not terminate child processes
+# from that test).
+# Using 5 minutes as maximum (average test time should be much shorter,
+# couple seconds per file, median should be around one second).
+timeout = 300
diff --git a/python/grass/gunittest/invoker.py b/python/grass/gunittest/invoker.py
@@ -64,7 +64,11 @@ def update_keyval_file(filename, module, returncode):
     keyval["name"] = module.name
     keyval["tested_dir"] = module.tested_dir
     if "status" not in keyval.keys():
-        keyval["status"] = "failed" if returncode else "passed"
+        if returncode is None or returncode:
+            status = "failed"
+        else:
+            status = "passed"
+        keyval["status"] = status
     keyval["returncode"] = returncode
     keyval["test_file_authors"] = test_file_authors
 
@@ -88,6 +92,7 @@ def __init__(
         clean_before=True,
         testsuite_dir="testsuite",
         file_anonymizer=None,
+        timeout=None,
     ):
         """
 
@@ -97,6 +102,7 @@ def __init__(
         :param bool clean_before: if mapsets, outputs, and results
             should be removed before the tests start
             (advantageous when the previous run left everything behind)
+        :param float timeout: maximum duration of one test in seconds
         """
         self.start_dir = start_dir
         self.clean_mapsets = clean_mapsets
@@ -112,6 +118,8 @@ def __init__(
         else:
             self._file_anonymizer = file_anonymizer
 
+        self.timeout = timeout
+
     def _create_mapset(self, gisdbase, location, module):
         """Create mapset according to information in module.
 
@@ -136,7 +144,7 @@ def _create_mapset(self, gisdbase, location, module):
         )
         return mapset, mapset_dir
 
-    def _run_test_module(self, module, results_dir, gisdbase, location):
+    def _run_test_module(self, module, results_dir, gisdbase, location, timeout):
         """Run one test file."""
         self.testsuite_dirs[module.tested_dir].append(module.name)
         cwd = os.path.join(results_dir, module.tested_dir, module.name)
@@ -175,13 +183,7 @@ def _run_test_module(self, module, results_dir, gisdbase, location):
             # ignoring shebang line to use current Python
             # and also pass parameters to it
             # add also '-Qwarn'?
-            if sys.version_info.major >= 3:
-                args = [sys.executable, "-tt", module.abs_file_path]
-            else:
-                args = [sys.executable, "-tt", "-3", module.abs_file_path]
-            p = subprocess.Popen(
-                args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE
-            )
+            args = [sys.executable, "-tt", module.abs_file_path]
         elif module.file_type == "sh":
             # ignoring shebang line to pass parameters to shell
             # expecting system to have sh or something compatible
@@ -195,23 +197,39 @@ def _run_test_module(self, module, results_dir, gisdbase, location):
             #                command is used to control an if, elif, while, or
             #                until; or if the command is the left hand operand
             #                of an '&&' or '||' operator.
-            p = subprocess.Popen(
-                ["sh", "-e", "-x", module.abs_file_path],
-                cwd=cwd,
-                env=env,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-            )
+            args = ["sh", "-e", "-x", module.abs_file_path]
         else:
-            p = subprocess.Popen(
-                [module.abs_file_path],
+            args = [module.abs_file_path]
+        try:
+            p = subprocess.run(
+                args,
                 cwd=cwd,
                 env=env,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
+                timeout=timeout,
+                check=False,
             )
-        stdout, stderr = p.communicate()
-        returncode = p.returncode
+            stdout = p.stdout
+            stderr = p.stderr
+            returncode = p.returncode
+            # No timeout to report. Non-none time out values are used to indicate
+            # the timeout case.
+            timed_out = None
+        except subprocess.TimeoutExpired as error:
+            stdout = error.stdout
+            stderr = error.stderr
+            if stdout is None:
+                stdout = ""
+            if stderr is None:
+                stderr = f"Process has timed out in {timeout}s and produced no error output.\n"
+            # Return code is None if the process times out.
+            # Rest of the code expects success to evaluate as False.
+            # So, we assign a failing return code.
+            # In any case, we treat the timeout case as a failure.
+            returncode = 1
+            timed_out = timeout
+
         encodings = [_get_encoding(), "utf8", "latin-1", "ascii"]
 
         def try_decode(data, encodings):
@@ -256,12 +274,21 @@ def try_decode(data, encodings):
             stdout=stdout_path,
             stderr=stderr_path,
             test_summary=test_summary,
+            timed_out=timed_out,
         )
         # TODO: add some try-except or with for better error handling
         os.remove(gisrc)
         # TODO: only if clean up
         if self.clean_mapsets:
-            shutil.rmtree(mapset_dir)
+            try:
+                shutil.rmtree(mapset_dir)
+            except OSError:
+                # If there are still running processes (e.g., in timeout case),
+                # the cleaning may fail on non-empty directory. Although this does
+                # not guarantee removal of the directory, try it again, but this
+                # time ignore errors if something happens. (More file can appear
+                # later on if the processes are still running.)
+                shutil.rmtree(mapset_dir, ignore_errors=True)
 
     def run_in_location(self, gisdbase, location, location_type, results_dir, exclude):
         """Run tests in a given location
@@ -311,6 +338,7 @@ def run_in_location(self, gisdbase, location, location_type, results_dir, exclud
                 results_dir=results_dir,
                 gisdbase=gisdbase,
                 location=location,
+                timeout=self.timeout,
             )
         self.reporter.finish()
 

diff --git a/python/grass/gunittest/main.py b/python/grass/gunittest/main.py
@@ -224,6 +224,7 @@ def main():
     invoker = GrassTestFilesInvoker(
         start_dir=start_dir,
         file_anonymizer=FileAnonymizer(paths_to_remove=[abs_start_dir]),
+        timeout=config.getfloat("timeout", None),
     )
     # TODO: remove also results dir from files
     # as an enhancemnt

diff --git a/python/grass/gunittest/reporters.py b/python/grass/gunittest/reporters.py
@@ -498,9 +498,13 @@ def tail(filename, n):
     return html.getvalue()
 
 
-def returncode_to_html_text(returncode):
+def returncode_to_html_text(returncode, timed_out=None):
     if returncode:
-        return '<span style="color: red">FAILED</span>'
+        if timed_out is not None:
+            extra = f" (timeout >{timed_out}s)"
+        else:
+            extra = ""
+        return f'<span style="color: red">FAILED{extra}</span>'
     else:
         # alternatives: SUCCEEDED, passed, OK
         return '<span style="color: green">succeeded</span>'
@@ -661,9 +665,16 @@ def start_file_test(self, module):
         super(GrassTestFilesHtmlReporter, self).start_file_test(module)
         self.main_index.flush()  # to get previous lines to the report
 
-    def end_file_test(self, module, cwd, returncode, stdout, stderr, test_summary):
+    def end_file_test(
+        self, module, cwd, returncode, stdout, stderr, test_summary, timed_out=None
+    ):
         super(GrassTestFilesHtmlReporter, self).end_file_test(
-            module=module, cwd=cwd, returncode=returncode, stdout=stdout, stderr=stderr
+            module=module,
+            cwd=cwd,
+            returncode=returncode,
+            stdout=stdout,
+            stderr=stderr,
+            timed_out=timed_out,
         )
         # considering others according to total is OK when we more or less
         # know that input data make sense (total >= errors + failures)
@@ -707,7 +718,7 @@ def end_file_test(self, module, cwd, returncode, stdout, stderr, test_summary):
             "<tr>".format(
                 d=to_web_path(module.tested_dir),
                 m=module.name,
-                status=returncode_to_html_text(returncode),
+                status=returncode_to_html_text(returncode, timed_out),
                 stests=successes,
                 ftests=bad_ones,
                 ntests=total,
@@ -759,7 +770,7 @@ def end_file_test(self, module, cwd, returncode, stdout, stderr, test_summary):
                 file_path=os.path.join(
                     module.tested_dir, "testsuite", module.name + "." + module.file_type
                 ),
-                status=returncode_to_html_text(returncode),
+                status=returncode_to_html_text(returncode, timed_out),
                 stests=successes,
                 ftests=bad_ones,
                 ntests=total,
@@ -905,9 +916,16 @@ def finish(self):
             text = keyvalue_to_text(summary, sep="=", vsep="\n", isep=",")
             summary_file.write(text)
 
-    def end_file_test(self, module, cwd, returncode, stdout, stderr, test_summary):
+    def end_file_test(
+        self, module, cwd, returncode, stdout, stderr, test_summary, timed_out=None
+    ):
         super(GrassTestFilesKeyValueReporter, self).end_file_test(
-            module=module, cwd=cwd, returncode=returncode, stdout=stdout, stderr=stderr
+            module=module,
+            cwd=cwd,
+            returncode=returncode,
+            stdout=stdout,
+            stderr=stderr,
+            timed_out=timed_out,
         )
         # TODO: considering others according to total, OK?
         # here we are using 0 for total but HTML reporter is using None
@@ -997,9 +1015,16 @@ def start_file_test(self, module):
         # get the above line and all previous ones to the report
         self._stream.flush()
 
-    def end_file_test(self, module, cwd, returncode, stdout, stderr, test_summary):
+    def end_file_test(
+        self, module, cwd, returncode, stdout, stderr, test_summary, timed_out=None
+    ):
         super(GrassTestFilesTextReporter, self).end_file_test(
-            module=module, cwd=cwd, returncode=returncode, stdout=stdout, stderr=stderr
+            module=module,
+            cwd=cwd,
+            returncode=returncode,
+            stdout=stdout,
+            stderr=stderr,
+            timed_out=timed_out,
         )
 
         if returncode:
@@ -1010,7 +1035,9 @@ def end_file_test(self, module, cwd, returncode, stdout, stderr, test_summary):
                 self._stream.write(text.read())
             self._stream.write(width * "=")
             self._stream.write("\n")
-            self._stream.write("FAILED {file}".format(file=module.file_path))
+            self._stream.write(f"FAILED {module.file_path}")
+            if timed_out:
+                self._stream.write(f" - Timeout >{timed_out}s")
             num_failed = test_summary.get("failures", 0)
             num_failed += test_summary.get("errors", 0)
             if num_failed:
@@ -1127,13 +1154,18 @@ def report_for_dir(self, root, directory, test_files):
 
             # TODO: keyvalue method should have types for keys function
             # perhaps just the current post processing function is enough
-            test_file_authors = summary["test_file_authors"]
+            test_file_authors = summary.get("test_file_authors")
+            if not test_file_authors:
+                test_file_authors = []
             if type(test_file_authors) is not list:
                 test_file_authors = [test_file_authors]
             test_files_authors.extend(test_file_authors)
 
             file_total += 1
-            file_successes += 0 if summary["returncode"] else 1
+            # Use non-zero return code in case it is missing.
+            # (This can happen when the test has timed out.)
+            return_code = summary.get("returncode", 1)
+            file_successes += 0 if return_code else 1
 
             pass_per = success_to_html_percent(total=total, successes=successes)
             row = (
@@ -1144,7 +1176,7 @@ def report_for_dir(self, root, directory, test_files):
                 "<td>{ftests}</td><td>{ptests}</td>"
                 "<tr>".format(
                     f=test_file_name,
-                    status=returncode_to_html_text(summary["returncode"]),
+                    status=returncode_to_html_text(return_code),
                     stests=successes,
                     ftests=bad_ones,
                     ntests=total,