Add --base option to test-perf.
Alan Jeffrey committed Nov 14, 2017
1 parent 657b233 commit 109df33
Showing 6 changed files with 91 additions and 31 deletions.
34 changes: 34 additions & 0 deletions etc/ci/performance/README.md
@@ -66,6 +66,40 @@ Individual test can be run by `python -m pytest <filename>`:

# Advanced Usage

## Reducing variance

Running the same performance test repeatedly produces a lot of variance, much
of it caused by the OS the test is running on. Experimentally, the measures
which seem to tame this randomness the most are a) disabling CPU frequency
changes, b) increasing the priority of the tests, c) running on one CPU core,
d) loading files directly rather than via localhost HTTP, and e) serving
files from memory rather than from disk.

First run the performance tests normally (this downloads the test suite):
```
./mach test-perf
```
Disable CPU frequency changes, e.g. on Linux:
```
sudo cpupower frequency-set --min 3.5GHz --max 3.5GHz
```
Copy the test files to a `tmpfs` filesystem such as `/run/user/`;
for example, if your `uid` is `1000`:
```
cp -r etc/ci/performance /run/user/1000
```
Then run the test suite on one core, at high priority, using a `file://` base URL:
```
sudo nice --20 chrt -r 99 sudo -u *userid* taskset 1 ./mach test-perf --base file:///run/user/1000/performance/
```
Together, these fixes seem to bring the variance down to under 5% for most
individual tests, and under 0.5% for the total. The sketch below illustrates
how a `file://` base rewrites the manifest URLs.
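
Under the hood, `--base` makes `runner.py` resolve each manifest URL against
the given base (see the `testcase_url` helper added to `runner.py` further
down this commit). A minimal sketch of the mapping, assuming a hypothetical
tp5-style manifest entry; real entries come from the downloaded manifest:
```
from urllib.parse import urlsplit, urlunsplit, urljoin

# Hypothetical manifest entry; tp5 entries hardwire http://localhost/ as the host.
testcase = "http://localhost/page_load_test/example.com/index.html"
base = "file:///run/user/1000/performance/"

# Same rewriting as testcase_url() in runner.py: drop the scheme and host,
# keep the path, and resolve it relative to the chosen base URL.
scheme, netloc, path, query, fragment = urlsplit(testcase)
relative = urlunsplit(('', '', '.' + path, query, fragment))
print(urljoin(base, relative))
# file:///run/user/1000/performance/page_load_test/example.com/index.html
```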

(IRC logs:
[2017-11-09](https://mozilla.logbot.info/servo/20171109#c13829674) |
[2017-11-10](https://mozilla.logbot.info/servo/20171110#c13835736)
)

## Test Perfherder Locally

If you want to test the data submission code in `submit_to_perfherder.py` without getting a credential for the production server, you can setup a local treeherder VM. If you don't need to test `submit_to_perfherder.py`, you can skip this step.
4 changes: 2 additions & 2 deletions etc/ci/performance/gecko_driver.py
@@ -71,11 +71,11 @@ def generate_placeholder(testcase):
return [timings]


def run_gecko_test(testcase, timeout, is_async):
def run_gecko_test(testcase, url, timeout, is_async):
with create_gecko_session() as driver:
driver.set_page_load_timeout(timeout)
try:
driver.get(testcase)
driver.get(url)
except TimeoutException:
print("Timeout!")
return generate_placeholder(testcase)
57 changes: 41 additions & 16 deletions etc/ci/performance/runner.py
@@ -11,6 +11,7 @@
import subprocess
from functools import partial
from statistics import median, StatisticsError
from urllib.parse import urlsplit, urlunsplit, urljoin


def load_manifest(filename):
@@ -31,6 +32,17 @@ def parse_manifest(text):
return output


def testcase_url(base, testcase):
# The tp5 manifest hardwires http://localhost/ as the base URL,
# which requires running the server as root in order to open
# the server on port 80. To allow non-root users to run the test
# case, we take the URL to be relative to a base URL.
(scheme, netloc, path, query, fragment) = urlsplit(testcase)
relative_url = urlunsplit(('', '', '.' + path, query, fragment))
absolute_url = urljoin(base, relative_url)
return absolute_url


def execute_test(url, command, timeout):
try:
return subprocess.check_output(
@@ -46,11 +58,11 @@ def execute_test(url, command, timeout):
return ""


def run_servo_test(url, timeout, is_async):
def run_servo_test(testcase, url, timeout, is_async):
if is_async:
print("Servo does not support async test!")
# Return a placeholder
return parse_log("", url)
return parse_log("", testcase, url)

ua_script_path = "{}/user-agent-js".format(os.getcwd())
command = [
@@ -71,11 +83,11 @@ def run_servo_test(url, timeout, is_async):
' '.join(command)
))
except subprocess.TimeoutExpired:
print("Test FAILED due to timeout: {}".format(url))
return parse_log(log, url)
print("Test FAILED due to timeout: {}".format(testcase))
return parse_log(log, testcase, url)


def parse_log(log, testcase):
def parse_log(log, testcase, url):
blocks = []
block = []
copy = False
@@ -112,11 +124,11 @@ def parse_block(block):

return timing

def valid_timing(timing, testcase=None):
def valid_timing(timing, url=None):
if (timing is None or
testcase is None or
timing.get('title') == 'Error response' or
timing.get('testcase') != testcase):
timing.get('testcase') != url):
return False
else:
return True
@@ -152,8 +164,15 @@ def create_placeholder(testcase):
"domComplete": -1,
}

valid_timing_for_case = partial(valid_timing, testcase=testcase)
timings = list(filter(valid_timing_for_case, map(parse_block, blocks)))
# Set the testcase field to contain the original testcase name,
# rather than the url.
def set_testcase(timing, testcase=None):
timing['testcase'] = testcase
return timing

valid_timing_for_case = partial(valid_timing, url=url)
set_testcase_for_case = partial(set_testcase, testcase=testcase)
timings = list(map(set_testcase_for_case, filter(valid_timing_for_case, map(parse_block, blocks))))

if len(timings) == 0:
print("Didn't find any perf data in the log, test timeout?")
@@ -167,10 +186,11 @@ def create_placeholder(testcase):
return timings


def filter_result_by_manifest(result_json, manifest):
def filter_result_by_manifest(result_json, manifest, base):
filtered = []
for name, is_async in manifest:
match = [tc for tc in result_json if tc['testcase'] == name]
url = testcase_url(base, name)
match = [tc for tc in result_json if tc['testcase'] == url]
if len(match) == 0:
raise Exception(("Missing test result: {}. This will cause a "
"discontinuity in the treeherder graph, "
@@ -201,9 +221,9 @@ def take_result_median(result_json, expected_runs):
return median_results


def save_result_json(results, filename, manifest, expected_runs):
def save_result_json(results, filename, manifest, expected_runs, base):

results = filter_result_by_manifest(results, manifest)
results = filter_result_by_manifest(results, manifest, base)
results = take_result_median(results, expected_runs)

if len(results) == 0:
@@ -245,6 +265,10 @@ def main():
help="the test manifest in tp5 format")
parser.add_argument("output_file",
help="filename for the output json")
parser.add_argument("--base",
type=str,
default='http://localhost:8000/',
help="the base URL for tests. Default: http://localhost:8000/")
parser.add_argument("--runs",
type=int,
default=20,
@@ -270,18 +294,19 @@ def main():
testcases = load_manifest(args.tp5_manifest)
results = []
for testcase, is_async in testcases:
url = testcase_url(args.base, testcase)
for run in range(args.runs):
print("Running test {}/{} on {}".format(run + 1,
args.runs,
testcase))
url))
# results will be a mixure of timings dict and testcase strings
# testcase string indicates a failed test
results += run_test(testcase, args.timeout, is_async)
results += run_test(testcase, url, args.timeout, is_async)
print("Finished")
# TODO: Record and analyze other performance.timing properties

print(format_result_summary(results))
save_result_json(results, args.output_file, testcases, args.runs)
save_result_json(results, args.output_file, testcases, args.runs, args.base)

except KeyboardInterrupt:
print("Test stopped by user, saving partial result")
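
For context on the `parse_log` changes above: `valid_timing` now checks each
parsed timing against the URL that was actually loaded, and `set_testcase`
then relabels accepted timings with the original manifest name. A minimal,
self-contained sketch of that filter-then-relabel step (the helper bodies
mirror the diff above; the timing dict and names are illustrative):
```
from functools import partial

def valid_timing(timing, url=None):
    # Accept a timing only if it was recorded against the URL we loaded.
    return not (timing is None or
                url is None or
                timing.get('title') == 'Error response' or
                timing.get('testcase') != url)

def set_testcase(timing, testcase=None):
    # Relabel the timing with the manifest name, as parse_log does above.
    timing['testcase'] = testcase
    return timing

testcase = "http://localhost/page_load_test/example.com/index.html"  # hypothetical
url = "file:///run/user/1000/performance/page_load_test/example.com/index.html"
parsed = [{'testcase': url, 'domComplete': 100}]  # stand-in for map(parse_block, blocks)

timings = list(map(partial(set_testcase, testcase=testcase),
                   filter(partial(valid_timing, url=url), parsed)))
assert timings == [{'testcase': testcase, 'domComplete': 100}]
```
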
8 changes: 7 additions & 1 deletion etc/ci/performance/test_all.sh
@@ -8,6 +8,8 @@ set -o errexit
set -o nounset
set -o pipefail

base="http://localhost:8000"

while (( "${#}" ))
do
case "${1}" in
@@ -22,6 +24,10 @@
--submit)
submit=1
;;
--base)
base="${2}"
shift
;;
*)
echo "Unknown option ${1}."
exit
@@ -46,7 +52,7 @@ MANIFEST="page_load_test/example.manifest" # A manifest that excludes
PERF_FILE="output/perf-$(date +%s).json"

echo "Running tests"
python3 runner.py ${engine} --runs 3 --timeout "${timeout}" \
python3 runner.py ${engine} --runs 4 --timeout "${timeout}" --base "${base}" \
"${MANIFEST}" "${PERF_FILE}"

if [[ "${submit:-}" ]];
11 changes: 1 addition & 10 deletions etc/ci/performance/test_perf.sh
@@ -35,13 +35,4 @@ mkdir -p servo
mkdir -p output # Test result will be saved to output/perf-<timestamp>.json
./git_log_to_json.sh > servo/revision.json

if [[ "${#}" -eq 1 ]]; then
if [[ "${1}" = "--submit" ]]; then
./test_all.sh --servo --submit
else
echo "Unrecognized argument: ${1}; Ignore and proceed without submitting"
./test_all.sh --servo
fi
else
./test_all.sh --servo
fi
./test_all.sh --servo ${*}
8 changes: 6 additions & 2 deletions python/servo/testing_commands.py
@@ -173,14 +173,18 @@ def suite_for_path(self, path_arg):
@Command('test-perf',
description='Run the page load performance test',
category='testing')
@CommandArgument('--submit', default=False, action="store_true",
@CommandArgument('--base', default=None,
help="the base URL for testcases")
@CommandArgument('--submit', '-a', default=False, action="store_true",
help="submit the data to perfherder")
def test_perf(self, submit=False):
def test_perf(self, base=None, submit=False):
self.set_software_rendering_env(True)

self.ensure_bootstrapped()
env = self.build_env()
cmd = ["bash", "test_perf.sh"]
if base:
cmd += ["--base", base]
if submit:
if not ("TREEHERDER_CLIENT_ID" in os.environ and
"TREEHERDER_CLIENT_SECRET" in os.environ):
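
Putting the pieces together: the `test-perf` mach command above only adds
`--base` to the command line when a base was given; `test_perf.sh` now
forwards all of its arguments to `test_all.sh`, which parses `--base` and
hands the value to `runner.py` (which otherwise falls back to its
`http://localhost:8000/` default). A rough sketch of just the visible flag
handling, with an example value:
```
# Example value only; any http:// or file:// base works.
base = "file:///run/user/1000/performance/"

cmd = ["bash", "test_perf.sh"]
if base:
    cmd += ["--base", base]

assert cmd == ["bash", "test_perf.sh",
               "--base", "file:///run/user/1000/performance/"]
```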
