Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ci): Change test washer logic to mark as flake all parents of a flake test #25504

Merged
merged 17 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions flakes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,4 @@
# TODO: https://datadoghq.atlassian.net/browse/CONTINT-4143
test/new-e2e/tests/containers:
- TestEKSSuite/TestCPU/metric___container.cpu.usage{^kube_deployment:stress-ng$,^kube_namespace:workload-cpustress$}
- TestEKSSuite/TestCPU
- TestEKSSuite
- TestKindSuite/TestCPU/metric___container.cpu.usage{^kube_deployment:stress-ng$,^kube_namespace:workload-cpustress$}
- TestKindSuite/TestCPU
- TestKindSuite
90 changes: 68 additions & 22 deletions tasks/testwasher.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,37 +26,31 @@ def get_non_flaky_failing_tests(self, module_path):
"""
Parse the test output json file and compute the failing tests and the ones known to be flaky
"""
failing_tests = defaultdict(set)
flaky_marked_tests = defaultdict(set)
with open(f"{module_path}/{self.test_output_json_file}", encoding='utf-8') as f:
for line in f:
test_result = json.loads(line)
if test_result["Action"] == "fail" and "Test" in test_result:
failing_tests[test_result["Package"]].add(test_result["Test"])
if test_result["Action"] == "success" and "Test" in test_result:
if test_result["Test"] in failing_tests[test_result["Package"]]:
failing_tests[test_result["Package"]].remove(test_result["Test"])
if (
"Output" in test_result
and "Test" in test_result
and self.flaky_test_indicator in test_result["Output"]
):
flaky_marked_tests[test_result["Package"]].add(test_result["Test"])

failing_tests, flaky_marked_tests = self.parse_test_results(module_path)

all_known_flakes = self.merge_known_flakes(flaky_marked_tests)
non_flaky_failing_tests = defaultdict(set)

for package, tests in failing_tests.items():
non_flaky_failing_tests_in_package = set()
known_flaky_tests_parents = self.get_tests_family(all_known_flakes[package])
for failing_test in tests:
if all(
not failing_test.startswith(flaky_marked_test) for flaky_marked_test in flaky_marked_tests[package]
) and all(
not failing_test.startswith(flaky_marked_test)
for flaky_marked_test in self.known_flaky_tests[package]
):
if not self.is_known_flaky_test(failing_test, all_known_flakes[package], known_flaky_tests_parents):
non_flaky_failing_tests_in_package.add(failing_test)
if non_flaky_failing_tests_in_package:
non_flaky_failing_tests[package] = non_flaky_failing_tests_in_package
return non_flaky_failing_tests

def merge_known_flakes(self, marked_flakes):
    """
    Merge the flakes marked in the Go code (via the flaky-test indicator in the
    test output) with the ones declared in the flakes.yaml file.

    - marked_flakes: mapping of package -> set of test names marked flaky at runtime

    Returns a new mapping package -> set of flaky test names covering both
    sources. Neither `marked_flakes` nor `self.known_flaky_tests` (nor their
    sets) is mutated — the previous implementation updated the caller's sets
    in place as a hidden side effect.
    """
    # Start from a deep-enough copy of the yaml-declared flakes (copy the sets,
    # not just the dict) so updates below cannot leak into self.known_flaky_tests.
    merged = {package: set(tests) for package, tests in self.known_flaky_tests.items()}
    for package, tests in marked_flakes.items():
        merged.setdefault(package, set()).update(tests)
    return merged

def parse_flaky_file(self):
"""
Parse the flakes.yaml file and add the tests listed there to the known flaky tests list
Expand All @@ -68,6 +62,24 @@ def parse_flaky_file(self):
for package, tests in flakes.items():
self.known_flaky_tests[f"github.com/DataDog/datadog-agent/{package}"].update(set(tests))

def parse_test_results(self, module_path: str) -> tuple[dict, dict]:
    """
    Scan a module's gotestsum JSON output and collect, per package:
    - the tests whose last recorded outcome is a failure (a later "success"
      for the same test, e.g. on retry, clears an earlier "fail"), and
    - the tests whose output contains the flaky-test indicator string.

    Returns a (failing_tests, flaky_marked_tests) pair of
    package -> set-of-test-names mappings.
    """
    failed = defaultdict(set)
    marked_flaky = defaultdict(set)

    results_path = f"{module_path}/{self.test_output_json_file}"
    with open(results_path, encoding='utf-8') as results_file:
        for raw_line in results_file:
            event = json.loads(raw_line)
            # Package-level events carry no "Test" key; only test events matter here.
            if "Test" not in event:
                continue
            package, test = event["Package"], event["Test"]
            if event["Action"] == "fail":
                failed[package].add(test)
            if event["Action"] == "success":
                # A retry that eventually passed is not a failure.
                failed[package].discard(test)
            if self.flaky_test_indicator in event.get("Output", ""):
                marked_flaky[package].add(test)
    return failed, marked_flaky

def process_module_results(self, module_results: list[ModuleTestResult]):
"""
Process the module test results and decide whether we should succeed or not.
Expand All @@ -89,3 +101,37 @@ def process_module_results(self, module_results: list[ModuleTestResult]):
print(failed_tests_string)

return should_succeed

def is_known_flaky_test(self, failing_test, known_flaky_tests, known_flaky_tests_parents):
    """
    Check if a test is known to be flaky
    The method should be called with the following arguments:
    - failing_test: the test that is failing
    - known_flaky_tests: the set of tests that are known to be flaky
    - known_flaky_tests_parents: the set of tests that are ancestors of a known flaky test, thus would fail when the flaky leaf test fails
    If a test is a parent of a test that is known to be flaky, the test should be considered flaky
    For example:
    - if TestEKSSuite/TestCPU is known to be flaky, TestEKSSuite/TestCPU/TestCPUUtilization should be considered flaky
    - if TestEKSSuite/TestCPU is known to be flaky, TestEKSSuite should be considered flaky
    - if TestEKSSuite/TestCPU is known to be flaky, TestEKSSuite/TestMemory should not be considered flaky
    """

    # The failing test is flaky if any of its own ancestors (or itself) is a
    # declared flake — i.e. it is a subtest of a known flaky test.
    ancestors = self.get_tests_family([failing_test])
    if not ancestors.isdisjoint(known_flaky_tests):
        return True

    # Otherwise it is flaky only if it is itself an ancestor of a known flaky
    # leaf test (its failure is then caused by the flaky child failing).
    return failing_test in known_flaky_tests_parents

def get_tests_family(self, test_name_list):
    """
    Return the given test names together with all of their ancestor test names.

    Go subtests are named "Parent/Child", so for
    ["TestEKSSuite/TestCPU/TestCPUUtilization", "TestKindSuite/TestCPU"] this
    returns {"TestEKSSuite/TestCPU/TestCPUUtilization", "TestEKSSuite/TestCPU",
    "TestEKSSuite", "TestKindSuite/TestCPU", "TestKindSuite"}.
    """
    family = set()
    for full_name in test_name_list:
        segments = full_name.split('/')
        # Every '/'-separated prefix of the name is an ancestor (the full name included).
        for depth in range(1, len(segments) + 1):
            family.add('/'.join(segments[:depth]))
    return family
12 changes: 12 additions & 0 deletions tasks/unit-tests/testdata/test_output_failure_parent.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{"Time":"2024-04-25T19:35:40.868296+02:00","Action":"run","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite/TestCPU/metric___container.cpu.usage{^kube_deployment:stress-ng$,^kube_namespace:workload-cpustress$}"}
{"Time":"2024-04-25T19:35:40.870728+02:00","Action":"output","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite/TestCPU/metric___container.cpu.usage{^kube_deployment:stress-ng$,^kube_namespace:workload-cpustress$}","Output":" Messages: No value of `container.cpu.usage{kube_deployment:stress-ng,kube_namespace:workload-cpustress}` is in the range [145000000.000000;155000000.000000]: [1.2720585182086919e+08] base_test.go:143: \n"}
{"Time":"2024-04-25T19:35:40.870757+02:00","Action":"fail","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite/TestCPU/metric___container.cpu.usage{^kube_deployment:stress-ng$,^kube_namespace:workload-cpustress$}","Elapsed":0}
{"Time":"2024-04-25T19:35:40.870768+02:00","Action":"run","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite/TestCPU"}
{"Time":"2024-04-25T19:35:40.870772+02:00","Action":"output","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite/TestCPU","Output":" k8s_test.go:452: flakytest: this is a known flaky test\n"}
{"Time":"2024-04-25T19:35:40.871439+02:00","Action":"fail","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite/TestCPU","Elapsed":0}
{"Time":"2024-04-25T19:35:40.870768+02:00","Action":"run","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite/TestMemory"}
{"Time":"2024-04-25T19:35:40.871439+02:00","Action":"fail","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite/TestMemory","Elapsed":0}
{"Time":"2024-04-25T19:35:40.871444+02:00","Action":"run","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite"}
{"Time":"2024-04-25T19:35:40.871448+02:00","Action":"output","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite","Output":"=== RUN TestGetPayloadContainerizedWithDocker0\n"}
{"Time":"2024-04-25T19:35:40.873652+02:00","Action":"fail","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Test":"TestEKSSuite","Elapsed":0}
{"Time":"2024-04-25T19:35:40.874579+02:00","Action":"fail","Package":"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers","Elapsed":0.494}
146 changes: 146 additions & 0 deletions tasks/unit-tests/testwasher_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,149 @@ def test_should_fail_failing_tests(self):
module_path = "tasks/unit-tests/testdata"
non_flaky_failing_tests = test_washer_3.get_non_flaky_failing_tests(module_path)
self.assertEqual(non_flaky_failing_tests, {"github.com/DataDog/datadog-agent/pkg/gohai": {"TestGetPayload"}})

def test_should_mark_parent_flaky(self):
    # TestEKSSuite and TestEKSSuite/TestCPU fail because of a known flaky leaf
    # test, so only the independent TestEKSSuite/TestMemory failure remains.
    washer = TestWasher(
        test_output_json_file="test_output_failure_parent.json",
        flakes_file_path="tasks/unit-tests/testdata/flakes_2.yaml",
    )
    actual = washer.get_non_flaky_failing_tests("tasks/unit-tests/testdata")
    expected = {"github.com/DataDog/datadog-agent/test/new-e2e/tests/containers": {"TestEKSSuite/TestMemory"}}
    self.assertEqual(actual, expected)


class TestMergeKnownFlakes(unittest.TestCase):
    """Unit tests for TestWasher.merge_known_flakes covering every overlap
    combination between runtime-marked flakes and yaml-declared flakes."""

    def test_with_shared_keys(self):
        # Packages present in both sources get the union of their flaky tests.
        test_washer = TestWasher()
        marked_flakes = {
            "nintendo": {"mario", "luigi"},
            "sega": {"sonic"},
        }
        test_washer.known_flaky_tests = {
            "nintendo": {"peach"},
            "sony": {"crashbandicoot"},
        }
        merged_flakes = test_washer.merge_known_flakes(marked_flakes)
        self.assertEqual(
            merged_flakes,
            {
                "nintendo": {"mario", "luigi", "peach"},
                "sega": {"sonic"},
                "sony": {"crashbandicoot"},
            },
        )

    def test_no_shared_keys(self):
        # Disjoint packages are simply concatenated.
        test_washer = TestWasher()
        marked_flakes = {
            "nintendo": {"mario", "luigi"},
        }
        test_washer.known_flaky_tests = {
            "sega": {"sonic"},
        }
        merged_flakes = test_washer.merge_known_flakes(marked_flakes)
        self.assertEqual(
            merged_flakes,
            {
                "nintendo": {"mario", "luigi"},
                "sega": {"sonic"},
            },
        )

    def test_empty_marked(self):
        # No runtime-marked flakes: the yaml flakes pass through unchanged.
        test_washer = TestWasher()
        marked_flakes = {}
        test_washer.known_flaky_tests = {
            "sega": {"sonic"},
        }
        merged_flakes = test_washer.merge_known_flakes(marked_flakes)
        self.assertEqual(
            merged_flakes,
            {"sega": {"sonic"}},
        )

    def test_empty_yaml(self):
        # No yaml flakes: the runtime-marked flakes pass through unchanged.
        # (A leftover debug print of the merged result was removed here.)
        test_washer = TestWasher()
        marked_flakes = {
            "nintendo": {"mario", "luigi"},
        }
        test_washer.known_flaky_tests = {}
        merged_flakes = test_washer.merge_known_flakes(marked_flakes)
        self.assertEqual(
            merged_flakes,
            {"nintendo": {"mario", "luigi"}},
        )


class TestGetTestParents(unittest.TestCase):
    """Unit tests for TestWasher.get_tests_family."""

    def test_get_tests_parents(self):
        # Every '/'-separated prefix of each test name must be returned.
        washer = TestWasher()
        family = washer.get_tests_family(["TestEKSSuite/TestCPU/TestCPUUtilization", "TestKindSuite/TestKind"])
        expected = {
            "TestEKSSuite",
            "TestEKSSuite/TestCPU",
            "TestEKSSuite/TestCPU/TestCPUUtilization",
            "TestKindSuite",
            "TestKindSuite/TestKind",
        }
        self.assertEqual(family, expected)

    def test_get_test_parents_empty(self):
        # An empty input list yields an empty family.
        washer = TestWasher()
        self.assertEqual(washer.get_tests_family([]), set())


class TestIsKnownFlake(unittest.TestCase):
    """Unit tests for TestWasher.is_known_flaky_test: a failing test is flaky
    when it matches, descends from, or is an ancestor of a known flaky test."""

    def test_known_flake(self):
        # Exact match with a known flaky test.
        washer = TestWasher()
        verdict = washer.is_known_flaky_test(
            "TestEKSSuite/mario", {"TestEKSSuite/mario"}, {"TestEKSSuite", "TestEKSSuite/mario"}
        )
        self.assertTrue(verdict)

    def test_known_flake_parent_failing(self):
        # A parent failing because its flaky child failed is considered flaky.
        washer = TestWasher()
        verdict = washer.is_known_flaky_test(
            "TestEKSSuite", {"TestEKSSuite/mario"}, {"TestEKSSuite", "TestEKSSuite/mario"}
        )
        self.assertTrue(verdict)

    def test_known_flake_parent_failing_2(self):
        # Same as above, one level deeper in the subtest hierarchy.
        washer = TestWasher()
        verdict = washer.is_known_flaky_test(
            "TestEKSSuite/mario",
            {"TestEKSSuite/mario/luigi"},
            {"TestEKSSuite", "TestEKSSuite/mario", "TestEKSSuite/mario/luigi"},
        )
        self.assertTrue(verdict)

    def test_not_known_flake(self):
        # A sibling of a flaky test is not flaky.
        washer = TestWasher()
        verdict = washer.is_known_flaky_test(
            "TestEKSSuite/luigi", {"TestEKSSuite/mario"}, {"TestEKSSuite", "TestEKSSuite/mario"}
        )
        self.assertFalse(verdict)

    def test_not_known_flake_ambiguous_start(self):
        # A name that merely starts with a flaky suite name must not match.
        washer = TestWasher()
        verdict = washer.is_known_flaky_test(
            "TestEKSSuiteVM/mario", {"TestEKSSuite/mario"}, {"TestEKSSuite"}
        )
        self.assertFalse(verdict)

    def test_not_known_flake_ambiguous_start_2(self):
        # The prefix ambiguity must not match in the other direction either.
        washer = TestWasher()
        verdict = washer.is_known_flaky_test(
            "TestEKSSuite/mario", {"TestEKSSuiteVM/mario"}, {"TestEKSSuiteVM"}
        )
        self.assertFalse(verdict)
Loading