diff --git a/.github/create_combined_ci_report.py b/.github/create_combined_ci_report.py index 012348b3e278..a24bc84932a2 100755 --- a/.github/create_combined_ci_report.py +++ b/.github/create_combined_ci_report.py @@ -3,6 +3,7 @@ import os from pathlib import Path from itertools import combinations +import json import requests from clickhouse_driver import Client @@ -23,15 +24,44 @@ def get_checks_fails(client: Client, job_url: str): columns = ( "check_status, check_name, test_status, test_name, report_url as results_link" ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' - AND test_status in ('FAIL', 'ERROR') + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url='{job_url}' + AND test_status IN ('FAIL', 'ERROR') AND check_status!='error' ORDER BY check_name, test_name """ return client.query_dataframe(query) +def get_checks_known_fails(client: Client, job_url: str, known_fails: dict): + """ + Get tests that are known to fail for the given job URL. + """ + columns = ( + "check_status, check_name, test_status, test_name, report_url as results_link" + ) + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url='{job_url}' + AND test_status='BROKEN' + AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())}) + ORDER BY check_name, test_name + """ + + df = client.query_dataframe(query) + + df.insert( + len(df.columns) - 1, + "reason", + df["test_name"] + .cat.remove_unused_categories() + .apply( + lambda test_name: known_fails[test_name].get("reason", "No reason given") + ), + ) + + return df + + def get_checks_errors(client: Client, job_url: str): """ Get checks that have status 'error' for the given job URL. @@ -39,8 +69,8 @@ def get_checks_errors(client: Client, job_url: str): columns = ( "check_status, check_name, test_status, test_name, report_url as results_link" ) - query = f"""SELECT {columns} FROM `gh-data`.checks - WHERE task_url='{job_url}' + query = f"""SELECT {columns} FROM `gh-data`.checks + WHERE task_url='{job_url}' AND check_status=='error' ORDER BY check_name, test_name """ @@ -104,8 +134,8 @@ def format_test_name_for_linewrap(text: str) -> str: def format_results_as_html_table(results) -> str: - if results.empty: - return "" + if len(results) == 0: + return "

Nothing to report

" results.columns = [col.replace("_", " ").title() for col in results.columns] html = ( results.to_html( @@ -139,6 +169,9 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--no-upload", action="store_true", help="Do not upload the report" ) + parser.add_argument( + "--known-fails", type=str, help="Path to the file with known fails" + ) parser.add_argument( "--mark-preview", action="store_true", help="Mark the report as a preview" ) @@ -175,10 +208,24 @@ def main(): fail_results = { "checks_fails": get_checks_fails(db_client, args.actions_run_url), + "checks_known_fails": [], "checks_errors": get_checks_errors(db_client, args.actions_run_url), "regression_fails": get_regression_fails(db_client, args.actions_run_url), } + if args.known_fails: + if not os.path.exists(args.known_fails): + print(f"Known fails file {args.known_fails} not found.") + exit(1) + + with open(args.known_fails) as f: + known_fails = json.load(f) + + if known_fails: + fail_results["checks_known_fails"] = get_checks_known_fails( + db_client, args.actions_run_url, known_fails + ) + combined_report = ( ci_running_report.replace("ClickHouse CI running for", "Combined CI Report for") .replace( @@ -188,12 +235,14 @@ def main():

CI Jobs Status

""", + 1, ) .replace( "
", @@ -202,12 +251,16 @@ def main():

Checks Fails

{format_results_as_html_table(fail_results['checks_fails'])} +

Checks Known Fails

+{format_results_as_html_table(fail_results['checks_known_fails'])} +

Checks Errors

{format_results_as_html_table(fail_results['checks_errors'])}

Regression Fails

{format_results_as_html_table(fail_results['regression_fails'])} """, + 1, ) ) report_path = Path("combined_report.html") diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 36bf370954cb..3019e07b9213 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -639,7 +639,7 @@ jobs: run: | pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.2.0 - REPORT_LINK=$(python3 .github/create_combined_ci_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL) + REPORT_LINK=$(python3 .github/create_combined_ci_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json) IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://') if [[ -n $IS_VALID_URL ]]; then