Altinity · MyroTk · Feb 13, 2025 · Feb 12, 2025 · Feb 12, 2025
diff --git a/.github/create_combined_ci_report.py b/.github/create_combined_ci_report.py
@@ -3,6 +3,7 @@
 import os
 from pathlib import Path
 from itertools import combinations
+import json
 
 import requests
 from clickhouse_driver import Client
@@ -23,24 +24,53 @@ def get_checks_fails(client: Client, job_url: str):
     columns = (
         "check_status, check_name, test_status, test_name, report_url as results_link"
     )
-    query = f"""SELECT {columns} FROM `gh-data`.checks 
-                WHERE task_url='{job_url}' 
-                AND test_status in ('FAIL', 'ERROR')
+    query = f"""SELECT {columns} FROM `gh-data`.checks
+                WHERE task_url='{job_url}'
+                AND test_status IN ('FAIL', 'ERROR')
                 AND check_status!='error'
                 ORDER BY check_name, test_name
                 """
     return client.query_dataframe(query)
 
 
+def get_checks_known_fails(client: Client, job_url: str, known_fails: dict):
+    """
+    Get tests that are known to fail for the given job URL.
+    """
+    columns = (
+        "check_status, check_name, test_status, test_name, report_url as results_link"
+    )
+    query = f"""SELECT {columns} FROM `gh-data`.checks
+                WHERE task_url='{job_url}'
+                AND test_status='BROKEN'
+                AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())})
+                ORDER BY check_name, test_name
+                """
+
+    df = client.query_dataframe(query)
+
+    df.insert(
+        len(df.columns) - 1,
+        "reason",
+        df["test_name"]
+        .cat.remove_unused_categories()
+        .apply(
+            lambda test_name: known_fails[test_name].get("reason", "No reason given")
+        ),
+    )
+
+    return df
+
+
 def get_checks_errors(client: Client, job_url: str):
     """
     Get checks that have status 'error' for the given job URL.
     """
     columns = (
         "check_status, check_name, test_status, test_name, report_url as results_link"
     )
-    query = f"""SELECT {columns} FROM `gh-data`.checks 
-                WHERE task_url='{job_url}' 
+    query = f"""SELECT {columns} FROM `gh-data`.checks
+                WHERE task_url='{job_url}'
                 AND check_status=='error'
                 ORDER BY check_name, test_name
                 """
@@ -104,8 +134,8 @@ def format_test_name_for_linewrap(text: str) -> str:
 
 
 def format_results_as_html_table(results) -> str:
-    if results.empty:
-        return ""
+    if len(results) == 0:
+        return "<p>Nothing to report</p>"
     results.columns = [col.replace("_", " ").title() for col in results.columns]
     html = (
         results.to_html(
@@ -139,6 +169,9 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--no-upload", action="store_true", help="Do not upload the report"
     )
+    parser.add_argument(
+        "--known-fails", type=str, help="Path to the file with known fails"
+    )
     parser.add_argument(
         "--mark-preview", action="store_true", help="Mark the report as a preview"
     )
@@ -175,10 +208,24 @@ def main():
 
     fail_results = {
         "checks_fails": get_checks_fails(db_client, args.actions_run_url),
+        "checks_known_fails": [],
         "checks_errors": get_checks_errors(db_client, args.actions_run_url),
         "regression_fails": get_regression_fails(db_client, args.actions_run_url),
     }
 
+    if args.known_fails:
+        if not os.path.exists(args.known_fails):
+            print(f"Known fails file {args.known_fails} not found.")
+            exit(1)
+
+        with open(args.known_fails) as f:
+            known_fails = json.load(f)
+
+        if known_fails:
+            fail_results["checks_known_fails"] = get_checks_known_fails(
+                db_client, args.actions_run_url, known_fails
+            )
+
     combined_report = (
         ci_running_report.replace("ClickHouse CI running for", "Combined CI Report for")
         .replace(
@@ -188,12 +235,14 @@ def main():
 <ul>
     <li><a href="#ci-jobs-status">CI Jobs Status</a></li>
     <li><a href="#checks-fails">Checks Fails</a> ({len(fail_results['checks_fails'])})</li>
+    <li><a href="#checks-known-fails">Checks Known Fails</a> ({len(fail_results['checks_known_fails'])})</li>
     <li><a href="#checks-errors">Checks Errors</a> ({len(fail_results['checks_errors'])})</li>
     <li><a href="#regression-fails">Regression Fails</a> ({len(fail_results['regression_fails'])})</li>
 </ul>
 
 <h2 id="ci-jobs-status">CI Jobs Status</h2>
 <table>""",
+            1,
         )
         .replace(
             "</table>",
@@ -202,12 +251,16 @@ def main():
 <h2 id="checks-fails">Checks Fails</h2>
 {format_results_as_html_table(fail_results['checks_fails'])}
 
+<h2 id="checks-known-fails">Checks Known Fails</h2>
+{format_results_as_html_table(fail_results['checks_known_fails'])}
+
 <h2 id="checks-errors">Checks Errors</h2>
 {format_results_as_html_table(fail_results['checks_errors'])}
 
 <h2 id="regression-fails">Regression Fails</h2>
 {format_results_as_html_table(fail_results['regression_fails'])}
 """,
+            1,
         )
     )
     report_path = Path("combined_report.html")

diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
@@ -639,7 +639,7 @@ jobs:
         run: |
           pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.2.0
 
-          REPORT_LINK=$(python3 .github/create_combined_ci_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL)
+          REPORT_LINK=$(python3 .github/create_combined_ci_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json)
 
           IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://')
           if [[ -n $IS_VALID_URL ]]; then