Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
a80413a
proof of concept for regression tests in ci_running
strtgbb Feb 4, 2025
ffaf474
Use the correct sha and report url
strtgbb Feb 4, 2025
3d665e2
fix quoting
strtgbb Feb 4, 2025
ca2661c
set status and description for all regression tests
strtgbb Feb 4, 2025
3173413
fix file name
strtgbb Feb 5, 2025
216130c
update hash to use requests instead of pygithub
strtgbb Feb 5, 2025
3f1d1b6
update hash to use shorter status message
strtgbb Feb 5, 2025
25be027
update regression hash again
strtgbb Feb 5, 2025
d689c26
update regression hash again
strtgbb Feb 5, 2025
64606a9
fix regression not differentiating architecture in ci_running
strtgbb Feb 5, 2025
f2c71ca
Merge branch 'customizations/24.8.11' into add_regression_to_ci_running
MyroTk Feb 5, 2025
14bc97f
enable uploads to regression db
strtgbb Feb 6, 2025
1f8be1f
increase integration tests timeout
strtgbb Feb 7, 2025
4cf2f86
script for creating combined ci report
strtgbb Feb 7, 2025
00230a1
venv not installed by default, try without
strtgbb Feb 8, 2025
176b1a7
fix typo
strtgbb Feb 8, 2025
7bf0f0a
another typo
strtgbb Feb 9, 2025
ee93267
need to isntall dependencies explicitly
strtgbb Feb 9, 2025
d53c213
need to specify secrets
strtgbb Feb 9, 2025
2365916
cleanup
strtgbb Feb 10, 2025
adba730
fix typo
strtgbb Feb 10, 2025
ed49677
Account for regression reruns
strtgbb Feb 10, 2025
12e717f
change title
strtgbb Feb 10, 2025
d49fede
rename actions-job-url to actions-run-url because that's what it real…
strtgbb Feb 10, 2025
7655591
fix regression query
strtgbb Feb 11, 2025
420d17c
add toc and order by
strtgbb Feb 11, 2025
73af40e
docstrings
strtgbb Feb 11, 2025
e116e0d
working table sorting
strtgbb Feb 11, 2025
896c2b9
better line wrapping of test names and layout tweak
strtgbb Feb 11, 2025
948640c
increase timeout for arm integration tests
strtgbb Feb 11, 2025
66f7c1c
more responsive table size
strtgbb Feb 11, 2025
e45724a
little tweaks
strtgbb Feb 11, 2025
03590fe
adjust column order
strtgbb Feb 11, 2025
4ab146d
drop redundant rows from the regression fails table
strtgbb Feb 11, 2025
b7e8e4d
increase arm integration timeout again
strtgbb Feb 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 237 additions & 0 deletions .github/create_combined_ci_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
#!/usr/bin/env python3
import argparse
import os
from pathlib import Path
from itertools import combinations

import requests
from clickhouse_driver import Client
import boto3
from botocore.exceptions import NoCredentialsError

DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST"
DATABASE_USER_VAR = "CHECKS_DATABASE_USER"
DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD"
S3_BUCKET = "altinity-build-artifacts"


def get_checks_fails(client: Client, job_url: str):
"""
Get tests that did not succeed for the given job URL.
Exclude checks that have status 'error' as they are counted in get_checks_errors.
"""
columns = (
"check_status, check_name, test_status, test_name, report_url as results_link"
)
query = f"""SELECT {columns} FROM `gh-data`.checks
WHERE task_url='{job_url}'
AND test_status in ('FAIL', 'ERROR')
AND check_status!='error'
ORDER BY check_name, test_name
"""
return client.query_dataframe(query)


def get_checks_errors(client: Client, job_url: str):
"""
Get checks that have status 'error' for the given job URL.
"""
columns = (
"check_status, check_name, test_status, test_name, report_url as results_link"
)
query = f"""SELECT {columns} FROM `gh-data`.checks
WHERE task_url='{job_url}'
AND check_status=='error'
ORDER BY check_name, test_name
"""
return client.query_dataframe(query)


def drop_prefix_rows(df, column_to_clean):
"""
Drop rows from the dataframe if:
- the row matches another row completely except for the specified column
- the specified column of that row is a prefix of the same column in another row
"""
to_drop = set()
reference_columns = [col for col in df.columns if col != column_to_clean]
for (i, row_1), (j, row_2) in combinations(df.iterrows(), 2):
if all(row_1[col] == row_2[col] for col in reference_columns):
if row_2[column_to_clean].startswith(row_1[column_to_clean]):
to_drop.add(i)
elif row_1[column_to_clean].startswith(row_2[column_to_clean]):
to_drop.add(j)
return df.drop(to_drop)


def get_regression_fails(client: Client, job_url: str):
"""
Get regression tests that did not succeed for the given job URL.
"""
# If you rename the alias for report_url, also update the formatters in format_results_as_html_table
query = f"""SELECT arch, status, test_name, results_link
FROM (
SELECT
architecture as arch,
test_name,
argMax(result, start_time) AS status,
job_url,
report_url as results_link
FROM `gh-data`.clickhouse_regression_results
GROUP BY architecture, test_name, job_url, report_url, start_time
ORDER BY start_time DESC, length(test_name) DESC
)
WHERE job_url='{job_url}'
AND status IN ('Fail', 'Error')
"""
df = client.query_dataframe(query)
df = drop_prefix_rows(df, "test_name")
return df


def url_to_html_link(url: str) -> str:
if not url:
return ""
text = url.split("/")[-1]
if not text:
text = "results"
return f'<a href="{url}">{text}</a>'


def format_test_name_for_linewrap(text: str) -> str:
"""Tweak the test name to improve line wrapping."""
return text.replace(".py::", "/")


def format_results_as_html_table(results) -> str:
if results.empty:
return ""
results.columns = [col.replace("_", " ").title() for col in results.columns]
html = (
results.to_html(
index=False,
formatters={
"Results Link": url_to_html_link,
"Test Name": format_test_name_for_linewrap,
},
escape=False,
) # tbody/thead tags interfere with the table sorting script
.replace("<tbody>\n", "")
.replace("</tbody>\n", "")
.replace("<thead>\n", "")
.replace("</thead>\n", "")
.replace('<table border="1"', '<table style="min-width: min(900px, 98vw);"')
)
return html


def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Create a combined CI report.")
parser.add_argument(
"--actions-run-url", required=True, help="URL of the actions run"
)
parser.add_argument(
"--pr-number", required=True, help="Pull request number for the S3 path"
)
parser.add_argument(
"--commit-sha", required=True, help="Commit SHA for the S3 path"
)
parser.add_argument(
"--no-upload", action="store_true", help="Do not upload the report"
)
parser.add_argument(
"--mark-preview", action="store_true", help="Mark the report as a preview"
)
return parser.parse_args()


def main():
args = parse_args()

db_client = Client(
host=os.getenv(DATABASE_HOST_VAR),
user=os.getenv(DATABASE_USER_VAR),
password=os.getenv(DATABASE_PASSWORD_VAR),
port=9440,
secure="y",
verify=False,
settings={"use_numpy": True},
)

s3_path = (
f"https://s3.amazonaws.com/{S3_BUCKET}/{args.pr_number}/{args.commit_sha}/"
)
report_destination_url = s3_path + "combined_report.html"
ci_running_report_url = s3_path + "ci_running.html"

response = requests.get(ci_running_report_url)
if response.status_code == 200:
ci_running_report: str = response.text
else:
print(
f"Failed to download CI running report. Status code: {response.status_code}, Response: {response.text}"
)
exit(1)

fail_results = {
"checks_fails": get_checks_fails(db_client, args.actions_run_url),
"checks_errors": get_checks_errors(db_client, args.actions_run_url),
"regression_fails": get_regression_fails(db_client, args.actions_run_url),
}

combined_report = (
ci_running_report.replace("ClickHouse CI running for", "Combined CI Report for")
.replace(
"<table>",
f"""<h2>Table of Contents</h2>
{'<p style="font-weight: bold;color: #F00;">This is a preview. FinishCheck has not completed.</p>' if args.mark_preview else ""}
<ul>
<li><a href="#ci-jobs-status">CI Jobs Status</a></li>
<li><a href="#checks-fails">Checks Fails</a> ({len(fail_results['checks_fails'])})</li>
<li><a href="#checks-errors">Checks Errors</a> ({len(fail_results['checks_errors'])})</li>
<li><a href="#regression-fails">Regression Fails</a> ({len(fail_results['regression_fails'])})</li>
</ul>

<h2 id="ci-jobs-status">CI Jobs Status</h2>
<table>""",
)
.replace(
"</table>",
f"""</table>

<h2 id="checks-fails">Checks Fails</h2>
{format_results_as_html_table(fail_results['checks_fails'])}

<h2 id="checks-errors">Checks Errors</h2>
{format_results_as_html_table(fail_results['checks_errors'])}

<h2 id="regression-fails">Regression Fails</h2>
{format_results_as_html_table(fail_results['regression_fails'])}
""",
)
)
report_path = Path("combined_report.html")
report_path.write_text(combined_report, encoding="utf-8")

if args.no_upload:
print(f"Report saved to {report_path}")
exit(0)

# Upload the report to S3
s3_client = boto3.client("s3")

try:
s3_client.put_object(
Bucket=S3_BUCKET,
Key=f"{args.pr_number}/{args.commit_sha}/combined_report.html",
Body=combined_report,
ContentType="text/html; charset=utf-8",
)
except NoCredentialsError:
print("Credentials not available for S3 upload.")

print(report_destination_url)


if __name__ == "__main__":
main()
Loading
Loading