From ec9987dc71a1e9b84863b0262c838780e7b3576e Mon Sep 17 00:00:00 2001 From: Pruthvi Madugundu Date: Fri, 8 Mar 2024 11:29:12 -0800 Subject: [PATCH 01/16] CONSOLIDATED COMMITS: Triton build updates ========================================== Triton build conditionalized on ROCM_VERSION Include the ROCm version in triton version (cherry picked from commit 7d33910198d20c755e40f30de6c9b9e2e03b1a8c) (cherry picked from commit 0412eb4eed668567d394615aa644eacc2741dcd3) Update triton-rocm.txt to triton.txt (cherry picked from commit 0ce9f6ea5f6a654e05fa098f1693c05f3c33d6ec) Use ROCm/triton for install_triton.sh (cherry picked from commit 6e9714bbfbe8f003643252ec58f7b42a3c9c364c) update triton commit Revert "Use ROCm/triton for install_triton.sh" This reverts commit 81b0cbc8435122030044049c661f252ee8aa7ae5. change triton repo Update triton.txt to use release/internal/3.3.x branch Use ROCm/triton Use ROCm/triton for install_triton.sh (cherry picked from commit 0036db5ab1a6fcf13662cd8ccd99f021422fe547) --- .ci/docker/common/install_triton.sh | 2 +- .github/scripts/build_triton_wheel.py | 37 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/.ci/docker/common/install_triton.sh b/.ci/docker/common/install_triton.sh index 1b68e3c247839..b2fdebdcc4747 100755 --- a/.ci/docker/common/install_triton.sh +++ b/.ci/docker/common/install_triton.sh @@ -21,7 +21,7 @@ elif [ -n "${TRITON_CPU}" ]; then TRITON_REPO="https://github.com/triton-lang/triton-cpu" TRITON_TEXT_FILE="triton-cpu" else - TRITON_REPO="https://github.com/triton-lang/triton" + TRITON_REPO="https://github.com/ROCm/triton" TRITON_TEXT_FILE="triton" fi diff --git a/.github/scripts/build_triton_wheel.py b/.github/scripts/build_triton_wheel.py index 11fa8404273d3..b12d7bfbeb1ee 100644 --- a/.github/scripts/build_triton_wheel.py +++ b/.github/scripts/build_triton_wheel.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os +import re import shutil import sys from pathlib import Path @@ -50,6 
+51,31 @@ def patch_init_py( with open(path, "w") as f: f.write(orig) +def get_rocm_version() -> str: + rocm_path = os.environ.get('ROCM_HOME') or os.environ.get('ROCM_PATH') or "/opt/rocm" + rocm_version = "0.0.0" + rocm_version_h = f"{rocm_path}/include/rocm-core/rocm_version.h" + if not os.path.isfile(rocm_version_h): + rocm_version_h = f"{rocm_path}/include/rocm_version.h" + + # The file could be missing due to 1) ROCm version < 5.2, or 2) no ROCm install. + if os.path.isfile(rocm_version_h): + RE_MAJOR = re.compile(r"#define\s+ROCM_VERSION_MAJOR\s+(\d+)") + RE_MINOR = re.compile(r"#define\s+ROCM_VERSION_MINOR\s+(\d+)") + RE_PATCH = re.compile(r"#define\s+ROCM_VERSION_PATCH\s+(\d+)") + major, minor, patch = 0, 0, 0 + for line in open(rocm_version_h): + match = RE_MAJOR.search(line) + if match: + major = int(match.group(1)) + match = RE_MINOR.search(line) + if match: + minor = int(match.group(1)) + match = RE_PATCH.search(line) + if match: + patch = int(match.group(1)) + rocm_version = str(major)+"."+str(minor)+"."+str(patch) + return rocm_version def build_triton( *, @@ -65,6 +91,14 @@ def build_triton( max_jobs = os.cpu_count() or 1 env["MAX_JOBS"] = str(max_jobs) + version_suffix = "" + if not release: + # Nightly binaries include the triton commit hash, i.e. 2.1.0+e6216047b8 + # while release build should only include the version, i.e. 
2.1.0 + rocm_version = get_rocm_version() + version_suffix = f"+rocm{rocm_version}_{commit_hash[:10]}" + version += version_suffix + with TemporaryDirectory() as tmpdir: triton_basedir = Path(tmpdir) / "triton" triton_pythondir = triton_basedir / "python" @@ -72,6 +106,7 @@ def build_triton( triton_repo = "https://github.com/openai/triton" if device == "rocm": triton_pkg_name = "pytorch-triton-rocm" + triton_repo = "https://github.com/ROCm/triton/" elif device == "xpu": triton_pkg_name = "pytorch-triton-xpu" triton_repo = "https://github.com/intel/intel-xpu-backend-for-triton" @@ -104,6 +139,8 @@ def build_triton( cwd=triton_basedir, shell=True, ) + cur_rocm_ver = get_rocm_version() + check_call(["scripts/amd/setup_rocm_libs.sh", cur_rocm_ver], cwd=triton_basedir) print("ROCm libraries setup for triton installation...") # old triton versions have setup.py in the python/ dir, From 68824af7d8c9d49ca7f0b4992f31c293299a55b6 Mon Sep 17 00:00:00 2001 From: Ethan Wee Date: Tue, 12 Aug 2025 11:40:53 -0700 Subject: [PATCH 02/16] [rocm7.1_internal_testing] Change pytorch-triton-rocm to pytorch-triton (#2482) Related to https://github.com/ROCm/builder/pull/90/files http://rocm-ci.amd.com/job/mainline-pytorch_internal-manylinux-wheels/305/ PyTorch wheel installs successfully when building torchvision/torchaudio (cherry picked from commit c1ee54d9804886c5de9cb8eff295185afffafd1d) --- .circleci/scripts/binary_populate_env.sh | 4 ++-- .github/scripts/build_triton_wheel.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/scripts/binary_populate_env.sh b/.circleci/scripts/binary_populate_env.sh index 11f9678579935..f876ac8efcf7f 100755 --- a/.circleci/scripts/binary_populate_env.sh +++ b/.circleci/scripts/binary_populate_env.sh @@ -84,10 +84,10 @@ fi # Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton rocm package if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*rocm.* && $(uname) == 
"Linux" ]]; then - TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}; ${TRITON_CONSTRAINT}" + TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}" if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then TRITON_SHORTHASH=$(cut -c1-8 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt) - TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}+git${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}" + TRITON_REQUIREMENT="triton==${TRITON_VERSION}+git${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}" fi if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}" diff --git a/.github/scripts/build_triton_wheel.py b/.github/scripts/build_triton_wheel.py index b12d7bfbeb1ee..3b5877b6a987c 100644 --- a/.github/scripts/build_triton_wheel.py +++ b/.github/scripts/build_triton_wheel.py @@ -105,8 +105,8 @@ def build_triton( triton_repo = "https://github.com/openai/triton" if device == "rocm": - triton_pkg_name = "pytorch-triton-rocm" - triton_repo = "https://github.com/ROCm/triton/" + triton_pkg_name = "triton" + triton_repo = "https://github.com/ROCm/triton" elif device == "xpu": triton_pkg_name = "pytorch-triton-xpu" triton_repo = "https://github.com/intel/intel-xpu-backend-for-triton" From 05e104c7c9941bab925c2d61859587856070a358 Mon Sep 17 00:00:00 2001 From: Prachi Gupta Date: Fri, 22 Aug 2025 18:35:35 -0500 Subject: [PATCH 03/16] [rocm7.1_internal_testing] Add triton_kernels wheel generation (#2566) Fixes #ISSUE_NUMBER (cherry picked from commit 0ea0592f5fd18fd105995af578ce8156de71d57c) --- .github/scripts/build_triton_wheel.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/scripts/build_triton_wheel.py b/.github/scripts/build_triton_wheel.py index 3b5877b6a987c..16eb6a7851974 100644 --- a/.github/scripts/build_triton_wheel.py +++ b/.github/scripts/build_triton_wheel.py @@ -164,6 +164,13 @@ def build_triton( cwd=triton_basedir, ) + # For gpt-oss models, 
triton requires this extra triton_kernels wheel + # triton_kernels came after pytorch release/2.8 + triton_kernels_dir = Path(f"{triton_basedir}/python/triton_kernels") + check_call([sys.executable, "-m", "build", "--wheel"], cwd=triton_kernels_dir, env=env) + kernels_whl_path = next(iter((triton_kernels_dir / "dist").glob("*.whl"))) + shutil.copy(kernels_whl_path, Path.cwd()) + return Path.cwd() / whl_path.name From 9b4ad7dd6afea77b14c337d80c32f812f98a9888 Mon Sep 17 00:00:00 2001 From: Bo Li <110066325+BLOrange-AMD@users.noreply.github.com> Date: Thu, 18 Apr 2024 11:12:30 -0500 Subject: [PATCH 04/16] CONSOLIDATED COMMITS: Implementation of PyTorch ut parsing script - QA helper functions ======================================================================================= Implementation of PyTorch ut parsing script - QA helper function (#1386) * Initial implementation of PyTorch ut parsing script * Extracted path variables * Use nested dict to save results * Fixes typo * Cleanup * Fixes several issues * Minor name change * Update run_pytorch_unit_tests.py * Added file banners * Supported running from API * Added more help info * Consistent naming * Format help text --------- Co-authored-by: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Co-authored-by: Jithun Nair Print consolidated log file for pytorch unit test automation scripts (#1433) * Print consolidated log file for pytorch uts * Update run_entire_tests subprocess call as well * lint * Add ERROR string [SWDEV-466849] Enhancements for PyTorch UT helper scripts (#1491) * Check that >1 GPUs are visible when running TEST_CONFIG=distributed * Add EXECUTION_TIME to file-level and aggregate statistics PyTorch unit test helper scripts enhancements (#1517) * Fail earlier for distributed-on-1-GPU scenario * print cmd in consolidated log with prettier formatting * python->python3 Fixes https://ontrack-internal.amd.com/browse/SWDEV-477264 --------- Co-authored-by: blorange-amd Several issues fix of QA 
helper script (#1564) Fixes SWDEV-475071: https://ontrack-internal.amd.com/browse/SWDEV-475071 Removed args inside function (#1595) Fixes SWDEV-475071 (cherry picked from commit 041aa1b47978154de63edc6b7ffcdea218a847a3) QA script - Added multi gpu check with priority_tests (#1604) Fixes SWDEV-487907. Verified throwing exception for distributed is working correctly on single gpu with command: python .automation_scripts/run_pytorch_unit_tests.py --priority_test (cherry picked from commit 57cc742271cbf4547f9213710e57f6444bbc983e) (cherry picked from commit 6d5c3dcae5d02196749df11b57e6d0d5e4e2212e) (cherry picked from commit 2ee3aa2de081680756002bbfc4006b5f68e68c16) --- .automation_scripts/parse_xml_results.py | 178 ++++++ .automation_scripts/run_pytorch_unit_tests.py | 518 ++++++++++++++++++ 2 files changed, 696 insertions(+) create mode 100644 .automation_scripts/parse_xml_results.py create mode 100644 .automation_scripts/run_pytorch_unit_tests.py diff --git a/.automation_scripts/parse_xml_results.py b/.automation_scripts/parse_xml_results.py new file mode 100644 index 0000000000000..7db2e1ce9233c --- /dev/null +++ b/.automation_scripts/parse_xml_results.py @@ -0,0 +1,178 @@ +""" The Python PyTorch testing script. +## +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +""" + +import xml.etree.ElementTree as ET +from pathlib import Path +from typing import Any, Dict, Tuple + +# Backends list +BACKENDS_LIST = [ + "dist-gloo", + "dist-nccl" +] + +TARGET_WORKFLOW = "--rerun-disabled-tests" + +def get_job_id(report: Path) -> int: + # [Job id in artifacts] + # Retrieve the job id from the report path. In our GHA workflows, we append + # the job id to the end of the report name, so `report` looks like: + # unzipped-test-reports-foo_5596745227/test/test-reports/foo/TEST-foo.xml + # and we want to get `5596745227` out of it. 
+ try: + return int(report.parts[0].rpartition("_")[2]) + except ValueError: + return -1 + +def is_rerun_disabled_tests(root: ET.ElementTree) -> bool: + """ + Check if the test report is coming from rerun_disabled_tests workflow + """ + skipped = root.find(".//*skipped") + # Need to check against None here, if not skipped doesn't work as expected + if skipped is None: + return False + + message = skipped.attrib.get("message", "") + return TARGET_WORKFLOW in message or "num_red" in message + +def parse_xml_report( + tag: str, + report: Path, + workflow_id: int, + workflow_run_attempt: int, + work_flow_name: str +) -> Dict[Tuple[str], Dict[str, Any]]: + """Convert a test report xml file into a JSON-serializable list of test cases.""" + print(f"Parsing {tag}s for test report: {report}") + + job_id = get_job_id(report) + print(f"Found job id: {job_id}") + + test_cases: Dict[Tuple[str], Dict[str, Any]] = {} + + root = ET.parse(report) + # TODO: unlike unittest, pytest-flakefinder used by rerun disabled tests for test_ops + # includes skipped messages multiple times (50 times by default). This slows down + # this script too much (O(n)) because it tries to gather all the stats. This should + # be fixed later in the way we use pytest-flakefinder. A zipped test report from rerun + # disabled test is only few MB, but will balloon up to a much bigger XML file after + # extracting from a dozen to few hundred MB + if is_rerun_disabled_tests(root): + return test_cases + + for test_case in root.iter(tag): + case = process_xml_element(test_case) + if tag == 'testcase': + case["workflow_id"] = workflow_id + case["workflow_run_attempt"] = workflow_run_attempt + case["job_id"] = job_id + case["work_flow_name"] = work_flow_name + + # [invoking file] + # The name of the file that the test is located in is not necessarily + # the same as the name of the file that invoked the test. + # For example, `test_jit.py` calls into multiple other test files (e.g. + # jit/test_dce.py). 
For sharding/test selection purposes, we want to + # record the file that invoked the test. + # + # To do this, we leverage an implementation detail of how we write out + # tests (https://bit.ly/3ajEV1M), which is that reports are created + # under a folder with the same name as the invoking file. + case_name = report.parent.name + for ind in range(len(BACKENDS_LIST)): + if BACKENDS_LIST[ind] in report.parts: + case_name = case_name + "_" + BACKENDS_LIST[ind] + break + case["invoking_file"] = case_name + test_cases[ ( case["invoking_file"], case["classname"], case["name"], case["work_flow_name"] ) ] = case + elif tag == 'testsuite': + case["work_flow_name"] = work_flow_name + case["invoking_xml"] = report.name + case["running_time_xml"] = case["time"] + case_name = report.parent.name + for ind in range(len(BACKENDS_LIST)): + if BACKENDS_LIST[ind] in report.parts: + case_name = case_name + "_" + BACKENDS_LIST[ind] + break + case["invoking_file"] = case_name + + test_cases[ ( case["invoking_file"], case["invoking_xml"], case["work_flow_name"] ) ] = case + + return test_cases + +def process_xml_element(element: ET.Element) -> Dict[str, Any]: + """Convert a test suite element into a JSON-serializable dict.""" + ret: Dict[str, Any] = {} + + # Convert attributes directly into dict elements. + # e.g. + # + # becomes: + # {"name": "test_foo", "classname": "test_bar"} + ret.update(element.attrib) + + # The XML format encodes all values as strings. Convert to ints/floats if + # possible to make aggregation possible in Rockset. + for k, v in ret.items(): + try: + ret[k] = int(v) + except ValueError: + pass + try: + ret[k] = float(v) + except ValueError: + pass + + # Convert inner and outer text into special dict elements. + # e.g. 
+ # my_inner_text my_tail + # becomes: + # {"text": "my_inner_text", "tail": " my_tail"} + if element.text and element.text.strip(): + ret["text"] = element.text + if element.tail and element.tail.strip(): + ret["tail"] = element.tail + + # Convert child elements recursively, placing them at a key: + # e.g. + # + # hello + # world + # another + # + # becomes + # { + # "foo": [{"text": "hello"}, {"text": "world"}], + # "bar": {"text": "another"} + # } + for child in element: + if child.tag not in ret: + ret[child.tag] = process_xml_element(child) + else: + # If there are multiple tags with the same name, they should be + # coalesced into a list. + if not isinstance(ret[child.tag], list): + ret[child.tag] = [ret[child.tag]] + ret[child.tag].append(process_xml_element(child)) + return ret \ No newline at end of file diff --git a/.automation_scripts/run_pytorch_unit_tests.py b/.automation_scripts/run_pytorch_unit_tests.py new file mode 100644 index 0000000000000..514afd19624c3 --- /dev/null +++ b/.automation_scripts/run_pytorch_unit_tests.py @@ -0,0 +1,518 @@ +#!/usr/bin/env python3 + +""" The Python PyTorch testing script. +## +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +""" + +import argparse +import os +import shutil +import subprocess +from subprocess import STDOUT, CalledProcessError + +from collections import namedtuple +from datetime import datetime +from pathlib import Path +from parse_xml_results import ( + parse_xml_report +) +from pprint import pprint +from typing import Any, Dict, List + +# unit test status list +UT_STATUS_LIST = [ + "PASSED", + "MISSED", + "SKIPPED", + "FAILED", + "XFAILED", + "ERROR" +] + +DEFAULT_CORE_TESTS = [ + "test_nn", + "test_torch", + "test_cuda", + "test_ops", + "test_unary_ufuncs", + "test_autograd", + "inductor/test_torchinductor" +] + +DISTRIBUTED_CORE_TESTS = [ + "distributed/test_c10d_common", + "distributed/test_c10d_nccl", + "distributed/test_distributed_spawn" +] + +CONSOLIDATED_LOG_FILE_NAME="pytorch_unit_tests.log" + +def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, workflow_name, path="."): + test_cases = {} + items_list = os.listdir(path) + for dir in items_list: + new_dir = path + '/' + dir + '/' + if os.path.isdir(new_dir): + for xml_report in Path(new_dir).glob("**/*.xml"): + test_cases.update( + parse_xml_report( + tag, + xml_report, + workflow_run_id, + workflow_run_attempt, + workflow_name + ) + ) + return test_cases + +def get_test_status(test_case): + # In order of priority: S=skipped, F=failure, E=error, P=pass + if "skipped" in test_case and test_case["skipped"]: + type_message = test_case["skipped"] + if type_message.__contains__('type') and type_message['type'] == 
"pytest.xfail": + return "XFAILED" + else: + return "SKIPPED" + elif "failure" in test_case and test_case["failure"]: + return "FAILED" + elif "error" in test_case and test_case["error"]: + return "ERROR" + else: + return "PASSED" + +def get_test_message(test_case, status=None): + if status == "SKIPPED": + return test_case["skipped"] if "skipped" in test_case else "" + elif status == "FAILED": + return test_case["failure"] if "failure" in test_case else "" + elif status == "ERROR": + return test_case["error"] if "error" in test_case else "" + else: + if "skipped" in test_case: + return test_case["skipped"] + elif "failure" in test_case: + return test_case["failure"] + elif "error" in test_case: + return test_case["error"] + else: + return "" + +def get_test_file_running_time(test_suite): + if test_suite.__contains__('time'): + return test_suite["time"] + return 0 + +def get_test_running_time(test_case): + if test_case.__contains__('time'): + return test_case["time"] + return "" + +def summarize_xml_files(path, workflow_name): + # statistics + TOTAL_TEST_NUM = 0 + TOTAL_PASSED_NUM = 0 + TOTAL_SKIPPED_NUM = 0 + TOTAL_XFAIL_NUM = 0 + TOTAL_FAILED_NUM = 0 + TOTAL_ERROR_NUM = 0 + TOTAL_EXECUTION_TIME = 0 + + #parse the xml files + test_cases = parse_xml_reports_as_dict(-1, -1, 'testcase', workflow_name, path) + test_suites = parse_xml_reports_as_dict(-1, -1, 'testsuite', workflow_name, path) + test_file_and_status = namedtuple("test_file_and_status", ["file_name", "status"]) + # results dict + res = {} + res_item_list = [ "PASSED", "SKIPPED", "XFAILED", "FAILED", "ERROR" ] + test_file_items = set() + for (k,v) in list(test_suites.items()): + file_name = k[0] + if not file_name in test_file_items: + test_file_items.add(file_name) + # initialization + for item in res_item_list: + temp_item = test_file_and_status(file_name, item) + res[temp_item] = {} + temp_item_statistics = test_file_and_status(file_name, "STATISTICS") + res[temp_item_statistics] = {'TOTAL': 0, 'PASSED': 
0, 'SKIPPED': 0, 'XFAILED': 0, 'FAILED': 0, 'ERROR': 0, 'EXECUTION_TIME': 0} + test_running_time = get_test_file_running_time(v) + res[temp_item_statistics]["EXECUTION_TIME"] += test_running_time + TOTAL_EXECUTION_TIME += test_running_time + else: + test_tuple_key_statistics = test_file_and_status(file_name, "STATISTICS") + test_running_time = get_test_file_running_time(v) + res[test_tuple_key_statistics]["EXECUTION_TIME"] += test_running_time + TOTAL_EXECUTION_TIME += test_running_time + + for (k,v) in list(test_cases.items()): + file_name = k[0] + class_name = k[1] + test_name = k[2] + combined_name = file_name + "::" + class_name + "::" + test_name + test_status = get_test_status(v) + test_running_time = get_test_running_time(v) + test_message = get_test_message(v, test_status) + test_info_value = "" + test_tuple_key_status = test_file_and_status(file_name, test_status) + test_tuple_key_statistics = test_file_and_status(file_name, "STATISTICS") + TOTAL_TEST_NUM += 1 + res[test_tuple_key_statistics]["TOTAL"] += 1 + if test_status == "PASSED": + test_info_value = str(test_running_time) + res[test_tuple_key_status][combined_name] = test_info_value + res[test_tuple_key_statistics]["PASSED"] += 1 + TOTAL_PASSED_NUM += 1 + elif test_status == "SKIPPED": + test_info_value = str(test_running_time) + res[test_tuple_key_status][combined_name] = test_info_value + res[test_tuple_key_statistics]["SKIPPED"] += 1 + TOTAL_SKIPPED_NUM += 1 + elif test_status == "XFAILED": + test_info_value = str(test_running_time) + res[test_tuple_key_status][combined_name] = test_info_value + res[test_tuple_key_statistics]["XFAILED"] += 1 + TOTAL_XFAIL_NUM += 1 + elif test_status == "FAILED": + test_info_value = test_message + res[test_tuple_key_status][combined_name] = test_info_value + res[test_tuple_key_statistics]["FAILED"] += 1 + TOTAL_FAILED_NUM += 1 + elif test_status == "ERROR": + test_info_value = test_message + res[test_tuple_key_status][combined_name] = test_info_value + 
res[test_tuple_key_statistics]["ERROR"] += 1 + TOTAL_ERROR_NUM += 1 + + # generate statistics_dict + statistics_dict = {} + statistics_dict["TOTAL"] = TOTAL_TEST_NUM + statistics_dict["PASSED"] = TOTAL_PASSED_NUM + statistics_dict["SKIPPED"] = TOTAL_SKIPPED_NUM + statistics_dict["XFAILED"] = TOTAL_XFAIL_NUM + statistics_dict["FAILED"] = TOTAL_FAILED_NUM + statistics_dict["ERROR"] = TOTAL_ERROR_NUM + statistics_dict["EXECUTION_TIME"] = TOTAL_EXECUTION_TIME + aggregate_item = workflow_name + "_aggregate" + total_item = test_file_and_status(aggregate_item, "STATISTICS") + res[total_item] = statistics_dict + + return res + +def run_command_and_capture_output(cmd): + try: + print(f"Running command '{cmd}'") + with open(CONSOLIDATED_LOG_FILE_PATH, "a+") as output_file: + print(f"========================================", file=output_file, flush=True) + print(f"[RUN_PYTORCH_UNIT_TESTS] Running command '{cmd}'", file=output_file, flush=True) # send to consolidated file as well + print(f"========================================", file=output_file, flush=True) + p = subprocess.run(cmd, shell=True, stdout=output_file, stderr=STDOUT, text=True) + except CalledProcessError as e: + print(f"ERROR: Cmd {cmd} failed with return code: {e.returncode}!") + +def run_entire_tests(workflow_name, test_shell_path, overall_logs_path_current_run, test_reports_src): + if os.path.exists(test_reports_src): + shutil.rmtree(test_reports_src) + + os.mkdir(test_reports_src) + copied_logs_path = "" + if workflow_name == "default": + os.environ['TEST_CONFIG'] = 'default' + copied_logs_path = overall_logs_path_current_run + "default_xml_results_entire_tests/" + elif workflow_name == "distributed": + os.environ['TEST_CONFIG'] = 'distributed' + copied_logs_path = overall_logs_path_current_run + "distributed_xml_results_entire_tests/" + elif workflow_name == "inductor": + os.environ['TEST_CONFIG'] = 'inductor' + copied_logs_path = overall_logs_path_current_run + "inductor_xml_results_entire_tests/" + # 
use test.sh for tests execution + run_command_and_capture_output(test_shell_path) + copied_logs_path_destination = shutil.copytree(test_reports_src, copied_logs_path) + entire_results_dict = summarize_xml_files(copied_logs_path_destination, workflow_name) + return entire_results_dict + +def run_priority_tests(workflow_name, test_run_test_path, overall_logs_path_current_run, test_reports_src): + if os.path.exists(test_reports_src): + shutil.rmtree(test_reports_src) + + os.mkdir(test_reports_src) + copied_logs_path = "" + if workflow_name == "default": + os.environ['TEST_CONFIG'] = 'default' + os.environ['HIP_VISIBLE_DEVICES'] = '0' + copied_logs_path = overall_logs_path_current_run + "default_xml_results_priority_tests/" + # use run_test.py for tests execution + default_priority_test_suites = " ".join(DEFAULT_CORE_TESTS) + command = "python3 " + test_run_test_path + " --include " + default_priority_test_suites + " --exclude-jit-executor --exclude-distributed-tests --verbose" + run_command_and_capture_output(command) + del os.environ['HIP_VISIBLE_DEVICES'] + elif workflow_name == "distributed": + os.environ['TEST_CONFIG'] = 'distributed' + os.environ['HIP_VISIBLE_DEVICES'] = '0,1' + copied_logs_path = overall_logs_path_current_run + "distributed_xml_results_priority_tests/" + # use run_test.py for tests execution + distributed_priority_test_suites = " ".join(DISTRIBUTED_CORE_TESTS) + command = "python3 " + test_run_test_path + " --include " + distributed_priority_test_suites + " --distributed-tests --verbose" + run_command_and_capture_output(command) + del os.environ['HIP_VISIBLE_DEVICES'] + copied_logs_path_destination = shutil.copytree(test_reports_src, copied_logs_path) + priority_results_dict = summarize_xml_files(copied_logs_path_destination, workflow_name) + + return priority_results_dict + +def run_selected_tests(workflow_name, test_run_test_path, overall_logs_path_current_run, test_reports_src, selected_list): + if os.path.exists(test_reports_src): + 
shutil.rmtree(test_reports_src) + + os.mkdir(test_reports_src) + copied_logs_path = "" + if workflow_name == "default": + os.environ['TEST_CONFIG'] = 'default' + os.environ['HIP_VISIBLE_DEVICES'] = '0' + copied_logs_path = overall_logs_path_current_run + "default_xml_results_selected_tests/" + # use run_test.py for tests execution + default_selected_test_suites = " ".join(selected_list) + command = "python3 " + test_run_test_path + " --include " + default_selected_test_suites + " --exclude-jit-executor --exclude-distributed-tests --verbose" + run_command_and_capture_output(command) + del os.environ['HIP_VISIBLE_DEVICES'] + elif workflow_name == "distributed": + os.environ['TEST_CONFIG'] = 'distributed' + os.environ['HIP_VISIBLE_DEVICES'] = '0,1' + copied_logs_path = overall_logs_path_current_run + "distributed_xml_results_selected_tests/" + # use run_test.py for tests execution + distributed_selected_test_suites = " ".join(selected_list) + command = "python3 " + test_run_test_path + " --include " + distributed_selected_test_suites + " --distributed-tests --verbose" + run_command_and_capture_output(command) + del os.environ['HIP_VISIBLE_DEVICES'] + elif workflow_name == "inductor": + os.environ['TEST_CONFIG'] = 'inductor' + copied_logs_path = overall_logs_path_current_run + "inductor_xml_results_selected_tests/" + inductor_selected_test_suites = "" + non_inductor_selected_test_suites = "" + for item in selected_list: + if "inductor/" in item: + inductor_selected_test_suites += item + inductor_selected_test_suites += " " + else: + non_inductor_selected_test_suites += item + non_inductor_selected_test_suites += " " + if inductor_selected_test_suites != "": + inductor_selected_test_suites = inductor_selected_test_suites[:-1] + command = "python3 " + test_run_test_path + " --include " + inductor_selected_test_suites + " --verbose" + run_command_and_capture_output(command) + if non_inductor_selected_test_suites != "": + non_inductor_selected_test_suites = 
non_inductor_selected_test_suites[:-1] + command = "python3 " + test_run_test_path + " --inductor --include " + non_inductor_selected_test_suites + " --verbose" + run_command_and_capture_output(command) + copied_logs_path_destination = shutil.copytree(test_reports_src, copied_logs_path) + selected_results_dict = summarize_xml_files(copied_logs_path_destination, workflow_name) + + return selected_results_dict + +def run_test_and_summarize_results( + pytorch_root_dir: str, + priority_tests: bool, + test_config: List[str], + default_list: List[str], + distributed_list: List[str], + inductor_list: List[str], + skip_rerun: bool) -> Dict[str, Any]: + + # copy current environment variables + _environ = dict(os.environ) + + # modify path + test_shell_path = pytorch_root_dir + "/.ci/pytorch/test.sh" + test_run_test_path = pytorch_root_dir + "/test/run_test.py" + repo_test_log_folder_path = pytorch_root_dir + "/.automation_logs/" + test_reports_src = pytorch_root_dir + "/test/test-reports/" + run_test_python_file = pytorch_root_dir + "/test/run_test.py" + + # change directory to pytorch root + os.chdir(pytorch_root_dir) + + # all test results dict + res_all_tests_dict = {} + + # patterns + search_text = "--reruns=2" + replace_text = "--reruns=0" + + # create logs folder + if not os.path.exists(repo_test_log_folder_path): + os.mkdir(repo_test_log_folder_path) + + # Set common environment variables for all scenarios + os.environ['CI'] = '1' + os.environ['PYTORCH_TEST_WITH_ROCM'] = '1' + os.environ['HSA_FORCE_FINE_GRAIN_PCIE'] = '1' + os.environ['PYTORCH_TESTING_DEVICE_ONLY_FOR'] = 'cuda' + os.environ['CONTINUE_THROUGH_ERROR'] = 'True' + if skip_rerun: + # modify run_test.py in-place + with open(run_test_python_file, 'r') as file: + data = file.read() + data = data.replace(search_text, replace_text) + with open(run_test_python_file, 'w') as file: + file.write(data) + + # Time stamp + current_datetime = datetime.now().strftime("%Y%m%d_%H-%M-%S") + print("Current date & time : ", 
current_datetime) + # performed as Job ID + str_current_datetime = str(current_datetime) + overall_logs_path_current_run = repo_test_log_folder_path + str_current_datetime + "/" + os.mkdir(overall_logs_path_current_run) + + global CONSOLIDATED_LOG_FILE_PATH + CONSOLIDATED_LOG_FILE_PATH = overall_logs_path_current_run + CONSOLIDATED_LOG_FILE_NAME + + # Check multi gpu availability if distributed tests are enabled + if ("distributed" in test_config) or len(distributed_list) != 0: + check_num_gpus_for_distributed() + + # Install test requirements + command = "pip3 install -r requirements.txt && pip3 install -r .ci/docker/requirements-ci.txt" + run_command_and_capture_output(command) + + # Run entire tests for each workflow + if not priority_tests and not default_list and not distributed_list and not inductor_list: + # run entire tests for default, distributed and inductor workflows → use test.sh + if not test_config: + check_num_gpus_for_distributed() + # default test process + res_default_all = run_entire_tests("default", test_shell_path, overall_logs_path_current_run, test_reports_src) + res_all_tests_dict["default"] = res_default_all + # distributed test process + res_distributed_all = run_entire_tests("distributed", test_shell_path, overall_logs_path_current_run, test_reports_src) + res_all_tests_dict["distributed"] = res_distributed_all + # inductor test process + res_inductor_all = run_entire_tests("inductor", test_shell_path, overall_logs_path_current_run, test_reports_src) + res_all_tests_dict["inductor"] = res_inductor_all + else: + workflow_list = [] + for item in test_config: + workflow_list.append(item) + if "default" in workflow_list: + res_default_all = run_entire_tests("default", test_shell_path, overall_logs_path_current_run, test_reports_src) + res_all_tests_dict["default"] = res_default_all + if "distributed" in workflow_list: + res_distributed_all = run_entire_tests("distributed", test_shell_path, overall_logs_path_current_run, test_reports_src) + 
res_all_tests_dict["distributed"] = res_distributed_all + if "inductor" in workflow_list: + res_inductor_all = run_entire_tests("inductor", test_shell_path, overall_logs_path_current_run, test_reports_src) + res_all_tests_dict["inductor"] = res_inductor_all + # Run priority test for each workflow + elif priority_tests and not default_list and not distributed_list and not inductor_list: + if not test_config: + check_num_gpus_for_distributed() + # default test process + res_default_priority = run_priority_tests("default", test_run_test_path, overall_logs_path_current_run, test_reports_src) + res_all_tests_dict["default"] = res_default_priority + # distributed test process + res_distributed_priority = run_priority_tests("distributed", test_run_test_path, overall_logs_path_current_run, test_reports_src) + res_all_tests_dict["distributed"] = res_distributed_priority + # will not run inductor priority tests + print("Inductor priority tests cannot run since no core tests defined with inductor workflow.") + else: + workflow_list = [] + for item in test_config: + workflow_list.append(item) + if "default" in workflow_list: + res_default_priority = run_priority_tests("default", test_run_test_path, overall_logs_path_current_run, test_reports_src) + res_all_tests_dict["default"] = res_default_priority + if "distributed" in workflow_list: + res_distributed_priority = run_priority_tests("distributed", test_run_test_path, overall_logs_path_current_run, test_reports_src) + res_all_tests_dict["distributed"] = res_distributed_priority + if "inductor" in workflow_list: + print("Inductor priority tests cannot run since no core tests defined with inductor workflow.") + # Run specified tests for each workflow + elif (default_list or distributed_list or inductor_list) and not test_config and not priority_tests: + if default_list: + default_workflow_list = [] + for item in default_list: + default_workflow_list.append(item) + res_default_selected = run_selected_tests("default", 
test_run_test_path, overall_logs_path_current_run, test_reports_src, default_workflow_list) + res_all_tests_dict["default"] = res_default_selected + if distributed_list: + distributed_workflow_list = [] + for item in distributed_list: + distributed_workflow_list.append(item) + res_distributed_selected = run_selected_tests("distributed", test_run_test_path, overall_logs_path_current_run, test_reports_src, distributed_workflow_list) + res_all_tests_dict["distributed"] = res_distributed_selected + if inductor_list: + inductor_workflow_list = [] + for item in inductor_list: + inductor_workflow_list.append(item) + res_inductor_selected = run_selected_tests("inductor", test_run_test_path, overall_logs_path_current_run, test_reports_src, inductor_workflow_list) + res_all_tests_dict["inductor"] = res_inductor_selected + else: + raise Exception("Invalid test configurations!") + + # restore environment variables + os.environ.clear() + os.environ.update(_environ) + + # restore files + if skip_rerun: + # modify run_test.py in-place + with open(run_test_python_file, 'r') as file: + data = file.read() + data = data.replace(replace_text, search_text) + with open(run_test_python_file, 'w') as file: + file.write(data) + + return res_all_tests_dict + +def parse_args(): + parser = argparse.ArgumentParser(description='Run PyTorch unit tests and generate xml results summary', formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument('--test_config', nargs='+', default=[], type=str, help="space-separated list of test workflows to be executed eg. 'default distributed'") + parser.add_argument('--priority_tests', action='store_true', help="run priority tests only") + parser.add_argument('--default_list', nargs='+', default=[], help="space-separated list of 'default' config test suites/files to be executed eg. 
'test_weak test_dlpack'") + parser.add_argument('--distributed_list', nargs='+', default=[], help="space-separated list of 'distributed' config test suites/files to be executed eg. 'distributed/test_c10d_common distributed/test_c10d_nccl'") + parser.add_argument('--inductor_list', nargs='+', default=[], help="space-separated list of 'inductor' config test suites/files to be executed eg. 'inductor/test_torchinductor test_ops'") + parser.add_argument('--pytorch_root', default='.', type=str, help="PyTorch root directory") + parser.add_argument('--skip_rerun', action='store_true', help="skip rerun process") + parser.add_argument('--example_output', type=str, help="{'workflow_name': {\n" + " test_file_and_status(file_name='workflow_aggregate', status='STATISTICS'): {}, \n" + " test_file_and_status(file_name='test_file_name_1', status='ERROR'): {}, \n" + " test_file_and_status(file_name='test_file_name_1', status='FAILED'): {}, \n" + " test_file_and_status(file_name='test_file_name_1', status='PASSED'): {}, \n" + " test_file_and_status(file_name='test_file_name_1', status='SKIPPED'): {}, \n" + " test_file_and_status(file_name='test_file_name_1', status='STATISTICS'): {} \n" + "}}\n") + parser.add_argument('--example_usages', type=str, help="RUN ALL TESTS: python3 run_pytorch_unit_tests.py \n" + "RUN PRIORITY TESTS: python3 run_pytorch_unit_tests.py --test_config distributed --priority_test \n" + "RUN SELECTED TESTS: python3 run_pytorch_unit_tests.py --default_list test_weak test_dlpack --inductor_list inductor/test_torchinductor") + return parser.parse_args() + +def check_num_gpus_for_distributed(): + p = subprocess.run("rocminfo | grep -cE 'Name:\s+gfx'", shell=True, capture_output=True, text=True) + num_gpus_visible = int(p.stdout) + assert num_gpus_visible > 1, "Number of visible GPUs should be >1 to run distributed unit tests" + +def main(): + args = parse_args() + all_tests_results = run_test_and_summarize_results(args.pytorch_root, args.priority_tests, 
args.test_config, args.default_list, args.distributed_list, args.inductor_list, args.skip_rerun) + pprint(dict(all_tests_results)) + +if __name__ == "__main__": + main() From 799f8dbd9611d30d94a9703602e0d7e1daf81483 Mon Sep 17 00:00:00 2001 From: Ethan Wee <158101733+ethanwee1@users.noreply.github.com> Date: Thu, 20 Feb 2025 14:54:08 -0800 Subject: [PATCH 05/16] [rocm6.4_internal_testing] Enable wheels (#1884) (#1907) * Use triton commit same as that used for release/2.6 branch since both are triton version 3.2.0, so assuming they're compatible. Relates to: https://github.com/ROCm/rocAutomation/pull/660/files https://github.com/ROCm/builder/pull/70/files Validation http://ml-ci-internal.amd.com:8080/job/pytorch/job/manylinux_rocm_wheels/568/ --------- Co-authored-by: Jithun Nair Co-authored-by: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> (cherry picked from commit 14c1417d4b13abb77db5fcba521791a9b6086737) (cherry picked from commit c20a8f8632a447adc3ed369a5f85ea87a2b518f0) --- .circleci/scripts/binary_populate_env.sh | 42 ------------------------ .github/scripts/build_triton_wheel.py | 5 ++- CMakeLists.txt | 6 ++-- 3 files changed, 5 insertions(+), 48 deletions(-) diff --git a/.circleci/scripts/binary_populate_env.sh b/.circleci/scripts/binary_populate_env.sh index f876ac8efcf7f..e64a690af1d6a 100755 --- a/.circleci/scripts/binary_populate_env.sh +++ b/.circleci/scripts/binary_populate_env.sh @@ -69,48 +69,6 @@ fi export PYTORCH_BUILD_NUMBER=1 -# Set triton version as part of PYTORCH_EXTRA_INSTALL_REQUIREMENTS -TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) -TRITON_CONSTRAINT="platform_system == 'Linux'" - -if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! 
"$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then - TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}" - if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then - TRITON_SHORTHASH=$(cut -c1-8 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt) - TRITON_REQUIREMENT="pytorch-triton==${TRITON_VERSION}+git${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}" - fi - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}" -fi - -# Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton rocm package -if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*rocm.* && $(uname) == "Linux" ]]; then - TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}" - if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then - TRITON_SHORTHASH=$(cut -c1-8 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt) - TRITON_REQUIREMENT="triton==${TRITON_VERSION}+git${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}" - fi - if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}" - else - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}" - fi -fi - -# Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton xpu package -if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then - TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_xpu_version.txt) - TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}" - if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then - TRITON_SHORTHASH=$(cut -c1-8 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-xpu.txt) - TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+git${TRITON_SHORTHASH}" - fi - if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then - export 
PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}" - else - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}" - fi -fi - USE_GLOO_WITH_OPENSSL="ON" if [[ "$GPU_ARCH_TYPE" =~ .*aarch64.* ]]; then USE_GLOO_WITH_OPENSSL="OFF" diff --git a/.github/scripts/build_triton_wheel.py b/.github/scripts/build_triton_wheel.py index 16eb6a7851974..e541e7a86f653 100644 --- a/.github/scripts/build_triton_wheel.py +++ b/.github/scripts/build_triton_wheel.py @@ -96,7 +96,7 @@ def build_triton( # Nightly binaries include the triton commit hash, i.e. 2.1.0+e6216047b8 # while release build should only include the version, i.e. 2.1.0 rocm_version = get_rocm_version() - version_suffix = f"+rocm{rocm_version}_{commit_hash[:10]}" + version_suffix = f"+rocm{rocm_version}.git{commit_hash[:8]}" version += version_suffix with TemporaryDirectory() as tmpdir: @@ -124,6 +124,7 @@ def build_triton( # change built wheel name and version env["TRITON_WHEEL_NAME"] = triton_pkg_name + env["TRITON_WHEEL_VERSION_SUFFIX"] = version_suffix if with_clang_ldd: env["TRITON_BUILD_WITH_CLANG_LLD"] = "1" @@ -139,8 +140,6 @@ def build_triton( cwd=triton_basedir, shell=True, ) - cur_rocm_ver = get_rocm_version() - check_call(["scripts/amd/setup_rocm_libs.sh", cur_rocm_ver], cwd=triton_basedir) print("ROCm libraries setup for triton installation...") # old triton versions have setup.py in the python/ dir, diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b88247df27a5..991ea336a175b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,11 +56,11 @@ set(CMAKE_C_STANDARD # ---[ Utils include(cmake/public/utils.cmake) -# --- [ Check that minimal gcc version is 9.3+ -if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.3) +# --- [ Check that minimal gcc version is 9.2+ +if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.2) message( FATAL_ERROR - "GCC-9.3 or newer is required to compile PyTorch, but found 
${CMAKE_CXX_COMPILER_VERSION}" + "GCC-9.2 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}" ) endif() From 2c73b981e12b7d91d592ad32b427e7b32d69ea49 Mon Sep 17 00:00:00 2001 From: Jagadish Krishnamoorthy Date: Fri, 18 Apr 2025 18:56:37 -0700 Subject: [PATCH 06/16] ROCm: Enable tf32 testing on test_nn (#55) * Add trailing comma for consistency in gfx architecture list Signed-off-by: Jagadish Krishnamoorthy * ROCm: Enable tf32 testing on test_nn Signed-off-by: Jagadish Krishnamoorthy --------- Signed-off-by: Jagadish Krishnamoorthy (cherry picked from commit c113e1482c68c9bcf11b43351e9187003747804e) --- torch/testing/_internal/common_cuda.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/torch/testing/_internal/common_cuda.py b/torch/testing/_internal/common_cuda.py index 74dfe0c56c232..3f475bd6823b5 100644 --- a/torch/testing/_internal/common_cuda.py +++ b/torch/testing/_internal/common_cuda.py @@ -192,6 +192,9 @@ def tf32_off(): @contextlib.contextmanager def tf32_on(self, tf32_precision=1e-5): + if torch.version.hip: + hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None) + os.environ["HIPBLASLT_ALLOW_TF32"] = "1" old_allow_tf32_matmul = torch.backends.cuda.matmul.allow_tf32 old_precision = self.precision try: @@ -200,6 +203,11 @@ def tf32_on(self, tf32_precision=1e-5): with torch.backends.cudnn.flags(enabled=None, benchmark=None, deterministic=None, allow_tf32=True): yield finally: + if torch.version.hip: + if hip_allow_tf32 is not None: + os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32 + else: + del os.environ["HIPBLASLT_ALLOW_TF32"] torch.backends.cuda.matmul.allow_tf32 = old_allow_tf32_matmul self.precision = old_precision From 9237b263e12ef8bb8e4f87fa9432a3f90dd2a8ce Mon Sep 17 00:00:00 2001 From: omkar kakarparthi <75638701+okakarpa@users.noreply.github.com> Date: Tue, 13 May 2025 13:14:36 -0500 Subject: [PATCH 07/16] [AUTOGENERATED] [rocm6.5_internal_testing] Remove --no-index and --no-deps flags (#2121) 
Cherry-pick of https://github.com/ROCm/pytorch/pull/2103 Co-authored-by: Ethan Wee (cherry picked from commit 1dea6e825b91409af44d873d025eae9d6f578e7c) --- .ci/pytorch/common_utils.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh index ff9d8ad41cc92..9c9d223777466 100644 --- a/.ci/pytorch/common_utils.sh +++ b/.ci/pytorch/common_utils.sh @@ -67,13 +67,13 @@ function pip_install_whl() { # Loop through each path and install individually for path in "${paths[@]}"; do echo "Installing $path" - python3 -mpip install --no-index --no-deps "$path" + python3 -mpip install "$path" done else # Loop through each argument and install individually for path in "${args[@]}"; do echo "Installing $path" - python3 -mpip install --no-index --no-deps "$path" + python3 -mpip install "$path" done fi } From ee8c4e91866b3749f176e381de4475f910f26b17 Mon Sep 17 00:00:00 2001 From: Ethan Wee Date: Tue, 10 Jun 2025 07:08:01 -0700 Subject: [PATCH 08/16] [rocm7.0_internal_testing] upgrading numpy (#2256) Relates to: https://github.com/ROCm/builder/pull/82 Validation: http://rocm-ci.amd.com/job/mainline-pytorch_internal-manylinux-wheels/98/ Using `registry-sc-harbor.amd.com/framework/compute-rocm-dkms-no-npi-hipclang:16180_ubuntu24.04_py3.12_pytorch_lw_rocm7.0_IT_upgrade_numpy_452f3df6`: ``` root@d92befdbb2a6:/# pip list | egrep "numpy|pandas" numpy 2.1.2 pandas 2.2.3 root@d92befdbb2a6:/# python3 Python 3.12.3 (main, Feb 4 2025, 14:48:35) [GCC 13.3.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> import pandas >>> import torch >>> import numpy >>> exit() root@d92befdbb2a6:/data/pytorch-micro-benchmarking# HIP_VISIBLE_DEVICES=1 python3 micro_benchmarking_pytorch.py --network resnet50 INFO: running forward and backward for warmup. INFO: running the benchmark.. OK: finished running benchmark.. 
--------------------SUMMARY-------------------------- Microbenchmark for network : resnet50 Num devices: 1 Dtype: FP32 Mini batch size [img] : 64 Time per mini-batch : 0.11369450092315674 Throughput [img/sec] : 562.9120096428937 ``` --------- Co-authored-by: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> (cherry picked from commit cf324795568f35f91a78fae9cb8f0efc8d955351) --- .ci/docker/requirements-ci.txt | 14 ++++++-------- requirements-build.txt | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt index d44dfb1ed67ae..47c9a44a775bb 100644 --- a/.ci/docker/requirements-ci.txt +++ b/.ci/docker/requirements-ci.txt @@ -117,10 +117,10 @@ ninja==1.11.1.4 #Pinned versions: 1.11.1.4 #test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py -numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x" -numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x" +numba==0.60.0 ; python_version == "3.9" +numba==0.61.2 ; python_version > "3.9" #Description: Just-In-Time Compiler for Numerical Functions -#Pinned versions: 0.55.2, 0.60.0 +#Pinned versions: 0.61.2, 0.60.0 #test that import: test_numba_integration.py #Need release > 0.61.2 for s390x due to https://github.com/numba/numba/pull/10073 @@ -136,12 +136,10 @@ numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x" #test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py, #test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py, #test_binary_ufuncs.py -numpy==1.22.4; python_version == "3.10" -numpy==1.26.2; python_version == "3.11" or python_version == "3.12" -numpy==2.1.2; python_version >= "3.13" +numpy==2.0.2; python_version == "3.9" +numpy==2.1.2; python_version >= "3.9" -pandas==2.0.3; python_version < "3.13" -pandas==2.2.3; python_version >= "3.13" +pandas==2.2.3 #onnxruntime #Description: scoring engine for Open Neural Network 
Exchange (ONNX) models diff --git a/requirements-build.txt b/requirements-build.txt index 85923ae39cbdb..170868e57db1a 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -2,7 +2,7 @@ setuptools>=70.1.0 cmake>=3.27 ninja -numpy +numpy==2.1.2 packaging pyyaml requests From 9621d7f956ebf0f1302cd61a5c59d2c4d6a7407b Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Thu, 12 Jun 2025 23:32:43 -0500 Subject: [PATCH 09/16] [rocm7.0_internal_testing] Use different package versions for py3.9 (#2269) Fixes SWDEV-536456 Fixes error post-https://github.com/ROCm/pytorch/pull/2256: ``` 00:12:44.248 #22 155.3 ERROR: Ignored the following versions that require a different python version: 0.52.0 Requires-Python >=3.6,<3.9; 0.52.0rc3 Requires-Python >=3.6,<3.9; 0.61.0 Requires-Python >=3.10; 0.61.0rc1 Requires-Python >=3.10; 0.61.0rc2 Requires-Python >=3.10; 0.61.1rc1 Requires-Python >=3.10; 0.61.2 Requires-Python >=3.10; 3.3 Requires-Python >=3.10; 3.3rc0 Requires-Python >=3.10; 3.4 Requires-Python >=3.10; 3.4.1 Requires-Python >=3.10; 3.4.2 Requires-Python >=3.10; 3.4rc0 Requires-Python >=3.10; 3.5 Requires-Python >=3.11; 3.5rc0 Requires-Python >=3.11; 8.2.0 Requires-Python >=3.10; 8.2.1 Requires-Python >=3.10 00:12:44.248 #22 155.3 ERROR: Could not find a version that satisfies the requirement numba==0.61.2 (from versions: 0.1, 0.2, 0.3, 0.5.0, 0.6.0, 0.7.0, 0.7.1, 0.7.2, 0.8.0, 0.8.1, 0.9.0, 0.10.0, 0.10.1, 0.11.0, 0.12.0, 0.12.1, 0.12.2, 0.13.0, 0.13.2, 0.13.3, 0.13.4, 0.14.0, 0.15.1, 0.16.0, 0.17.0, 0.18.1, 0.18.2, 0.19.1, 0.19.2, 0.20.0, 0.21.0, 0.22.0, 0.22.1, 0.23.0, 0.23.1, 0.24.0, 0.25.0, 0.26.0, 0.27.0, 0.28.1, 0.29.0, 0.30.0, 0.30.1, 0.31.0, 0.32.0, 0.33.0, 0.34.0, 0.35.0, 0.36.1, 0.36.2, 0.37.0, 0.38.0, 0.38.1, 0.39.0, 0.40.0, 0.40.1, 0.41.0, 0.42.0, 0.42.1, 0.43.0, 0.43.1, 0.44.0, 0.44.1, 0.45.0, 0.45.1, 0.46.0, 0.47.0, 0.48.0, 0.49.0, 0.49.1rc1, 0.49.1, 0.50.0rc1, 0.50.0, 0.50.1, 0.51.0rc1, 0.51.0, 
0.51.1, 0.51.2, 0.52.0rc2, 0.53.0rc1.post1, 0.53.0rc2, 0.53.0rc3, 0.53.0, 0.53.1, 0.54.0rc2, 0.54.0rc3, 0.54.0, 0.54.1rc1, 0.54.1, 0.55.0rc1, 0.55.0, 0.55.1, 0.55.2, 0.56.0rc1, 0.56.0, 0.56.2, 0.56.3, 0.56.4, 0.57.0rc1, 0.57.0, 0.57.1rc1, 0.57.1, 0.58.0rc1, 0.58.0rc2, 0.58.0, 0.58.1, 0.59.0rc1, 0.59.0, 0.59.1, 0.60.0rc1, 0.60.0) 00:12:44.248 #22 155.3 ERROR: No matching distribution found for numba==0.61.2 ``` Validation: * Docker image: http://rocm-ci.amd.com/job/mainline-framework-pytorch-internal-cs9-ci/132 * Wheels: http://rocm-ci.amd.com/job/mainline-pytorch_internal-manylinux-wheels/102/ From `registry-sc-harbor.amd.com/framework/compute-rocm-dkms-no-npi-hipclang:16180_ubuntu22.04_py3.9_pytorch_lw_rocm7.0_IT_py3.9_a11d94ad`: ``` root@f43861a0a856:/# pip list | egrep "numpy|pandas" numpy 2.0.2 pandas 2.2.3 root@f43861a0a856:/# python Python 3.9.23 (main, Jun 4 2025, 08:55:38) [GCC 11.4.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> import torch >>> import numpy >>> import pandas root@f43861a0a856:/data/pytorch-micro-benchmarking# HIP_VISIBLE_DEVICES=1 python3 micro_benchmarking_pytorch.py --network resnet50 INFO: running forward and backward for warmup. INFO: running the benchmark.. OK: finished running benchmark.. 
--------------------SUMMARY-------------------------- Microbenchmark for network : resnet50 Num devices: 1 Dtype: FP32 Mini batch size [img] : 64 Time per mini-batch : 0.11354223489761353 Throughput [img/sec] : 563.6669038416574 ``` (cherry picked from commit a0a9d816537b921a3d82edf77d103fbe0fbb5fe1) --- .ci/docker/requirements-ci.txt | 10 +++++----- requirements-build.txt | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt index 47c9a44a775bb..0081762c320d3 100644 --- a/.ci/docker/requirements-ci.txt +++ b/.ci/docker/requirements-ci.txt @@ -120,7 +120,7 @@ ninja==1.11.1.4 numba==0.60.0 ; python_version == "3.9" numba==0.61.2 ; python_version > "3.9" #Description: Just-In-Time Compiler for Numerical Functions -#Pinned versions: 0.61.2, 0.60.0 +#Pinned versions: 0.54.1, 0.49.0, <=0.49.1 #test that import: test_numba_integration.py #Need release > 0.61.2 for s390x due to https://github.com/numba/numba/pull/10073 @@ -136,8 +136,8 @@ numba==0.61.2 ; python_version > "3.9" #test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py, #test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py, #test_binary_ufuncs.py -numpy==2.0.2; python_version == "3.9" -numpy==2.1.2; python_version >= "3.9" +numpy==2.0.2 ; python_version == "3.9" +numpy==2.1.2 ; python_version > "3.9" pandas==2.2.3 @@ -249,8 +249,8 @@ scikit-image==0.22.0 #Pinned versions: 0.20.3 #test that import: -scipy==1.10.1 ; python_version <= "3.11" -scipy==1.14.1 ; python_version >= "3.12" +scipy==1.13.1 ; python_version == "3.9" +scipy==1.14.1 ; python_version > "3.9" # Pin SciPy because of failing distribution tests (see #60347) #Description: scientific python #Pinned versions: 1.10.1 diff --git a/requirements-build.txt b/requirements-build.txt index 170868e57db1a..25f1e47a14968 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -2,7 +2,8 @@ setuptools>=70.1.0 cmake>=3.27 ninja 
-numpy==2.1.2 +numpy==2.0.2 ; python_version == "3.9"" +numpy==2.1.2 ; python_version > "3.9" packaging pyyaml requests From b387c9e429d4f463e06b6032eab0ce076c3a6c81 Mon Sep 17 00:00:00 2001 From: Dmitry Nikolaev <139769634+dnikolaev-amd@users.noreply.github.com> Date: Mon, 30 Jun 2025 19:01:04 +0200 Subject: [PATCH 10/16] [rocm7.0_internal_testing] fix enabling sparse tests fp16/bf16 for rocm7.0/7.1 (#2239) Revamped version of #2108 PR to: - enable complex data types for sparse matmul on ROCm - fix sparse addmm/baddbmm on ROCm - fix sparse hipification for ROCm - fix/enable sparse tests on ROCm (~50 tests total for non-fp16/bf16): - enable fp16/bf16 sparse path for rocm7.0 - enable fp16/bf16 sparse tests for rocm7.0/7.1 ``` test_sparse_csr.py::TestSparseCSRCUDA::test_bmm_cuda_* test_sparse.py::TestSparseCUDA::test_sparse_matmul_cuda_* test_sparse_csr.py::TestSparseCSRCUDA::test_mm_cuda_float64 test_sparse_csr.py::TestSparseCSRCUDA::test_addmm_all_sparse_csr_SparseCS* test_sparse_csr.py::TestSparseCSRCUDA::test_addmm_sizes_all_sparse_csr_* test_sparse_csr.py::TestSparseCSRCUDA::test_sparse_addmm_cuda_float16 ``` (cherry picked from commit cc2a69c6b6c35e6024f40189ceebe530c1968810) --- .../ATen/native/sparse/cuda/SparseMatMul.cu | 20 +++++++++++++++++++ test/test_sparse.py | 6 ++++++ test/test_sparse_csr.py | 7 ++++--- torch/utils/hipify/cuda_to_hip_mappings.py | 8 ++++++++ 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/aten/src/ATen/native/sparse/cuda/SparseMatMul.cu b/aten/src/ATen/native/sparse/cuda/SparseMatMul.cu index 49bea10c65104..8402555a5c340 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseMatMul.cu +++ b/aten/src/ATen/native/sparse/cuda/SparseMatMul.cu @@ -40,7 +40,27 @@ #include +#if defined(__CUDACC__) && (defined(CUSPARSE_VERSION) || (defined(USE_ROCM) && ROCM_VERSION >= 60300)) +#define IS_CUSPARSE11_AVAILABLE() 1 +#else +#define IS_CUSPARSE11_AVAILABLE() 0 +#endif + +#if defined(USE_ROCM) && (ROCM_VERSION >= 70000) +#define 
HIPSPARSE_FP16_SUPPORT 1 +#else +#define HIPSPARSE_FP16_SUPPORT 0 +#endif + +#if defined(USE_ROCM) && (ROCM_VERSION >= 70100) +#define HIPSPARSE_FP16_BF16_SUPPORT 1 +#else +#define HIPSPARSE_FP16_BF16_SUPPORT 0 +#endif + +#if IS_CUSPARSE11_AVAILABLE() #include +#endif namespace at::native { diff --git a/test/test_sparse.py b/test/test_sparse.py index 5150dab4b7cf1..eb6877b419d0b 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -69,6 +69,12 @@ def _op_supports_any_sparse(op): ) or (not IS_WINDOWS and not TEST_WITH_ROCM) HIPSPARSE_SPMM_COMPLEX128_SUPPORTED = torch.version.hip and version.parse(torch.version.hip.split("-")[0]) >= version.parse("6.0") +HIPSPARSE_FP16_SUPPORTED = torch.version.hip and version.parse(torch.version.hip.split("-")[0]) >= version.parse("7.0") +HIPSPARSE_BF16_SUPPORTED = torch.version.hip and version.parse(torch.version.hip.split("-")[0]) >= version.parse("7.1") + +SPARSE_COMPLEX128_SUPPORTED = CUSPARSE_SPMM_COMPLEX128_SUPPORTED or HIPSPARSE_SPMM_COMPLEX128_SUPPORTED +SPARSE_FLOAT16_SUPPORTED = (SM53OrLater and torch.version.cuda) or (HIPSPARSE_FP16_SUPPORTED) +SPARSE_BFLOAT16_SUPPORTED = (SM80OrLater and torch.version.cuda) or (HIPSPARSE_BF16_SUPPORTED) def all_sparse_layouts(test_name='layout', include_strided=False): return parametrize(test_name, [ diff --git a/test/test_sparse_csr.py b/test/test_sparse_csr.py index f84adcc7bd262..e1bfd3f146991 100644 --- a/test/test_sparse_csr.py +++ b/test/test_sparse_csr.py @@ -25,7 +25,8 @@ all_types_and_complex, floating_and_complex_types_and) from torch.testing._internal.opinfo.definitions.linalg import sample_inputs_linalg_solve from torch.testing._internal.opinfo.definitions.sparse import validate_sample_input_sparse -from test_sparse import CUSPARSE_SPMM_COMPLEX128_SUPPORTED, HIPSPARSE_SPMM_COMPLEX128_SUPPORTED +from test_sparse import HIPSPARSE_BF16_SUPPORTED, HIPSPARSE_FP16_SUPPORTED, \ + SPARSE_FLOAT16_SUPPORTED, SPARSE_BFLOAT16_SUPPORTED, SPARSE_COMPLEX128_SUPPORTED import 
operator if TEST_SCIPY: @@ -1940,8 +1941,8 @@ def test_shape(d1, d2, d3, nnz, transposed, index_dtype): @dtypes(*floating_and_complex_types()) @dtypesIfCUDA(*floating_and_complex_types_and( - *[torch.half] if SM53OrLater and TEST_CUSPARSE_GENERIC else [], - *[torch.bfloat16] if SM80OrLater and TEST_CUSPARSE_GENERIC else [])) + *[torch.half] if SPARSE_FLOAT16_SUPPORTED else [], + *[torch.bfloat16] if SPARSE_BFLOAT16_SUPPORTED else [])) @precisionOverride({torch.bfloat16: 3.5e-2, torch.float16: 1e-2}) def test_sparse_addmm(self, device, dtype): def test_shape(m, n, p, nnz, broadcast, index_dtype, alpha_beta=None): diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py index 82547c8e28540..12e1a1209c2cd 100644 --- a/torch/utils/hipify/cuda_to_hip_mappings.py +++ b/torch/utils/hipify/cuda_to_hip_mappings.py @@ -8593,6 +8593,14 @@ "CUSPARSE_STATUS_ZERO_PIVOT", ("HIPSPARSE_STATUS_ZERO_PIVOT", CONV_NUMERIC_LITERAL, API_SPECIAL), ), + ( + "CUSPARSE_STATUS_NOT_SUPPORTED", + ("HIPSPARSE_STATUS_NOT_SUPPORTED", CONV_NUMERIC_LITERAL, API_SPECIAL), + ), + ( + "CUSPARSE_STATUS_INSUFFICIENT_RESOURCES", + ("HIPSPARSE_STATUS_INSUFFICIENT_RESOURCES", CONV_NUMERIC_LITERAL, API_SPECIAL), + ), ( "CUSPARSE_OPERATION_TRANSPOSE", ("HIPSPARSE_OPERATION_TRANSPOSE", CONV_NUMERIC_LITERAL, API_SPECIAL), From 8812312ad426d56aeae74294c06bc7fe09d41e13 Mon Sep 17 00:00:00 2001 From: Ethan Wee Date: Wed, 9 Jul 2025 11:59:44 -0700 Subject: [PATCH 11/16] [rocm7.0_internal_testing] upgrade tensorboard compatible with numpy 2 (#2326) Fixes https://ontrack-internal.amd.com/browse/SWDEV-541809 Upgrading tensorboard after numpy upgrade Ran in **registry-sc-harbor.amd.com/framework/compute-rocm-dkms-no-npi-hipclang:16381_ubuntu24.04_py3.12_pytorch_lw_rocm7.0_internal_testing_afe8b782** ``` 7 git checkout rocm7.0_IT_upgrade_tensorboard 8 pip install .ci/docker/requirements-ci.txt 9 pip install -r .ci/docker/requirements-ci.txt 10 PYTORCH_TEST_WITH_ROCM=1 python 
test/test_monitor.py TestMonitorTensorboard.test_event_handler root@ubb4-rack-22:/var/lib/jenkins/pytorch# PYTORCH_TEST_WITH_ROCM=1 python test/test_monitor.py TestMonitorTensorboard.test_event_handler /opt/venv/lib/python3.12/site-packages/google/protobuf/internal/well_known_types.py:91: DeprecationWarning: datetime.datetime.utcfromtimestamp() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.fromtimestamp(timestamp, datetime.UTC). _EPOCH_DATETIME_NAIVE = datetime.datetime.utcfromtimestamp(0) . ---------------------------------------------------------------------- Ran 1 test in 0.327s OK root@ubb4-rack-22:/var/lib/jenkins/pytorch# ``` (cherry picked from commit c7f61f42059a9a56956190eef5ef4e5f6f127038) --- .ci/docker/requirements-ci.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt index 0081762c320d3..93d32b803b199 100644 --- a/.ci/docker/requirements-ci.txt +++ b/.ci/docker/requirements-ci.txt @@ -309,8 +309,7 @@ z3-solver==4.15.1.0 ; platform_machine != "s390x" #Pinned versions: #test that import: -tensorboard==2.13.0 ; python_version < "3.13" -tensorboard==2.18.0 ; python_version >= "3.13" +tensorboard==2.18.0 #Description: Also included in .ci/docker/requirements-docs.txt #Pinned versions: #test that import: test_tensorboard From 504fea97dc8f70ea61573463cb68781c181f90bb Mon Sep 17 00:00:00 2001 From: Ethan Wee Date: Fri, 15 Aug 2025 13:28:53 -0700 Subject: [PATCH 12/16] [rocm7.1_internal_testing] Pin requirements (#2526) Tested locally successfully ``` root@rocm-framework-47:/var/lib/jenkins/pytorch# pip install -r requirements.txt Ignoring numpy: markers 'python_version == "3.9"' don't match your environment Requirement already satisfied: setuptools<80.0,>=70.1.0 in /opt/venv/lib/python3.10/site-packages (from -r /var/lib/jenkins/pytorch/requirements-build.txt (line 2)) (79.0.1) 
Requirement already satisfied: cmake>=3.31.4 in /opt/venv/lib/python3.10/site-packages (from -r /var/lib/jenkins/pytorch/requirements-build.txt (line 3)) (4.0.0) Requirement already satisfied: ninja==1.11.1.3 in /opt/venv/lib/python3.10/site-packages (from -r /var/lib/jenkins/pytorch/requirements-build.txt (line 4)) (1.11.1.3) Requirement already satisfied: numpy==2.1.2 in /opt/venv/lib/python3.10/site-packages (from -r /var/lib/jenkins/pytorch/requirements-build.txt (line 5)) (2.1.2) Requirement already satisfied: packaging==25.0 in /opt/venv/lib/python3.10/site-packages (from -r /var/lib/jenkins/pytorch/requirements-build.txt (line 6)) (25.0) Requirement already satisfied: pyyaml==6.0.2 in /opt/venv/lib/python3.10/site-packages (from -r /var/lib/jenkins/pytorch/requirements-build.txt (line 7)) (6.0.2) Requirement already satisfied: requests==2.32.4 in /opt/venv/lib/python3.10/site-packages (from -r /var/lib/jenkins/pytorch/requirements-build.txt (line 8)) (2.32.4) Requirement already satisfied: six==1.17.0 in /opt/venv/lib/python3.10/site-packages (from -r /var/lib/jenkins/pytorch/requirements-build.txt (line 9)) (1.17.0) Requirement already satisfied: typing-extensions==4.14.1 in /opt/venv/lib/python3.10/site-packages (from -r /var/lib/jenkins/pytorch/requirements-build.txt (line 10)) (4.14.1) Requirement already satisfied: expecttest==0.3.0 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 8)) (0.3.0) Requirement already satisfied: filelock==3.18.0 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 9)) (3.18.0) Requirement already satisfied: fsspec==2025.7.0 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 10)) (2025.7.0) Requirement already satisfied: hypothesis==5.35.1 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 11)) (5.35.1) Requirement already satisfied: jinja2==3.1.6 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 12)) (3.1.6) 
Requirement already satisfied: lintrunner==0.12.7 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 13)) (0.12.7) Requirement already satisfied: networkx==2.8.8 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 14)) (2.8.8) Requirement already satisfied: optree==0.13.0 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 18)) (0.13.0) Requirement already satisfied: psutil==7.0.0 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 19)) (7.0.0) Requirement already satisfied: sympy==1.13.3 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 20)) (1.13.3) Requirement already satisfied: wheel==0.45.1 in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 22)) (0.45.1) Requirement already satisfied: build[uv] in /opt/venv/lib/python3.10/site-packages (from -r requirements.txt (line 7)) (1.3.0) Requirement already satisfied: charset_normalizer<4,>=2 in /opt/venv/lib/python3.10/site-packages (from requests==2.32.4->-r /var/lib/jenkins/pytorch/requirements-build.txt (line 8)) (3.4.3) Requirement already satisfied: idna<4,>=2.5 in /opt/venv/lib/python3.10/site-packages (from requests==2.32.4->-r /var/lib/jenkins/pytorch/requirements-build.txt (line 8)) (3.10) Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/venv/lib/python3.10/site-packages (from requests==2.32.4->-r /var/lib/jenkins/pytorch/requirements-build.txt (line 8)) (2.5.0) Requirement already satisfied: certifi>=2017.4.17 in /opt/venv/lib/python3.10/site-packages (from requests==2.32.4->-r /var/lib/jenkins/pytorch/requirements-build.txt (line 8)) (2025.8.3) Requirement already satisfied: attrs>=19.2.0 in /opt/venv/lib/python3.10/site-packages (from hypothesis==5.35.1->-r requirements.txt (line 11)) (25.3.0) Requirement already satisfied: sortedcontainers<3.0.0,>=2.1.0 in /opt/venv/lib/python3.10/site-packages (from hypothesis==5.35.1->-r requirements.txt (line 11)) (2.4.0) 
Requirement already satisfied: MarkupSafe>=2.0 in /opt/venv/lib/python3.10/site-packages (from jinja2==3.1.6->-r requirements.txt (line 12)) (3.0.2) Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/venv/lib/python3.10/site-packages (from sympy==1.13.3->-r requirements.txt (line 20)) (1.3.0) Requirement already satisfied: pyproject_hooks in /opt/venv/lib/python3.10/site-packages (from build[uv]->-r requirements.txt (line 7)) (1.2.0) Requirement already satisfied: tomli>=1.1.0 in /opt/venv/lib/python3.10/site-packages (from build[uv]->-r requirements.txt (line 7)) (2.2.1) Requirement already satisfied: uv>=0.1.18 in /opt/venv/lib/python3.10/site-packages (from build[uv]->-r requirements.txt (line 7)) (0.8.10) root@rocm-framework-47:/var/lib/jenkins/pytorch# pip install -r requirements-build.txt ``` (cherry picked from commit 6e6e45424f4ba643f9f642f69206f6d1df4e9192) --- requirements-build.txt | 20 ++++++++++---------- requirements.txt | 27 +++++++++++++++------------ 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/requirements-build.txt b/requirements-build.txt index 25f1e47a14968..f2edf387fb97a 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -1,12 +1,12 @@ # Build System requirements -setuptools>=70.1.0 -cmake>=3.27 -ninja -numpy==2.0.2 ; python_version == "3.9"" -numpy==2.1.2 ; python_version > "3.9" -packaging -pyyaml -requests -six # dependency chain: NNPACK -> PeachPy -> six -typing-extensions>=4.10.0 pip # not technically needed, but this makes setup.py invocation work +setuptools>=70.1.0,<80.0 # setuptools develop deprecated on 80.0 +cmake>=3.31.4 +ninja==1.11.1.3 +numpy==2.0.2 ; python_version == "3.9" +numpy==2.1.2 ; python_version > "3.9" +packaging==25.0 +pyyaml==6.0.2 +requests==2.32.4 +six==1.17.0 # dependency chain: NNPACK -> PeachPy -> six +typing-extensions==4.14.1 diff --git a/requirements.txt b/requirements.txt index fc4b53dfd49ea..090a733726658 100644 --- a/requirements.txt +++ b/requirements.txt @@ 
-5,15 +5,18 @@ # Install / Development extra requirements build[uv] # for building sdist and wheel -expecttest>=0.3.0 -filelock -fsspec>=0.8.5 -hypothesis -jinja2 -lintrunner ; platform_machine != "s390x" and platform_machine != "riscv64" -networkx>=2.5.1 -optree>=0.13.0 -psutil -sympy>=1.13.3 -typing-extensions>=4.13.2 -wheel +expecttest==0.3.0 +filelock==3.18.0 +fsspec==2025.7.0 +hypothesis==5.35.1 +jinja2==3.1.6 +lintrunner==0.12.7 ; platform_machine != "s390x" +networkx==2.8.8 +ninja==1.11.1.3 +numpy==2.0.2 ; python_version == "3.9" +numpy==2.1.2 ; python_version > "3.9" +optree==0.13.0 +psutil==7.0.0 +sympy==1.13.3 +typing-extensions==4.14.1 +wheel==0.45.1 From 3a11b6dddb446419314fd1d0f08fcf9b2f3774f6 Mon Sep 17 00:00:00 2001 From: Xinya Zhang Date: Wed, 20 Dec 2023 16:26:17 -0600 Subject: [PATCH 13/16] Enable gesvda for ROCM >= 6.1 (#1339) This also fixes a problem in gesvd driver when UV is not needed. (cherry picked from commit 4ce57ecb3665796ff737d23cccba314c7ea12e92) (cherry picked from commit 167b4c1c24abc61968b776a30db6794065de5625) --- .../native/cuda/linalg/BatchLinearAlgebraLib.cpp | 14 +++++++------- aten/src/ATen/native/cuda/linalg/CUDASolver.cpp | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLib.cpp b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLib.cpp index 267d1f5acea52..5b28cc6eccf01 100644 --- a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLib.cpp +++ b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLib.cpp @@ -332,11 +332,11 @@ static void svd_cusolver_gesvd(const Tensor& A, const Tensor& U, const Tensor& S // gesvd just knows how to handle m >= n, so in the other case we need to transpose A const auto not_A_H = A.size(-2) >= A.size(-1); Tensor Vcopy = V; // Shallow copy -#ifdef USE_ROCM +#ifdef ROCM_VERSION // Similar to the case in svd_magma(), experiments have shown Vh tensor is // not guaranteed to be column major on ROCM, we have to create a copy 
to // deal with this - if (!not_A_H) { + if (compute_uv && !not_A_H) { Vcopy = at::empty_like(V.mT(), V.options() .device(V.device()) @@ -351,8 +351,8 @@ static void svd_cusolver_gesvd(const Tensor& A, const Tensor& U, const Tensor& S infos, full_matrices, compute_uv, calculate_all_batches, batches); }); -#ifdef USE_ROCM - if (!not_A_H) { +#ifdef ROCM_VERSION + if (compute_uv && !not_A_H) { V.copy_(Vcopy); } #endif @@ -526,8 +526,8 @@ static void svd_cusolver_gesvdjBatched(const Tensor& A, const Tensor& U, const T template <typename scalar_t> static void apply_svd_cusolver_gesvdaStridedBatched(const Tensor& A, const Tensor& U, const Tensor& S, const Tensor& V, const Tensor& infos, bool full_matrices, bool compute_uv) { -#ifndef CUDART_VERSION - TORCH_CHECK(false, "gesvda: Batched version is supported only with cuBLAS backend.") +#if !(defined(CUDART_VERSION) || defined(ROCM_VERSION) && ROCM_VERSION >= 60100) + TORCH_CHECK(false, "gesvda: Batched version is supported only with cuBLAS backend or ROCM >= 6.1.0.") #else using value_t = typename c10::scalar_value_type<scalar_t>::type; int m = cuda_int_cast(A.size(-2), "m"); @@ -665,7 +665,7 @@ void svd_cusolver(const Tensor& A, static constexpr const char* check_svd_doc = "Check doc at https://pytorch.org/docs/stable/generated/torch.linalg.svd.html"; // The default heuristic is to use gesvdj driver -#ifdef USE_ROCM +#if defined(ROCM_VERSION) && ROCM_VERSION < 60100 const auto driver_v = std::string_view("gesvdj"); #else const auto driver_v = driver.value_or("gesvdj"); diff --git a/aten/src/ATen/native/cuda/linalg/CUDASolver.cpp b/aten/src/ATen/native/cuda/linalg/CUDASolver.cpp index 99c38077611d6..af183038bb8e4 100644 --- a/aten/src/ATen/native/cuda/linalg/CUDASolver.cpp +++ b/aten/src/ATen/native/cuda/linalg/CUDASolver.cpp @@ -470,8 +470,8 @@ void gesvdjBatched>( } -// ROCM does not implement gesdva yet -#ifdef CUDART_VERSION +// ROCM does not implement gesvda correctly before 6.1 +#if defined(CUDART_VERSION) || defined(ROCM_VERSION) && ROCM_VERSION >= 
60100 template<> void gesvdaStridedBatched_buffersize( cusolverDnHandle_t handle, cusolverEigMode_t jobz, int rank, int m, int n, float *A, int lda, long long int strideA, From 2a0b33a356d74bd459ac812343316338bb709e3e Mon Sep 17 00:00:00 2001 From: Pruthvi Madugundu Date: Tue, 12 Mar 2024 09:56:58 -0700 Subject: [PATCH 14/16] Remove ROCmloops specific test (cherry picked from commit d6879fa6b482188ccfc54a8ea8be272a35fc7cdf) (cherry picked from commit 123a16486958b171b538126ef4fe0045464a505e) --- aten/src/ATen/test/cuda_vectorized_test.cu | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/aten/src/ATen/test/cuda_vectorized_test.cu b/aten/src/ATen/test/cuda_vectorized_test.cu index e4c18102526ac..1b3ed4dc4ac42 100644 --- a/aten/src/ATen/test/cuda_vectorized_test.cu +++ b/aten/src/ATen/test/cuda_vectorized_test.cu @@ -32,23 +32,6 @@ void reset_buffers() { } } -#if defined(USE_ROCM) && !defined(_WIN32) -TEST(TestLoops, HasSameArgTypes) { - // This is a compile-time unit test. If this file compiles without error, - // then the test passes and during runtime, we just need to return. 
- using namespace at::native::modern::detail; - using func1_t = int (*)(float, float); - using func2_t = int (*)(bool, float, float); - using func3_t = int (*)(float); - using func4_t = int (*)(); - static_assert(has_same_arg_types::value, "func1_t has the same argument types"); - static_assert(!has_same_arg_types::value, "func2_t does not have the same argument types"); - static_assert(has_same_arg_types::value, "func3_t has the same argument types"); - static_assert(has_same_arg_types::value, "func4_t has the same argument types"); - return; -} -#endif - TEST(TestVectorizedMemoryAccess, CanVectorizeUpTo) { char *ptr = reinterpret_cast(buffer1); From 462187f60c9bc525eab466068cb8b2a559935220 Mon Sep 17 00:00:00 2001 From: Prachi Gupta Date: Wed, 29 Oct 2025 17:41:21 +0000 Subject: [PATCH 15/16] Bump triton to 3.5.x and update related_commits --- .ci/docker/ci_commit_pins/triton.txt | 2 +- related_commits | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 related_commits diff --git a/.ci/docker/ci_commit_pins/triton.txt b/.ci/docker/ci_commit_pins/triton.txt index 10f1207e60e6c..d893bdd32ab34 100644 --- a/.ci/docker/ci_commit_pins/triton.txt +++ b/.ci/docker/ci_commit_pins/triton.txt @@ -1 +1 @@ -7416ffcb92cdbe98d9f97e4e6f95247e46dfc9fd +ac80c4190aa0321f761a08af97e1e1eee41f01d9 diff --git a/related_commits b/related_commits new file mode 100644 index 0000000000000..ee36e55601d0f --- /dev/null +++ b/related_commits @@ -0,0 +1,10 @@ +ubuntu|pytorch|apex|master|2190fbaeb88384ed792373adbb83c182af117ca0|https://github.com/ROCm/apex +centos|pytorch|apex|master|2190fbaeb88384ed792373adbb83c182af117ca0|https://github.com/ROCm/apex +ubuntu|pytorch|torchvision|main|218d2ab791d437309f91e0486eb9fa7f00badc17|https://github.com/pytorch/vision +centos|pytorch|torchvision|main|218d2ab791d437309f91e0486eb9fa7f00badc17|https://github.com/pytorch/vision 
+ubuntu|pytorch|torchdata|main|92950795e0790eb74df995daf40b658e85fd2c9f|https://github.com/pytorch/data +centos|pytorch|torchdata|main|92950795e0790eb74df995daf40b658e85fd2c9f|https://github.com/pytorch/data +ubuntu|pytorch|torchaudio|main|3b0e7a6f192ca2715e7e6cbe5db007aea7165fe2|https://github.com/pytorch/audio +centos|pytorch|torchaudio|main|3b0e7a6f192ca2715e7e6cbe5db007aea7165fe2|https://github.com/pytorch/audio +ubuntu|pytorch|ao|main|3577306c8b32517afe8eb6eb7e84335601180598|https://github.com/pytorch/ao +centos|pytorch|ao|main|3577306c8b32517afe8eb6eb7e84335601180598|https://github.com/pytorch/ao From ba4531d2560231e22e80f0d0cae1ec7d555d7ea1 Mon Sep 17 00:00:00 2001 From: Jagadish Krishnamoorthy Date: Tue, 1 Apr 2025 09:19:40 -0700 Subject: [PATCH 16/16] Revert to prev sccache by ROCm Signed-off-by: Jagadish Krishnamoorthy (cherry picked from commit 1ad5bb95d796283d5f56ac1edd16f1731d24a49d) (cherry picked from commit 519160d466782f5a62365be051fcb3ef90fa0b00) --- .ci/docker/common/install_cache.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.ci/docker/common/install_cache.sh b/.ci/docker/common/install_cache.sh index f38cb3d06d88b..80839990e4e6f 100644 --- a/.ci/docker/common/install_cache.sh +++ b/.ci/docker/common/install_cache.sh @@ -36,7 +36,12 @@ sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment export PATH="/opt/cache/bin:$PATH" # Setup compiler cache -install_ubuntu +if [ -n "$ROCM_VERSION" ]; then + curl --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache +else + install_ubuntu +fi + chmod a+x /opt/cache/bin/sccache function write_sccache_stub() {