diff --git a/.automation_scripts/parse_xml_results.py b/.automation_scripts/parse_xml_results.py
new file mode 100644
index 000000000000..7db2e1ce9233
--- /dev/null
+++ b/.automation_scripts/parse_xml_results.py
@@ -0,0 +1,178 @@
+""" The Python PyTorch testing script.
+##
+# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+"""
+
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Any, Dict, Tuple
+
+# Backends list
+BACKENDS_LIST = [
+ "dist-gloo",
+ "dist-nccl"
+]
+
+TARGET_WORKFLOW = "--rerun-disabled-tests"
+
+def get_job_id(report: Path) -> int:
+ # [Job id in artifacts]
+ # Retrieve the job id from the report path. In our GHA workflows, we append
+ # the job id to the end of the report name, so `report` looks like:
+ # unzipped-test-reports-foo_5596745227/test/test-reports/foo/TEST-foo.xml
+ # and we want to get `5596745227` out of it.
+ try:
+ return int(report.parts[0].rpartition("_")[2])
+ except ValueError:
+ return -1
+
+def is_rerun_disabled_tests(root: ET.ElementTree) -> bool:
+ """
+ Check if the test report is coming from rerun_disabled_tests workflow
+ """
+ skipped = root.find(".//*skipped")
+    # Check against None explicitly: an Element with no children is falsy,
+    # so a plain `if not skipped` would misfire here
+ if skipped is None:
+ return False
+
+ message = skipped.attrib.get("message", "")
+ return TARGET_WORKFLOW in message or "num_red" in message
+
+def parse_xml_report(
+ tag: str,
+ report: Path,
+ workflow_id: int,
+ workflow_run_attempt: int,
+ work_flow_name: str
+) -> Dict[Tuple[str, ...], Dict[str, Any]]:
+ """Convert a test report xml file into a JSON-serializable list of test cases."""
+ print(f"Parsing {tag}s for test report: {report}")
+
+ job_id = get_job_id(report)
+ print(f"Found job id: {job_id}")
+
+    test_cases: Dict[Tuple[str, ...], Dict[str, Any]] = {}
+
+ root = ET.parse(report)
+ # TODO: unlike unittest, pytest-flakefinder used by rerun disabled tests for test_ops
+ # includes skipped messages multiple times (50 times by default). This slows down
+ # this script too much (O(n)) because it tries to gather all the stats. This should
+    # be fixed later in the way we use pytest-flakefinder. A zipped test report from rerun
+    # disabled tests is only a few MB, but balloons to a much bigger XML file after
+    # extraction, from a dozen to a few hundred MB
+ if is_rerun_disabled_tests(root):
+ return test_cases
+
+ for test_case in root.iter(tag):
+ case = process_xml_element(test_case)
+ if tag == 'testcase':
+ case["workflow_id"] = workflow_id
+ case["workflow_run_attempt"] = workflow_run_attempt
+ case["job_id"] = job_id
+ case["work_flow_name"] = work_flow_name
+
+ # [invoking file]
+ # The name of the file that the test is located in is not necessarily
+ # the same as the name of the file that invoked the test.
+ # For example, `test_jit.py` calls into multiple other test files (e.g.
+ # jit/test_dce.py). For sharding/test selection purposes, we want to
+ # record the file that invoked the test.
+ #
+ # To do this, we leverage an implementation detail of how we write out
+ # tests (https://bit.ly/3ajEV1M), which is that reports are created
+ # under a folder with the same name as the invoking file.
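+            # e.g. (illustrative) a report under test/test-reports/test_jit/ yields
+            # invoking_file "test_jit", with a "_dist-gloo"/"_dist-nccl" suffix added
+            # below when the report path contains one of those backends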
+ case_name = report.parent.name
+            for backend in BACKENDS_LIST:
+                if backend in report.parts:
+                    case_name = case_name + "_" + backend
+                    break
+ case["invoking_file"] = case_name
+            test_cases[(case["invoking_file"], case["classname"], case["name"], case["work_flow_name"])] = case
+ elif tag == 'testsuite':
+ case["work_flow_name"] = work_flow_name
+ case["invoking_xml"] = report.name
+ case["running_time_xml"] = case["time"]
+ case_name = report.parent.name
+            for backend in BACKENDS_LIST:
+                if backend in report.parts:
+                    case_name = case_name + "_" + backend
+                    break
+ case["invoking_file"] = case_name
+
+            test_cases[(case["invoking_file"], case["invoking_xml"], case["work_flow_name"])] = case
+
+ return test_cases
+
+def process_xml_element(element: ET.Element) -> Dict[str, Any]:
+ """Convert a test suite element into a JSON-serializable dict."""
+ ret: Dict[str, Any] = {}
+
+ # Convert attributes directly into dict elements.
+    # e.g.
+    #     <testcase name="test_foo" classname="test_bar"></testcase>
+    # becomes:
+    #     {"name": "test_foo", "classname": "test_bar"}
+ ret.update(element.attrib)
+
+ # The XML format encodes all values as strings. Convert to ints/floats if
+ # possible to make aggregation possible in Rockset.
+    for k, v in ret.items():
+        try:
+            ret[k] = int(v)
+        except ValueError:
+            # Not an int; fall back to float so e.g. "1.5" still becomes numeric
+            try:
+                ret[k] = float(v)
+            except ValueError:
+                pass
+
+ # Convert inner and outer text into special dict elements.
+    # e.g.
+    #     <testcase>my_inner_text</testcase> my_tail
+    # becomes:
+    #     {"text": "my_inner_text", "tail": " my_tail"}
+ if element.text and element.text.strip():
+ ret["text"] = element.text
+ if element.tail and element.tail.strip():
+ ret["tail"] = element.tail
+
+ # Convert child elements recursively, placing them at a key:
+ # e.g.
+    #     <testcase>
+    #         <foo>hello</foo>
+    #         <foo>world</foo>
+    #         <bar>another</bar>
+    #     </testcase>
+    # becomes
+    #     {
+    #         "foo": [{"text": "hello"}, {"text": "world"}],
+    #         "bar": {"text": "another"}
+    #     }
+ for child in element:
+ if child.tag not in ret:
+ ret[child.tag] = process_xml_element(child)
+ else:
+ # If there are multiple tags with the same name, they should be
+ # coalesced into a list.
+ if not isinstance(ret[child.tag], list):
+ ret[child.tag] = [ret[child.tag]]
+ ret[child.tag].append(process_xml_element(child))
+ return ret
\ No newline at end of file
diff --git a/.automation_scripts/run_pytorch_unit_tests.py b/.automation_scripts/run_pytorch_unit_tests.py
new file mode 100644
index 000000000000..514afd19624c
--- /dev/null
+++ b/.automation_scripts/run_pytorch_unit_tests.py
@@ -0,0 +1,518 @@
+#!/usr/bin/env python3
+
+""" The Python PyTorch testing script.
+##
+# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+"""
+
+import argparse
+import os
+import shutil
+import subprocess
+from subprocess import STDOUT, CalledProcessError
+
+from collections import namedtuple
+from datetime import datetime
+from pathlib import Path
+from parse_xml_results import (
+ parse_xml_report
+)
+from pprint import pprint
+from typing import Any, Dict, List
+
+# unit test status list
+UT_STATUS_LIST = [
+ "PASSED",
+ "MISSED",
+ "SKIPPED",
+ "FAILED",
+ "XFAILED",
+ "ERROR"
+]
+
+DEFAULT_CORE_TESTS = [
+ "test_nn",
+ "test_torch",
+ "test_cuda",
+ "test_ops",
+ "test_unary_ufuncs",
+ "test_autograd",
+ "inductor/test_torchinductor"
+]
+
+DISTRIBUTED_CORE_TESTS = [
+ "distributed/test_c10d_common",
+ "distributed/test_c10d_nccl",
+ "distributed/test_distributed_spawn"
+]
+
+CONSOLIDATED_LOG_FILE_NAME="pytorch_unit_tests.log"
+
+def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, workflow_name, path="."):
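+    """Parse every XML report found under the immediate subdirectories of path into one dict."""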
+ test_cases = {}
+    for entry in os.listdir(path):
+        sub_dir = os.path.join(path, entry)
+        if os.path.isdir(sub_dir):
+            for xml_report in Path(sub_dir).glob("**/*.xml"):
+ test_cases.update(
+ parse_xml_report(
+ tag,
+ xml_report,
+ workflow_run_id,
+ workflow_run_attempt,
+ workflow_name
+ )
+ )
+ return test_cases
+
+def get_test_status(test_case):
+    # In order of priority: XFAILED/SKIPPED, FAILED, ERROR, PASSED
+ if "skipped" in test_case and test_case["skipped"]:
+ type_message = test_case["skipped"]
+        if "type" in type_message and type_message["type"] == "pytest.xfail":
+ return "XFAILED"
+ else:
+ return "SKIPPED"
+ elif "failure" in test_case and test_case["failure"]:
+ return "FAILED"
+ elif "error" in test_case and test_case["error"]:
+ return "ERROR"
+ else:
+ return "PASSED"
+
+def get_test_message(test_case, status=None):
+ if status == "SKIPPED":
+ return test_case["skipped"] if "skipped" in test_case else ""
+ elif status == "FAILED":
+ return test_case["failure"] if "failure" in test_case else ""
+ elif status == "ERROR":
+ return test_case["error"] if "error" in test_case else ""
+ else:
+ if "skipped" in test_case:
+ return test_case["skipped"]
+ elif "failure" in test_case:
+ return test_case["failure"]
+ elif "error" in test_case:
+ return test_case["error"]
+ else:
+ return ""
+
+def get_test_file_running_time(test_suite):
+    if "time" in test_suite:
+ return test_suite["time"]
+ return 0
+
+def get_test_running_time(test_case):
+    if "time" in test_case:
+ return test_case["time"]
+ return ""
+
+def summarize_xml_files(path, workflow_name):
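+    """Aggregate parsed test cases and suites into per-file and overall statistics."""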
+ # statistics
+ TOTAL_TEST_NUM = 0
+ TOTAL_PASSED_NUM = 0
+ TOTAL_SKIPPED_NUM = 0
+ TOTAL_XFAIL_NUM = 0
+ TOTAL_FAILED_NUM = 0
+ TOTAL_ERROR_NUM = 0
+ TOTAL_EXECUTION_TIME = 0
+
+ #parse the xml files
+ test_cases = parse_xml_reports_as_dict(-1, -1, 'testcase', workflow_name, path)
+ test_suites = parse_xml_reports_as_dict(-1, -1, 'testsuite', workflow_name, path)
+ test_file_and_status = namedtuple("test_file_and_status", ["file_name", "status"])
+ # results dict
+ res = {}
+ res_item_list = [ "PASSED", "SKIPPED", "XFAILED", "FAILED", "ERROR" ]
+ test_file_items = set()
+    for k, v in test_suites.items():
+        file_name = k[0]
+        if file_name not in test_file_items:
+ test_file_items.add(file_name)
+ # initialization
+ for item in res_item_list:
+ temp_item = test_file_and_status(file_name, item)
+ res[temp_item] = {}
+ temp_item_statistics = test_file_and_status(file_name, "STATISTICS")
+ res[temp_item_statistics] = {'TOTAL': 0, 'PASSED': 0, 'SKIPPED': 0, 'XFAILED': 0, 'FAILED': 0, 'ERROR': 0, 'EXECUTION_TIME': 0}
+ test_running_time = get_test_file_running_time(v)
+ res[temp_item_statistics]["EXECUTION_TIME"] += test_running_time
+ TOTAL_EXECUTION_TIME += test_running_time
+ else:
+ test_tuple_key_statistics = test_file_and_status(file_name, "STATISTICS")
+ test_running_time = get_test_file_running_time(v)
+ res[test_tuple_key_statistics]["EXECUTION_TIME"] += test_running_time
+ TOTAL_EXECUTION_TIME += test_running_time
+
+    for k, v in test_cases.items():
+ file_name = k[0]
+ class_name = k[1]
+ test_name = k[2]
+ combined_name = file_name + "::" + class_name + "::" + test_name
+ test_status = get_test_status(v)
+ test_running_time = get_test_running_time(v)
+ test_message = get_test_message(v, test_status)
+ test_info_value = ""
+ test_tuple_key_status = test_file_and_status(file_name, test_status)
+ test_tuple_key_statistics = test_file_and_status(file_name, "STATISTICS")
+ TOTAL_TEST_NUM += 1
+ res[test_tuple_key_statistics]["TOTAL"] += 1
+ if test_status == "PASSED":
+ test_info_value = str(test_running_time)
+ res[test_tuple_key_status][combined_name] = test_info_value
+ res[test_tuple_key_statistics]["PASSED"] += 1
+ TOTAL_PASSED_NUM += 1
+ elif test_status == "SKIPPED":
+ test_info_value = str(test_running_time)
+ res[test_tuple_key_status][combined_name] = test_info_value
+ res[test_tuple_key_statistics]["SKIPPED"] += 1
+ TOTAL_SKIPPED_NUM += 1
+ elif test_status == "XFAILED":
+ test_info_value = str(test_running_time)
+ res[test_tuple_key_status][combined_name] = test_info_value
+ res[test_tuple_key_statistics]["XFAILED"] += 1
+ TOTAL_XFAIL_NUM += 1
+ elif test_status == "FAILED":
+ test_info_value = test_message
+ res[test_tuple_key_status][combined_name] = test_info_value
+ res[test_tuple_key_statistics]["FAILED"] += 1
+ TOTAL_FAILED_NUM += 1
+ elif test_status == "ERROR":
+ test_info_value = test_message
+ res[test_tuple_key_status][combined_name] = test_info_value
+ res[test_tuple_key_statistics]["ERROR"] += 1
+ TOTAL_ERROR_NUM += 1
+
+ # generate statistics_dict
+ statistics_dict = {}
+ statistics_dict["TOTAL"] = TOTAL_TEST_NUM
+ statistics_dict["PASSED"] = TOTAL_PASSED_NUM
+ statistics_dict["SKIPPED"] = TOTAL_SKIPPED_NUM
+ statistics_dict["XFAILED"] = TOTAL_XFAIL_NUM
+ statistics_dict["FAILED"] = TOTAL_FAILED_NUM
+ statistics_dict["ERROR"] = TOTAL_ERROR_NUM
+ statistics_dict["EXECUTION_TIME"] = TOTAL_EXECUTION_TIME
+ aggregate_item = workflow_name + "_aggregate"
+ total_item = test_file_and_status(aggregate_item, "STATISTICS")
+ res[total_item] = statistics_dict
+
+ return res
+
+def run_command_and_capture_output(cmd):
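+    """Run a shell command, appending its output to the consolidated log file."""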
+    try:
+        print(f"Running command '{cmd}'")
+        with open(CONSOLIDATED_LOG_FILE_PATH, "a+") as output_file:
+            print("========================================", file=output_file, flush=True)
+            print(f"[RUN_PYTORCH_UNIT_TESTS] Running command '{cmd}'", file=output_file, flush=True)  # send to consolidated file as well
+            print("========================================", file=output_file, flush=True)
+            # check=True is needed for CalledProcessError to actually be raised on failure
+            subprocess.run(cmd, shell=True, stdout=output_file, stderr=STDOUT, text=True, check=True)
+    except CalledProcessError as e:
+        print(f"ERROR: Cmd {cmd} failed with return code: {e.returncode}!")
+
+def run_entire_tests(workflow_name, test_shell_path, overall_logs_path_current_run, test_reports_src):
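+    """Run the full test suite for a workflow via test.sh and summarize its XML reports."""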
+ if os.path.exists(test_reports_src):
+ shutil.rmtree(test_reports_src)
+
+ os.mkdir(test_reports_src)
+ copied_logs_path = ""
+ if workflow_name == "default":
+ os.environ['TEST_CONFIG'] = 'default'
+ copied_logs_path = overall_logs_path_current_run + "default_xml_results_entire_tests/"
+ elif workflow_name == "distributed":
+ os.environ['TEST_CONFIG'] = 'distributed'
+ copied_logs_path = overall_logs_path_current_run + "distributed_xml_results_entire_tests/"
+ elif workflow_name == "inductor":
+ os.environ['TEST_CONFIG'] = 'inductor'
+ copied_logs_path = overall_logs_path_current_run + "inductor_xml_results_entire_tests/"
+    # use test.sh to execute the tests
+ run_command_and_capture_output(test_shell_path)
+ copied_logs_path_destination = shutil.copytree(test_reports_src, copied_logs_path)
+ entire_results_dict = summarize_xml_files(copied_logs_path_destination, workflow_name)
+ return entire_results_dict
+
+def run_priority_tests(workflow_name, test_run_test_path, overall_logs_path_current_run, test_reports_src):
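+    """Run the priority (core) test suites for a workflow via run_test.py and summarize results."""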
+ if os.path.exists(test_reports_src):
+ shutil.rmtree(test_reports_src)
+
+ os.mkdir(test_reports_src)
+ copied_logs_path = ""
+ if workflow_name == "default":
+ os.environ['TEST_CONFIG'] = 'default'
+ os.environ['HIP_VISIBLE_DEVICES'] = '0'
+ copied_logs_path = overall_logs_path_current_run + "default_xml_results_priority_tests/"
+        # use run_test.py to execute the tests
+ default_priority_test_suites = " ".join(DEFAULT_CORE_TESTS)
+ command = "python3 " + test_run_test_path + " --include " + default_priority_test_suites + " --exclude-jit-executor --exclude-distributed-tests --verbose"
+ run_command_and_capture_output(command)
+ del os.environ['HIP_VISIBLE_DEVICES']
+ elif workflow_name == "distributed":
+ os.environ['TEST_CONFIG'] = 'distributed'
+ os.environ['HIP_VISIBLE_DEVICES'] = '0,1'
+ copied_logs_path = overall_logs_path_current_run + "distributed_xml_results_priority_tests/"
+        # use run_test.py to execute the tests
+ distributed_priority_test_suites = " ".join(DISTRIBUTED_CORE_TESTS)
+ command = "python3 " + test_run_test_path + " --include " + distributed_priority_test_suites + " --distributed-tests --verbose"
+ run_command_and_capture_output(command)
+ del os.environ['HIP_VISIBLE_DEVICES']
+ copied_logs_path_destination = shutil.copytree(test_reports_src, copied_logs_path)
+ priority_results_dict = summarize_xml_files(copied_logs_path_destination, workflow_name)
+
+ return priority_results_dict
+
+def run_selected_tests(workflow_name, test_run_test_path, overall_logs_path_current_run, test_reports_src, selected_list):
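+    """Run a user-selected list of test suites for a workflow via run_test.py and summarize results."""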
+ if os.path.exists(test_reports_src):
+ shutil.rmtree(test_reports_src)
+
+ os.mkdir(test_reports_src)
+ copied_logs_path = ""
+ if workflow_name == "default":
+ os.environ['TEST_CONFIG'] = 'default'
+ os.environ['HIP_VISIBLE_DEVICES'] = '0'
+ copied_logs_path = overall_logs_path_current_run + "default_xml_results_selected_tests/"
+        # use run_test.py to execute the tests
+ default_selected_test_suites = " ".join(selected_list)
+ command = "python3 " + test_run_test_path + " --include " + default_selected_test_suites + " --exclude-jit-executor --exclude-distributed-tests --verbose"
+ run_command_and_capture_output(command)
+ del os.environ['HIP_VISIBLE_DEVICES']
+ elif workflow_name == "distributed":
+ os.environ['TEST_CONFIG'] = 'distributed'
+ os.environ['HIP_VISIBLE_DEVICES'] = '0,1'
+ copied_logs_path = overall_logs_path_current_run + "distributed_xml_results_selected_tests/"
+        # use run_test.py to execute the tests
+ distributed_selected_test_suites = " ".join(selected_list)
+ command = "python3 " + test_run_test_path + " --include " + distributed_selected_test_suites + " --distributed-tests --verbose"
+ run_command_and_capture_output(command)
+ del os.environ['HIP_VISIBLE_DEVICES']
+ elif workflow_name == "inductor":
+ os.environ['TEST_CONFIG'] = 'inductor'
+ copied_logs_path = overall_logs_path_current_run + "inductor_xml_results_selected_tests/"
+ inductor_selected_test_suites = ""
+ non_inductor_selected_test_suites = ""
+ for item in selected_list:
+ if "inductor/" in item:
+ inductor_selected_test_suites += item
+ inductor_selected_test_suites += " "
+ else:
+ non_inductor_selected_test_suites += item
+ non_inductor_selected_test_suites += " "
+ if inductor_selected_test_suites != "":
+ inductor_selected_test_suites = inductor_selected_test_suites[:-1]
+ command = "python3 " + test_run_test_path + " --include " + inductor_selected_test_suites + " --verbose"
+ run_command_and_capture_output(command)
+ if non_inductor_selected_test_suites != "":
+ non_inductor_selected_test_suites = non_inductor_selected_test_suites[:-1]
+ command = "python3 " + test_run_test_path + " --inductor --include " + non_inductor_selected_test_suites + " --verbose"
+ run_command_and_capture_output(command)
+ copied_logs_path_destination = shutil.copytree(test_reports_src, copied_logs_path)
+ selected_results_dict = summarize_xml_files(copied_logs_path_destination, workflow_name)
+
+ return selected_results_dict
+
+def run_test_and_summarize_results(
+ pytorch_root_dir: str,
+ priority_tests: bool,
+ test_config: List[str],
+ default_list: List[str],
+ distributed_list: List[str],
+ inductor_list: List[str],
+ skip_rerun: bool) -> Dict[str, Any]:
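+    """Dispatch the requested test runs and return a dict of per-workflow result summaries."""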
+
+ # copy current environment variables
+ _environ = dict(os.environ)
+
+ # modify path
+ test_shell_path = pytorch_root_dir + "/.ci/pytorch/test.sh"
+ test_run_test_path = pytorch_root_dir + "/test/run_test.py"
+ repo_test_log_folder_path = pytorch_root_dir + "/.automation_logs/"
+ test_reports_src = pytorch_root_dir + "/test/test-reports/"
+ run_test_python_file = pytorch_root_dir + "/test/run_test.py"
+
+ # change directory to pytorch root
+ os.chdir(pytorch_root_dir)
+
+ # all test results dict
+ res_all_tests_dict = {}
+
+ # patterns
+ search_text = "--reruns=2"
+ replace_text = "--reruns=0"
+
+ # create logs folder
+ if not os.path.exists(repo_test_log_folder_path):
+ os.mkdir(repo_test_log_folder_path)
+
+ # Set common environment variables for all scenarios
+ os.environ['CI'] = '1'
+ os.environ['PYTORCH_TEST_WITH_ROCM'] = '1'
+ os.environ['HSA_FORCE_FINE_GRAIN_PCIE'] = '1'
+ os.environ['PYTORCH_TESTING_DEVICE_ONLY_FOR'] = 'cuda'
+ os.environ['CONTINUE_THROUGH_ERROR'] = 'True'
+ if skip_rerun:
+ # modify run_test.py in-place
+ with open(run_test_python_file, 'r') as file:
+ data = file.read()
+ data = data.replace(search_text, replace_text)
+ with open(run_test_python_file, 'w') as file:
+ file.write(data)
+
+    # Time stamp, also used as the job id for this run
+    current_datetime = datetime.now().strftime("%Y%m%d_%H-%M-%S")
+    print("Current date & time : ", current_datetime)
+    overall_logs_path_current_run = repo_test_log_folder_path + current_datetime + "/"
+ os.mkdir(overall_logs_path_current_run)
+
+ global CONSOLIDATED_LOG_FILE_PATH
+ CONSOLIDATED_LOG_FILE_PATH = overall_logs_path_current_run + CONSOLIDATED_LOG_FILE_NAME
+
+ # Check multi gpu availability if distributed tests are enabled
+ if ("distributed" in test_config) or len(distributed_list) != 0:
+ check_num_gpus_for_distributed()
+
+ # Install test requirements
+ command = "pip3 install -r requirements.txt && pip3 install -r .ci/docker/requirements-ci.txt"
+ run_command_and_capture_output(command)
+
+ # Run entire tests for each workflow
+ if not priority_tests and not default_list and not distributed_list and not inductor_list:
+ # run entire tests for default, distributed and inductor workflows → use test.sh
+ if not test_config:
+ check_num_gpus_for_distributed()
+ # default test process
+ res_default_all = run_entire_tests("default", test_shell_path, overall_logs_path_current_run, test_reports_src)
+ res_all_tests_dict["default"] = res_default_all
+ # distributed test process
+ res_distributed_all = run_entire_tests("distributed", test_shell_path, overall_logs_path_current_run, test_reports_src)
+ res_all_tests_dict["distributed"] = res_distributed_all
+ # inductor test process
+ res_inductor_all = run_entire_tests("inductor", test_shell_path, overall_logs_path_current_run, test_reports_src)
+ res_all_tests_dict["inductor"] = res_inductor_all
+ else:
+        workflow_list = list(test_config)
+ if "default" in workflow_list:
+ res_default_all = run_entire_tests("default", test_shell_path, overall_logs_path_current_run, test_reports_src)
+ res_all_tests_dict["default"] = res_default_all
+ if "distributed" in workflow_list:
+ res_distributed_all = run_entire_tests("distributed", test_shell_path, overall_logs_path_current_run, test_reports_src)
+ res_all_tests_dict["distributed"] = res_distributed_all
+ if "inductor" in workflow_list:
+ res_inductor_all = run_entire_tests("inductor", test_shell_path, overall_logs_path_current_run, test_reports_src)
+ res_all_tests_dict["inductor"] = res_inductor_all
+    # Run priority tests for each workflow
+ elif priority_tests and not default_list and not distributed_list and not inductor_list:
+ if not test_config:
+ check_num_gpus_for_distributed()
+ # default test process
+ res_default_priority = run_priority_tests("default", test_run_test_path, overall_logs_path_current_run, test_reports_src)
+ res_all_tests_dict["default"] = res_default_priority
+ # distributed test process
+ res_distributed_priority = run_priority_tests("distributed", test_run_test_path, overall_logs_path_current_run, test_reports_src)
+ res_all_tests_dict["distributed"] = res_distributed_priority
+ # will not run inductor priority tests
+ print("Inductor priority tests cannot run since no core tests defined with inductor workflow.")
+ else:
+        workflow_list = list(test_config)
+ if "default" in workflow_list:
+ res_default_priority = run_priority_tests("default", test_run_test_path, overall_logs_path_current_run, test_reports_src)
+ res_all_tests_dict["default"] = res_default_priority
+ if "distributed" in workflow_list:
+ res_distributed_priority = run_priority_tests("distributed", test_run_test_path, overall_logs_path_current_run, test_reports_src)
+ res_all_tests_dict["distributed"] = res_distributed_priority
+ if "inductor" in workflow_list:
+ print("Inductor priority tests cannot run since no core tests defined with inductor workflow.")
+ # Run specified tests for each workflow
+ elif (default_list or distributed_list or inductor_list) and not test_config and not priority_tests:
+ if default_list:
+            default_workflow_list = list(default_list)
+ res_default_selected = run_selected_tests("default", test_run_test_path, overall_logs_path_current_run, test_reports_src, default_workflow_list)
+ res_all_tests_dict["default"] = res_default_selected
+ if distributed_list:
+            distributed_workflow_list = list(distributed_list)
+ res_distributed_selected = run_selected_tests("distributed", test_run_test_path, overall_logs_path_current_run, test_reports_src, distributed_workflow_list)
+ res_all_tests_dict["distributed"] = res_distributed_selected
+ if inductor_list:
+            inductor_workflow_list = list(inductor_list)
+ res_inductor_selected = run_selected_tests("inductor", test_run_test_path, overall_logs_path_current_run, test_reports_src, inductor_workflow_list)
+ res_all_tests_dict["inductor"] = res_inductor_selected
+ else:
+        raise ValueError("Invalid test configurations!")
+
+ # restore environment variables
+ os.environ.clear()
+ os.environ.update(_environ)
+
+ # restore files
+ if skip_rerun:
+        # revert the earlier in-place change to run_test.py
+ with open(run_test_python_file, 'r') as file:
+ data = file.read()
+ data = data.replace(replace_text, search_text)
+ with open(run_test_python_file, 'w') as file:
+ file.write(data)
+
+ return res_all_tests_dict
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Run PyTorch unit tests and generate xml results summary', formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('--test_config', nargs='+', default=[], type=str, help="space-separated list of test workflows to be executed e.g. 'default distributed'")
+ parser.add_argument('--priority_tests', action='store_true', help="run priority tests only")
+    parser.add_argument('--default_list', nargs='+', default=[], help="space-separated list of 'default' config test suites/files to be executed e.g. 'test_weak test_dlpack'")
+    parser.add_argument('--distributed_list', nargs='+', default=[], help="space-separated list of 'distributed' config test suites/files to be executed e.g. 'distributed/test_c10d_common distributed/test_c10d_nccl'")
+    parser.add_argument('--inductor_list', nargs='+', default=[], help="space-separated list of 'inductor' config test suites/files to be executed e.g. 'inductor/test_torchinductor test_ops'")
+ parser.add_argument('--pytorch_root', default='.', type=str, help="PyTorch root directory")
+ parser.add_argument('--skip_rerun', action='store_true', help="skip rerun process")
+ parser.add_argument('--example_output', type=str, help="{'workflow_name': {\n"
+ " test_file_and_status(file_name='workflow_aggregate', status='STATISTICS'): {}, \n"
+ " test_file_and_status(file_name='test_file_name_1', status='ERROR'): {}, \n"
+ " test_file_and_status(file_name='test_file_name_1', status='FAILED'): {}, \n"
+ " test_file_and_status(file_name='test_file_name_1', status='PASSED'): {}, \n"
+ " test_file_and_status(file_name='test_file_name_1', status='SKIPPED'): {}, \n"
+ " test_file_and_status(file_name='test_file_name_1', status='STATISTICS'): {} \n"
+ "}}\n")
+ parser.add_argument('--example_usages', type=str, help="RUN ALL TESTS: python3 run_pytorch_unit_tests.py \n"
+ "RUN PRIORITY TESTS: python3 run_pytorch_unit_tests.py --test_config distributed --priority_test \n"
+ "RUN SELECTED TESTS: python3 run_pytorch_unit_tests.py --default_list test_weak test_dlpack --inductor_list inductor/test_torchinductor")
+ return parser.parse_args()
+
+def check_num_gpus_for_distributed():
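+    """Assert that more than one GPU is visible, as required by the distributed tests."""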
+ p = subprocess.run("rocminfo | grep -cE 'Name:\s+gfx'", shell=True, capture_output=True, text=True)
+ num_gpus_visible = int(p.stdout)
+ assert num_gpus_visible > 1, "Number of visible GPUs should be >1 to run distributed unit tests"
+
+def main():
+ args = parse_args()
+ all_tests_results = run_test_and_summarize_results(args.pytorch_root, args.priority_tests, args.test_config, args.default_list, args.distributed_list, args.inductor_list, args.skip_rerun)
+ pprint(dict(all_tests_results))
+
+if __name__ == "__main__":
+ main()
diff --git a/.ci/aarch64_linux/aarch64_ci_build.sh b/.ci/aarch64_linux/aarch64_ci_build.sh
index 424ddd0013cd..41cabc3bf511 100644
--- a/.ci/aarch64_linux/aarch64_ci_build.sh
+++ b/.ci/aarch64_linux/aarch64_ci_build.sh
@@ -3,8 +3,20 @@ set -eux -o pipefail
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
-if [[ "$GPU_ARCH_VERSION" == *"12.9"* ]]; then
+# Set CUDA architecture lists to match x86 build_cuda.sh
+if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then
+ export TORCH_CUDA_ARCH_LIST="8.0;9.0"
+elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
+elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then
+ export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX"
+fi
+
+# Compress the fatbin with -compress-mode=size for CUDA 13
+if [[ "$DESIRED_CUDA" == *"13"* ]]; then
+ export TORCH_NVCC_FLAGS="-compress-mode=size"
+ # Bundle ptxas into the cu13 wheel, see https://github.com/pytorch/pytorch/issues/163801
+ export BUILD_BUNDLE_PTXAS=1
fi
SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
@@ -18,7 +30,7 @@ cd /
# on the mounted pytorch repo
git config --global --add safe.directory /pytorch
pip install -r /pytorch/requirements.txt
-pip install auditwheel==6.2.0
+pip install auditwheel==6.2.0 wheel
if [ "$DESIRED_CUDA" = "cpu" ]; then
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
@@ -26,6 +38,16 @@ if [ "$DESIRED_CUDA" = "cpu" ]; then
else
echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
export USE_SYSTEM_NCCL=1
+
+ # Check if we should use NVIDIA libs from PyPI (similar to x86 build_cuda.sh logic)
+ if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
+ echo "Bundling CUDA libraries with wheel for aarch64."
+ else
+ echo "Using nvidia libs from pypi for aarch64."
+ echo "Updated PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64: $PYTORCH_EXTRA_INSTALL_REQUIREMENTS"
+ export USE_NVIDIA_PYPI_LIBS=1
+ fi
+
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
fi
diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
index a2b5f6912c9a..1b6429fa8c06 100755
--- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
@@ -69,62 +69,186 @@ def replace_tag(filename) -> None:
f.writelines(lines)
+def patch_library_rpath(
+ folder: str,
+ lib_name: str,
+ use_nvidia_pypi_libs: bool = False,
+ desired_cuda: str = "",
+) -> None:
+ """Apply patchelf to set RPATH for a library in torch/lib"""
+ lib_path = f"{folder}/tmp/torch/lib/{lib_name}"
+
+ if use_nvidia_pypi_libs:
+ # For PyPI NVIDIA libraries, construct CUDA RPATH
+ cuda_rpaths = [
+ "$ORIGIN/../../nvidia/cudnn/lib",
+ "$ORIGIN/../../nvidia/nvshmem/lib",
+ "$ORIGIN/../../nvidia/nccl/lib",
+ "$ORIGIN/../../nvidia/cusparselt/lib",
+ ]
+
+ if "130" in desired_cuda:
+ cuda_rpaths.append("$ORIGIN/../../nvidia/cu13/lib")
+ else:
+ cuda_rpaths.extend(
+ [
+ "$ORIGIN/../../nvidia/cublas/lib",
+ "$ORIGIN/../../nvidia/cuda_cupti/lib",
+ "$ORIGIN/../../nvidia/cuda_nvrtc/lib",
+ "$ORIGIN/../../nvidia/cuda_runtime/lib",
+ "$ORIGIN/../../nvidia/cufft/lib",
+ "$ORIGIN/../../nvidia/curand/lib",
+ "$ORIGIN/../../nvidia/cusolver/lib",
+ "$ORIGIN/../../nvidia/cusparse/lib",
+ "$ORIGIN/../../nvidia/nvtx/lib",
+ "$ORIGIN/../../nvidia/cufile/lib",
+ ]
+ )
+
+ # Add $ORIGIN for local torch libs
+ rpath = ":".join(cuda_rpaths) + ":$ORIGIN"
+ else:
+ # For bundled libraries, just use $ORIGIN
+ rpath = "$ORIGIN"
+
+ if os.path.exists(lib_path):
+ os.system(
+ f"cd {folder}/tmp/torch/lib/; "
+ f"patchelf --set-rpath '{rpath}' --force-rpath {lib_name}"
+ )
+
+
+def copy_and_patch_library(
+ src_path: str,
+ folder: str,
+ use_nvidia_pypi_libs: bool = False,
+ desired_cuda: str = "",
+) -> None:
+ """Copy a library to torch/lib and patch its RPATH"""
+ if os.path.exists(src_path):
+ lib_name = os.path.basename(src_path)
+ shutil.copy2(src_path, f"{folder}/tmp/torch/lib/{lib_name}")
+ patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda)
+
+
def package_cuda_wheel(wheel_path, desired_cuda) -> None:
"""
Package the cuda wheel libraries
"""
folder = os.path.dirname(wheel_path)
- wheelname = os.path.basename(wheel_path)
os.mkdir(f"{folder}/tmp")
os.system(f"unzip {wheel_path} -d {folder}/tmp")
- libs_to_copy = [
- "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
- "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so",
- "/usr/local/cuda/lib64/libcudnn.so.9",
- "/usr/local/cuda/lib64/libcublas.so.12",
- "/usr/local/cuda/lib64/libcublasLt.so.12",
- "/usr/local/cuda/lib64/libcudart.so.12",
- "/usr/local/cuda/lib64/libcufft.so.11",
- "/usr/local/cuda/lib64/libcusparse.so.12",
- "/usr/local/cuda/lib64/libcusparseLt.so.0",
- "/usr/local/cuda/lib64/libcusolver.so.11",
- "/usr/local/cuda/lib64/libcurand.so.10",
- "/usr/local/cuda/lib64/libnccl.so.2",
- "/usr/local/cuda/lib64/libnvJitLink.so.12",
- "/usr/local/cuda/lib64/libnvrtc.so.12",
- "/usr/local/cuda/lib64/libnvshmem_host.so.3",
- "/usr/local/cuda/lib64/libcudnn_adv.so.9",
- "/usr/local/cuda/lib64/libcudnn_cnn.so.9",
- "/usr/local/cuda/lib64/libcudnn_graph.so.9",
- "/usr/local/cuda/lib64/libcudnn_ops.so.9",
- "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9",
- "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9",
- "/usr/local/cuda/lib64/libcudnn_heuristic.so.9",
- "/lib64/libgomp.so.1",
- "/usr/lib64/libgfortran.so.5",
- "/acl/build/libarm_compute.so",
- "/acl/build/libarm_compute_graph.so",
- "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
- "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
- "/usr/local/lib/libnvpl_lapack_core.so.0",
- "/usr/local/lib/libnvpl_blas_core.so.0",
- ]
+ # Delete original wheel since it will be repackaged
+ os.system(f"rm {wheel_path}")
+
+ # Check if we should use PyPI NVIDIA libraries or bundle system libraries
+ use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1"
+
+ if use_nvidia_pypi_libs:
+ print("Using nvidia libs from pypi - skipping CUDA library bundling")
+ # For PyPI approach, we don't bundle CUDA libraries - they come from PyPI packages
+ # We only need to bundle non-NVIDIA libraries
+ minimal_libs_to_copy = [
+ "/lib64/libgomp.so.1",
+ "/usr/lib64/libgfortran.so.5",
+ "/acl/build/libarm_compute.so",
+ "/acl/build/libarm_compute_graph.so",
+ "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
+ "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
+ "/usr/local/lib/libnvpl_lapack_core.so.0",
+ "/usr/local/lib/libnvpl_blas_core.so.0",
+ ]
- if "129" in desired_cuda:
- libs_to_copy += [
- "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.9",
+ # Copy minimal libraries to unzipped_folder/torch/lib
+ for lib_path in minimal_libs_to_copy:
+ copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda)
+
+ # Patch torch libraries used for searching libraries
+ torch_libs_to_patch = [
+ "libtorch.so",
+ "libtorch_cpu.so",
+ "libtorch_cuda.so",
+ "libtorch_cuda_linalg.so",
+ "libtorch_global_deps.so",
+ "libtorch_python.so",
+ "libtorch_nvshmem.so",
+ "libc10.so",
+ "libc10_cuda.so",
+ "libcaffe2_nvrtc.so",
+ "libshm.so",
+ ]
+ for lib_name in torch_libs_to_patch:
+ patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda)
+ else:
+ print("Bundling CUDA libraries with wheel")
+ # Original logic for bundling system CUDA libraries
+ # Common libraries for all CUDA versions
+ common_libs = [
+ # Non-NVIDIA system libraries
+ "/lib64/libgomp.so.1",
+ "/usr/lib64/libgfortran.so.5",
+ "/acl/build/libarm_compute.so",
+ "/acl/build/libarm_compute_graph.so",
+ # Common CUDA libraries (same for all versions)
+ "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
+ "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
+ "/usr/local/lib/libnvpl_lapack_core.so.0",
+ "/usr/local/lib/libnvpl_blas_core.so.0",
+ "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so",
+ "/usr/local/cuda/lib64/libcudnn.so.9",
+ "/usr/local/cuda/lib64/libcusparseLt.so.0",
+ "/usr/local/cuda/lib64/libcurand.so.10",
+ "/usr/local/cuda/lib64/libnccl.so.2",
+ "/usr/local/cuda/lib64/libnvshmem_host.so.3",
+ "/usr/local/cuda/lib64/libcudnn_adv.so.9",
+ "/usr/local/cuda/lib64/libcudnn_cnn.so.9",
+ "/usr/local/cuda/lib64/libcudnn_graph.so.9",
+ "/usr/local/cuda/lib64/libcudnn_ops.so.9",
+ "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9",
+ "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9",
+ "/usr/local/cuda/lib64/libcudnn_heuristic.so.9",
"/usr/local/cuda/lib64/libcufile.so.0",
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
+ "/usr/local/cuda/lib64/libcusparse.so.12",
]
- # Copy libraries to unzipped_folder/a/lib
- for lib_path in libs_to_copy:
- lib_name = os.path.basename(lib_path)
- shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}")
- os.system(
- f"cd {folder}/tmp/torch/lib/; "
- f"patchelf --set-rpath '$ORIGIN' --force-rpath {folder}/tmp/torch/lib/{lib_name}"
- )
+ # CUDA version-specific libraries
+ if "13" in desired_cuda:
+ minor_version = desired_cuda[-1]
+ version_specific_libs = [
+ "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13",
+ "/usr/local/cuda/lib64/libcublas.so.13",
+ "/usr/local/cuda/lib64/libcublasLt.so.13",
+ "/usr/local/cuda/lib64/libcudart.so.13",
+ "/usr/local/cuda/lib64/libcufft.so.12",
+ "/usr/local/cuda/lib64/libcusolver.so.12",
+ "/usr/local/cuda/lib64/libnvJitLink.so.13",
+ "/usr/local/cuda/lib64/libnvrtc.so.13",
+ f"/usr/local/cuda/lib64/libnvrtc-builtins.so.13.{minor_version}",
+ ]
+ elif "12" in desired_cuda:
+ # Get the last character for libnvrtc-builtins version (e.g., "129" -> "9")
+ minor_version = desired_cuda[-1]
+ version_specific_libs = [
+ "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
+ "/usr/local/cuda/lib64/libcublas.so.12",
+ "/usr/local/cuda/lib64/libcublasLt.so.12",
+ "/usr/local/cuda/lib64/libcudart.so.12",
+ "/usr/local/cuda/lib64/libcufft.so.11",
+ "/usr/local/cuda/lib64/libcusolver.so.11",
+ "/usr/local/cuda/lib64/libnvJitLink.so.12",
+ "/usr/local/cuda/lib64/libnvrtc.so.12",
+ f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}",
+ ]
+ else:
+ raise ValueError(f"Unsupported CUDA version: {desired_cuda}.")
+
+ # Combine all libraries
+ libs_to_copy = common_libs + version_specific_libs
+
+ # Copy libraries to unzipped_folder/torch/lib
+ for lib_path in libs_to_copy:
+ copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda)
# Make sure the wheel is tagged with manylinux_2_28
for f in os.scandir(f"{folder}/tmp/"):
@@ -132,14 +256,8 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
replace_tag(f"{f.path}/WHEEL")
break
- os.mkdir(f"{folder}/cuda_wheel")
- os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
- shutil.move(
- f"{folder}/cuda_wheel/{wheelname}",
- f"{folder}/{wheelname}",
- copy_function=shutil.copy2,
- )
- os.system(f"rm -rf {folder}/tmp/ {folder}/cuda_wheel/")
+ os.system(f"wheel pack {folder}/tmp/ -d {folder}")
+ os.system(f"rm -rf {folder}/tmp/")
def complete_wheel(folder: str) -> str:
@@ -162,14 +280,7 @@ def complete_wheel(folder: str) -> str:
f"/{folder}/dist/{repaired_wheel_name}",
)
else:
- repaired_wheel_name = wheel_name.replace(
- "linux_aarch64", "manylinux_2_28_aarch64"
- )
- print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}")
- os.rename(
- f"/{folder}/dist/{wheel_name}",
- f"/{folder}/dist/{repaired_wheel_name}",
- )
+ repaired_wheel_name = list_dir(f"/{folder}/dist")[0]
print(f"Copying {repaired_wheel_name} to artifacts")
shutil.copy2(
@@ -211,6 +322,16 @@ def parse_arguments():
if enable_cuda:
build_vars += "MAX_JOBS=5 "
+ # Handle PyPI NVIDIA libraries vs bundled libraries
+ use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1"
+ if use_nvidia_pypi_libs:
+ print("Configuring build for PyPI NVIDIA libraries")
+ # Configure for dynamic linking (matching x86 logic)
+ build_vars += "ATEN_STATIC_CUDA=0 USE_CUDA_STATIC_LINK=0 USE_CUPTI_SO=1 "
+ else:
+ print("Configuring build for bundled NVIDIA libraries")
+ # Keep existing static linking approach - already configured above
+
override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
desired_cuda = os.getenv("DESIRED_CUDA")
if override_package_version is not None:
diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
index f22aa919e434..8672fae2bbdd 100755
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@@ -81,8 +81,8 @@ elif [[ "$image" == *riscv* ]]; then
DOCKERFILE="ubuntu-cross-riscv/Dockerfile"
fi
-_UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb
-_UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
+_UCX_COMMIT=7836b165abdbe468a2f607e7254011c07d788152
+_UCC_COMMIT=430e241bf5d38cbc73fc7a6b89155397232e3f96
if [[ "$image" == *rocm* ]]; then
_UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
_UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
@@ -114,31 +114,19 @@ case "$tag" in
UCC_COMMIT=${_UCC_COMMIT}
TRITON=yes
;;
- pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
- CUDA_VERSION=12.8.1
+ pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11)
+ CUDA_VERSION=13.0.0
ANACONDA_PYTHON_VERSION=3.10
- GCC_VERSION=9
- VISION=yes
- KATEX=yes
- UCX_COMMIT=${_UCX_COMMIT}
- UCC_COMMIT=${_UCC_COMMIT}
- TRITON=yes
- INDUCTOR_BENCHMARKS=yes
- ;;
- pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc9-inductor-benchmarks)
- CUDA_VERSION=12.8.1
- ANACONDA_PYTHON_VERSION=3.12
- GCC_VERSION=9
+ GCC_VERSION=11
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
TRITON=yes
- INDUCTOR_BENCHMARKS=yes
;;
- pytorch-linux-jammy-cuda12.8-cudnn9-py3.13-gcc9-inductor-benchmarks)
+ pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.8.1
- ANACONDA_PYTHON_VERSION=3.13
+ ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
VISION=yes
KATEX=yes
@@ -173,8 +161,8 @@ case "$tag" in
VISION=yes
ONNX=yes
;;
- pytorch-linux-jammy-py3.9-clang12)
- ANACONDA_PYTHON_VERSION=3.9
+ pytorch-linux-jammy-py3.10-clang12)
+ ANACONDA_PYTHON_VERSION=3.10
CLANG_VERSION=12
VISION=yes
TRITON=yes
@@ -209,24 +197,24 @@ case "$tag" in
UCC_COMMIT=${_UCC_COMMIT}
PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950"
;;
- pytorch-linux-jammy-xpu-2025.0-py3)
- ANACONDA_PYTHON_VERSION=3.9
+ pytorch-linux-jammy-xpu-n-1-py3)
+ ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
VISION=yes
- XPU_VERSION=2025.0
+ XPU_VERSION=2025.1
NINJA_VERSION=1.9.0
TRITON=yes
;;
- pytorch-linux-jammy-xpu-2025.1-py3)
- ANACONDA_PYTHON_VERSION=3.9
+ pytorch-linux-jammy-xpu-n-py3)
+ ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
VISION=yes
- XPU_VERSION=2025.1
+ XPU_VERSION=2025.2
NINJA_VERSION=1.9.0
TRITON=yes
;;
- pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
- ANACONDA_PYTHON_VERSION=3.9
+ pytorch-linux-jammy-py3-gcc11-inductor-benchmarks)
+ ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
VISION=yes
KATEX=yes
@@ -234,8 +222,8 @@ case "$tag" in
DOCS=yes
INDUCTOR_BENCHMARKS=yes
;;
- pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-clang12)
- ANACONDA_PYTHON_VERSION=3.9
+ pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-clang12)
+ ANACONDA_PYTHON_VERSION=3.10
CUDA_VERSION=12.8.1
CLANG_VERSION=12
VISION=yes
@@ -246,8 +234,8 @@ case "$tag" in
CLANG_VERSION=18
VISION=yes
;;
- pytorch-linux-jammy-py3.9-gcc11)
- ANACONDA_PYTHON_VERSION=3.9
+ pytorch-linux-jammy-py3.10-gcc11)
+ ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
VISION=yes
KATEX=yes
@@ -274,13 +262,10 @@ case "$tag" in
TRITON_CPU=yes
;;
pytorch-linux-jammy-linter)
- # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
- # We will need to update mypy version eventually, but that's for another day. The task
- # would be to upgrade mypy to 1.0.0 with Python 3.11
- PYTHON_VERSION=3.9
+ PYTHON_VERSION=3.10
;;
- pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter)
- PYTHON_VERSION=3.9
+ pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter)
+ PYTHON_VERSION=3.10
CUDA_VERSION=12.8.1
;;
pytorch-linux-jammy-aarch64-py3.10-gcc11)
diff --git a/.ci/docker/ci_commit_pins/torchbench.txt b/.ci/docker/ci_commit_pins/torchbench.txt
index efbc3ceeb2af..c9be7b440bae 100644
--- a/.ci/docker/ci_commit_pins/torchbench.txt
+++ b/.ci/docker/ci_commit_pins/torchbench.txt
@@ -1 +1 @@
-e03a63be43e33596f7f0a43b0f530353785e4a59
+74a23feff57432129df84d8099e622773cf77925
diff --git a/.ci/docker/ci_commit_pins/triton-xpu.txt b/.ci/docker/ci_commit_pins/triton-xpu.txt
index 3be14be85ad6..b03606f6defc 100644
--- a/.ci/docker/ci_commit_pins/triton-xpu.txt
+++ b/.ci/docker/ci_commit_pins/triton-xpu.txt
@@ -1 +1 @@
-a6572fb0be5b9b0a19b0641a0ce05810fa04e44c
+1b0418a9a454b2b93ab8d71f40e59d2297157fae
diff --git a/.ci/docker/ci_commit_pins/triton.txt b/.ci/docker/ci_commit_pins/triton.txt
index 60c896b80c8f..99ec5b4aa341 100644
--- a/.ci/docker/ci_commit_pins/triton.txt
+++ b/.ci/docker/ci_commit_pins/triton.txt
@@ -1 +1 @@
-f7888497a1eb9e98d4c07537f0d0bcfe180d1363
+d08c31a24d622b4bf767a6645135b7b3d0d886f4
diff --git a/.ci/docker/common/install_cpython.sh b/.ci/docker/common/install_cpython.sh
index c160e5704ba3..692edd0b898f 100755
--- a/.ci/docker/common/install_cpython.sh
+++ b/.ci/docker/common/install_cpython.sh
@@ -83,9 +83,9 @@ function build_cpython {
py_suffix=${py_ver::-1}
py_folder=$py_suffix
fi
- # Only b3 is available now
+ # Update to rc2 due to https://github.com/python/cpython/commit/c72699086fe4
if [ "$py_suffix" == "3.14.0" ]; then
- py_suffix="3.14.0b3"
+ py_suffix="3.14.0rc2"
fi
wget -q $PYTHON_DOWNLOAD_URL/$py_folder/Python-$py_suffix.tgz -O Python-$py_ver.tgz
do_cpython_build $py_ver Python-$py_suffix
diff --git a/.ci/docker/common/install_cuda.sh b/.ci/docker/common/install_cuda.sh
index 00c3cfd06b41..c6808ea4a7a2 100644
--- a/.ci/docker/common/install_cuda.sh
+++ b/.ci/docker/common/install_cuda.sh
@@ -147,7 +147,7 @@ function install_128 {
}
function install_130 {
- CUDNN_VERSION=9.12.0.46
+ CUDNN_VERSION=9.13.0.50
echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
# install CUDA 13.0 in the same container
install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux
diff --git a/.ci/docker/common/install_triton.sh b/.ci/docker/common/install_triton.sh
index f48140952c3a..8e714bcb6cd3 100755
--- a/.ci/docker/common/install_triton.sh
+++ b/.ci/docker/common/install_triton.sh
@@ -21,7 +21,7 @@ elif [ -n "${TRITON_CPU}" ]; then
TRITON_REPO="https://github.com/triton-lang/triton-cpu"
TRITON_TEXT_FILE="triton-cpu"
else
- TRITON_REPO="https://github.com/triton-lang/triton"
+ TRITON_REPO="https://github.com/ROCm/triton"
TRITON_TEXT_FILE="triton"
fi
diff --git a/.ci/docker/common/install_ucc.sh b/.ci/docker/common/install_ucc.sh
index b7f884ea9648..04f15a52e88e 100755
--- a/.ci/docker/common/install_ucc.sh
+++ b/.ci/docker/common/install_ucc.sh
@@ -44,8 +44,12 @@ function install_ucc() {
./autogen.sh
- # We only run distributed tests on Tesla M60 and A10G
- NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
+ if [[ -n "$CUDA_VERSION" && $CUDA_VERSION == 13* ]]; then
+ NVCC_GENCODE="-gencode=arch=compute_86,code=compute_86"
+ else
+ # We only run distributed tests on Tesla M60 and A10G
+ NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
+ fi
if [[ -n "$ROCM_VERSION" ]]; then
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
diff --git a/.ci/docker/common/install_xpu.sh b/.ci/docker/common/install_xpu.sh
index 7f21d2e42c72..0b150872f93c 100644
--- a/.ci/docker/common/install_xpu.sh
+++ b/.ci/docker/common/install_xpu.sh
@@ -65,10 +65,14 @@ function install_ubuntu() {
function install_rhel() {
. /etc/os-release
-
- if [[ ! " 8.8 8.10 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
- echo "RHEL version ${VERSION_ID} not supported"
- exit
+ if [[ "${ID}" == "rhel" ]]; then
+ if [[ ! " 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
+ echo "RHEL version ${VERSION_ID} not supported"
+ exit
+ fi
+ elif [[ "${ID}" == "almalinux" ]]; then
+    # Workaround for almalinux8, which is used by quay.io/pypa/manylinux_2_28_x86_64
+ VERSION_ID="8.8"
fi
dnf install -y 'dnf-command(config-manager)'
@@ -146,11 +150,11 @@ if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
XPU_DRIVER_VERSION="/lts/2350"
fi
-# Default use Intel® oneAPI Deep Learning Essentials 2025.0
-if [[ "$XPU_VERSION" == "2025.1" ]]; then
- XPU_PACKAGES="intel-deep-learning-essentials-2025.1"
+# Default to Intel® oneAPI Deep Learning Essentials 2025.1
+if [[ "$XPU_VERSION" == "2025.2" ]]; then
+ XPU_PACKAGES="intel-deep-learning-essentials-2025.2"
else
- XPU_PACKAGES="intel-deep-learning-essentials-2025.0"
+ XPU_PACKAGES="intel-deep-learning-essentials-2025.1"
fi
# The installation depends on the base OS
diff --git a/.ci/docker/common/patch_libstdc.sh b/.ci/docker/common/patch_libstdc.sh
new file mode 100755
index 000000000000..7e3a00d0dad8
--- /dev/null
+++ b/.ci/docker/common/patch_libstdc.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+set -xe
+# Script used in Linux x86 and aarch64 CD pipeline
+
+# Workaround for exposing statically linked libstdc++ CXX11 ABI symbols.
+# see: https://github.com/pytorch/pytorch/issues/133437
+LIBNONSHARED=$(gcc -print-file-name=libstdc++_nonshared.a)
+nm -g "$LIBNONSHARED" | grep " T " | grep recursive_directory_iterator | cut -c 20- > weaken-symbols.txt
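+# objcopy --weaken-symbols marks the listed global symbols as weak, so definitions
+# from other objects take precedence over these statically linked copies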
+objcopy --weaken-symbols weaken-symbols.txt "$LIBNONSHARED" "$LIBNONSHARED"
diff --git a/.ci/docker/libtorch/Dockerfile b/.ci/docker/libtorch/Dockerfile
index d2788b2713f7..d19431ad8b54 100644
--- a/.ci/docker/libtorch/Dockerfile
+++ b/.ci/docker/libtorch/Dockerfile
@@ -74,6 +74,14 @@ RUN bash ./install_cuda.sh 13.0
RUN bash ./install_magma.sh 13.0
RUN ln -sf /usr/local/cuda-13.0 /usr/local/cuda
+# Install libibverbs for libtorch and copy to CUDA directory
+RUN apt-get update -y && \
+ apt-get install -y libibverbs-dev librdmacm-dev && \
+ cp /usr/lib/x86_64-linux-gnu/libmlx5.so* /usr/local/cuda/lib64/ && \
+ cp /usr/lib/x86_64-linux-gnu/librdmacm.so* /usr/local/cuda/lib64/ && \
+ cp /usr/lib/x86_64-linux-gnu/libibverbs.so* /usr/local/cuda/lib64/ && \
+ cp /usr/lib/x86_64-linux-gnu/libnl* /usr/local/cuda/lib64/
+
FROM cpu as rocm
ARG ROCM_VERSION
ARG PYTORCH_ROCM_ARCH
diff --git a/.ci/docker/manywheel/Dockerfile_2_28 b/.ci/docker/manywheel/Dockerfile_2_28
index b150423e9954..4803cb778c90 100644
--- a/.ci/docker/manywheel/Dockerfile_2_28
+++ b/.ci/docker/manywheel/Dockerfile_2_28
@@ -130,7 +130,8 @@ ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/op
RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
/opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \
done;
-
+ADD ./common/patch_libstdc.sh patch_libstdc.sh
+RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
# cmake-3.18.4 from pip; force in case cmake3 already exists
RUN yum install -y python3-pip && \
@@ -175,6 +176,6 @@ ENV XPU_DRIVER_TYPE ROLLING
RUN python3 -m pip install --upgrade pip && \
python3 -mpip install cmake==3.28.4
ADD ./common/install_xpu.sh install_xpu.sh
-ENV XPU_VERSION 2025.1
+ENV XPU_VERSION 2025.2
RUN bash ./install_xpu.sh && rm install_xpu.sh
RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd
diff --git a/.ci/docker/manywheel/Dockerfile_2_28_aarch64 b/.ci/docker/manywheel/Dockerfile_2_28_aarch64
index da7ab4d3fd15..6cfab77941fc 100644
--- a/.ci/docker/manywheel/Dockerfile_2_28_aarch64
+++ b/.ci/docker/manywheel/Dockerfile_2_28_aarch64
@@ -71,3 +71,5 @@ RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/
ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH
+ADD ./common/patch_libstdc.sh patch_libstdc.sh
+RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
diff --git a/.ci/docker/manywheel/Dockerfile_cuda_aarch64 b/.ci/docker/manywheel/Dockerfile_cuda_aarch64
index 369706055737..4d2596fea821 100644
--- a/.ci/docker/manywheel/Dockerfile_cuda_aarch64
+++ b/.ci/docker/manywheel/Dockerfile_cuda_aarch64
@@ -95,3 +95,5 @@ COPY --from=nvpl /opt/nvpl/lib/ /usr/local/lib/
COPY --from=nvpl /opt/nvpl/include/ /usr/local/include/
RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
ENV PATH=/usr/local/cuda/bin:$PATH
+ADD ./common/patch_libstdc.sh patch_libstdc.sh
+RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt
index c9d2fddb1324..248ee8409036 100644
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@@ -93,8 +93,9 @@ librosa==0.10.2 ; python_version == "3.12" and platform_machine != "s390x"
#Pinned versions:
#test that import:
-mypy==1.16.0
+mypy==1.16.0 ; platform_system != "Windows"
# Pin MyPy version because new errors are likely to appear with each release
+# Skip on Windows as lots of type annotations are POSIX specific
#Description: linter
#Pinned versions: 1.16.0
#test that import: test_typing.py, test_type_hints.py
@@ -112,9 +113,8 @@ ninja==1.11.1.3
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
numba==0.49.0 ; python_version < "3.9" and platform_machine != "s390x"
-numba==0.55.2 ; python_version == "3.9" and platform_machine != "s390x"
-numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x"
-numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
+numba==0.60.0 ; python_version == "3.9" and platform_machine != "s390x"
+numba==0.61.2 ; python_version > "3.9" and platform_machine != "s390x"
#Description: Just-In-Time Compiler for Numerical Functions
#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
#test that import: test_numba_integration.py
@@ -133,12 +133,10 @@ numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
#test_binary_ufuncs.py
-numpy==1.22.4; python_version == "3.9" or python_version == "3.10"
-numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
-numpy==2.1.2; python_version >= "3.13"
+numpy==2.0.2 ; python_version == "3.9"
+numpy==2.1.2 ; python_version > "3.9"
-pandas==2.0.3; python_version < "3.13"
-pandas==2.2.3; python_version >= "3.13"
+pandas==2.2.3
#onnxruntime
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
@@ -168,10 +166,11 @@ pillow==11.0.0
#Pinned versions: 10.3.0
#test that import:
-protobuf==5.29.4
-#Description: Google's data interchange format
-#Pinned versions: 5.29.4
-#test that import: test_tensorboard.py, test/onnx/*
+protobuf==3.20.2 ; python_version <= "3.12"
+protobuf==4.25.1 ; python_version == "3.13"
+#Description: Google's data interchange format
+#Pinned versions: 3.20.2, 4.25.1
+#test that import: test_tensorboard.py
psutil
#Description: information on running processes and system utilization
@@ -249,8 +248,8 @@ scikit-image==0.22.0 ; python_version >= "3.10"
#Pinned versions: 0.20.3
#test that import:
-scipy==1.10.1 ; python_version <= "3.11"
-scipy==1.14.1 ; python_version >= "3.12"
+scipy==1.13.1 ; python_version == "3.9"
+scipy==1.14.1 ; python_version > "3.9"
# Pin SciPy because of failing distribution tests (see #60347)
#Description: scientific python
#Pinned versions: 1.10.1
@@ -309,8 +308,7 @@ z3-solver==4.15.1.0 ; platform_machine != "s390x"
#Pinned versions:
#test that import:
-tensorboard==2.13.0 ; python_version < "3.13"
-tensorboard==2.18.0 ; python_version >= "3.13"
+tensorboard==2.18.0
#Description: Also included in .ci/docker/requirements-docs.txt
#Pinned versions:
#test that import: test_tensorboard
@@ -322,7 +320,8 @@ pywavelets==1.7.0 ; python_version >= "3.12"
#Pinned versions: 1.4.1
#test that import:
-lxml==5.3.0
+lxml==5.3.0 ; python_version <= "3.12"
+lxml==6.0.0 ; python_version == "3.13"
#Description: This is a requirement of unittest-xml-reporting
# Python-3.9 binaries
@@ -334,8 +333,9 @@ sympy==1.13.3
#Pinned versions:
#test that import:
-onnx==1.18.0
-#Description: Required by onnx tests, and mypy and test_public_bindings.py when checking torch.onnx._internal
+onnx==1.16.1 ; python_version <= "3.12"
+onnx==1.18.0 ; python_version == "3.13"
+#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
#Pinned versions:
#test that import:
@@ -379,7 +379,7 @@ dataclasses_json==0.6.7
cmake==4.0.0
#Description: required for building
-tlparse==0.3.30
+tlparse==0.4.0
#Description: required for log parsing
cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x"
diff --git a/.ci/docker/requirements-docs.txt b/.ci/docker/requirements-docs.txt
index 3de4d8e0e44e..c5ad8e969fb9 100644
--- a/.ci/docker/requirements-docs.txt
+++ b/.ci/docker/requirements-docs.txt
@@ -1,7 +1,7 @@
sphinx==5.3.0
#Description: This is used to generate PyTorch docs
#Pinned versions: 5.3.0
--e git+https://github.com/pytorch/pytorch_sphinx_theme.git@722b7e6f9ca512fcc526ad07d62b3d28c50bb6cd#egg=pytorch_sphinx_theme2
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
diff --git a/.ci/docker/triton_version.txt b/.ci/docker/triton_version.txt
index 18091983f59d..1545d966571d 100644
--- a/.ci/docker/triton_version.txt
+++ b/.ci/docker/triton_version.txt
@@ -1 +1 @@
-3.4.0
+3.5.0
diff --git a/.ci/docker/triton_xpu_version.txt b/.ci/docker/triton_xpu_version.txt
index 18091983f59d..1545d966571d 100644
--- a/.ci/docker/triton_xpu_version.txt
+++ b/.ci/docker/triton_xpu_version.txt
@@ -1 +1 @@
-3.4.0
+3.5.0
diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile
index 57f997f30089..1edc8c60c2f0 100644
--- a/.ci/docker/ubuntu/Dockerfile
+++ b/.ci/docker/ubuntu/Dockerfile
@@ -66,6 +66,7 @@ ENV NCCL_LIB_DIR="/usr/local/cuda/lib64/"
# (optional) Install UCC
ARG UCX_COMMIT
ARG UCC_COMMIT
+ARG CUDA_VERSION
ENV UCX_COMMIT $UCX_COMMIT
ENV UCC_COMMIT $UCC_COMMIT
ENV UCX_HOME /usr
diff --git a/.ci/libtorch/build.sh b/.ci/libtorch/build.sh
index 54ddd905aad0..c2d67f8b1bb2 100644
--- a/.ci/libtorch/build.sh
+++ b/.ci/libtorch/build.sh
@@ -7,4 +7,4 @@ set -ex
SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh
+USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.10" ${SCRIPTPATH}/../manywheel/build.sh
diff --git a/.ci/lumen_cli/cli/lib/common/gh_summary.py b/.ci/lumen_cli/cli/lib/common/gh_summary.py
new file mode 100644
index 000000000000..72bfaa76e706
--- /dev/null
+++ b/.ci/lumen_cli/cli/lib/common/gh_summary.py
@@ -0,0 +1,143 @@
+from __future__ import annotations
+
+import logging
+import os
+import textwrap
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from cli.lib.common.utils import get_wheels
+from jinja2 import Template
+
+
+if TYPE_CHECKING:
+ from collections.abc import Iterable, Mapping
+
+
+logger = logging.getLogger(__name__)
+
+_TPL_CONTENT = Template(
+ textwrap.dedent("""\
+ ## {{ title }}
+
+ ```{{ lang }}
+ {{ content }}
+ ```
+""")
+)
+
+_TPL_LIST_ITEMS = Template(
+ textwrap.dedent("""\
+ ## {{ title }}
+ {% for it in items %}
+ - {{ it.pkg }}: {{ it.relpath }}
+ {% else %}
+    _(no items found)_
+ {% endfor %}
+ """)
+)
+
+_TPL_TABLE = Template(
+ textwrap.dedent("""\
+ {%- if rows %}
+ | {{ cols | join(' | ') }} |
+ |{%- for _ in cols %} --- |{%- endfor %}
+ {%- for r in rows %}
+ | {%- for c in cols %} {{ r.get(c, "") }} |{%- endfor %}
+ {%- endfor %}
+ {%- else %}
+ _(no data)_
+ {%- endif %}
+""")
+)
+
+
+def gh_summary_path() -> Path | None:
+ """Return the Path to the GitHub step summary file, or None if not set."""
+ p = os.environ.get("GITHUB_STEP_SUMMARY")
+ return Path(p) if p else None
+
+
+def write_gh_step_summary(md: str, *, append_content: bool = True) -> bool:
+ """
+ Write Markdown content to the GitHub Step Summary file if GITHUB_STEP_SUMMARY is set.
+    append_content: defaults to True; when True, append to the end of the file, otherwise overwrite it
+
+ Returns:
+ True if written successfully (in GitHub Actions environment),
+ False if skipped (e.g., running locally where the variable is not set).
+ """
+ sp = gh_summary_path()
+ if not sp:
+ logger.info("[gh-summary] GITHUB_STEP_SUMMARY not set, skipping write.")
+ return False
+
+ md_clean = textwrap.dedent(md).strip() + "\n"
+
+ mode = "a" if append_content else "w"
+ with sp.open(mode, encoding="utf-8") as f:
+ f.write(md_clean)
+ return True
+
+
+def md_heading(text: str, level: int = 2) -> str:
+ """Generate a Markdown heading string with the given level (1-6)."""
+ return f"{'#' * max(1, min(level, 6))} {text}\n"
+
+
+def md_details(summary: str, content: str) -> str:
+ """Generate a collapsible block with a summary and inner content."""
+ return f"\n{summary}
\n\n{content}\n\n \n"
+
+
+def summarize_content_from_file(
+ output_dir: Path,
+ freeze_file: str,
+ title: str = "Content from file",
+ code_lang: str = "", # e.g. "text" or "ini"
+) -> bool:
+ f = Path(output_dir) / freeze_file
+ if not f.exists():
+ return False
+ content = f.read_text(encoding="utf-8").strip()
+ md = render_content(content, title=title, lang=code_lang)
+ return write_gh_step_summary(md)
+
+
+def summarize_wheels(path: Path, title: str = "Wheels", max_depth: int = 3) -> bool:
+ items = get_wheels(path, max_depth=max_depth)
+ if not items:
+ return False
+ md = render_list(items, title=title)
+ return write_gh_step_summary(md)
+
+
+def md_kv_table(rows: Iterable[Mapping[str, str | int | float]]) -> str:
+ """
+ Render a list of dicts as a Markdown table using Jinja template.
+ """
+ rows = list(rows)
+    # Preserve first-seen key order; a set comprehension would make the column order nondeterministic
+    cols = list(dict.fromkeys(k for r in rows for k in r))
+ md = _TPL_TABLE.render(cols=cols, rows=rows).strip() + "\n"
+ return md
+
+
+def render_list(
+    items: Iterable[Mapping[str, str]],
+ *,
+ title: str = "List",
+) -> str:
+ tpl = _TPL_LIST_ITEMS
+ md = tpl.render(title=title, items=items)
+ return md
+
+
+def render_content(
+ content: str,
+ *,
+ title: str = "Content",
+ lang: str = "text",
+) -> str:
+ tpl = _TPL_CONTENT
+ md = tpl.render(title=title, content=content, lang=lang)
+ return md
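
[Editor's note] A minimal usage sketch of the helpers above. The wheel rows and file contents are invented for illustration; outside GitHub Actions (where GITHUB_STEP_SUMMARY is unset) write_gh_step_summary logs a message and returns False instead of writing.

```python
from cli.lib.common.gh_summary import (
    md_kv_table,
    render_content,
    render_list,
    write_gh_step_summary,
)

# Render a pip-freeze-style snippet as a fenced block in the step summary.
write_gh_step_summary(
    render_content("torch==2.9.0\nvllm==0.10.0", title="Pinned packages", lang="text")
)

# Lists and tables both accept dicts shaped like get_wheels() output.
wheels = [{"pkg": "torch", "relpath": "torch-2.9.0-cp310-linux_x86_64.whl"}]
write_gh_step_summary(render_list(wheels, title="Wheels"))
write_gh_step_summary(md_kv_table(wheels))
```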
diff --git a/.ci/lumen_cli/cli/lib/common/git_helper.py b/.ci/lumen_cli/cli/lib/common/git_helper.py
index 7fa070a3cb65..9833caca956c 100644
--- a/.ci/lumen_cli/cli/lib/common/git_helper.py
+++ b/.ci/lumen_cli/cli/lib/common/git_helper.py
@@ -45,7 +45,7 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
# Checkout pinned commit
commit = get_post_build_pinned_commit(target)
- logger.info("Checking out pinned commit %s", commit)
+ logger.info("Checking out pinned %s commit %s", target, commit)
r.git.checkout(commit)
# Update submodules if requested
@@ -55,7 +55,7 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
sm.update(init=True, recursive=True, progress=PrintProgress())
logger.info("Successfully cloned %s", target)
- return r
+ return r, commit
except GitCommandError as e:
logger.error("Git operation failed: %s", e)
diff --git a/.ci/lumen_cli/cli/lib/common/pip_helper.py b/.ci/lumen_cli/cli/lib/common/pip_helper.py
index 1eed8406c9f7..a53747e24d25 100644
--- a/.ci/lumen_cli/cli/lib/common/pip_helper.py
+++ b/.ci/lumen_cli/cli/lib/common/pip_helper.py
@@ -4,7 +4,7 @@
import shutil
import sys
from collections.abc import Iterable
-from importlib.metadata import PackageNotFoundError, version
+from importlib.metadata import PackageNotFoundError, version # noqa: UP035
from typing import Optional, Union
from cli.lib.common.utils import run_command
diff --git a/.ci/lumen_cli/cli/lib/common/utils.py b/.ci/lumen_cli/cli/lib/common/utils.py
index 05790bd66acf..b03309810d98 100644
--- a/.ci/lumen_cli/cli/lib/common/utils.py
+++ b/.ci/lumen_cli/cli/lib/common/utils.py
@@ -8,6 +8,7 @@
import subprocess
import sys
from contextlib import contextmanager
+from pathlib import Path
from typing import Optional
@@ -115,3 +116,24 @@ def working_directory(path: str):
yield
finally:
os.chdir(prev_cwd)
+
+
+def get_wheels(
+ output_dir: Path,
+ max_depth: Optional[int] = None,
+) -> list[dict[str, str]]:
+    """Return the wheels found under ``output_dir`` as ``{"pkg", "relpath"}`` dicts."""
+ root = Path(output_dir)
+ if not root.exists():
+ return []
+ items = []
+ for dirpath, _, filenames in os.walk(root):
+        rel_parts = Path(dirpath).relative_to(root).parts
+        if max_depth is not None and len(rel_parts) > max_depth:
+ continue
+ for fname in sorted(filenames):
+ if fname.endswith(".whl"):
+ pkg = fname.split("-")[0]
+ relpath = str((Path(dirpath) / fname).relative_to(root))
+ items.append({"pkg": pkg, "relpath": relpath})
+ return items
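
[Editor's note] A sketch of the get_wheels() contract, with an invented directory layout:

```python
from pathlib import Path

# Given a tree like:
#   dist/torch-2.9.0-cp310-linux_x86_64.whl
#   dist/vision/torchvision-0.24.0-cp310-linux_x86_64.whl
# get_wheels(Path("dist")) returns entries such as:
#   {"pkg": "torch",       "relpath": "torch-2.9.0-cp310-linux_x86_64.whl"}
#   {"pkg": "torchvision", "relpath": "vision/torchvision-0.24.0-cp310-linux_x86_64.whl"}
for item in get_wheels(Path("dist"), max_depth=3):
    print(item["pkg"], "->", item["relpath"])
```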
diff --git a/.ci/lumen_cli/cli/lib/core/vllm/lib.py b/.ci/lumen_cli/cli/lib/core/vllm/lib.py
index 7f3a930b2cc6..0e2132839adb 100644
--- a/.ci/lumen_cli/cli/lib/core/vllm/lib.py
+++ b/.ci/lumen_cli/cli/lib/core/vllm/lib.py
@@ -1,13 +1,27 @@
import logging
+import os
+import textwrap
from typing import Any
+from cli.lib.common.gh_summary import write_gh_step_summary
from cli.lib.common.git_helper import clone_external_repo
from cli.lib.common.pip_helper import pip_install_packages
from cli.lib.common.utils import run_command, temp_environ, working_directory
+from jinja2 import Template
logger = logging.getLogger(__name__)
+_TPL_VLLM_INFO = Template(
+ textwrap.dedent("""\
+    ## vLLM against PyTorch CI Test Summary
+    **vLLM Commit**: [{{ vllm_commit }}](https://github.com/vllm-project/vllm/commit/{{ vllm_commit }})
+    {%- if torch_sha %}
+    **PyTorch Commit**: [{{ torch_sha }}](https://github.com/pytorch/pytorch/commit/{{ torch_sha }})
+ {%- endif %}
+""")
+)
+
def sample_vllm_test_library():
"""
@@ -27,7 +41,6 @@ def sample_vllm_test_library():
"pytest -v -s basic_correctness/test_cumem.py",
"pytest -v -s basic_correctness/test_basic_correctness.py",
"pytest -v -s basic_correctness/test_cpu_offload.py",
- "VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py",
],
},
"vllm_basic_models_test": {
@@ -54,16 +67,12 @@ def sample_vllm_test_library():
"-v",
"-s",
"entrypoints/llm",
- "--ignore=entrypoints/llm/test_lazy_outlines.py",
"--ignore=entrypoints/llm/test_generate.py",
- "--ignore=entrypoints/llm/test_generate_multiple_loras.py",
"--ignore=entrypoints/llm/test_collective_rpc.py",
]
),
- "pytest -v -s entrypoints/llm/test_lazy_outlines.py",
- "pytest -v -s entrypoints/llm/test_generate.py ",
- "pytest -v -s entrypoints/llm/test_generate_multiple_loras.py",
- "VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode",
+ "pytest -v -s entrypoints/llm/test_generate.py",
+ "pytest -v -s entrypoints/offline_mode",
],
},
"vllm_regression_test": {
@@ -83,14 +92,24 @@ def sample_vllm_test_library():
"num_gpus": 4,
"steps": [
"pytest -v -s -x lora/test_chatglm3_tp.py",
- "echo $VLLM_WORKER_MULTIPROC_METHOD",
"pytest -v -s -x lora/test_llama_tp.py",
- "pytest -v -s -x lora/test_multi_loras_with_tp.py",
+ "pytest -v -s -x lora/test_llm_with_multi_loras.py",
],
},
- "vllm_lora_280_failure_test": {
- "title": "LoRA 280 failure test",
- "id": "vllm_lora_280_failure_test",
+ "vllm_distributed_test_28_failure_test": {
+ "title": "Distributed Tests (2 GPUs) pytorch 2.8 release failure",
+ "id": "vllm_distributed_test_28_failure_test",
+ "env_vars": {
+ "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+ },
+ "num_gpus": 4,
+ "steps": [
+ "pytest -v -s distributed/test_sequence_parallel.py",
+ ],
+ },
+ "vllm_lora_28_failure_test": {
+ "title": "LoRA pytorch 2.8 failure test",
+ "id": "vllm_lora_28_failure_test",
"steps": ["pytest -v lora/test_quant_model.py"],
},
"vllm_multi_model_processor_test": {
@@ -101,6 +120,15 @@ def sample_vllm_test_library():
"pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py",
],
},
+ "vllm_multi_model_test_28_failure_test": {
+ "title": "Multi-Model Test (Failed 2.8 release)",
+ "id": "vllm_multi_model_test_28_failure_test",
+ "package_install": ["git+https://github.com/TIGER-AI-Lab/Mantis.git"],
+ "steps": [
+ "pytest -v -s models/multimodal/generation/test_voxtral.py",
+ "pytest -v -s models/multimodal/pooling",
+ ],
+ },
"vllm_pytorch_compilation_unit_tests": {
"title": "PyTorch Compilation Unit Tests",
"id": "vllm_pytorch_compilation_unit_tests",
@@ -115,6 +143,28 @@ def sample_vllm_test_library():
"pytest -v -s compile/test_decorator.py",
],
},
+ "vllm_languagde_model_test_extended_generation_28_failure_test": {
+ "title": "Language Models Test (Extended Generation) 2.8 release failure",
+ "id": "vllm_languagde_model_test_extended_generation_28_failure_test",
+ "package_install": [
+ "--no-build-isolation",
+ "git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8",
+ ],
+ "steps": [
+ "pytest -v -s models/language/generation/test_mistral.py",
+ ],
+ },
+ "vllm_distributed_test_2_gpu_28_failure_test": {
+ "title": "Distributed Tests (2 GPUs) pytorch 2.8 release failure",
+ "id": "vllm_distributed_test_2_gpu_28_failure_test",
+ "env_vars": {
+ "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+ },
+ "num_gpus": 4,
+ "steps": [
+ "pytest -v -s distributed/test_sequence_parallel.py",
+ ],
+ },
# TODO(elainewy):need to add g6 with 4 gpus to run this test
"vllm_lora_test": {
"title": "LoRA Test %N",
@@ -214,12 +264,13 @@ def run_test_plan(
def clone_vllm(dst: str = "vllm"):
- clone_external_repo(
+ _, commit = clone_external_repo(
target="vllm",
repo="https://github.com/vllm-project/vllm.git",
dst=dst,
update_submodules=True,
)
+ return commit
def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) -> str:
@@ -230,3 +281,12 @@ def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) ->
for k in sorted(mapping, key=len, reverse=True):
step = step.replace(k, mapping[k])
return step
+
+
+def summarize_build_info(vllm_commit: str) -> bool:
+ torch_sha = os.getenv("GITHUB_SHA")
+ md = (
+ _TPL_VLLM_INFO.render(vllm_commit=vllm_commit, torch_sha=torch_sha).strip()
+ + "\n"
+ )
+ return write_gh_step_summary(md)
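
[Editor's note] What summarize_build_info() renders, with made-up short SHAs; when GITHUB_SHA is unset the PyTorch line is dropped by the `{%- if torch_sha %}` guard:

```python
md = _TPL_VLLM_INFO.render(vllm_commit="78a47f8", torch_sha="abc1234").strip() + "\n"
# ## vLLM against PyTorch CI Test Summary
# **vLLM Commit**: [78a47f8](https://github.com/vllm-project/vllm/commit/78a47f8)
# **PyTorch Commit**: [abc1234](https://github.com/pytorch/pytorch/commit/abc1234)
```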
diff --git a/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py b/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py
index d067a14f7590..8db48065cb05 100644
--- a/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py
+++ b/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py
@@ -13,6 +13,11 @@
env_str_field,
with_params_help,
)
+from cli.lib.common.gh_summary import (
+ gh_summary_path,
+ summarize_content_from_file,
+ summarize_wheels,
+)
from cli.lib.common.path_helper import (
copy,
ensure_dir_exists,
@@ -21,7 +26,7 @@
is_path_exist,
)
from cli.lib.common.utils import run_command
-from cli.lib.core.vllm.lib import clone_vllm
+from cli.lib.core.vllm.lib import clone_vllm, summarize_build_info
logger = logging.getLogger(__name__)
@@ -153,18 +158,43 @@ def run(self):
"""
inputs = VllmBuildParameters()
logger.info("Running vllm build with inputs: %s", inputs)
- clone_vllm()
+ vllm_commit = clone_vllm()
self.cp_dockerfile_if_exist(inputs)
-
# cp torch wheels from root direct to vllm workspace if exist
self.cp_torch_whls_if_exist(inputs)
- ensure_dir_exists(inputs.output_dir)
+        # make sure the output dir that stores the build artifacts exists
+ ensure_dir_exists(Path(inputs.output_dir))
cmd = self._generate_docker_build_cmd(inputs)
logger.info("Running docker build: \n %s", cmd)
- run_command(cmd, cwd="vllm", env=os.environ.copy())
+
+ try:
+ run_command(cmd, cwd="vllm", env=os.environ.copy())
+ finally:
+            self.generate_vllm_build_summary(vllm_commit, inputs)
+
+    def generate_vllm_build_summary(
+ self, vllm_commit: str, inputs: VllmBuildParameters
+ ):
+ if not gh_summary_path():
+            logger.info("Skipping: GITHUB_STEP_SUMMARY env var not detected.")
+            return
+        logger.info("Generating GH summary ...")
+ # summarize vllm build info
+ summarize_build_info(vllm_commit)
+
+ # summarize vllm build artifacts
+ vllm_artifact_dir = inputs.output_dir / "wheels"
+ summarize_content_from_file(
+ vllm_artifact_dir,
+ "build_summary.txt",
+ title="Vllm build env pip package summary",
+ )
+ summarize_wheels(
+ inputs.torch_whls_path, max_depth=3, title="Torch Wheels Artifacts"
+ )
+        summarize_wheels(vllm_artifact_dir, max_depth=3, title="vLLM Wheels Artifacts")
def cp_torch_whls_if_exist(self, inputs: VllmBuildParameters) -> str:
if not inputs.use_torch_whl:
diff --git a/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py b/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py
index 2be8e246486e..76401e33f29f 100644
--- a/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py
+++ b/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py
@@ -104,20 +104,26 @@ def run(self):
main function to run vllm test
"""
self.prepare()
- with working_directory(self.work_directory):
- if self.test_type == TestInpuType.TEST_PLAN:
- if self.num_shards > 1:
- run_test_plan(
- self.test_plan,
- "vllm",
- sample_vllm_test_library(),
- self.shard_id,
- self.num_shards,
- )
+ try:
+ with working_directory(self.work_directory):
+ if self.test_type == TestInpuType.TEST_PLAN:
+ if self.num_shards > 1:
+ run_test_plan(
+ self.test_plan,
+ "vllm",
+ sample_vllm_test_library(),
+ self.shard_id,
+ self.num_shards,
+ )
+ else:
+ run_test_plan(
+ self.test_plan, "vllm", sample_vllm_test_library()
+ )
else:
- run_test_plan(self.test_plan, "vllm", sample_vllm_test_library())
- else:
- raise ValueError(f"Unknown test type {self.test_type}")
+ raise ValueError(f"Unknown test type {self.test_type}")
+ finally:
+            # double-check that the torch packages were not overridden by other packages
+ check_versions()
def _install_wheels(self, params: VllmTestParameters):
logger.info("Running vllm test with inputs: %s", params)
@@ -220,6 +226,8 @@ def preprocess_test_in(
target_path = Path(target_file)
lines = target_path.read_text().splitlines()
+ pkgs_to_add = []
+
# Remove lines starting with the package names (==, @, >=) — case-insensitive
pattern = re.compile(rf"^({'|'.join(pkgs_to_remove)})\s*(==|@|>=)", re.IGNORECASE)
kept_lines = [line for line in lines if not pattern.match(line)]
@@ -236,7 +244,11 @@ def preprocess_test_in(
]
# Write back: header_lines + blank + kept_lines
- out = "\n".join(header_lines + [""] + kept_lines) + "\n"
+ out_lines = header_lines + [""] + kept_lines
+ if pkgs_to_add:
+ out_lines += [""] + pkgs_to_add
+
+ out = "\n".join(out_lines) + "\n"
target_path.write_text(out)
logger.info("[INFO] Updated %s", target_file)
diff --git a/.ci/manywheel/build_cuda.sh b/.ci/manywheel/build_cuda.sh
index 3fbd25be1da3..6ed38f8b25c6 100644
--- a/.ci/manywheel/build_cuda.sh
+++ b/.ci/manywheel/build_cuda.sh
@@ -124,6 +124,7 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then
fi
if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
echo "Bundling with cudnn and cublas."
+
DEPS_LIST+=(
"/usr/local/cuda/lib64/libcudnn_adv.so.9"
"/usr/local/cuda/lib64/libcudnn_cnn.so.9"
@@ -133,16 +134,11 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then
"/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9"
"/usr/local/cuda/lib64/libcudnn_heuristic.so.9"
"/usr/local/cuda/lib64/libcudnn.so.9"
- "/usr/local/cuda/lib64/libcublas.so.12"
- "/usr/local/cuda/lib64/libcublasLt.so.12"
"/usr/local/cuda/lib64/libcusparseLt.so.0"
- "/usr/local/cuda/lib64/libcudart.so.12"
- "/usr/local/cuda/lib64/libnvrtc.so.12"
"/usr/local/cuda/lib64/libnvrtc-builtins.so"
"/usr/local/cuda/lib64/libcufile.so.0"
"/usr/local/cuda/lib64/libcufile_rdma.so.1"
"/usr/local/cuda/lib64/libnvshmem_host.so.3"
- "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12"
"/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so"
)
DEPS_SONAME+=(
@@ -154,22 +150,56 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then
"libcudnn_engines_precompiled.so.9"
"libcudnn_heuristic.so.9"
"libcudnn.so.9"
- "libcublas.so.12"
- "libcublasLt.so.12"
"libcusparseLt.so.0"
- "libcudart.so.12"
- "libnvrtc.so.12"
"libnvrtc-builtins.so"
"libnvshmem_host.so.3"
"libcufile.so.0"
"libcufile_rdma.so.1"
- "libcupti.so.12"
"libnvperf_host.so"
)
# Add libnvToolsExt only if CUDA version is not 12.9
- if [[ $CUDA_VERSION != 12.9* ]]; then
- DEPS_LIST+=("/usr/local/cuda/lib64/libnvToolsExt.so.1")
- DEPS_SONAME+=("libnvToolsExt.so.1")
+ if [[ $CUDA_VERSION == 13* ]]; then
+ DEPS_LIST+=(
+ "/usr/local/cuda/lib64/libcublas.so.13"
+ "/usr/local/cuda/lib64/libcublasLt.so.13"
+ "/usr/local/cuda/lib64/libcudart.so.13"
+ "/usr/local/cuda/lib64/libnvrtc.so.13"
+ "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13"
+ "/usr/local/cuda/lib64/libibverbs.so.1"
+ "/usr/local/cuda/lib64/librdmacm.so.1"
+ "/usr/local/cuda/lib64/libmlx5.so.1"
+ "/usr/local/cuda/lib64/libnl-3.so.200"
+ "/usr/local/cuda/lib64/libnl-route-3.so.200")
+ DEPS_SONAME+=(
+ "libcublas.so.13"
+ "libcublasLt.so.13"
+ "libcudart.so.13"
+ "libnvrtc.so.13"
+ "libcupti.so.13"
+ "libibverbs.so.1"
+ "librdmacm.so.1"
+ "libmlx5.so.1"
+ "libnl-3.so.200"
+ "libnl-route-3.so.200")
+ export USE_CUPTI_SO=1
+ export ATEN_STATIC_CUDA=0
+ export USE_CUDA_STATIC_LINK=0
+ export USE_CUFILE=0
+ else
+ DEPS_LIST+=(
+ "/usr/local/cuda/lib64/libnvToolsExt.so.1"
+ "/usr/local/cuda/lib64/libcublas.so.12"
+ "/usr/local/cuda/lib64/libcublasLt.so.12"
+ "/usr/local/cuda/lib64/libcudart.so.12"
+ "/usr/local/cuda/lib64/libnvrtc.so.12"
+ "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12")
+ DEPS_SONAME+=(
+ "libnvToolsExt.so.1"
+ "libcublas.so.12"
+ "libcublasLt.so.12"
+ "libcudart.so.12"
+ "libnvrtc.so.12"
+ "libcupti.so.12")
fi
else
echo "Using nvidia libs from pypi."
diff --git a/.ci/pytorch/check_binary.sh b/.ci/pytorch/check_binary.sh
index 0f632f8006c0..cca289ac146b 100755
--- a/.ci/pytorch/check_binary.sh
+++ b/.ci/pytorch/check_binary.sh
@@ -67,7 +67,7 @@ fi
# wheels with cxx11-abi
echo "Checking that the gcc ABI is what we expect"
-if [[ "$(uname)" != 'Darwin' ]]; then
+if [[ "$(uname)" != 'Darwin' && "$(uname -m)" != "s390x" ]]; then
# We also check that there are cxx11 symbols in libtorch
#
echo "Checking that symbols in libtorch.so have the right gcc abi"
diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh
index 6d79a4517edf..bf03e132d30b 100644
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@@ -284,7 +284,7 @@ function install_torchrec_and_fbgemm() {
function clone_pytorch_xla() {
if [[ ! -d ./xla ]]; then
- git clone --recursive --quiet https://github.com/pytorch/xla.git
+ git clone --recursive -b r2.9 https://github.com/pytorch/xla.git
pushd xla
# pin the xla hash so that we don't get broken by changes to xla
git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
diff --git a/.ci/pytorch/cpp_doc_push_script.sh b/.ci/pytorch/cpp_doc_push_script.sh
index 6e417bf8bbe9..f085fa78bebe 100755
--- a/.ci/pytorch/cpp_doc_push_script.sh
+++ b/.ci/pytorch/cpp_doc_push_script.sh
@@ -58,7 +58,7 @@ time python tools/setup_helpers/generate_code.py \
# Build the docs
pushd docs/cpp
-time make VERBOSE=1 html -j
+time make VERBOSE=1 html
popd
popd
diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh
index 295a82f057dc..a859901191e0 100755
--- a/.ci/pytorch/macos-test.sh
+++ b/.ci/pytorch/macos-test.sh
@@ -195,7 +195,7 @@ torchbench_setup_macos() {
git checkout "$(cat ../.github/ci_commit_pins/vision.txt)"
git submodule update --init --recursive
python setup.py clean
- python setup.py develop
+ python -m pip install -e . -v --no-build-isolation
popd
pushd torchaudio
@@ -204,7 +204,7 @@ torchbench_setup_macos() {
git submodule update --init --recursive
python setup.py clean
#TODO: Remove me, when figure out how to make TorchAudio find brew installed openmp
- USE_OPENMP=0 python setup.py develop
+ USE_OPENMP=0 python -m pip install -e . -v --no-build-isolation
popd
checkout_install_torchbench
@@ -302,6 +302,47 @@ test_torchbench_smoketest() {
fi
done
+ echo "Pytorch benchmark on mps device completed"
+}
+
+test_aoti_torchbench_smoketest() {
+ print_cmake_info
+
+ echo "Launching AOTInductor torchbench setup"
+ pip_benchmark_deps
+ # shellcheck disable=SC2119,SC2120
+ torchbench_setup_macos
+
+ TEST_REPORTS_DIR=$(pwd)/test/test-reports
+ mkdir -p "$TEST_REPORTS_DIR"
+
+ local device=mps
+ local dtypes=(undefined float16 bfloat16 notset)
+ local dtype=${dtypes[$1]}
+ local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
+
+ echo "Launching torchbench inference performance run for AOT Inductor and dtype ${dtype}"
+ local dtype_arg="--${dtype}"
+ if [ "$dtype" == notset ]; then
+ dtype_arg="--float32"
+ fi
+ touch "$TEST_REPORTS_DIR/aot_inductor_torchbench_${dtype}_inference_${device}_performance.csv"
+ for model in "${models[@]}"; do
+ PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
+ --performance --only "$model" --export-aot-inductor --inference --devices "$device" "$dtype_arg" \
+ --output "$TEST_REPORTS_DIR/aot_inductor_torchbench_${dtype}_inference_${device}_performance.csv" || true
+ PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
+ --accuracy --only "$model" --export-aot-inductor --inference --devices "$device" "$dtype_arg" \
+ --output "$TEST_REPORTS_DIR/aot_inductor_torchbench_${dtype}_inference_${device}_accuracy.csv" || true
+ done
+
+ echo "Launching HuggingFace inference performance run for AOT Inductor and dtype ${dtype}"
+ PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/huggingface.py \
+ --performance --export-aot-inductor --inference --devices "$device" "$dtype_arg" \
+ --output "$TEST_REPORTS_DIR/aot_inductor_huggingface_${dtype}_inference_${device}_performance.csv" || true
+ PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/huggingface.py \
+ --accuracy --export-aot-inductor --inference --devices "$device" "$dtype_arg" \
+ --output "$TEST_REPORTS_DIR/aot_inductor_huggingface_${dtype}_inference_${device}_accuracy.csv" || true
echo "Pytorch benchmark on mps device completed"
}
@@ -350,6 +391,8 @@ elif [[ $TEST_CONFIG == *"perf_timm"* ]]; then
test_timm_perf
elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then
test_torchbench_smoketest "${SHARD_NUMBER}"
+elif [[ $TEST_CONFIG == *"aot_inductor_perf_smoketest"* ]]; then
+ test_aoti_torchbench_smoketest "${SHARD_NUMBER}"
elif [[ $TEST_CONFIG == *"mps"* ]]; then
test_python_mps
elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then
diff --git a/.ci/pytorch/numba-cuda-13.patch b/.ci/pytorch/numba-cuda-13.patch
new file mode 100644
index 000000000000..f96ff287ed39
--- /dev/null
+++ b/.ci/pytorch/numba-cuda-13.patch
@@ -0,0 +1,25 @@
+From 6e08c9d08e9de59c7af28b720289debbbd384764 Mon Sep 17 00:00:00 2001
+From: Michael Wang <13521008+isVoid@users.noreply.github.com>
+Date: Tue, 1 Apr 2025 17:28:05 -0700
+Subject: [PATCH] Avoid bumping certain driver API to avoid future breakage
+ (#185)
+
+Co-authored-by: isVoid
+---
+ numba_cuda/numba/cuda/cudadrv/driver.py | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
+index 1641bf77..233e9ed7 100644
+--- a/numba_cuda/numba/cuda/cudadrv/driver.py
++++ b/numba_cuda/numba/cuda/cudadrv/driver.py
+@@ -365,6 +365,9 @@ def _find_api(self, fname):
+ else:
+ variants = ('_v2', '')
+
++ if fname in ("cuCtxGetDevice", "cuCtxSynchronize"):
++ return getattr(self.lib, fname)
++
+ for variant in variants:
+ try:
+ return getattr(self.lib, f'{fname}{variant}')
diff --git a/.ci/pytorch/smoke_test/check_binary_symbols.py b/.ci/pytorch/smoke_test/check_binary_symbols.py
index 3e88ffe4ffd7..b0c607659c72 100755
--- a/.ci/pytorch/smoke_test/check_binary_symbols.py
+++ b/.ci/pytorch/smoke_test/check_binary_symbols.py
@@ -32,6 +32,9 @@
"torch::",
)
+# Patterns for detecting statically linked libstdc++ symbols
+STATICALLY_LINKED_CXX11_ABI = [re.compile(r".*recursive_directory_iterator.*")]
+
def _apply_libtorch_symbols(symbols):
return [
@@ -53,12 +56,17 @@ def get_symbols(lib: str) -> list[tuple[str, str, str]]:
return [x.split(" ", 2) for x in lines.decode("latin1").split("\n")[:-1]]
-def grep_symbols(lib: str, patterns: list[Any]) -> list[str]:
+def grep_symbols(
+ lib: str, patterns: list[Any], symbol_type: str | None = None
+) -> list[str]:
def _grep_symbols(
symbols: list[tuple[str, str, str]], patterns: list[Any]
) -> list[str]:
rc = []
for _s_addr, _s_type, s_name in symbols:
+ # Filter by symbol type if specified
+ if symbol_type and _s_type != symbol_type:
+ continue
for pattern in patterns:
if pattern.match(s_name):
rc.append(s_name)
@@ -80,6 +88,18 @@ def _get_symbols_chunk(i):
return functools.reduce(list.__add__, (x.result() for x in tasks), [])
+def check_lib_statically_linked_libstdc_cxx_abi_symbols(lib: str) -> None:
+ cxx11_statically_linked_symbols = grep_symbols(
+ lib, STATICALLY_LINKED_CXX11_ABI, symbol_type="T"
+ )
+ num_statically_linked_symbols = len(cxx11_statically_linked_symbols)
+ print(f"num_statically_linked_symbols (T): {num_statically_linked_symbols}")
+ if num_statically_linked_symbols > 0:
+ raise RuntimeError(
+ f"Found statically linked libstdc++ symbols (recursive_directory_iterator): {cxx11_statically_linked_symbols[:100]}"
+ )
+
+
def check_lib_symbols_for_abi_correctness(lib: str) -> None:
print(f"lib: {lib}")
cxx11_symbols = grep_symbols(lib, LIBTORCH_CXX11_PATTERNS)
@@ -107,6 +127,7 @@ def main() -> None:
libtorch_cpu_path = str(install_root / "lib" / "libtorch_cpu.so")
check_lib_symbols_for_abi_correctness(libtorch_cpu_path)
+ check_lib_statically_linked_libstdc_cxx_abi_symbols(libtorch_cpu_path)
if __name__ == "__main__":
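
[Editor's note] The essence of the new static-libstdc++ check, restated over the (addr, type, name) triples that get_symbols() produces (the sample symbol is invented):

```python
import re

STATIC_PATTERN = re.compile(r".*recursive_directory_iterator.*")

def has_static_libstdcxx(symbols: list[tuple[str, str, str]]) -> bool:
    # A defined text symbol ("T") matching the pattern means libstdc++ code
    # was linked into the binary instead of resolved from the shared libstdc++.
    return any(
        s_type == "T" and STATIC_PATTERN.match(s_name)
        for _addr, s_type, s_name in symbols
    )

sym = "std::filesystem::recursive_directory_iterator::operator*()"
assert not has_static_libstdcxx([("0000", "U", sym)])  # undefined: resolved at runtime, OK
assert has_static_libstdcxx([("0000", "T", sym)])      # defined: statically linked, flagged
```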
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index a0c3760b5eaa..e8c5b3fc56af 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -32,6 +32,16 @@ if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /v
git config --global --add safe.directory /var/lib/jenkins/workspace
fi
+
+# Patch numba to avoid CUDA-13 crash, see https://github.com/pytorch/pytorch/issues/162878
+NUMBA_CUDA_DIR=$(python -c "import os;import numba.cuda; print(os.path.dirname(numba.cuda.__file__))" 2>/dev/null || true)
+if [ -n "$NUMBA_CUDA_DIR" ]; then
+ NUMBA_PATCH="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/numba-cuda-13.patch"
+ pushd "$NUMBA_CUDA_DIR"
+ patch -p4 <"$NUMBA_PATCH"
+ popd
+fi
+
echo "Environment variables:"
env
@@ -496,6 +506,14 @@ test_inductor_cpp_wrapper_shard() {
-k 'take' \
--shard "$1" "$NUM_TEST_SHARDS" \
--verbose
+
+ if [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
+ python test/run_test.py \
+ --include inductor/test_mkldnn_pattern_matcher \
+ -k 'xpu' \
+ --shard "$1" "$NUM_TEST_SHARDS" \
+ --verbose
+ fi
}
# "Global" flags for inductor benchmarking controlled by TEST_CONFIG
@@ -1606,6 +1624,25 @@ test_operator_benchmark() {
--expected "expected_ci_operator_benchmark_eager_float32_cpu.csv"
}
+test_operator_microbenchmark() {
+ TEST_REPORTS_DIR=$(pwd)/test/test-reports
+ mkdir -p "$TEST_REPORTS_DIR"
+ TEST_DIR=$(pwd)
+
+ cd benchmarks/operator_benchmark/pt_extension
+ python -m pip install .
+
+ cd "${TEST_DIR}"/benchmarks/operator_benchmark
+
+ for OP_BENCHMARK_TESTS in matmul mm addmm bmm; do
+ $TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
+ --output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \
+ --benchmark-name "PyTorch operator microbenchmark" --use-compile
+ $TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
+ --output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}.json" \
+ --benchmark-name "PyTorch operator microbenchmark"
+ done
+}
if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
(cd test && python -c "import torch; print(torch.__config__.show())")
@@ -1660,6 +1697,8 @@ elif [[ "${TEST_CONFIG}" == *operator_benchmark* ]]; then
test_operator_benchmark cpu ${TEST_MODE}
fi
+elif [[ "${TEST_CONFIG}" == *operator_microbenchmark* ]]; then
+ test_operator_microbenchmark
elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
@@ -1713,11 +1752,6 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
install_torchvision
test_inductor_shard "${SHARD_NUMBER}"
- if [[ "${SHARD_NUMBER}" == 1 ]]; then
- if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.9-gcc11-build ]]; then
- test_inductor_distributed
- fi
- fi
elif [[ "${TEST_CONFIG}" == *einops* ]]; then
test_einops
elif [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
diff --git a/.ci/pytorch/win-test-helpers/build_pytorch.bat b/.ci/pytorch/win-test-helpers/build_pytorch.bat
index 19d715b9d0b6..67d156922192 100644
--- a/.ci/pytorch/win-test-helpers/build_pytorch.bat
+++ b/.ci/pytorch/win-test-helpers/build_pytorch.bat
@@ -137,7 +137,7 @@ sccache --show-stats
python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
(
if "%BUILD_ENVIRONMENT%"=="" (
- echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3` in Command Prompt before running Git Bash.
+ echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_ROOT_DIR%\Scripts\activate.bat %CONDA_ROOT_DIR%\envs\py_tmp` in Command Prompt before running Git Bash.
) else (
copy /Y "dist\*.whl" "%PYTORCH_FINAL_PACKAGE_DIR%"
diff --git a/.ci/pytorch/win-test-helpers/installation-helpers/activate_miniconda3.bat b/.ci/pytorch/win-test-helpers/installation-helpers/activate_miniconda3.bat
index 01e08c8bb4e5..abd2c8722b11 100644
--- a/.ci/pytorch/win-test-helpers/installation-helpers/activate_miniconda3.bat
+++ b/.ci/pytorch/win-test-helpers/installation-helpers/activate_miniconda3.bat
@@ -3,12 +3,12 @@ if "%BUILD_ENVIRONMENT%"=="" (
) else (
set CONDA_PARENT_DIR=C:\Jenkins
)
-
+set CONDA_ROOT_DIR=%CONDA_PARENT_DIR%\Miniconda3
:: Be conservative here when rolling out the new AMI with conda. This will try
:: to install conda as before if it couldn't find the conda installation. This
:: can be removed eventually after we gain enough confidence in the AMI
-if not exist %CONDA_PARENT_DIR%\Miniconda3 (
+if not exist %CONDA_ROOT_DIR% (
set INSTALL_FRESH_CONDA=1
)
@@ -17,10 +17,14 @@ if "%INSTALL_FRESH_CONDA%"=="1" (
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
- %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\Miniconda3
+ %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_ROOT_DIR%
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
)
:: Activate conda so that we can use its commands, i.e. conda, python, pip
-call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3
+call %CONDA_ROOT_DIR%\Scripts\activate.bat %CONDA_ROOT_DIR%
+:: Switch to the py_tmp environment so its python and pip are used
+call conda activate py_tmp
+
+call pip install -r .ci/docker/requirements-ci.txt
diff --git a/.ci/pytorch/win-test-helpers/setup_pytorch_env.bat b/.ci/pytorch/win-test-helpers/setup_pytorch_env.bat
index 4a464d6b5786..3173582b06f4 100644
--- a/.ci/pytorch/win-test-helpers/setup_pytorch_env.bat
+++ b/.ci/pytorch/win-test-helpers/setup_pytorch_env.bat
@@ -14,7 +14,7 @@ if not errorlevel 0 exit /b
:: build\torch. Rather than changing all these references, making a copy of torch folder
:: from conda to the current workspace is easier. The workspace will be cleaned up after
:: the job anyway
-xcopy /s %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
+xcopy /s %CONDA_ROOT_DIR%\envs\py_tmp\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
pushd .
if "%VC_VERSION%" == "" (
diff --git a/.ci/pytorch/win-test.sh b/.ci/pytorch/win-test.sh
index be7f3e4bb35c..c96d5c331c9f 100755
--- a/.ci/pytorch/win-test.sh
+++ b/.ci/pytorch/win-test.sh
@@ -38,13 +38,20 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
fi
# TODO: Move both of them to Windows AMI
-python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==2.13.0 protobuf==5.29.4 pytest-subtests==0.13.1
+python -m pip install tensorboard==2.13.0 protobuf==5.29.4 pytest-subtests==0.13.1
+
+# Copied from https://github.com/pytorch/test-infra/blob/be01a40157c36cd5a48391fdf44a7bc3ebd4c7e3/aws/ami/windows/scripts/Installers/Install-Pip-Dependencies.ps1#L16 with some adjustments
+# pytest-rerunfailures==10.3 as 10.2 fails with INTERNALERROR> pluggy._manager.PluginValidationError: unknown hook 'pytest_configure_node'
+# scipy from 1.6.3 to 1.10
+# expecttest from 0.1.3 to 0.3.0
+# xdoctest from 1.0.2 to 1.3.0
+python -m pip install "future==0.18.2" "hypothesis==5.35.1" "expecttest==0.3.0" "librosa>=0.6.2" "scipy==1.10.1" "psutil==5.9.1" "pynvml==11.4.1" "pillow==9.2.0" "unittest-xml-reporting<=3.2.0,>=2.0.0" "pytest==7.1.3" "pytest-xdist==2.5.0" "pytest-flakefinder==1.1.0" "pytest-rerunfailures==10.3" "pytest-shard==0.1.2" "sympy==1.11.1" "xdoctest==1.3.0" "pygments==2.12.0" "opt-einsum>=3.3" "networkx==2.8.8" "mpmath==1.2.1" "pytest-cpp==2.3.0" "boto3==1.35.42"
# Install Z3 optional dependency for Windows builds.
python -m pip install z3-solver==4.15.1.0
# Install tlparse for test\dynamo\test_structured_trace.py UTs.
-python -m pip install tlparse==0.3.30
+python -m pip install tlparse==0.4.0
# Install parameterized
python -m pip install parameterized==0.8.1
@@ -52,9 +59,6 @@ python -m pip install parameterized==0.8.1
# Install pulp for testing ilps under torch\distributed\_tools
python -m pip install pulp==2.9.0
-# Install expecttest to merge https://github.com/pytorch/pytorch/pull/155308
-python -m pip install expecttest==0.3.0
-
run_tests() {
# Run nvidia-smi if available
for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do
diff --git a/.ci/pytorch/windows/cuda128.bat b/.ci/pytorch/windows/cuda128.bat
index bbdfb4bd1bb7..bbd349e2efb4 100644
--- a/.ci/pytorch/windows/cuda128.bat
+++ b/.ci/pytorch/windows/cuda128.bat
@@ -37,10 +37,10 @@ IF "%CUDA_PATH_V128%"=="" (
)
IF "%BUILD_VISION%" == "" (
- set TORCH_CUDA_ARCH_LIST=6.1;7.0;7.5;8.0;8.6;9.0;10.0;12.0
+ set TORCH_CUDA_ARCH_LIST=7.0;7.5;8.0;8.6;9.0;10.0;12.0
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
) ELSE (
- set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120
+ set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120
)
set "CUDA_PATH=%CUDA_PATH_V128%"
diff --git a/.ci/pytorch/windows/internal/copy.bat b/.ci/pytorch/windows/internal/copy.bat
index 40f2bd7acdbb..e0281c0d78a4 100644
--- a/.ci/pytorch/windows/internal/copy.bat
+++ b/.ci/pytorch/windows/internal/copy.bat
@@ -1,12 +1,20 @@
-copy "%CUDA_PATH%\bin\cusparse*64_*.dll*" pytorch\torch\lib
-copy "%CUDA_PATH%\bin\cublas*64_*.dll*" pytorch\torch\lib
-copy "%CUDA_PATH%\bin\cudart*64_*.dll*" pytorch\torch\lib
-copy "%CUDA_PATH%\bin\curand*64_*.dll*" pytorch\torch\lib
-copy "%CUDA_PATH%\bin\cufft*64_*.dll*" pytorch\torch\lib
-copy "%CUDA_PATH%\bin\cusolver*64_*.dll*" pytorch\torch\lib
+
+if %CUDA_VERSION% geq 130 (
+ set "dll_path=bin\x64"
+) else (
+ set "dll_path=bin"
+)
+
+copy "%CUDA_PATH%\%dll_path%\cusparse*64_*.dll*" pytorch\torch\lib
+copy "%CUDA_PATH%\%dll_path%\cublas*64_*.dll*" pytorch\torch\lib
+copy "%CUDA_PATH%\%dll_path%\cudart*64_*.dll*" pytorch\torch\lib
+copy "%CUDA_PATH%\%dll_path%\curand*64_*.dll*" pytorch\torch\lib
+copy "%CUDA_PATH%\%dll_path%\cufft*64_*.dll*" pytorch\torch\lib
+copy "%CUDA_PATH%\%dll_path%\cusolver*64_*.dll*" pytorch\torch\lib
+copy "%CUDA_PATH%\%dll_path%\nvrtc*64_*.dll*" pytorch\torch\lib
+copy "%CUDA_PATH%\%dll_path%\nvJitLink_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\cudnn*64_*.dll*" pytorch\torch\lib
-copy "%CUDA_PATH%\bin\nvrtc*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\extras\CUPTI\lib64\nvperf_host*.dll*" pytorch\torch\lib
@@ -20,8 +28,3 @@ copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib
if exist "C:\Windows\System32\zlibwapi.dll" (
copy "C:\Windows\System32\zlibwapi.dll" pytorch\torch\lib
)
-
-::copy nvJitLink dll is requires for cuda 12+
-if exist "%CUDA_PATH%\bin\nvJitLink_*.dll*" (
- copy "%CUDA_PATH%\bin\nvJitLink_*.dll*" pytorch\torch\lib
-)
diff --git a/.ci/pytorch/windows/internal/driver_update.bat b/.ci/pytorch/windows/internal/driver_update.bat
index 5ed3a236c09a..2c173aed818b 100644
--- a/.ci/pytorch/windows/internal/driver_update.bat
+++ b/.ci/pytorch/windows/internal/driver_update.bat
@@ -1,9 +1,9 @@
-set WIN_DRIVER_VN=528.89
-set "DRIVER_DOWNLOAD_LINK=https://ossci-windows.s3.amazonaws.com/%WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe" & REM @lint-ignore
-curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe
+set WIN_DRIVER_VN=580.88
+set "DRIVER_DOWNLOAD_LINK=https://ossci-windows.s3.amazonaws.com/%WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe" & REM @lint-ignore
+curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe
if errorlevel 1 exit /b 1
-start /wait %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe -s -noreboot
+start /wait %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe -s -noreboot
if errorlevel 1 exit /b 1
-del %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe || ver > NUL
+del %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe || ver > NUL
diff --git a/.ci/pytorch/windows/internal/xpu_install.bat b/.ci/pytorch/windows/internal/xpu_install.bat
index 2296adf4dfe6..f143571a5692 100644
--- a/.ci/pytorch/windows/internal/xpu_install.bat
+++ b/.ci/pytorch/windows/internal/xpu_install.bat
@@ -13,9 +13,9 @@ if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build"
:xpu_bundle_install_start
set XPU_BUNDLE_PARENT_DIR=C:\Program Files (x86)\Intel\oneAPI
-set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d6d6c17-ca2d-4735-9331-99447e4a1280/intel-deep-learning-essentials-2025.0.1.28_offline.exe
+set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/75d4eb97-914a-4a95-852c-7b9733d80f74/intel-deep-learning-essentials-2025.1.3.8_offline.exe
set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.deep-learning-essentials.product
-set XPU_BUNDLE_VERSION=2025.0.1+20
+set XPU_BUNDLE_VERSION=2025.1.3+5
set XPU_BUNDLE_INSTALLED=0
set XPU_BUNDLE_UNINSTALL=0
set XPU_EXTRA_URL=NULL
@@ -24,9 +24,9 @@ set XPU_EXTRA_VERSION=2025.0.1+1226
set XPU_EXTRA_INSTALLED=0
set XPU_EXTRA_UNINSTALL=0
-if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.1] (
- set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/75d4eb97-914a-4a95-852c-7b9733d80f74/intel-deep-learning-essentials-2025.1.3.8_offline.exe
- set XPU_BUNDLE_VERSION=2025.1.3+5
+if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.2] (
+ set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe
+ set XPU_BUNDLE_VERSION=2025.2.1+20
)
:: Check if XPU bundle is target version or already installed
@@ -90,14 +90,3 @@ if errorlevel 1 exit /b 1
del xpu_extra.exe
:xpu_install_end
-
-if not "%XPU_ENABLE_KINETO%"=="1" goto install_end
-:: Install Level Zero SDK
-set XPU_EXTRA_LZ_URL=https://github.com/oneapi-src/level-zero/releases/download/v1.14.0/level-zero-sdk_1.14.0.zip
-curl -k -L %XPU_EXTRA_LZ_URL% --output "%SRC_DIR%\temp_build\level_zero_sdk.zip"
-echo "Installing level zero SDK..."
-7z x "%SRC_DIR%\temp_build\level_zero_sdk.zip" -o"%SRC_DIR%\temp_build\level_zero"
-set "INCLUDE=%SRC_DIR%\temp_build\level_zero\include;%INCLUDE%"
-del "%SRC_DIR%\temp_build\level_zero_sdk.zip"
-
-:install_end
diff --git a/.ci/wheel/build_wheel.sh b/.ci/wheel/build_wheel.sh
index b9b6448ae208..e63a68e4f193 100755
--- a/.ci/wheel/build_wheel.sh
+++ b/.ci/wheel/build_wheel.sh
@@ -124,19 +124,15 @@ popd
export TH_BINARY_BUILD=1
export INSTALL_TEST=0 # dont install test binaries into site-packages
-export MACOSX_DEPLOYMENT_TARGET=10.15
+export MACOSX_DEPLOYMENT_TARGET=11.0
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
-SETUPTOOLS_PINNED_VERSION="==70.1.0"
-PYYAML_PINNED_VERSION="==5.3"
EXTRA_CONDA_INSTALL_FLAGS=""
CONDA_ENV_CREATE_FLAGS=""
RENAME_WHEEL=true
case $desired_python in
3.14t)
echo "Using 3.14 deps"
- SETUPTOOLS_PINNED_VERSION=">=70.1.0"
- PYYAML_PINNED_VERSION=">=6.0.1"
NUMPY_PINNED_VERSION="==2.1.0"
CONDA_ENV_CREATE_FLAGS="python-freethreading"
EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge"
@@ -145,8 +141,6 @@ case $desired_python in
;;
3.14)
echo "Using 3.14t deps"
- SETUPTOOLS_PINNED_VERSION=">=70.1.0"
- PYYAML_PINNED_VERSION=">=6.0.1"
NUMPY_PINNED_VERSION="==2.1.0"
EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge"
desired_python="3.14.0rc1"
@@ -154,8 +148,6 @@ case $desired_python in
;;
3.13t)
echo "Using 3.13 deps"
- SETUPTOOLS_PINNED_VERSION=">=70.1.0"
- PYYAML_PINNED_VERSION=">=6.0.1"
NUMPY_PINNED_VERSION="==2.1.0"
CONDA_ENV_CREATE_FLAGS="python-freethreading"
EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge"
@@ -164,37 +156,23 @@ case $desired_python in
;;
3.13)
echo "Using 3.13 deps"
- SETUPTOOLS_PINNED_VERSION=">=70.1.0"
- PYYAML_PINNED_VERSION=">=6.0.1"
NUMPY_PINNED_VERSION="==2.1.0"
;;
3.12)
echo "Using 3.12 deps"
- SETUPTOOLS_PINNED_VERSION=">=70.1.0"
- PYYAML_PINNED_VERSION=">=6.0.1"
NUMPY_PINNED_VERSION="==2.0.2"
;;
3.11)
echo "Using 3.11 deps"
- SETUPTOOLS_PINNED_VERSION=">=70.1.0"
- PYYAML_PINNED_VERSION=">=5.3"
NUMPY_PINNED_VERSION="==2.0.2"
;;
3.10)
echo "Using 3.10 deps"
- SETUPTOOLS_PINNED_VERSION=">=70.1.0"
- PYYAML_PINNED_VERSION=">=5.3"
- NUMPY_PINNED_VERSION="==2.0.2"
- ;;
- 3.9)
- echo "Using 3.9 deps"
- SETUPTOOLS_PINNED_VERSION=">=70.1.0"
- PYYAML_PINNED_VERSION=">=5.3"
NUMPY_PINNED_VERSION="==2.0.2"
;;
*)
- echo "Using default deps"
- NUMPY_PINNED_VERSION="==1.11.3"
+ echo "Unsupported version $desired_python"
+ exit 1
;;
esac
@@ -204,8 +182,6 @@ conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_p
source activate "$tmp_env_name"
PINNED_PACKAGES=(
- "setuptools${SETUPTOOLS_PINNED_VERSION}"
- "pyyaml${PYYAML_PINNED_VERSION}"
"numpy${NUMPY_PINNED_VERSION}"
)
retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements-build.txt"
@@ -223,7 +199,7 @@ export BUILD_TEST=OFF
pushd "$pytorch_rootdir"
echo "Calling setup.py bdist_wheel at $(date)"
-python setup.py bdist_wheel -d "$whl_tmp_dir"
+python setup.py bdist_wheel -d "$whl_tmp_dir" --plat-name ${mac_version}
echo "Finished setup.py bdist_wheel at $(date)"
diff --git a/.circleci/scripts/binary_populate_env.sh b/.circleci/scripts/binary_populate_env.sh
index 87fea14b8d28..aa82d36aa7ce 100755
--- a/.circleci/scripts/binary_populate_env.sh
+++ b/.circleci/scripts/binary_populate_env.sh
@@ -5,7 +5,9 @@ export TZ=UTC
tagged_version() {
GIT_DIR="${workdir}/pytorch/.git"
GIT_DESCRIBE="git --git-dir ${GIT_DIR} describe --tags --match v[0-9]*.[0-9]*.[0-9]*"
- if [[ ! -d "${GIT_DIR}" ]]; then
+ if [[ -n "${CIRCLE_TAG:-}" ]]; then
+ echo "${CIRCLE_TAG}"
+ elif [[ ! -d "${GIT_DIR}" ]]; then
echo "Abort, abort! Git dir ${GIT_DIR} does not exists!"
kill $$
elif ${GIT_DESCRIBE} --exact >/dev/null; then
@@ -69,16 +71,11 @@ fi
export PYTORCH_BUILD_NUMBER=1
+# This part is done in the builder scripts, so the duplicate code below is commented out
+: <<'BLOCK_COMMENT'
# Set triton version as part of PYTORCH_EXTRA_INSTALL_REQUIREMENTS
TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
-
-# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
-TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
-
-# CUDA 12.9 builds have triton for Linux and Linux aarch64 binaries.
-if [[ "$DESIRED_CUDA" == "cu129" ]]; then
- TRITON_CONSTRAINT="platform_system == 'Linux'"
-fi
+TRITON_CONSTRAINT="platform_system == 'Linux'"
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
@@ -117,6 +114,7 @@ if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_B
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}"
fi
fi
+BLOCK_COMMENT
USE_GLOO_WITH_OPENSSL="ON"
if [[ "$GPU_ARCH_TYPE" =~ .*aarch64.* ]]; then
diff --git a/.circleci/scripts/binary_windows_build.sh b/.circleci/scripts/binary_windows_build.sh
index 27cd36f94928..18dcde50e2b6 100644
--- a/.circleci/scripts/binary_windows_build.sh
+++ b/.circleci/scripts/binary_windows_build.sh
@@ -15,8 +15,7 @@ fi
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
export VC_YEAR=2022
export USE_SCCACHE=0
- export XPU_VERSION=2025.1
- export XPU_ENABLE_KINETO=1
+ export XPU_VERSION=2025.2
fi
echo "Free space on filesystem before build:"
diff --git a/.circleci/scripts/binary_windows_test.sh b/.circleci/scripts/binary_windows_test.sh
index 79f714265f2c..9326d9037e8b 100644
--- a/.circleci/scripts/binary_windows_test.sh
+++ b/.circleci/scripts/binary_windows_test.sh
@@ -8,7 +8,7 @@ export VC_YEAR=2022
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
export VC_YEAR=2022
- export XPU_VERSION=2025.1
+ export XPU_VERSION=2025.2
fi
pushd "$PYTORCH_ROOT/.ci/pytorch/"
diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml
index 85c7999c1857..798dee312306 100644
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -12,7 +12,9 @@ self-hosted-runner:
- linux.9xlarge.ephemeral
- am2.linux.9xlarge.ephemeral
- linux.12xlarge
+ - linux.12xlarge.memory
- linux.24xlarge
+ - linux.24xlarge.memory
- linux.24xlarge.ephemeral
- linux.24xlarge.amd
- linux.arm64.2xlarge
diff --git a/.github/actions/build-external-packages/action.yml b/.github/actions/build-external-packages/action.yml
index dc8b8b889536..c0c727d93ac6 100644
--- a/.github/actions/build-external-packages/action.yml
+++ b/.github/actions/build-external-packages/action.yml
@@ -4,6 +4,11 @@ name: Build External packages
description: build external packages for PyTorch
inputs:
+ cuda-version:
+ description: CUDA version to use
+ type: string
+ required: true
+ default: '12.8.1'
cuda-arch-list:
description: TORCH_CUDA_ARCH_LIST (e.g., "8.0;8.9;9.0")
type: string
@@ -44,10 +49,12 @@ runs:
env:
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
+ CUDA_VERSION: ${{ inputs.cuda-version }}
TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }}
BASE_IMAGE: ${{ inputs.docker-image }}
BUILD_TARGETS: ${{ inputs.build-targets }}
- PARENT_OUTPUT_DIR: ${{ inputs.output-dir}}
+ PARENT_OUTPUT_DIR: ${{ inputs.output-dir }}
+ TORCH_WHEELS_PATH: ${{ inputs.torch-wheel-dir }}
shell: bash
run: |
set -euo pipefail
@@ -68,7 +75,6 @@ runs:
export OUTPUT_DIR
echo "Building external package: $target in directory $OUTPUT_DIR"
python3 -m cli.run build external "$target"
-
done
END_TIME=$(date +%s)
diff --git a/.github/actions/checkout-pytorch/action.yml b/.github/actions/checkout-pytorch/action.yml
index 055404c69474..15f193ef3a5d 100644
--- a/.github/actions/checkout-pytorch/action.yml
+++ b/.github/actions/checkout-pytorch/action.yml
@@ -57,6 +57,21 @@ runs:
submodules: ${{ inputs.submodules }}
show-progress: false
+ - name: Clean submodules post checkout
+ id: clean-submodules
+ if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
+ shell: bash
+ env:
+ NO_SUDO: ${{ inputs.no-sudo }}
+ run: |
+ cd "${GITHUB_WORKSPACE}"
+ # Clean stale submodule dirs
+ if [ -z "${NO_SUDO}" ]; then
+ sudo git submodule foreach --recursive git clean -ffdx
+ else
+ git submodule foreach --recursive git clean -ffdx
+ fi
+
- name: Clean workspace (try again)
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' &&
(steps.first-clean.outcome != 'success' || steps.first-checkout-attempt.outcome != 'success') }}
diff --git a/.github/actions/setup-win/action.yml b/.github/actions/setup-win/action.yml
index 93c957896b5e..2ea330f93b49 100644
--- a/.github/actions/setup-win/action.yml
+++ b/.github/actions/setup-win/action.yml
@@ -6,6 +6,12 @@ inputs:
cuda-version:
description: which cuda version to install, 'cpu' for none
required: true
+ python-version:
+ required: false
+ type: string
+ default: "3.10"
+ description: |
+        The Python version to be used. Defaults to 3.10.
runs:
using: composite
@@ -38,18 +44,24 @@ runs:
CONDA="C:\Jenkins\Miniconda3\condabin\conda.bat"
{
+ echo "CONDA=${CONDA}";
echo "CONDA_RUN=${CONDA} run --no-capture-output";
echo "CONDA_BUILD=${CONDA} run conda-build";
echo "CONDA_INSTALL=${CONDA} install";
} >> "${GITHUB_ENV}"
- name: Setup Python3
+ env:
+ PYTHON_VERSION: ${{ inputs.python-version }}
shell: bash
run: |
set +e
set -x
- PYTHON3=$(${CONDA_RUN} which python3)
+ # Create new py_tmp env with python-version
+ ${CONDA} create -y -n py_tmp python=${PYTHON_VERSION} intel-openmp libuv
+
+ PYTHON3=$(${CONDA_RUN} -n py_tmp which python3)
EXIT_CODE=$?
if [[ "${EXIT_CODE}" == "0" ]]; then
@@ -62,7 +74,7 @@ runs:
# installation, which is Python 3 based. Its Python is default to Python 3. Further, there
# is also the Miniconda installation that is Python 2 based, and both can be installed if
# needed. In both cases, Python binary is just called python
- PYTHON=$(${CONDA_RUN} which python)
+ PYTHON=$(${CONDA_RUN} -n py_tmp which python)
EXIT_CODE=$?
if [[ "${EXIT_CODE}" == "0" ]]; then
diff --git a/.github/ci_commit_pins/audio.txt b/.github/ci_commit_pins/audio.txt
index 0b9c14848239..b0255e764c59 100644
--- a/.github/ci_commit_pins/audio.txt
+++ b/.github/ci_commit_pins/audio.txt
@@ -1 +1 @@
-10a5002c6195bd95e34df8fe28ff8a2d55a2a922
+27fc2493d383354a008106f22f3be232badee9a1
diff --git a/.github/ci_commit_pins/vllm.txt b/.github/ci_commit_pins/vllm.txt
index 80c5a90c7be9..bbc484d273a1 100644
--- a/.github/ci_commit_pins/vllm.txt
+++ b/.github/ci_commit_pins/vllm.txt
@@ -1 +1 @@
-add1adfec742dfb13e614dab3372b5aafd1ff046
+78a47f87ce259a48f0391fa9ae15add05ea7432b
diff --git a/.github/ci_commit_pins/xla.txt b/.github/ci_commit_pins/xla.txt
index 53cf6c8c9915..ee530f8c8b21 100644
--- a/.github/ci_commit_pins/xla.txt
+++ b/.github/ci_commit_pins/xla.txt
@@ -1 +1 @@
-a1c6ee92c85e8b0955c20892ed68f032a6015c09
+r2.9
diff --git a/.github/ci_configs/vllm/Dockerfile.tmp_vllm b/.github/ci_configs/vllm/Dockerfile.tmp_vllm
index 330a78424fee..2cee6ed2df19 100644
--- a/.github/ci_configs/vllm/Dockerfile.tmp_vllm
+++ b/.github/ci_configs/vllm/Dockerfile.tmp_vllm
@@ -12,54 +12,46 @@ ARG BUILD_BASE_IMAGE=torch-nightly-base
# by default, it uses devel-ubuntu22.04 official image.
ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
+# The logic is copied from https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile
+ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py"
-#################### TORCH NIGHTLY BASE IMAGE ####################
+
+#################### TORCH NIGHTLY BASE IMAGE ####################
# A base image for building vLLM with devel ubuntu 22.04, this is mainly used to build vllm in vllm builtkite ci
-From nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 as torch-nightly-base
-ARG CUDA_VERSION=12.8.1
-ARG PYTHON_VERSION=3.12
-ARG TARGETPLATFORM
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
- echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment
-
-# Install Python and other dependencies if it does not existed
-RUN if ! command -v python3 >/dev/null || ! python3 --version | grep -q "${PYTHON_VERSION}"; then \
- echo "Installing Python ${PYTHON_VERSION}..." && \
- echo 'tzdata tzdata/Areas select America' | debconf-set-selections && \
- echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections && \
- apt-get update -y && \
- apt-get install -y ccache software-properties-common git curl sudo && \
- for i in 1 2 3; do \
- add-apt-repository -y ppa:deadsnakes/ppa && break || \
- { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \
- done && \
- apt-get update -y && \
- apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv && \
- update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \
- update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && \
- ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config && \
- curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION}; \
- else \
- echo "Python ${PYTHON_VERSION} already present, skipping setup."; \
- fi \
- && python3 --version && python3 -m pip --version
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 as torch-nightly-base
+
+ARG CUDA_VERSION
+ARG PYTHON_VERSION
+ARG GET_PIP_URL
+
+# Install Python and other dependencies
+RUN apt-get update -y \
+ && apt-get install -y ccache software-properties-common git curl wget sudo vim \
+ && add-apt-repository -y ppa:deadsnakes/ppa \
+ && apt-get update -y \
+ && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
+ && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+ && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
+ && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
+ && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \
+ && python3 --version && python3 -m pip --version
# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
# as it was causing spam when compiling the CUTLASS kernels
# Ensure gcc >= 10 to avoid CUTLASS issues (bug 92519)
RUN current_gcc_version=$(gcc -dumpversion | cut -f1 -d.) && \
- if [ "$current_gcc_version" -lt 10 ]; then \
- echo "GCC version is $current_gcc_version, installing gcc-10..."; \
- apt-get update && \
- apt-get install -y gcc-10 g++-10 && \
- update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 && \
- update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 100; \
- else \
- echo "GCC version is $current_gcc_version, no need to install gcc-10."; \
- fi && \
- gcc --version && g++ --version
+ if command -v apt-get >/dev/null; then \
+ if [ "$current_gcc_version" -lt 10 ]; then \
+ echo "GCC version is $current_gcc_version, installing gcc-10..."; \
+ apt-get update \
+ && apt-get install -y gcc-10 g++-10 \
+ && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 \
+ && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 100; \
+ else \
+ echo "GCC version is $current_gcc_version, no need to install gcc-10."; \
+      fi; \
+ fi \
+ && gcc --version && g++ --version
# install uv for faster pip installs
RUN --mount=type=cache,target=/root/.cache/uv \
@@ -79,6 +71,21 @@ ENV UV_LINK_MODE=copy
FROM ${BUILD_BASE_IMAGE} AS base
USER root
+ARG CUDA_VERSION
+ARG PYTHON_VERSION
+
+# TODO (huydhn): Only works with the PyTorch manylinux builder
+ENV PATH="/opt/python/cp312-cp312/bin:${PATH}"
+
+# Install some system dependencies and double-check the Python version
+RUN if command -v apt-get >/dev/null; then \
+ apt-get update -y \
+ && apt-get install -y ccache software-properties-common git curl wget sudo vim; \
+ else \
+ dnf install -y git curl wget sudo vim; \
+ fi \
+ && python3 --version && python3 -m pip --version
+
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
@@ -118,17 +125,15 @@ RUN --mount=type=bind,source=${TORCH_WHEELS_PATH},target=/dist \
if [ -n "$TORCH_WHEELS_PATH" ] && [ "$TORCH_WHEELS_PATH" != "./requirements" ] && [ -d "/dist" ] && ls /dist/torch*.whl >/dev/null 2>&1; then \
echo "[INFO] Installing torch wheels to build vllm"; \
torch_whl=$(find /dist -maxdepth 1 -name 'torch-*.whl' -print -quit); \
- vision_whl=$(find /dist/vision -name 'torchvision*.whl' | head -n1 | xargs); \
- audio_whl=$(find /dist/audio -name 'torchaudio*.whl' | head -n1 | xargs); \
- uv pip install --system "${torch_whl}[opt-einsum]"; \
- uv pip install --system "${vision_whl}"; \
- uv pip install --system "${audio_whl}"; \
+ vision_whl=$(find /dist -name 'torchvision*.whl' | head -n1 | xargs); \
+ audio_whl=$(find /dist -name 'torchaudio*.whl' | head -n1 | xargs); \
+ uv pip install --system "${torch_whl}[opt-einsum]" "${vision_whl}" "${audio_whl}" /dist/*.whl; \
elif [ -n "$PINNED_TORCH_VERSION" ]; then \
echo "[INFO] Installing pinned torch nightly version to build vllm: $PINNED_TORCH_VERSION"; \
- uv pip install --system "$PINNED_TORCH_VERSION" --index-url https://download.pytorch.org/whl/nightly/cu128; \
+ uv pip install --system "$PINNED_TORCH_VERSION" --index-url https://download.pytorch.org/whl/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
else \
echo "[INFO] Installing torch nightly with latest one to build vllm"; \
- uv pip install --system torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128; \
+ uv pip install --system torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi
# Install numba 0.61.2 for cuda environment
@@ -137,12 +142,11 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Install common dependencies from vllm common.txt
RUN --mount=type=cache,target=/root/.cache/uv \
-uv pip install --system -r requirements/common.txt
-
+ uv pip install --system -r requirements/common.txt
# Must be set before installing xformers, so it installs the correct version of xformers.
-ARG exformer_cuda_arch_list='7.5;8.0+PTX;9.0a'
-ENV TORCH_CUDA_ARCH_LIST=${exformer_cuda_arch_list}
+ARG xformers_cuda_arch_list='7.5;8.0+PTX;9.0a'
+ENV TORCH_CUDA_ARCH_LIST=${xformers_cuda_arch_list}
ARG max_jobs=16
ENV MAX_JOBS=${max_jobs}
@@ -153,8 +157,8 @@ RUN pip freeze | grep -E 'ninja'
# Build xformers with cuda and torch nightly/wheel
# following official xformers guidance: https://github.com/facebookresearch/xformers#build
-# sha for https://github.com/facebookresearch/xformers/tree/v0.0.31
-ARG XFORMERS_COMMIT=eb0946a363464da96ea40afd1a7f72a907c25497
+# sha for https://github.com/facebookresearch/xformers/tree/v0.0.32.post2
+ARG XFORMERS_COMMIT=5d4b92a5e5a9c6c6d4878283f47d82e17995b468
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
@@ -176,6 +180,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# The build can take a long time, and the torch nightly version fetched from the URL can differ in the next docker stage.
# Track the nightly torch version used in the build so that, when we set up the runtime environment, we can make sure the version is the same
RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio' > torch_build_versions.txt
+
RUN cat torch_build_versions.txt
RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio'
@@ -187,11 +192,6 @@ RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio'
FROM base AS build
ARG TARGETPLATFORM
-ENV UV_HTTP_TIMEOUT=500
-ENV UV_INDEX_STRATEGY="unsafe-best-match"
-# Use copy mode to avoid hardlink failures with Docker cache mounts
-ENV UV_LINK_MODE=copy
-
COPY . .
RUN python3 use_existing_torch.py
@@ -250,9 +250,9 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
python3 setup.py bdist_wheel --dist-dir=vllm-dist --py-limited-api=cp38; \
fi
-RUN echo "[DEBUG] Listing current directory:" && \
+RUN echo "[INFO] Listing current directory:" && \
ls -al && \
- echo "[DEBUG] Showing torch_build_versions.txt content:" && \
+ echo "[INFO] Showing torch_build_versions.txt content:" && \
cat torch_build_versions.txt
#################### WHEEL BUILD IMAGE ####################
@@ -262,42 +262,40 @@ RUN echo "[DEBUG] Listing current directory:" && \
# Setup clean environment for vLLM for test and api server using ubuntu22.04 with AOT flashinfer
FROM ${FINAL_BASE_IMAGE} AS vllm-base
USER root
+
+ARG CUDA_VERSION
+ARG PYTHON_VERSION
+ARG GET_PIP_URL
+
+# TODO (huydhn): Only works with the PyTorch manylinux builder
+ENV PATH="/opt/python/cp312-cp312/bin:${PATH}"
+
# prepare for environment starts
WORKDIR /workspace
-RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
- echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment
-
-# Install Python and other dependencies if it does not existed
-RUN if ! command -v python3 >/dev/null || ! python3 --version | grep -q "${PYTHON_VERSION}"; then \
- echo "Installing Python ${PYTHON_VERSION}..." && \
- echo 'tzdata tzdata/Areas select America' | debconf-set-selections && \
- echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections && \
- apt-get update -y && \
- apt-get install -y ccache software-properties-common git curl sudo && \
- for i in 1 2 3; do \
- add-apt-repository -y ppa:deadsnakes/ppa && break || \
- { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \
- done && \
- apt-get update -y && \
- apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv && \
- update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \
- update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && \
- ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config && \
- curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION}; \
- else \
- echo "Python ${PYTHON_VERSION} already present, skipping setup."; \
- fi \
- && python3 --version && python3 -m pip --version
-
+# Install Python and other dependencies
+RUN if command -v apt-get >/dev/null; then \
+ apt-get update -y \
+ && apt-get install -y ccache software-properties-common git curl wget sudo vim \
+ && add-apt-repository -y ppa:deadsnakes/ppa \
+ && apt-get update -y \
+ && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
+ && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+ && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
+ && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
+ && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION}; \
+ else \
+ dnf install -y git curl wget sudo vim; \
+ fi \
+ && python3 --version && python3 -m pip --version
# Get the torch versions and wheels used in previous stages for consistency
COPY --from=base /workspace/torch_build_versions.txt ./torch_build_versions.txt
COPY --from=base /workspace/xformers-dist /wheels/xformers
COPY --from=build /workspace/vllm-dist /wheels/vllm
-RUN echo "[DEBUG] Listing current directory before torch install step:" && \
+RUN echo "[INFO] Listing current directory before torch install step:" && \
ls -al && \
- echo "[DEBUG] Showing torch_build_versions.txt content:" && \
+ echo "[INFO] Showing torch_build_versions.txt content:" && \
cat torch_build_versions.txt
# Workaround for https://github.com/openai/triton/issues/2507 and
@@ -306,7 +304,6 @@ RUN echo "[DEBUG] Listing current directory before torch install step:" && \
# or future versions of triton.
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
-
# Install uv for faster pip installs if not already present
RUN --mount=type=cache,target=/root/.cache/uv \
if ! python3 -m uv --version > /dev/null 2>&1; then \
@@ -326,15 +323,13 @@ RUN --mount=type=bind,source=${TORCH_WHEELS_PATH},target=/dist \
--mount=type=cache,target=/root/.cache/uv \
if [ -n "$TORCH_WHEELS_PATH" ] && [ "$TORCH_WHEELS_PATH" != "./requirements" ] && [ -d "/dist" ] && ls /dist/torch*.whl >/dev/null 2>&1; then \
torch_whl=$(find /dist -maxdepth 1 -name 'torch-*.whl' -print -quit); \
- vision_whl=$(find /dist/vision -name 'torchvision*.whl' | head -n1 | xargs); \
- audio_whl=$(find /dist/audio -name 'torchaudio*.whl' | head -n1 | xargs); \
+ vision_whl=$(find /dist -name 'torchvision*.whl' | head -n1 | xargs); \
+ audio_whl=$(find /dist -name 'torchaudio*.whl' | head -n1 | xargs); \
echo "[INFO] Use wheels to build : '${torch_whl}' '${audio_whl}' '${vision_whl}'"; \
- uv pip install --system "${torch_whl}[opt-einsum]"; \
- uv pip install --system "${vision_whl}"; \
- uv pip install --system "${audio_whl}"; \
+ uv pip install --system "${torch_whl}[opt-einsum]" "${vision_whl}" "${audio_whl}" /dist/*.whl; \
else \
echo "[INFO] Installing torch versions from torch_build_versions.txt"; \
- uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu128; \
+ uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi
# Install the vllm wheel from previous stage
@@ -345,9 +340,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system /wheels/xformers/*.whl --verbose
-
# Build flashinfer from source.
-ARG torch_cuda_arch_list='8.0;8.9;9.0a'
+ARG torch_cuda_arch_list='8.0;8.9;9.0a;10.0a;12.0'
# install packages needed to build flashinfer
# see issue: https://github.com/flashinfer-ai/flashinfer/issues/738
@@ -358,7 +352,7 @@ ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
# Build flashinfer for torch nightly from source; takes around 10 mins
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
-ARG FLASHINFER_GIT_REF="v0.2.9rc2"
+ARG FLASHINFER_GIT_REF="v0.2.14.post1"
RUN --mount=type=cache,target=/root/.cache/uv \
git clone --depth 1 --recursive --shallow-submodules \
--branch ${FLASHINFER_GIT_REF} \
@@ -376,6 +370,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Logging to confirm the torch versions
RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer'
+RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm\|^flashinfer' > build_summary.txt
################### VLLM INSTALLED IMAGE ####################
@@ -414,11 +409,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -r requirements/nightly_torch_test.txt
-# Workaround for #17068
-# pinned commit for v2.2.4
-RUN --mount=type=cache,target=/root/.cache/uv \
- uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@95d8aba8a8c75aedcaa6143713b11e745e7cd0d9#egg=mamba-ssm"
-
# Logging to confirm the torch versions
RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer'
@@ -433,4 +423,5 @@ FROM scratch as export-wheels
# Just copy the wheels we prepared in previous stages
COPY --from=base /workspace/xformers-dist /wheels/xformers
COPY --from=build /workspace/vllm-dist /wheels/vllm
+COPY --from=vllm-base /workspace/build_summary.txt /wheels/build_summary.txt
COPY --from=vllm-base /workspace/wheels/flashinfer /wheels/flashinfer-python
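
The torch-install steps above derive the nightly wheel index from CUDA_VERSION via cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') instead of the previously hardcoded cu128. A minimal Python sketch of the same mapping, for illustration only (the function name is hypothetical, not part of the Dockerfile):

    def nightly_index_url(cuda_version: str) -> str:
        # "12.8.1" -> "12.8" -> "128" -> .../nightly/cu128
        major_minor = ".".join(cuda_version.split(".")[:2])
        return f"https://download.pytorch.org/whl/nightly/cu{major_minor.replace('.', '')}"

    assert nightly_index_url("12.8.1").endswith("cu128")
    assert nightly_index_url("13.0.0").endswith("cu130")
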
diff --git a/.github/requirements/pip-requirements-macOS.txt b/.github/requirements/pip-requirements-macOS.txt
index 224835188d87..3a27cac46f71 100644
--- a/.github/requirements/pip-requirements-macOS.txt
+++ b/.github/requirements/pip-requirements-macOS.txt
@@ -28,7 +28,7 @@ pyyaml==6.0.2
scipy==1.12.0
setuptools==72.1.0
sympy==1.13.3
-tlparse==0.3.30
+tlparse==0.4.0
tensorboard==2.13.0
typing-extensions==4.12.2
unittest-xml-reporting<=3.2.0,>=2.0.0
diff --git a/.github/scripts/build_triton_wheel.py b/.github/scripts/build_triton_wheel.py
index beec9f96aba2..f2851e331725 100644
--- a/.github/scripts/build_triton_wheel.py
+++ b/.github/scripts/build_triton_wheel.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import os
+import re
import shutil
import sys
from pathlib import Path
@@ -50,6 +51,30 @@ def patch_init_py(
with open(path, "w") as f:
f.write(orig)
+def get_rocm_version() -> str:
+ rocm_path = os.environ.get('ROCM_HOME') or os.environ.get('ROCM_PATH') or "/opt/rocm"
+ rocm_version = "0.0.0"
+ rocm_version_h = f"{rocm_path}/include/rocm-core/rocm_version.h"
+ if not os.path.isfile(rocm_version_h):
+ rocm_version_h = f"{rocm_path}/include/rocm_version.h"
+ # The file could be missing due to 1) ROCm version < 5.2, or 2) no ROCm install.
+ if os.path.isfile(rocm_version_h):
+ RE_MAJOR = re.compile(r"#define\s+ROCM_VERSION_MAJOR\s+(\d+)")
+ RE_MINOR = re.compile(r"#define\s+ROCM_VERSION_MINOR\s+(\d+)")
+ RE_PATCH = re.compile(r"#define\s+ROCM_VERSION_PATCH\s+(\d+)")
+ major, minor, patch = 0, 0, 0
+ for line in open(rocm_version_h):
+ match = RE_MAJOR.search(line)
+ if match:
+ major = int(match.group(1))
+ match = RE_MINOR.search(line)
+ if match:
+ minor = int(match.group(1))
+ match = RE_PATCH.search(line)
+ if match:
+ patch = int(match.group(1))
+    rocm_version = f"{major}.{minor}.{patch}"
+ return rocm_version
def build_triton(
*,
@@ -64,14 +89,24 @@ def build_triton(
if "MAX_JOBS" not in env:
max_jobs = os.cpu_count() or 1
env["MAX_JOBS"] = str(max_jobs)
-
+    version_suffix = ""
+    if not release:
+        # Nightly binaries include the triton commit hash, e.g. 2.1.0+e6216047b8,
+        # while release builds should only include the version, e.g. 2.1.0
+ rocm_version = get_rocm_version()
+ version_suffix = f"+rocm{rocm_version}.git{commit_hash[:8]}"
+ version += version_suffix
with TemporaryDirectory() as tmpdir:
triton_basedir = Path(tmpdir) / "triton"
triton_pythondir = triton_basedir / "python"
triton_repo = "https://github.com/openai/triton"
if device == "rocm":
- triton_pkg_name = "pytorch-triton-rocm"
+ triton_repo = "https://github.com/ROCm/triton"
+ rocm_version = get_rocm_version() # e.g., "7.0.1"
+ if tuple(map(int, rocm_version.split("."))) > (7, 0, 0):
+ triton_pkg_name = "triton"
+ else:
+ triton_pkg_name = "pytorch-triton-rocm"
elif device == "xpu":
triton_pkg_name = "pytorch-triton-xpu"
triton_repo = "https://github.com/intel/intel-xpu-backend-for-triton"
@@ -84,10 +119,12 @@ def build_triton(
["git", "checkout", f"release/{ver}.{rev}.x"], cwd=triton_basedir
)
else:
+ check_call(["git", "fetch", "origin", commit_hash], cwd=triton_basedir)
check_call(["git", "checkout", commit_hash], cwd=triton_basedir)
# change built wheel name and version
env["TRITON_WHEEL_NAME"] = triton_pkg_name
+ env["TRITON_WHEEL_VERSION_SUFFIX"] = version_suffix
if with_clang_ldd:
env["TRITON_BUILD_WITH_CLANG_LLD"] = "1"
diff --git a/.github/scripts/filter_test_configs.py b/.github/scripts/filter_test_configs.py
index 9ba210a5ed2b..dd16dbc18db2 100755
--- a/.github/scripts/filter_test_configs.py
+++ b/.github/scripts/filter_test_configs.py
@@ -41,9 +41,9 @@ def is_cuda_or_rocm_job(job_name: Optional[str]) -> bool:
}
# The link to the published list of disabled jobs
-DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json"
+DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json?versionId=hjktHz2WOejHpxKpkqpDknTt5rMTM9KK"
# and unstable jobs
-UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json"
+UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json?versionId=wrjdvvQTJxgvMO.rGw5MEuMsj6XbjuV7"
# Some constants used to handle disabled and unstable jobs
JOB_NAME_SEP = "/"
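
Both URLs above now carry an S3 versionId query parameter, so the script reads a frozen snapshot of the disabled/unstable job lists rather than whatever is currently published. A hedged sketch of fetching such a pinned object with only the standard library (network access assumed; error handling omitted; the function name is illustrative):

    import json
    from urllib.request import urlopen

    PINNED_URL = (
        "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json"
        "?versionId=hjktHz2WOejHpxKpkqpDknTt5rMTM9KK"
    )

    def fetch_pinned_json(url: str = PINNED_URL) -> dict:
        # The versionId pins the exact S3 object version for the release branch.
        with urlopen(url) as resp:
            return json.load(resp)
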
diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py
index a576706ace22..4dc97ee6a284 100644
--- a/.github/scripts/generate_binary_build_matrix.py
+++ b/.github/scripts/generate_binary_build_matrix.py
@@ -16,18 +16,16 @@
# NOTE: Please also update the CUDA sources in `PIP_SOURCES` in tools/nightly.py when changing this
-CUDA_ARCHES = ["12.6", "12.8", "12.9", "13.0"]
+CUDA_ARCHES = ["12.6", "12.8", "13.0"]
CUDA_STABLE = "12.8"
CUDA_ARCHES_FULL_VERSION = {
"12.6": "12.6.3",
"12.8": "12.8.1",
- "12.9": "12.9.1",
"13.0": "13.0.0",
}
CUDA_ARCHES_CUDNN_VERSION = {
"12.6": "9",
"12.8": "9",
- "12.9": "9",
"13.0": "9",
}
@@ -40,99 +38,82 @@
CPU_S390X_ARCH = ["cpu-s390x"]
-CUDA_AARCH64_ARCHES = ["12.9-aarch64"]
+CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64", "13.0-aarch64"]
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"12.6": (
- "nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'"
+ "nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | "
+ "nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | "
+ "nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | "
+ "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | "
+ "nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | "
+ "nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | "
+ "nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | "
+ "nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | "
+ "nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | "
+ "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
+ "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
+ "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | "
+ "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | "
+ "nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | "
+ "nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'"
),
"12.8": (
- "nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'"
- ),
- "12.9": (
- "nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'"
+ "nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | "
+ "nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | "
+ "nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | "
+ "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | "
+ "nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | "
+ "nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | "
+ "nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | "
+ "nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | "
+ "nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | "
+ "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
+ "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
+ "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | "
+ "nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | "
+ "nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | "
+ "nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'"
),
"13.0": (
- "nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'"
+ "nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | "
+ "nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | "
+ "nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | "
+ "nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | "
+ "nvidia-cublas==13.0.0.19; platform_system == 'Linux' | "
+ "nvidia-cufft==12.0.0.15; platform_system == 'Linux' | "
+ "nvidia-curand==10.4.0.35; platform_system == 'Linux' | "
+ "nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | "
+ "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | "
+ "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | "
+ "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | "
+ "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | "
+ "nvidia-nvtx==13.0.39; platform_system == 'Linux' | "
+ "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | "
+ "nvidia-cufile==1.15.0.42; platform_system == 'Linux'"
),
"xpu": (
- "intel-cmplr-lib-rt==2025.1.1 | "
- "intel-cmplr-lib-ur==2025.1.1 | "
- "intel-cmplr-lic-rt==2025.1.1 | "
- "intel-sycl-rt==2025.1.1 | "
- "oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | "
- "onemkl-sycl-blas==2025.1.0 | "
- "onemkl-sycl-dft==2025.1.0 | "
- "onemkl-sycl-lapack==2025.1.0 | "
- "onemkl-sycl-rng==2025.1.0 | "
- "onemkl-sycl-sparse==2025.1.0 | "
- "dpcpp-cpp-rt==2025.1.1 | "
- "intel-opencl-rt==2025.1.1 | "
- "mkl==2025.1.0 | "
- "intel-openmp==2025.1.1 | "
- "tbb==2022.1.0 | "
- "tcmlib==1.3.0 | "
- "umf==0.10.0 | "
- "intel-pti==0.12.3"
+ "intel-cmplr-lib-rt==2025.2.1 | "
+ "intel-cmplr-lib-ur==2025.2.1 | "
+ "intel-cmplr-lic-rt==2025.2.1 | "
+ "intel-sycl-rt==2025.2.1 | "
+ "oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+ "oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+ "impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+ "onemkl-sycl-blas==2025.2.0 | "
+ "onemkl-sycl-dft==2025.2.0 | "
+ "onemkl-sycl-lapack==2025.2.0 | "
+ "onemkl-sycl-rng==2025.2.0 | "
+ "onemkl-sycl-sparse==2025.2.0 | "
+ "dpcpp-cpp-rt==2025.2.1 | "
+ "intel-opencl-rt==2025.2.1 | "
+ "mkl==2025.2.0 | "
+ "intel-openmp==2025.2.1 | "
+ "tbb==2022.2.0 | "
+ "tcmlib==1.4.0 | "
+ "umf==0.11.0 | "
+ "intel-pti==0.13.1"
),
}
@@ -240,12 +221,8 @@ def generate_libtorch_matrix(
if os == "linux":
arches += CUDA_ARCHES
arches += ROCM_ARCHES
- if "13.0" in arches:
- arches.remove("13.0")
elif os == "windows":
arches += CUDA_ARCHES
- if "13.0" in arches:
- arches.remove("13.0")
if libtorch_variants is None:
libtorch_variants = [
"shared-with-deps",
@@ -310,8 +287,6 @@ def generate_wheels_matrix(
arches += CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
elif os == "windows":
arches += CUDA_ARCHES + XPU_ARCHES
- if "13.0" in arches:
- arches.remove("13.0")
elif os == "linux-aarch64":
# Separate new if as the CPU type is different and
# uses different build/test scripts
@@ -334,19 +309,20 @@ def generate_wheels_matrix(
else arch_version
)
- # TODO: Enable python 3.13t on cpu-s390x
- if gpu_arch_type == "cpu-s390x" and python_version == "3.13t":
- continue
# TODO: Enable python 3.14 for rest
- if os not in ["linux", "linux-aarch64", "macos-arm64", "windows"] and (
- python_version == "3.14" or python_version == "3.14t"
- ):
+ if os not in [
+ "linux",
+ "linux-aarch64",
+ "linux-s390x",
+ "macos-arm64",
+ "windows",
+ ] and (python_version == "3.14" or python_version == "3.14t"):
continue
# cuda linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
if (
- arch_version in ["13.0", "12.9", "12.8", "12.6"]
+ arch_version in ["13.0", "12.8", "12.6"]
and os == "linux"
or arch_version in CUDA_AARCH64_ARCHES
):
@@ -410,6 +386,5 @@ def generate_wheels_matrix(
validate_nccl_dep_consistency("13.0")
-validate_nccl_dep_consistency("12.9")
validate_nccl_dep_consistency("12.8")
validate_nccl_dep_consistency("12.6")
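
Each PYTORCH_EXTRA_INSTALL_REQUIREMENTS value above packs many pinned PEP 508 requirements into a single string joined with " | ". A sketch of splitting such a value back into individual requirement lines (the splitter is illustrative; the real consumers live in the workflow-generation tooling):

    def split_extra_requirements(spec: str) -> list[str]:
        # "pkg==1; marker | pkg2==2; marker" -> ["pkg==1; marker", "pkg2==2; marker"]
        return [req.strip() for req in spec.split(" | ") if req.strip()]

    reqs = split_extra_requirements(
        "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
        "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux'"
    )
    assert reqs == [
        "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux'",
        "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux'",
    ]
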
diff --git a/.github/scripts/generate_ci_workflows.py b/.github/scripts/generate_ci_workflows.py
index 67906d4ad88d..0396c405ad0a 100755
--- a/.github/scripts/generate_ci_workflows.py
+++ b/.github/scripts/generate_ci_workflows.py
@@ -135,7 +135,7 @@ class OperatingSystem:
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
OperatingSystem.LINUX,
arches=["6.4"],
- python_versions=["3.9"],
+ python_versions=["3.10"],
),
ciflow_config=CIFlowConfig(
labels={
diff --git a/.github/scripts/test_trymerge.py b/.github/scripts/test_trymerge.py
index 58f3ca50baa1..ac3a1cc12921 100755
--- a/.github/scripts/test_trymerge.py
+++ b/.github/scripts/test_trymerge.py
@@ -27,6 +27,7 @@
get_drci_classifications,
gh_get_team_members,
GitHubPR,
+ iter_issue_timeline_until_comment,
JobCheckState,
main as trymerge_main,
MandatoryChecksMissingError,
@@ -34,6 +35,8 @@
RE_GHSTACK_DESC,
read_merge_rules,
remove_job_name_suffix,
+ sha_from_committed_event,
+ sha_from_force_push_after,
validate_revert,
)
@@ -124,7 +127,7 @@ def __init__(self) -> None:
self.force = force
self.pr_num = 76123
self.dry_run = True
- self.comment_id = 0
+ self.comment_id = 12345 # Set to non-zero value
self.reason = "this is for testing"
self.ignore_current = False
self.check_mergeability = False
@@ -152,9 +155,9 @@ def mock_revert(
def mock_merge(
pr: GitHubPR,
repo: GitRepo,
+ comment_id: int,
dry_run: bool = False,
skip_mandatory_checks: bool = False,
- comment_id: Optional[int] = None,
timeout_minutes: int = 400,
stale_pr_days: int = 3,
ignore_current: bool = False,
@@ -470,9 +473,9 @@ def test_main_force(
mock_merge.assert_called_once_with(
mock.ANY,
mock.ANY,
+ comment_id=mock.ANY,
dry_run=mock.ANY,
skip_mandatory_checks=True,
- comment_id=mock.ANY,
ignore_current=False,
)
@@ -485,9 +488,9 @@ def test_main_merge(self, mock_merge: Any, *args: Any) -> None:
mock_merge.assert_called_once_with(
mock.ANY,
mock.ANY,
+ comment_id=mock.ANY,
dry_run=mock.ANY,
skip_mandatory_checks=False,
- comment_id=mock.ANY,
ignore_current=False,
)
@@ -1138,5 +1141,176 @@ def test__revlist_to_prs_two_prs(
)
+@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
+@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
+@mock.patch(
+ "trymerge.get_drci_classifications", side_effect=mocked_drci_classifications
+)
+class TestTimelineFunctions(TestCase):
+ """Tests for the new timeline-related functions"""
+
+ def test_sha_from_committed_event(self, *args: Any) -> None:
+ """Test extracting SHA from committed event"""
+ # Based on actual GitHub API format - committed events have "sha" at top level
+ event = {
+ "event": "committed",
+ "sha": "fb21ce932ded6670c918804a0d9151b773770a7c",
+ }
+ self.assertEqual(
+ sha_from_committed_event(event), "fb21ce932ded6670c918804a0d9151b773770a7c"
+ )
+
+ # Test with missing SHA
+ event_no_sha = {"event": "committed"}
+ self.assertIsNone(sha_from_committed_event(event_no_sha))
+
+ def test_sha_from_force_push_after(self, *args: Any) -> None:
+ """Test extracting SHA from force push event"""
+        # The function handles both formats: force push events in the current
+        # GitHub API carry "commit_id" at the top level, while legacy payloads
+        # use "after", "after_commit", "after_sha", or "head_sha" fields
+
+ # Test with the legacy format the current function handles
+ event_legacy = {
+ "event": "head_ref_force_pushed",
+ "after": {"sha": "ef22bcbc54bb0f787e1e4ffd3d83df18fc407f5e"},
+ }
+ self.assertEqual(
+ sha_from_force_push_after(event_legacy),
+ "ef22bcbc54bb0f787e1e4ffd3d83df18fc407f5e",
+ )
+
+        # Test with the current GitHub API format (commit_id at top level)
+ event_real_api = {
+ "event": "head_ref_force_pushed",
+ "commit_id": "ef22bcbc54bb0f787e1e4ffd3d83df18fc407f5e",
+ }
+ self.assertEqual(
+ sha_from_force_push_after(event_real_api),
+ "ef22bcbc54bb0f787e1e4ffd3d83df18fc407f5e",
+        )  # commit_id takes precedence in the current implementation
+
+ # Test with missing SHA
+ event_no_sha = {"event": "head_ref_force_pushed"}
+ self.assertIsNone(sha_from_force_push_after(event_no_sha))
+
+ @mock.patch("trymerge.gh_fetch_json_list")
+ def test_iter_issue_timeline_until_comment(
+ self, mock_gh_fetch_json_list: Any, *args: Any
+ ) -> None:
+ """Test timeline iteration until target comment"""
+ # Mock timeline data based on actual GitHub API format
+ timeline_data = [
+ {"event": "commented", "id": 100, "body": "first comment"},
+ {"event": "committed", "sha": "fb21ce932ded6670c918804a0d9151b773770a7c"},
+ {"event": "commented", "id": 200, "body": "target comment"},
+ {"event": "commented", "id": 300, "body": "after target"},
+ ]
+ mock_gh_fetch_json_list.return_value = timeline_data
+
+ # Test iteration stops at target comment
+ events = list(iter_issue_timeline_until_comment("pytorch", "pytorch", 123, 200))
+ self.assertEqual(len(events), 3) # Should stop at target comment
+ self.assertEqual(events[0]["event"], "commented")
+ self.assertEqual(events[0]["id"], 100)
+ self.assertEqual(events[1]["event"], "committed")
+ self.assertEqual(events[1]["sha"], "fb21ce932ded6670c918804a0d9151b773770a7c")
+ self.assertEqual(events[2]["event"], "commented")
+ self.assertEqual(events[2]["id"], 200)
+
+ @mock.patch("trymerge.gh_fetch_json_list")
+ def test_iter_issue_timeline_until_comment_not_found(
+ self, mock_gh_fetch_json_list: Any, *args: Any
+ ) -> None:
+ """Test timeline iteration when target comment is not found"""
+ # Mock empty timeline
+ mock_gh_fetch_json_list.return_value = []
+
+ events = list(iter_issue_timeline_until_comment("pytorch", "pytorch", 123, 999))
+ self.assertEqual(len(events), 0)
+
+ @mock.patch("trymerge.iter_issue_timeline_until_comment")
+ def test_get_commit_sha_at_comment_commit_after_comment(
+ self, mock_iter_timeline: Any, *args: Any
+ ) -> None:
+ """Test get_commit_sha_at_comment returns correct SHA after comment"""
+ mock_iter_timeline.return_value = [
+ {"event": "committed", "sha": "commit1"},
+ {"event": "committed", "sha": "commit2"},
+ {"event": "commented", "id": 100},
+ {"event": "head_ref_force_pushed", "after": {"sha": "commit3"}},
+ ]
+ pr = GitHubPR("pytorch", "pytorch", 77700)
+ sha = pr.get_commit_sha_at_comment(100)
+ self.assertEqual(sha, "commit2")
+
+ @mock.patch("trymerge.iter_issue_timeline_until_comment")
+ def test_get_commit_sha_at_comment_force_push_before_comment(
+ self, mock_iter_timeline: Any, *args: Any
+ ) -> None:
+ mock_iter_timeline.return_value = [
+ {"event": "committed", "sha": "commit1"},
+ {"event": "committed", "sha": "commit2"},
+ {"event": "head_ref_force_pushed", "commit_id": "commit3"},
+ {"event": "commented", "id": 100},
+ ]
+ pr = GitHubPR("pytorch", "pytorch", 77700)
+ sha = pr.get_commit_sha_at_comment(100)
+ self.assertEqual(sha, "commit3")
+
+ @mock.patch("trymerge.iter_issue_timeline_until_comment")
+ def test_get_commit_sha_at_comment_force_push_before_comment_legacy_mode(
+ self, mock_iter_timeline: Any, *args: Any
+ ) -> None:
+ mock_iter_timeline.return_value = [
+ {"event": "committed", "sha": "commit1"},
+ {"event": "committed", "sha": "commit2"},
+ {"event": "head_ref_force_pushed", "after": {"sha": "commit3"}},
+ {"event": "commented", "id": 100},
+ ]
+ pr = GitHubPR("pytorch", "pytorch", 77700)
+ sha = pr.get_commit_sha_at_comment(100)
+ self.assertEqual(sha, "commit3")
+
+ @mock.patch("trymerge.iter_issue_timeline_until_comment")
+ def test_get_commit_sha_at_comment_multiple_comments(
+ self, mock_iter_timeline: Any, *args: Any
+ ) -> None:
+ mock_iter_timeline.return_value = [
+ {"event": "committed", "sha": "commit1"},
+ {"event": "commented", "id": 100},
+ {"event": "committed", "sha": "commit2"},
+ {"event": "commented", "id": 200},
+ {"event": "head_ref_force_pushed", "after": {"sha": "commit3"}},
+ {"event": "commented", "id": 300},
+ ]
+ pr = GitHubPR("pytorch", "pytorch", 77700)
+ sha = pr.get_commit_sha_at_comment(200)
+ self.assertEqual(sha, "commit2")
+ sha = pr.get_commit_sha_at_comment(300)
+ self.assertEqual(sha, "commit3")
+
+ @mock.patch("trymerge.iter_issue_timeline_until_comment")
+ def test_get_commit_sha_at_comment_no_events(
+ self, mock_iter_timeline: Any, *args: Any
+ ) -> None:
+ mock_iter_timeline.return_value = [
+ {"event": "commented", "id": 100},
+ {"event": "labeled", "label": {"name": "test"}},
+ ]
+ pr = GitHubPR("pytorch", "pytorch", 77700)
+ sha = pr.get_commit_sha_at_comment(100)
+ self.assertIsNone(sha)
+
+ @mock.patch("trymerge.iter_issue_timeline_until_comment")
+ def test_get_commit_sha_at_comment_exception(
+ self, mock_iter_timeline: Any, *args: Any
+ ) -> None:
+ mock_iter_timeline.side_effect = Exception("API error")
+ pr = GitHubPR("pytorch", "pytorch", 77700)
+ sha = pr.get_commit_sha_at_comment(100)
+ self.assertIsNone(sha)
+
+
if __name__ == "__main__":
main()
diff --git a/.github/scripts/trymerge.py b/.github/scripts/trymerge.py
index 695a53305a05..00b66869dcf2 100755
--- a/.github/scripts/trymerge.py
+++ b/.github/scripts/trymerge.py
@@ -450,6 +450,63 @@ def __init__(self, name: str, url: str, run_id: int, status: Optional[str]):
IGNORABLE_FAILED_CHECKS_THESHOLD = 10
+def iter_issue_timeline_until_comment(
+ org: str, repo: str, issue_number: int, target_comment_id: int, max_pages: int = 200
+) -> Any:
+ """
+ Yield timeline entries in order until (and including) the entry whose id == target_comment_id
+ for a 'commented' event. Stops once the target comment is encountered.
+ """
+ page = 1
+
+ while page <= max_pages:
+ url = (
+ f"https://api.github.com/repos/{org}/{repo}/issues/{issue_number}/timeline"
+ )
+ params = {"per_page": 100, "page": page}
+
+ batch = gh_fetch_json_list(url, params)
+
+ if not batch:
+ return
+ for ev in batch:
+ # The target is the issue comment row with event == "commented" and id == issue_comment_id
+ if ev.get("event") == "commented" and ev.get("id") == target_comment_id:
+ yield ev # nothing in the timeline after this matters, so stop early
+ return
+ yield ev
+ if len(batch) < 100:
+ return
+ page += 1
+
+ # If we got here without finding the comment, then we either hit a bug or some github PR
+ # has a _really_ long timeline.
+ # The max # of pages found on any pytorch/pytorch PR at the time of this change was 41
+ raise RuntimeError(
+        f"Could not find comment {target_comment_id} in the first {max_pages} pages of the timeline at url {url}. "
+        "This is most likely a bug, please report it to the @pytorch/pytorch-dev-infra team."
+ )
+
+
+def sha_from_committed_event(ev: dict[str, Any]) -> Optional[str]:
+ """Extract SHA from committed event in timeline"""
+ return ev.get("sha")
+
+
+def sha_from_force_push_after(ev: dict[str, Any]) -> Optional[str]:
+ """Extract SHA from force push event in timeline"""
+ # The current GitHub API format
+ commit_id = ev.get("commit_id")
+ if commit_id:
+ return str(commit_id)
+
+ # Legacy format
+ after = ev.get("after") or ev.get("after_commit") or {}
+ if isinstance(after, dict):
+ return after.get("sha") or after.get("oid")
+ return ev.get("after_sha") or ev.get("head_sha")
+
+
def gh_get_pr_info(org: str, proj: str, pr_no: int) -> Any:
rc = gh_graphql(GH_GET_PR_INFO_QUERY, name=proj, owner=org, number=pr_no)
return rc["data"]["repository"]["pullRequest"]
@@ -737,16 +794,24 @@ def get_changed_files_count(self) -> int:
def last_commit(self) -> Any:
return self.info["commits"]["nodes"][-1]["commit"]
+ def last_commit_sha(self, default: Optional[str] = None) -> str:
+ # for commits, the oid is the sha
+
+ if default is None:
+ return str(self.last_commit()["oid"])
+
+ return str(self.last_commit().get("oid", default))
+
def get_merge_base(self) -> str:
if self.merge_base:
return self.merge_base
- last_commit_oid = self.last_commit()["oid"]
+ last_commit_sha = self.last_commit_sha()
# NB: We could use self.base_ref() here for regular PR, however, that doesn't
# work for ghstack where the base is the custom branch, i.e. gh/USER/ID/base,
# so let's just use main instead
self.merge_base = gh_fetch_merge_base(
- self.org, self.project, last_commit_oid, self.default_branch()
+ self.org, self.project, last_commit_sha, self.default_branch()
)
# Fallback to baseRefOid if the API call fails, i.e. rate limit. Note that baseRefOid
@@ -835,6 +900,44 @@ def get_approved_by(self) -> list[str]:
def get_commit_count(self) -> int:
return int(self.info["commits_with_authors"]["totalCount"])
+ def get_commit_sha_at_comment(self, comment_id: int) -> Optional[str]:
+ """
+ Get the PR head commit SHA that was present when a specific comment was posted.
+ This ensures we only merge the state of the PR at the time the merge command was issued,
+ not any subsequent commits that may have been pushed after.
+
+ Returns None if no head-changing events found before the comment or if the comment was not found.
+ """
+ head = None
+
+ try:
+ for event in iter_issue_timeline_until_comment(
+ self.org, self.project, self.pr_num, comment_id
+ ):
+ etype = event.get("event")
+ if etype == "committed":
+ sha = sha_from_committed_event(event)
+ if sha:
+ head = sha
+ print(f"Timeline: Found commit event for SHA {sha}")
+ elif etype == "head_ref_force_pushed":
+ sha = sha_from_force_push_after(event)
+ if sha:
+ head = sha
+ print(f"Timeline: Found force push event for SHA {sha}")
+ elif etype == "commented":
+ if event.get("id") == comment_id:
+                    print(f"Timeline: Found final comment; head sha is {head}")
+ return head
+ except Exception as e:
+ print(
+ f"Warning: Failed to reconstruct timeline for comment {comment_id}: {e}"
+ )
+ return None
+
+ print(f"Did not find comment with id {comment_id} in the PR timeline")
+ return None
+
def get_pr_creator_login(self) -> str:
return cast(str, self.info["author"]["login"])
@@ -1151,7 +1254,7 @@ def merge_into(
*,
skip_mandatory_checks: bool = False,
dry_run: bool = False,
- comment_id: Optional[int] = None,
+ comment_id: int,
ignore_current_checks: Optional[list[str]] = None,
) -> None:
# Raises exception if matching rule is not found
@@ -1167,7 +1270,7 @@ def merge_into(
skip_internal_checks=can_skip_internal_checks(self, comment_id),
ignore_current_checks=ignore_current_checks,
)
- additional_merged_prs = self.merge_changes(
+ additional_merged_prs = self.merge_changes_locally(
repo, skip_mandatory_checks, comment_id
)
@@ -1196,7 +1299,7 @@ def merge_into(
broken_trunk_checks=ignorable_checks.get("BROKEN_TRUNK", []),
flaky_checks=ignorable_checks.get("FLAKY", []),
unstable_checks=ignorable_checks.get("UNSTABLE", []),
- last_commit_sha=self.last_commit().get("oid", ""),
+ last_commit_sha=self.last_commit_sha(default=""),
merge_base_sha=self.get_merge_base(),
merge_commit_sha=merge_commit_sha,
is_failed=False,
@@ -1217,7 +1320,7 @@ def merge_into(
dry_run=dry_run,
)
- def merge_changes(
+ def merge_changes_locally(
self,
repo: GitRepo,
skip_mandatory_checks: bool = False,
@@ -1226,27 +1329,15 @@ def merge_changes(
skip_all_rule_checks: bool = False,
) -> list["GitHubPR"]:
"""
- :param skip_all_rule_checks: If true, skips all rule checks, useful for dry-running merge locally
+ :param skip_all_rule_checks: If true, skips all rule checks on ghstack PRs, useful for dry-running merge locally
"""
branch_to_merge_into = self.default_branch() if branch is None else branch
if repo.current_branch() != branch_to_merge_into:
repo.checkout(branch_to_merge_into)
- if not self.is_ghstack_pr():
- msg = self.gen_commit_message()
- pr_branch_name = f"__pull-request-{self.pr_num}__init__"
- repo.fetch(self.last_commit()["oid"], pr_branch_name)
- repo._run_git("merge", "--squash", pr_branch_name)
- repo._run_git("commit", f'--author="{self.get_author()}"', "-m", msg)
-
- # Did the PR change since we started the merge?
- pulled_sha = repo.show_ref(pr_branch_name)
- latest_pr_status = GitHubPR(self.org, self.project, self.pr_num)
- if pulled_sha != latest_pr_status.last_commit()["oid"]:
- raise RuntimeError(
- "PR has been updated since CI checks last passed. Please rerun the merge command."
- )
- return []
- else:
+
+ # It's okay to skip the commit SHA check for ghstack PRs since
+ # authoring requires write access to the repo.
+ if self.is_ghstack_pr():
return self.merge_ghstack_into(
repo,
skip_mandatory_checks,
@@ -1254,6 +1345,48 @@ def merge_changes(
skip_all_rule_checks=skip_all_rule_checks,
)
+ msg = self.gen_commit_message()
+ pr_branch_name = f"__pull-request-{self.pr_num}__init__"
+
+ # Determine which commit SHA to merge
+ commit_to_merge = None
+ if not comment_id:
+ raise ValueError("Must provide --comment-id when merging regular PRs")
+
+ # Get the commit SHA that was present when the comment was made
+ commit_to_merge = self.get_commit_sha_at_comment(comment_id)
+ if not commit_to_merge:
+ raise RuntimeError(
+ f"Could not find commit that was pushed before comment {comment_id}"
+ )
+
+ # Validate that this commit is the latest commit on the PR
+ latest_commit = self.last_commit_sha()
+ if commit_to_merge != latest_commit:
+ raise RuntimeError(
+ f"Commit {commit_to_merge} was HEAD when comment {comment_id} was posted "
+ f"but now the latest commit on the PR is {latest_commit}. "
+ f"Please re-issue the merge command to merge the latest commit."
+ )
+
+ print(f"Merging commit {commit_to_merge} locally")
+
+ repo.fetch(commit_to_merge, pr_branch_name)
+ repo._run_git("merge", "--squash", pr_branch_name)
+ repo._run_git("commit", f'--author="{self.get_author()}"', "-m", msg)
+
+ # Did the PR change since we started the merge?
+ pulled_sha = repo.show_ref(pr_branch_name)
+ latest_pr_status = GitHubPR(self.org, self.project, self.pr_num)
+ if (
+ pulled_sha != latest_pr_status.last_commit_sha()
+ or pulled_sha != commit_to_merge
+ ):
+ raise RuntimeError(
+ "PR has been updated since CI checks last passed. Please rerun the merge command."
+ )
+ return []
+
class MergeRuleFailedError(RuntimeError):
def __init__(self, message: str, rule: Optional["MergeRule"] = None) -> None:
@@ -1458,7 +1591,7 @@ def find_matching_merge_rule(
pending_checks = []
failed_checks = []
- hud_link = f"https://hud.pytorch.org/{pr.org}/{pr.project}/commit/{pr.last_commit()['oid']}"
+ hud_link = f"https://hud.pytorch.org/{pr.org}/{pr.project}/commit/{pr.last_commit_sha()}"
if len(failed_checks) > 0:
if reject_reason_score < 30000:
reject_reason_score = 30000
@@ -2156,14 +2289,14 @@ def categorize_checks(
def merge(
pr: GitHubPR,
repo: GitRepo,
+ comment_id: int,
dry_run: bool = False,
skip_mandatory_checks: bool = False,
- comment_id: Optional[int] = None,
timeout_minutes: int = 400,
stale_pr_days: int = 3,
ignore_current: bool = False,
) -> None:
- initial_commit_sha = pr.last_commit()["oid"]
+ initial_commit_sha = pr.last_commit_sha()
pr_link = f"https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num}"
print(f"Attempting merge of {initial_commit_sha} ({pr_link})")
@@ -2234,7 +2367,7 @@ def merge(
f"Attempting merge of https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num} ({elapsed_time / 60} minutes elapsed)"
)
pr = GitHubPR(pr.org, pr.project, pr.pr_num)
- if initial_commit_sha != pr.last_commit()["oid"]:
+ if initial_commit_sha != pr.last_commit_sha():
raise RuntimeError(
"New commits were pushed while merging. Please rerun the merge command."
)
@@ -2401,7 +2534,7 @@ def handle_exception(e: Exception, title: str = "Merge failed") -> None:
if args.check_mergeability:
if pr.is_ghstack_pr():
get_ghstack_prs(repo, pr) # raises error if out of sync
- pr.merge_changes(
+ pr.merge_changes_locally(
repo,
skip_mandatory_checks=True,
skip_all_rule_checks=True,
@@ -2416,12 +2549,18 @@ def handle_exception(e: Exception, title: str = "Merge failed") -> None:
gh_post_pr_comment(org, project, args.pr_num, message, dry_run=args.dry_run)
return
try:
+ # Ensure comment id is set, else fail
+ if not args.comment_id:
+ raise ValueError(
+ "Comment ID is required for merging PRs, please provide it using --comment-id"
+ )
+
merge(
pr,
repo,
+ comment_id=args.comment_id,
dry_run=args.dry_run,
skip_mandatory_checks=args.force,
- comment_id=args.comment_id,
ignore_current=args.ignore_current,
)
except Exception as e:
@@ -2443,7 +2582,7 @@ def handle_exception(e: Exception, title: str = "Merge failed") -> None:
broken_trunk_checks=[],
flaky_checks=[],
unstable_checks=[],
- last_commit_sha=pr.last_commit().get("oid", ""),
+ last_commit_sha=pr.last_commit_sha(default=""),
merge_base_sha=pr.get_merge_base(),
is_failed=True,
skip_mandatory_checks=args.force,
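
get_commit_sha_at_comment() above boils down to a fold over timeline events: remember the most recent head-changing SHA until the target comment appears. A dependency-free sketch of that reduction, using the same event shapes exercised by the tests (the function name is illustrative, not part of trymerge.py):

    from typing import Any, Optional

    def head_sha_at_comment(
        events: list[dict[str, Any]], comment_id: int
    ) -> Optional[str]:
        head = None
        for ev in events:
            etype = ev.get("event")
            if etype == "committed" and ev.get("sha"):
                head = ev["sha"]
            elif etype == "head_ref_force_pushed":
                after = ev.get("after")
                sha = ev.get("commit_id") or (
                    after.get("sha") if isinstance(after, dict) else None
                )
                head = sha or head
            elif etype == "commented" and ev.get("id") == comment_id:
                return head  # PR head at the moment the comment was posted
        return None  # comment not found

    events = [
        {"event": "committed", "sha": "c1"},
        {"event": "head_ref_force_pushed", "commit_id": "c2"},
        {"event": "commented", "id": 100},
    ]
    assert head_sha_at_comment(events, 100) == "c2"
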
diff --git a/.github/templates/common.yml.j2 b/.github/templates/common.yml.j2
index 23d4c003efa8..7c93fdf522a4 100644
--- a/.github/templates/common.yml.j2
+++ b/.github/templates/common.yml.j2
@@ -4,7 +4,7 @@
{%- set download_artifact_action = "actions/download-artifact@v4.1.7" -%}
{%- set timeout_minutes = 240 -%}
-{%- set timeout_minutes_windows_binary = 300 -%}
+{%- set timeout_minutes_windows_binary = 360 -%}
{%- macro concurrency(build_environment) -%}
concurrency:
@@ -32,7 +32,7 @@ concurrency:
{%- macro setup_ec2_windows() -%}
!{{ display_ec2_information() }}
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/templates/linux_binary_build_workflow.yml.j2 b/.github/templates/linux_binary_build_workflow.yml.j2
index e0998e46fb5f..bf7db5866e78 100644
--- a/.github/templates/linux_binary_build_workflow.yml.j2
+++ b/.github/templates/linux_binary_build_workflow.yml.j2
@@ -56,7 +56,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -77,6 +77,9 @@ jobs:
runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine"
timeout-minutes: 420
+ {%- elif config["gpu_arch_type"] == "rocm" %}
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
{%- elif "conda" in build_environment and config["gpu_arch_type"] == "cuda" %}
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.24xlarge.ephemeral
@@ -135,7 +138,7 @@ jobs:
contents: read
steps:
- name: Setup XPU
- uses: ./.github/actions/setup-xpu
+ uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.9
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v4
@@ -150,10 +153,10 @@ jobs:
with:
name: !{{ config["build_name"] }}
path: "${{ runner.temp }}/artifacts/"
- !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+ !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: !{{ config["container_image"] }}
@@ -161,7 +164,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -182,7 +185,7 @@ jobs:
with:
name: !{{ config["build_name"] }}
path: "${{ runner.temp }}/artifacts/"
- !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+ !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
@@ -196,7 +199,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: !{{ config["container_image"] }}
@@ -204,7 +207,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
diff --git a/.github/templates/macos_binary_build_workflow.yml.j2 b/.github/templates/macos_binary_build_workflow.yml.j2
index 02fa68f54172..662060bb1307 100644
--- a/.github/templates/macos_binary_build_workflow.yml.j2
+++ b/.github/templates/macos_binary_build_workflow.yml.j2
@@ -68,12 +68,7 @@ jobs:
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
- if [ -d "/Applications/Xcode_14.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- fi
- !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+ !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
- name: Populate binary env
run: |
# shellcheck disable=SC1091
diff --git a/.github/templates/upload.yml.j2 b/.github/templates/upload.yml.j2
index 1039a6214a7a..5e3798f8e237 100644
--- a/.github/templates/upload.yml.j2
+++ b/.github/templates/upload.yml.j2
@@ -33,7 +33,7 @@
{%- if is_windows %}
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
{%- endif %}
{%- else %}
diff --git a/.github/templates/windows_binary_build_workflow.yml.j2 b/.github/templates/windows_binary_build_workflow.yml.j2
index c3a824ad05a3..c61686f8df27 100644
--- a/.github/templates/windows_binary_build_workflow.yml.j2
+++ b/.github/templates/windows_binary_build_workflow.yml.j2
@@ -64,7 +64,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -135,7 +135,7 @@ jobs:
{%- else %}
!{{ set_runner_specific_vars() }}
!{{ common.setup_ec2_windows() }}
- !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+ !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
{%- endif %}
- name: Populate binary env
shell: bash
@@ -211,7 +211,7 @@ jobs:
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
{%- else %}
!{{ common.setup_ec2_windows() }}
- !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+ !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ set_runner_specific_vars() }}
{%- endif %}
- uses: !{{ common.download_artifact_action }}
diff --git a/.github/workflows/_bazel-build-test.yml b/.github/workflows/_bazel-build-test.yml
index 72241a772be6..d9e5e29576d4 100644
--- a/.github/workflows/_bazel-build-test.yml
+++ b/.github/workflows/_bazel-build-test.yml
@@ -47,7 +47,7 @@ jobs:
reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
fetch-depth: 1
submodules: false
@@ -69,25 +69,25 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
# [see note: pytorch repo ref]
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-image-name: ${{ inputs.docker-image-name }}
- name: Pull docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@@ -97,7 +97,7 @@ jobs:
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
- uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+ uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.9
if: ${{ inputs.cuda-version != 'cpu' && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
- name: Output disk space left
@@ -209,5 +209,5 @@ jobs:
file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}
- name: Teardown Linux
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
if: always()
diff --git a/.github/workflows/_binary-build-linux.yml b/.github/workflows/_binary-build-linux.yml
index bfa035bc753b..e81e4b6a8b26 100644
--- a/.github/workflows/_binary-build-linux.yml
+++ b/.github/workflows/_binary-build-linux.yml
@@ -142,13 +142,13 @@ jobs:
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
if: inputs.build_environment != 'linux-s390x-binary-manywheel'
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.github-token }}
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' || inputs.build_environment == 'linux-s390x-binary-manywheel' }}
@@ -178,7 +178,6 @@ jobs:
- name: Checkout PyTorch to pytorch dir
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -213,9 +212,9 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
- # If doing this in main or release branch, use docker.io. Otherwise
+      # If doing this in a release branch, use docker.io. Otherwise
# use ECR
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: ${{ inputs.DOCKER_IMAGE }}
@@ -227,7 +226,7 @@ jobs:
- name: Pull Docker image
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@@ -283,7 +282,7 @@ jobs:
- name: Teardown Linux
if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
- name: Chown workspace
if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
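
Dropping the explicit `ref:` here (and in `_binary-test-linux.yml` below) makes `actions/checkout` fall back to the event's default commit: `github.sha` for pushes and the pull request merge commit for `pull_request` events, instead of the PR head SHA that the deleted expression forced. This matches the `checkout_pr_head=False` flag threaded through the templates above. A minimal sketch of the resulting step (illustrative; the real workflow pins the action by full SHA):

    - name: Checkout PyTorch to pytorch dir
      uses: actions/checkout@v4
      with:
        # no `ref:` -> checkout resolves the event's default commit
        submodules: recursive
        path: pytorch
        show-progress: false
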
diff --git a/.github/workflows/_binary-test-linux.yml b/.github/workflows/_binary-test-linux.yml
index 2d9e4d0e27b2..887ab908b2d8 100644
--- a/.github/workflows/_binary-test-linux.yml
+++ b/.github/workflows/_binary-test-linux.yml
@@ -125,14 +125,14 @@ jobs:
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
if: inputs.build_environment != 'linux-s390x-binary-manywheel'
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.github-token }}
# Setup the environment
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' || inputs.build_environment == 'linux-s390x-binary-manywheel' }}
@@ -155,7 +155,6 @@ jobs:
- name: Checkout PyTorch to pytorch dir
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
show-progress: false
path: pytorch
@@ -186,9 +185,7 @@ jobs:
path: "${{ runner.temp }}/artifacts/"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
- uses: pytorch/test-infra/.github/actions/setup-nvidia@main
- with:
- driver-version: ${{ startsWith(inputs.GPU_ARCH_VERSION, '13') && '580.65.06' || '570.133.07' }}
+ uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.9
if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' && steps.filter.outputs.is-test-matrix-empty == 'False' }}
- name: configure aws credentials
@@ -203,7 +200,7 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: ${{ inputs.DOCKER_IMAGE }}
@@ -213,7 +210,7 @@ jobs:
- name: Pull Docker image
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@@ -225,7 +222,7 @@ jobs:
- name: Teardown Linux
if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
- name: Chown workspace
if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
diff --git a/.github/workflows/_binary-upload.yml b/.github/workflows/_binary-upload.yml
index 636b76d42931..61896f52bbed 100644
--- a/.github/workflows/_binary-upload.yml
+++ b/.github/workflows/_binary-upload.yml
@@ -81,7 +81,7 @@ jobs:
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
no-sudo: true
diff --git a/.github/workflows/_docs.yml b/.github/workflows/_docs.yml
index ff5dbe604bac..5980ad849fa7 100644
--- a/.github/workflows/_docs.yml
+++ b/.github/workflows/_docs.yml
@@ -67,7 +67,7 @@ jobs:
# an OOM issue when running the job, so this upgrades the runner from 4xlarge
# to the next available tier of 12xlarge. So much memory just to generate cpp
# doc
- runner: ${{ inputs.runner_prefix }}linux.12xlarge
+ runner: ${{ inputs.runner_prefix }}linux.12xlarge.memory
# TODO: Nightly cpp docs take longer and longer to finish (more than 3h now)
# Let's try to figure out how this can be improved
timeout-minutes: 360
@@ -84,7 +84,7 @@ jobs:
name: build-docs-${{ matrix.docs_type }}-${{ inputs.push }}
steps:
- name: Setup SSH (Click me for login details)
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
instructions: |
@@ -95,7 +95,7 @@ jobs:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
- name: Setup Linux
uses: ./.github/actions/setup-linux
@@ -110,12 +110,12 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-image-name: ${{ inputs.docker-image }}
- name: Pull docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@@ -222,5 +222,5 @@ jobs:
s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/functorchdocs
- name: Teardown Linux
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
if: always()
diff --git a/.github/workflows/_link_check.yml b/.github/workflows/_link_check.yml
index 014e6106b073..4c46ad28cf6b 100644
--- a/.github/workflows/_link_check.yml
+++ b/.github/workflows/_link_check.yml
@@ -11,7 +11,7 @@ on:
jobs:
lint-urls:
if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-url-lint') }}
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
with:
job-name: lint-urls
timeout: 120
@@ -37,7 +37,7 @@ jobs:
lint-xrefs:
if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-xref-lint') }}
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
with:
job-name: lint-xrefs
timeout: 60
diff --git a/.github/workflows/_linux-build.yml b/.github/workflows/_linux-build.yml
index 6b4bd429e3c9..f909488850d0 100644
--- a/.github/workflows/_linux-build.yml
+++ b/.github/workflows/_linux-build.yml
@@ -134,7 +134,7 @@ jobs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
steps:
- name: Setup SSH (Click me for login details)
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -147,7 +147,7 @@ jobs:
# checkout because when we run this action we don't *have* a local
# checkout. In other cases you should prefer a local checkout.
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
no-sudo: true
@@ -183,7 +183,7 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
docker-image-name: ${{ inputs.docker-image-name }}
@@ -199,7 +199,7 @@ jobs:
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.use-old-whl.outputs.reuse != 'true'
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@@ -457,7 +457,7 @@ jobs:
artifact_prefix: usage_log_build_${{ steps.get-job-id.outputs.job-id }}
- name: Teardown Linux
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
if: always() && inputs.build-environment != 'linux-s390x-binary-manywheel'
- name: Cleanup docker
diff --git a/.github/workflows/_linux-test.yml b/.github/workflows/_linux-test.yml
index 66579b573a63..f413f497d79e 100644
--- a/.github/workflows/_linux-test.yml
+++ b/.github/workflows/_linux-test.yml
@@ -99,7 +99,7 @@ jobs:
contents: read
steps:
- name: Setup SSH (Click me for login details)
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
if: ${{ !contains(matrix.runner, 'b200') && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -108,7 +108,7 @@ jobs:
docker exec -it $(docker container ps --format '{{.ID}}') bash
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
no-sudo: true
@@ -139,7 +139,7 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
docker-image-name: ${{ inputs.docker-image }}
@@ -155,7 +155,7 @@ jobs:
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@@ -167,9 +167,9 @@ jobs:
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
- uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+ uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.9
with:
- driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '570.133.07' }}
+ driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '580.82.07' }}
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && !contains(matrix.runner, 'b200') }}
- name: Setup GPU_FLAG for docker run
@@ -273,6 +273,8 @@ jobs:
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
+ EXTRA_FLAGS: ${{ matrix.extra_flags || '' }}
+ OP_BENCHMARK_TESTS: ${{ matrix.op_benchmark_tests }}
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
@@ -418,7 +420,7 @@ jobs:
aws-region: us-east-1
- name: Upload the benchmark results
- uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+ uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.9
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
benchmark-results-dir: test/test-reports
@@ -476,7 +478,7 @@ jobs:
workflow_attempt: ${{github.run_attempt}}
- name: Teardown Linux
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
# NB: We are currently having an intermittent GPU-related issue on G5 runners with
diff --git a/.github/workflows/_mac-build.yml b/.github/workflows/_mac-build.yml
index a2a5f8dd9111..9561dcc8b895 100644
--- a/.github/workflows/_mac-build.yml
+++ b/.github/workflows/_mac-build.yml
@@ -67,11 +67,11 @@ jobs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
steps:
- name: Clean up disk space before running MacOS workflow
- uses: pytorch/test-infra/.github/actions/check-disk-space@main
+ uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.9
# [see note: pytorch repo ref]
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
- name: Set xcode version
env:
@@ -82,7 +82,7 @@ jobs:
fi
- name: Setup Python
- uses: pytorch/test-infra/.github/actions/setup-python@main
+ uses: pytorch/test-infra/.github/actions/setup-python@release/2.9
with:
python-version: ${{ inputs.python-version }}
pip-requirements-file: .github/requirements/pip-requirements-macOS.txt
@@ -188,4 +188,4 @@ jobs:
- name: Clean up disk space
if: always()
continue-on-error: true
- uses: pytorch/test-infra/.github/actions/check-disk-space@main
+ uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.9
diff --git a/.github/workflows/_mac-test.yml b/.github/workflows/_mac-test.yml
index 086e25b4868e..29ff3a72817f 100644
--- a/.github/workflows/_mac-test.yml
+++ b/.github/workflows/_mac-test.yml
@@ -105,11 +105,11 @@ jobs:
done
- name: Clean up disk space before running MacOS workflow
- uses: pytorch/test-infra/.github/actions/check-disk-space@main
+ uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.9
# [see note: pytorch repo ref]
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
- name: Get workflow job id
id: get-job-id
@@ -119,7 +119,7 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Setup Python
- uses: pytorch/test-infra/.github/actions/setup-python@main
+ uses: pytorch/test-infra/.github/actions/setup-python@release/2.9
with:
python-version: ${{ inputs.python-version }}
pip-requirements-file: .github/requirements/pip-requirements-macOS.txt
@@ -257,7 +257,7 @@ jobs:
file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
- name: Upload the benchmark results
- uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+ uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.9
with:
benchmark-results-dir: test/test-reports
dry-run: false
@@ -287,4 +287,4 @@ jobs:
- name: Clean up disk space
if: always()
continue-on-error: true
- uses: pytorch/test-infra/.github/actions/check-disk-space@main
+ uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.9
diff --git a/.github/workflows/_rocm-test.yml b/.github/workflows/_rocm-test.yml
index f73972942b5f..b6cd5d88a094 100644
--- a/.github/workflows/_rocm-test.yml
+++ b/.github/workflows/_rocm-test.yml
@@ -81,7 +81,7 @@ jobs:
steps:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
no-sudo: true
@@ -113,12 +113,12 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-image-name: ${{ inputs.docker-image }}
- name: Pull docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@@ -330,7 +330,7 @@ jobs:
aws-region: us-east-1
- name: Upload the benchmark results
- uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+ uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.9
with:
benchmark-results-dir: test/test-reports
dry-run: false
diff --git a/.github/workflows/_runner-determinator.yml b/.github/workflows/_runner-determinator.yml
index 0d674f044ec4..dd28024dbd80 100644
--- a/.github/workflows/_runner-determinator.yml
+++ b/.github/workflows/_runner-determinator.yml
@@ -59,7 +59,7 @@ jobs:
PR_NUMBER: ${{ github.event.pull_request.number }}
steps:
# - name: Checkout PyTorch
- # uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ # uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
# with:
# fetch-depth: 1
# submodules: true
diff --git a/.github/workflows/_win-build.yml b/.github/workflows/_win-build.yml
index ebfb4001e437..92543128265d 100644
--- a/.github/workflows/_win-build.yml
+++ b/.github/workflows/_win-build.yml
@@ -77,6 +77,7 @@ jobs:
run: |
git config --global core.longpaths true
git config --global core.symlinks true
+ git config --global core.ignorecase false
# https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock
# the directory on Windows and prevent GHA from checking out as reported
@@ -84,10 +85,10 @@ jobs:
git config --global core.fsmonitor false
- name: Clean up leftover processes on non-ephemeral Windows runner
- uses: pytorch/test-infra/.github/actions/cleanup-runner@main
+ uses: pytorch/test-infra/.github/actions/cleanup-runner@release/2.9
- name: Setup SSH (Click me for login details)
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
instructions: |
@@ -102,7 +103,7 @@ jobs:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
no-sudo: true
@@ -150,7 +151,7 @@ jobs:
BUILD_WHEEL: 1
MAX_JOBS: 8
CUDA_VERSION: ${{ inputs.cuda-version }}
- PYTHON_VERSION: "3.9"
+ PYTHON_VERSION: "3.10"
SCCACHE_BUCKET: "ossci-compiler-cache"
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
SCCACHE_REGION: us-east-1
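
The `core.ignorecase false` line added above (and again in `_win-test.yml` below) is worth a note: git defaults `core.ignorecase` to true on case-insensitive filesystems such as NTFS, under which paths differing only by case are conflated, so a case-only rename in the repository can leave a stale file in the work tree. Forcing it off makes git compare paths case-sensitively. An isolated sketch of the resulting git setup on a Windows runner (illustrative only):

    - name: Configure git for a case-insensitive runner (sketch)
      shell: bash
      run: |
        git config --global core.longpaths true    # allow paths beyond MAX_PATH
        git config --global core.symlinks true     # materialize symlinks in the checkout
        git config --global core.ignorecase false  # compare paths case-sensitively
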
diff --git a/.github/workflows/_win-test.yml b/.github/workflows/_win-test.yml
index 0c95503928fb..37e48d99e2be 100644
--- a/.github/workflows/_win-test.yml
+++ b/.github/workflows/_win-test.yml
@@ -70,6 +70,7 @@ jobs:
run: |
git config --global core.longpaths true
git config --global core.symlinks true
+ git config --global core.ignorecase false
# https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock
# the directory on Windows and prevent GHA from checking out as reported
@@ -77,10 +78,10 @@ jobs:
git config --global core.fsmonitor false
- name: Clean up leftover processes on non-ephemeral Windows runner
- uses: pytorch/test-infra/.github/actions/cleanup-runner@main
+ uses: pytorch/test-infra/.github/actions/cleanup-runner@release/2.9
- name: Setup SSH (Click me for login details)
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
instructions: |
@@ -96,7 +97,7 @@ jobs:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
no-sudo: true
@@ -183,7 +184,7 @@ jobs:
env:
USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
INSTALL_WINDOWS_SDK: 1
- PYTHON_VERSION: 3.9
+ PYTHON_VERSION: "3.10"
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
TEST_SHOWLOCALS: ${{ steps.keep-going.outputs.ci-test-showlocals }}
diff --git a/.github/workflows/_xpu-test.yml b/.github/workflows/_xpu-test.yml
index 177e6ca4bbe3..6bceb4eef6ba 100644
--- a/.github/workflows/_xpu-test.yml
+++ b/.github/workflows/_xpu-test.yml
@@ -77,7 +77,7 @@ jobs:
steps:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
- name: Setup XPU
uses: ./.github/actions/setup-xpu
@@ -95,7 +95,7 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-image-name: ${{ inputs.docker-image }}
@@ -109,7 +109,7 @@ jobs:
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@@ -275,7 +275,7 @@ jobs:
- name: Change permissions
if: ${{ always() && steps.test.conclusion }}
run: |
- docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "sudo chown -R 1001:1001 test"
+ docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "sudo chown -R 1000:1000 test"
- name: Print remaining test logs
shell: bash
diff --git a/.github/workflows/build-almalinux-images.yml b/.github/workflows/build-almalinux-images.yml
index 0754b154a358..e0492f736442 100644
--- a/.github/workflows/build-almalinux-images.yml
+++ b/.github/workflows/build-almalinux-images.yml
@@ -39,7 +39,7 @@ jobs:
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.3", "rocm6.4", "cpu"]
steps:
- name: Build docker image
- uses: pytorch/pytorch/.github/actions/binary-docker-build@main
+ uses: pytorch/pytorch/.github/actions/binary-docker-build@release/2.9
with:
docker-image-name: almalinux-builder
custom-tag-prefix: ${{matrix.tag}}
diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml
index cc2f54fc45f8..edfa0168e19f 100644
--- a/.github/workflows/build-libtorch-images.yml
+++ b/.github/workflows/build-libtorch-images.yml
@@ -32,7 +32,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -58,7 +58,7 @@ jobs:
]
steps:
- name: Build docker image
- uses: pytorch/pytorch/.github/actions/binary-docker-build@main
+ uses: pytorch/pytorch/.github/actions/binary-docker-build@release/2.9
with:
docker-image-name: libtorch-cxx11-builder
custom-tag-prefix: ${{ matrix.tag }}
diff --git a/.github/workflows/build-manywheel-images-s390x.yml b/.github/workflows/build-manywheel-images-s390x.yml
index c498e169f1aa..a719bf21a1ca 100644
--- a/.github/workflows/build-manywheel-images-s390x.yml
+++ b/.github/workflows/build-manywheel-images-s390x.yml
@@ -25,7 +25,7 @@ jobs:
runs-on: linux.s390x
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: false
no-sudo: true
diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml
index ce42d5644c93..e3549cd6284a 100644
--- a/.github/workflows/build-manywheel-images.yml
+++ b/.github/workflows/build-manywheel-images.yml
@@ -32,7 +32,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -47,12 +47,11 @@ jobs:
matrix:
include: [
{ name: "manylinux2_28-builder", tag: "cuda13.0", runner: "linux.9xlarge.ephemeral" },
- { name: "manylinux2_28-builder", tag: "cuda12.9", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "cuda12.8", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "cuda12.6", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinuxaarch64-builder", tag: "cuda13.0", runner: "linux.arm64.2xlarge.ephemeral" },
- { name: "manylinuxaarch64-builder", tag: "cuda12.9", runner: "linux.arm64.2xlarge.ephemeral" },
{ name: "manylinuxaarch64-builder", tag: "cuda12.8", runner: "linux.arm64.2xlarge.ephemeral" },
+ { name: "manylinuxaarch64-builder", tag: "cuda12.6", runner: "linux.arm64.2xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "rocm6.3", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "rocm6.4", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "cpu", runner: "linux.9xlarge.ephemeral" },
@@ -64,7 +63,7 @@ jobs:
name: ${{ matrix.name }}:${{ matrix.tag }}
steps:
- name: Build docker image
- uses: pytorch/pytorch/.github/actions/binary-docker-build@main
+ uses: pytorch/pytorch/.github/actions/binary-docker-build@release/2.9
with:
docker-image-name: ${{ matrix.name }}
custom-tag-prefix: ${{ matrix.tag }}
diff --git a/.github/workflows/build-triton-wheel.yml b/.github/workflows/build-triton-wheel.yml
index 932d9c886302..8f066de47534 100644
--- a/.github/workflows/build-triton-wheel.yml
+++ b/.github/workflows/build-triton-wheel.yml
@@ -3,7 +3,7 @@ name: Build Triton wheels
on:
push:
branches:
- - main
+ - release/2.9
tags:
# NOTE: Binary build pipelines should only get triggered on release candidate builds
# Release candidate tags look like: v1.11.0-rc1
@@ -36,7 +36,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -74,12 +74,12 @@ jobs:
PLATFORM: 'manylinux_2_28_x86_64'
steps:
- name: Setup SSH (Click me for login details)
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: false
@@ -87,7 +87,7 @@ jobs:
uses: ./.github/actions/setup-linux
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ env.DOCKER_IMAGE }}
@@ -184,7 +184,7 @@ jobs:
path: ${{ runner.temp }}/artifacts/wheelhouse/*
- name: Teardown Linux
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
if: always()
build-wheel-win:
@@ -217,7 +217,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/build-vllm-wheel.yml b/.github/workflows/build-vllm-wheel.yml
new file mode 100644
index 000000000000..9efedf64cce7
--- /dev/null
+++ b/.github/workflows/build-vllm-wheel.yml
@@ -0,0 +1,248 @@
+name: Build vLLM wheels
+
+on:
+ push:
+ branches:
+ - main
+ paths:
+ - .github/workflows/build-vllm-wheel.yml
+ - .github/ci_commit_pins/vllm.txt
+ workflow_dispatch:
+ pull_request:
+ paths:
+ - .github/workflows/build-vllm-wheel.yml
+ - .github/ci_commit_pins/vllm.txt
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+ cancel-in-progress: true
+
+jobs:
+ build-wheel:
+ if: github.repository_owner == 'pytorch'
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: [ '3.12' ]
+ # TODO (huydhn): Add cu130 https://github.com/pytorch/pytorch/pull/162000#issuecomment-3261541554
+ device: [ 'cu128', 'cu129' ]
+ runner: [ 'linux.12xlarge.memory' ]
+ include:
+ - device: cu128
+ manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.8'
+ - device: cu129
+ manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.9'
+ name: "Build ${{ matrix.device }} vLLM wheel"
+ runs-on: ${{ matrix.runner }}
+ timeout-minutes: 480
+ env:
+ PY_VERS: ${{ matrix.python-version }}
+ MANYLINUX_IMAGE: ${{ matrix.manylinux-image }}
+ PLATFORM: 'manylinux_2_28_x86_64'
+ BUILD_DEVICE: ${{ matrix.device }}
+ steps:
+ - name: Setup SSH (Click me for login details)
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
+ with:
+ github-secret: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Checkout PyTorch
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
+ with:
+ submodules: false
+
+ - name: Setup Linux
+ uses: ./.github/actions/setup-linux
+
+ - name: Get latest PyTorch nightly
+ shell: bash
+ run: |
+ set -eux
+
+          # Keep the PyTorch nightly wheel here so that we can install it
+          # later during the vLLM build process
+ mkdir -p "${RUNNER_TEMP}/artifacts/"
+
+ container_name=$(docker run \
+ --tty \
+ --detach \
+ -e PLATFORM \
+ -v "${GITHUB_WORKSPACE}:/pytorch" \
+ -v "${RUNNER_TEMP}/artifacts:/artifacts" \
+ -w /artifacts/ \
+ "${MANYLINUX_IMAGE}"
+ )
+
+ # Determine python executable for given version (copied from build-triton-wheel)
+ case $PY_VERS in
+ 3.10)
+ PYTHON_EXECUTABLE=/opt/python/cp310-cp310/bin/python
+ ;;
+ 3.11)
+ PYTHON_EXECUTABLE=/opt/python/cp311-cp311/bin/python
+ ;;
+ 3.12)
+ PYTHON_EXECUTABLE=/opt/python/cp312-cp312/bin/python
+ ;;
+ 3.13)
+ PYTHON_EXECUTABLE=/opt/python/cp313-cp313/bin/python
+ ;;
+ 3.13t)
+ PYTHON_EXECUTABLE=/opt/python/cp313-cp313t/bin/python
+ ;;
+ 3.14)
+ PYTHON_EXECUTABLE=/opt/python/cp314-cp314/bin/python
+ ;;
+ 3.14t)
+ PYTHON_EXECUTABLE=/opt/python/cp314-cp314t/bin/python
+ ;;
+ *)
+ echo "Unsupported python version ${PY_VERS}"
+ exit 1
+ ;;
+ esac
+
+ docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -mpip install \
+ --pre torch torchvision torchaudio \
+ --index-url "https://download.pytorch.org/whl/nightly/${BUILD_DEVICE}"
+
+ # I wonder if there is a command to both download and install the wheels
+ # in one go
+ docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -mpip download \
+ --pre torch torchvision torchaudio \
+ --index-url "https://download.pytorch.org/whl/nightly/${BUILD_DEVICE}"
+
+ # Save this for later
+ echo "PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}" >> "$GITHUB_ENV"
+ echo "container_name=${container_name}" >> "$GITHUB_ENV"
+
+ - name: Build vLLM wheel
+ uses: ./.github/actions/build-external-packages
+ with:
+ build-targets: vllm
+ docker-image: ${{ env.MANYLINUX_IMAGE }}
+ cuda-arch-list: '8.0;8.9;9.0;10.0;12.0'
+ torch-wheel-dir: ${{ runner.temp }}/artifacts
+ output-dir: ${{ runner.temp }}/artifacts/externals
+
+ - name: Prepare vLLM wheel
+ shell: bash
+ run: |
+ set -eux
+
+          # Get these wheels ready; the vLLM renaming logic is copied from its .buildkite/scripts/upload-wheels.sh
+ docker exec -t "${container_name}" bash -c "
+ set -eux
+
+ nightly=\$(unzip -p torch-* '**/METADATA' | grep '^Version: ' | cut -d' ' -f2 | cut -d'.' -f4)
+
+ pushd externals/vllm/wheels
+ for package in xformers flashinfer-python vllm; do
+ pushd \$package
+ auditwheel repair --plat \$PLATFORM *.whl \
+ --exclude libc10* --exclude libtorch* --exclude libcu* --exclude libnv*
+ repair_wheel=\$(find wheelhouse -name *\${PLATFORM}*)
+ repair_wheel=\$(basename \${repair_wheel})
+ popd
+
+ cp \${package}/wheelhouse/\${repair_wheel} .
+ version=\$(unzip -p \$repair_wheel '**/METADATA' | grep '^Version: ' | cut -d' ' -f2)
+
+ if [[ \$package == vllm ]]; then
+ new_wheel=\${repair_wheel/\$version/1.0.0.\$nightly}
+ else
+ major_version=\$(echo \$version | tr '.+' '.' | cut -d'.' -f1-3)
+ new_wheel=\${repair_wheel/\$version/\$major_version.\$nightly}
+ fi
+
+ mv -- \$repair_wheel \$new_wheel
+ rm -rf \$package
+ done
+ popd
+ "
+
+ docker exec -t "${container_name}" chown -R 1000:1000 /artifacts
+
+ - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
+ with:
+ name: vllm-wheel-${{ matrix.device }}-${{ matrix.python-version }}-${{ env.PLATFORM }}
+ if-no-files-found: error
+ path: ${{ runner.temp }}/artifacts/externals/vllm/wheels/*.whl
+
+ - name: Teardown Linux
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
+ if: always()
+
+ # Copied from build-triton-wheel workflow (mostly)
+ upload-wheel:
+ name: "Upload ${{ matrix.device }} vLLM wheel"
+ needs:
+ - build-wheel
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ device: [ 'cu128', 'cu129' ]
+ env:
+ BUILD_DEVICE: ${{ matrix.device }}
+ permissions:
+ id-token: write
+ contents: read
+ container:
+ image: continuumio/miniconda3:4.12.0
+ environment: ${{ (github.event_name == 'push' && github.event.ref == 'refs/heads/main') && 'nightly-wheel-upload' || '' }}
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Configure AWS credentials (PyTorch account) for main
+ if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/main' }}
+ uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
+ with:
+ role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels
+ aws-region: us-east-1
+
+      - name: Configure AWS credentials (PyTorch account) for RC builds
+ if: ${{ github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/')) }}
+ uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
+ with:
+ role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_test_build_wheels
+ aws-region: us-east-1
+
+ - name: Download Build Artifacts
+ uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
+ with:
+ # Download all available artifacts
+ path: ${{ runner.temp }}/artifacts-all
+
+ - name: Select Wheel Artifacts
+ shell: bash
+ run: |
+ set -eux
+ mkdir -p "${RUNNER_TEMP}/artifacts/"
+ mv "${RUNNER_TEMP}"/artifacts-all/vllm-wheel-"${BUILD_DEVICE}"-*/* "${RUNNER_TEMP}/artifacts/"
+
+      - name: Set DRY_RUN (only for main or tagged pushes)
+ if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) }}
+ shell: bash
+ run: |
+ echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
+
+ - name: Set UPLOAD_CHANNEL (only for tagged pushes)
+ if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') }}
+ shell: bash
+ run: |
+ set -ex
+
+ if [[ "${GITHUB_REF_NAME}" = *-rc[0-9]* ]]; then
+ echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
+ fi
+
+ - name: Upload binaries
+ env:
+ PACKAGE_TYPE: wheel
+ UPLOAD_SUBFOLDER: ${{ env.BUILD_DEVICE }}
+ PKG_DIR: ${{ runner.temp }}/artifacts
+ shell: bash
+ run: |
+ set -ex
+ bash .circleci/scripts/binary_upload.sh
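
The renaming loop in the `Prepare vLLM wheel` step stamps every repaired wheel with the version of the torch nightly it was built against. A worked trace with illustrative values only (the field arithmetic follows the `tr`/`cut` pipeline above):

    - name: Trace the wheel renaming arithmetic (illustrative values only)
      shell: bash
      run: |
        # torch METADATA "Version: 2.9.0.dev20250108" -> fourth dot-field:
        nightly="dev20250108"
        # a non-vllm package such as xformers:
        version="0.0.32+abc123"
        # normalize '+' to '.', then keep the first three dot-fields
        major_version=$(echo "$version" | tr '.+' '.' | cut -d'.' -f1-3)  # 0.0.32
        echo "${major_version}.${nightly}"  # prints 0.0.32.dev20250108
        # vllm itself is renamed to 1.0.0.${nightly} regardless of its own version
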
diff --git a/.github/workflows/check-labels.yml b/.github/workflows/check-labels.yml
index 44430522b79d..1174a1c502f6 100644
--- a/.github/workflows/check-labels.yml
+++ b/.github/workflows/check-labels.yml
@@ -38,7 +38,7 @@ jobs:
runs-on: linux.24_04.4x
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: false
fetch-depth: 1
diff --git a/.github/workflows/close-nonexistent-disable-issues.yml b/.github/workflows/close-nonexistent-disable-issues.yml
index bef3d8797149..da83019a5908 100644
--- a/.github/workflows/close-nonexistent-disable-issues.yml
+++ b/.github/workflows/close-nonexistent-disable-issues.yml
@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: false
fetch-depth: 1
diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml
index 57fe7be15d29..03631be3e563 100644
--- a/.github/workflows/create_release.yml
+++ b/.github/workflows/create_release.yml
@@ -19,7 +19,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml
index b86ee2352bd1..f88244a13ffc 100644
--- a/.github/workflows/docker-builds.yml
+++ b/.github/workflows/docker-builds.yml
@@ -33,7 +33,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -50,28 +50,27 @@ jobs:
runner: [linux.12xlarge]
docker-image-name: [
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11,
+ pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11,
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm,
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks,
- pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc9-inductor-benchmarks,
- pytorch-linux-jammy-cuda12.8-cudnn9-py3.13-gcc9-inductor-benchmarks,
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9,
pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11,
- pytorch-linux-jammy-py3.9-clang12,
+ pytorch-linux-jammy-py3.10-clang12,
pytorch-linux-jammy-py3.13-clang12,
pytorch-linux-jammy-rocm-n-py3,
pytorch-linux-noble-rocm-n-py3,
pytorch-linux-noble-rocm-alpha-py3,
pytorch-linux-jammy-rocm-n-py3-benchmarks,
- pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-clang12,
- pytorch-linux-jammy-py3.9-gcc11,
- pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks,
+ pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-clang12,
+ pytorch-linux-jammy-py3.10-gcc11,
+ pytorch-linux-jammy-py3-gcc11-inductor-benchmarks,
pytorch-linux-jammy-py3.12-halide,
- pytorch-linux-jammy-xpu-2025.0-py3,
- pytorch-linux-jammy-xpu-2025.1-py3,
+ pytorch-linux-jammy-xpu-n-1-py3,
+ pytorch-linux-jammy-xpu-n-py3,
pytorch-linux-jammy-py3-clang18-asan,
pytorch-linux-jammy-py3-clang12-onnx,
pytorch-linux-jammy-linter,
- pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter,
+ pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter,
# Executorch pin needs update
# pytorch-linux-jammy-py3-clang12-executorch,
pytorch-linux-jammy-py3.12-triton-cpu,
@@ -97,21 +96,21 @@ jobs:
# [see note: pytorch repo ref]
# deep clone (fetch-depth 0) required for git merge-base
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Build docker image
id: build-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-image-name: ci-image:${{ matrix.docker-image-name }}
always-rebuild: true
push: true
- name: Pull docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.build-docker-image.outputs.docker-image }}
@@ -142,5 +141,5 @@ jobs:
if: always()
- name: Teardown Linux
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
if: always()
diff --git a/.github/workflows/docker-cache-mi300.yml b/.github/workflows/docker-cache-mi300.yml
index 02c1171c567a..bc2ae450f7c2 100644
--- a/.github/workflows/docker-cache-mi300.yml
+++ b/.github/workflows/docker-cache-mi300.yml
@@ -20,7 +20,7 @@ jobs:
runs-on: rocm-docker
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
no-sudo: true
@@ -39,13 +39,13 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
push: false
- name: Pull docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
diff --git a/.github/workflows/docker-release.yml b/.github/workflows/docker-release.yml
index 2560ebf7912a..134e4caf3088 100644
--- a/.github/workflows/docker-release.yml
+++ b/.github/workflows/docker-release.yml
@@ -37,7 +37,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -52,7 +52,7 @@ jobs:
matrix: ${{ steps.generate-matrix.outputs.matrix }}
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
fetch-depth: 1
submodules: true
@@ -82,7 +82,7 @@ jobs:
CUDNN_VERSION: ${{ matrix.cudnn_version }}
steps:
- name: Setup SSH (Click me for login details)
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
# [see note: pytorch repo ref]
@@ -164,12 +164,12 @@ jobs:
fi
- name: Teardown Linux
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
if: always()
validate:
needs: build
- uses: pytorch/test-infra/.github/workflows/validate-docker-images.yml@main
+ uses: pytorch/test-infra/.github/workflows/validate-docker-images.yml@release/2.9
with:
- channel: nightly
+ channel: test
ref: main
diff --git a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
index 59b14b455e9a..7e36c82644dc 100644
--- a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
+++ b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
@@ -41,7 +41,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -112,7 +112,7 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_10-cuda-aarch64-12_9-build:
+ manywheel-py3_10-cuda-aarch64-12_6-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
@@ -121,39 +121,131 @@ jobs:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
- build_name: manywheel-py3_10-cuda-aarch64-12_9
+ build_name: manywheel-py3_10-cuda-aarch64-12_6
build_environment: linux-aarch64-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_10-cuda-aarch64-12_9-upload: # Uploading
+ manywheel-py3_10-cuda-aarch64-12_6-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: manywheel-py3_10-cuda-aarch64-12_9-build
+ needs: manywheel-py3_10-cuda-aarch64-12_6-build
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
DESIRED_PYTHON: "3.10"
- build_name: manywheel-py3_10-cuda-aarch64-12_9
+ build_name: manywheel-py3_10-cuda-aarch64-12_6
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_10-cuda-aarch64-12_8-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
+ DESIRED_PYTHON: "3.10"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_10-cuda-aarch64-12_8
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_10-cuda-aarch64-12_8-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_10-cuda-aarch64-12_8-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
+ DESIRED_PYTHON: "3.10"
+ build_name: manywheel-py3_10-cuda-aarch64-12_8
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_10-cuda-aarch64-13_0-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
+ DESIRED_PYTHON: "3.10"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_10-cuda-aarch64-13_0
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
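+      # Note: observed naming in the string above — for CUDA 13.0 the core
+      # runtime wheels drop the per-CUDA suffix (nvidia-cublas, nvidia-cufft,
+      # nvidia-cuda-nvrtc, ...), while cudnn, nccl, nvshmem, and cusparselt
+      # keep an explicit -cu13 suffix (vs. -cu12 in the 12.x jobs).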
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_10-cuda-aarch64-13_0-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_10-cuda-aarch64-13_0-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
+ DESIRED_PYTHON: "3.10"
+ build_name: manywheel-py3_10-cuda-aarch64-13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -223,7 +315,99 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_11-cuda-aarch64-12_9-build:
+ manywheel-py3_11-cuda-aarch64-12_6-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
+ DESIRED_PYTHON: "3.11"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_11-cuda-aarch64-12_6
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_11-cuda-aarch64-12_6-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_11-cuda-aarch64-12_6-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
+ DESIRED_PYTHON: "3.11"
+ build_name: manywheel-py3_11-cuda-aarch64-12_6
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_11-cuda-aarch64-12_8-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
+ DESIRED_PYTHON: "3.11"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_11-cuda-aarch64-12_8
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_11-cuda-aarch64-12_8-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_11-cuda-aarch64-12_8-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
+ DESIRED_PYTHON: "3.11"
+ build_name: manywheel-py3_11-cuda-aarch64-12_8
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_11-cuda-aarch64-13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
@@ -232,39 +416,39 @@ jobs:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
- build_name: manywheel-py3_11-cuda-aarch64-12_9
+ build_name: manywheel-py3_11-cuda-aarch64-13_0
build_environment: linux-aarch64-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_11-cuda-aarch64-12_9-upload: # Uploading
+ manywheel-py3_11-cuda-aarch64-13_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: manywheel-py3_11-cuda-aarch64-12_9-build
+ needs: manywheel-py3_11-cuda-aarch64-13_0-build
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
DESIRED_PYTHON: "3.11"
- build_name: manywheel-py3_11-cuda-aarch64-12_9
+ build_name: manywheel-py3_11-cuda-aarch64-13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -334,7 +518,53 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_12-cuda-aarch64-12_9-build:
+ manywheel-py3_12-cuda-aarch64-12_6-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
+ DESIRED_PYTHON: "3.12"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_12-cuda-aarch64-12_6
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_12-cuda-aarch64-12_6-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_12-cuda-aarch64-12_6-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
+ DESIRED_PYTHON: "3.12"
+ build_name: manywheel-py3_12-cuda-aarch64-12_6
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_12-cuda-aarch64-12_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
@@ -343,39 +573,85 @@ jobs:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
DESIRED_PYTHON: "3.12"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
- build_name: manywheel-py3_12-cuda-aarch64-12_9
+ build_name: manywheel-py3_12-cuda-aarch64-12_8
build_environment: linux-aarch64-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_12-cuda-aarch64-12_9-upload: # Uploading
+ manywheel-py3_12-cuda-aarch64-12_8-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: manywheel-py3_12-cuda-aarch64-12_9-build
+ needs: manywheel-py3_12-cuda-aarch64-12_8-build
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
DESIRED_PYTHON: "3.12"
- build_name: manywheel-py3_12-cuda-aarch64-12_9
+ build_name: manywheel-py3_12-cuda-aarch64-12_8
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_12-cuda-aarch64-13_0-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
+ DESIRED_PYTHON: "3.12"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_12-cuda-aarch64-13_0
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_12-cuda-aarch64-13_0-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_12-cuda-aarch64-13_0-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
+ DESIRED_PYTHON: "3.12"
+ build_name: manywheel-py3_12-cuda-aarch64-13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -445,7 +721,53 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_13-cuda-aarch64-12_9-build:
+ manywheel-py3_13-cuda-aarch64-12_6-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
+ DESIRED_PYTHON: "3.13"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_13-cuda-aarch64-12_6
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_13-cuda-aarch64-12_6-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_13-cuda-aarch64-12_6-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
+ DESIRED_PYTHON: "3.13"
+ build_name: manywheel-py3_13-cuda-aarch64-12_6
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_13-cuda-aarch64-12_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
@@ -454,39 +776,85 @@ jobs:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
DESIRED_PYTHON: "3.13"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
- build_name: manywheel-py3_13-cuda-aarch64-12_9
+ build_name: manywheel-py3_13-cuda-aarch64-12_8
build_environment: linux-aarch64-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_13-cuda-aarch64-12_9-upload: # Uploading
+ manywheel-py3_13-cuda-aarch64-12_8-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: manywheel-py3_13-cuda-aarch64-12_9-build
+ needs: manywheel-py3_13-cuda-aarch64-12_8-build
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
DESIRED_PYTHON: "3.13"
- build_name: manywheel-py3_13-cuda-aarch64-12_9
+ build_name: manywheel-py3_13-cuda-aarch64-12_8
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_13-cuda-aarch64-13_0-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
+ DESIRED_PYTHON: "3.13"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_13-cuda-aarch64-13_0
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_13-cuda-aarch64-13_0-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_13-cuda-aarch64-13_0-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
+ DESIRED_PYTHON: "3.13"
+ build_name: manywheel-py3_13-cuda-aarch64-13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -556,7 +924,7 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_13t-cuda-aarch64-12_9-build:
+ manywheel-py3_13t-cuda-aarch64-12_6-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
@@ -565,39 +933,131 @@ jobs:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
DESIRED_PYTHON: "3.13t"
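+      # Note: the trailing "t" (3.13t here, 3.14t below) selects CPython's
+      # free-threaded (no-GIL) build, so these jobs produce separate wheels
+      # for the free-threaded ABI.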
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
- build_name: manywheel-py3_13t-cuda-aarch64-12_9
+ build_name: manywheel-py3_13t-cuda-aarch64-12_6
build_environment: linux-aarch64-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_13t-cuda-aarch64-12_9-upload: # Uploading
+ manywheel-py3_13t-cuda-aarch64-12_6-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: manywheel-py3_13t-cuda-aarch64-12_9-build
+ needs: manywheel-py3_13t-cuda-aarch64-12_6-build
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
DESIRED_PYTHON: "3.13t"
- build_name: manywheel-py3_13t-cuda-aarch64-12_9
+ build_name: manywheel-py3_13t-cuda-aarch64-12_6
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_13t-cuda-aarch64-12_8-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
+ DESIRED_PYTHON: "3.13t"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_13t-cuda-aarch64-12_8
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_13t-cuda-aarch64-12_8-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_13t-cuda-aarch64-12_8-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
+ DESIRED_PYTHON: "3.13t"
+ build_name: manywheel-py3_13t-cuda-aarch64-12_8
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_13t-cuda-aarch64-13_0-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
+ DESIRED_PYTHON: "3.13t"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_13t-cuda-aarch64-13_0
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_13t-cuda-aarch64-13_0-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_13t-cuda-aarch64-13_0-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
+ DESIRED_PYTHON: "3.13t"
+ build_name: manywheel-py3_13t-cuda-aarch64-13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -667,7 +1127,99 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_14-cuda-aarch64-12_9-build:
+ manywheel-py3_14-cuda-aarch64-12_6-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
+ DESIRED_PYTHON: "3.14"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_14-cuda-aarch64-12_6
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_14-cuda-aarch64-12_6-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_14-cuda-aarch64-12_6-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
+ DESIRED_PYTHON: "3.14"
+ build_name: manywheel-py3_14-cuda-aarch64-12_6
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_14-cuda-aarch64-12_8-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
+ DESIRED_PYTHON: "3.14"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_14-cuda-aarch64-12_8
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_14-cuda-aarch64-12_8-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_14-cuda-aarch64-12_8-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
+ DESIRED_PYTHON: "3.14"
+ build_name: manywheel-py3_14-cuda-aarch64-12_8
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_14-cuda-aarch64-13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
@@ -676,39 +1228,39 @@ jobs:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
DESIRED_PYTHON: "3.14"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
- build_name: manywheel-py3_14-cuda-aarch64-12_9
+ build_name: manywheel-py3_14-cuda-aarch64-13_0
build_environment: linux-aarch64-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_14-cuda-aarch64-12_9-upload: # Uploading
+ manywheel-py3_14-cuda-aarch64-13_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: manywheel-py3_14-cuda-aarch64-12_9-build
+ needs: manywheel-py3_14-cuda-aarch64-13_0-build
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
DESIRED_PYTHON: "3.14"
- build_name: manywheel-py3_14-cuda-aarch64-12_9
+ build_name: manywheel-py3_14-cuda-aarch64-13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -778,7 +1330,99 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_14t-cuda-aarch64-12_9-build:
+ manywheel-py3_14t-cuda-aarch64-12_6-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
+ DESIRED_PYTHON: "3.14t"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_14t-cuda-aarch64-12_6
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_14t-cuda-aarch64-12_6-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_14t-cuda-aarch64-12_6-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu126
+ GPU_ARCH_VERSION: "12.6-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.6
+ DESIRED_PYTHON: "3.14t"
+ build_name: manywheel-py3_14t-cuda-aarch64-12_6
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_14t-cuda-aarch64-12_8-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
+ DESIRED_PYTHON: "3.14t"
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runs_on: linux.arm64.m7g.4xlarge.ephemeral
+ ALPINE_IMAGE: "arm64v8/alpine"
+ build_name: manywheel-py3_14t-cuda-aarch64-12_8
+ build_environment: linux-aarch64-binary-manywheel
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
+ timeout-minutes: 420
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_14t-cuda-aarch64-12_8-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_14t-cuda-aarch64-12_8-build
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cu128
+ GPU_ARCH_VERSION: "12.8-aarch64"
+ GPU_ARCH_TYPE: cuda-aarch64
+ DOCKER_IMAGE: manylinuxaarch64-builder
+ DOCKER_IMAGE_TAG_PREFIX: cuda12.8
+ DESIRED_PYTHON: "3.14t"
+ build_name: manywheel-py3_14t-cuda-aarch64-12_8
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_14t-cuda-aarch64-13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
@@ -787,39 +1431,39 @@ jobs:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
DESIRED_PYTHON: "3.14t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
- build_name: manywheel-py3_14t-cuda-aarch64-12_9
+ build_name: manywheel-py3_14t-cuda-aarch64-13_0
build_environment: linux-aarch64-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_14t-cuda-aarch64-12_9-upload: # Uploading
+ manywheel-py3_14t-cuda-aarch64-13_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: manywheel-py3_14t-cuda-aarch64-12_9-build
+ needs: manywheel-py3_14t-cuda-aarch64-13_0-build
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9-aarch64"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0-aarch64"
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: manylinuxaarch64-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
DESIRED_PYTHON: "3.14t"
- build_name: manywheel-py3_14t-cuda-aarch64-12_9
+ build_name: manywheel-py3_14t-cuda-aarch64-13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
diff --git a/.github/workflows/generated-linux-binary-libtorch-nightly.yml b/.github/workflows/generated-linux-binary-libtorch-nightly.yml
index 776e77e80826..bc671ae80ae2 100644
--- a/.github/workflows/generated-linux-binary-libtorch-nightly.yml
+++ b/.github/workflows/generated-linux-binary-libtorch-nightly.yml
@@ -41,7 +41,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -248,7 +248,7 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- libtorch-cuda12_9-shared-with-deps-release-build:
+ libtorch-cuda13_0-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
@@ -257,22 +257,22 @@ jobs:
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: libtorch-cxx11-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build_name: libtorch-cuda12_9-shared-with-deps-release
+ build_name: libtorch-cuda13_0-shared-with-deps-release
build_environment: linux-binary-libtorch
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
- libtorch-cuda12_9-shared-with-deps-release-test: # Testing
+ libtorch-cuda13_0-shared-with-deps-release-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - libtorch-cuda12_9-shared-with-deps-release-build
+ - libtorch-cuda13_0-shared-with-deps-release-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
@@ -280,38 +280,38 @@ jobs:
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: libtorch-cxx11-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
- build_name: libtorch-cuda12_9-shared-with-deps-release
+ build_name: libtorch-cuda13_0-shared-with-deps-release
build_environment: linux-binary-libtorch
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
- libtorch-cuda12_9-shared-with-deps-release-upload: # Uploading
+ libtorch-cuda13_0-shared-with-deps-release-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: libtorch-cuda12_9-shared-with-deps-release-test
+ needs: libtorch-cuda13_0-shared-with-deps-release-test
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: libtorch-cxx11-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
+ DOCKER_IMAGE_TAG_PREFIX: cuda13.0
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
- build_name: libtorch-cuda12_9-shared-with-deps-release
+ build_name: libtorch-cuda13_0-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -333,6 +333,7 @@ jobs:
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: libtorch-rocm6_3-shared-with-deps-release
build_environment: linux-binary-libtorch
secrets:
@@ -368,7 +369,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -390,7 +390,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: libtorch-cxx11-builder
@@ -398,7 +398,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -447,6 +447,7 @@ jobs:
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: libtorch-rocm6_4-shared-with-deps-release
build_environment: linux-binary-libtorch
secrets:
@@ -482,7 +483,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -504,7 +504,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: libtorch-cxx11-builder
@@ -512,7 +512,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
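
Throughout these generated workflows, `uses:` references to pytorch/pytorch and pytorch/test-infra move from the floating `@main` ref to the frozen `@release/2.9` branch, so release binaries are built against a fixed set of reusable workflows and composite actions rather than whatever later lands on trunk. A minimal sketch of that mechanical rewrite, assuming Python and a repo checkout (the script and its names are illustrative; the real files come from PyTorch's workflow generator):

    # Hypothetical helper, not part of this diff: re-pin remote action and
    # reusable-workflow refs from @main to @release/2.9 in generated workflows.
    # Local "./.github/..." refs (e.g. setup-xpu) are rewritten separately to
    # fully qualified pytorch/pytorch/...@release/2.9 paths.
    import pathlib
    import re

    PIN = re.compile(r"(uses:\s+pytorch/[^\s@]+)@main\b")

    for path in pathlib.Path(".github/workflows").glob("generated-*.yml"):
        text = path.read_text()
        new_text = PIN.sub(r"\1@release/2.9", text)
        if new_text != text:
            path.write_text(new_text)
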
diff --git a/.github/workflows/generated-linux-binary-libtorch-release-main.yml b/.github/workflows/generated-linux-binary-libtorch-release-main.yml
index c98d71dfefc4..9d55fc6e50ab 100644
--- a/.github/workflows/generated-linux-binary-libtorch-release-main.yml
+++ b/.github/workflows/generated-linux-binary-libtorch-release-main.yml
@@ -36,7 +36,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
diff --git a/.github/workflows/generated-linux-binary-manywheel-main.yml b/.github/workflows/generated-linux-binary-manywheel-main.yml
index ec08b2c78eb6..85b91378b253 100644
--- a/.github/workflows/generated-linux-binary-manywheel-main.yml
+++ b/.github/workflows/generated-linux-binary-manywheel-main.yml
@@ -36,7 +36,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -60,7 +60,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda12_8
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_8-test: # Testing
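
PYTORCH_EXTRA_INSTALL_REQUIREMENTS is a `|`-separated list of PEP 508 requirement strings, and the hunk above drops the `and platform_machine == 'x86_64'` clause from each environment marker, so the pinned NVIDIA wheels now also resolve on aarch64 Linux instead of being silently skipped there. A minimal sketch of how such a marker evaluates, assuming the third-party `packaging` library (the snippet is illustrative, not part of the workflow):

    # Requires: pip install packaging
    from packaging.requirements import Requirement

    # One pipe-separated entry, as it appears in the CUDA 12.8 list above.
    req = Requirement("nvidia-nccl-cu12==2.27.5; platform_system == 'Linux'")

    # The old marker also required platform_machine == 'x86_64', which made
    # pip skip the pin on aarch64; the relaxed marker matches any Linux box.
    for machine in ("x86_64", "aarch64"):
        env = {"platform_system": "Linux", "platform_machine": machine}
        print(machine, req.marker.evaluate(env))  # True for both
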
diff --git a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml
index 96a4a0fff837..5f9eaab976a6 100644
--- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml
+++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml
@@ -41,7 +41,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -127,7 +127,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_10-cuda12_6
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda12_6-test: # Testing
@@ -193,7 +193,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_10-cuda12_8
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda12_8-test: # Testing
@@ -241,72 +241,6 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_10-cuda12_9-build:
- if: ${{ github.repository_owner == 'pytorch' }}
- uses: ./.github/workflows/_binary-build-linux.yml
- needs: get-label-type
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.10"
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build_name: manywheel-py3_10-cuda12_9
- build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_10-cuda12_9-test: # Testing
- if: ${{ github.repository_owner == 'pytorch' }}
- needs:
- - manywheel-py3_10-cuda12_9-build
- - get-label-type
- uses: ./.github/workflows/_binary-test-linux.yml
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.10"
- build_name: manywheel-py3_10-cuda12_9
- build_environment: linux-binary-manywheel
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_10-cuda12_9-upload: # Uploading
- if: ${{ github.repository_owner == 'pytorch' }}
- permissions:
- id-token: write
- contents: read
- needs: manywheel-py3_10-cuda12_9-test
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.10"
- build_name: manywheel-py3_10-cuda12_9
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- uses: ./.github/workflows/_binary-upload.yml
-
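
The deleted block above removes the entire manywheel-py3_10-cuda12_9 build/test/upload chain; the same removal repeats below for py3_11 and py3_12. For this release the CUDA matrix keeps cu126 and cu128 and replaces cu129 with cu130, and each DESIRED_CUDA tag corresponds to a wheel index on PyTorch's download server. A small illustrative sketch of that mapping (the helper is hypothetical; the URL pattern follows PyTorch's published install instructions):

    # Hypothetical helper: map a DESIRED_CUDA tag from the matrix to the
    # pip index URL serving the matching wheels.
    def wheel_index(desired_cuda: str) -> str:
        return f"https://download.pytorch.org/whl/{desired_cuda}"

    for tag in ("cu126", "cu128", "cu130"):  # cu129 is dropped in this diff
        print(f"pip install torch --index-url {wheel_index(tag)}")
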
manywheel-py3_10-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@@ -325,7 +259,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_10-cuda13_0
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda13_0-test: # Testing
@@ -389,6 +323,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_10-rocm6_3
build_environment: linux-binary-manywheel
secrets:
@@ -423,7 +358,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -445,7 +379,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -453,7 +387,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -500,6 +434,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_10-rocm6_4
build_environment: linux-binary-manywheel
secrets:
@@ -534,7 +469,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -556,7 +490,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -564,7 +498,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -612,7 +546,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_10-xpu
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-xpu-test: # Testing
@@ -638,7 +572,7 @@ jobs:
contents: read
steps:
- name: Setup XPU
- uses: ./.github/actions/setup-xpu
+ uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.9
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v4
@@ -656,7 +590,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -667,7 +600,7 @@ jobs:
working-directory: pytorch
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -675,7 +608,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -785,7 +718,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda12_6
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_6-test: # Testing
@@ -851,7 +784,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda12_8
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_8-test: # Testing
@@ -899,72 +832,6 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_11-cuda12_9-build:
- if: ${{ github.repository_owner == 'pytorch' }}
- uses: ./.github/workflows/_binary-build-linux.yml
- needs: get-label-type
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.11"
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build_name: manywheel-py3_11-cuda12_9
- build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_11-cuda12_9-test: # Testing
- if: ${{ github.repository_owner == 'pytorch' }}
- needs:
- - manywheel-py3_11-cuda12_9-build
- - get-label-type
- uses: ./.github/workflows/_binary-test-linux.yml
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.11"
- build_name: manywheel-py3_11-cuda12_9
- build_environment: linux-binary-manywheel
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_11-cuda12_9-upload: # Uploading
- if: ${{ github.repository_owner == 'pytorch' }}
- permissions:
- id-token: write
- contents: read
- needs: manywheel-py3_11-cuda12_9-test
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.11"
- build_name: manywheel-py3_11-cuda12_9
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- uses: ./.github/workflows/_binary-upload.yml
-
manywheel-py3_11-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@@ -983,7 +850,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda13_0
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda13_0-test: # Testing
@@ -1047,6 +914,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_11-rocm6_3
build_environment: linux-binary-manywheel
secrets:
@@ -1081,7 +949,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1103,7 +970,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -1111,7 +978,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -1158,6 +1025,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_11-rocm6_4
build_environment: linux-binary-manywheel
secrets:
@@ -1192,7 +1060,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1214,7 +1081,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -1222,7 +1089,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -1270,7 +1137,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-xpu
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-xpu-test: # Testing
@@ -1296,7 +1163,7 @@ jobs:
contents: read
steps:
- name: Setup XPU
- uses: ./.github/actions/setup-xpu
+ uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.9
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v4
@@ -1314,7 +1181,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1325,7 +1191,7 @@ jobs:
working-directory: pytorch
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -1333,7 +1199,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -1443,7 +1309,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda12_6
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_6-test: # Testing
@@ -1509,7 +1375,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda12_8
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_8-test: # Testing
@@ -1557,72 +1423,6 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_12-cuda12_9-build:
- if: ${{ github.repository_owner == 'pytorch' }}
- uses: ./.github/workflows/_binary-build-linux.yml
- needs: get-label-type
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.12"
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build_name: manywheel-py3_12-cuda12_9
- build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_12-cuda12_9-test: # Testing
- if: ${{ github.repository_owner == 'pytorch' }}
- needs:
- - manywheel-py3_12-cuda12_9-build
- - get-label-type
- uses: ./.github/workflows/_binary-test-linux.yml
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.12"
- build_name: manywheel-py3_12-cuda12_9
- build_environment: linux-binary-manywheel
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_12-cuda12_9-upload: # Uploading
- if: ${{ github.repository_owner == 'pytorch' }}
- permissions:
- id-token: write
- contents: read
- needs: manywheel-py3_12-cuda12_9-test
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.12"
- build_name: manywheel-py3_12-cuda12_9
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- uses: ./.github/workflows/_binary-upload.yml
-
manywheel-py3_12-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@@ -1641,7 +1441,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda13_0
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
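The `PYTORCH_EXTRA_INSTALL_REQUIREMENTS` values changed above are single strings of PEP 508 requirements joined with ` | `, where the text after each `;` is an environment marker. Dropping `and platform_machine == 'x86_64'` therefore widens every pin from x86_64-only to all Linux machines, aarch64 included. A short sketch of that effect, assuming only the third-party `packaging` library (splitting on `' | '` simply mirrors how the string is written here, not any PyTorch tooling):

    from packaging.requirements import Requirement

    # Two entries copied from the updated cu13 requirements string above.
    extras = (
        "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux'"
        " | nvidia-cufile==1.15.0.42; platform_system == 'Linux'"
    )

    for chunk in extras.split(" | "):
        req = Requirement(chunk)  # parses "name==version; marker"
        for machine in ("x86_64", "aarch64"):
            env = {"platform_system": "Linux", "platform_machine": machine}
            # The old "... and platform_machine == 'x86_64'" marker evaluated
            # to False on aarch64; the Linux-only marker is True for both.
            print(req.name, req.specifier, machine,
                  req.marker.evaluate(environment=env))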
manywheel-py3_12-cuda13_0-test: # Testing
@@ -1705,6 +1505,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
DESIRED_PYTHON: "3.12"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_12-rocm6_3
build_environment: linux-binary-manywheel
secrets:
@@ -1739,7 +1540,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1761,7 +1561,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -1769,7 +1569,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
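The `docker-registry` input in the hunks above relies on the `cond && a || b` idiom because GitHub Actions expressions have no real ternary operator; the chain selects correctly here only because the ECR hostname is a truthy (non-empty) string. A small Python sketch of the equivalent selection rule (the function name and sample refs are illustrative, not part of the workflow):

    ECR = "308535385114.dkr.ecr.us-east-1.amazonaws.com"

    def pick_registry(ref: str) -> str:
        # Mirrors: startsWith(github.event.ref, 'refs/tags/ciflow/')
        #          && ECR || 'docker.io'
        # The && / || chain acts as a ternary only because ECR is truthy.
        return ECR if ref.startswith("refs/tags/ciflow/") else "docker.io"

    assert pick_registry("refs/tags/ciflow/binaries/12345") == ECR
    assert pick_registry("refs/heads/main") == "docker.io"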
@@ -1816,6 +1616,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
DESIRED_PYTHON: "3.12"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_12-rocm6_4
build_environment: linux-binary-manywheel
secrets:
@@ -1850,7 +1651,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1872,7 +1672,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -1880,7 +1680,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -1928,7 +1728,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-xpu
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-xpu-test: # Testing
@@ -1954,7 +1754,7 @@ jobs:
contents: read
steps:
- name: Setup XPU
- uses: ./.github/actions/setup-xpu
+ uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.9
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v4
@@ -1972,7 +1772,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1983,7 +1782,7 @@ jobs:
working-directory: pytorch
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -1991,7 +1790,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -2101,7 +1900,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13-cuda12_6
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13-cuda12_6-test: # Testing
@@ -2167,7 +1966,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13-cuda12_8
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13-cuda12_8-test: # Testing
@@ -2215,72 +2014,6 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_13-cuda12_9-build:
- if: ${{ github.repository_owner == 'pytorch' }}
- uses: ./.github/workflows/_binary-build-linux.yml
- needs: get-label-type
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.13"
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build_name: manywheel-py3_13-cuda12_9
- build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_13-cuda12_9-test: # Testing
- if: ${{ github.repository_owner == 'pytorch' }}
- needs:
- - manywheel-py3_13-cuda12_9-build
- - get-label-type
- uses: ./.github/workflows/_binary-test-linux.yml
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.13"
- build_name: manywheel-py3_13-cuda12_9
- build_environment: linux-binary-manywheel
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_13-cuda12_9-upload: # Uploading
- if: ${{ github.repository_owner == 'pytorch' }}
- permissions:
- id-token: write
- contents: read
- needs: manywheel-py3_13-cuda12_9-test
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.13"
- build_name: manywheel-py3_13-cuda12_9
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- uses: ./.github/workflows/_binary-upload.yml
-
manywheel-py3_13-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@@ -2299,7 +2032,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13-cuda13_0
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13-cuda13_0-test: # Testing
@@ -2363,6 +2096,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
DESIRED_PYTHON: "3.13"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_13-rocm6_3
build_environment: linux-binary-manywheel
secrets:
@@ -2397,7 +2131,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2419,7 +2152,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -2427,7 +2160,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -2474,6 +2207,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
DESIRED_PYTHON: "3.13"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_13-rocm6_4
build_environment: linux-binary-manywheel
secrets:
@@ -2508,7 +2242,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2530,7 +2263,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -2538,7 +2271,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -2586,7 +2319,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13-xpu
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13-xpu-test: # Testing
@@ -2612,7 +2345,7 @@ jobs:
contents: read
steps:
- name: Setup XPU
- uses: ./.github/actions/setup-xpu
+ uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.9
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v4
@@ -2630,7 +2363,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2641,7 +2373,7 @@ jobs:
working-directory: pytorch
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -2649,7 +2381,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -2759,7 +2491,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_6
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_6-test: # Testing
@@ -2825,7 +2557,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_8
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_8-test: # Testing
@@ -2873,72 +2605,6 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_13t-cuda12_9-build:
- if: ${{ github.repository_owner == 'pytorch' }}
- uses: ./.github/workflows/_binary-build-linux.yml
- needs: get-label-type
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.13t"
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build_name: manywheel-py3_13t-cuda12_9
- build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_13t-cuda12_9-test: # Testing
- if: ${{ github.repository_owner == 'pytorch' }}
- needs:
- - manywheel-py3_13t-cuda12_9-build
- - get-label-type
- uses: ./.github/workflows/_binary-test-linux.yml
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.13t"
- build_name: manywheel-py3_13t-cuda12_9
- build_environment: linux-binary-manywheel
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_13t-cuda12_9-upload: # Uploading
- if: ${{ github.repository_owner == 'pytorch' }}
- permissions:
- id-token: write
- contents: read
- needs: manywheel-py3_13t-cuda12_9-test
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.13t"
- build_name: manywheel-py3_13t-cuda12_9
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- uses: ./.github/workflows/_binary-upload.yml
-
manywheel-py3_13t-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@@ -2957,7 +2623,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda13_0
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda13_0-test: # Testing
@@ -3021,6 +2687,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_13t-rocm6_3
build_environment: linux-binary-manywheel
secrets:
@@ -3055,7 +2722,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3077,7 +2743,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -3085,7 +2751,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -3132,6 +2798,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_13t-rocm6_4
build_environment: linux-binary-manywheel
secrets:
@@ -3166,7 +2833,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3188,7 +2854,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -3196,7 +2862,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -3244,7 +2910,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-xpu
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-xpu-test: # Testing
@@ -3270,7 +2936,7 @@ jobs:
contents: read
steps:
- name: Setup XPU
- uses: ./.github/actions/setup-xpu
+ uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.9
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v4
@@ -3288,7 +2954,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3299,7 +2964,7 @@ jobs:
working-directory: pytorch
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -3307,7 +2972,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -3417,7 +3082,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14-cuda12_6
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14-cuda12_6-test: # Testing
@@ -3483,7 +3148,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14-cuda12_8
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14-cuda12_8-test: # Testing
@@ -3531,72 +3196,6 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_14-cuda12_9-build:
- if: ${{ github.repository_owner == 'pytorch' }}
- uses: ./.github/workflows/_binary-build-linux.yml
- needs: get-label-type
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.14"
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build_name: manywheel-py3_14-cuda12_9
- build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_14-cuda12_9-test: # Testing
- if: ${{ github.repository_owner == 'pytorch' }}
- needs:
- - manywheel-py3_14-cuda12_9-build
- - get-label-type
- uses: ./.github/workflows/_binary-test-linux.yml
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.14"
- build_name: manywheel-py3_14-cuda12_9
- build_environment: linux-binary-manywheel
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_14-cuda12_9-upload: # Uploading
- if: ${{ github.repository_owner == 'pytorch' }}
- permissions:
- id-token: write
- contents: read
- needs: manywheel-py3_14-cuda12_9-test
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.14"
- build_name: manywheel-py3_14-cuda12_9
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- uses: ./.github/workflows/_binary-upload.yml
-
manywheel-py3_14-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@@ -3615,7 +3214,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14-cuda13_0
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14-cuda13_0-test: # Testing
@@ -3679,6 +3278,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
DESIRED_PYTHON: "3.14"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_14-rocm6_3
build_environment: linux-binary-manywheel
secrets:
@@ -3713,7 +3313,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3735,7 +3334,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -3743,7 +3342,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -3790,6 +3389,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
DESIRED_PYTHON: "3.14"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_14-rocm6_4
build_environment: linux-binary-manywheel
secrets:
@@ -3824,7 +3424,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3846,7 +3445,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -3854,7 +3453,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -3902,7 +3501,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14-xpu
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14-xpu-test: # Testing
@@ -3928,7 +3527,7 @@ jobs:
contents: read
steps:
- name: Setup XPU
- uses: ./.github/actions/setup-xpu
+ uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.9
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v4
@@ -3946,7 +3545,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3957,7 +3555,7 @@ jobs:
working-directory: pytorch
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -3965,7 +3563,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -4075,7 +3673,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14t-cuda12_6
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14t-cuda12_6-test: # Testing
@@ -4141,7 +3739,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14t-cuda12_8
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14t-cuda12_8-test: # Testing
@@ -4189,72 +3787,6 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- manywheel-py3_14t-cuda12_9-build:
- if: ${{ github.repository_owner == 'pytorch' }}
- uses: ./.github/workflows/_binary-build-linux.yml
- needs: get-label-type
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.14t"
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build_name: manywheel-py3_14t-cuda12_9
- build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_14t-cuda12_9-test: # Testing
- if: ${{ github.repository_owner == 'pytorch' }}
- needs:
- - manywheel-py3_14t-cuda12_9-build
- - get-label-type
- uses: ./.github/workflows/_binary-test-linux.yml
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.14t"
- build_name: manywheel-py3_14t-cuda12_9
- build_environment: linux-binary-manywheel
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_14t-cuda12_9-upload: # Uploading
- if: ${{ github.repository_owner == 'pytorch' }}
- permissions:
- id-token: write
- contents: read
- needs: manywheel-py3_14t-cuda12_9-test
- with:
- PYTORCH_ROOT: /pytorch
- PACKAGE_TYPE: manywheel
- # TODO: This is a legacy variable that we eventually want to get rid of in
- # favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
- GPU_ARCH_TYPE: cuda
- DOCKER_IMAGE: manylinux2_28-builder
- DOCKER_IMAGE_TAG_PREFIX: cuda12.9
- DESIRED_PYTHON: "3.14t"
- build_name: manywheel-py3_14t-cuda12_9
- secrets:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- uses: ./.github/workflows/_binary-upload.yml
-
manywheel-py3_14t-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@@ -4273,7 +3805,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14t-cuda13_0
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14t-cuda13_0-test: # Testing
@@ -4337,6 +3869,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
DESIRED_PYTHON: "3.14t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_14t-rocm6_3
build_environment: linux-binary-manywheel
secrets:
@@ -4371,7 +3904,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4393,7 +3925,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -4401,7 +3933,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -4448,6 +3980,7 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
DESIRED_PYTHON: "3.14t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ timeout-minutes: 300
build_name: manywheel-py3_14t-rocm6_4
build_environment: linux-binary-manywheel
secrets:
@@ -4482,7 +4015,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4504,7 +4036,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -4512,7 +4044,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
@@ -4560,7 +4092,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_14t-xpu
build_environment: linux-binary-manywheel
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_14t-xpu-test: # Testing
@@ -4586,7 +4118,7 @@ jobs:
contents: read
steps:
- name: Setup XPU
- uses: ./.github/actions/setup-xpu
+ uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.9
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v4
@@ -4604,7 +4136,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4615,7 +4146,7 @@ jobs:
working-directory: pytorch
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -4623,7 +4154,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
diff --git a/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml b/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
index 8177bac3fe21..9df4835757c4 100644
--- a/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
+++ b/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
@@ -38,13 +38,13 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
- manywheel-py3_9-rocm6_4-build:
+ manywheel-py3_10-rocm6_4-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
@@ -58,16 +58,17 @@ jobs:
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build_name: manywheel-py3_9-rocm6_4
+ timeout-minutes: 300
+ build_name: manywheel-py3_10-rocm6_4
build_environment: linux-binary-manywheel-rocm
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
- manywheel-py3_9-rocm6_4-test: # Testing
+ manywheel-py3_10-rocm6_4-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - manywheel-py3_9-rocm6_4-build
+ - manywheel-py3_10-rocm6_4-build
- get-label-type
runs-on: linux.rocm.gpu.mi250
timeout-minutes: 240
@@ -82,19 +83,18 @@ jobs:
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
- name: manywheel-py3_9-rocm6_4
+ name: manywheel-py3_10-rocm6_4
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -116,7 +116,7 @@ jobs:
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
@@ -124,7 +124,7 @@ jobs:
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
diff --git a/.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml
index b0c3c06b2e61..d7fd44031be2 100644
--- a/.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml
+++ b/.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml
@@ -41,7 +41,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -302,3 +302,195 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_13t-cpu-s390x-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cpu
+ GPU_ARCH_TYPE: cpu-s390x
+ DOCKER_IMAGE: pytorch/manylinuxs390x-builder
+ DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
+ DESIRED_PYTHON: "3.13t"
+ runs_on: linux.s390x
+ ALPINE_IMAGE: "docker.io/s390x/alpine"
+ timeout-minutes: 420
+ build_name: manywheel-py3_13t-cpu-s390x
+ build_environment: linux-s390x-binary-manywheel
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_13t-cpu-s390x-test: # Testing
+ if: ${{ github.repository_owner == 'pytorch' }}
+ needs:
+ - manywheel-py3_13t-cpu-s390x-build
+ - get-label-type
+ uses: ./.github/workflows/_binary-test-linux.yml
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cpu
+ GPU_ARCH_TYPE: cpu-s390x
+ DOCKER_IMAGE: pytorch/manylinuxs390x-builder
+ DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
+ DESIRED_PYTHON: "3.13t"
+ build_name: manywheel-py3_13t-cpu-s390x
+ build_environment: linux-s390x-binary-manywheel
+ runs_on: linux.s390x
+ ALPINE_IMAGE: "docker.io/s390x/alpine"
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_13t-cpu-s390x-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_13t-cpu-s390x-test
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cpu
+ GPU_ARCH_TYPE: cpu-s390x
+ DOCKER_IMAGE: pytorch/manylinuxs390x-builder
+ DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
+ DESIRED_PYTHON: "3.13t"
+ build_name: manywheel-py3_13t-cpu-s390x
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_14-cpu-s390x-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cpu
+ GPU_ARCH_TYPE: cpu-s390x
+ DOCKER_IMAGE: pytorch/manylinuxs390x-builder
+ DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
+ DESIRED_PYTHON: "3.14"
+ runs_on: linux.s390x
+ ALPINE_IMAGE: "docker.io/s390x/alpine"
+ timeout-minutes: 420
+ build_name: manywheel-py3_14-cpu-s390x
+ build_environment: linux-s390x-binary-manywheel
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_14-cpu-s390x-test: # Testing
+ if: ${{ github.repository_owner == 'pytorch' }}
+ needs:
+ - manywheel-py3_14-cpu-s390x-build
+ - get-label-type
+ uses: ./.github/workflows/_binary-test-linux.yml
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cpu
+ GPU_ARCH_TYPE: cpu-s390x
+ DOCKER_IMAGE: pytorch/manylinuxs390x-builder
+ DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
+ DESIRED_PYTHON: "3.14"
+ build_name: manywheel-py3_14-cpu-s390x
+ build_environment: linux-s390x-binary-manywheel
+ runs_on: linux.s390x
+ ALPINE_IMAGE: "docker.io/s390x/alpine"
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_14-cpu-s390x-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_14-cpu-s390x-test
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cpu
+ GPU_ARCH_TYPE: cpu-s390x
+ DOCKER_IMAGE: pytorch/manylinuxs390x-builder
+ DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
+ DESIRED_PYTHON: "3.14"
+ build_name: manywheel-py3_14-cpu-s390x
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
+
+ manywheel-py3_14t-cpu-s390x-build:
+ if: ${{ github.repository_owner == 'pytorch' }}
+ uses: ./.github/workflows/_binary-build-linux.yml
+ needs: get-label-type
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cpu
+ GPU_ARCH_TYPE: cpu-s390x
+ DOCKER_IMAGE: pytorch/manylinuxs390x-builder
+ DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
+ DESIRED_PYTHON: "3.14t"
+ runs_on: linux.s390x
+ ALPINE_IMAGE: "docker.io/s390x/alpine"
+ timeout-minutes: 420
+ build_name: manywheel-py3_14t-cpu-s390x
+ build_environment: linux-s390x-binary-manywheel
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_14t-cpu-s390x-test: # Testing
+ if: ${{ github.repository_owner == 'pytorch' }}
+ needs:
+ - manywheel-py3_14t-cpu-s390x-build
+ - get-label-type
+ uses: ./.github/workflows/_binary-test-linux.yml
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cpu
+ GPU_ARCH_TYPE: cpu-s390x
+ DOCKER_IMAGE: pytorch/manylinuxs390x-builder
+ DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
+ DESIRED_PYTHON: "3.14t"
+ build_name: manywheel-py3_14t-cpu-s390x
+ build_environment: linux-s390x-binary-manywheel
+ runs_on: linux.s390x
+ ALPINE_IMAGE: "docker.io/s390x/alpine"
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ manywheel-py3_14t-cpu-s390x-upload: # Uploading
+ if: ${{ github.repository_owner == 'pytorch' }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: manywheel-py3_14t-cpu-s390x-test
+ with:
+ PYTORCH_ROOT: /pytorch
+ PACKAGE_TYPE: manywheel
+ # TODO: This is a legacy variable that we eventually want to get rid of in
+ # favor of GPU_ARCH_VERSION
+ DESIRED_CUDA: cpu
+ GPU_ARCH_TYPE: cpu-s390x
+ DOCKER_IMAGE: pytorch/manylinuxs390x-builder
+ DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
+ DESIRED_PYTHON: "3.14t"
+ build_name: manywheel-py3_14t-cpu-s390x
+ secrets:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ uses: ./.github/workflows/_binary-upload.yml
diff --git a/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml b/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml
index ad7a1cf1d71d..5f21fc565901 100644
--- a/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml
+++ b/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml
@@ -46,7 +46,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -67,15 +67,9 @@ jobs:
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
- if [ -d "/Applications/Xcode_14.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- fi
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
diff --git a/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml b/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml
index bcc7279dd777..b12a5212cd4e 100644
--- a/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml
+++ b/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml
@@ -63,15 +63,9 @@ jobs:
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
- if [ -d "/Applications/Xcode_14.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- fi
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -208,15 +202,9 @@ jobs:
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
- if [ -d "/Applications/Xcode_14.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- fi
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -353,15 +341,9 @@ jobs:
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
- if [ -d "/Applications/Xcode_14.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- fi
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -498,15 +480,9 @@ jobs:
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
- if [ -d "/Applications/Xcode_14.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- fi
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -643,15 +619,9 @@ jobs:
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
- if [ -d "/Applications/Xcode_14.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- fi
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -788,15 +758,9 @@ jobs:
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
- if [ -d "/Applications/Xcode_14.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- fi
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -933,15 +897,9 @@ jobs:
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
- if [ -d "/Applications/Xcode_14.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
- echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
- fi
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
diff --git a/.github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml b/.github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml
index 2c86e7e10359..7a8ea9cbfa2c 100644
--- a/.github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml
+++ b/.github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml
@@ -41,7 +41,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -51,7 +51,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "windows-11-arm64-preview"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -64,7 +64,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Populate binary env
shell: cmd
@@ -128,7 +128,7 @@ jobs:
- libtorch-cpu-shared-with-deps-debug-build
- get-label-type
runs-on: "windows-11-arm64-preview"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -141,7 +141,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Populate binary env
shell: cmd
@@ -201,7 +201,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
build_name: libtorch-cpu-shared-with-deps-debug
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/generated-windows-arm64-binary-libtorch-release-nightly.yml b/.github/workflows/generated-windows-arm64-binary-libtorch-release-nightly.yml
index 912a452f0ee8..14081649d370 100644
--- a/.github/workflows/generated-windows-arm64-binary-libtorch-release-nightly.yml
+++ b/.github/workflows/generated-windows-arm64-binary-libtorch-release-nightly.yml
@@ -41,7 +41,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -51,7 +51,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "windows-11-arm64-preview"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -64,7 +64,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Populate binary env
shell: cmd
@@ -128,7 +128,7 @@ jobs:
- libtorch-cpu-shared-with-deps-release-build
- get-label-type
runs-on: "windows-11-arm64-preview"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -141,7 +141,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Populate binary env
shell: cmd
@@ -201,7 +201,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
build_name: libtorch-cpu-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml b/.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml
index 1dd70d0d06a9..d0e02dade299 100644
--- a/.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml
+++ b/.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml
@@ -41,7 +41,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -51,7 +51,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "windows-11-arm64-preview"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -124,7 +124,7 @@ jobs:
- wheel-py3_11-cpu-build
- get-label-type
runs-on: "windows-11-arm64-preview"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -198,7 +198,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "windows-11-arm64-preview"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -271,7 +271,7 @@ jobs:
- wheel-py3_12-cpu-build
- get-label-type
runs-on: "windows-11-arm64-preview"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -345,7 +345,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "windows-11-arm64-preview"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -418,7 +418,7 @@ jobs:
- wheel-py3_13-cpu-build
- get-label-type
runs-on: "windows-11-arm64-preview"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
diff --git a/.github/workflows/generated-windows-binary-libtorch-debug-main.yml b/.github/workflows/generated-windows-binary-libtorch-debug-main.yml
index ac15a9f3e97a..3df2c65440a5 100644
--- a/.github/workflows/generated-windows-binary-libtorch-debug-main.yml
+++ b/.github/workflows/generated-windows-binary-libtorch-debug-main.yml
@@ -28,7 +28,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -38,7 +38,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -51,7 +51,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -77,7 +77,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -109,7 +109,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -153,7 +152,7 @@ jobs:
- libtorch-cpu-shared-with-deps-debug-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -166,7 +165,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
@@ -183,7 +182,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -215,7 +214,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
diff --git a/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml b/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml
index 9c3a96d4caee..f4413a86c657 100644
--- a/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml
+++ b/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml
@@ -35,7 +35,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -45,7 +45,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -58,7 +58,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -84,7 +84,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -116,7 +116,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -160,7 +159,7 @@ jobs:
- libtorch-cpu-shared-with-deps-debug-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -173,7 +172,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
@@ -190,7 +189,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -222,7 +221,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -283,7 +281,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
build_name: libtorch-cpu-shared-with-deps-debug
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -292,7 +290,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -306,7 +304,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -332,7 +330,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -364,7 +362,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -408,7 +405,7 @@ jobs:
- libtorch-cuda12_6-shared-with-deps-debug-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -422,7 +419,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
@@ -439,7 +436,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -471,7 +468,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -533,7 +529,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
build_name: libtorch-cuda12_6-shared-with-deps-debug
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -542,7 +538,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -556,7 +552,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -582,7 +578,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -614,7 +610,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -658,7 +653,7 @@ jobs:
- libtorch-cuda12_8-shared-with-deps-debug-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -672,7 +667,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
@@ -689,7 +684,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -721,7 +716,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -783,30 +777,30 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
build_name: libtorch-cuda12_8-shared-with-deps-debug
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- libtorch-cuda12_9-shared-with-deps-debug-build:
+ libtorch-cuda13_0-shared-with-deps-debug-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
LIBTORCH_CONFIG: debug
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -832,7 +826,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -864,7 +858,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -884,7 +877,7 @@ jobs:
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
- name: libtorch-cuda12_9-shared-with-deps-debug
+ name: libtorch-cuda13_0-shared-with-deps-debug
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
@@ -902,27 +895,27 @@ jobs:
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- libtorch-cuda12_9-shared-with-deps-debug-test: # Testing
+ libtorch-cuda13_0-shared-with-deps-debug-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - libtorch-cuda12_9-shared-with-deps-debug-build
+ - libtorch-cuda13_0-shared-with-deps-debug-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
LIBTORCH_CONFIG: debug
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
@@ -939,7 +932,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -971,7 +964,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -992,7 +984,7 @@ jobs:
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
- name: libtorch-cuda12_9-shared-with-deps-debug
+ name: libtorch-cuda13_0-shared-with-deps-debug
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
@@ -1015,26 +1007,26 @@ jobs:
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- libtorch-cuda12_9-shared-with-deps-debug-upload: # Uploading
+ libtorch-cuda13_0-shared-with-deps-debug-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: libtorch-cuda12_9-shared-with-deps-debug-test
+ needs: libtorch-cuda13_0-shared-with-deps-debug-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
LIBTORCH_CONFIG: debug
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
- build_name: libtorch-cuda12_9-shared-with-deps-debug
+ DESIRED_PYTHON: "3.10"
+ build_name: libtorch-cuda13_0-shared-with-deps-debug
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
diff --git a/.github/workflows/generated-windows-binary-libtorch-release-main.yml b/.github/workflows/generated-windows-binary-libtorch-release-main.yml
index 9a0a3496e37b..ef94d6212af3 100644
--- a/.github/workflows/generated-windows-binary-libtorch-release-main.yml
+++ b/.github/workflows/generated-windows-binary-libtorch-release-main.yml
@@ -28,7 +28,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -38,7 +38,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -51,7 +51,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -77,7 +77,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -109,7 +109,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -153,7 +152,7 @@ jobs:
- libtorch-cpu-shared-with-deps-release-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -166,7 +165,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
@@ -183,7 +182,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -215,7 +214,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
diff --git a/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml b/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml
index d212894b7443..8f4ec6e0b205 100644
--- a/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml
+++ b/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml
@@ -35,7 +35,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -45,7 +45,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -58,7 +58,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -84,7 +84,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -116,7 +116,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -160,7 +159,7 @@ jobs:
- libtorch-cpu-shared-with-deps-release-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -173,7 +172,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
@@ -190,7 +189,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -222,7 +221,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -283,7 +281,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
build_name: libtorch-cpu-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -292,7 +290,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -306,7 +304,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -332,7 +330,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -364,7 +362,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -408,7 +405,7 @@ jobs:
- libtorch-cuda12_6-shared-with-deps-release-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -422,7 +419,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
@@ -439,7 +436,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -471,7 +468,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -533,7 +529,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
build_name: libtorch-cuda12_6-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -542,7 +538,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -556,7 +552,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -582,7 +578,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -614,7 +610,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -658,7 +653,7 @@ jobs:
- libtorch-cuda12_8-shared-with-deps-release-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
@@ -672,7 +667,7 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
@@ -689,7 +684,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -721,7 +716,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -783,30 +777,30 @@ jobs:
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
build_name: libtorch-cuda12_8-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- libtorch-cuda12_9-shared-with-deps-release-build:
+ libtorch-cuda13_0-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -832,7 +826,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -864,7 +858,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -884,7 +877,7 @@ jobs:
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
- name: libtorch-cuda12_9-shared-with-deps-release
+ name: libtorch-cuda13_0-shared-with-deps-release
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
@@ -902,27 +895,27 @@ jobs:
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- libtorch-cuda12_9-shared-with-deps-release-test: # Testing
+ libtorch-cuda13_0-shared-with-deps-release-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - libtorch-cuda12_9-shared-with-deps-release-build
+ - libtorch-cuda13_0-shared-with-deps-release-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.10"
steps:
- name: Display EC2 information
shell: bash
@@ -939,7 +932,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -971,7 +964,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -992,7 +984,7 @@ jobs:
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
- name: libtorch-cuda12_9-shared-with-deps-release
+ name: libtorch-cuda13_0-shared-with-deps-release
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
@@ -1015,26 +1007,26 @@ jobs:
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- libtorch-cuda12_9-shared-with-deps-release-upload: # Uploading
+ libtorch-cuda13_0-shared-with-deps-release-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: libtorch-cuda12_9-shared-with-deps-release-test
+ needs: libtorch-cuda13_0-shared-with-deps-release-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
- build_name: libtorch-cuda12_9-shared-with-deps-release
+ DESIRED_PYTHON: "3.10"
+ build_name: libtorch-cuda13_0-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
diff --git a/.github/workflows/generated-windows-binary-wheel-nightly.yml b/.github/workflows/generated-windows-binary-wheel-nightly.yml
index b476973a1d86..bca8d4843463 100644
--- a/.github/workflows/generated-windows-binary-wheel-nightly.yml
+++ b/.github/workflows/generated-windows-binary-wheel-nightly.yml
@@ -35,7 +35,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -45,7 +45,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -80,7 +80,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -112,7 +112,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -156,7 +155,7 @@ jobs:
- wheel-py3_10-cpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -182,7 +181,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -214,7 +213,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -280,7 +278,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -316,7 +314,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -348,7 +346,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -392,7 +389,7 @@ jobs:
- wheel-py3_10-cuda12_6-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -419,7 +416,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -451,7 +448,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -518,7 +514,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -554,7 +550,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -586,7 +582,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -630,7 +625,7 @@ jobs:
- wheel-py3_10-cuda12_8-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -657,7 +652,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -689,7 +684,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -752,18 +746,18 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- wheel-py3_10-cuda12_9-build:
+ wheel-py3_10-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.10"
@@ -792,7 +786,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -824,7 +818,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -844,7 +837,7 @@ jobs:
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
- name: wheel-py3_10-cuda12_9
+ name: wheel-py3_10-cuda13_0
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
@@ -862,20 +855,20 @@ jobs:
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_10-cuda12_9-test: # Testing
+ wheel-py3_10-cuda13_0-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - wheel-py3_10-cuda12_9-build
+ - wheel-py3_10-cuda13_0-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.10"
@@ -895,7 +888,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -927,7 +920,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -948,7 +940,7 @@ jobs:
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
- name: wheel-py3_10-cuda12_9
+ name: wheel-py3_10-cuda13_0
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
@@ -971,22 +963,22 @@ jobs:
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_10-cuda12_9-upload: # Uploading
+ wheel-py3_10-cuda13_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: wheel-py3_10-cuda12_9-test
+ needs: wheel-py3_10-cuda13_0-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DESIRED_PYTHON: "3.10"
- build_name: wheel-py3_10-cuda12_9
+ build_name: wheel-py3_10-cuda13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -994,7 +986,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -1004,7 +996,7 @@ jobs:
GPU_ARCH_TYPE: xpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.10"
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -1030,7 +1022,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -1062,7 +1054,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1106,7 +1097,7 @@ jobs:
- wheel-py3_10-xpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -1132,7 +1123,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -1164,7 +1155,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1230,7 +1220,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -1265,7 +1255,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -1297,7 +1287,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1341,7 +1330,7 @@ jobs:
- wheel-py3_11-cpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -1367,7 +1356,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -1399,7 +1388,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1465,7 +1453,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -1501,7 +1489,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -1533,7 +1521,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1577,7 +1564,7 @@ jobs:
- wheel-py3_11-cuda12_6-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -1604,7 +1591,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -1636,7 +1623,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1703,7 +1689,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -1739,7 +1725,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -1771,7 +1757,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1815,7 +1800,7 @@ jobs:
- wheel-py3_11-cuda12_8-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -1842,7 +1827,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -1874,7 +1859,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -1937,18 +1921,18 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- wheel-py3_11-cuda12_9-build:
+ wheel-py3_11-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
@@ -1977,7 +1961,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -2009,7 +1993,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2029,7 +2012,7 @@ jobs:
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
- name: wheel-py3_11-cuda12_9
+ name: wheel-py3_11-cuda13_0
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
@@ -2047,20 +2030,20 @@ jobs:
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_11-cuda12_9-test: # Testing
+ wheel-py3_11-cuda13_0-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - wheel-py3_11-cuda12_9-build
+ - wheel-py3_11-cuda13_0-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
@@ -2080,7 +2063,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -2112,7 +2095,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2133,7 +2115,7 @@ jobs:
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
- name: wheel-py3_11-cuda12_9
+ name: wheel-py3_11-cuda13_0
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
@@ -2156,22 +2138,22 @@ jobs:
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_11-cuda12_9-upload: # Uploading
+ wheel-py3_11-cuda13_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: wheel-py3_11-cuda12_9-test
+ needs: wheel-py3_11-cuda13_0-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DESIRED_PYTHON: "3.11"
- build_name: wheel-py3_11-cuda12_9
+ build_name: wheel-py3_11-cuda13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -2179,7 +2161,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -2189,7 +2171,7 @@ jobs:
GPU_ARCH_TYPE: xpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -2215,7 +2197,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -2247,7 +2229,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2291,7 +2272,7 @@ jobs:
- wheel-py3_11-xpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -2317,7 +2298,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -2349,7 +2330,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2415,7 +2395,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -2450,7 +2430,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -2482,7 +2462,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2526,7 +2505,7 @@ jobs:
- wheel-py3_12-cpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -2552,7 +2531,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -2584,7 +2563,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2650,7 +2628,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -2686,7 +2664,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -2718,7 +2696,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2762,7 +2739,7 @@ jobs:
- wheel-py3_12-cuda12_6-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -2789,7 +2766,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -2821,7 +2798,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -2888,7 +2864,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -2924,7 +2900,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -2956,7 +2932,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3000,7 +2975,7 @@ jobs:
- wheel-py3_12-cuda12_8-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -3027,7 +3002,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -3059,7 +3034,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3122,18 +3096,18 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- wheel-py3_12-cuda12_9-build:
+ wheel-py3_12-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
@@ -3162,7 +3136,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -3194,7 +3168,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3214,7 +3187,7 @@ jobs:
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
- name: wheel-py3_12-cuda12_9
+ name: wheel-py3_12-cuda13_0
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
@@ -3232,20 +3205,20 @@ jobs:
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_12-cuda12_9-test: # Testing
+ wheel-py3_12-cuda13_0-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - wheel-py3_12-cuda12_9-build
+ - wheel-py3_12-cuda13_0-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
@@ -3265,7 +3238,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -3297,7 +3270,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3318,7 +3290,7 @@ jobs:
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
- name: wheel-py3_12-cuda12_9
+ name: wheel-py3_12-cuda13_0
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
@@ -3341,22 +3313,22 @@ jobs:
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_12-cuda12_9-upload: # Uploading
+ wheel-py3_12-cuda13_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: wheel-py3_12-cuda12_9-test
+ needs: wheel-py3_12-cuda13_0-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DESIRED_PYTHON: "3.12"
- build_name: wheel-py3_12-cuda12_9
+ build_name: wheel-py3_12-cuda13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -3364,7 +3336,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -3374,7 +3346,7 @@ jobs:
GPU_ARCH_TYPE: xpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -3400,7 +3372,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -3432,7 +3404,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3476,7 +3447,7 @@ jobs:
- wheel-py3_12-xpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -3502,7 +3473,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -3534,7 +3505,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3600,7 +3570,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -3635,7 +3605,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -3667,7 +3637,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3711,7 +3680,7 @@ jobs:
- wheel-py3_13-cpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -3737,7 +3706,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -3769,7 +3738,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3835,7 +3803,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -3871,7 +3839,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -3903,7 +3871,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -3947,7 +3914,7 @@ jobs:
- wheel-py3_13-cuda12_6-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -3974,7 +3941,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -4006,7 +3973,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4073,7 +4039,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -4109,7 +4075,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -4141,7 +4107,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4185,7 +4150,7 @@ jobs:
- wheel-py3_13-cuda12_8-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -4212,7 +4177,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -4244,7 +4209,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4307,18 +4271,18 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- wheel-py3_13-cuda12_9-build:
+ wheel-py3_13-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.13"
@@ -4347,7 +4311,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -4379,7 +4343,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4399,7 +4362,7 @@ jobs:
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
- name: wheel-py3_13-cuda12_9
+ name: wheel-py3_13-cuda13_0
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
@@ -4417,20 +4380,20 @@ jobs:
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_13-cuda12_9-test: # Testing
+ wheel-py3_13-cuda13_0-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - wheel-py3_13-cuda12_9-build
+ - wheel-py3_13-cuda13_0-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.13"
@@ -4450,7 +4413,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -4482,7 +4445,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4503,7 +4465,7 @@ jobs:
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
- name: wheel-py3_13-cuda12_9
+ name: wheel-py3_13-cuda13_0
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
@@ -4526,22 +4488,22 @@ jobs:
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_13-cuda12_9-upload: # Uploading
+ wheel-py3_13-cuda13_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: wheel-py3_13-cuda12_9-test
+ needs: wheel-py3_13-cuda13_0-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DESIRED_PYTHON: "3.13"
- build_name: wheel-py3_13-cuda12_9
+ build_name: wheel-py3_13-cuda13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -4549,7 +4511,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -4559,7 +4521,7 @@ jobs:
GPU_ARCH_TYPE: xpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.13"
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -4585,7 +4547,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -4617,7 +4579,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4661,7 +4622,7 @@ jobs:
- wheel-py3_13-xpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -4687,7 +4648,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -4719,7 +4680,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4785,7 +4745,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -4820,7 +4780,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -4852,7 +4812,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -4896,7 +4855,7 @@ jobs:
- wheel-py3_13t-cpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -4922,7 +4881,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -4954,7 +4913,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -5020,7 +4978,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -5056,7 +5014,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -5088,7 +5046,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -5132,7 +5089,7 @@ jobs:
- wheel-py3_13t-cuda12_6-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -5159,7 +5116,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -5191,7 +5148,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -5258,7 +5214,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -5294,7 +5250,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -5326,7 +5282,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -5370,7 +5325,7 @@ jobs:
- wheel-py3_13t-cuda12_8-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -5397,7 +5352,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -5429,7 +5384,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -5492,18 +5446,18 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- wheel-py3_13t-cuda12_9-build:
+ wheel-py3_13t-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.13t"
@@ -5532,7 +5486,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -5564,7 +5518,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -5584,7 +5537,7 @@ jobs:
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
- name: wheel-py3_13t-cuda12_9
+ name: wheel-py3_13t-cuda13_0
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
@@ -5602,20 +5555,20 @@ jobs:
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_13t-cuda12_9-test: # Testing
+ wheel-py3_13t-cuda13_0-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - wheel-py3_13t-cuda12_9-build
+ - wheel-py3_13t-cuda13_0-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.13t"
@@ -5635,7 +5588,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -5667,7 +5620,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -5688,7 +5640,7 @@ jobs:
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
- name: wheel-py3_13t-cuda12_9
+ name: wheel-py3_13t-cuda13_0
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
@@ -5711,22 +5663,22 @@ jobs:
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_13t-cuda12_9-upload: # Uploading
+ wheel-py3_13t-cuda13_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: wheel-py3_13t-cuda12_9-test
+ needs: wheel-py3_13t-cuda13_0-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DESIRED_PYTHON: "3.13t"
- build_name: wheel-py3_13t-cuda12_9
+ build_name: wheel-py3_13t-cuda13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -5734,7 +5686,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -5744,7 +5696,7 @@ jobs:
GPU_ARCH_TYPE: xpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.13t"
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -5770,7 +5722,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -5802,7 +5754,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -5846,7 +5797,7 @@ jobs:
- wheel-py3_13t-xpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -5872,7 +5823,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -5904,7 +5855,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -5970,7 +5920,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -6005,7 +5955,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -6037,7 +5987,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -6081,7 +6030,7 @@ jobs:
- wheel-py3_14-cpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -6107,7 +6056,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -6139,7 +6088,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -6205,7 +6153,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -6241,7 +6189,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -6273,7 +6221,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -6317,7 +6264,7 @@ jobs:
- wheel-py3_14-cuda12_6-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -6344,7 +6291,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -6376,7 +6323,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -6443,7 +6389,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -6479,7 +6425,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -6511,7 +6457,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -6555,7 +6500,7 @@ jobs:
- wheel-py3_14-cuda12_8-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -6582,7 +6527,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -6614,7 +6559,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -6677,18 +6621,18 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- wheel-py3_14-cuda12_9-build:
+ wheel-py3_14-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.14"
@@ -6717,7 +6661,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -6749,7 +6693,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -6769,7 +6712,7 @@ jobs:
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
- name: wheel-py3_14-cuda12_9
+ name: wheel-py3_14-cuda13_0
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
@@ -6787,20 +6730,20 @@ jobs:
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_14-cuda12_9-test: # Testing
+ wheel-py3_14-cuda13_0-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - wheel-py3_14-cuda12_9-build
+ - wheel-py3_14-cuda13_0-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.14"
@@ -6820,7 +6763,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -6852,7 +6795,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -6873,7 +6815,7 @@ jobs:
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
- name: wheel-py3_14-cuda12_9
+ name: wheel-py3_14-cuda13_0
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
@@ -6896,22 +6838,22 @@ jobs:
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_14-cuda12_9-upload: # Uploading
+ wheel-py3_14-cuda13_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: wheel-py3_14-cuda12_9-test
+ needs: wheel-py3_14-cuda13_0-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DESIRED_PYTHON: "3.14"
- build_name: wheel-py3_14-cuda12_9
+ build_name: wheel-py3_14-cuda13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -6919,7 +6861,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -6929,7 +6871,7 @@ jobs:
GPU_ARCH_TYPE: xpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.14"
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -6955,7 +6897,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -6987,7 +6929,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -7031,7 +6972,7 @@ jobs:
- wheel-py3_14-xpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -7057,7 +6998,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -7089,7 +7030,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -7155,7 +7095,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -7190,7 +7130,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -7222,7 +7162,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -7266,7 +7205,7 @@ jobs:
- wheel-py3_14t-cpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -7292,7 +7231,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -7324,7 +7263,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -7390,7 +7328,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -7426,7 +7364,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -7458,7 +7396,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -7502,7 +7439,7 @@ jobs:
- wheel-py3_14t-cuda12_6-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -7529,7 +7466,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -7561,7 +7498,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -7628,7 +7564,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -7664,7 +7600,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -7696,7 +7632,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -7740,7 +7675,7 @@ jobs:
- wheel-py3_14t-cuda12_8-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -7767,7 +7702,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -7799,7 +7734,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -7862,18 +7796,18 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
- wheel-py3_14t-cuda12_9-build:
+ wheel-py3_14t-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.14t"
@@ -7902,7 +7836,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -7934,7 +7868,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -7954,7 +7887,7 @@ jobs:
- uses: actions/upload-artifact@v4.4.0
if: always()
with:
- name: wheel-py3_14t-cuda12_9
+ name: wheel-py3_14t-cuda13_0
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
@@ -7972,20 +7905,20 @@ jobs:
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_14t-cuda12_9-test: # Testing
+ wheel-py3_14t-cuda13_0-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- - wheel-py3_14t-cuda12_9-build
+ - wheel-py3_14t-cuda13_0-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.14t"
@@ -8005,7 +7938,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -8037,7 +7970,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -8058,7 +7990,7 @@ jobs:
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
- name: wheel-py3_14t-cuda12_9
+ name: wheel-py3_14t-cuda13_0
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Populate binary env
shell: bash
@@ -8081,22 +8013,22 @@ jobs:
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- wheel-py3_14t-cuda12_9-upload: # Uploading
+ wheel-py3_14t-cuda13_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
- needs: wheel-py3_14t-cuda12_9-test
+ needs: wheel-py3_14t-cuda13_0-test
with:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
- DESIRED_CUDA: cu129
- GPU_ARCH_VERSION: "12.9"
+ DESIRED_CUDA: cu130
+ GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DESIRED_PYTHON: "3.14t"
- build_name: wheel-py3_14t-cuda12_9
+ build_name: wheel-py3_14t-cuda13_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
@@ -8104,7 +8036,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -8114,7 +8046,7 @@ jobs:
GPU_ARCH_TYPE: xpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.14t"
- PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.1.1 | intel-cmplr-lib-ur==2025.1.1 | intel-cmplr-lic-rt==2025.1.1 | intel-sycl-rt==2025.1.1 | oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.1.0 | onemkl-sycl-dft==2025.1.0 | onemkl-sycl-lapack==2025.1.0 | onemkl-sycl-rng==2025.1.0 | onemkl-sycl-sparse==2025.1.0 | dpcpp-cpp-rt==2025.1.1 | intel-opencl-rt==2025.1.1 | mkl==2025.1.0 | intel-openmp==2025.1.1 | tbb==2022.1.0 | tcmlib==1.3.0 | umf==0.10.0 | intel-pti==0.12.3
+ PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.2.1 | intel-cmplr-lib-ur==2025.2.1 | intel-cmplr-lic-rt==2025.2.1 | intel-sycl-rt==2025.2.1 | oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-sycl-blas==2025.2.0 | onemkl-sycl-dft==2025.2.0 | onemkl-sycl-lapack==2025.2.0 | onemkl-sycl-rng==2025.2.0 | onemkl-sycl-sparse==2025.2.0 | dpcpp-cpp-rt==2025.2.1 | intel-opencl-rt==2025.2.1 | mkl==2025.2.0 | intel-openmp==2025.2.1 | tbb==2022.2.0 | tcmlib==1.4.0 | umf==0.11.0 | intel-pti==0.13.1
steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
@@ -8140,7 +8072,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -8172,7 +8104,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
@@ -8216,7 +8147,7 @@ jobs:
- wheel-py3_14t-xpu-build
- get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
- timeout-minutes: 300
+ timeout-minutes: 360
env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
PACKAGE_TYPE: wheel
@@ -8242,7 +8173,7 @@ jobs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.9
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -8274,7 +8205,6 @@ jobs:
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
diff --git a/.github/workflows/h100-cutlass-backend.yml b/.github/workflows/h100-cutlass-backend.yml
index edf4c2e0e807..6eb072399242 100644
--- a/.github/workflows/h100-cutlass-backend.yml
+++ b/.github/workflows/h100-cutlass-backend.yml
@@ -27,7 +27,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
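The recurring `@main` → `@release/2.9` edits in these files all rely on the same GitHub Actions rule: the suffix after `@` in a `uses:` reference is a git ref (branch, tag, or commit SHA), so pinning reusable workflows and actions to a release branch freezes the CI definitions a release builds against. A minimal sketch of the caller side, with hypothetical repo and workflow names:

    jobs:
      get-label-type:
        # reusable workflow pinned to a release branch instead of a moving main
        uses: example-org/example-repo/.github/workflows/_helper.yml@release/2.9
        with:
          triggering_actor: ${{ github.triggering_actor }}
      build:
        runs-on: ubuntu-latest
        steps:
          # ordinary actions are pinned with the same '@ref' syntax
          - uses: example-org/example-actions/setup-something@release/2.9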
diff --git a/.github/workflows/h100-distributed.yml b/.github/workflows/h100-distributed.yml
index a0a7495483d4..8996add88383 100644
--- a/.github/workflows/h100-distributed.yml
+++ b/.github/workflows/h100-distributed.yml
@@ -24,7 +24,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
diff --git a/.github/workflows/h100-symm-mem.yml b/.github/workflows/h100-symm-mem.yml
index c75ca569fc7d..fa8a795216f3 100644
--- a/.github/workflows/h100-symm-mem.yml
+++ b/.github/workflows/h100-symm-mem.yml
@@ -24,7 +24,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
diff --git a/.github/workflows/inductor-micro-benchmark-x86.yml b/.github/workflows/inductor-micro-benchmark-x86.yml
index 117183428abc..c6cc075e6b27 100644
--- a/.github/workflows/inductor-micro-benchmark-x86.yml
+++ b/.github/workflows/inductor-micro-benchmark-x86.yml
@@ -18,13 +18,13 @@ permissions:
contents: read
jobs:
- linux-jammy-cpu-py3_9-gcc11-inductor-build:
+ inductor-build:
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ name: inductor-build
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-jammy-py3.9-gcc11
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
# Use metal host for benchmark jobs
test-matrix: |
{ include: [
@@ -32,13 +32,13 @@ jobs:
]}
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-inductor-micro-benchmark-test:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ inductor-micro-benchmark-test:
+ name: inductor-micro-benchmark-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-inductor-build
+ needs: inductor-build
with:
build-environment: linux-jammy-py3.9-gcc11
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
+ docker-image: ${{ needs.inductor-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
timeout-minutes: 720
secrets: inherit
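The `docker-image`/`test-matrix` wiring above shows the plumbing between a build job and its test job: a reusable workflow surfaces job outputs through `on.workflow_call.outputs`, and the caller forwards them with `needs.<job-id>.outputs.<name>`. A hypothetical minimal pair:

    # _build.yml - the called workflow declares what it exports
    on:
      workflow_call:
        outputs:
          docker-image:
            value: ${{ jobs.build.outputs.docker-image }}

    # caller - the test job consumes the build job's outputs
    jobs:
      build:
        uses: ./.github/workflows/_build.yml
      test:
        needs: build
        uses: ./.github/workflows/_test.yml
        with:
          docker-image: ${{ needs.build.outputs.docker-image }}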
diff --git a/.github/workflows/inductor-micro-benchmark.yml b/.github/workflows/inductor-micro-benchmark.yml
index a0ae234ab566..842094e0eb48 100644
--- a/.github/workflows/inductor-micro-benchmark.yml
+++ b/.github/workflows/inductor-micro-benchmark.yml
@@ -20,7 +20,7 @@ permissions:
jobs:
get-default-label-prefix:
name: get-default-label-prefix
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
diff --git a/.github/workflows/inductor-nightly.yml b/.github/workflows/inductor-nightly.yml
index c17a4ed6341a..7502381de93d 100644
--- a/.github/workflows/inductor-nightly.yml
+++ b/.github/workflows/inductor-nightly.yml
@@ -23,7 +23,7 @@ permissions:
jobs:
get-default-label-prefix:
name: get-default-label-prefix
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -32,13 +32,13 @@ jobs:
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
- linux-jammy-cpu-py3_9-gcc11-nightly-dynamo-benchmarks-build:
- name: linux-jammy-cpu-py3.9-gcc11-nightly-dynamo-benchmarks
+ nightly-dynamo-benchmarks-build:
+ name: nightly-dynamo-benchmarks-build
uses: ./.github/workflows/_linux-build.yml
needs: get-default-label-prefix
with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
test-matrix: |
{ include: [
@@ -51,13 +51,13 @@ jobs:
build-additional-packages: "vision audio torchao"
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-nightly-dynamo-benchmarks-test:
- name: linux-jammy-cpu-py3.9-gcc11-nightly-dynamo-benchmarks
+ nightly-dynamo-benchmarks-test:
+ name: nightly-dynamo-benchmarks-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-nightly-dynamo-benchmarks-build
+ needs: nightly-dynamo-benchmarks-build
with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-nightly-dynamo-benchmarks-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-nightly-dynamo-benchmarks-build.outputs.test-matrix }}
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image: ${{ needs.nightly-dynamo-benchmarks-build.outputs.docker-image }}
+ test-matrix: ${{ needs.nightly-dynamo-benchmarks-build.outputs.test-matrix }}
timeout-minutes: 720
secrets: inherit
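One detail worth calling out in these renames: `needs:` and `${{ needs.<id>.outputs.* }}` resolve against the job id (the YAML key), never the `name:` display string, which is why each rename touches the job key, its `needs:` references, and its outputs expressions together. A minimal sketch:

    jobs:
      inductor-build:            # job id - what needs/outputs expressions reference
        name: inductor-build     # display name - affects the UI only
        runs-on: ubuntu-latest
        outputs:
          docker-image: ${{ steps.calc.outputs.image }}
        steps:
          - id: calc
            run: echo "image=ci-image:example" >> "$GITHUB_OUTPUT"
      inductor-test:
        needs: inductor-build    # must match the job id above, not the display name
        runs-on: ubuntu-latest
        steps:
          - run: echo "testing with ${{ needs.inductor-build.outputs.docker-image }}"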
diff --git a/.github/workflows/inductor-perf-compare.yml b/.github/workflows/inductor-perf-compare.yml
index 628f62424012..35217f72bf1a 100644
--- a/.github/workflows/inductor-perf-compare.yml
+++ b/.github/workflows/inductor-perf-compare.yml
@@ -18,7 +18,7 @@ jobs:
get-default-label-prefix:
if: github.repository_owner == 'pytorch'
name: get-default-label-prefix
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
diff --git a/.github/workflows/inductor-perf-test-b200.yml b/.github/workflows/inductor-perf-test-b200.yml
index 7b59e92386a3..3c648a849f78 100644
--- a/.github/workflows/inductor-perf-test-b200.yml
+++ b/.github/workflows/inductor-perf-test-b200.yml
@@ -70,7 +70,7 @@ permissions:
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
diff --git a/.github/workflows/inductor-perf-test-nightly-aarch64.yml b/.github/workflows/inductor-perf-test-nightly-aarch64.yml
index e16c8be79130..9e3165fe11ea 100644
--- a/.github/workflows/inductor-perf-test-nightly-aarch64.yml
+++ b/.github/workflows/inductor-perf-test-nightly-aarch64.yml
@@ -55,7 +55,7 @@ permissions:
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
diff --git a/.github/workflows/inductor-perf-test-nightly-h100.yml b/.github/workflows/inductor-perf-test-nightly-h100.yml
index dfaec8240d6c..7e323fa5a92e 100644
--- a/.github/workflows/inductor-perf-test-nightly-h100.yml
+++ b/.github/workflows/inductor-perf-test-nightly-h100.yml
@@ -75,7 +75,7 @@ permissions:
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -84,9 +84,8 @@ jobs:
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
- # NB: Keep this in sync with trunk.yml
build:
- name: cuda12.8-py3.10-gcc9-sm90
+ name: build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
@@ -128,7 +127,7 @@ jobs:
secrets: inherit
test-periodically:
- name: cuda12.8-py3.10-gcc9-sm90
+ name: test-periodically
uses: ./.github/workflows/_linux-test.yml
needs: build
if: github.event.schedule == '15 0,12 * * 1-6'
@@ -145,7 +144,7 @@ jobs:
secrets: inherit
test-weekly:
- name: cuda12.8-py3.10-gcc9-sm90
+ name: test-weekly
uses: ./.github/workflows/_linux-test.yml
needs: build
if: github.event.schedule == '0 7 * * 0'
@@ -162,9 +161,12 @@ jobs:
secrets: inherit
test:
- name: cuda12.8-py3.10-gcc9-sm90
+ name: test
uses: ./.github/workflows/_linux-test.yml
needs: build
+ # The pull_request trigger is used by the PR that bumps the transformers pin,
+ # which always needs one round of benchmarks
+ if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' }}
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm90
dashboard-tag: training-${{ inputs.training || 'true' }}-inference-${{ inputs.inference || 'true' }}-default-${{ inputs.default || 'true' }}-dynamic-${{ inputs.dynamic || 'true' }}-cudagraphs-${{ inputs.cudagraphs || 'true' }}-cppwrapper-${{ inputs.cppwrapper || 'false' }}-aotinductor-${{ inputs.aotinductor || 'false' }}-maxautotune-${{ inputs.maxautotune || 'false' }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs || 'false' }}-cudagraphs_low_precision-${{ inputs.cudagraphs || 'false' }}
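The `if: github.event.schedule == '...'` guards on `test-periodically` and `test-weekly` above work because, for `schedule`-triggered runs, `github.event.schedule` carries the exact cron string that fired; one workflow with several cron entries can therefore route each cadence to a different job. A minimal sketch:

    on:
      schedule:
        - cron: '15 0,12 * * 1-6'   # twice daily, Monday-Saturday
        - cron: '0 7 * * 0'         # weekly, Sunday
    jobs:
      periodic:
        if: github.event.schedule == '15 0,12 * * 1-6'
        runs-on: ubuntu-latest
        steps:
          - run: echo "periodic benchmark pass"
      weekly:
        if: github.event.schedule == '0 7 * * 0'
        runs-on: ubuntu-latest
        steps:
          - run: echo "weekly benchmark pass"

Note that the comparison is a plain string match, so the guard must reproduce the cron entry exactly as written under `on.schedule`.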
diff --git a/.github/workflows/inductor-perf-test-nightly-macos.yml b/.github/workflows/inductor-perf-test-nightly-macos.yml
index 0d92455a8f3c..c3b9a4229924 100644
--- a/.github/workflows/inductor-perf-test-nightly-macos.yml
+++ b/.github/workflows/inductor-perf-test-nightly-macos.yml
@@ -48,6 +48,9 @@ jobs:
{ config: "perf_smoketest", shard: 1, num_shards: 3, runner: "macos-m2-15" },
{ config: "perf_smoketest", shard: 2, num_shards: 3, runner: "macos-m2-15" },
{ config: "perf_smoketest", shard: 3, num_shards: 3, runner: "macos-m2-15" },
+ { config: "aot_inductor_perf_smoketest", shard: 1, num_shards: 3, runner: "macos-m2-15" },
+ { config: "aot_inductor_perf_smoketest", shard: 2, num_shards: 3, runner: "macos-m2-15" },
+ { config: "aot_inductor_perf_smoketest", shard: 3, num_shards: 3, runner: "macos-m2-15" },
]}
secrets: inherit
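The `test-matrix` blocks in these files are JSON strings parsed by PyTorch's reusable workflows rather than by GitHub itself, but their shape mirrors the native `strategy.matrix.include` form, where each object becomes one job instance. A native-matrix sketch of the same sharding idea (the script name is hypothetical):

    jobs:
      test:
        runs-on: macos-latest
        strategy:
          matrix:
            include:
              - { config: perf_smoketest, shard: 1, num_shards: 3 }
              - { config: perf_smoketest, shard: 2, num_shards: 3 }
              - { config: perf_smoketest, shard: 3, num_shards: 3 }
        steps:
          - run: ./run_benchmarks.sh --config "${{ matrix.config }}" --shard "${{ matrix.shard }}" --num-shards "${{ matrix.num_shards }}"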
diff --git a/.github/workflows/inductor-perf-test-nightly-rocm.yml b/.github/workflows/inductor-perf-test-nightly-rocm.yml
index f329fe74e6b6..dddf68091fdb 100644
--- a/.github/workflows/inductor-perf-test-nightly-rocm.yml
+++ b/.github/workflows/inductor-perf-test-nightly-rocm.yml
@@ -70,7 +70,7 @@ permissions: read-all
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
diff --git a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml
index 6e19130a1924..8057b1042676 100644
--- a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml
+++ b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml
@@ -60,7 +60,7 @@ permissions:
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -69,14 +69,14 @@ jobs:
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
- linux-jammy-zen-cpu-py3_9-gcc11-inductor-build:
- name: linux-jammy-zen-cpu-py3.9-gcc11-inductor
+ inductor-build:
+ name: inductor-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
test-matrix: |
{ include: [
{ config: "inductor_huggingface_perf_cpu_x86_zen", shard: 1, num_shards: 3, runner: "linux.24xlarge.amd" },
@@ -95,16 +95,16 @@ jobs:
selected-test-configs: ${{ inputs.benchmark_configs }}
secrets: inherit
- linux-jammy-zen-cpu-py3_9-gcc11-inductor-test-nightly:
- name: linux-jammy-zen-cpu-py3.9-gcc11-inductor
+ inductor-test-nightly:
+ name: inductor-test-nightly
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-zen-cpu-py3_9-gcc11-inductor-build
+ needs: inductor-build
if: github.event.schedule == '0 7 * * *'
with:
- build-environment: linux-jammy-py3.9-gcc11-build
+ build-environment: linux-jammy-py3.10-gcc11-build
dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true
- docker-image: ${{ needs.linux-jammy-zen-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-zen-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
+ docker-image: ${{ needs.inductor-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
timeout-minutes: 720
# keep the monitor enabled in perf tests
disable-monitor: false
@@ -112,17 +112,16 @@ jobs:
monitor-data-collect-interval: 4
secrets: inherit
-
- linux-jammy-zen-cpu-py3_9-gcc11-inductor-test:
- name: linux-jammy-zen-cpu-py3.9-gcc11-inductor
+ inductor-test:
+ name: inductor-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-zen-cpu-py3_9-gcc11-inductor-build
+ needs: inductor-build
if: github.event_name == 'workflow_dispatch'
with:
- build-environment: linux-jammy-py3.9-gcc11-build
+ build-environment: linux-jammy-py3.10-gcc11-build
dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}
- docker-image: ${{ needs.linux-jammy-zen-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-zen-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
+ docker-image: ${{ needs.inductor-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
timeout-minutes: 720
# keep the monitor enabled in perf tests
disable-monitor: false
diff --git a/.github/workflows/inductor-perf-test-nightly-x86.yml b/.github/workflows/inductor-perf-test-nightly-x86.yml
index 62234e5f499a..b68e9ad95ca4 100644
--- a/.github/workflows/inductor-perf-test-nightly-x86.yml
+++ b/.github/workflows/inductor-perf-test-nightly-x86.yml
@@ -65,7 +65,7 @@ permissions:
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -74,14 +74,14 @@ jobs:
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
- linux-jammy-cpu-py3_9-gcc11-inductor-build:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ inductor-build:
+ name: inductor-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
test-matrix: |
{ include: [
{ config: "inductor_huggingface_perf_cpu_x86", shard: 1, num_shards: 3, runner: "linux.24xl.spr-metal" },
@@ -101,16 +101,16 @@ jobs:
build-additional-packages: "vision audio torchao"
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ inductor-test-nightly-freezing:
+ name: inductor-test-nightly-freezing
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-inductor-build
+ needs: inductor-build
if: github.event.schedule == '0 7 * * *'
with:
- build-environment: linux-jammy-py3.9-gcc11-build
+ build-environment: linux-jammy-py3.10-gcc11-build
dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
+ docker-image: ${{ needs.inductor-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
timeout-minutes: 720
# keep the monitor enabled in perf tests
disable-monitor: false
@@ -118,16 +118,16 @@ jobs:
monitor-data-collect-interval: 4
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-inductor-test:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ inductor-test:
+ name: inductor-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-inductor-build
+ needs: inductor-build
if: github.event_name == 'workflow_dispatch'
with:
- build-environment: linux-jammy-py3.9-gcc11-build
+ build-environment: linux-jammy-py3.10-gcc11-build
dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-freezing-${{ inputs.freezing }}
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
+ docker-image: ${{ needs.inductor-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
timeout-minutes: 720
# keep the monitor enabled in perf tests
disable-monitor: false
diff --git a/.github/workflows/inductor-perf-test-nightly.yml b/.github/workflows/inductor-perf-test-nightly.yml
index 9fd81a5a05c9..7c573d4d2571 100644
--- a/.github/workflows/inductor-perf-test-nightly.yml
+++ b/.github/workflows/inductor-perf-test-nightly.yml
@@ -70,7 +70,7 @@ permissions:
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -79,7 +79,6 @@ jobs:
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
- # NB: Keep this in sync with trunk.yml
build:
name: cuda12.8-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-build.yml
diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml
index 436cf95c156d..b17ebb84d5d3 100644
--- a/.github/workflows/inductor-periodic.yml
+++ b/.github/workflows/inductor-periodic.yml
@@ -22,7 +22,7 @@ permissions:
jobs:
get-default-label-prefix:
name: get-default-label-prefix
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -31,8 +31,8 @@ jobs:
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
- linux-jammy-cuda12_8-py3_10-gcc9-periodic-dynamo-benchmarks-build:
- name: cuda12.8-py3.10-gcc9-sm86-periodic-dynamo-benchmarks
+ periodic-dynamo-benchmarks-build:
+ name: periodic-dynamo-benchmarks-build
uses: ./.github/workflows/_linux-build.yml
needs: get-default-label-prefix
with:
@@ -57,23 +57,33 @@ jobs:
{ config: "dynamic_aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "dynamic_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
build-additional-packages: "vision audio fbgemm torchao"
secrets: inherit
- linux-jammy-cuda12_8-py3_10-gcc9-periodic-dynamo-benchmarks-test:
- name: cuda12.8-py3.10-gcc9-sm86-periodic-dynamo-benchmarks
+ periodic-dynamo-benchmarks-test:
+ name: periodic-dynamo-benchmarks-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cuda12_8-py3_10-gcc9-periodic-dynamo-benchmarks-build
+ needs: periodic-dynamo-benchmarks-build
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
- docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
+ docker-image: ${{ needs.periodic-dynamo-benchmarks-build.outputs.docker-image }}
+ test-matrix: ${{ needs.periodic-dynamo-benchmarks-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build:
+ rocm-periodic-dynamo-benchmarks-build:
if: github.repository_owner == 'pytorch'
- name: rocm-py3_10-periodic-dynamo-benchmarks
+ name: rocm-periodic-dynamo-benchmarks-build
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-jammy-rocm-py3_10
@@ -99,21 +109,21 @@ jobs:
]}
secrets: inherit
- linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-test:
+ rocm-periodic-dynamo-benchmarks-test:
permissions:
id-token: write
contents: read
- name: rocm-py3_10-periodic-dynamo-benchmarks
+ name: rocm-periodic-dynamo-benchmarks-test
uses: ./.github/workflows/_rocm-test.yml
- needs: linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build
+ needs: rocm-periodic-dynamo-benchmarks-build
with:
build-environment: linux-jammy-rocm-py3_10
- docker-image: ${{ needs.linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-rocm-py3_10-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
+ docker-image: ${{ needs.rocm-periodic-dynamo-benchmarks-build.outputs.docker-image }}
+ test-matrix: ${{ needs.rocm-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-cuda12_8-py3_10-gcc9-inductor-smoke-build:
- name: cuda12.8-py3.10-gcc9-sm80
+ inductor-smoke-build:
+ name: inductor-smoke-build
uses: ./.github/workflows/_linux-build.yml
needs:
- get-default-label-prefix
@@ -129,23 +139,23 @@ jobs:
build-additional-packages: "vision audio fbgemm torchao"
secrets: inherit
- linux-jammy-cuda12_8-py3_10-gcc9-inductor-smoke-test:
- name: cuda12.8-py3.10-gcc9-sm80
+ inductor-smoke-test:
+ name: inductor-smoke-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cuda12_8-py3_10-gcc9-inductor-smoke-build
+ needs: inductor-smoke-build
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
- docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-smoke-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-smoke-build.outputs.test-matrix }}
+ docker-image: ${{ needs.inductor-smoke-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-smoke-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-periodic-dynamo-benchmarks-build:
- name: linux-jammy-cpu-py3.9-gcc11-periodic-dynamo-benchmarks
+ periodic-dynamo-benchmarks-cpu-build:
+ name: periodic-dynamo-benchmarks-cpu-build
uses: ./.github/workflows/_linux-build.yml
needs: get-default-label-prefix
with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
test-matrix: |
{ include: [
@@ -160,68 +170,6 @@ jobs:
{ config: "cpu_inductor_freezing_avx2_torchbench", shard: 2, num_shards: 2, runner: "linux.10xlarge.avx2" },
{ config: "cpu_inductor_freezing_avx2_timm", shard: 1, num_shards: 2, runner: "linux.10xlarge.avx2" },
{ config: "cpu_inductor_freezing_avx2_timm", shard: 2, num_shards: 2, runner: "linux.10xlarge.avx2" },
- ]}
- build-additional-packages: "vision audio torchao"
- secrets: inherit
-
- linux-jammy-cpu-py3_9-gcc11-periodic-dynamo-benchmarks-test:
- name: linux-jammy-cpu-py3.9-gcc11-periodic-dynamo-benchmarks
- uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-periodic-dynamo-benchmarks-build
- with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-periodic-dynamo-benchmarks-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-periodic-dynamo-benchmarks-build.outputs.test-matrix }}
- secrets: inherit
-
-
- linux-jammy-cuda12_8-py3_10-gcc9-inductor-build:
- name: cuda12.8-py3.10-gcc9-sm86
- uses: ./.github/workflows/_linux-build.yml
- needs: get-default-label-prefix
- with:
- build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
- docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
- cuda-arch-list: '8.6'
- runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
- sync-tag: linux-jammy-cuda12_8-py3_10-gcc9-inductor-build
- test-matrix: |
- { include: [
- { config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
- { config: "dynamic_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
- { config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
- { config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
- { config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
- { config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
- { config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
- { config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
- { config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
- { config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
- ]}
- build-additional-packages: "vision audio fbgemm torchao"
- secrets: inherit
-
- linux-jammy-cuda12_8-py3_10-gcc9-inductor-test:
- name: cuda12.8-py3.10-gcc9-sm86
- uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cuda12_8-py3_10-gcc9-inductor-build
- with:
- build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
- docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.test-matrix }}
- secrets: inherit
-
- linux-jammy-cpu-py3_9-gcc11-inductor-build:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
- uses: ./.github/workflows/_linux-build.yml
- needs: get-default-label-prefix
- with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
- runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
- sync-tag: linux-jammy-cpu-py3_9-gcc11-inductor-build
- test-matrix: |
- { include: [
{ config: "cpu_inductor_freezing_huggingface", shard: 1, num_shards: 1, runner: "linux.8xlarge.amx" },
{ config: "cpu_inductor_freezing_timm", shard: 1, num_shards: 2, runner: "linux.8xlarge.amx" },
{ config: "cpu_inductor_freezing_timm", shard: 2, num_shards: 2, runner: "linux.8xlarge.amx" },
@@ -247,12 +195,12 @@ jobs:
build-additional-packages: "vision audio torchao"
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-inductor-test:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ periodic-dynamo-benchmarks-cpu-test:
+ name: periodic-dynamo-benchmarks-cpu-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-inductor-build
+ needs: periodic-dynamo-benchmarks-cpu-build
with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.docker-image }}
+ test-matrix: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.test-matrix }}
secrets: inherit
diff --git a/.github/workflows/inductor-rocm-mi300.yml b/.github/workflows/inductor-rocm-mi300.yml
index 732ec7eb85f3..369eee791dd6 100644
--- a/.github/workflows/inductor-rocm-mi300.yml
+++ b/.github/workflows/inductor-rocm-mi300.yml
@@ -28,7 +28,7 @@ jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
diff --git a/.github/workflows/inductor-rocm.yml b/.github/workflows/inductor-rocm.yml
index b1bb7972d67d..87d78b600f44 100644
--- a/.github/workflows/inductor-rocm.yml
+++ b/.github/workflows/inductor-rocm.yml
@@ -20,7 +20,7 @@ permissions:
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
diff --git a/.github/workflows/inductor-unittest.yml b/.github/workflows/inductor-unittest.yml
index df918c329dd7..31ca8e6faa3b 100644
--- a/.github/workflows/inductor-unittest.yml
+++ b/.github/workflows/inductor-unittest.yml
@@ -19,7 +19,7 @@ permissions:
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -28,8 +28,8 @@ jobs:
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
- linux-jammy-cuda12_8-py3_10-gcc9-inductor-build:
- name: cuda12.8-py3.10-gcc9-sm86
+ inductor-build:
+ name: inductor-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
@@ -47,44 +47,18 @@ jobs:
]}
secrets: inherit
- linux-jammy-cuda12_8-py3_10-gcc9-inductor-test:
- name: cuda12.8-py3.10-gcc9-sm86
+ inductor-test:
+ name: inductor-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cuda12_8-py3_10-gcc9-inductor-build
+ needs: inductor-build
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
- docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.test-matrix }}
+ docker-image: ${{ needs.inductor-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-cuda12_8-py3_12-gcc9-inductor-build:
- name: cuda12.8-py3.12-gcc9-sm86
- uses: ./.github/workflows/_linux-build.yml
- needs: get-label-type
- with:
- build-environment: linux-jammy-cuda12.8-py3.12-gcc9-sm86
- docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc9-inductor-benchmarks
- cuda-arch-list: '8.6'
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- test-matrix: |
- { include: [
- { config: "inductor", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- ]}
- secrets: inherit
-
- linux-jammy-cuda12_8-py3_12-gcc9-inductor-test:
- name: cuda12.8-py3.12-gcc9-sm86
- uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cuda12_8-py3_12-gcc9-inductor-build
- with:
- build-environment: linux-jammy-cuda12.8-py3.12-gcc9-sm86
- docker-image: ${{ needs.linux-jammy-cuda12_8-py3_12-gcc9-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_12-gcc9-inductor-build.outputs.test-matrix }}
- secrets: inherit
-
- linux-jammy-cpu-py3_12-inductor-halide-build:
- name: linux-jammy-cpu-py3.12-gcc11-inductor-halide
+ inductor-halide-build:
+ name: inductor-halide-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
@@ -97,18 +71,18 @@ jobs:
]}
secrets: inherit
- linux-jammy-cpu-py3_12-inductor-halide-test:
- name: linux-jammy-cpu-py3.12-gcc11-inductor-halide
+ inductor-halide-test:
+ name: inductor-halide-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_12-inductor-halide-build
+ needs: inductor-halide-build
with:
build-environment: linux-jammy-py3.12-gcc11
- docker-image: ${{ needs.linux-jammy-cpu-py3_12-inductor-halide-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-halide-build.outputs.test-matrix }}
+ docker-image: ${{ needs.inductor-halide-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-halide-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-cpu-py3_12-inductor-triton-cpu-build:
- name: linux-jammy-cpu-py3.12-gcc11-inductor-triton-cpu
+ inductor-triton-cpu-build:
+ name: inductor-triton-cpu-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
@@ -121,23 +95,23 @@ jobs:
]}
secrets: inherit
- linux-jammy-cpu-py3_12-inductor-triton-cpu-test:
+ inductor-triton-cpu-test:
name: linux-jammy-cpu-py3.12-gcc11-inductor-triton-cpu
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_12-inductor-triton-cpu-build
+ needs: inductor-triton-cpu-build
with:
build-environment: linux-jammy-py3.12-gcc11
- docker-image: ${{ needs.linux-jammy-cpu-py3_12-inductor-triton-cpu-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-triton-cpu-build.outputs.test-matrix }}
+ docker-image: ${{ needs.inductor-triton-cpu-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-triton-cpu-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-inductor-build:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ inductor-cpu-build:
+ name: inductor-cpu-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
test-matrix: |
{ include: [
@@ -148,37 +122,12 @@ jobs:
]}
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-inductor-test:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
- uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-inductor-build
- with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
- secrets: inherit
-
- linux-jammy-cuda12_8-py3_13-gcc9-inductor-build:
- name: cuda12.8-py3.13-gcc9-sm86
- uses: ./.github/workflows/_linux-build.yml
- needs: get-label-type
- with:
- build-environment: linux-jammy-cuda12.8-py3.13-gcc9-sm86
- docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.13-gcc9-inductor-benchmarks
- cuda-arch-list: '8.6'
- test-matrix: |
- { include: [
- { config: "inductor", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- ]}
- secrets: inherit
-
- linux-jammy-cuda12_8-py3_13-gcc9-inductor-test:
- name: cuda12.8-py3.13-gcc9-sm86
+ inductor-cpu-test:
+ name: inductor-cpu-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cuda12_8-py3_13-gcc9-inductor-build
+ needs: inductor-cpu-build
with:
- build-environment: linux-jammy-cuda12.8-py3.13-gcc9-sm86
- docker-image: ${{ needs.linux-jammy-cuda12_8-py3_13-gcc9-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_13-gcc9-inductor-build.outputs.test-matrix }}
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }}
secrets: inherit
diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml
index 721572f1807b..a70929dd868d 100644
--- a/.github/workflows/inductor.yml
+++ b/.github/workflows/inductor.yml
@@ -35,7 +35,7 @@ jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -44,8 +44,8 @@ jobs:
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
- linux-jammy-cuda12_8-py3_10-gcc9-inductor-build:
- name: cuda12.8-py3.10-gcc9-sm86
+ inductor-build:
+ name: inductor-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
@@ -53,7 +53,6 @@ jobs:
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.6'
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- sync-tag: linux-jammy-cuda12_8-py3_10-gcc9-inductor-build
test-matrix: |
{ include: [
{ config: "inductor_huggingface", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
@@ -65,25 +64,24 @@ jobs:
build-additional-packages: "vision audio fbgemm torchao"
secrets: inherit
- linux-jammy-cuda12_8-py3_10-gcc9-inductor-test:
- name: cuda12.8-py3.10-gcc9-sm86
+ inductor-test:
+ name: inductor-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cuda12_8-py3_10-gcc9-inductor-build
+ needs: inductor-build
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
- docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.test-matrix }}
+ docker-image: ${{ needs.inductor-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-inductor-build:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ inductor-cpu-build:
+ name: inductor-cpu-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- sync-tag: linux-jammy-cpu-py3_9-gcc11-inductor-build
test-matrix: |
{ include: [
{ config: "cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
@@ -98,12 +96,12 @@ jobs:
build-additional-packages: "vision audio torchao"
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-inductor-test:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ inductor-cpu-test:
+ name: inductor-cpu-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-inductor-build
+ needs: inductor-cpu-build
with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }}
+ test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }}
secrets: inherit
diff --git a/.github/workflows/lint-autoformat.yml b/.github/workflows/lint-autoformat.yml
index b962970dc5b7..f64c9973d698 100644
--- a/.github/workflows/lint-autoformat.yml
+++ b/.github/workflows/lint-autoformat.yml
@@ -13,7 +13,7 @@ jobs:
if: ${{ github.repository_owner == 'pytorch' && contains(github.event.pull_request.labels.*.name, 'autoformat') }}
steps:
- name: Checkout pytorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: true
fetch-depth: 0
diff --git a/.github/workflows/lint-bc.yml b/.github/workflows/lint-bc.yml
index e0de9ede3508..98adf44aefd8 100644
--- a/.github/workflows/lint-bc.yml
+++ b/.github/workflows/lint-bc.yml
@@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Run BC Lint Action
- uses: pytorch/test-infra/.github/actions/bc-lint@main
+ uses: pytorch/test-infra/.github/actions/bc-lint@release/2.9
with:
repo: ${{ github.event.pull_request.head.repo.full_name }}
base_sha: ${{ github.event.pull_request.base.sha }}
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index b1a6dfb39071..534c15824715 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -21,7 +21,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -33,7 +33,7 @@ jobs:
uses: ./.github/workflows/_get-changed-files.yml
lintrunner-clang:
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
needs: [get-label-type, get-changed-files]
# Only run if there are changed files relevant to clangtidy / clangformat
if: |
@@ -53,7 +53,7 @@ jobs:
with:
timeout: 120
runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge"
- docker-image: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter
+ docker-image: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter
# NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout
# to run git rev-parse HEAD~:.ci/docker when a new image is needed
fetch-depth: 0
@@ -72,7 +72,7 @@ jobs:
# NOTE: mypy needs its own job because it depends on --all-files; without assessing all files it sometimes
# fails to find types when it should
lintrunner-mypy:
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
needs: [get-label-type, get-changed-files]
# Only run if there are changed files relevant to mypy
if: |
@@ -96,7 +96,7 @@ jobs:
ADDITIONAL_LINTRUNNER_ARGS="--take MYPY,MYPYSTRICT --all-files" .github/scripts/lintrunner.sh
lintrunner-noclang:
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
needs: [get-label-type, get-changed-files]
with:
timeout: 120
@@ -117,7 +117,7 @@ jobs:
fi
quick-checks:
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
needs: get-label-type
with:
timeout: 120
@@ -157,7 +157,7 @@ jobs:
if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'skip-pr-sanity-checks')
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: false
fetch-depth: -1
@@ -170,7 +170,7 @@ jobs:
bash .github/scripts/pr-sanity-check.sh
workflow-checks:
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
needs: get-label-type
with:
timeout: 120
@@ -181,6 +181,7 @@ jobs:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
# Regenerate workflows
+ export RELEASE_VERSION_TAG=2.9
.github/scripts/generate_ci_workflows.py
RC=0
@@ -190,7 +191,7 @@ jobs:
echo 'As shown by the above diff, the committed .github/workflows'
echo 'are not up to date according to .github/templates.'
echo 'Please run this command, commit, and push again to your PR:'
- echo
+ echo 'export RELEASE_VERSION_TAG=2.9'
echo ' .github/scripts/generate_ci_workflows.py'
echo
echo 'If running that command does nothing, you may need to rebase'
@@ -204,7 +205,7 @@ jobs:
exit $RC
toc:
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
needs: get-label-type
with:
timeout: 120
@@ -240,7 +241,7 @@ jobs:
test-tools:
name: Test tools
if: ${{ github.repository == 'pytorch/pytorch' }}
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
needs: get-label-type
with:
timeout: 120
@@ -260,14 +261,14 @@ jobs:
runs-on: linux.24_04.4x
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: false
fetch-depth: 1
- - name: Setup Python 3.9
+ - name: Setup Python 3.10
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
- python-version: '3.9'
+ python-version: '3.10'
architecture: x64
cache: pip
- name: Install dependencies
@@ -297,7 +298,7 @@ jobs:
# [see note: pytorch repo ref]
# deep clone (fetch-depth 0) required, to allow us to use git log
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: false
fetch-depth: 1
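The `export RELEASE_VERSION_TAG=2.9` added to `workflow-checks` suggests the workflow generator reads this variable to emit release-branch pins instead of `@main` ones, and the updated failure message tells contributors to set it before regenerating locally. Under that assumption, the local flow the check expects looks roughly like:

    steps:
      - name: Regenerate workflows for the release branch
        run: |
          export RELEASE_VERSION_TAG=2.9
          .github/scripts/generate_ci_workflows.py
          # fail if the committed workflows drifted from the generated ones
          git diff --exit-code .github/workflows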
diff --git a/.github/workflows/linux-aarch64.yml b/.github/workflows/linux-aarch64.yml
index 2b840a39a5c2..357347f78138 100644
--- a/.github/workflows/linux-aarch64.yml
+++ b/.github/workflows/linux-aarch64.yml
@@ -19,7 +19,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
diff --git a/.github/workflows/llm_td_retrieval.yml b/.github/workflows/llm_td_retrieval.yml
index 565a9b25df50..292f0a956c35 100644
--- a/.github/workflows/llm_td_retrieval.yml
+++ b/.github/workflows/llm_td_retrieval.yml
@@ -12,7 +12,7 @@ jobs:
name: get-label-type
# Don't run on forked repos
if: github.repository_owner == 'pytorch'
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -116,5 +116,5 @@ jobs:
AWS_REGION: ""
- name: Teardown Linux
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
if: always()
diff --git a/.github/workflows/nightly-s3-uploads.yml b/.github/workflows/nightly-s3-uploads.yml
index acf3504dec9c..1cafca0e0c85 100644
--- a/.github/workflows/nightly-s3-uploads.yml
+++ b/.github/workflows/nightly-s3-uploads.yml
@@ -23,7 +23,7 @@ jobs:
environment: upload-stats
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
fetch-depth: 1
submodules: false
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 2acc987e523c..eddb21ea2ca5 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -19,7 +19,7 @@ concurrency:
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -42,8 +42,8 @@ jobs:
needs: get-label-type
with:
runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge"
- build-environment: linux-jammy-py3.9-gcc11
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11
+ build-environment: linux-jammy-py3.10-gcc11
+ docker-image-name: ci-image:pytorch-linux-jammy-py3.10-gcc11
secrets: inherit
docs-push:
@@ -54,7 +54,7 @@ jobs:
- get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-gcc11
+ build-environment: linux-jammy-py3.10-gcc11
docker-image: ${{ needs.docs-build.outputs.docker-image }}
push: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || startsWith(github.event.ref, 'refs/tags/v') }}
run-doxygen: true
@@ -92,7 +92,7 @@ jobs:
if: github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
steps:
- name: "${{ matrix.repo-owner }}/${{ matrix.repo-name }} update-commit-hash"
- uses: pytorch/test-infra/.github/actions/update-commit-hash@main
+ uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.9
with:
repo-owner: ${{ matrix.repo-owner }}
repo-name: ${{ matrix.repo-name }}
diff --git a/.github/workflows/nitpicker.yml b/.github/workflows/nitpicker.yml
index 40bd245ce913..242f021e46fa 100644
--- a/.github/workflows/nitpicker.yml
+++ b/.github/workflows/nitpicker.yml
@@ -19,7 +19,7 @@ jobs:
if: ${{ github.event.pull_request.number != 26921 && github.repository_owner == 'pytorch' }}
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
- uses: ethanis/nitpicker@v1
with:
nitpicks: '.github/nitpicks.yml'
diff --git a/.github/workflows/operator_benchmark.yml b/.github/workflows/operator_benchmark.yml
index 16cb1600b8d6..dcdc2cd0ba24 100644
--- a/.github/workflows/operator_benchmark.yml
+++ b/.github/workflows/operator_benchmark.yml
@@ -14,6 +14,10 @@ on:
schedule:
# Run at 07:00 UTC every Sunday
- cron: 0 7 * * 0
+ pull_request:
+ paths:
+ - benchmarks/operator_benchmark/**
+ - .github/workflows/operator_benchmark.yml
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
@@ -24,38 +28,38 @@ permissions:
contents: read
jobs:
- linux-jammy-cpu-py3_9-gcc11-opbenchmark-build:
+ opbenchmark-build:
if: github.repository_owner == 'pytorch'
- name: linux-jammy-cpu-py3.9-gcc11-opbenchmark
+ name: opbenchmark-build
uses: ./.github/workflows/_linux-build.yml
with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
test-matrix: |
{ include: [
{ config: "cpu_operator_benchmark_short", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
]}
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-opbenchmark-on-demand-build:
+ opbenchmark-on-demand-build:
if: ${{ github.event_name == 'workflow_dispatch' && github.repository_owner == 'pytorch' }}
- name: linux-jammy-cpu-py3.9-gcc11-opbenchmark
+ name: opbenchmark-on-demand-build
uses: ./.github/workflows/_linux-build.yml
with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
test-matrix: |
{ include: [
{ config: "cpu_operator_benchmark_${{ inputs.test_mode }}", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
]}
secrets: inherit
- linux-jammy-cpu-py3_9-gcc11-opbenchmark-test:
- name: linux-jammy-cpu-py3.9-gcc11-opbenchmark
+ opbenchmark-test:
+ name: opbenchmark-test
uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-opbenchmark-build
+ needs: opbenchmark-build
with:
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-opbenchmark-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-opbenchmark-build.outputs.test-matrix }}
+ build-environment: linux-jammy-py3.10-gcc11-build
+ docker-image: ${{ needs.opbenchmark-build.outputs.docker-image }}
+ test-matrix: ${{ needs.opbenchmark-build.outputs.test-matrix }}
secrets: inherit
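The new `pull_request` trigger above is path-filtered: the otherwise-scheduled benchmark suite now also runs on PRs, but only when the PR touches the benchmark sources or the workflow file itself. The general shape of a path-filtered trigger:

    on:
      schedule:
        - cron: '0 7 * * 0'
      pull_request:
        paths:
          - benchmarks/some_suite/**             # illustrative paths
          - .github/workflows/this-workflow.yml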
diff --git a/.github/workflows/operator_microbenchmark.yml b/.github/workflows/operator_microbenchmark.yml
new file mode 100644
index 000000000000..9205b927c5d7
--- /dev/null
+++ b/.github/workflows/operator_microbenchmark.yml
@@ -0,0 +1,46 @@
+name: operator_microbenchmark
+
+on:
+ push:
+ tags:
+ - ciflow/op-benchmark/*
+ workflow_dispatch:
+ schedule:
+ # Run at 06:00 UTC every day
+ - cron: 0 6 * * *
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+ cancel-in-progress: true
+
+permissions:
+ id-token: write
+ contents: read
+
+jobs:
+ opmicrobenchmark-build:
+ if: github.repository_owner == 'pytorch'
+ name: opmicrobenchmark-build
+ uses: ./.github/workflows/_linux-build.yml
+ with:
+ runner: linux.12xlarge.memory
+ build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
+ docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
+ cuda-arch-list: '8.0 9.0'
+ test-matrix: |
+ { include: [
+ { config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.h100" },
+ { config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.a100" },
+ ]}
+ secrets: inherit
+
+ opmicrobenchmark-test:
+ name: opmicrobenchmark-test
+ uses: ./.github/workflows/_linux-test.yml
+ needs: opmicrobenchmark-build
+ with:
+ timeout-minutes: 500
+ build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
+ docker-image: ${{ needs.opmicrobenchmark-build.outputs.docker-image }}
+ test-matrix: ${{ needs.opmicrobenchmark-build.outputs.test-matrix }}
+ secrets: inherit
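The `test-matrix` literal above is JSON: each entry under `include` becomes a separate test job, so the single sm80/sm90 build is exercised on both an H100 and an A100 runner. A sketch of that fan-out, parsing the same literal:

```python
import json

test_matrix = json.loads("""
{ "include": [
    { "config": "operator_microbenchmark_test", "shard": 1, "num_shards": 1, "runner": "linux.aws.h100" },
    { "config": "operator_microbenchmark_test", "shard": 1, "num_shards": 1, "runner": "linux.aws.a100" }
]}
""")
for entry in test_matrix["include"]:
    print(f"{entry['config']} shard {entry['shard']}/{entry['num_shards']} on {entry['runner']}")
```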
diff --git a/.github/workflows/periodic-rocm-mi300.yml b/.github/workflows/periodic-rocm-mi300.yml
index 4d8890e69fc7..850c98b3fa81 100644
--- a/.github/workflows/periodic-rocm-mi300.yml
+++ b/.github/workflows/periodic-rocm-mi300.yml
@@ -41,7 +41,7 @@ jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch'
with:
triggering_actor: ${{ github.triggering_actor }}
diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml
index 7d43c68c61b0..418699cb5f5a 100644
--- a/.github/workflows/periodic.yml
+++ b/.github/workflows/periodic.yml
@@ -43,7 +43,7 @@ jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch'
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -59,13 +59,14 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda12.4-py3.10-gcc11
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11
+ cuda-arch-list: 7.5
test-matrix: |
{ include: [
- { config: "legacy_nvidia_driver", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
- { config: "legacy_nvidia_driver", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
- { config: "legacy_nvidia_driver", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
- { config: "legacy_nvidia_driver", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
- { config: "legacy_nvidia_driver", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
+ { config: "legacy_nvidia_driver", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
+ { config: "legacy_nvidia_driver", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
+ { config: "legacy_nvidia_driver", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
+ { config: "legacy_nvidia_driver", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
+ { config: "legacy_nvidia_driver", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
]}
secrets: inherit
@@ -170,6 +171,38 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-debug-build.outputs.test-matrix }}
secrets: inherit
+ linux-jammy-cuda13_0-py3_10-gcc11-build:
+ name: linux-jammy-cuda13.0-py3.10-gcc11
+ uses: ./.github/workflows/_linux-build.yml
+ needs: get-label-type
+ with:
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ cuda-arch-list: 7.5
+ build-environment: linux-jammy-cuda13.0-py3.10-gcc11
+ docker-image-name: ci-image:pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11
+ test-matrix: |
+ { include: [
+ { config: "nogpu_AVX512", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
+ { config: "nogpu_AVX512", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
+ { config: "nogpu_AVX512", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
+ { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
+ { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
+ { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.4xlarge.nvidia.gpu" },
+ ]}
+ secrets: inherit
+
+ linux-jammy-cuda13_0-py3_10-gcc11-test:
+ name: linux-jammy-cuda13.0-py3.10-gcc11
+ uses: ./.github/workflows/_linux-test.yml
+ needs:
+ - linux-jammy-cuda13_0-py3_10-gcc11-build
+ - target-determination
+ with:
+ build-environment: linux-jammy-cuda13.0-py3.10-gcc11
+ docker-image: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-build.outputs.docker-image }}
+ test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-build.outputs.test-matrix }}
+ secrets: inherit
+
linux-jammy-rocm-py3_10-build:
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
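Each `num_shards: N` config in these matrices fans out into N jobs that each run a slice of the test suite. PyTorch's real sharder balances shards using recorded test timings; the round-robin sketch below conveys only the partitioning idea, and the names are illustrative:

```python
def tests_for_shard(tests, shard, num_shards):
    # Shards are 1-indexed in the matrix entries above.
    return [t for i, t in enumerate(tests) if i % num_shards == shard - 1]

tests = [f"test_{i}" for i in range(10)]
# Every test lands in exactly one of the three nogpu_AVX512 shards.
assert sorted(sum((tests_for_shard(tests, s, 3) for s in (1, 2, 3)), [])) == sorted(tests)
```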
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index e2cac7bb7315..f884fee53fc7 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -42,21 +42,21 @@ jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
- linux-jammy-py3_9-gcc11-build:
- name: linux-jammy-py3.9-gcc11
+ linux-jammy-py3_10-gcc11-build:
+ name: linux-jammy-py3.10-gcc11
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-gcc11
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11
+ build-environment: linux-jammy-py3.10-gcc11
+ docker-image-name: ci-image:pytorch-linux-jammy-py3.10-gcc11
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@@ -73,49 +73,49 @@ jobs:
]}
secrets: inherit
- linux-jammy-py3_9-gcc11-test:
- name: linux-jammy-py3.9-gcc11
+ linux-jammy-py3_10-gcc11-test:
+ name: linux-jammy-py3.10-gcc11
uses: ./.github/workflows/_linux-test.yml
needs:
- - linux-jammy-py3_9-gcc11-build
+ - linux-jammy-py3_10-gcc11-build
- target-determination
with:
- build-environment: linux-jammy-py3.9-gcc11
- docker-image: ${{ needs.linux-jammy-py3_9-gcc11-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-py3_9-gcc11-build.outputs.test-matrix }}
+ build-environment: linux-jammy-py3.10-gcc11
+ docker-image: ${{ needs.linux-jammy-py3_10-gcc11-build.outputs.docker-image }}
+ test-matrix: ${{ needs.linux-jammy-py3_10-gcc11-build.outputs.test-matrix }}
secrets: inherit
linux-docs:
name: linux-docs
uses: ./.github/workflows/_docs.yml
- needs: linux-jammy-py3_9-gcc11-build
+ needs: linux-jammy-py3_10-gcc11-build
with:
- build-environment: linux-jammy-py3.9-gcc11
- docker-image: ${{ needs.linux-jammy-py3_9-gcc11-build.outputs.docker-image }}
+ build-environment: linux-jammy-py3.10-gcc11
+ docker-image: ${{ needs.linux-jammy-py3_10-gcc11-build.outputs.docker-image }}
secrets: inherit
- linux-jammy-py3_9-gcc11-no-ops:
- name: linux-jammy-py3.9-gcc11-no-ops
+ linux-jammy-py3_10-gcc11-no-ops:
+ name: linux-jammy-py3.10-gcc11-no-ops
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-gcc11-no-ops
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11
+ build-environment: linux-jammy-py3.10-gcc11-no-ops
+ docker-image-name: ci-image:pytorch-linux-jammy-py3.10-gcc11
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
]}
secrets: inherit
- linux-jammy-py3_9-gcc11-pch:
- name: linux-jammy-py3.9-gcc11-pch
+ linux-jammy-py3_10-gcc11-pch:
+ name: linux-jammy-py3.10-gcc11-pch
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-gcc11-pch
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11
+ build-environment: linux-jammy-py3.10-gcc11-pch
+ docker-image-name: ci-image:pytorch-linux-jammy-py3.10-gcc11
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
@@ -132,17 +132,17 @@ jobs:
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan
test-matrix: |
{ include: [
- { config: "default", shard: 1, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
- { config: "default", shard: 2, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
- { config: "default", shard: 3, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
- { config: "default", shard: 4, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
- { config: "default", shard: 5, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
- { config: "default", shard: 6, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+ { config: "default", shard: 1, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+ { config: "default", shard: 2, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+ { config: "default", shard: 3, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+ { config: "default", shard: 4, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+ { config: "default", shard: 5, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+ { config: "default", shard: 6, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+ { config: "default", shard: 7, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
]}
sync-tag: asan-build
secrets: inherit
-
linux-jammy-py3_10-clang18-asan-test:
name: linux-jammy-py3.10-clang18-asan
uses: ./.github/workflows/_linux-test.yml
@@ -183,14 +183,14 @@ jobs:
test-matrix: ${{ needs.linux-jammy-py3_10-clang12-onnx-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-py3_9-clang12-build:
- name: linux-jammy-py3.9-clang12
+ linux-jammy-py3_10-clang12-build:
+ name: linux-jammy-py3.10-clang12
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-clang12
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-clang12
+ build-environment: linux-jammy-py3.10-clang12
+ docker-image-name: ci-image:pytorch-linux-jammy-py3.10-clang12
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
@@ -207,16 +207,16 @@ jobs:
]}
secrets: inherit
- linux-jammy-py3_9-clang12-test:
- name: linux-jammy-py3.9-clang12
+ linux-jammy-py3_10-clang12-test:
+ name: linux-jammy-py3.10-clang12
uses: ./.github/workflows/_linux-test.yml
needs:
- - linux-jammy-py3_9-clang12-build
+ - linux-jammy-py3_10-clang12-build
- target-determination
with:
- build-environment: linux-jammy-py3.9-clang12
- docker-image: ${{ needs.linux-jammy-py3_9-clang12-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-py3_9-clang12-build.outputs.test-matrix }}
+ build-environment: linux-jammy-py3.10-clang12
+ docker-image: ${{ needs.linux-jammy-py3_10-clang12-build.outputs.docker-image }}
+ test-matrix: ${{ needs.linux-jammy-py3_10-clang12-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-py3_13-clang12-build:
@@ -253,14 +253,14 @@ jobs:
test-matrix: ${{ needs.linux-jammy-py3_13-clang12-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-cuda12_8-cudnn9-py3_9-clang12-build:
- name: linux-jammy-cuda12.8-cudnn9-py3.9-clang12
+ linux-jammy-cuda12_8-cudnn9-py3_10-clang12-build:
+ name: linux-jammy-cuda12.8-cudnn9-py3.10-clang12
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-cuda12.8-cudnn9-py3.9-clang12
- docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-clang12
+ build-environment: linux-jammy-cuda12.8-cudnn9-py3.10-clang12
+ docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-clang12
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
@@ -282,14 +282,14 @@ jobs:
]}
secrets: inherit
- linux-jammy-py3_9-gcc11-mobile-lightweight-dispatch-build:
- name: linux-jammy-py3.9-gcc11-mobile-lightweight-dispatch-build
+ linux-jammy-py3_10-gcc11-mobile-lightweight-dispatch-build:
+ name: linux-jammy-py3.10-gcc11-mobile-lightweight-dispatch-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-gcc11-mobile-lightweight-dispatch-build
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11
+ build-environment: linux-jammy-py3.10-gcc11-mobile-lightweight-dispatch-build
+ docker-image-name: ci-image:pytorch-linux-jammy-py3.10-gcc11
build-generates-artifacts: false
test-matrix: |
{ include: [
@@ -342,15 +342,40 @@ jobs:
test-matrix: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-xpu-2025_1-py3_9-build:
- name: linux-jammy-xpu-2025.1-py3.9
+ linux-jammy-cuda12_8-py3_10-gcc9-inductor-build:
+ name: cuda12.8-py3.10-gcc9-sm75
+ uses: ./.github/workflows/_linux-build.yml
+ needs: get-label-type
+ with:
+ runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm75
+ docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
+ cuda-arch-list: '7.5'
+ test-matrix: |
+ { include: [
+ { config: "pr_time_benchmarks", shard: 1, num_shards: 1, runner: "linux.g4dn.metal.nvidia.gpu" },
+ ]}
+ secrets: inherit
+
+ linux-jammy-cuda12_8-py3_10-gcc9-inductor-test:
+ name: cuda12.8-py3.10-gcc9-sm75
+ uses: ./.github/workflows/_linux-test.yml
+ needs: linux-jammy-cuda12_8-py3_10-gcc9-inductor-build
+ with:
+ build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm75
+ docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.docker-image }}
+ test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.test-matrix }}
+ secrets: inherit
+
+ linux-jammy-xpu-n-py3_9-build:
+ name: linux-jammy-xpu-n-py3.9
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
- sync-tag: linux-xpu-2025-1-build
+ sync-tag: linux-xpu-n-build
runner_prefix: ${{ needs.get-label-type.outputs.label-type }}
- build-environment: linux-jammy-xpu-2025.1-py3.9
- docker-image-name: ci-image:pytorch-linux-jammy-xpu-2025.1-py3
+ build-environment: linux-jammy-xpu-n-py3.9
+ docker-image-name: ci-image:pytorch-linux-jammy-xpu-n-py3
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "linux.idc.xpu" },
diff --git a/.github/workflows/rocm-mi300.yml b/.github/workflows/rocm-mi300.yml
index 7e3ba43bf984..51a807250f54 100644
--- a/.github/workflows/rocm-mi300.yml
+++ b/.github/workflows/rocm-mi300.yml
@@ -28,7 +28,7 @@ jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
diff --git a/.github/workflows/rocm-mi355.yml b/.github/workflows/rocm-mi355.yml
index e5dda604a4db..adf5fe919087 100644
--- a/.github/workflows/rocm-mi355.yml
+++ b/.github/workflows/rocm-mi355.yml
@@ -22,7 +22,7 @@ jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
diff --git a/.github/workflows/slow.yml b/.github/workflows/slow.yml
index 2a7b1d184330..197a04054bfe 100644
--- a/.github/workflows/slow.yml
+++ b/.github/workflows/slow.yml
@@ -41,7 +41,7 @@ jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -78,14 +78,14 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm86-build.outputs.test-matrix }}
secrets: inherit
- linux-jammy-py3_9-clang12-build:
- name: linux-jammy-py3.9-clang12
+ linux-jammy-py3_10-clang12-build:
+ name: linux-jammy-py3.10-clang12
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-clang12
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-clang12
+ build-environment: linux-jammy-py3.10-clang12
+ docker-image-name: ci-image:pytorch-linux-jammy-py3.10-clang12
test-matrix: |
{ include: [
{ config: "slow", shard: 1, num_shards: 2, runner: "linux.2xlarge" },
@@ -93,16 +93,16 @@ jobs:
]}
secrets: inherit
- linux-jammy-py3_9-clang12-test:
- name: linux-jammy-py3.9-clang12
+ linux-jammy-py3_10-clang12-test:
+ name: linux-jammy-py3.10-clang12
uses: ./.github/workflows/_linux-test.yml
needs:
- - linux-jammy-py3_9-clang12-build
+ - linux-jammy-py3_10-clang12-build
- target-determination
with:
- build-environment: linux-jammy-py3.9-clang12
- docker-image: ${{ needs.linux-jammy-py3_9-clang12-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-py3_9-clang12-build.outputs.test-matrix }}
+ build-environment: linux-jammy-py3.10-clang12
+ docker-image: ${{ needs.linux-jammy-py3_10-clang12-build.outputs.docker-image }}
+ test-matrix: ${{ needs.linux-jammy-py3_10-clang12-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-rocm-py3_10-build:
diff --git a/.github/workflows/target-determination-indexer.yml b/.github/workflows/target-determination-indexer.yml
index ec579fda8da9..f5f29c9646f4 100644
--- a/.github/workflows/target-determination-indexer.yml
+++ b/.github/workflows/target-determination-indexer.yml
@@ -13,7 +13,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -35,7 +35,7 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.9
with:
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
working-directory: pytorch
@@ -50,13 +50,13 @@ jobs:
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
- uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+ uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.9
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
- uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+ uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.9
- name: Clone CodeLlama
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -149,7 +149,7 @@ jobs:
"s3://target-determinator-assets/indexes/latest/${ZIP_NAME}"
- name: Teardown Linux
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.9
if: always()
concurrency:
diff --git a/.github/workflows/target_determination.yml b/.github/workflows/target_determination.yml
index c712b11185a7..3e9f848e9e09 100644
--- a/.github/workflows/target_determination.yml
+++ b/.github/workflows/target_determination.yml
@@ -9,7 +9,7 @@ jobs:
name: get-label-type
# Don't run on forked repos
if: github.repository_owner == 'pytorch'
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@@ -27,7 +27,7 @@ jobs:
# checkout because when we run this action we don't *have* a local
# checkout. In other cases you should prefer a local checkout.
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: false
diff --git a/.github/workflows/test-check-binary.yml b/.github/workflows/test-check-binary.yml
index 5f0ad59d3a3b..a13e1d027f13 100644
--- a/.github/workflows/test-check-binary.yml
+++ b/.github/workflows/test-check-binary.yml
@@ -15,7 +15,7 @@ jobs:
check_binary_linux_cpu:
if: github.repository_owner == 'pytorch'
name: Test check_binary.sh for Linux CPU
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
with:
docker-image: python:3.11
docker-build-dir: "skip-docker-build"
@@ -28,7 +28,7 @@ jobs:
check_binary_linux_cuda:
if: github.repository_owner == 'pytorch'
name: Test check_binary.sh for Linux CUDA
- uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
with:
runner: linux.g4dn.4xlarge.nvidia.gpu
docker-image: python:3.11
diff --git a/.github/workflows/test-h100.yml b/.github/workflows/test-h100.yml
index 1e83c7b9d98c..d08d6033c47e 100644
--- a/.github/workflows/test-h100.yml
+++ b/.github/workflows/test-h100.yml
@@ -28,7 +28,7 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
diff --git a/.github/workflows/tools-unit-tests.yml b/.github/workflows/tools-unit-tests.yml
index c687c07b7ca7..9c104571ef89 100644
--- a/.github/workflows/tools-unit-tests.yml
+++ b/.github/workflows/tools-unit-tests.yml
@@ -25,7 +25,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout pytorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: true
fetch-depth: 0
@@ -52,7 +52,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout pytorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: true
fetch-depth: 0
diff --git a/.github/workflows/torchbench.yml b/.github/workflows/torchbench.yml
index 08fcd3340262..e4f0c692e976 100644
--- a/.github/workflows/torchbench.yml
+++ b/.github/workflows/torchbench.yml
@@ -18,7 +18,7 @@ jobs:
get-default-label-prefix:
if: github.repository_owner == 'pytorch'
name: get-default-label-prefix
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 0081e4e1f895..efc027ad2acb 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -39,7 +39,7 @@ jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -224,13 +224,12 @@ jobs:
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor distributed/test_c10d_common distributed/test_c10d_nccl"
secrets: inherit
- # NB: Keep this in sync with inductor-perf-test-nightly.yml
- linux-jammy-cuda12_8-py3_10-gcc9-inductor-build:
- name: cuda12.8-py3.10-gcc9-sm80
+ inductor-build:
+ name: inductor-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
- build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
+ build-environment: linux-jammy-cuda12.8-py3.12-gcc9-sm80
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
secrets: inherit
@@ -241,8 +240,8 @@ jobs:
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-gcc11
- docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ build-environment: linux-jammy-py3.10-gcc11
+ docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
test-matrix: |
{ include: [
{ config: "verify_cachebench", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@@ -256,7 +255,7 @@ jobs:
- verify-cachebench-cpu-build
- target-determination
with:
- build-environment: linux-jammy-py3.9-gcc11
+ build-environment: linux-jammy-py3.10-gcc11
docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }}
test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }}
secrets: inherit
diff --git a/.github/workflows/trymerge.yml b/.github/workflows/trymerge.yml
index 1fdb1da67a59..5c456c607c88 100644
--- a/.github/workflows/trymerge.yml
+++ b/.github/workflows/trymerge.yml
@@ -59,22 +59,19 @@ jobs:
# on the PR appear in chronological order (timing issues can shuffle them around)
sleep 60
fi
+
+ # Require a comment id for merge operations
+ if [ -z "${COMMENT_ID}" ]; then
+ echo "Error: merge requires COMMENT_ID to be specified"
+ exit 1
+ fi
+
if [ -n "${FORCE}" ]; then
- if [ -n "${COMMENT_ID}" ]; then
- python3 .github/scripts/trymerge.py --force --comment-id "${COMMENT_ID}" "${PR_NUM}"
- else
- python3 .github/scripts/trymerge.py --force "${PR_NUM}"
- fi
+ python3 .github/scripts/trymerge.py --force --comment-id "${COMMENT_ID}" "${PR_NUM}"
elif [ -n "${IGNORE_CURRENT}" ]; then
- if [ -n "${COMMENT_ID}" ]; then
- python3 .github/scripts/trymerge.py --ignore-current --comment-id "${COMMENT_ID}" "${PR_NUM}"
- else
- python3 .github/scripts/trymerge.py --ignore-current "${PR_NUM}"
- fi
- elif [ -n "${COMMENT_ID}" ]; then
- python3 .github/scripts/trymerge.py --comment-id "${COMMENT_ID}" "${PR_NUM}"
+ python3 .github/scripts/trymerge.py --ignore-current --comment-id "${COMMENT_ID}" "${PR_NUM}"
else
- python3 .github/scripts/trymerge.py "${PR_NUM}"
+ python3 .github/scripts/trymerge.py --comment-id "${COMMENT_ID}" "${PR_NUM}"
fi
- name: Comment on Canceled
if: ${{ cancelled() && steps.checkout.outcome == 'success' }}
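The trymerge rewrite collapses the old three-by-two branch matrix: with `COMMENT_ID` now mandatory, only the force/ignore-current choice varies and every branch passes `--comment-id` unconditionally. A Python rendering of the resulting dispatch logic (the function name is ours):

```python
def build_trymerge_cmd(pr_num, comment_id, force=False, ignore_current=False):
    if not comment_id:
        raise SystemExit("Error: merge requires COMMENT_ID to be specified")
    cmd = ["python3", ".github/scripts/trymerge.py"]
    if force:
        cmd.append("--force")
    elif ignore_current:
        cmd.append("--ignore-current")
    cmd += ["--comment-id", comment_id, pr_num]
    return cmd

assert build_trymerge_cmd("1234", "5678", force=True) == [
    "python3", ".github/scripts/trymerge.py", "--force", "--comment-id", "5678", "1234"]
```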
diff --git a/.github/workflows/unstable.yml b/.github/workflows/unstable.yml
index 7f0fe6058bd0..5eeb8b19a325 100644
--- a/.github/workflows/unstable.yml
+++ b/.github/workflows/unstable.yml
@@ -46,7 +46,7 @@ jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
diff --git a/.github/workflows/update-viablestrict.yml b/.github/workflows/update-viablestrict.yml
index 3d445756f7a2..e3ca35d2d01d 100644
--- a/.github/workflows/update-viablestrict.yml
+++ b/.github/workflows/update-viablestrict.yml
@@ -18,12 +18,12 @@ jobs:
environment: ${{ (github.event_name == 'schedule') && 'mergebot' || '' }}
steps:
- name: Update viable/strict
- uses: pytorch/test-infra/.github/actions/update-viablestrict@main
+ uses: pytorch/test-infra/.github/actions/update-viablestrict@release/2.9
id: update_viablestrict
with:
repository: pytorch/pytorch
stable-branch: viable/strict
- requires: '[\"pull\", \"trunk\", \"lint\", \"linux-binary\", \"linux-aarch64\"]'
+ requires: '[\"pull\", \"trunk\", \"lint\", \"^linux-binary-manywheel$\", \"^linux-binary-libtorch-release$\", \"linux-aarch64\"]'
secret-bot-token: ${{ secrets.MERGEBOT_TOKEN }}
clickhouse-url: ${{ secrets.CLICKHOUSE_URL }}
clickhouse-username: ${{ secrets.CLICKHOUSE_VIABLESTRICT_USERNAME }}
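The broadened `requires` list swaps the bare `linux-binary` substring for two anchored patterns. Assuming the update-viablestrict action matches entries as regular expressions — which the `^`/`$` anchors indicate — the effect is to require exactly those two binary workflows rather than anything whose name contains `linux-binary`. A sketch:

```python
import re

requires = ["pull", "trunk", "lint", "^linux-binary-manywheel$",
            "^linux-binary-libtorch-release$", "linux-aarch64"]

def is_required(workflow_name: str) -> bool:
    return any(re.search(pattern, workflow_name) for pattern in requires)

assert is_required("linux-binary-manywheel")
assert not is_required("linux-binary-manywheel-rocm")  # anchors exclude variants
```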
diff --git a/.github/workflows/update_pytorch_labels.yml b/.github/workflows/update_pytorch_labels.yml
index a1b8c38141ae..535950b3c0b7 100644
--- a/.github/workflows/update_pytorch_labels.yml
+++ b/.github/workflows/update_pytorch_labels.yml
@@ -17,7 +17,7 @@ jobs:
contents: read
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
fetch-depth: 1
submodules: false
diff --git a/.github/workflows/upload-test-stats-while-running.yml b/.github/workflows/upload-test-stats-while-running.yml
index 9aecaad0e068..82c21467dc6a 100644
--- a/.github/workflows/upload-test-stats-while-running.yml
+++ b/.github/workflows/upload-test-stats-while-running.yml
@@ -16,7 +16,7 @@ jobs:
runs-on: linux.2xlarge
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
fetch-depth: 1
submodules: false
diff --git a/.github/workflows/upload-test-stats.yml b/.github/workflows/upload-test-stats.yml
index f77b6081b776..3cfc651b2a62 100644
--- a/.github/workflows/upload-test-stats.yml
+++ b/.github/workflows/upload-test-stats.yml
@@ -58,7 +58,7 @@ jobs:
run: echo "${TRIGGERING_WORKFLOW}"
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
- name: Configure aws credentials
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
diff --git a/.github/workflows/upload-torch-dynamo-perf-stats.yml b/.github/workflows/upload-torch-dynamo-perf-stats.yml
index 07471619437a..db3fc72e68e9 100644
--- a/.github/workflows/upload-torch-dynamo-perf-stats.yml
+++ b/.github/workflows/upload-torch-dynamo-perf-stats.yml
@@ -32,7 +32,7 @@ jobs:
name: Upload dynamo performance stats for ${{ github.event.workflow_run.id }}, attempt ${{ github.event.workflow_run.run_attempt }}
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
submodules: false
fetch-depth: 1
diff --git a/.github/workflows/upload_test_stats_intermediate.yml b/.github/workflows/upload_test_stats_intermediate.yml
index 570256200605..1764139fed25 100644
--- a/.github/workflows/upload_test_stats_intermediate.yml
+++ b/.github/workflows/upload_test_stats_intermediate.yml
@@ -17,7 +17,7 @@ jobs:
environment: upload-stats
steps:
- name: Checkout PyTorch
- uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+ uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.9
with:
fetch-depth: 1
submodules: false
diff --git a/.github/workflows/vllm.yml b/.github/workflows/vllm.yml
index 14524069ab5a..dcfa4027c7ae 100644
--- a/.github/workflows/vllm.yml
+++ b/.github/workflows/vllm.yml
@@ -2,12 +2,14 @@ name: vllm-test
on:
push:
+ branches:
+ - main
+ - release/*
tags:
- ciflow/vllm/*
workflow_dispatch:
schedule:
- # Every 12 hours starting at 00:00 UTC (00:00 and 12:00)
- - cron: '0 0,12 * * *'
+ - cron: '0 */8 * * *' # every 8 hours at minute 0 (UTC)
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
@@ -20,7 +22,7 @@ permissions:
jobs:
get-label-type:
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
@@ -46,14 +48,18 @@ jobs:
{ config: "vllm_basic_models_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "vllm_entrypoints_test", shard: 1, num_shards: 1,runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "vllm_regression_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
- { config: "vllm_lora_280_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "vllm_multi_model_processor_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "vllm_pytorch_compilation_unit_tests", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+ { config: "vllm_lora_28_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
+ { config: "vllm_multi_model_test_28_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu"},
+ { config: "vllm_languagde_model_test_extended_generation_28_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu"},
+ { config: "vllm_distributed_test_2_gpu_28_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "vllm_lora_test", shard: 0, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "vllm_lora_test", shard: 1, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "vllm_lora_test", shard: 2, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "vllm_lora_test", shard: 3, num_shards: 4, runner: "linux.g6.4xlarge.experimental.nvidia.gpu" },
- { config: "vllm_lora_tp_test_distributed", shard: 1, num_shards: 1, runner: "linux.aws.h100.4"},
+ { config: "vllm_lora_tp_test_distributed", shard: 1, num_shards: 1, runner: "linux.g6.12xlarge.nvidia.gpu"},
+ { config: "vllm_distributed_test_28_failure_test", shard: 1, num_shards: 1, runner: "linux.g6.12xlarge.nvidia.gpu"}
]}
secrets: inherit
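On the schedule change: the fixed two-run cron (`0 0,12 * * *`) becomes step syntax, where `*/8` in the hour field matches every hour divisible by 8. A one-liner check:

```python
# '0 */8 * * *' fires at minute 0 of every hour divisible by 8.
assert [h for h in range(24) if h % 8 == 0] == [0, 8, 16]   # three runs/day
assert [h for h in range(24) if h in (0, 12)] == [0, 12]    # old schedule: two
```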
diff --git a/.github/workflows/weekly.yml b/.github/workflows/weekly.yml
index b95dadd5f2b1..2c534891c6e2 100644
--- a/.github/workflows/weekly.yml
+++ b/.github/workflows/weekly.yml
@@ -22,7 +22,7 @@ jobs:
fetch-depth: 0
- name: update-xla-commit-hash
continue-on-error: true
- uses: pytorch/test-infra/.github/actions/update-commit-hash@main
+ uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.9
with:
repo-name: xla
branch: master
diff --git a/.github/workflows/win-arm64-build-test.yml b/.github/workflows/win-arm64-build-test.yml
index 627a43b56bf7..95b4e2f027f6 100644
--- a/.github/workflows/win-arm64-build-test.yml
+++ b/.github/workflows/win-arm64-build-test.yml
@@ -4,6 +4,9 @@ on:
push:
tags:
- ciflow/win-arm64/*
+ schedule:
+ # Every 4 hours starting at 00:00 UTC
+ - cron: '0 */4 * * *'
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
diff --git a/.github/workflows/xpu.yml b/.github/workflows/xpu.yml
index c62918b4af21..3a17bb9d70a1 100644
--- a/.github/workflows/xpu.yml
+++ b/.github/workflows/xpu.yml
@@ -19,22 +19,22 @@ jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+ uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.9
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
- linux-jammy-xpu-2025_0-py3_9-build:
- name: linux-jammy-xpu-2025.0-py3.9
+ linux-jammy-xpu-n-1-py3_10-build:
+ name: linux-jammy-xpu-n-1-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
- sync-tag: linux-xpu-2025-0-build
+ sync-tag: linux-xpu-n-1-build
runner_prefix: ${{ needs.get-label-type.outputs.label-type }}
- build-environment: linux-jammy-xpu-2025.0-py3.9
- docker-image-name: ci-image:pytorch-linux-jammy-xpu-2025.0-py3
+ build-environment: linux-jammy-xpu-n-1-py3.10
+ docker-image-name: ci-image:pytorch-linux-jammy-xpu-n-1-py3
runner: linux.12xlarge
test-matrix: |
{ include: [
@@ -47,60 +47,62 @@ jobs:
]}
secrets: inherit
- linux-jammy-xpu-2025_1-py3_9-build:
- name: linux-jammy-xpu-2025.1-py3.9
+ linux-jammy-xpu-n-py3_10-build:
+ name: linux-jammy-xpu-n-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
- sync-tag: linux-xpu-2025-1-build
+ sync-tag: linux-xpu-n-build
runner_prefix: ${{ needs.get-label-type.outputs.label-type }}
- build-environment: linux-jammy-xpu-2025.1-py3.9
- docker-image-name: ci-image:pytorch-linux-jammy-xpu-2025.1-py3
+ build-environment: linux-jammy-xpu-n-py3.10
+ docker-image-name: ci-image:pytorch-linux-jammy-xpu-n-py3
runner: linux.12xlarge
test-matrix: |
{ include: [
- { config: "default", shard: 1, num_shards: 6, runner: "linux.idc.xpu" },
- { config: "default", shard: 2, num_shards: 6, runner: "linux.idc.xpu" },
- { config: "default", shard: 3, num_shards: 6, runner: "linux.idc.xpu" },
- { config: "default", shard: 4, num_shards: 6, runner: "linux.idc.xpu" },
- { config: "default", shard: 5, num_shards: 6, runner: "linux.idc.xpu" },
- { config: "default", shard: 6, num_shards: 6, runner: "linux.idc.xpu" },
+ { config: "default", shard: 1, num_shards: 8, runner: "linux.idc.xpu" },
+ { config: "default", shard: 2, num_shards: 8, runner: "linux.idc.xpu" },
+ { config: "default", shard: 3, num_shards: 8, runner: "linux.idc.xpu" },
+ { config: "default", shard: 4, num_shards: 8, runner: "linux.idc.xpu" },
+ { config: "default", shard: 5, num_shards: 8, runner: "linux.idc.xpu" },
+ { config: "default", shard: 6, num_shards: 8, runner: "linux.idc.xpu" },
+ { config: "default", shard: 7, num_shards: 8, runner: "linux.idc.xpu" },
+ { config: "default", shard: 8, num_shards: 8, runner: "linux.idc.xpu" },
]}
secrets: inherit
- linux-jammy-xpu-2025_1-py3_9-test:
- name: linux-jammy-xpu-2025.1-py3.9
+ linux-jammy-xpu-n-py3_10-test:
+ name: linux-jammy-xpu-n-py3.10
uses: ./.github/workflows/_xpu-test.yml
- needs: linux-jammy-xpu-2025_1-py3_9-build
+ needs: linux-jammy-xpu-n-py3_10-build
permissions:
id-token: write
contents: read
with:
- build-environment: linux-jammy-xpu-2025.1-py3.9
- docker-image: ${{ needs.linux-jammy-xpu-2025_1-py3_9-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-xpu-2025_1-py3_9-build.outputs.test-matrix }}
+ build-environment: linux-jammy-xpu-n-py3.10
+ docker-image: ${{ needs.linux-jammy-xpu-n-py3_10-build.outputs.docker-image }}
+ test-matrix: ${{ needs.linux-jammy-xpu-n-py3_10-build.outputs.test-matrix }}
secrets: inherit
- windows-xpu-2025_0-build:
+ windows-xpu-n-1-build:
if: github.repository_owner == 'pytorch'
- name: win-vs2022-xpu-2025_0-py3
+ name: win-vs2022-xpu-n-1-py3
uses: ./.github/workflows/_win-build.yml
with:
- build-environment: win-vs2022-xpu-py3
+ build-environment: win-vs2022-xpu-n-1-py3
cuda-version: cpu
use-xpu: true
- xpu-version: '2025.0'
+ xpu-version: '2025.1'
vc-year: '2022'
secrets: inherit
- windows-xpu-2025_1-build:
+ windows-xpu-n-build:
if: github.repository_owner == 'pytorch'
- name: win-vs2022-xpu-2025_1-py3
+ name: win-vs2022-xpu-n-py3
uses: ./.github/workflows/_win-build.yml
with:
- build-environment: win-vs2022-xpu-py3
+ build-environment: win-vs2022-xpu-n-py3
cuda-version: cpu
use-xpu: true
- xpu-version: '2025.1'
+ xpu-version: '2025.2'
vc-year: '2022'
secrets: inherit
diff --git a/.gitignore b/.gitignore
index d1fa4cd3caf2..f20486806796 100644
--- a/.gitignore
+++ b/.gitignore
@@ -82,6 +82,7 @@ torch/return_types.pyi
torch/nn/functional.pyi
torch/utils/data/datapipes/datapipe.pyi
torch/csrc/autograd/generated/*
+torch/csrc/functionalization/generated/*
torch/csrc/lazy/generated/*.[!m]*
torch_compile_debug/
# Listed manually because some files in this directory are not generated
diff --git a/BUILD.bazel b/BUILD.bazel
index 58ebc31e243c..f13da6bfbe43 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -91,6 +91,8 @@ generated_cpu_cpp = [
"aten/src/ATen/NativeMetaFunctions.h",
"aten/src/ATen/RegistrationDeclarations.h",
"aten/src/ATen/VmapGeneratedPlumbing.h",
+ "aten/src/ATen/ViewMetaClasses.h",
+ "aten/src/ATen/ViewMetaClasses.cpp",
"aten/src/ATen/core/aten_interned_strings.h",
"aten/src/ATen/core/enum_tag.h",
"aten/src/ATen/core/TensorBody.h",
@@ -747,6 +749,7 @@ cc_library(
"torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu",
"torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu",
"torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp",
+ "torch/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp",
"torch/csrc/distributed/c10d/symm_mem/intra_node_comm.cu",
],
)) + torch_sources,
@@ -1105,6 +1108,7 @@ test_suite(
"aten/src/ATen/templates/LazyNonNativeIr.h",
"aten/src/ATen/templates/RegisterDispatchKey.cpp",
"aten/src/ATen/templates/RegisterDispatchDefinitions.ini",
+ "aten/src/ATen/templates/ViewMetaClassesPythonBinding.cpp",
"aten/src/ATen/native/native_functions.yaml",
"aten/src/ATen/native/tags.yaml",
"aten/src/ATen/native/ts_native_functions.yaml",
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 000000000000..dcdf409e7314
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,15 @@
+# Testing
+
+Use our test class and test runner:
+
+```
+from torch.testing._internal.common_utils import run_tests, TestCase
+
+class TestFeature(TestCase):
+ ...
+
+if __name__ == "__main__":
+ run_tests()
+```
+
+To test Tensor equality, use assertEqual.
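For instance, a hypothetical test module following this convention — the tensor check relies on `TestCase.assertEqual`, which compares tensors by value rather than by identity:

```python
import torch
from torch.testing._internal.common_utils import run_tests, TestCase

class TestAdd(TestCase):
    def test_add(self):
        x = torch.ones(3)
        # TestCase.assertEqual compares tensors by value (shape- and
        # dtype-aware), unlike unittest's __eq__-based assertEqual.
        self.assertEqual(x + x, torch.full((3,), 2.0))

if __name__ == "__main__":
    run_tests()
```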
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ad7368e19298..ce7890f002d3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -272,7 +272,7 @@ cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF)
cmake_dependent_option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF "USE_NCCL"
OFF)
cmake_dependent_option(USE_NVSHMEM "Use NVSHMEM" ON
- "USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
+ "USE_DISTRIBUTED;USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
option(USE_NNAPI "Use NNAPI" OFF)
option(USE_NNPACK "Use NNPACK" ON)
cmake_dependent_option(USE_NUMA "Use NUMA. Only available on Linux." ON "LINUX"
@@ -880,10 +880,21 @@ cmake_dependent_option(
USE_FBGEMM_GENAI
"Whether to build FBGEMM GenAI quantized GEMM kernels.\
Will be disabled if not supported by the platform"
- OFF
- "USE_CUDA OR USE_ROCM"
+ ON
+ "USE_ROCM"
OFF)
+IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH)
+ message(WARNING "Unsupported ROCM arch for FBGEMM GenAI, will set USE_FBGEMM_GENAI to OFF")
+ set(USE_FBGEMM_GENAI off)
+endif()
+
+# Set USE_FBGEMM_GENAI to ON for CUDA build on SM100
+if(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0a")
+ message(WARNING "Setting USE_FBGEMM_GENAI to ON for CUDA build on SM100")
+ set(USE_FBGEMM_GENAI ON)
+endif()
+
# CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem
# Eff Attention won't
cmake_dependent_option(
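For context on the `cmake_dependent_option` edit: the option takes `<default>` only while every condition in `<depends>` holds, and is pinned to `<force>` otherwise, so FBGEMM GenAI now defaults on under ROCm while CUDA builds opt in via the SM100 check below it. A simplified Python model (real CMake also honors user cache overrides):

```python
def cmake_dependent_option(cache, name, default, depends, force):
    # Simplified: each entry in `depends` must evaluate truthy in the cache.
    if all(cache.get(dep, False) for dep in depends):
        return cache.get(name, default)
    return force

# New behavior: ON by default for ROCm builds, forced OFF elsewhere.
assert cmake_dependent_option({"USE_ROCM": True}, "USE_FBGEMM_GENAI", True, ["USE_ROCM"], False)
assert not cmake_dependent_option({"USE_CUDA": True}, "USE_FBGEMM_GENAI", True, ["USE_ROCM"], False)
```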
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index dade8f4ec6ec..9d2b5d355391 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -88,13 +88,13 @@ source venv/bin/activate # or `& .\venv\Scripts\Activate.ps1` on Windows
* If you want to have no-op incremental rebuilds (which are fast), see [Make no-op build fast](#make-no-op-build-fast) below.
-* When installing with `python -m pip install -e .` (in contrast to `python -m pip install .`) Python runtime will use
+* When installing with `python -m pip install -e . -v --no-build-isolation` (in contrast to `python -m pip install . -v --no-build-isolation`) Python runtime will use
the current local source-tree when importing `torch` package. (This is done by creating [`.egg-link`](https://wiki.python.org/moin/PythonPackagingTerminology#egg-link) file in `site-packages` folder)
This way you do not need to repeatedly install after modifying Python files (`.py`).
However, you would need to reinstall if you modify Python interface (`.pyi`, `.pyi.in`) or non-Python files (`.cpp`, `.cc`, `.cu`, `.h`, ...).
- One way to avoid running `python -m pip install -e .` every time one makes a change to C++/CUDA/ObjectiveC files on Linux/Mac,
+ One way to avoid running `python -m pip install -e . -v --no-build-isolation` every time one makes a change to C++/CUDA/ObjectiveC files on Linux/Mac,
is to create a symbolic link from `build` folder to `torch/lib`, for example, by issuing following:
```bash
pushd torch/lib; sh -c "ln -sf ../../build/lib/libtorch_cpu.* ."; popd
@@ -116,7 +116,7 @@ source venv/bin/activate # or `& .\venv\Scripts\Activate.ps1` on Windows
Next run `python setup.py clean`. After that, you can install in editable mode again.
-* If you run into errors when running `python -m pip install -e .`, here are some debugging steps:
+* If you run into errors when running `python -m pip install -e . -v --no-build-isolation`, here are some debugging steps:
1. Run `printf '#include \nint main() { printf("Hello World");}'|clang -x c -; ./a.out` to make sure
your CMake works and can compile this simple Hello World program without errors.
2. Nuke your `build` directory. The `setup.py` script compiles binaries into the `build` folder and caches many
@@ -129,10 +129,10 @@ source venv/bin/activate # or `& .\venv\Scripts\Activate.ps1` on Windows
git clean -xdf
python setup.py clean
git submodule update --init --recursive
- python -m pip install -r requirements.txt
+ python -m pip install --group dev
python -m pip install --no-build-isolation -v -e .
```
- 4. The main step within `python -m pip install -e .` is running `cmake --build build` from the `build` directory. If you want to
+ 4. The main step within `python -m pip install -e . -v --no-build-isolation` is running `make` from the `build` directory. If you want to
experiment with some environment variables, you can pass them into the command:
```bash
ENV_KEY1=ENV_VAL1[, ENV_KEY2=ENV_VAL2]* CMAKE_FRESH=1 python -m pip install --no-build-isolation -v -e .
@@ -259,6 +259,7 @@ dependencies as well as the nightly binaries into the repo directory.
support for PyTorch.
* [tools](tools) - Code generation scripts for the PyTorch library.
See [README](tools/README.md) of this directory for more details.
+* [torchgen](torchgen) - Contains the logic and tooling for generating PyTorch's low-level C++ and Python bindings from operator definitions, typically specified in `native_functions.yaml`.
* [test](test) - Python unit tests for PyTorch Python frontend.
* [test_torch.py](test/test_torch.py) - Basic tests for PyTorch
functionality.
@@ -294,7 +295,7 @@ The following packages should be installed with `pip`:
- `pytest` - recommended to run tests more selectively
Running
```
-pip install -r requirements.txt
+pip install --group dev
```
will install these dependencies for you.
@@ -645,9 +646,9 @@ can be selected interactively with your mouse to zoom in on a particular part of
the program execution timeline. The `--native` command-line option tells
`py-spy` to record stack frame entries for PyTorch C++ code. To get line numbers
for C++ code it may be necessary to compile PyTorch in debug mode by prepending
-your `python -m pip install -e .` call to compile PyTorch with `DEBUG=1`.
-Depending on your operating system it may also be necessary to run `py-spy` with
-root privileges.
+your `python -m pip install -e . -v --no-build-isolation` call to compile
+PyTorch with `DEBUG=1`. Depending on your operating system it may also be
+necessary to run `py-spy` with root privileges.
`py-spy` can also work in an `htop`-like "live profiling" mode and can be
tweaked to adjust the stack sampling rate, see the `py-spy` readme for more
@@ -655,10 +656,10 @@ details.
## Managing multiple build trees
-One downside to using `python -m pip install -e .` is that your development
-version of PyTorch will be installed globally on your account (e.g., if
-you run `import torch` anywhere else, the development version will be
-used).
+One downside to using `python -m pip install -e . -v --no-build-isolation` is
+that your development version of PyTorch will be installed globally on your
+account (e.g., if you run `import torch` anywhere else, the development version
+will be used).
If you want to manage multiple builds of PyTorch, you can make use of
[venv environments](https://docs.python.org/3/library/venv.html) to maintain
@@ -719,7 +720,7 @@ options.
### Code completion and IDE support
-When using `python -m pip install -e .`, PyTorch will generate
+When using `python -m pip install -e . -v --no-build-isolation`, PyTorch will generate
a `compile_commands.json` file that can be used by many editors
to provide command completion and error highlighting for PyTorch's
C++ code. You need to `pip install ninja` to generate accurate
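On the recurring `pip install --group dev` change: this reads PEP 735 dependency groups from `pyproject.toml` rather than a `requirements.txt` file. A sketch of what pip resolves, requiring Python 3.11+ for `tomllib`; the group contents here are illustrative, not PyTorch's actual dev group:

```python
import tomllib

# Illustrative [dependency-groups] table; the real list lives in
# PyTorch's pyproject.toml.
pyproject = """
[dependency-groups]
dev = ["cmake", "ninja", "pytest"]
"""
groups = tomllib.loads(pyproject)["dependency-groups"]
print("pip install --group dev ->", ", ".join(groups["dev"]))
```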
diff --git a/Dockerfile b/Dockerfile
index 7b8964bd860e..331cf00593cb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -50,11 +50,10 @@ RUN git submodule update --init --recursive
FROM conda as conda-installs
ARG PYTHON_VERSION=3.11
ARG CUDA_PATH=cu121
-ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=whl/nightly
# Automatically set by buildx
-RUN /opt/conda/bin/conda update -y -n base -c defaults conda
-RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION}
+# Pin the conda version here; see: https://github.com/pytorch/pytorch/issues/164574
+RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} conda=25.7.0
ARG TARGETPLATFORM
diff --git a/README.md b/README.md
index 4c18724be0c0..99e6dabd1618 100644
--- a/README.md
+++ b/README.md
@@ -243,7 +243,7 @@ git submodule update --init --recursive
```bash
# Run this command from the PyTorch directory after cloning the source code using the “Get the PyTorch Source“ section above
-pip install -r requirements.txt
+pip install --group dev
```
**On Linux**
@@ -394,7 +394,7 @@ On macOS
```bash
export CMAKE_PREFIX_PATH="${CONDA_PREFIX:-'$(dirname $(which conda))/../'}:${CMAKE_PREFIX_PATH}"
-MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ CMAKE_ONLY=1 python setup.py build
+MACOSX_DEPLOYMENT_TARGET=11.0 CMAKE_ONLY=1 python setup.py build
ccmake build # or cmake-gui build
```
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index d8787154a213..38b383c2bb31 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -1,5 +1,15 @@
cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})
+list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/public")
+if(USE_ROCM)
+ include(LoadHIP OPTIONAL RESULT_VARIABLE _had_loadhip)
+ if(_had_loadhip)
+ # Exposed by LoadHIP.cmake, e.g. "7.1.2" or "7.2.0"
+ message(STATUS "LoadHIP loaded: ROCM_VERSION_DEV='${ROCM_VERSION_DEV}'")
+ else()
+ message(WARNING "LoadHIP.cmake not found; ROCM_VERSION_DEV unavailable")
+ endif()
+endif()
if(NOT MSVC)
string(APPEND CMAKE_CXX_FLAGS " -Wno-ignored-qualifiers")
@@ -216,7 +226,7 @@ file(GLOB mem_eff_attention_cuda_cpp "native/transformers/cuda/mem_eff_attention
if(USE_CUDA AND (USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION))
add_library(flash_attention OBJECT EXCLUDE_FROM_ALL ${flash_attention_cuda_kernels_cu} ${flash_attention_cuda_cpp})
- target_include_directories(flash_attention PUBLIC
+ target_include_directories(flash_attention SYSTEM PUBLIC
${PROJECT_SOURCE_DIR}/third_party/flash-attention/csrc
${PROJECT_SOURCE_DIR}/third_party/flash-attention/include
${PROJECT_SOURCE_DIR}/third_party/cutlass/include
@@ -252,47 +262,81 @@ if(USE_MEM_EFF_ATTENTION)
list(APPEND ATen_ATTENTION_KERNEL_SRCS ${mem_eff_attention_cuda_kernels_cu})
endif()
-IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH)
- message(WARNING "Unsupported ROCM arch for FBGEMM GenAI, will set USE_FBGEMM_GENAI to OFF")
- set(USE_FBGEMM_GENAI off)
-endif()
-
# FBGEMM GenAI
IF(USE_FBGEMM_GENAI)
set(FBGEMM_THIRD_PARTY ${PROJECT_SOURCE_DIR}/third_party/fbgemm/external/)
- set(FBGEMM_GENAI_DIR ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize)
-
- if(USE_ROCM)
- # Only include the kernels we want to build to avoid increasing binary size.
- file(GLOB_RECURSE fbgemm_genai_native_rocm_hip
- "${FBGEMM_GENAI_DIR}/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped*.hip"
- "${FBGEMM_GENAI_DIR}/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip")
- set_source_files_properties(${fbgemm_genai_native_rocm_hip} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
-
- # Add additional HIPCC compiler flags for performance
- set(FBGEMM_GENAI_EXTRA_HIPCC_FLAGS
- -mllvm
- -amdgpu-coerce-illegal-types=1
- -mllvm
- -enable-post-misched=0
- -mllvm
- -greedy-reverse-local-assignment=1
- -fhip-new-launch-api)
-
- hip_add_library(
- fbgemm_genai STATIC
- ${fbgemm_genai_native_rocm_hip}
- HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS})
+ set(FBGEMM_GENAI_SRCS ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize)
+ if(USE_CUDA)
+ # To avoid increasing the build time/binary size unnecessarily, use an allow-list of kernels to build.
+ # If you want to integrate a kernel from FBGEMM into torch, you have to add it here.
+ set(FBGEMM_CUTLASS_KERNELS_REGEX ".*mx8mx8bf16_grouped.*")
+ file(GLOB_RECURSE fbgemm_genai_native_cuda_cu
+ "${FBGEMM_GENAI_SRCS}/cutlass_extensions/*.cu"
+ "${FBGEMM_GENAI_SRCS}/cutlass_extensions/**/*.cu")
+ list(FILTER fbgemm_genai_native_cuda_cu INCLUDE REGEX ${FBGEMM_CUTLASS_KERNELS_REGEX})
+
+ file(GLOB_RECURSE fbgemm_genai_native_cuda_cpp
+ "${FBGEMM_GENAI_SRCS}/common/*.cpp"
+ )
+
+ # Combine all source files into a single list
+ list(APPEND fbgemm_genai_all_sources
+ ${fbgemm_genai_native_cuda_cu}
+ ${fbgemm_genai_native_cuda_cpp}
+ )
+
+ # Now, create the library and provide the sources at the same time
+ add_library(fbgemm_genai OBJECT ${fbgemm_genai_all_sources})
+
set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
- target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES)
+
+ set(fbgemm_genai_mx8mx8bf16_grouped
+ "${FBGEMM_GENAI_SRCS}/cutlass_extensions/mx8mx8bf16_grouped/"
+ )
target_include_directories(fbgemm_genai PUBLIC
- # FBGEMM version of Composable Kernel is used due to some customizations
- ${FBGEMM_THIRD_PARTY}/composable_kernel/include
- ${FBGEMM_THIRD_PARTY}/composable_kernel/library/include
- ${FBGEMM_GENAI_DIR}/include/
- ${FBGEMM_GENAI_DIR}/common/include/
+ ${FBGEMM_THIRD_PARTY}/cutlass/include
+ ${FBGEMM_THIRD_PARTY}/cutlass/tools/util/include
+ ${fbgemm_genai_mx8mx8bf16_grouped}
+ ${FBGEMM_GENAI_SRCS}/common/include/ # includes fbgemm_gpu/quantize/utils.h, fbgemm_gpu/quantize/tuning_cache.hpp
+ ${FBGEMM_GENAI_SRCS}/include/ # includes fbgemm_gpu/torch_ops.h
)
+ else()
+ if(USE_ROCM)
+ # Only include the kernels we want to build to avoid increasing binary size.
+ file(GLOB_RECURSE fbgemm_genai_native_rocm_hip
+ "${FBGEMM_GENAI_SRCS}/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped*.hip"
+ "${FBGEMM_GENAI_SRCS}/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip")
+ set_source_files_properties(${fbgemm_genai_native_rocm_hip} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
+
+ # Add additional HIPCC compiler flags for performance
+ set(FBGEMM_GENAI_EXTRA_HIPCC_FLAGS
+ -mllvm
+ -enable-post-misched=0
+ -mllvm
+ -greedy-reverse-local-assignment=1
+ -fhip-new-launch-api)
+ if(DEFINED ROCM_VERSION_DEV AND ROCM_VERSION_DEV VERSION_LESS "7.2.0")
+ list(PREPEND FBGEMM_GENAI_EXTRA_HIPCC_FLAGS -mllvm -amdgpu-coerce-illegal-types=1)
+ endif()
+
+ hip_add_library(
+ fbgemm_genai STATIC
+ ${fbgemm_genai_native_rocm_hip}
+ HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS})
+ set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES)
+
+ target_include_directories(fbgemm_genai PUBLIC
+ # FBGEMM version of Composable Kernel is used due to some customizations
+ ${FBGEMM_THIRD_PARTY}/composable_kernel/include
+ ${FBGEMM_THIRD_PARTY}/composable_kernel/library/include
+ ${FBGEMM_THIRD_PARTY}/cutlass/include
+ ${FBGEMM_THIRD_PARTY}/cutlass/tools/util/include
+ ${FBGEMM_GENAI_SRCS}/common/include/ # includes fbgemm_gpu/quantize/utils.h, fbgemm_gpu/quantize/tuning_cache.hpp
+ ${FBGEMM_GENAI_SRCS}/include/ # includes fbgemm_gpu/torch_ops.h
+ )
+ endif()
endif()
endif()
@@ -635,12 +679,26 @@ if(USE_CUDA AND NOT USE_ROCM)
add_definitions(-DCUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED)
list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/include)
list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/tools/util/include)
+
+ # Add FBGEMM_GENAI include directories for torch_ops.h
+ if(USE_FBGEMM_GENAI)
+ list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/include)
+ list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/common/include)
+ endif()
+
if($ENV{ATEN_STATIC_CUDA})
- list(APPEND ATen_CUDA_DEPENDENCY_LIBS
- ${CUDA_LIBRARIES}
- CUDA::cusparse_static
- CUDA::cufft_static_nocallback
- )
+ if(CUDA_VERSION VERSION_LESS_EQUAL 12.9)
+ list(APPEND ATen_CUDA_DEPENDENCY_LIBS
+ ${CUDA_LIBRARIES}
+ CUDA::cusparse_static
+ CUDA::cufft_static_nocallback)
+ else()
+ list(APPEND ATen_CUDA_DEPENDENCY_LIBS
+ ${CUDA_LIBRARIES}
+ CUDA::cusparse_static
+ CUDA::cufft_static)
+ endif()
+
if(NOT BUILD_LAZY_CUDA_LINALG)
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
CUDA::cusolver_static
diff --git a/aten/src/ATen/DLConvertor.cpp b/aten/src/ATen/DLConvertor.cpp
index 9632cd5ed698..98ad757946be 100644
--- a/aten/src/ATen/DLConvertor.cpp
+++ b/aten/src/ATen/DLConvertor.cpp
@@ -308,17 +308,44 @@ void fillVersion(
// constructed out of ATen tensor
template <class T>
T* toDLPackImpl(const Tensor& src) {
- // create a new tensor with possibly normalized strides
- // gh-83069
- auto shape = src.sizes();
- auto strides = src.strides().vec();
- for (int i = 0; i < src.dim(); i++) {
- if (shape[i] < 2) {
- strides[i] = 1;
+ auto view = src;
+
+ // Detect whether there is need to normalize the strides
+ // Background: gh-83069
+ //
+ // However, normalizing strides comes at a high cost:
+ // it can slow down the toDLPack conversion by about 3x, so we
+ // only normalize when needed.
+ //
+ // The following code detects whether `src` follows
+ // a contiguous stride pattern. If it does (the common case),
+ // then we do not need to normalize the strides.
+ bool need_normalize_strides = false;
+ int64_t expected_stride = 1;
+ for (int i = src.dim() - 1; i >= 0; i--) {
+ // Normalization is only needed when a dimension of size 1
+ // carries a stride that breaks the contiguous pattern.
+ if (src.stride(i) != expected_stride && src.size(i) == 1) {
+ need_normalize_strides = true;
+ break;
+ }
+ expected_stride *= src.size(i);
+ }
+
+ // less common case, try normalizing the strides
+ if (need_normalize_strides) {
+ // create a new tensor with possibly normalized strides
+ // gh-83069
+ auto shape = src.sizes();
+ auto strides = src.strides().vec();
+ for (int i = 0; i < src.dim(); i++) {
+ if (shape[i] < 2) {
+ strides[i] = 1;
+ }
}
+ view = src.as_strided(shape, strides, src.storage_offset());
}
- auto view = src.as_strided(shape, strides, src.storage_offset());
ATenDLMTensor<T>* atDLMTensor(new ATenDLMTensor<T>);
atDLMTensor->handle = view;
atDLMTensor->tensor.manager_ctx = atDLMTensor;
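The fast path above only pays the as_strided() cost when a size-1 dimension actually breaks the contiguous stride pattern. Below is a minimal standalone sketch of that detection loop; FakeTensor is a hypothetical stand-in for at::Tensor metadata, for illustration only:

#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for at::Tensor sizes/strides.
struct FakeTensor {
  std::vector<int64_t> sizes;
  std::vector<int64_t> strides;
};

// Mirrors the detection loop in toDLPackImpl: walk dims right-to-left,
// tracking the stride a contiguous tensor would have. Only a size-1 dim
// with a mismatched stride requires normalization (gh-83069).
bool needs_stride_normalization(const FakeTensor& t) {
  int64_t expected_stride = 1;
  for (int64_t i = static_cast<int64_t>(t.sizes.size()) - 1; i >= 0; i--) {
    if (t.strides[i] != expected_stride && t.sizes[i] == 1) {
      return true;
    }
    expected_stride *= t.sizes[i];
  }
  return false;
}

int main() {
  // Contiguous 2x3 tensor: strides {3, 1} -> no normalization needed.
  FakeTensor contiguous{{2, 3}, {3, 1}};
  // Size-1 dim carrying an arbitrary stride -> normalization needed.
  FakeTensor weird{{1, 3}, {99, 1}};
  assert(!needs_stride_normalization(contiguous));
  assert(needs_stride_normalization(weird));
  return 0;
}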
diff --git a/aten/src/ATen/DTensorState.cpp b/aten/src/ATen/DTensorState.cpp
new file mode 100644
index 000000000000..0644aae3d070
--- /dev/null
+++ b/aten/src/ATen/DTensorState.cpp
@@ -0,0 +1,17 @@
+#include <ATen/DTensorState.h>
+
+namespace at {
+
+namespace {
+thread_local bool kDTensorAllowImplicitReplication = false;
+}
+
+bool get_dtensor_allow_implicit_replication() {
+ return kDTensorAllowImplicitReplication;
+}
+
+void set_dtensor_allow_implicit_replication(bool enabled) {
+ kDTensorAllowImplicitReplication = enabled;
+}
+
+} // namespace at
diff --git a/aten/src/ATen/DTensorState.h b/aten/src/ATen/DTensorState.h
new file mode 100644
index 000000000000..07e89eaeddae
--- /dev/null
+++ b/aten/src/ATen/DTensorState.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <c10/macros/Export.h>
+
+namespace at {
+
+TORCH_API bool get_dtensor_allow_implicit_replication();
+TORCH_API void set_dtensor_allow_implicit_replication(bool enabled);
+
+struct DTensorAllowImplicitReplication {
+ DTensorAllowImplicitReplication()
+ : prev_dtensor_allow_implicit_replication_(
+ get_dtensor_allow_implicit_replication()) {
+ set_dtensor_allow_implicit_replication(true);
+ }
+
+ DTensorAllowImplicitReplication(const DTensorAllowImplicitReplication&) =
+ delete;
+ DTensorAllowImplicitReplication& operator=(
+ const DTensorAllowImplicitReplication&) = delete;
+ DTensorAllowImplicitReplication(DTensorAllowImplicitReplication&&) = delete;
+ DTensorAllowImplicitReplication& operator=(
+ DTensorAllowImplicitReplication&&) = delete;
+
+ ~DTensorAllowImplicitReplication() {
+ set_dtensor_allow_implicit_replication(
+ prev_dtensor_allow_implicit_replication_);
+ }
+
+ private:
+ bool prev_dtensor_allow_implicit_replication_;
+};
+
+} // namespace at
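DTensorAllowImplicitReplication is the standard save/set/restore RAII pattern over a thread-local flag. A minimal usage sketch, assuming the header above is on the include path (the enclosing function is hypothetical):

#include <ATen/DTensorState.h>

// Hypothetical helper: temporarily allow mixed DTensor/Tensor operations.
void mixed_dtensor_op_example() {
  at::DTensorAllowImplicitReplication guard;  // saves old value, sets flag to true
  // ...anything issued here sees get_dtensor_allow_implicit_replication() == true
}  // destructor restores the previous value, even if an exception is thrown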
diff --git a/aten/src/ATen/FunctionalStorageImpl.cpp b/aten/src/ATen/FunctionalStorageImpl.cpp
index a5512818343f..8bca495abdc6 100644
--- a/aten/src/ATen/FunctionalStorageImpl.cpp
+++ b/aten/src/ATen/FunctionalStorageImpl.cpp
@@ -9,11 +9,6 @@
namespace at::functionalization {
-ViewMeta ViewMeta::to_out_idx(int64_t out_idx) {
- if (out_idx == this->out_index) return *this;
- return ViewMeta(forward_fn, reverse_fn, has_symbolic_inputs, is_multi_output, is_as_strided, out_idx);
-}
-
// Note [Functionalization: Alias Removal Part 2]
// See Note [Functionalization: Alias Removal] for more details.
// This function applies a single update from one of the views to the StorageImpl.
@@ -42,12 +37,12 @@ ViewMeta ViewMeta::to_out_idx(int64_t out_idx) {
static const Tensor apply_update(const FunctionalStorageImpl::Update& update, const Tensor& base) {
at::Tensor t = update.new_val;
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
- if (update.view_metas.empty()) return t;
+ if (update.view_metas.empty()) { return t; }
std::vector<at::Tensor> tmp_values({base});
tmp_values.reserve(update.view_metas.size());
for (size_t i = 0; i < update.view_metas.size() - 1; ++i) {
- at::Tensor next_view = update.view_metas[i].forward_fn(tmp_values.back(), update.view_metas[i].out_index);
+ at::Tensor next_view = update.view_metas[i]->forward(tmp_values.back());
// NB: We only actually need tmp_values for ops like select/slice/diagonal/squeeze/as_strided
// All of these ops require additional information to recover the sizes of the original tensor.
// If need to, we could probably apply this optimization and only bother computing tmp_values
@@ -55,9 +50,8 @@ static const Tensor apply_update(const FunctionalStorageImpl::Update& update, co
tmp_values.push_back(std::move(next_view));
}
for(int64_t i = static_cast<int64_t>(update.view_metas.size()) - 1; i >= 0; --i) {
- int64_t out_idx = update.view_metas[i].out_index;
// Each view inverse is implemented in ViewInverses.cpp.
- t = update.view_metas[i].reverse_fn(tmp_values[i], t, out_idx);
+ t = update.view_metas[i]->reverse(tmp_values[i], t);
}
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
return t;
@@ -111,13 +105,13 @@ FunctionalStorageImpl::FunctionalStorageImpl(const Tensor& base)
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(base_));
}
-void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<ViewMeta>& metas) {
+void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<std::shared_ptr<ViewMeta>>& metas) {
TORCH_CHECK(!frozen_, "cannot mutate tensors with frozen storage");
if (metas.size() > 1) {
for (size_t i = 1; i < metas.size(); ++i) {
// Skipping this check for XLA. Would be good to add it back, but it is failing XLA CI
- TORCH_CHECK(updated_val.device().type() == c10::DeviceType::XLA || !metas[i].is_as_strided,
+ TORCH_CHECK(updated_val.device().type() == c10::DeviceType::XLA || !metas[i]->is_as_strided,
"During torch.compile, encountered a mutation on a view chain of length ", metas.size(), ", where view ", i,
" was an as_strided() call. as_strided() is non-compositional, and therefore is not possible to functionalize properly today,"
"so this behavior is banned in compile. As a workaround, you can either remove the mutation from the model code, or you "
diff --git a/aten/src/ATen/FunctionalStorageImpl.h b/aten/src/ATen/FunctionalStorageImpl.h
index 8cd1cb7434aa..0c9c1fd775f3 100644
--- a/aten/src/ATen/FunctionalStorageImpl.h
+++ b/aten/src/ATen/FunctionalStorageImpl.h
@@ -8,44 +8,89 @@ namespace at::functionalization {
// See Note [Functionalization Pass In Core]
+enum class InverseReturnMode {
+ /// Specifies that functional inverses should always return a view.
+ AlwaysView,
+ /// Specifies that functional inverses should always return a non-view / copy.
+ NeverView,
+ /// Specifies that functional inverses should return a view unless a (copying)
+ /// scatter
+ /// inverse exists, in which case that will be used instead.
+ /// This avoids as_strided() calls that can be difficult for subclasses to
+ /// handle.
+ ViewOrScatterInverse,
+};
+
+#define FUNCTIONALIZATION_VIEWMETA_NAME(TYPE) \
+ static const char* name() { \
+ return #TYPE; \
+ }
+
+#define FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(...) \
+ using SerializableTuple = std::tuple<__VA_ARGS__>
+
// ViewMeta is a class used by the functionalization pass to navigate between
// a base tensor and a view tensor.
// For example, if I call `b = a.view1(...)`
-// the functionalization pass will generate and store a ViewMeta on b that looks
-// like:
+// the functionalization pass will generate and store a ViewMeta specialization
+// for `view1` operation on b that looks like:
//
-// ViewMeta(
-// [](const Tensor& base, int64_t mutated_view_idx) {
-// return base.view1(...);
-// },
-// [](const at::Tensor& base, const at::Tensor& mutated_view,
-// int64_t mutated_view_idx) -> at::Tensor {
-// return at::functionalization::impl::view1_inverse(base, mutated_view,
-// ...);
+// struct TORCH_API view1_ViewMeta : public ViewMeta {
+// FUNCTIONALIZATION_VIEWMETA_NAME(view1_ViewMeta);
+// FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(
+// bool /* reapply_views */,
+// const std::vector<int64_t>&);
+//
+// view1_ViewMeta(const SerializableTuple& tpl)
+// : view1_ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {}
+//
+// view1_ViewMeta(bool reapply_views, const std::vector<int64_t>& size)
+// : ViewMeta(/*has_symbolic_inputs=*/false),
+// reapply_views(reapply_views),
+// size(size) {}
+//
+// Tensor forward(const Tensor& base) override {
+// return base.view1(...);
// }
//
-// The forward_fn lambda describes how to replay view1 on a tensor.
+// Tensor reverse(const Tensor& base, const Tensor& mutated_view) override {
+// return at::functionalization::impl::view1_inverse(base, mutated_view,
+// ...);
+// }
//
-// The reverse_fn lambda describes how, given a tensor that is already a view,
+// SerializableTuple to_serializable_tuple() {
+// return std::make_tuple(reapply_views, size);
+// }
+//
+// bool reapply_views;
+// std::vector<int64_t> size;
+// };
+//
+// The forward function describes how to replay view1 on a tensor.
+//
+// The reverse function describes how, given a tensor that is already a view,
// how to get the corresponding base tensor. See Note [Functionalization Pass:
// View Inverses] for details.
+//
+// `SerializableTuple` is a typedef that defines an `std::tuple<...>` type
+// representing the `ViewMeta` instance state. Methods that take in/return such
+// a type are used for supporting pickle serialization.
struct ViewMeta {
ViewMeta(
- std::function<Tensor(const Tensor&, int64_t)> forward,
- std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse,
bool has_symbolic_inputs,
bool is_multi_output = false,
bool is_as_strided = false,
int64_t out_idx = 0)
- : forward_fn(std::move(forward)),
- reverse_fn(std::move(reverse)),
- out_index(out_idx),
+ : out_index(out_idx),
is_multi_output(is_multi_output),
is_as_strided(is_as_strided),
has_symbolic_inputs(has_symbolic_inputs) {}
- std::function<Tensor(const Tensor&, int64_t)> forward_fn;
- std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse_fn;
+ virtual ~ViewMeta() = default;
+
+ virtual Tensor forward(const Tensor& base) = 0;
+ virtual Tensor reverse(const Tensor& base, const Tensor& mutated_view) = 0;
+
// See Note [out_idx in ViewMeta]
int64_t out_index;
@@ -57,10 +102,17 @@ struct ViewMeta {
// Tells us if this view operation has any symbolic inputs
bool has_symbolic_inputs;
- // Returns a copy of the current ViewMeta, if out_idx matches the current
- // out_index. Otherwise, returns a new ViewMeta with the same forward/reverse
+ // Returns a new ViewMeta with the same forward/reverse
// functions, but a new out index.
- ViewMeta to_out_idx(int64_t out_idx);
+ //
+ // This method should be implemented by those `ViewMeta` that have more than
+ // one output.
+ virtual std::shared_ptr<ViewMeta> to_out_index(int64_t out_index) {
+ TORCH_CHECK_NOT_IMPLEMENTED(
+ false,
+ "ViewMeta::to_out_index not implemented. ",
+ "Likely because there's only one output.");
+ }
};
// FunctionalStorageImpl is a subclass of StorageImpl used by the
@@ -93,14 +145,14 @@ struct TORCH_API FunctionalStorageImpl : public c10::StorageImpl {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
const at::Tensor new_val;
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
- const std::vector<ViewMeta> view_metas;
+ const std::vector<std::shared_ptr<ViewMeta>> view_metas;
};
explicit FunctionalStorageImpl(const Tensor& value);
void add_update(
const Tensor& updated_val,
- const std::vector<ViewMeta>& view_metas);
+ const std::vector<std::shared_ptr<ViewMeta>>& view_metas);
bool apply_updates();
const Tensor& base() {
return base_;
diff --git a/aten/src/ATen/FunctionalTensorWrapper.cpp b/aten/src/ATen/FunctionalTensorWrapper.cpp
index 7d5e4e84e861..3a574fa7d491 100644
--- a/aten/src/ATen/FunctionalTensorWrapper.cpp
+++ b/aten/src/ATen/FunctionalTensorWrapper.cpp
@@ -129,17 +129,19 @@ void FunctionalTensorWrapper::freeze_storage() const {
// - view_value: The output tensor that we need to wrap.
// - base: The "base" of the view that `view_value` was generated from.
// See Note [Functionalization: Alias Removal Part 2] for more details on the mutation replay logic.
-FunctionalTensorWrapper::FunctionalTensorWrapper(const Tensor& view_value, const FunctionalTensorWrapper* base, const functionalization::ViewMeta& meta)
- : c10::TensorImpl(
- c10::DispatchKeySet(DispatchKey::Functionalize),
- view_value.dtype(),
- view_value.device()
- ),
- value_(view_value),
- is_multi_output_view_(base->is_multi_output_view_ || meta.is_multi_output),
- was_storage_changed_(base->was_storage_changed_),
- is_symbolic_(base->is_symbolic_)
-{
+FunctionalTensorWrapper::FunctionalTensorWrapper(
+ const Tensor& view_value,
+ const FunctionalTensorWrapper* base,
+ const std::shared_ptr<functionalization::ViewMeta>& meta)
+ : c10::TensorImpl(
+ c10::DispatchKeySet(DispatchKey::Functionalize),
+ view_value.dtype(),
+ view_value.device()),
+ value_(view_value),
+ is_multi_output_view_(
+ base->is_multi_output_view_ || meta->is_multi_output),
+ was_storage_changed_(base->was_storage_changed_),
+ is_symbolic_(base->is_symbolic_) {
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(value_));
TORCH_INTERNAL_ASSERT(!value_.key_set().has(c10::DispatchKey::Functionalize));
set_constructor_metadata();
@@ -148,11 +150,10 @@ FunctionalTensorWrapper::FunctionalTensorWrapper(const Tensor& view_value, const
view_metas_ = base->view_metas_; // copy
}
view_metas_.push_back(meta);
- maybe_mark_symbolic(meta);
+ maybe_mark_symbolic(meta.get());
storage_ = base->storage_; // alias this tensor's storage with the base tensor's
}
-
functionalization::FunctionalStorageImpl* FunctionalTensorWrapper::functional_storage_impl() const {
return static_cast<functionalization::FunctionalStorageImpl*>(storage_.unsafeGetStorageImpl());
}
@@ -176,18 +177,18 @@ bool FunctionalTensorWrapper::is_up_to_date() const {
}
// See Note [Functionalization Pass - Inplace View Ops]
-void FunctionalTensorWrapper::mutate_view_meta(const at::functionalization::ViewMeta& meta) {
+void FunctionalTensorWrapper::mutate_view_meta(const std::shared_ptr<at::functionalization::ViewMeta>& meta) {
view_metas_.push_back(meta);
// Manually track the fact that this tensor received a metadata mutation!
has_metadata_mutation_ = true;
// Mark this tensor as being symbolic if there are any symbolic inputs used by the view operation.
- maybe_mark_symbolic(meta);
+ maybe_mark_symbolic(meta.get());
// Note [Functionalization Pass - Inplace View Ops]
// So, these ops are special - they're mutation AND view ops. They get special codegen.
// An example is transpose_, e.g. `a.transpose_()`
// Calling transpose_() should ensure that a gets an alias, and append the new ViewMeta to a's current list of ViewMetas.
at::AutoDispatchSkipFunctionalize guard;
- value_ = meta.forward_fn(value_, meta.out_index);
+ value_ = meta->forward(value_);
TORCH_INTERNAL_ASSERT(!value_.key_set().has(c10::DispatchKey::Functionalize));
}
@@ -368,15 +369,8 @@ void FunctionalTensorWrapper::sync_() {
regenerate_from_base();
}
-Tensor FunctionalTensorWrapper::apply_view_metas(const Tensor& base) {
- auto t = base;
-
- // Reapply views to get the viewed tensor from the base in alias_
- for (auto& view_meta: view_metas_) {
- t = view_meta.forward_fn(t, view_meta.out_index);
- }
-
- return t;
+const std::vector<std::shared_ptr<functionalization::ViewMeta>>& FunctionalTensorWrapper::view_metas() const {
+ return view_metas_;
}
void FunctionalTensorWrapper::regenerate_from_base() {
@@ -385,7 +379,7 @@ void FunctionalTensorWrapper::regenerate_from_base() {
auto t = storage_impl->base();
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
- t = apply_view_metas(t);
+ t = at::functionalization::impl::apply_view_meta_sequence(t, view_metas_);
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
replace_(t, /*from_lazy_regenerate=*/true);
@@ -724,11 +718,11 @@ bool isFunctionalTensor(const std::optional& t) {
}
bool isFunctionalTensor(const c10::List<::std::optional<Tensor>>& t_list) {
- if (t_list.empty()) return false;
+ if (t_list.empty()) { return false; }
auto functional_count = 0;
for (const auto i : c10::irange(t_list.size())) {
auto const & e= t_list[i];
- if (!e.has_value() || !e->defined()) continue;
+ if (!e.has_value() || !e->defined()) { continue; }
if (isFunctionalTensor(e)) {
++functional_count;
}
@@ -738,10 +732,10 @@ bool isFunctionalTensor(const c10::List<::std::optional>& t_list) {
template <typename T>
static bool isFunctionalTensorIListRef(c10::IListRef<T> list) {
- if (list.size() == 0) return false;
+ if (list.size() == 0) { return false; }
auto functional_count = 0;
for (const auto& tensor : list) {
- if (!tensor.defined()) continue;
+ if (!tensor.defined()) { continue; }
if (isFunctionalTensor(tensor)) {
++functional_count;
}
@@ -759,20 +753,28 @@ void freeze_functional_tensor(const Tensor& tensor) {
functional_base_impl->freeze_storage();
}
-Tensor create_functional_tensor_with_view_meta(const at::Tensor& view_to_wrap, const at::Tensor& base, functionalization::ViewMeta meta, int64_t out_idx) {
+Tensor create_functional_tensor_with_view_meta(
+ const at::Tensor& view_to_wrap,
+ const at::Tensor& base,
+ const std::shared_ptr<functionalization::ViewMeta>& meta,
+ int64_t out_idx) {
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(view_to_wrap));
TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(base));
auto functional_base_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(base);
+ auto meta_ = meta;
if (out_idx != 0) {
// Note [out_idx in ViewMeta]
// When a view op outputs multiple tensors, each output needs its own separate ViewMeta.
// Each ViewMeta also tracks the index of the particular output tensor, which is needed in the reverse function.
- meta = meta.to_out_idx(out_idx);
+ meta_ = meta->to_out_index(out_idx);
}
- return at::detail::make_tensor<FunctionalTensorWrapper>(view_to_wrap, functional_base_impl, meta);
+ return at::detail::make_tensor<FunctionalTensorWrapper>(view_to_wrap, functional_base_impl, meta_);
}
-std::vector<Tensor> create_functional_tensor_with_view_meta(ITensorListRef view_to_wrap, const at::Tensor& base, const functionalization::ViewMeta& meta) {
+std::vector<Tensor> create_functional_tensor_with_view_meta(
+ ITensorListRef view_to_wrap,
+ const at::Tensor& base,
+ const std::shared_ptr<functionalization::ViewMeta>& meta) {
 std::vector<Tensor> outputs(view_to_wrap.size());
int64_t i = 0;
for (const auto& tensor : view_to_wrap) {
@@ -782,12 +784,22 @@ std::vector create_functional_tensor_with_view_meta(ITensorListRef view_
return outputs;
}
-void mutate_view_meta(const at::Tensor& self, const functionalization::ViewMeta& meta) {
+void mutate_view_meta(const at::Tensor& self, const std::shared_ptr<functionalization::ViewMeta>& meta) {
TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(self));
auto self_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(self);
self_impl->mutate_view_meta(meta);
}
+Tensor apply_view_meta_sequence(
+ const Tensor& base,
+ const std::vector<std::shared_ptr<ViewMeta>>& sequence) {
+ Tensor r = base;
+ for (auto& vm : sequence) {
+ r = vm->forward(r);
+ }
+ return r;
+}
+
// Note [Propagating strides in the functionalization pass]
// In order to properly compute stride information, the functionalization pass
// calls each {view} reference implementations with meta tensors.
@@ -881,7 +893,7 @@ void functionalize_op_helper(const c10::OperatorHandle& op, torch::jit::Stack* s
const auto& ivalue = returns[idx];
if (ivalue.isTensor()) {
const auto& t = ivalue.toTensor();
- if (!t.defined()) continue;
+ if (!t.defined()) { continue; }
at::functionalization::impl::sync(t);
auto t_new = c10::IValue(at::functionalization::impl::from_functional_tensor(t));
(*stack)[returns_begin + idx] = t_new;
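apply_view_meta_sequence above is a simple left-to-right fold of virtual forward() calls over the view chain. The sketch below mirrors that shape with toy stand-ins; ToyViewMeta, AddOne, and Double are made-up types, and the "tensor" is just an int (the real chain replays actual view ops on tensors):

#include <iostream>
#include <memory>
#include <vector>

// Toy ViewMeta-like base: forward() replays one "view op" on the base value.
struct ToyViewMeta {
  virtual ~ToyViewMeta() = default;
  virtual int forward(int base) = 0;
};

struct AddOne : ToyViewMeta {
  int forward(int base) override { return base + 1; }
};
struct Double : ToyViewMeta {
  int forward(int base) override { return base * 2; }
};

// Mirrors apply_view_meta_sequence: fold forward() over the chain, in order.
int apply_sequence(int base, const std::vector<std::shared_ptr<ToyViewMeta>>& seq) {
  int r = base;
  for (const auto& vm : seq) {
    r = vm->forward(r);
  }
  return r;
}

int main() {
  std::vector<std::shared_ptr<ToyViewMeta>> seq{
      std::make_shared<AddOne>(), std::make_shared<Double>()};
  std::cout << apply_sequence(3, seq) << "\n";  // (3 + 1) * 2 = 8
  return 0;
}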
diff --git a/aten/src/ATen/FunctionalTensorWrapper.h b/aten/src/ATen/FunctionalTensorWrapper.h
index b260b7c9f958..6d9050728da7 100644
--- a/aten/src/ATen/FunctionalTensorWrapper.h
+++ b/aten/src/ATen/FunctionalTensorWrapper.h
@@ -56,7 +56,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
explicit FunctionalTensorWrapper(
const Tensor& view_value,
const FunctionalTensorWrapper* base,
- const functionalization::ViewMeta& meta);
+ const std::shared_ptr<functionalization::ViewMeta>& meta);
// Get the underlying, actual tensor, that doesn't know anything about
// functionalization.
@@ -99,17 +99,17 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
->are_all_mutations_under_no_grad_or_inference_mode();
}
- void maybe_mark_symbolic(const functionalization::ViewMeta& meta) {
- is_symbolic_ = is_symbolic_ | meta.has_symbolic_inputs;
+ void maybe_mark_symbolic(functionalization::ViewMeta* meta) {
+ is_symbolic_ = is_symbolic_ | meta->has_symbolic_inputs;
}
bool is_symbolic() const {
return is_symbolic_;
}
- // Runs the forward_fn of every ViewMeta collected in the current instance
- // to some other base.
- Tensor apply_view_metas(const Tensor& base);
+ // Retrieves the ViewMeta sequence of this tensor.
+ const std::vector<std::shared_ptr<functionalization::ViewMeta>>& view_metas()
+ const;
// Sync's the underlying tensor with its alias, if it's out of date. This
// involves two steps: 1) Apply any pending updates/mutations to the alias 2)
@@ -146,7 +146,8 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
// from the base tensor. This method is used by inplace-view ops like
// transpose_. It appends a ViewMeta to the existing stack, and refreshes the
// tensor by replaying the views off of the alias.
- void mutate_view_meta(const at::functionalization::ViewMeta& meta);
+ void mutate_view_meta(
+ const std::shared_ptr<at::functionalization::ViewMeta>& meta);
// Custom implementation of self.set_(src)
void set__impl(const FunctionalTensorWrapper* other);
@@ -285,7 +286,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
bool is_symbolic_ = false;
size_t generation_ = 0;
- std::vector<functionalization::ViewMeta> view_metas_;
+ std::vector<std::shared_ptr<functionalization::ViewMeta>> view_metas_;
protected:
static void copy_tensor_metadata(
@@ -377,16 +378,20 @@ TORCH_API void propagate_xla_data_direct(
Tensor create_functional_tensor_with_view_meta(
const Tensor& view_to_wrap,
const Tensor& base,
- functionalization::ViewMeta meta,
+ const std::shared_ptr<functionalization::ViewMeta>& meta,
int64_t out_idx = 0);
std::vector create_functional_tensor_with_view_meta(
ITensorListRef view_to_wrap,
const Tensor& base,
- const functionalization::ViewMeta& meta);
+ const std::shared_ptr<functionalization::ViewMeta>& meta);
void mutate_view_meta(
const Tensor& self,
- const functionalization::ViewMeta& meta);
+ const std::shared_ptr<functionalization::ViewMeta>& meta);
+
+TORCH_API Tensor apply_view_meta_sequence(
+ const Tensor& base,
+ const std::vector<std::shared_ptr<functionalization::ViewMeta>>& sequence);
void set_sizes_strides_offset(const Tensor& out, const Tensor& meta_out);
void set_sizes_strides_offset(
diff --git a/aten/src/ATen/FunctionalizeFallbackKernel.cpp b/aten/src/ATen/FunctionalizeFallbackKernel.cpp
index 97094c9f125a..10f988b4d281 100644
--- a/aten/src/ATen/FunctionalizeFallbackKernel.cpp
+++ b/aten/src/ATen/FunctionalizeFallbackKernel.cpp
@@ -1,3 +1,5 @@
+#include <ATen/FunctionalizeFallbackKernel.h>
+
#include
#include
#include
@@ -7,7 +9,6 @@
#include
#include
#include
-#include
#ifndef AT_PER_OPERATOR_HEADERS
#include
@@ -28,6 +29,31 @@
#include
#endif
+namespace at::functionalization {
+
+Tensor resize__ViewMeta::forward(const Tensor& base) {
+ if (reapply_views) {
+ return base.as_strided(size, c10::contiguous_strides(size));
+ } else {
+ return at::as_strided_copy(base, size, c10::contiguous_strides(size));
+ }
+}
+
+Tensor resize__ViewMeta::reverse(const Tensor& base, const Tensor& mutated_view) {
+ return base.as_strided_scatter(
+ mutated_view, size, c10::contiguous_strides(size));
+}
+
+Tensor _unsafe_view_ViewMeta::forward(const Tensor& base) {
+ return at::_unsafe_view_symint(base, size);
+}
+
+Tensor _unsafe_view_ViewMeta::reverse(const Tensor& base, const Tensor& mutated_view) {
+ return at::_unsafe_view_symint(mutated_view, base.sym_sizes());
+}
+
+} // namespace at::functionalization
+
namespace {
void functionalizeFallback(const c10::OperatorHandle& op, c10::DispatchKeySet dispatchKeySet [[maybe_unused]], torch::jit::Stack* stack) {
const auto& schema = op.schema();
@@ -106,7 +132,9 @@ namespace {
const auto& ivalue = returns[idx];
if (ivalue.isTensor() && should_wrap_outputs) {
const auto& t = ivalue.toTensor();
- if (!t.defined()) continue;
+ if (!t.defined()) {
+ continue;
+ }
auto t_new = c10::IValue(at::functionalization::impl::to_functional_tensor(t));
(*stack)[returns_begin + idx] = t_new;
} else if (ivalue.isTensorList() && should_wrap_outputs) {
@@ -169,19 +197,8 @@ static const at::Tensor & resize__functionalization(c10::DispatchKeySet dispatch
// The output of resizing is equivalent to taking a slice of a larger tensor.
// We have to emulate this "slicing" with an as_strided call.
auto reapply_views = at::functionalization::impl::getFunctionalizationReapplyViewsTLS();
- at::functionalization::ViewMeta view_meta = at::functionalization::ViewMeta(
- [reapply_views = reapply_views, size = size.vec()](const at::Tensor & base, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
- if (reapply_views) {
- return base.as_strided(size, c10::contiguous_strides(size));
- } else {
- return at::as_strided_copy(base, size, c10::contiguous_strides(size));
- }
- },
- [size = size.vec()](const at::Tensor & base, const at::Tensor & mutated_view, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
- return base.as_strided_scatter(mutated_view, size, c10::contiguous_strides(size));
- },
- /*has_symbolic_inputs=*/false
- );
+ auto view_meta = std::make_shared<at::functionalization::resize__ViewMeta>(
+ reapply_views, size.vec());
at::functionalization::impl::mutate_view_meta(self, view_meta);
return self;
}
@@ -300,17 +317,11 @@ static at::Tensor _unsafe_view_functionalize(const at::Tensor & self, at::SymInt
tmp_output = at::_unsafe_view_symint(self_, size);
}
- bool has_symbolic_inputs = std::any_of(size.begin(), size.end(), [=](auto& s) { return s.is_symbolic(); });
-
- at::functionalization::ViewMeta view_meta = at::functionalization::ViewMeta(
- [size = size.vec()](const at::Tensor & base, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
- return at::_unsafe_view_symint(base, size);
- },
- [size = size.vec()](const at::Tensor & base, const at::Tensor & mutated_view, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
- return at::_unsafe_view_symint(mutated_view, base.sym_sizes());
- },
- /*has_symbolic_inputs=*/has_symbolic_inputs
- );
+ bool has_symbolic_inputs = std::any_of(
+ size.begin(), size.end(), [=](auto& s) { return s.is_symbolic(); });
+ auto view_meta =
+ std::make_shared<at::functionalization::_unsafe_view_ViewMeta>(
+ has_symbolic_inputs, size.vec());
auto out = at::functionalization::impl::create_functional_tensor_with_view_meta(tmp_output, self, std::move(view_meta));
// See Note [Propagating strides in the functionalization pass]
diff --git a/aten/src/ATen/FunctionalizeFallbackKernel.h b/aten/src/ATen/FunctionalizeFallbackKernel.h
new file mode 100644
index 000000000000..aabcfc827af3
--- /dev/null
+++ b/aten/src/ATen/FunctionalizeFallbackKernel.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <ATen/FunctionalStorageImpl.h>
+
+namespace at::functionalization {
+
+// `ViewMeta` implementation for `resize_` operation.
+struct TORCH_API resize__ViewMeta : public ViewMeta {
+ FUNCTIONALIZATION_VIEWMETA_NAME(resize__ViewMeta)
+ FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(
+ bool /* reapply_views */,
+ const std::vector<int64_t>&);
+
+ resize__ViewMeta(const SerializableTuple& tpl)
+ : resize__ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {}
+
+ resize__ViewMeta(bool reapply_views, const std::vector<int64_t>& size)
+ : ViewMeta(/*has_symbolic_inputs=*/false),
+ reapply_views(reapply_views),
+ size(size) {}
+
+ Tensor forward(const Tensor& base) override;
+ Tensor reverse(const Tensor& base, const Tensor& mutated_view) override;
+
+ SerializableTuple to_serializable_tuple() {
+ return std::make_tuple(reapply_views, size);
+ }
+
+ bool reapply_views;
+ std::vector<int64_t> size;
+};
+
+// `ViewMeta` implementation for `_unsafe_view` operation.
+struct TORCH_API _unsafe_view_ViewMeta : public ViewMeta {
+ FUNCTIONALIZATION_VIEWMETA_NAME(_unsafe_view_ViewMeta)
+ FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(
+ bool /* has_symbolic_inputs */,
+ const std::vector<c10::SymInt>&);
+
+ _unsafe_view_ViewMeta(const SerializableTuple& tpl)
+ : _unsafe_view_ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {}
+
+ _unsafe_view_ViewMeta(
+ bool has_symbolic_inputs,
+ const std::vector<c10::SymInt>& size)
+ : ViewMeta(has_symbolic_inputs), size(size) {}
+
+ Tensor forward(const Tensor& base) override;
+ Tensor reverse(const Tensor& base, const Tensor& mutated_view) override;
+
+ SerializableTuple to_serializable_tuple() {
+ return std::make_tuple(has_symbolic_inputs, size);
+ }
+
+ std::vector<c10::SymInt> size;
+};
+
+} // namespace at::functionalization
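For context, the SerializableTuple plumbing in these subclasses is what lets a ViewMeta be flattened and rebuilt, e.g. for pickle support. A hedged sketch of the round trip, assuming a translation unit that can see the header above (the actual serialization glue lives elsewhere in the patch and is not shown here):

#include <ATen/FunctionalizeFallbackKernel.h>

// Sketch only: flatten a resize__ViewMeta to its tuple form and rebuild it.
// The tuple carries (reapply_views, size), per the macro invocation above.
void roundtrip_example() {
  at::functionalization::resize__ViewMeta meta(
      /*reapply_views=*/true, /*size=*/{2, 3, 4});
  // Rebuild directly from the tuple form; the tuple ctor delegates to the
  // (bool, vector) constructor.
  at::functionalization::resize__ViewMeta restored(meta.to_serializable_tuple());
  (void)restored;
}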
diff --git a/aten/src/ATen/ThreadLocalState.cpp b/aten/src/ATen/ThreadLocalState.cpp
index 33977d8d7cf8..22509c7be4e1 100644
--- a/aten/src/ATen/ThreadLocalState.cpp
+++ b/aten/src/ATen/ThreadLocalState.cpp
@@ -8,6 +8,7 @@
#include
#include
#include
+#include <ATen/DTensorState.h>
namespace at {
@@ -19,6 +20,7 @@ ThreadLocalState::ThreadLocalState()
torch_dispatch_mode_state_(c10::impl::TorchDispatchModeTLS::get_state()), python_dispatcher_state_(c10::impl::PythonDispatcherTLS::get_state()),
python_torch_function_state_(at::impl::PythonTorchFunctionTLS::get_state()),
saved_tensors_default_hooks_state_(at::SavedTensorDefaultHooks::get_tls_state()), functionalization_reapply_views_state_(at::functionalization::impl::getFunctionalizationReapplyViewsTLS()),
+ dtensor_allow_implicit_replication_(at::get_dtensor_allow_implicit_replication()),
saved_objects_(at::impl::ThreadLocalPythonObjects::get_state()) {
#if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE) && !defined(BUILD_LITE_INTERPRETER)
for(size_t i=0; i<autocast_dtypes_.size(); i++) {
#include
#include
diff --git a/aten/src/ATen/core/boxing/KernelFunction_impl.h b/aten/src/ATen/core/boxing/KernelFunction_impl.h
index be93d5991e9a..672309ec19a2 100644
--- a/aten/src/ATen/core/boxing/KernelFunction_impl.h
+++ b/aten/src/ATen/core/boxing/KernelFunction_impl.h
@@ -15,7 +15,7 @@ std::enable_if_t<
std::is_base_of_v<Base, Child>,
std::unique_ptr<Base>>
make_unique_base(Args&&... args) {
- return std::unique_ptr<Base>(new Child(std::forward<Args>(args)...));
+ return std::make_unique<Child>(std::forward<Args>(args)...);
}
} // namespace detail
diff --git a/aten/src/ATen/core/dynamic_type.h b/aten/src/ATen/core/dynamic_type.h
index b33e7ce0c549..2ba841e44e20 100644
--- a/aten/src/ATen/core/dynamic_type.h
+++ b/aten/src/ATen/core/dynamic_type.h
@@ -64,6 +64,7 @@ constexpr DynamicTypeBits kDynamicClassTypeBit = DYNAMIC_TYPE_BIT(10);
_(ScalarType, kDynamicIntTypeBit, 1) \
_(Layout, kDynamicIntTypeBit, 1) \
_(SymInt, kDynamicIntTypeBit, 1) \
+ _(SymBool, kDynamicIntTypeBit, 1) \
_(MemoryFormat, kDynamicIntTypeBit, 1)
#define FORWARD_DECL_TYPE(NAME, _, __) struct NAME ## Type;
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
index 4ab57f0beb1c..0d319ea59384 100644
--- a/aten/src/ATen/cuda/CUDABlas.cpp
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
@@ -996,9 +996,6 @@ void bgemm(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) {
template <>
void bgemm<at::Half, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)) {
- #ifdef USE_ROCM
- TORCH_CHECK(false, "bgemm input type at::Half and output type float is not supported for ROCm");
- #endif
// TODO: Support tuning for Half inputs and FP32 output
bgemm_internal(CUDABLAS_BGEMM_ARGS(at::Half));
}
@@ -1006,9 +1003,7 @@ void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)
template <>
void bgemm<at::BFloat16, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)) {
- #ifdef USE_ROCM
- TORCH_CHECK(false, "bgemm input type at::BFloat16 and output type float is not supported for ROCm");
- #else
+ #ifndef USE_ROCM
cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
if (prop->major < 8)
@@ -1513,9 +1508,6 @@ void gemm(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
template <>
void gemm<at::Half, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)) {
- #ifdef USE_ROCM
- TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
- #endif
// TODO: Support Tuning for fp16-fp32 gemm
gemm_internal(CUDABLAS_GEMM_ARGS(at::Half));
}
@@ -1523,9 +1515,7 @@ void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float))
template <>
void gemm<at::BFloat16, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)) {
- #ifdef USE_ROCM
- TORCH_CHECK(false, "gemm input type at::BFloat16 and output type float is not supported for ROCm");
- #else
+ #ifndef USE_ROCM
cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
if (prop->major < 8)
@@ -1947,11 +1937,11 @@ void scaled_gemm(
computeDesc.setAttribute(CUBLASLT_MATMUL_DESC_TRANSB, _cublasOpFromChar(transb));
cublasLtMatmulDescAttributes_t matmulDescA = CUBLASLT_MATMUL_DESC_A_SCALE_POINTER;
cublasLtMatmulDescAttributes_t matmulDescB = CUBLASLT_MATMUL_DESC_B_SCALE_POINTER;
+#if defined(USE_ROCM) && !defined(HIPBLASLT_OUTER_VEC) && defined(HIPBLASLT_VEC_EXT)
// hipblaslt supported row-wise before cublas, and did so their own way (via
// the SCALE_POINTERSs), but then migrated to match how cublas does it (via
// the SCALE_MODEs). Here we check for this early custom mode.
bool use_rowwise = (mat1_scaling_type == ScalingType::RowWise && mat2_scaling_type == ScalingType::RowWise);
-#if defined(USE_ROCM) && !defined(HIPBLASLT_OUTER_VEC) && defined(HIPBLASLT_VEC_EXT)
if (use_rowwise) {
matmulDescA = HIPBLASLT_MATMUL_DESC_A_SCALE_POINTER_VEC_EXT;
matmulDescB = HIPBLASLT_MATMUL_DESC_B_SCALE_POINTER_VEC_EXT;
@@ -1966,8 +1956,12 @@ void scaled_gemm(
}
#endif
}
-#else
- // rowwise isn't supported using cublaslt or older hipblaslt
+#elif (CUDA_VERSION < 12090) && !defined(USE_ROCM)
+ bool use_rowwise = (mat1_scaling_type == ScalingType::RowWise && mat2_scaling_type == ScalingType::RowWise);
+ // rowwise isn't supported using older cublaslt or older hipblaslt
TORCH_INTERNAL_ASSERT(use_rowwise == false, "rowwise scaled_gemm not supported with blaslt");
#endif // if defined(USE_ROCM) && !defined(HIPBLASLT_OUTER_VEC) && defined(HIPBLASLT_VEC_EXT)
computeDesc.setAttribute(matmulDescA, mat1_scale_ptr);
@@ -2583,8 +2577,6 @@ void vdot>(CUDABLAS_DOT_ARGTYPES(c10::complex)) {
reinterpret_cast(result)));
}
-// HIP on Windows does not support
-#if !(defined(USE_ROCM) && defined(_MSC_VER))
template <>
void getrsBatched<float>(CUDABLAS_GETRS_ARGTYPES(float)) {
TORCH_CUDABLAS_CHECK(cublasSgetrsBatched(
@@ -2783,6 +2775,5 @@ void gelsBatched>(CUDABLAS_GELS_BATCHED_ARGTYPES(c10::comple
devInfoArray,
batchSize));
}
-#endif // !(defined(USE_ROCM) && defined(_MSC_VER))
} // namespace at::cuda::blas
diff --git a/aten/src/ATen/cuda/CUDABlas.h b/aten/src/ATen/cuda/CUDABlas.h
index 5021917fe095..b235840418e2 100644
--- a/aten/src/ATen/cuda/CUDABlas.h
+++ b/aten/src/ATen/cuda/CUDABlas.h
@@ -343,9 +343,6 @@ void vdot>(CUDABLAS_DOT_ARGTYPES(c10::complex));
int m, int n, int nrhs, Dtype** dA_array, int ldda, \
Dtype** dC_array, int lddc, int* info, int *devInfoArray, int batchSize
-// HIP on Windows does not support getrs, geqrf, getrf, gels
-#if !(defined(USE_ROCM) && defined(_MSC_VER))
-
template <class Dtype>
void getrsBatched(CUDABLAS_GETRS_ARGTYPES(Dtype)) {
static_assert(false&&sizeof(Dtype),"at::cuda::blas::getrsBatched: not implemented");
@@ -400,28 +397,4 @@ TORCH_CUDA_CU_API void gelsBatched>(CUDABLAS_GELS_BATCHED_A
template<>
TORCH_CUDA_CU_API void gelsBatched<c10::complex<double>>(CUDABLAS_GELS_BATCHED_ARGTYPES(c10::complex<double>));
-#else // !(defined(USE_ROCM) && defined(_MSC_VER))
-
-template <class Dtype>
-void getrsBatched(CUDABLAS_GETRS_ARGTYPES(Dtype)) {
- TORCH_CHECK(false, "at::cuda::blas::getrsBatched: not supported for HIP on Windows");
-}
-
-template <class Dtype>
-void geqrfBatched(CUDABLAS_GEQRF_BATCHED_ARGTYPES(Dtype)) {
- TORCH_CHECK(false, "at::cuda::blas::geqrfBatched: not supported for HIP on Windows");
-}
-
-template <class Dtype>
-void getrfBatched(CUDABLAS_GETRF_ARGTYPES(Dtype)) {
- TORCH_CHECK(false, "at::cuda::blas::getrfBatched: not supported for HIP on Windows");
-}
-
-template <class Dtype>
-void gelsBatched(CUDABLAS_GELS_BATCHED_ARGTYPES(Dtype)) {
- TORCH_CHECK(false, "at::cuda::blas::gelsBatched: not supported for HIP on Windows");
-}
-
-#endif // !(defined(USE_ROCM) && defined(_MSC_VER))
-
} // namespace at::cuda::blas
diff --git a/aten/src/ATen/cuda/detail/OffsetCalculator.cuh b/aten/src/ATen/cuda/detail/OffsetCalculator.cuh
index 60e1a19c1aac..a65db3f2df12 100644
--- a/aten/src/ATen/cuda/detail/OffsetCalculator.cuh
+++ b/aten/src/ATen/cuda/detail/OffsetCalculator.cuh
@@ -45,6 +45,24 @@ struct OffsetCalculator {
C10_HOST_DEVICE offset_type get(index_t linear_idx) const {
offset_type offsets;
+
+#if defined(USE_ROCM)
+ if ((dims > 0) && (dims <= 2)) {
+ auto divmod = sizes_[0].divmod(linear_idx);
+ #pragma unroll
+ for (int arg = 0; arg < NARGS; arg++)
+ offsets[arg] = divmod.mod * strides_[0][arg];
+ if (dims >= 2) {
+ divmod = sizes_[1].divmod(divmod.div);
+ #pragma unroll
+ for (int arg = 0; arg < NARGS; arg++)
+ offsets[arg] += divmod.mod * strides_[1][arg];
+ }
+ // [...]
+ return offsets;
+ }
+#endif
+
#pragma unroll
for (int arg = 0; arg < NARGS; arg++) {
offsets[arg] = 0;
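The ROCm fast path above unrolls the generic offset loop for the 1-D and 2-D cases. A standalone sketch of the same divmod arithmetic with plain integers (the real code uses IntDivider's fused divmod and per-argument stride tables; here there is a single argument):

#include <cstdint>
#include <cstdio>

// Convert a linear index into a storage offset for a 2-D iteration,
// mirroring the dims <= 2 fast path: sizes_[0] is the fastest-varying extent.
void offset_2d(uint32_t linear_idx, uint32_t size0, uint32_t stride0,
               uint32_t stride1, uint32_t* out) {
  uint32_t mod = linear_idx % size0;  // divmod.mod: position within dim 0
  uint32_t div = linear_idx / size0;  // divmod.div: remaining index for dim 1
  *out = mod * stride0 + div * stride1;
}

int main() {
  // Contiguous 4x3 iteration: innermost extent 3, strides {1, 3}.
  uint32_t off;
  offset_2d(/*linear_idx=*/7, /*size0=*/3, /*stride0=*/1, /*stride1=*/3, &off);
  printf("%u\n", off);  // 7 = 2*3 + 1 -> offset 1*1 + 2*3 = 7
  return 0;
}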
diff --git a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
index d89875865b88..aca83386ad42 100644
--- a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
+++ b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
@@ -117,6 +117,8 @@ namespace at::cuda {
_(nvrtcGetPTXSize) \
_(nvrtcGetPTX) \
_(cuModuleLoadData) \
+ _(cuModuleLoad) \
+ _(cuGetErrorString) \
_(cuModuleGetFunction) \
_(HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR) \
_(nvrtcGetErrorString) \
diff --git a/aten/src/ATen/cuda/tunable/Tunable.cpp b/aten/src/ATen/cuda/tunable/Tunable.cpp
index 9972cbd1c151..3511e48ae061 100644
--- a/aten/src/ATen/cuda/tunable/Tunable.cpp
+++ b/aten/src/ATen/cuda/tunable/Tunable.cpp
@@ -220,19 +220,17 @@ TuningResultsValidator::TuningResultsValidator() {
[]() { return GetPyTorchVersion(); },
[this](auto&& k) { return ValidatePyTorchVersion(std::forward<decltype(k)>(k)); });
#ifdef USE_ROCM
- // rocm
+ // hip
{
-#ifdef _WIN32
- std::string rocm_version = HIP_VERSION_BUILD_NAME;
-#else
- std::string rocm_version = ROCM_BUILD_INFO;
-#endif
+ // HIP version is more accurate than ROCm version. User's environment could be a stock
+ // ROCm install but with a mix of newer components, making ROCm version meaningless.
+ std::string hip_version = c10::str(TORCH_HIP_VERSION);
RegisterValidator(
- "ROCM_VERSION",
- [rocm_version]() { return rocm_version; },
- [rocm_version](auto&& k) {
- TUNABLE_LOG1("ROCM_VERSION validation: expect ", k, " to match ", rocm_version);
- return rocm_version == k ? OK : FAIL;
+ "HIP_VERSION",
+ [hip_version]() { return hip_version; },
+ [hip_version](auto&& k) {
+ TUNABLE_LOG1("HIP_VERSION validation: expect ", k, " to match ", hip_version);
+ return hip_version == k ? OK : FAIL;
});
}
// gfx arch
diff --git a/aten/src/ATen/cudnn/Descriptors.h b/aten/src/ATen/cudnn/Descriptors.h
index 6c2492b12e6b..85f0286542e7 100644
--- a/aten/src/ATen/cudnn/Descriptors.h
+++ b/aten/src/ATen/cudnn/Descriptors.h
@@ -38,6 +38,7 @@ inline int dataSize(cudnnDataType_t dataType)
}
}
+// NOTE [ cudnn fixSizeOneDimStride ]
// The stride for a size-1 dimensions is not uniquely determined; in
// fact, it can be anything you want, because the fact that the
// tensor is size 1 at this dimension means that you will never actually
diff --git a/aten/src/ATen/functorch/BatchRulesModules.cpp b/aten/src/ATen/functorch/BatchRulesModules.cpp
index de69e5c1e23a..6e63708a90f4 100644
--- a/aten/src/ATen/functorch/BatchRulesModules.cpp
+++ b/aten/src/ATen/functorch/BatchRulesModules.cpp
@@ -7,6 +7,7 @@
#include
#include
#include
+#include <ATen/DTensorState.h>
#include
@@ -44,8 +45,13 @@ static std::tuple> embedding_batch_rule(
const auto weight_ = reshape_dim_into(*weight_bdim, 0, weight);
auto indices_ = moveBatchDimToFront(indices, indices_bdim);
- const auto range = getStepTensor(indices, batch_size, num_embeddings);
- indices_ = indices_ + range;
+ {
+ // getStepTensor returns a regular Tensor. If indices_ is a DTensor
+ // we want to allow this mixed DTensor-Tensor operation.
+ at::DTensorAllowImplicitReplication guard;
+ const auto range = getStepTensor(indices, batch_size, num_embeddings);
+ indices_ = indices_ + range;
+ }
auto result = at::embedding_symint(weight_, indices_, std::move(padding_idx), scale_grad_by_freq, sparse);
return std::make_tuple(std::move(result), 0);
}
diff --git a/aten/src/ATen/functorch/BatchRulesUnaryOps.cpp b/aten/src/ATen/functorch/BatchRulesUnaryOps.cpp
index b26d2c4a419e..48a735c3e533 100644
--- a/aten/src/ATen/functorch/BatchRulesUnaryOps.cpp
+++ b/aten/src/ATen/functorch/BatchRulesUnaryOps.cpp
@@ -171,6 +171,8 @@ TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {
POINTWISE_BOXED(fill_.Scalar);
POINTWISE_BOXED(zero_);
+ // This is special because this op doesn't return anything
+ m.impl("_assert_tensor_metadata", native::_assert_tensor_metadata);
#undef UNARY_POINTWISE
#undef UNARY_POINTWISE_ALL
diff --git a/aten/src/ATen/miopen/Descriptors.cpp b/aten/src/ATen/miopen/Descriptors.cpp
index 08c09b88f99c..86e42ee3b66d 100644
--- a/aten/src/ATen/miopen/Descriptors.cpp
+++ b/aten/src/ATen/miopen/Descriptors.cpp
@@ -19,31 +19,37 @@ inline miopenDataType_t getDataType(const at::Tensor& t) {
} else {
TORCH_CHECK(
false,
- "TensorDescriptor only supports float, half and bfloat16 tensors");
+ "TensorDescriptor does not support ", scalar_type);
}
}
} // anonymous namespace
+constexpr size_t MIOPEN_DIM_MAX = 5;
-void TensorDescriptor::set(const at::Tensor &t, size_t pad) {
- set(getDataType(t), t.sizes(), t.strides(), pad);
+void TensorDescriptor::set(const at::Tensor &t, at::MemoryFormat memory_format, size_t pad) {
+ set(getDataType(t), t.sizes(), t.strides(), pad,
+ memory_format == at::MemoryFormat::ChannelsLast ||
+ memory_format == at::MemoryFormat::ChannelsLast3d);
}
-constexpr size_t MIOPEN_DIM_MAX = 5;
+void TensorDescriptor::set(const at::Tensor &t, size_t pad) {
+ auto memory_format = t.suggest_memory_format();
+ set(getDataType(t), t.sizes(), t.strides(), pad,
+ memory_format == at::MemoryFormat::ChannelsLast ||
+ memory_format == at::MemoryFormat::ChannelsLast3d);
+}
void TensorDescriptor::set(miopenDataType_t datatype, IntArrayRef t_sizes, IntArrayRef t_strides, size_t pad) {
+ set(datatype, t_sizes, t_strides, pad,
+ is_channels_last_strides_2d(t_sizes, t_strides) ||
+ is_channels_last_strides_3d(t_sizes, t_strides));
+}
+
+void TensorDescriptor::set(miopenDataType_t datatype, IntArrayRef t_sizes, IntArrayRef t_strides, size_t pad, bool nhwc) {
size_t dim = t_sizes.size();
if (dim > MIOPEN_DIM_MAX || pad > MIOPEN_DIM_MAX)
-#define _STR(X) #X
-#define STR(X) _STR(X)
- TORCH_CHECK(
- false,
- "MIOpen supports only up to ",
- STR(MIOPEN_DIM_MAX),
- " dimensions");
-#undef _STR
-#undef STR
+ TORCH_CHECK(false, "MIOpen supports only up to ", MIOPEN_DIM_MAX, " dimensions");
int size[MIOPEN_DIM_MAX];
int stride[MIOPEN_DIM_MAX];
for (const auto i : c10::irange(dim)) {
@@ -54,7 +60,7 @@ void TensorDescriptor::set(miopenDataType_t datatype, IntArrayRef t_sizes, IntAr
size[i] = 1;
stride[i] = 1;
}
- set(datatype, static_cast<int>(std::max(dim, pad)), size, stride);
+ set(datatype, static_cast<int>(std::max(dim, pad)), size, stride, nhwc);
}
std::string miopenTypeToString(miopenDataType_t dtype) {
@@ -74,10 +80,11 @@ std::string miopenTypeToString(miopenDataType_t dtype) {
std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d) {
out << "TensorDescriptor " << static_cast(d.desc()) << "\n";
- int nbDims = 4;
+ int nbDims = 0;
int dimA[MIOPEN_DIM_MAX];
int strideA[MIOPEN_DIM_MAX];
miopenDataType_t dtype;
+ miopenGetTensorDescriptorSize(d.desc(), &nbDims);
miopenGetTensorDescriptor(d.desc(), &dtype, dimA, strideA);
out << " type = " << miopenTypeToString(dtype) << "\n";
out << " nbDims = " << nbDims << "\n";
@@ -99,19 +106,17 @@ void TensorDescriptor::print() { std::cout << *this; }
void FilterDescriptor::set(const at::Tensor &t, const at::MemoryFormat memory_format, int64_t pad) {
auto dim = t.ndimension();
- if (dim > static_cast<int64_t>(MIOPEN_DIM_MAX) || pad > static_cast<int64_t>(MIOPEN_DIM_MAX)) {
-#define _STR(X) #X
-#define STR(X) _STR(X)
- TORCH_CHECK(
- false,
- "MIOpen supports only up to ",
- STR(MIOPEN_DIM_MAX),
- " dimensions");
-#undef _STR
-#undef STR
- }
+ if (dim > MIOPEN_DIM_MAX || pad > MIOPEN_DIM_MAX)
+ TORCH_CHECK(false, "MIOpen supports only up to ", MIOPEN_DIM_MAX, " dimensions");
+ // NB: It is possible for this test to be insufficient, because the
+ // Tensor passed in to set the filter descriptor may not be the actual
+ // Tensor whose data pointer is passed to MIOpen. Nevertheless,
+ // that is the common case, so we can catch most client errors with this test.
TORCH_CHECK(t.is_contiguous(memory_format),
- "MIOpen filters (a.k.a. weights) must be contiguous");
+ "MIOpen filters (a.k.a. weights) must be contiguous in desired memory_format\n",
+ "Weight sizes: ", t.sizes(), "\n",
+ "Weight strides: ", t.strides(), "\n",
+ "cuDNN suggested memory_format: ", memory_format);
int size[MIOPEN_DIM_MAX];
int stride[MIOPEN_DIM_MAX];
@@ -131,7 +136,9 @@ void FilterDescriptor::set(const at::Tensor &t, const at::MemoryFormat memory_fo
}
dim = std::max(dim, pad);
- set(getDataType(t), (int) dim, size, stride);
+ set(getDataType(t), static_cast<int>(dim), size, stride,
+ memory_format == at::MemoryFormat::ChannelsLast ||
+ memory_format == at::MemoryFormat::ChannelsLast3d);
}
}}
diff --git a/aten/src/ATen/miopen/Descriptors.h b/aten/src/ATen/miopen/Descriptors.h
index 2eee837cd533..8825575c9231 100644
--- a/aten/src/ATen/miopen/Descriptors.h
+++ b/aten/src/ATen/miopen/Descriptors.h
@@ -9,6 +9,8 @@
namespace at { namespace native {
+std::string miopenTypeToString(miopenDataType_t dtype);
+
inline int dataSize(miopenDataType_t dataType)
{
switch (dataType) {
@@ -19,6 +21,32 @@ inline int dataSize(miopenDataType_t dataType)
}
}
+// See NOTE [ cudnn fixSizeOneDimStride ] in aten/src/ATen/cudnn/Descriptors.h
+template <typename T>
+static inline void fixSizeOneDimStride(int dim, const T *size, T *stride, bool nhwc) {
+ int64_t z = 1;
+ int index = 0;
+ std::vector<int> permutation(dim);
+
+ if (nhwc) {
+ permutation[index++] = 1;
+ }
+ for (int d = dim-1; d > 1; d--) {
+ permutation[index++] = d;
+ }
+ if (!nhwc) {
+ permutation[index++] = 1;
+ }
+ permutation[index++] = 0;
+ for (int d : permutation) {
+ if (size[d] == 1) {
+ stride[d] = z;
+ } else {
+ z *= size[d];
+ }
+ }
+}
+
template <typename T, miopenStatus_t (*dtor)(T*)>
struct DescriptorDeleter {
void operator()(T* x) {
@@ -75,14 +103,20 @@ class TORCH_HIP_CPP_API TensorDescriptor : public Descriptor<
set(t, pad);
}
+ // See Note [CuDNN broadcast padding]
void set(const at::Tensor &t, size_t pad = 0);
+ void set(const at::Tensor &t, at::MemoryFormat memory_format, size_t pad = 0);
void set(miopenDataType_t dataType, IntArrayRef sizes, IntArrayRef strides, size_t pad = 0);
void print();
private:
- void set(miopenDataType_t dataType, int dim, int* size, int* stride) {
- MIOPEN_CHECK(miopenSetTensorDescriptor(mut_desc(), dataType, dim, size, stride));
+ void set(miopenDataType_t dataType, IntArrayRef sizes, IntArrayRef strides, size_t pad, bool nhwc);
+
+ void set(miopenDataType_t dataType, int dim, int* size, int* stride, bool nhwc) {
+ std::vector<int> strides_copy(stride, stride + dim);
+ fixSizeOneDimStride(dim, size, strides_copy.data(), nhwc);
+ MIOPEN_CHECK(miopenSetTensorDescriptor(mut_desc(), dataType, dim, size, strides_copy.data()));
}
};
@@ -100,8 +134,10 @@ class TORCH_HIP_CPP_API FilterDescriptor : public Descriptor<
void set(const at::Tensor &t, const at::MemoryFormat memory_format, int64_t pad = 0);
private:
- void set(miopenDataType_t dataType, int dim, int* size, int* stride) {
- MIOPEN_CHECK(miopenSetTensorDescriptor(mut_desc(), dataType, dim, size, stride));
+ void set(miopenDataType_t dataType, int dim, int* size, int* stride, bool nhwc) {
+ std::vector<int> strides_copy(stride, stride + dim);
+ fixSizeOneDimStride(dim, size, strides_copy.data(), nhwc);
+ MIOPEN_CHECK(miopenSetTensorDescriptor(mut_desc(), dataType, dim, size, strides_copy.data()));
}
};
@@ -166,4 +202,4 @@ union Constant
}
};
-}} // namespace
+}} // namespace
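The new fixSizeOneDimStride helper pins size-1 dimensions to the packed stride their position implies, walking dims in memory order (channels first when nhwc). A standalone copy with concrete numbers, for experimentation only:

#include <cstdio>
#include <vector>

// Copy of the fixSizeOneDimStride logic above, outside the MIOpen wrapper.
template <typename T>
void fix_size_one_dim_stride(int dim, const T* size, T* stride, bool nhwc) {
  int64_t z = 1;
  int index = 0;
  std::vector<int> permutation(dim);
  if (nhwc) permutation[index++] = 1;           // channels vary fastest in NHWC
  for (int d = dim - 1; d > 1; d--) permutation[index++] = d;
  if (!nhwc) permutation[index++] = 1;
  permutation[index++] = 0;                     // batch dim visited last
  for (int d : permutation) {
    if (size[d] == 1) stride[d] = z;            // pin size-1 dims to packed stride
    else z *= size[d];
  }
}

int main() {
  int size[4] = {1, 8, 4, 4};       // NCHW sizes with N == 1
  int stride[4] = {999, 1, 32, 8};  // NHWC strides; the N stride is arbitrary
  fix_size_one_dim_stride(4, size, stride, /*nhwc=*/true);
  printf("%d %d %d %d\n", stride[0], stride[1], stride[2], stride[3]);
  // prints "128 1 32 8": the size-1 batch dim now has stride C*H*W = 128.
  return 0;
}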
diff --git a/aten/src/ATen/mps/EmptyTensor.cpp b/aten/src/ATen/mps/EmptyTensor.cpp
index d858df073397..6c58de099648 100644
--- a/aten/src/ATen/mps/EmptyTensor.cpp
+++ b/aten/src/ATen/mps/EmptyTensor.cpp
@@ -12,7 +12,7 @@
#define MPS_ERROR_NOT_COMPILED "PyTorch code is not compiled with MPS enabled"
#define MPS_ERROR_RUNTIME_TOO_LOW \
- "The MPS backend is supported on MacOS 13.0+.", \
+ "The MPS backend is supported on MacOS 14.0+. ", \
"Current OS version can be queried using `sw_vers`"
#define MPS_ERROR_DOUBLE_NOT_SUPPORTED "Cannot convert a MPS Tensor to float64 dtype " \
"as the MPS framework doesn't support float64. Please use float32 instead."
diff --git a/aten/src/ATen/mps/MPSHooks.mm b/aten/src/ATen/mps/MPSHooks.mm
index a2ec221c1bfe..34fbd31af91d 100644
--- a/aten/src/ATen/mps/MPSHooks.mm
+++ b/aten/src/ATen/mps/MPSHooks.mm
@@ -70,7 +70,10 @@
}
void* MPSHooks::getCommandBuffer() const {
- return at::mps::getDefaultMPSStream()->commandBuffer();
+ auto stream = at::mps::getDefaultMPSStream();
+ // Release the pending computeCommandEncoder, as an extension is likely to allocate a new one
+ stream->endKernelCoalescing();
+ return stream->commandBuffer();
}
void* MPSHooks::getDispatchQueue() const {
diff --git a/aten/src/ATen/mps/MPSStream.mm b/aten/src/ATen/mps/MPSStream.mm
index e9627a343ad6..71325bd69e1d 100644
--- a/aten/src/ATen/mps/MPSStream.mm
+++ b/aten/src/ATen/mps/MPSStream.mm
@@ -158,7 +158,18 @@ @interface MPSGraphExecutionDescriptor ()
endKernelCoalescing();
id blitEncoder = [commandBuffer() blitCommandEncoder];
- [blitEncoder fillBuffer:buffer range:NSMakeRange(offset, length) value:value];
+ // For some reason fillBuffer stopped working for length > 4GB on macOS 26
+ // See https://github.com/pytorch/pytorch/issues/163962
+ // Work around it by batching fill commands into 4GB chunks
+ constexpr size_t max_copy_size = 0x100000000; // 4GB
+ size_t bytes_filled = 0;
+ size_t bytes_remains = length;
+ while (bytes_remains > 0) {
+ NSUInteger bytes_to_copy = std::min(max_copy_size, bytes_remains);
+ [blitEncoder fillBuffer:buffer range:NSMakeRange(offset + bytes_filled, bytes_to_copy) value:value];
+ bytes_filled += bytes_to_copy;
+ bytes_remains -= bytes_to_copy;
+ }
[blitEncoder endEncoding];
synchronize(syncType);
}
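The same chunking pattern, reduced to plain C++ so the loop logic is easy to verify in isolation (memset stands in for the Metal blit encoder; this is a sketch, not the real backend code):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Stand-in for a fill primitive that only accepts ranges below 4GB per call.
void fill_range(uint8_t* base, size_t offset, size_t len, uint8_t value) {
  std::memset(base + offset, value, len);
}

// Mirrors the MPSStream workaround: split one large fill into <=4GB chunks.
void chunked_fill(uint8_t* buffer, size_t offset, size_t length, uint8_t value) {
  constexpr size_t max_chunk = 0x100000000;  // 4GB
  size_t filled = 0;
  size_t remaining = length;
  while (remaining > 0) {
    size_t chunk = std::min(max_chunk, remaining);
    fill_range(buffer, offset + filled, chunk, value);
    filled += chunk;
    remaining -= chunk;
  }
}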
diff --git a/aten/src/ATen/native/Blas.cpp b/aten/src/ATen/native/Blas.cpp
index 674ccf11cfb9..49366151ae60 100644
--- a/aten/src/ATen/native/Blas.cpp
+++ b/aten/src/ATen/native/Blas.cpp
@@ -9,6 +9,7 @@
#include
#include
#include
+#include
#if !defined(__s390x__) && !defined(__powerpc__)
#include
#endif
@@ -332,4 +333,23 @@ _scaled_mm_cpu(const Tensor& mat_a, const Tensor& mat_b,
return _scaled_mm_out_cpu(mat_a, mat_b, scale_a, scale_b, bias, scale_result, out_dtype, use_fast_accum, out);
}
+// TODO(vasiliy, future PR): figure out why we need to declare this function, when
+// other functions that live in ATen/native/*.cpp without declarations
+// or headers work just fine.
+Tensor _grouped_mm(const Tensor& mat_a, const Tensor& mat_b,
+const std::optional<Tensor>& offs,
+const std::optional<Tensor>& bias,
+std::optional<c10::ScalarType> out_dtype);
+
+Tensor _grouped_mm(const Tensor& mat_a, const Tensor& mat_b,
+const std::optional<Tensor>& offs,
+const std::optional<Tensor>& bias,
+std::optional<c10::ScalarType> out_dtype) {
+ _grouped_mm_validate_inputs(mat_a, mat_b, offs, bias, out_dtype);
+ const auto out_dtype_ = _resolve_grouped_mm_out_dtype(mat_a, mat_b, out_dtype);
+ Tensor out = create_grouped_gemm_output_tensor(mat_a, mat_b, offs, out_dtype_);
+ _grouped_mm_fallback(mat_a, mat_b, offs, bias, out_dtype, out);
+ return out;
+}
+
} // namespace at::native
diff --git a/aten/src/ATen/native/CPUBlas.cpp b/aten/src/ATen/native/CPUBlas.cpp
index b16c1ef04fa0..e06afddd05aa 100644
--- a/aten/src/ATen/native/CPUBlas.cpp
+++ b/aten/src/ATen/native/CPUBlas.cpp
@@ -496,18 +496,18 @@ void gemm(
// for the fallback path, first compute gemm with beta = 0,
// and then add c in full precision.
int64_t c_size = n * m;
- std::vector<at::Half> float16_c(c_size, 0.f);
- gemm_stub(
+ std::vector<float> float_c(c_size, 0.f);
+ gemm_no_downcast_stub(
at::kCPU, at::kHalf,
- transa, transb, m, n, k, alpha, a, lda, b, ldb, 0.f, float16_c.data(), m);
+ transa, transb, m, n, k, alpha, a, lda, b, ldb, 0.f, float_c.data(), m);
for (const auto j : c10::irange(n)) {
for (const auto i : c10::irange(m)) {
auto offset = j * ldc + i;
// beta == 0 won't propagate NaN from C
if (beta == 0.f) {
- c[offset] = c10::convert<float>(float16_c[j * m + i]);
+ c[offset] = float_c[j * m + i];
} else {
- c[offset] = beta * c[offset] + c10::convert<float>(float16_c[j * m + i]);
+ c[offset] = beta * c[offset] + float_c[j * m + i];
}
}
}
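The switch to a float temporary matters because fp16 accumulation silently loses small addends once the running sum is large. A toy demonstration of that effect (half_round is a crude emulation of fp16 rounding, not at::Half):

#include <cmath>
#include <cstdio>

// Crude fp16 stand-in: keep an 11-bit significand with round-to-nearest-even.
// Real code uses at::Half; this only illustrates the accumulation error.
float half_round(float x) {
  int e;
  float m = std::frexp(x, &e);               // x = m * 2^e, m in [0.5, 1)
  m = std::nearbyint(m * 2048.0f) / 2048.0f; // default rounding: ties-to-even
  return std::ldexp(m, e);
}

int main() {
  // Add 1.0 to a running sum of 2048, 2048 times. In fp16-like arithmetic
  // 2049 rounds back to 2048, so every addend is lost; fp32 stays exact.
  float acc_half = 2048.0f, acc_float = 2048.0f;
  for (int i = 0; i < 2048; i++) {
    acc_half = half_round(acc_half + 1.0f);
    acc_float += 1.0f;
  }
  printf("fp16-style: %.0f, fp32: %.0f\n", acc_half, acc_float);
  // prints "fp16-style: 2048, fp32: 4096"
  return 0;
}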
diff --git a/aten/src/ATen/native/ConvUtils.h b/aten/src/ATen/native/ConvUtils.h
index 84381efe55b0..e160c84ced33 100644
--- a/aten/src/ATen/native/ConvUtils.h
+++ b/aten/src/ATen/native/ConvUtils.h
@@ -353,19 +353,21 @@ TORCH_API void _cudnn_set_conv_benchmark_empty_cache(bool enable);
TORCH_API bool _cudnn_get_conv_benchmark_empty_cache();
-inline bool miopen_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
-
+inline at::MemoryFormat miopen_conv_suggest_memory_format(const at::Tensor& input, const at::Tensor& weight) {
// disable NHWC for float64 input.
if (!at::detail::getCUDAHooks().compiledWithMIOpen() ||
input.scalar_type() == at::kDouble ||
weight.scalar_type() == at::kDouble) {
- return false;
+ return at::MemoryFormat::Contiguous;
}
// TODO: Remove PYTORCH_MIOPEN_SUGGEST_NHWC once ROCm officially supports NHWC in MIOpen
- // See #64427
- static std::optional<bool> PYTORCH_MIOPEN_SUGGEST_NHWC = c10::utils::check_env("PYTORCH_MIOPEN_SUGGEST_NHWC");
- static bool suggest_nhwc = PYTORCH_MIOPEN_SUGGEST_NHWC && *PYTORCH_MIOPEN_SUGGEST_NHWC;
+ // See https://github.com/pytorch/pytorch/issues/64427.
+ // a non-static variable is used so the environment variable can be changed at runtime for testing
+ // enabled by default for ROCm >= 7.0.0 with MIOpen 3.5
+ int miopen_version = detail::getCUDAHooks().compiledWithMIOpen() ? detail::getCUDAHooks().versionMIOpen() : 0;
+ bool is_miopen_3_5 = miopen_version >= 30500; // ROCm 7.0
+ bool suggest_nhwc = c10::utils::check_env("PYTORCH_MIOPEN_SUGGEST_NHWC").value_or(is_miopen_3_5);
auto input_memory_format = input.suggest_memory_format();
auto weight_memory_format = weight.suggest_memory_format();
@@ -375,13 +377,24 @@ inline bool miopen_conv_use_channels_last(const at::Tensor& input, const at::Ten
(input_memory_format == at::MemoryFormat::ChannelsLast) ||
(weight_memory_format == at::MemoryFormat::ChannelsLast)
);
+ if (can_use_miopen_channels_last_2d) {
+ return at::MemoryFormat::ChannelsLast;
+ }
bool can_use_miopen_channels_last_3d = suggest_nhwc && (weight_ndim == 5) && (
(input_memory_format == at::MemoryFormat::ChannelsLast3d) ||
(weight_memory_format == at::MemoryFormat::ChannelsLast3d)
);
+ if (can_use_miopen_channels_last_3d) {
+ return at::MemoryFormat::ChannelsLast3d;
+ }
+
+ return at::MemoryFormat::Contiguous;
+}
- return can_use_miopen_channels_last_2d || can_use_miopen_channels_last_3d;
+// deprecated; removing it would be BC-breaking
+inline bool miopen_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
+ return miopen_conv_suggest_memory_format(input, weight) != at::MemoryFormat::Contiguous;
}
inline bool mkldnn_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
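The NHWC default above follows a common pattern: an environment variable, when set, overrides a version-derived default. A standalone sketch, where check_env_flag is a simplified hypothetical stand-in for c10::utils::check_env (the real helper also accepts values such as ON/YES):

// Sketch of the "env var overrides a version-derived default" pattern.
#include <cstdlib>
#include <optional>
#include <string>

std::optional<bool> check_env_flag(const char* name) {
  const char* v = std::getenv(name);
  if (v == nullptr) return std::nullopt; // unset: caller picks the default
  return std::string(v) == "1";
}

bool suggest_nhwc(int miopen_version) {
  const bool default_on = miopen_version >= 30500; // MIOpen 3.5 / ROCm 7.0
  // the env var wins when set; otherwise fall back to the version-based default
  return check_env_flag("PYTORCH_MIOPEN_SUGGEST_NHWC").value_or(default_on);
}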
diff --git a/aten/src/ATen/native/Convolution.cpp b/aten/src/ATen/native/Convolution.cpp
index d2b7b055684e..2e0e4a47f37b 100644
--- a/aten/src/ATen/native/Convolution.cpp
+++ b/aten/src/ATen/native/Convolution.cpp
@@ -14,6 +14,7 @@
#include
#include
#include
+#include
#include
#include
@@ -300,67 +301,50 @@ struct ConvParams {
bool allow_tf32{};
bool is_strided() const {
- bool is_strided = false;
- for (const auto& s : stride) {
- is_strided |= (s != 1);
- }
- return is_strided;
+ return std::any_of(
+ stride.cbegin(), stride.cend(), [](const T& s) { return s != 1; });
}
bool is_dilated() const {
- bool is_dilated = false;
- for (const auto& d : dilation) {
- is_dilated |= (d != 1);
- }
- return is_dilated;
+ return std::any_of(
+ dilation.cbegin(), dilation.cend(), [](const T& d) { return d != 1; });
}
bool is_padded() const {
- bool is_padded = false;
- for (auto p : padding) {
- is_padded |= (p != 0);
- }
- return is_padded;
+ return std::any_of(
+ padding.cbegin(), padding.cend(), [](const T& p) { return p != 0; });
}
bool is_output_padding_neg() const {
- bool is_non_neg = false;
- for (const auto& p : output_padding) {
- is_non_neg |= (p < 0);
- }
- return is_non_neg;
+ return std::any_of(
+ output_padding.cbegin(),
+ output_padding.cend(),
+ [](const T& p) { return p < 0; });
}
bool is_output_padding_big() const {
- bool is_big = false;
+ // Revisit this with std::views::zip once C++23 is available.
for (auto i: c10::irange(output_padding.size())) {
- is_big |= (output_padding[i] >= stride[i]);
+ if (output_padding[i] >= stride[i]) {
+ return true;
+ }
}
- return is_big;
+ return false;
}
bool is_padding_neg() const {
- bool is_non_neg = false;
- for (const auto& p : padding) {
- is_non_neg |= (p < 0);
- }
- return is_non_neg;
+ return std::any_of(
+ padding.cbegin(), padding.cend(), [](const T& p) { return p < 0; });
}
bool is_dilation_neg() const {
- bool is_non_neg = false;
- for (const auto& p : dilation) {
- is_non_neg |= (p < 0);
- }
- return is_non_neg;
+ return std::any_of(
+ dilation.cbegin(), dilation.cend(), [](const T& d) { return d < 0; });
}
bool is_stride_nonpos() const {
- bool is_nonpos = false;
- for (const auto& s : stride) {
- is_nonpos |= (s <= 0);
- }
- return is_nonpos;
+ return std::any_of(
+ stride.cbegin(), stride.cend(), [](const T& s) { return s <= 0; });
}
void view1d_as_2d() {
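The refactor in this hunk is behavior-preserving with one useful difference: std::any_of short-circuits on the first match, while the OR-accumulating loops always scanned the whole container. An equivalence sketch with illustrative free functions:

// Before/after shape of the predicates refactored above.
#include <algorithm>
#include <cstdint>
#include <vector>

bool is_dilated_loop(const std::vector<int64_t>& dilation) {
  bool r = false;
  for (const auto& d : dilation) r |= (d != 1); // always scans every element
  return r;
}

bool is_dilated_any_of(const std::vector<int64_t>& dilation) {
  return std::any_of(dilation.cbegin(), dilation.cend(),
                     [](int64_t d) { return d != 1; }); // stops at first match
}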
@@ -426,11 +410,23 @@ struct ConvParams {
// cudnn and miopen are guaranteed not to be on mobile, and T102591915 / T110194934 suggest
// that maybe the compiledWithCuDNN() check sometimes segfaults (though I can't imagine how)
#if !defined(C10_MOBILE)
- if (!detail::getCUDAHooks().compiledWithCuDNN()) {
+ if (!detail::getCUDAHooks().compiledWithCuDNN() || !input.is_cuda() || !cudnn_enabled) {
return false;
}
+ static long cudnn_version = detail::getCUDAHooks().versionCuDNN();
+ // broken on cuDNN 9.8
+ if (cudnn_version >= 90800) {
+ if (cudnn_conv_suggest_memory_format(input, weight) == at::MemoryFormat::Contiguous &&
+ (input.scalar_type() == at::kBFloat16 || input.scalar_type() == at::kHalf) &&
+ weight.dim() == 5) {
+ for (int i = 2; i < weight.dim(); i++) {
+ if (weight.size(i) != 1) {
+ return false;
+ }
+ }
+ }
+ }
if (needs_64bit_indexing_no_split(input, weight)) {
- static long cudnn_version = detail::getCUDAHooks().versionCuDNN();
if (!(cudnn_version >= 90300 && at::native::cudnnv8_enabled_check_debug())) {
TORCH_WARN_ONCE("cuDNN cannot be used for large non-batch-splittable convolutions"
" if the V8 API is not enabled or before cuDNN version 9.3+."
@@ -438,9 +434,6 @@ struct ConvParams {
return false;
}
}
- if (!input.is_cuda() || !cudnn_enabled) {
- return false;
- }
if (input.scalar_type() == at::kBFloat16 || weight.scalar_type() == at::kBFloat16) {
if (!(detail::getCUDAHooks().supportsBFloat16ConvolutionWithCuDNNv8() && at::native::cudnnv8_enabled_check_debug())) {
return false;
@@ -459,13 +452,19 @@ struct ConvParams {
// Use cudnn for FP16 depthwise convolutions
bool use_cudnn_depthwise(const at::Tensor& input, const at::Tensor& weight) const {
- if (cudnn_conv_suggest_memory_format(input, weight) != at::MemoryFormat::Contiguous && use_cudnn(input, weight)) {
- // always use cudnn_depthwise for channels_last format
- return true;
+ if (!cudnn_enabled || !detail::getCUDAHooks().compiledWithCuDNN() || !input.is_cuda()) {
+ return false;
}
// native kernel doesn't support 64-bit non-splittable case
- if (cudnn_enabled && !(canUse32BitIndexMath(input) && canUse32BitIndexMath(weight))) {
+ if (!(canUse32BitIndexMath(input) && canUse32BitIndexMath(weight))) {
static long cudnn_version = detail::getCUDAHooks().compiledWithCuDNN() ? detail::getCUDAHooks().versionCuDNN() : -1;
+ // TODO(eqy): remove this once cuDNN fixes 64-bit depthwise support, first broken in 9.11x
+ if (cudnn_conv_suggest_memory_format(input, weight) != at::MemoryFormat::Contiguous) {
+ if (cudnn_version < 0 || cudnn_version > 91000) {
+ return false;
+ }
+ }
+
if (!(cudnn_version >= 90300 && at::native::cudnnv8_enabled_check_debug())) {
TORCH_WARN_ONCE("cuDNN cannot be used for large non-batch-splittable convolutions"
" if the V8 API is not enabled or before cuDNN version 9.3+."
@@ -475,6 +474,10 @@ struct ConvParams {
return true;
}
}
+ if (cudnn_conv_suggest_memory_format(input, weight) != at::MemoryFormat::Contiguous) {
+ // always use cudnn_depthwise for channels_last format
+ return true;
+ }
if (detail::getCUDAHooks().supportsDepthwiseConvolutionWithCuDNN()) {
bool kernel_cond = (use_cudnn(input, weight) &&
input.scalar_type() == kHalf && // only for FP16
@@ -1419,10 +1422,8 @@ static inline at::MemoryFormat determine_backend_memory_format(
case ConvBackend::Miopen:
case ConvBackend::MiopenDepthwise:
case ConvBackend::MiopenTranspose:
- if (detail::getCUDAHooks().compiledWithMIOpen() && miopen_conv_use_channels_last(input, weight)) {
- TORCH_INTERNAL_ASSERT((k == 4 || k == 5),
- "Expected 4D or 5D input for miopen memory format selection in determine_backend_memory_format()");
- backend_memory_format = (k == 5) ? at::MemoryFormat::ChannelsLast3d : at::MemoryFormat::ChannelsLast;
+ if (detail::getCUDAHooks().compiledWithMIOpen()) {
+ backend_memory_format = miopen_conv_suggest_memory_format(input, weight);
}
break;
case ConvBackend::Mkldnn:
diff --git a/aten/src/ATen/native/GroupedMMUtils.h b/aten/src/ATen/native/GroupedMMUtils.h
new file mode 100644
index 000000000000..78993308cd5f
--- /dev/null
+++ b/aten/src/ATen/native/GroupedMMUtils.h
@@ -0,0 +1,167 @@
+#pragma once
+
+#include
+#include
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include
+#include
+#include
+#else
+#include
+#include
+#include
+#include
+#endif
+
+namespace at::native {
+
+inline bool check_valid_strides_and_return_transposed(const Tensor& mat) {
+ IntArrayRef tensor_strides = mat.strides();
+ IntArrayRef tensor_sizes = mat.sizes();
+ int end_dim = mat.dim() - 1;
+ int alignment = 16 / mat.element_size();
+ TORCH_CHECK(uint64_t(mat.data_ptr()) % 16 == 0, "expected data_ptr to be aligned to 16 bytes");
+ if ((tensor_strides[end_dim - 1] == 1) && (tensor_strides[end_dim] >= std::max<int64_t>(1, tensor_sizes[end_dim - 1]))) {
+ TORCH_CHECK(tensor_strides[end_dim] % alignment == 0, "strides should be multiple of 16 bytes");
+ return true;
+ } else if ((tensor_strides[end_dim] == 1) && (tensor_strides[end_dim - 1] >= std::max<int64_t>(1, tensor_sizes[end_dim]))) {
+ TORCH_CHECK(tensor_strides[end_dim - 1] % alignment == 0, "strides should be multiple of 16 bytes");
+ return false;
+ } else {
+ TORCH_CHECK(false, "Invalid strides/sizes, got ", mat.strides(), " for strides and ", mat.sizes(), " for sizes");
+ }
+}
+
+inline at::Tensor create_grouped_gemm_output_tensor(const Tensor& mat_a,
+    const Tensor& mat_b,
+    const std::optional<Tensor>& offs,
+    c10::ScalarType out_dtype) {
+ c10::SmallVector<int64_t, 3> out_size;
+ const bool a_is_2d = mat_a.dim() == 2;
+ const bool b_is_2d = mat_b.dim() == 2;
+ if (a_is_2d) {
+ if (b_is_2d) {
+ out_size = {offs->size(0), mat_a.size(0), mat_b.size(1)};
+ } else {
+ TORCH_CHECK(offs->size(0) == mat_b.size(0), "matrix batch sizes have to match");
+ out_size = {mat_a.size(0), mat_b.size(-1)};
+ }
+ } else {
+ if (b_is_2d) {
+ // this case is not actually encountered for MoE gemms
+ TORCH_CHECK(offs->size(0) == mat_a.size(0), "matrix batch sizes have to match");
+ out_size = {mat_a.size(1), mat_b.size(1)};
+ } else { // regular bmm
+ TORCH_CHECK(mat_a.size(0) == mat_b.size(0), "batched dimension has to match");
+ out_size = {mat_a.size(0), mat_a.size(1), mat_b.size(-1)};
+ }
+ }
+
+ #ifndef USE_ROCM
+ // For TMA transfers, strides of output tensor have to be either
+ // 1, or aligned to 16 bytes.
+ const auto last_dim = out_size.size() - 1;
+ const auto alignment = 16 / c10::elementSize(out_dtype);
+ const int64_t size_padded = (out_size[last_dim] + alignment - 1) / alignment * alignment;
+ std::vector<int64_t> out_stride;
+ if (a_is_2d != b_is_2d) {
+ out_stride = {size_padded, 1};
+ } else {
+ out_stride = {out_size[1] * size_padded, size_padded, 1};
+ }
+ return at::empty_strided(out_size, out_stride, mat_a.options().dtype(out_dtype));
+ #else
+ return at::empty(out_size, mat_a.options().dtype(out_dtype));
+ #endif
+}
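A worked example of the TMA stride padding above, under the assumption of a bf16 output (element size 2 bytes, so alignment = 16 / 2 = 8 elements):

// Round the last output dim up to a multiple of `alignment` so every row
// starts on a 16-byte boundary, as required for TMA transfers.
#include <cstdint>
#include <iostream>

int64_t round_up(int64_t x, int64_t alignment) {
  return (x + alignment - 1) / alignment * alignment;
}

int main() {
  const int64_t alignment = 8; // bf16: 16 bytes / 2 bytes per element
  std::cout << round_up(30, alignment) << "\n"; // 32: rows stride 32 elements (64 bytes)
  std::cout << round_up(32, alignment) << "\n"; // 32: already aligned, no padding
  return 0;
}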
+
+inline void _grouped_mm_validate_inputs(const Tensor& mat_a, const Tensor& mat_b,
+    const std::optional<Tensor>& offs,
+    const std::optional<Tensor>& bias,
+    std::optional<c10::ScalarType> out_dtype) {
+ TORCH_CHECK((mat_a.dtype() == at::kBFloat16) || (mat_a.dtype() == at::kFloat) || (mat_a.dtype() == at::kHalf), "Expected mat_a to be Float32, BFloat16 or Float16 matrix, got ", mat_a.scalar_type());
+ TORCH_CHECK((mat_b.dtype() == at::kBFloat16) || (mat_b.dtype() == at::kFloat) || (mat_b.dtype() == at::kHalf), "Expected mat_b to be Float32, BFloat16 or Float16 matrix, got ", mat_b.scalar_type());
+ TORCH_CHECK(mat_a.dim() == 2 || mat_a.dim() == 3, "mat_a has to be 2 or 3d");
+ TORCH_CHECK(mat_b.dim() == 2 || mat_b.dim() == 3, "mat_b has to be 2 or 3d");
+ const bool a_is_2d = mat_a.dim() == 2;
+ const bool b_is_2d = mat_b.dim() == 2;
+ if (!a_is_2d || !b_is_2d) {
+ TORCH_CHECK(mat_a.size(-1) == mat_b.size(-2), "contraction dimension of mat_a and mat_b must match");
+ }
+
+ // check that the strides are valid, the fn will throw an error if not
+ check_valid_strides_and_return_transposed(mat_a);
+ check_valid_strides_and_return_transposed(mat_b);
+ TORCH_CHECK(offs.has_value() == (a_is_2d || b_is_2d), "Have to provide offsets if there is a 2d matrix, or no offset if both matrices are 3d");
+
+ if (offs.has_value()) {
+ TORCH_CHECK(offs->dim() == 1, "offs has to be 1D");
+ TORCH_CHECK(offs->dtype() == at::kInt, "Offsets have to be int32");
+ }
+ TORCH_CHECK(!bias.has_value(), "Bias not supported yet");
+}
+
+inline c10::ScalarType _resolve_grouped_mm_out_dtype(const Tensor& mat_a, const Tensor& mat_b,
+    std::optional<c10::ScalarType> out_dtype) {
+ const auto out_dtype_ = out_dtype.value_or(mat_a.scalar_type());
+ // TODO(future PR): enable float32 output dtype for bfloat16 and float16 inputs
+ TORCH_CHECK(out_dtype_ == mat_a.dtype(), "Grouped gemm output dtype must match `mat_a` dtype");
+ return out_dtype_;
+}
+
+
+inline void _grouped_mm_fallback(const Tensor& mat_a, const Tensor& mat_b,
+    const std::optional<Tensor>& offs,
+    const std::optional<Tensor>& bias,
+    std::optional<c10::ScalarType> out_dtype,
+    Tensor out) {
+ LOG(INFO) << "fallback path for `torch._grouped_mm`, performance may not be optimal";
+ const bool a_is_2d = mat_a.dim() == 2;
+ const bool b_is_2d = mat_b.dim() == 2;
+ if (a_is_2d && !b_is_2d) {
+ // 2d x 3d with offsets
+ int group_start_idx = 0;
+ auto offs_cpu = offs.value().cpu();
+ for (int group_idx = 0; group_idx < offs_cpu.size(0); group_idx++) {
+ int group_end_idx = offs_cpu[group_idx].item<int>();
+ auto mat_a_slice = mat_a.slice(0, group_start_idx, group_end_idx);
+ auto out_slice = out.slice(0, group_start_idx, group_end_idx);
+ at::mm_out(out_slice, mat_a_slice, mat_b[group_idx]);
+ group_start_idx = group_end_idx;
+ }
+
+ } else if (!a_is_2d && b_is_2d) {
+ // 3d x 2d with offsets
+ int group_start_idx = 0;
+ auto offs_cpu = offs.value().cpu();
+ for (int group_idx = 0; group_idx < offs_cpu.size(0); group_idx++) {
+ int group_end_idx = offs_cpu[group_idx].item<int>();
+ auto mat_b_slice = mat_b.slice(1, group_start_idx, group_end_idx);
+ auto out_slice = out.slice(1, group_start_idx, group_end_idx);
+ at::mm_out(out_slice, mat_a[group_idx], mat_b_slice);
+ group_start_idx = group_end_idx;
+ }
+
+ } else if (a_is_2d && b_is_2d) {
+ // 2d x 2d with offsets
+ int group_start_idx = 0;
+ auto offs_cpu = offs.value().cpu();
+ for (int group_idx = 0; group_idx < offs_cpu.size(0); group_idx++) {
+ int group_end_idx = offs_cpu[group_idx].item<int>();
+ auto mat_a_slice = mat_a.slice(1, group_start_idx, group_end_idx);
+ auto mat_b_slice = mat_b.slice(0, group_start_idx, group_end_idx);
+ auto out_slice = out[group_idx];
+ at::mm_out(out_slice, mat_a_slice, mat_b_slice);
+ group_start_idx = group_end_idx;
+ }
+
+ } else {
+ // 3d x 3d without offsets - regular bmm
+ at::bmm_out(out, mat_a, mat_b);
+ }
+}
+
+
+} // namespace at::native
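For readers new to the offsets convention used throughout this header: offs holds cumulative group boundaries along the jagged dimension, one entry per group. A small sketch of how the 2d x 3d fallback above partitions the rows of mat_a (names and the plan structure are illustrative):

// mat_a: (M_total, K), mat_b: (G, K, N), offs = {m0, m0+m1, ..., M_total};
// group g multiplies rows [offs[g-1], offs[g]) of mat_a by expert mat_b[g].
#include <cstdint>
#include <vector>

struct GroupSlice { int64_t start, end, expert; };

std::vector<GroupSlice> plan_groups(const std::vector<int64_t>& offs) {
  std::vector<GroupSlice> plan;
  int64_t start = 0;
  for (int64_t g = 0; g < (int64_t)offs.size(); ++g) {
    plan.push_back({start, offs[g], g}); // rows [start, offs[g]) use expert g
    start = offs[g];
  }
  return plan;
}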
diff --git a/aten/src/ATen/native/Linear.cpp b/aten/src/ATen/native/Linear.cpp
index 5d3a84ea39f6..a744da3bcad2 100644
--- a/aten/src/ATen/native/Linear.cpp
+++ b/aten/src/ATen/native/Linear.cpp
@@ -185,6 +185,17 @@ static Tensor sumproduct_pair(const Tensor& left_, const Tensor& right_, IntArra
// right: "lro, summed, ro" permuted with rpermutation and the three flattened
// then the permuted output is a view of bmm(left, right)
// finally, opermutation reverts the permutation to the original order of dimensions
+ // By default the output is "lro, lo, 1-for-summed-dims, ro" with original shape dimensions.
+ // However, if all dimensions from the right operand appear before those from the left
+ // operand in the final output, we can swap the operands so that bmm directly produces
+ // the result in the correct memory order.
+
+ bool swap_lo_ro = !lo.empty() && !ro.empty() && ro.back() < lo.front();
+ if (swap_lo_ro) {
+ std::swap(left, right);
+ std::swap(lo, ro);
+ std::swap(lo_size, ro_size);
+ }
auto out_num_dim = lro.size() + lo.size() + sum_dims_.size() + ro.size();
 std::vector<int64_t> out_size;
out_size.reserve(out_num_dim);
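The swap condition above reduces to comparing the extreme output positions of the operand-exclusive dims. A sketch, assuming (as the code above does) that lo and ro list output positions in ascending order:

// lo/ro hold the output positions of dims unique to the left/right operand.
// If every right-only dim precedes every left-only dim, swapping the bmm
// operands yields the result directly in its natural dim order.
#include <cstdint>
#include <vector>

bool should_swap(const std::vector<int64_t>& lo, const std::vector<int64_t>& ro) {
  // positions are sorted ascending, so comparing the extremes suffices
  return !lo.empty() && !ro.empty() && ro.back() < lo.front();
}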
diff --git a/aten/src/ATen/native/LinearAlgebra.cpp b/aten/src/ATen/native/LinearAlgebra.cpp
index b62c584641db..616e6ec60e13 100644
--- a/aten/src/ATen/native/LinearAlgebra.cpp
+++ b/aten/src/ATen/native/LinearAlgebra.cpp
@@ -1360,7 +1360,8 @@ Tensor outer(const Tensor& self, const Tensor& vec2) {
#endif
-#if defined(__aarch64__) && AT_MKLDNN_ACL_ENABLED()
+#if !defined(__aarch64__) || AT_MKLDNN_ACL_ENABLED()
+// Used by default on x86 platforms and on AArch64+ACL
static inline int64_t get_mkldnn_matmul_min_dim() {
static auto value = [&] {
const int64_t default_min_dim = [&] {
@@ -1395,8 +1396,6 @@ static inline bool apply_mkldnn_matmul_heur(int64_t m, int64_t k, int64_t n) {
return at::globalContext().userEnabledMkldnn() && m > min_dim && k > min_dim && n > min_dim && m * k * n > min_size;
}
#endif
-
-
static void addmm_impl_cpu_(
Tensor &result, const Tensor &self, Tensor m1, Tensor m2, const Scalar& beta, const Scalar& alpha) {
TORCH_INTERNAL_ASSERT(self.dim() == 2 && m1.dim() == 2 && m2.dim() == 2);
@@ -1772,8 +1771,8 @@ static inline void bmm_out_or_baddbmm_(const Tensor& self_or_result_, const Tens
return (strides[2] == 1 && (sizes[1] == 1 || strides[1] >= sizes[2])) ||
(strides[1] == 1 && (sizes[2] == 1 || strides[2] >= sizes[1]));
};
-
-#if defined(__aarch64__) && AT_MKLDNN_ACL_ENABLED()
+#if !defined(__aarch64__) || AT_MKLDNN_ACL_ENABLED()
+ // On x86, always apply the mkldnn matmul heuristic; on AArch64, only when compiled with ACL
bool apply_heur = apply_mkldnn_matmul_heur(batch1.sizes()[1], batch1.sizes()[2], batch2.sizes()[2]);
if (apply_heur && use_mkldnn_matmul(batch1, batch2, self_or_result)) {
try {
@@ -1785,7 +1784,6 @@ static inline void bmm_out_or_baddbmm_(const Tensor& self_or_result_, const Tens
}
}
#endif
-
if (contraction_size * res_rows * res_cols < 400) {
if (is_bmm_out) {
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(kBFloat16, kHalf, batch1.scalar_type(), "bmm", [&] {
diff --git a/aten/src/ATen/native/LossNLL.cpp b/aten/src/ATen/native/LossNLL.cpp
index 53d56622fe62..ca86292403fb 100644
--- a/aten/src/ATen/native/LossNLL.cpp
+++ b/aten/src/ATen/native/LossNLL.cpp
@@ -47,10 +47,14 @@ TORCH_META_FUNC(nll_loss_forward)
TORCH_CHECK(
target.dim() <= 1,
"0D or 1D target tensor expected, multi-target not supported");
-
- auto no_batch_dim = self.dim() == 1 && target.dim() == 0;
+ if (self.dim() == 1 && target.dim() == 1) {
+ TORCH_CHECK_VALUE(
+ target.size(0) == 1,
+ "For 1D input, 1D target must have size 1, but got target size: ",
+ target.size(0));
+ }
TORCH_CHECK(
- no_batch_dim || (self.size(0) == target.size(0)),
+ self.dim() == 1 || (self.size(0) == target.size(0)),
"size mismatch (got input: ",
self.sizes(),
", target: ",
diff --git a/aten/src/ATen/native/Onehot.cpp b/aten/src/ATen/native/Onehot.cpp
index 2ac513bf0888..8833bdb6e471 100644
--- a/aten/src/ATen/native/Onehot.cpp
+++ b/aten/src/ATen/native/Onehot.cpp
@@ -1,5 +1,6 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include
+#include
#ifndef AT_PER_OPERATOR_HEADERS
#include
@@ -24,8 +25,13 @@ Tensor one_hot(const Tensor &self, int64_t num_classes) {
if (num_classes == -1) {
num_classes = self.max().item().toLong() + 1;
}
- at::Tensor index = at::arange(num_classes, self.options());
- return at::eq(self.unsqueeze(-1), index).to(kLong);
+ {
+ // If `self` is a DTensor, then allow implicit replication
+ // of the `index` Tensor.
+ at::DTensorAllowImplicitReplication guard;
+ at::Tensor index = at::arange(num_classes, self.options());
+ return at::eq(self.unsqueeze(-1), index).to(kLong);
+ }
}
auto shape = self.sizes().vec();
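at::DTensorAllowImplicitReplication is an RAII guard, so the extra block scope above is exactly what bounds the behavior change. A minimal generic sketch of the pattern (illustrative, not the DTensor implementation):

// A thread-local flag flipped for the guard's lifetime and restored on exit.
struct AllowImplicitReplicationGuard {
  static thread_local bool allowed;
  bool prev;
  AllowImplicitReplicationGuard() : prev(allowed) { allowed = true; }
  ~AllowImplicitReplicationGuard() { allowed = prev; } // restored even on exceptions
};
thread_local bool AllowImplicitReplicationGuard::allowed = false;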
diff --git a/aten/src/ATen/native/PadNd.cpp b/aten/src/ATen/native/PadNd.cpp
index 8072d24a1090..8099648d37b2 100644
--- a/aten/src/ATen/native/PadNd.cpp
+++ b/aten/src/ATen/native/PadNd.cpp
@@ -240,8 +240,15 @@ Tensor _pad_enum_symint(const Tensor &self, c10::SymIntArrayRef pad, int64_t mod
default: {}
}
}
- C10_THROW_ERROR(NotImplementedError,
- "Only 2D, 3D, 4D, 5D padding with non-constant padding are supported for now");
+
+ std::ostringstream error_msg;
+ error_msg << "Padding size " << pad.size() << " is not supported for " << input_dim << "D input tensor.\n";
+ error_msg << "Supported combinations for non-constant padding:\n";
+ error_msg << " - 2D or 3D input: padding size = 2 (pads last dimension)\n";
+ error_msg << " - 3D or 4D input: padding size = 4 (pads last 2 dimensions)\n";
+ error_msg << " - 4D or 5D input: padding size = 6 (pads last 3 dimensions)";
+
+ C10_THROW_ERROR(NotImplementedError, error_msg.str());
}
Tensor pad_symint(const Tensor &self, c10::SymIntArrayRef pad, std::string_view mode, std::optional value) {
diff --git a/aten/src/ATen/native/TensorAdvancedIndexing.cpp b/aten/src/ATen/native/TensorAdvancedIndexing.cpp
index 408faea1b764..7d613fc02312 100644
--- a/aten/src/ATen/native/TensorAdvancedIndexing.cpp
+++ b/aten/src/ATen/native/TensorAdvancedIndexing.cpp
@@ -2174,7 +2174,7 @@ static void _scatter_via_index_put(
if (self.dim() == 1 || broadcast_index) {
Tensor squeezed = index;
if (broadcast_index && index.dim() > 1) {
- for (const auto d : c10::irange(index.dim())) {
+ for (int64_t d = index.dim() - 1; d >= 0; --d) {
if (d == dim) {
continue;
}
diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp
index 054cc66cf8eb..1886e65fc1ed 100644
--- a/aten/src/ATen/native/TensorFactories.cpp
+++ b/aten/src/ATen/native/TensorFactories.cpp
@@ -1640,6 +1640,9 @@ Tensor zeros_symint(
std::optional layout,
std::optional device,
std::optional pin_memory) {
+ for (const auto& dim_size : size) {
+ TORCH_CHECK(dim_size >= 0, "zeros: Dimension size must be non-negative.");
+ }
Layout layout_ = layout.value_or(Layout::Strided);
if (at::sparse_csr::is_sparse_compressed(layout_)) {
return zeros_sparse_compressed_symint(
diff --git a/aten/src/ATen/native/TensorProperties.cpp b/aten/src/ATen/native/TensorProperties.cpp
index 77acfe47363e..4fa0556ad785 100644
--- a/aten/src/ATen/native/TensorProperties.cpp
+++ b/aten/src/ATen/native/TensorProperties.cpp
@@ -18,6 +18,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -57,6 +58,12 @@ c10::SymInt sym_size(const Tensor& self, int64_t dim) {
return self.sym_size(dim);
}
+c10::SymBool sym_is_contiguous(
+ const Tensor& self,
+ c10::MemoryFormat memory_format) {
+ return self.sym_is_contiguous(memory_format);
+}
+
c10::SymInt sym_stride(const Tensor& self, int64_t dim) {
return self.sym_stride(dim);
}
diff --git a/aten/src/ATen/native/cpu/Loops.h b/aten/src/ATen/native/cpu/Loops.h
index 5715fd8f047f..83b51a998563 100644
--- a/aten/src/ATen/native/cpu/Loops.h
+++ b/aten/src/ATen/native/cpu/Loops.h
@@ -89,7 +89,7 @@ execute_op(char* C10_RESTRICT data[], const int64_t* strides, int64_t i, int64_t
using result_type = typename traits::result_type;
for (; i < n; i++) {
result_type* out_ptr = (result_type*)(data[0] + i * strides[0]);
- *out_ptr = c10::guts::apply(op, dereference<traits>(
+ *out_ptr = std::apply(op, dereference<traits>(
&data[1],
&strides[1],
i));
@@ -102,7 +102,7 @@ inline void
execute_op(char* C10_RESTRICT data[], const int64_t* strides, int64_t i, int64_t n, func_t&& op) {
 using traits = function_traits<func_t>;
for (; i < n; i++) {
- c10::guts::apply(op, dereference<traits>(
+ std::apply(op, dereference<traits>(
&data[0],
&strides[0],
i));
@@ -162,7 +162,7 @@ void handle_tuple_outputs(char* C10_RESTRICT data[],
}
// Loop operation for `cpu_kernel_multiple_outputs`.
-// 1. Use `c10::guts::apply` to make dynamic method invocation
+// 1. Use `std::apply` to perform dynamic method invocation
// for the lambda passed in `cpu_kernel_multiple_outputs`.
// 2. Iterate over the members of the returned tuple, set the corresponding
// output tensor by the tuple member in `handle_tuple_outputs` function.
@@ -183,7 +183,7 @@ multiple_outputs_loop(char* C10_RESTRICT data[], const int64_t* strides_, int64_
}
for (; i < n; i++) {
- auto output = c10::guts::apply(op, dereference<traits>(
+ auto output = std::apply(op, dereference<traits>(
&data[num_outputs],
&strides[num_outputs],
i));
@@ -213,8 +213,8 @@ vectorized_loop(char** C10_RESTRICT data_, int64_t n, int64_t S, func_t&& op, ve
for (; i <= n - 2 * Vec::size(); i += 2 * Vec::size()) {
auto args1 = dereference_vec(&data[1], opt_scalar, S, i);
auto args2 = dereference_vec(&data[1], opt_scalar, S, i + Vec::size());
- auto out1 = c10::guts::apply(vop, std::move(args1));
- auto out2 = c10::guts::apply(vop, std::move(args2));
+ auto out1 = std::apply(vop, std::move(args1));
+ auto out2 = std::apply(vop, std::move(args2));
out1.store(data[0] + i * sizeof(scalar_t));
out2.store(data[0] + (i + Vec::size()) * sizeof(scalar_t));
}
diff --git a/aten/src/ATen/native/cpu/PaddingKernel.cpp b/aten/src/ATen/native/cpu/PaddingKernel.cpp
index e3f08194bb58..59d838b9782d 100644
--- a/aten/src/ATen/native/cpu/PaddingKernel.cpp
+++ b/aten/src/ATen/native/cpu/PaddingKernel.cpp
@@ -156,7 +156,7 @@ void cpu_padding(
int64_t offset_h = ndim >= 2 ? p.offsets[ndim - 2] : 0;
int64_t offset_w = p.offsets[ndim - 1];
- // do vectorized copy whe output is overlapped with input on W,
+ // do vectorized copy when output is overlapped with input on W,
// only applies to positive padding
auto loop = [=](scalar_t* out, const scalar_t* in, bool positive_padding) {
if (positive_padding) {
diff --git a/aten/src/ATen/native/cpu/batch_norm_kernel.cpp b/aten/src/ATen/native/cpu/batch_norm_kernel.cpp
index 5a288193143d..d013dfa0485e 100644
--- a/aten/src/ATen/native/cpu/batch_norm_kernel.cpp
+++ b/aten/src/ATen/native/cpu/batch_norm_kernel.cpp
@@ -318,7 +318,7 @@ batch_norm_cpu_collect_stats_channels_last_impl(
//
// The optimal THRESHOLD to tile was found empirically.
// When C > THRESHOLD, C is large enough that the benefit from tiling and vectorization outweigh the synchronization overhead.
- // Wehn C <= TILE_SIZE, the problem size is small enough (C <= TILE_SIZE && NHW <= max_threads) that it's better to launch single thread with vectorization than C threads without vectorization.
+ // When C <= TILE_SIZE, the problem size is small enough (C <= TILE_SIZE && NHW <= max_threads) that it's better to launch single thread with vectorization than C threads without vectorization.
//
// When num_threads == 1, always use Method 2 as there is no synchronization overhead.
//
diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp
index a7c17893903b..23447c7e09b3 100644
--- a/aten/src/ATen/native/cuda/Blas.cpp
+++ b/aten/src/ATen/native/cuda/Blas.cpp
@@ -16,6 +16,7 @@
#include
#include
#include
+#include <ATen/native/GroupedMMUtils.h>
#include
#include
#include
@@ -1079,6 +1080,16 @@ static bool _scaled_mm_allowed_device(bool sm90_only=false, bool sm100_only=fals
#endif
}
+static bool _grouped_mm_allowed_device() {
+#ifdef USE_ROCM
+ return false;
+#else
+ auto dprops = at::cuda::getCurrentDeviceProperties();
+ // CUDA capability 8.0 and greater
+ return dprops->major >= 8;
+#endif
+}
+
#ifdef USE_ROCM
static bool _scaled_mm_is_fnuz() {
return at::detail::getCUDAHooks().isGPUArch({"gfx942"});
@@ -1540,71 +1551,8 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
}
namespace {
- at::Tensor create_grouped_gemm_output_tensor(const Tensor& mat_a,
- const Tensor& mat_b,
- const std::optional<Tensor>& offs,
- std::optional<c10::ScalarType> out_dtype
- ) {
- c10::SmallVector<int64_t, 3> out_size;
- const bool a_is_2d = mat_a.dim() == 2;
- const bool b_is_2d = mat_b.dim() == 2;
- if (a_is_2d) {
- if (b_is_2d) {
- out_size = {offs->size(0), mat_a.size(0), mat_b.size(1)};
- } else {
- TORCH_CHECK(offs->size(0) == mat_b.size(0), "matrix batch sizes have to match");
- out_size = {mat_a.size(0), mat_b.size(-1)};
- }
- } else {
- if (b_is_2d) {
- // this case is not actually encountered for MoE gemms
- TORCH_CHECK(offs->size(0) == mat_a.size(0), "matrix batch sizes have to match");
- out_size = {mat_a.size(1), mat_b.size(1)};
- } else { // regular bmm
- TORCH_CHECK(mat_a.size(0) == mat_b.size(0), "batched dimension has to match");
- out_size = {mat_a.size(0), mat_a.size(1), mat_b.size(-1)};
- }
- }
-
- const auto out_dtype_ = out_dtype.value_or(kBFloat16);
- TORCH_CHECK(out_dtype_ == kBFloat16, "Only bf16 high precision output types are supported for grouped gemm");
-
- #ifndef USE_ROCM
- // For TMA transfers, strides of output tensor have to be either
- // 1, or aligned to 16 bytes.
- const auto last_dim = out_size.size() - 1;
- const auto alignment = 16 / c10::elementSize(out_dtype_);
- const int64_t size_padded = (out_size[last_dim] + alignment - 1) / alignment * alignment;
- std::vector<int64_t> out_stride;
- if (a_is_2d != b_is_2d) {
- out_stride = {size_padded, 1};
- } else {
- out_stride = {out_size[1] * size_padded, size_padded, 1};
- }
- return at::empty_strided(out_size, out_stride, mat_a.options().dtype(out_dtype_));
- #else
- return at::empty(out_size, mat_a.options().dtype(out_dtype_));
- #endif
- }
-
- bool check_valid_strides_and_return_transposed(const Tensor& mat) {
- IntArrayRef tensor_strides = mat.strides();
- IntArrayRef tensor_sizes = mat.sizes();
- int end_dim = mat.dim() - 1;
- int alignment = 16 / mat.element_size();
- TORCH_CHECK(uint64_t(mat.data_ptr()) % 16 ==0, "expected data_ptr to be aligned to 16 bytes\n");
- if ((tensor_strides[end_dim - 1] == 1) && (tensor_strides[end_dim] >= std::max<int64_t>(1, tensor_sizes[end_dim - 1]))) {
- TORCH_CHECK(tensor_strides[end_dim] % alignment == 0, "strides should be multiple of 16 bytes");
- return true;
- } else if ((tensor_strides[end_dim] == 1) && (tensor_strides[end_dim - 1] >= std::max<int64_t>(1, tensor_sizes[end_dim]))) {
- TORCH_CHECK(tensor_strides[end_dim - 1] % alignment == 0, "strides should be multiple of 16 bytes");
- return false;
- } else {
- TORCH_CHECK(false, "Invalid strides/sizes, got ", mat.strides(), " for strides and ", mat.sizes(), " for sizes");
- }
- }
-
- void check_scale(const Tensor& mat, const Tensor& scale, const int dim, const int arg_idx, const int scale_multiplier=1) {
+ void _check_scales_fp8_rowwise(const Tensor& mat, const Tensor& scale, const int dim, const int arg_idx, const int scale_multiplier=1) {
+ // Checks scales for 2d or 3d target tensors (`mat`).
if (mat.dim() == 2) {
TORCH_CHECK(
scale.dim() == 1,
@@ -1638,9 +1586,66 @@ namespace {
"scale must have the same first dimension as mat for arg ",
arg_idx);
}
-}
+ }
+ void _check_scales_mxfp8(const Tensor& mat, const Tensor& scale, const int dim, const int arg_idx) {
+ // Checks scales for 2d or 3d target tensors (`mat`).
+ if (mat.dim() == 2) {
+ // For MXFP8, 2d tensors have variable size groups represented as subtensors,
+ // that are converted to blocked padded format individually,
+ // so we can't check the scale sizes without doing a d2h sync to get the group sizes here.
+ TORCH_CHECK(
+ scale.dim() == mat.dim(),
+ "for mxfp8, scale must have same number of dimensions as parent tensor, but got mat.dim() = ", mat.dim(), " and scale.dim() = ", scale.dim(), " for arg ", arg_idx);
+
+ // LHS mat shape (M, total_K) -> scale shape (rounded_up(M, 128), rounded_up_per_group(K/32, 4))
+ // RHS mat shape (total_K, N) -> scale shape (rounded_up(N, 128), rounded_up_per_group(K/32, 4))
+ // * weight is transposed prior to the call, scale stays non-transposed.
+ bool LHS = arg_idx == 0;
+ int scale_dim_to_check = 0;
+ int mat_dim_to_check = LHS ? 0 : 1;
+ TORCH_CHECK(
+ scale.size(scale_dim_to_check) >= mat.size(mat_dim_to_check),
+ "for mxfp8, arg ", arg_idx, " tensor shape (", mat.size(0), ", ", mat.size(1), ") ",
+ "must have scale.shape[", scale_dim_to_check, "] >= ", mat.size(mat_dim_to_check), " but got scale.shape=(", scale.size(0), ", ", scale.size(1), ")");
+ } else {
+ // For MXFP8, 3d tensors have static group sizes (stack of 2d tensors),
+ // so we can check the exact expected scale sizes here without a d2h sync.
+ auto round_up = [](auto x, auto y) {
+ return ((x + y - 1) / y) * y;
+ };
+
+ // TODO: this is for 3d tensor in 2d-3d case specifically.
+ // We'll need to support 3d-3d and 3d-2d cases once mxfp8 grouped gemm supports them.
+ int64_t G = mat.size(0);
+ int64_t K = mat.size(1);
+ int64_t N = mat.size(2);
+ int64_t blocked_scale_K = round_up(K/32, 4);
+ int64_t blocked_scale_N = round_up(N, 128);
+
+ // fbgemm expects stack of flattened blocked scales for 3d tensor, shape (G, blocked_scale_K * blocked_scale_N).
+ TORCH_CHECK(
+ scale.dim() == mat.dim() - 1,
+ "for mxfp8 2d-3d grouped GEMM, the 3d tensor of shape (G,K,N) must have a 2d scale of shape (G, blocked_scale_K * blocked_scale_N), but scale is ", scale.dim(), "D for arg ", arg_idx
+ );
+ TORCH_CHECK(
+ scale.size(0) == G && scale.size(1) == blocked_scale_K * blocked_scale_N,
+ "for mxfp8, the tensor shape (", G, ", ", K, ", ", N, ") must have scale shape (", G, ",", blocked_scale_K, ",", blocked_scale_N, ") for arg ", arg_idx
+ );
+ }
+ }
+ void check_scale(const Tensor& mat, const Tensor& scale, const int dim, const int arg_idx, const int scale_multiplier=1) {
+ bool using_fp8_rowwise = scale.scalar_type() == kFloat;
+ bool using_mxfp8 = scale.scalar_type() == at::kFloat8_e8m0fnu;
+ if (using_fp8_rowwise) {
+ _check_scales_fp8_rowwise(mat, scale, dim, arg_idx, scale_multiplier);
+ } else if (using_mxfp8) {
+ _check_scales_mxfp8(mat, scale, dim, arg_idx);
+ } else {
+ TORCH_CHECK(false, "scale must be float32 or float8_e8m0fnu, but got ", scale.dtype());
+ }
+ }
}
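A worked example of the mxfp8 blocked-scale shape rule checked above, for an illustrative 3d RHS of shape (G, K, N) = (8, 4096, 7168):

// K is scaled per 32-element block and padded up to a multiple of 4;
// N is padded up to a multiple of 128; the per-group scale is flattened.
#include <cstdint>
#include <iostream>

int64_t round_up(int64_t x, int64_t y) { return (x + y - 1) / y * y; }

int main() {
  const int64_t G = 8, K = 4096, N = 7168;
  const int64_t blocked_scale_K = round_up(K / 32, 4); // 128
  const int64_t blocked_scale_N = round_up(N, 128);    // 7168
  // expected flattened scale shape: (G, blocked_scale_K * blocked_scale_N)
  std::cout << G << " x " << blocked_scale_K * blocked_scale_N << "\n"; // 8 x 917504
  return 0;
}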
Tensor
@@ -1665,8 +1670,8 @@ const std::optional<Tensor>& bias,
const std::optional<Tensor>& scale_result,
std::optional<c10::ScalarType> out_dtype,
bool use_fast_accum) {
- bool allowed_device = _scaled_mm_allowed_device();
- TORCH_CHECK(allowed_device, "torch._scaled_grouped_mm is only supported on CUDA devices with compute capability = 9.0, or ROCm MI300+");
+ bool allowed_device = _scaled_mm_allowed_device(/*sm90_only*/true, /*sm100_only*/true);
+ TORCH_CHECK(allowed_device, "torch._scaled_grouped_mm is only supported on CUDA devices with compute capability = [9.0, 10.0], or ROCm MI300+");
TORCH_CHECK(!check_valid_strides_and_return_transposed(mat_a), "Expected mat1 to not be transposed");
TORCH_CHECK(check_valid_strides_and_return_transposed(mat_b), "Expected mat2 to be transposed");
@@ -1699,16 +1704,47 @@ bool use_fast_accum) {
TORCH_CHECK(offs->dtype() == at::kInt, "Offsets have to be int32");
}
- // Both Per-Tensor and Row-wise scaling expect fp32 tensors
+ // FP8 per-tensor and per-row scaling expect fp32 scales.
+ // MXFP8 expects float8_e8m0fnu scales.
TORCH_CHECK(
- scale_a.scalar_type() == kFloat && scale_b.scalar_type() == kFloat,
- "Both scale_a and scale_b must be float (fp32) tensors.");
+ (scale_a.scalar_type() == kFloat && scale_b.scalar_type() == kFloat) ||
+ (scale_a.scalar_type() == at::kFloat8_e8m0fnu && scale_b.scalar_type() == at::kFloat8_e8m0fnu),
+ "For FP8 tensorwise and rowwise, both scales must both be float32 tensors. For MXFP8, scales must both be float8_e8m0fnu tensors.");
const int scale_multiplier = (mat_a.dim() == 2 && mat_b.dim() == 2) ? offs->size(0) : 1;
 check_scale(mat_a, scale_a, 0, 0, scale_multiplier);
check_scale(mat_b, scale_b, 1, 1, scale_multiplier);
- Tensor out = create_grouped_gemm_output_tensor(mat_a, mat_b, offs, out_dtype);
+ const auto out_dtype_ = out_dtype.value_or(kBFloat16);
+ TORCH_CHECK(out_dtype_ == kBFloat16, "Only bf16 high precision output types are supported for grouped gemm");
+
+ Tensor out = create_grouped_gemm_output_tensor(mat_a, mat_b, offs, out_dtype_);
+
+#if defined(USE_FBGEMM_GENAI) && defined(USE_CUDA) && !defined(USE_ROCM)
+ // MXFP8 grouped GEMM dispatching
+ bool is_mx8mx8bf16 = (
+ mat_a.scalar_type() == at::kFloat8_e4m3fn && mat_b.scalar_type() == at::kFloat8_e4m3fn &&
+ scale_a.scalar_type() == at::kFloat8_e8m0fnu && scale_b.scalar_type() == at::kFloat8_e8m0fnu
+ );
+
+ if (is_mx8mx8bf16) {
+ // nullopt out_dtype already defaulted to bf16 above; check the resolved value,
+ // and only on the MXFP8 path
+ TORCH_CHECK(out_dtype_ == at::kBFloat16, "Only bf16 out_dtype is supported for MXFP8 grouped gemm");
+ bool b_is_3d = mat_b.dim() == 3;
+ bool is_2d_2d = a_is_2d && b_is_2d;
+ bool is_2d_3d = a_is_2d && b_is_3d;
+ TORCH_CHECK(is_2d_2d || is_2d_3d, "MXFP8 grouped GEMM currently only supports 2d-2d and 2d-3d cases");
+ TORCH_CHECK(offs.has_value(), "MXFP8 2d-2d and 2d-3d grouped GEMMs require offsets");
+
+ fbgemm_gpu::mx8mx8bf16_grouped_mm(
+ mat_a,
+ mat_b,
+ scale_a,
+ scale_b,
+ offs.value(),
+ out);
+ return out;
+ }
+#endif
#ifndef USE_ROCM
TORCH_CHECK(mat_a.dtype() == at::kFloat8_e4m3fn, "Expected mat_a to be Float8_e4m3 matrix got ", mat_a.scalar_type());
@@ -1741,6 +1777,7 @@ bool use_fast_accum) {
#else
TORCH_CHECK(false, "grouped gemm is not supported without USE_FBGEMM_GENAI on ROCM")
#endif
+
#endif
}
@@ -1750,33 +1787,21 @@ const std::optional<Tensor>& offs,
const std::optional<Tensor>& bias,
std::optional<c10::ScalarType> out_dtype) {
#ifndef USE_ROCM
- bool allowed_device = _scaled_mm_allowed_device(/*sm90_only*/true, /*sm100_only*/true);
- TORCH_CHECK(allowed_device, "torch._grouped_mm is only supported on CUDA devices with compute capability = 9.0, 10.0");
-
- TORCH_CHECK(mat_a.dtype() == at::kBFloat16, "Expected mat_a to be BFloat16 matrix got ", mat_a.scalar_type());
- TORCH_CHECK(mat_b.dtype() == at::kBFloat16, "Expected mat_a to be BFloat16 matrix got ", mat_b.scalar_type());
- TORCH_CHECK(mat_a.dim() == 2 || mat_a.dim() == 3, "mat_a has to be 2 or 3d");
- TORCH_CHECK(mat_b.dim() == 2 || mat_b.dim() == 3, "mat_b has to be 2 or 3d");
- const bool a_is_2d = mat_a.dim() == 2;
- const bool b_is_2d = mat_b.dim() == 2;
- if (!a_is_2d || !b_is_2d) {
- TORCH_CHECK(mat_a.size(-1) == mat_b.size(-2), "contraction dimension of mat_a and mat_b must match");
- }
-
- // check that the strides are valid, the fn will throw an error if not
- check_valid_strides_and_return_transposed(mat_a);
- check_valid_strides_and_return_transposed(mat_b);
- TORCH_CHECK(offs.has_value() == (a_is_2d || b_is_2d), "Have to provide offsets if there is a 2d matrix, or no offset if both matrices are 3d");
-
- if (offs.has_value()) {
- TORCH_CHECK(offs->dim() == 1, "offs has to be 1D");
- TORCH_CHECK(offs->dtype() == at::kInt, "Offsets have to be int32");
+ _grouped_mm_validate_inputs(mat_a, mat_b, offs, bias, out_dtype);
+ bool a_b_and_out_are_bf16 = (
+ mat_a.dtype() == at::kBFloat16 &&
+ mat_b.dtype() == at::kBFloat16 &&
+ out_dtype.value_or(at::kBFloat16) == at::kBFloat16
+ );
+ bool use_fast_path = _scaled_mm_allowed_device(/*sm90_only*/true, /*sm100_only*/true) && a_b_and_out_are_bf16;
+ const auto out_dtype_ = _resolve_grouped_mm_out_dtype(mat_a, mat_b, out_dtype);
+ Tensor out = create_grouped_gemm_output_tensor(mat_a, mat_b, offs, out_dtype_);
+ if (use_fast_path) {
+ // fast path, no d2h sync needed
+ at::cuda::detail::bf16bf16_grouped_mm(mat_a, mat_b, offs, bias, out);
+ } else {
+ _grouped_mm_fallback(mat_a, mat_b, offs, bias, out_dtype, out);
}
- TORCH_CHECK(!bias.has_value(), "Bias not supported yet");
-
- Tensor out = create_grouped_gemm_output_tensor(mat_a, mat_b, offs, out_dtype);
-
- at::cuda::detail::bf16bf16_grouped_mm(mat_a, mat_b, offs, bias, out);
return out;
#else
TORCH_CHECK(false, "grouped gemm is not supported on ROCM")
diff --git a/aten/src/ATen/native/cuda/CUDALoops.cuh b/aten/src/ATen/native/cuda/CUDALoops.cuh
index 12ad84a15b18..ee28c5c1693f 100644
--- a/aten/src/ATen/native/cuda/CUDALoops.cuh
+++ b/aten/src/ATen/native/cuda/CUDALoops.cuh
@@ -999,12 +999,41 @@ void gpu_kernel_impl(TensorIteratorBase& iter, const func_t& f) {
dtypes[i] = iter.dtype(i);
}
auto offset_calc = ::make_offset_calculator(iter);
+#ifdef USE_ROCM
+ constexpr int grp_sz = 128;
+ launch_legacy_kernel_manual_unroll<grp_sz, 4>(numel, [=] GPU_LAMBDA(int idx, bool unrl) {
+ if (unrl) {
+ auto offsets0 = offset_calc.get(idx);
+ auto offsets1 = offset_calc.get(idx + grp_sz);
+ auto offsets2 = offset_calc.get(idx + grp_sz * 2);
+ auto offsets3 = offset_calc.get(idx + grp_sz * 3);
+ void* out0 = data[0] + offsets0[0];
+ void* out1 = data[0] + offsets1[0];
+ void* out2 = data[0] + offsets2[0];
+ void* out3 = data[0] + offsets3[0];
+ arg0_t result0 = invoke(f, &data[1], &offsets0[1], &dtypes[1], 1);
+ arg0_t result1 = invoke(f, &data[1], &offsets1[1], &dtypes[1], 1);
+ arg0_t result2 = invoke(f, &data[1], &offsets2[1], &dtypes[1], 1);
+ arg0_t result3 = invoke(f, &data[1], &offsets3[1], &dtypes[1], 1);
+ c10::cast_and_store(dtypes[0], out0, result0);
+ c10::cast_and_store(dtypes[0], out1, result1);
+ c10::cast_and_store(dtypes[0], out2, result2);
+ c10::cast_and_store(dtypes[0], out3, result3);
+ } else {
+ auto offsets = offset_calc.get(idx);
+ void* out = data[0] + offsets[0];
+ arg0_t result = invoke(f, &data[1], &offsets[1], &dtypes[1], 1);
+ c10::cast_and_store(dtypes[0], out, result);
+ }
+ });
+#else
launch_legacy_kernel<128, 4>(numel, [=] GPU_LAMBDA(int idx) {
auto offsets = offset_calc.get(idx);
void* out = data[0] + offsets[0];
arg0_t result = invoke(f, &data[1], &offsets[1], &dtypes[1], 1);
c10::cast_and_store(dtypes[0], out, result);
});
+#endif
}
}
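A serial emulation of the unrolled ROCm dispatch above, assuming (as the offsets idx + k*grp_sz suggest) that the launcher hands each lambda invocation four elements spaced grp_sz apart when unrl is true, and single elements near the tail:

// CPU sketch of the manual 4x unroll pattern; run_manual_unroll and its
// contract are illustrative, not the actual ROCm launcher.
#include <cstdint>

template <typename F>
void run_manual_unroll(int64_t numel, int64_t grp_sz, F f) {
  int64_t base = 0;
  // full quads: each call covers base+lane, +grp_sz, +2*grp_sz, +3*grp_sz
  for (; base + 4 * grp_sz <= numel; base += 4 * grp_sz) {
    for (int64_t lane = 0; lane < grp_sz; ++lane) {
      f(base + lane, /*unrl=*/true);
    }
  }
  // tail: one element per call, no out-of-range offsets
  for (int64_t idx = base; idx < numel; ++idx) {
    f(idx, /*unrl=*/false);
  }
}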
diff --git a/aten/src/ATen/native/cuda/Copy.cu b/aten/src/ATen/native/cuda/Copy.cu
index 59b0426bab1f..62a07e1e28c8 100644
--- a/aten/src/ATen/native/cuda/Copy.cu
+++ b/aten/src/ATen/native/cuda/Copy.cu
@@ -42,6 +42,19 @@ void bfloat16_copy_kernel_cuda(TensorIteratorBase &iter) {
});
}
+#ifdef USE_ROCM
+void bfloat16tofloat32_copy_kernel_cuda(TensorIteratorBase &iter) {
+ gpu_kernel_nocast(iter, [] GPU_LAMBDA(at::BFloat16 value) {
+ return static_cast<float>(value);
+ });
+}
+void float16tofloat32_copy_kernel_cuda(TensorIteratorBase &iter) {
+ gpu_kernel_nocast(iter, [] GPU_LAMBDA(at::Half value) {
+ return static_cast<float>(value);
+ });
+}
+#endif
+
void float8_copy_kernel_cuda(TensorIteratorBase &iter) {
ScalarType dtype = iter.dtype(0);
ScalarType other_dtype = iter.dtype(1);
@@ -187,7 +200,17 @@ void direct_copy_kernel_cuda(TensorIteratorBase &iter) {
} else {
float16_copy_kernel_cuda(iter);
}
- } else if (isBitsType(dtype)) {
+ }
+#ifdef USE_ROCM
+ else if ((iter.dtype(1) == kBFloat16 || iter.dtype(1) == kHalf) && dtype == kFloat) {
+ if (iter.dtype(1) == kBFloat16) {
+ bfloat16tofloat32_copy_kernel_cuda(iter);
+ } else {
+ float16tofloat32_copy_kernel_cuda(iter);
+ }
+ }
+#endif
+ else if (isBitsType(dtype)) {
TORCH_CHECK(dtype == iter.dtype(1), "copy_() does not support casting "
"bits types to different bits types. Source dtype is ", iter.dtype(1), "target dtype is ", dtype);
AT_DISPATCH_BIT_TYPES(dtype, "copy_", [&] {
diff --git a/aten/src/ATen/native/cuda/Indexing.cu b/aten/src/ATen/native/cuda/Indexing.cu
index 02feb55cb69d..dacef18c79b6 100644
--- a/aten/src/ATen/native/cuda/Indexing.cu
+++ b/aten/src/ATen/native/cuda/Indexing.cu
@@ -59,7 +59,7 @@ constexpr uint64_t getDefaultMaxThreadsPerBlock() {
#ifdef USE_ROCM
#define SKIP_SORTED_INDICES 32
template <typename scalar_t, int SZ>
-__global__ void indexing_backward_kernel(
+__global__ void indexing_backward_kernel_many_indices(
const int64_t* sorted_indices, const int64_t* indices, const scalar_t* grad_output, scalar_t* grad_weight,
int64_t numel, int64_t stride, int64_t stride_before, int64_t outer_dim, bool accumulate) {
 using opmath_t = at::opmath_type<scalar_t>;
@@ -254,7 +254,8 @@ __global__ void indexing_backward_kernel_stride_1(
}
}
}
-#else
+#endif
+
template <typename scalar_t, int SZ>
__global__ void indexing_backward_kernel(
const int64_t* sorted_indices, const int64_t* indices, const scalar_t* grad_output, scalar_t* grad_weight,
@@ -333,6 +334,7 @@ __global__ void indexing_backward_kernel(
}
}
+#ifndef USE_ROCM
template
__global__ void indexing_backward_kernel_stride_1(
const int64_t* sorted_indices, const int64_t* indices, const scalar_t* grad_output, scalar_t* grad_weight,
@@ -708,6 +710,9 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List<std::optional<Tensor>>& indices,
+ std::min<int64_t>(at::cuda::getCurrentDeviceProperties()->maxGridSize[1], ceil_div(sliceSize, (int64_t) (warp_size))) : grid.y,
+ grid.z);
dim3 new_grid(ceil_div(num_indices, (int64_t) (indices_per_block * warp_size)), grid.y, grid.z);
size_t smem_dups_size = indices_per_block * warp_size * sizeof(int64_t);
#define KERNEL_GRID new_grid
@@ -780,11 +785,43 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List<std::optional<Tensor>>& indices,
+#ifdef USE_ROCM
+ if (num_indices >= 200000)
+ AT_DISPATCH_V2(
+ expandedValue.scalar_type(),
+ "indexing_backward_many_indices",
+ AT_WRAP([&] {
+ indexing_backward_kernel_many_indices<<>>(
+ sorted_indices.const_data_ptr(),
+ orig_indices.const_data_ptr(),
+ expandedValue.const_data_ptr(),
+ src_.mutable_data_ptr(),
+ num_indices,
+ sliceSize,
+ strideBefore,
+ nElemBefore,
+ accumulate);
+ C10_CUDA_KERNEL_LAUNCH_CHECK();
+ }),
+ AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX),
+ // AT_EXPAND(AT_FLOAT8_TYPES),
+ // TODO(#113663): clean up accumulation behavior in float8 dtypes, accumulate=True
+ // should not be supported here, then reenable AT_FLOAT8_DTYPES
+ kFloat8_e4m3fn,
+ kFloat8_e5m2,
+ kFloat8_e4m3fnuz,
+ kFloat8_e5m2fnuz,
+ kComplexHalf,
+ kHalf,
+ kBool,
+ kBFloat16);
+ else
+#endif
AT_DISPATCH_V2(
expandedValue.scalar_type(),
"indexing_backward",
AT_WRAP([&] {
- indexing_backward_kernel<<>>(
+ indexing_backward_kernel<<>>(
sorted_indices.const_data_ptr(),
orig_indices.const_data_ptr(),
expandedValue.const_data_ptr(),
diff --git a/aten/src/ATen/native/cuda/SegmentReduce.cu b/aten/src/ATen/native/cuda/SegmentReduce.cu
index 3acb359342f1..c6f88692a8a5 100644
--- a/aten/src/ATen/native/cuda/SegmentReduce.cu
+++ b/aten/src/ATen/native/cuda/SegmentReduce.cu
@@ -20,7 +20,7 @@
// SegmentReduce compilation with CUDA-12.9 causes NVCC crash on Windows
// See https://github.com/pytorch/pytorch/issues/156181
-#if !defined(_WIN32) || CUDART_VERSION < 12090
+#if !(defined(_WIN32) && CUDART_VERSION == 12090)
namespace at::native {
@@ -606,4 +606,4 @@ REGISTER_DISPATCH(
} // namespace at::native
-#endif
+#endif
\ No newline at end of file
diff --git a/aten/src/ATen/native/cuda/layer_norm_kernel.cu b/aten/src/ATen/native/cuda/layer_norm_kernel.cu
index 940680eb3682..81387bcceaf0 100644
--- a/aten/src/ATen/native/cuda/layer_norm_kernel.cu
+++ b/aten/src/ATen/native/cuda/layer_norm_kernel.cu
@@ -141,7 +141,11 @@ WelfordDataLN cuWelfordOnlineSum(
if constexpr (!rms_norm){
U delta = val - curr_sum.mean;
U new_count = curr_sum.count + 1.f;
+#if defined(USE_ROCM) && defined(PYTORCH_LAYERNORM_FAST_RECIPROCAL)
+ U new_mean = curr_sum.mean + delta * __builtin_amdgcn_rcpf(new_count);
+#else
U new_mean = curr_sum.mean + delta * (1.f/new_count); //proper division is slow, this is less accurate but noticeably faster
+#endif
return {new_mean, curr_sum.sigma2 + delta * (val - new_mean), new_count};
} else{
return {0.f, curr_sum.sigma2 + val * val, 0};
@@ -159,7 +163,11 @@ WelfordDataLN cuWelfordCombine(
U count = dataA.count + dataB.count;
U mean, sigma2;
if (count > decltype(dataB.count){0}) {
+#if defined(USE_ROCM) && defined(PYTORCH_LAYERNORM_FAST_RECIPROCAL)
+ auto coef = __builtin_amdgcn_rcpf(count);
+#else
auto coef = 1.f/count; //NB we don't use --use_fast_math, but this is emulation, 1./count goes to intrinsic, `* coef` is multiplication, instead of slow fp division
+#endif
auto nA = dataA.count * coef;
auto nB = dataB.count * coef;
mean = nA*dataA.mean + nB*dataB.mean;
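For context on what the reciprocal feeds into: cuWelfordCombine merges two partial (mean, sigma2, count) triples via the standard parallel-variance formula, and the ROCm branch only swaps 1.f/count for a hardware reciprocal approximation. A scalar CPU sketch of the merge:

// Chan et al. parallel merge of two Welford partials; float throughout,
// mirroring the kernel's arithmetic above.
struct Welford { float mean, sigma2, count; };

Welford combine(Welford a, Welford b) {
  float count = a.count + b.count;
  if (count <= 0.f) return {0.f, 0.f, 0.f};
  float coef = 1.f / count;          // __builtin_amdgcn_rcpf(count) on ROCm
  float nA = a.count * coef;
  float nB = b.count * coef;
  float mean = nA * a.mean + nB * b.mean;
  float dx = b.mean - a.mean;
  // cross term: delta^2 * countA * countB / count == dx*dx * a.count * nB
  float sigma2 = a.sigma2 + b.sigma2 + dx * dx * a.count * nB;
  return {mean, sigma2, count};
}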
diff --git a/aten/src/ATen/native/cudnn/MHA.cpp b/aten/src/ATen/native/cudnn/MHA.cpp
index 182716ed7a1a..1658ce34ca6c 100644
--- a/aten/src/ATen/native/cudnn/MHA.cpp
+++ b/aten/src/ATen/native/cudnn/MHA.cpp
@@ -146,7 +146,7 @@ namespace native {
namespace fe = cudnn_frontend;
-#define MAX_MHA_DIM 4
+constexpr uint8_t MAX_MHA_DIM = 4;
// Whether we will use ragged offsets in the dense (non-nested) path
// to avoid recompilation
@@ -238,7 +238,8 @@ void setMHAParams(
const std::optional& attn_bias,
double dropout_probability,
bool is_causal,
- bool return_softmaxstats) {
+ bool return_softmaxstats,
+ bool is_nested) {
 memset(&params, 0, sizeof(MHAParams));
params.device_id = at::cuda::current_device();
params.dataType = fe::DataType_t::HALF;
@@ -255,23 +256,24 @@ void setMHAParams(
params.is_causal = is_causal;
params.return_softmaxstats = return_softmaxstats;
params.has_attn_bias = attn_bias.has_value();
+ // Expect 4D dense tensor, 3D nested case (THD)
TORCH_INTERNAL_ASSERT(
- q.sizes().size() == MAX_MHA_DIM,
+ q.sizes().size() == (uint8_t)(MAX_MHA_DIM - (uint8_t)is_nested),
"Q tensor has unexpected number of dims, please report a bug to PyTorch.");
TORCH_INTERNAL_ASSERT(
- q.strides().size() == MAX_MHA_DIM,
+ q.strides().size() == (uint8_t)(MAX_MHA_DIM - (uint8_t)is_nested),
"Q tensor has unexpected number of dims, please report a bug to PyTorch.");
TORCH_INTERNAL_ASSERT(
- k.sizes().size() == MAX_MHA_DIM,
+ k.sizes().size() == (uint8_t)(MAX_MHA_DIM - (uint8_t)is_nested),
"K tensor has unexpected number of dims, please report a bug to PyTorch.");
TORCH_INTERNAL_ASSERT(
- k.strides().size() == MAX_MHA_DIM,
+ k.strides().size() == (uint8_t)(MAX_MHA_DIM - (uint8_t)is_nested),
"K tensor has unexpected number of dims, please report a bug to PyTorch.");
TORCH_INTERNAL_ASSERT(
- v.sizes().size() == MAX_MHA_DIM,
+ v.sizes().size() == (uint8_t)(MAX_MHA_DIM - (uint8_t)is_nested),
"V tensor has unexpected number of dims, please report a bug to PyTorch.");
TORCH_INTERNAL_ASSERT(
- v.strides().size() == MAX_MHA_DIM,
+ v.strides().size() == (uint8_t)(MAX_MHA_DIM - (uint8_t)is_nested),
"V tensor has unexpected number of dims, please report a bug to PyTorch.");
std::copy(q.sizes().begin(), q.sizes().end(), params.q_dim.begin());
std::copy(q.strides().begin(), q.strides().end(), params.q_stride.begin());
@@ -320,7 +322,8 @@ struct MHACacheKeyWrapper : ParamsWrapper {
const std::optional& attn_bias,
double dropout_probability,
bool is_causal,
- bool return_softmaxstats) {
+ bool return_softmaxstats,
+ bool is_nested) {
setMHAParams(
this->pod,
b,
@@ -335,7 +338,8 @@ struct MHACacheKeyWrapper : ParamsWrapper {
attn_bias,
dropout_probability,
is_causal,
- return_softmaxstats);
+ return_softmaxstats,
+ is_nested);
}
};
@@ -479,6 +483,8 @@ auto build_graph(
fe::graph::SDPA_attributes()
.set_name("CUDNN_SDPA")
.set_is_inference(return_softmaxstats == false)
+ // TODO(eqy): switch to this API once cuDNN FE is upgraded
+ // .set_generate_stats(return_softmaxstats)
.set_causal_mask(is_causal)
.set_attn_scale(attn_scale);
if (use_ragged_in_dense(q, k, v, o, attn_bias.has_value())) {
@@ -699,6 +705,8 @@ auto build_graph_nestedtensor(
fe::graph::SDPA_attributes()
.set_name("CUDNN_SDPA_NESTEDTENSOR")
.set_is_inference(return_softmaxstats == false)
+ // TODO(eqy): switch to this API once cuDNN FE is upgraded
+ // .set_generate_stats(return_softmaxstats)
.set_causal_mask(is_causal)
.set_attn_scale(attn_scale)
.set_seq_len_q(SEQ_LEN_Q_)
@@ -1386,7 +1394,8 @@ void run_cudnn_SDP_fprop(
attn_bias,
dropout_probability,
is_causal,
- return_softmaxstats);
+ return_softmaxstats,
+ false);
auto graph_ptr = getMHAGraphCache_().find(key);
std::shared_ptr