Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LLVM] Make the GPU loader utilities an LLVM tool #132096

Merged
merged 1 commit into from
Mar 20, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions libc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -59,13 +59,6 @@ set(LIBC_NAMESPACE ${default_namespace}
CACHE STRING "The namespace to use to enclose internal implementations. Must start with '__llvm_libc'."
)

# We will build the GPU utilities if we are not doing a runtimes build.
option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF)
if(LIBC_BUILD_GPU_LOADER OR ((NOT LLVM_RUNTIMES_BUILD) AND LLVM_LIBC_GPU_BUILD))
add_subdirectory(utils/gpu)
return()
endif()

option(LIBC_CMAKE_VERBOSE_LOGGING
"Log details warnings and notifications during CMake configuration." OFF)

1 change: 0 additions & 1 deletion libc/utils/gpu/CMakeLists.txt

This file was deleted.

54 changes: 0 additions & 54 deletions libc/utils/gpu/loader/CMakeLists.txt

This file was deleted.

10 changes: 0 additions & 10 deletions libc/utils/gpu/loader/amdgpu/CMakeLists.txt

This file was deleted.

9 changes: 0 additions & 9 deletions libc/utils/gpu/loader/nvptx/CMakeLists.txt

This file was deleted.

4 changes: 0 additions & 4 deletions llvm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -210,10 +210,6 @@ if("${LIBC_TARGET_TRIPLE}" STREQUAL "amdgcn-amd-amdhsa" OR
"${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda")
set(LLVM_LIBC_GPU_BUILD ON)
endif()
if (NOT "libc" IN_LIST LLVM_ENABLE_PROJECTS AND LLVM_LIBC_GPU_BUILD)
message(STATUS "Enabling libc project to build libc testing tools")
list(APPEND LLVM_ENABLE_PROJECTS "libc")
endif()

# LLVM_ENABLE_PROJECTS_USED is `ON` if the user has ever used the
# `LLVM_ENABLE_PROJECTS` CMake cache variable. This exists for
14 changes: 0 additions & 14 deletions llvm/runtimes/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -534,20 +534,6 @@ if(build_runtimes)
endif()
if(LLVM_LIBC_GPU_BUILD)
list(APPEND extra_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON")
if("libc" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES)
if(TARGET amdhsa-loader)
list(APPEND extra_cmake_args
"-DRUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:amdhsa-loader>")
list(APPEND extra_deps amdhsa-loader)
endif()
endif()
if("libc" IN_LIST RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES)
if(TARGET nvptx-loader)
list(APPEND extra_cmake_args
"-DRUNTIMES_nvptx64-nvidia-cuda_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:nvptx-loader>")
list(APPEND extra_deps nvptx-loader)
endif()
endif()
if(TARGET clang-offload-packager)
list(APPEND extra_deps clang-offload-packager)
endif()
45 changes: 45 additions & 0 deletions llvm/tools/llvm-gpu-loader/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
set(LLVM_LINK_COMPONENTS
BinaryFormat
Object
Option
Support
FrontendOffloading
)

add_llvm_tool(llvm-gpu-loader
llvm-gpu-loader.cpp

# TODO: We intentionally split this currently due to statically linking the
# GPU runtimes. Dynamically load the dependencies, possibly using the
# LLVM offloading API when it is complete.
PARTIAL_SOURCES_INTENDED
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

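This is not a correct use of `PARTIAL_SOURCES_INTENDED`. It is intended to be used only when you are building multiple targets from a single `CMakeLists.txt` (i.e. from the same source directory), so that each target legitimately uses only part of the sources found there. Here, you should just list all sources unconditionally and guard the backend-specific code with `#ifdef`.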


DEPENDS
intrinsics_gen
)

# Locate the RPC server handling interface.
include(FindLibcCommonUtils)
target_link_libraries(llvm-gpu-loader PUBLIC llvm-libc-common-utilities)

# Check for HSA support for targeting AMD GPUs.
find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A silent, automagic dependency on hsa-runtime64 (and on the CUDA runtime below), buried deep in the source tree, is a horrible idea. I've filed bug #132890 specifically about it.

if(hsa-runtime64_FOUND)
target_sources(llvm-gpu-loader PRIVATE amdhsa.cpp)
target_compile_definitions(llvm-gpu-loader PRIVATE AMDHSA_SUPPORT)
target_link_libraries(llvm-gpu-loader PRIVATE hsa-runtime64::hsa-runtime64)

# Compatibility with the old amdhsa-loader name.
add_llvm_tool_symlink(amdhsa-loader llvm-gpu-loader)
endif()

# Check for CUDA support for targeting NVIDIA GPUs.
find_package(CUDAToolkit 11.2 QUIET)
if(CUDAToolkit_FOUND)
target_sources(llvm-gpu-loader PRIVATE nvptx.cpp)
target_compile_definitions(llvm-gpu-loader PRIVATE NVPTX_SUPPORT)
target_link_libraries(llvm-gpu-loader PRIVATE CUDA::cuda_driver)

# Compatibility with the old nvptx-loader name.
add_llvm_tool_symlink(nvptx-loader llvm-gpu-loader)
endif()
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//

#include "Loader.h"
#include "llvm-gpu-loader.h"

#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
@@ -330,9 +330,9 @@ static hsa_status_t hsa_memcpy(void *dst, hsa_agent_t dst_agent,
return HSA_STATUS_SUCCESS;
}

int load(int argc, const char **argv, const char **envp, void *image,
size_t size, const LaunchParameters &params,
bool print_resource_usage) {
int load_amdhsa(int argc, const char **argv, const char **envp, void *image,
size_t size, const LaunchParameters &params,
bool print_resource_usage) {
// Initialize the HSA runtime used to communicate with the device.
if (hsa_status_t err = hsa_init())
handle_error(err);
Original file line number Diff line number Diff line change
@@ -6,21 +6,25 @@
//
//===----------------------------------------------------------------------===//
//
// This file opens a device image passed on the command line and passes it to
// one of the loader implementations for launch.
// This utility is used to launch standard programs onto the GPU in conjunction
// with the LLVM 'libc' project. It is designed to mimic a standard emulator
// workflow, allowing for unit tests to be run on the GPU directly.
//
//===----------------------------------------------------------------------===//

#include "Loader.h"
#include "llvm-gpu-loader.h"

#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/WithColor.h"
#include "llvm/TargetParser/Triple.h"

#include <cerrno>
#include <cstdio>
@@ -125,12 +129,40 @@ int main(int argc, const char **argv, const char **envp) {
strerror(errno)));
}

// Drop the loader from the program arguments.
LaunchParameters params{threads_x, threads_y, threads_z,
blocks_x, blocks_y, blocks_z};
int ret = load(new_argv.size(), new_argv.data(), envp,
const_cast<char *>(image.getBufferStart()),
image.getBufferSize(), params, print_resource_usage);

Expected<llvm::object::ELF64LEObjectFile> elf_or_err =
llvm::object::ELF64LEObjectFile::create(image);
if (!elf_or_err)
report_error(std::move(elf_or_err.takeError()));

int ret = 1;
if (elf_or_err->getArch() == Triple::amdgcn) {
#ifdef AMDHSA_SUPPORT
ret = load_amdhsa(new_argv.size(), new_argv.data(), envp,
const_cast<char *>(image.getBufferStart()),
image.getBufferSize(), params, print_resource_usage);
#else
report_error(createStringError(
"Unsupported architecture; %s",
Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
#endif
} else if (elf_or_err->getArch() == Triple::nvptx64) {
#ifdef NVPTX_SUPPORT
ret = load_nvptx(new_argv.size(), new_argv.data(), envp,
const_cast<char *>(image.getBufferStart()),
image.getBufferSize(), params, print_resource_usage);
#else
report_error(createStringError(
"Unsupported architecture; %s",
Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
#endif
} else {
report_error(createStringError(
"Unsupported architecture; %s",
Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
}

if (no_parallelism) {
if (flock(fd, LOCK_UN) == -1)
Original file line number Diff line number Diff line change
@@ -54,9 +54,16 @@ struct end_args_t {
/// Generic interface to load the \p image and launch execution of the _start
/// kernel on the target device. Copies \p argc and \p argv to the device.
/// Returns the final value of the `main` function on the device.
int load(int argc, const char **argv, const char **evnp, void *image,
size_t size, const LaunchParameters &params,
bool print_resource_usage);
#ifdef AMDHSA_SUPPORT
/// AMDGPU backend entry point; only declared when the tool is built with
/// AMDHSA_SUPPORT defined. Loads the \p image of \p size bytes, launches it
/// with the launch dimensions in \p params, and returns the final value of the
/// `main` function on the device. \p argc, \p argv and \p envp are the program
/// arguments and environment handed over by the host-side `main`.
int load_amdhsa(int argc, const char **argv, const char **envp, void *image,
                size_t size, const LaunchParameters &params,
                bool print_resource_usage);
#endif
#ifdef NVPTX_SUPPORT
/// NVPTX backend entry point; only declared when the tool is built with
/// NVPTX_SUPPORT defined. Same contract and parameters as the AMDGPU entry
/// point: loads \p image, launches it according to \p params, and returns the
/// final value of the `main` function on the device.
int load_nvptx(int argc, const char **argv, const char **envp, void *image,
               size_t size, const LaunchParameters &params,
               bool print_resource_usage);
#endif

/// Return \p V aligned "upwards" according to \p Align.
template <typename V, typename A> inline V align_up(V val, A align) {
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//

#include "Loader.h"
#include "llvm-gpu-loader.h"

#include "cuda.h"

@@ -236,9 +236,9 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, rpc::Server &server,
return CUDA_SUCCESS;
}

int load(int argc, const char **argv, const char **envp, void *image,
size_t size, const LaunchParameters &params,
bool print_resource_usage) {
int load_nvptx(int argc, const char **argv, const char **envp, void *image,
size_t size, const LaunchParameters &params,
bool print_resource_usage) {
if (CUresult err = cuInit(0))
handle_error(err);
// Obtain the first device found on the system.
Loading
Oops, something went wrong.