From d00044d22a310852c592b9be70697cdda7c5d9ef Mon Sep 17 00:00:00 2001
From: Dominik Adamski
Date: Thu, 4 Dec 2025 08:37:33 -0600
Subject: [PATCH] [Flang-RT] Changes required for embedded GPU LLVM IR Flang runtime

Add the FLANG_RT_EMBED_GPU_LLVM_IR CMake option (default ON). When it is
set, every Flang-RT library target is compiled with the
EMBED_FLANG_RT_GPU_LLVM_IR definition, and for amdgcn/nvptx runtimes
targets the runtime is built from supported_sources plus gpu_sources
instead of gpu_sources alone.

In the sources, code that is guarded by
  (__AMDGPU__ || __NVPTX__) && EMBED_FLANG_RT_GPU_LLVM_IR
is compiled out for that configuration: the pthread-based Lock, the
<thread> dependent declarations, environment.cpp and file.cpp, and a
number of I/O and STOP/Abort entry points. The abort() wrapper in
assign.cpp is removed.

The Flang-RT unit tests are not added for amdgcn/nvptx targets.

Outside Flang-RT: libc replaces '.' with '_' in LIBC_NAMESPACE, and libc++
does not build the narrow to_string(float/double/long double) overloads
for AMDGPU/NVPTX.

---
 flang-rt/CMakeLists.txt                   |  6 +++++-
 flang-rt/cmake/modules/AddFlangRT.cmake   |  6 ++++++
 flang-rt/include/flang-rt/runtime/lock.h  | 13 +++++++++++++
 flang-rt/include/flang-rt/runtime/tools.h |  7 +++++++
 flang-rt/lib/runtime/CMakeLists.txt       |  6 +++++-
 flang-rt/lib/runtime/assign.cpp           | 13 ++-----------
 flang-rt/lib/runtime/descriptor.cpp       |  2 ++
 flang-rt/lib/runtime/edit-input.cpp       |  4 ++++
 flang-rt/lib/runtime/environment.cpp      |  2 ++
 flang-rt/lib/runtime/file.cpp             |  2 ++
 flang-rt/lib/runtime/io-api-minimal.cpp   |  8 ++++++++
 flang-rt/lib/runtime/io-api.cpp           |  2 ++
 flang-rt/lib/runtime/stop.cpp             | 13 ++++++++++++-
 flang/include/flang/Runtime/extensions.h  |  5 ++++-
 flang/include/flang/Runtime/main.h        |  4 ++++
 libc/CMakeLists.txt                       |  1 +
 libcxx/src/string.cpp                     |  2 ++
 17 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt
index 252e3650cbc04..78f1811c294ef 100644
--- a/flang-rt/CMakeLists.txt
+++ b/flang-rt/CMakeLists.txt
@@ -152,6 +152,8 @@ if (NOT "${FLANG_RT_LIBCXX_PROVIDER}" IN_LIST FLANG_RT_SUPPORTED_PROVIDERS)
 endif ()
 
 option(FLANG_RT_ENABLE_STATIC "Build Flang-RT as a static library." ON)
+option(FLANG_RT_EMBED_GPU_LLVM_IR "Build Flang-RT as GPU LLVM IR library" ON)
+
 if (WIN32)
   # Windows DLL currently not implemented.
   set(FLANG_RT_ENABLE_SHARED OFF)
@@ -327,7 +329,9 @@ endif ()
 
 if (FLANG_RT_INCLUDE_TESTS)
   add_subdirectory(test)
-  add_subdirectory(unittests)
+  if (NOT "${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx")
+    add_subdirectory(unittests)
+  endif()
 else ()
   add_custom_target(check-flang-rt)
 endif()
diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake
index 923507764d691..8de1c5eb47949 100644
--- a/flang-rt/cmake/modules/AddFlangRT.cmake
+++ b/flang-rt/cmake/modules/AddFlangRT.cmake
@@ -219,6 +219,12 @@ function (add_flangrt_library name)
     # Minimum required C++ version for Flang-RT, even if CMAKE_CXX_STANDARD is defined to something else.
     target_compile_features(${tgtname} PRIVATE cxx_std_17)
 
+    # Determine which version of GPU Flang-RT we want to build:
+    # flang_rt.hostdevice or the implicitly linked device flang_rt.runtime.
+    if (FLANG_RT_EMBED_GPU_LLVM_IR)
+      target_compile_definitions(${tgtname} PRIVATE EMBED_FLANG_RT_GPU_LLVM_IR)
+    endif ()
+
     # When building the flang runtime if LTO is enabled the archive file
     # contains LLVM IR rather than object code. Currently flang is not
     # LTO aware so cannot link this file to compiled Fortran code.
diff --git a/flang-rt/include/flang-rt/runtime/lock.h b/flang-rt/include/flang-rt/runtime/lock.h
index 7c88534245733..0cffcf5e5deab 100644
--- a/flang-rt/include/flang-rt/runtime/lock.h
+++ b/flang-rt/include/flang-rt/runtime/lock.h
@@ -23,7 +23,9 @@
 #endif
 
 #if USE_PTHREADS
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 #include <pthread.h>
+#endif
 #elif defined(_WIN32)
 #include "flang/Common/windows-include.h"
 #else
@@ -45,6 +47,7 @@ class Lock {
   RT_API_ATTRS void Drop() {}
   RT_API_ATTRS bool TakeIfNoDeadlock() { return true; }
 #elif USE_PTHREADS
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
   Lock() { pthread_mutex_init(&mutex_, nullptr); }
   ~Lock() { pthread_mutex_destroy(&mutex_); }
   void Take() {
@@ -68,6 +71,14 @@ class Lock {
     isBusy_ = false;
     pthread_mutex_unlock(&mutex_);
   }
+#else // GPU LLVM IR build: pthreads is not used, every lock operation is a no-op.
+  RT_API_ATTRS void Take() {}
+  RT_API_ATTRS bool TakeIfNoDeadlock() { return true; }
+  RT_API_ATTRS bool Try() { return true; }
+  RT_API_ATTRS void Drop() {}
+  Lock() = default; // stateless in this configuration; keep the type trivial
+  ~Lock() = default;
+#endif
 #elif defined(_WIN32)
   Lock() { InitializeCriticalSection(&cs_); }
   ~Lock() { DeleteCriticalSection(&cs_); }
@@ -91,9 +102,11 @@ class Lock {
 #if RT_USE_PSEUDO_FILE_UNIT
   // No state.
 #elif USE_PTHREADS
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
   pthread_mutex_t mutex_{};
   volatile bool isBusy_{false};
   volatile pthread_t holder_;
+#endif
 #elif defined(_WIN32)
   CRITICAL_SECTION cs_;
 #else
diff --git a/flang-rt/include/flang-rt/runtime/tools.h b/flang-rt/include/flang-rt/runtime/tools.h
index 1939c4d907be4..6955979321ee1 100644
--- a/flang-rt/include/flang-rt/runtime/tools.h
+++ b/flang-rt/include/flang-rt/runtime/tools.h
@@ -42,6 +42,13 @@
 #define RT_USE_PSEUDO_FILE_UNIT 1
 #endif
 
+#if (defined(__AMDGPU__) || defined(__NVPTX__)) && defined(EMBED_FLANG_RT_GPU_LLVM_IR)
+// Use the pseudo lock and pseudo file unit implementations
+// for the device.
+#define RT_USE_PSEUDO_LOCK 1
+#define RT_USE_PSEUDO_FILE_UNIT 1
+#endif
+
 namespace Fortran::runtime {
 
 class Terminator;
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index ef0f812eaca00..ad46e8d847e5b 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -178,7 +178,11 @@ else ()
 endif ()
 
 if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx")
-  set(sources ${gpu_sources})
+  if (FLANG_RT_EMBED_GPU_LLVM_IR)
+    set(sources ${supported_sources} ${gpu_sources})
+  else ()
+    set(sources ${gpu_sources})
+  endif ()
 elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA")
   set(sources ${supported_sources})
 else ()
diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp
index 303ec79de240c..4aa0d003dd163 100644
--- a/flang-rt/lib/runtime/assign.cpp
+++ b/flang-rt/lib/runtime/assign.cpp
@@ -7,7 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Runtime/assign.h"
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 #include "flang/Runtime/stop.h"
+#endif
 #include "flang-rt/runtime/assign-impl.h"
 #include "flang-rt/runtime/derived.h"
 #include "flang-rt/runtime/descriptor.h"
@@ -862,17 +864,6 @@ void RTDEF(AssignPolymorphic)(Descriptor &to, const Descriptor &from,
           PolymorphicLHS);
 }
 
-#if defined(OMP_OFFLOAD_BUILD)
-// To support a recently added use of variant in the OpenMP offload build,
-// added an abort wrapper which calls the flang-rt FortranAAbort.
-// Avoids the following linker error:
-// ld.lld: error: undefined symbol: abort
-// >>> referenced by /tmp/device_aassign.amdgcn.gfx90a-34a7ed.img.lto.o:(std::__throw_bad_variant_access(char const*))
-extern "C" void abort(void) {
-  RTNAME(Abort)();
-}
-#endif
-
 RT_EXT_API_GROUP_END
 } // extern "C"
 } // namespace Fortran::runtime
diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp
index c95da0a5371e5..147ff079fc2d9 100644
--- a/flang-rt/lib/runtime/descriptor.cpp
+++ b/flang-rt/lib/runtime/descriptor.cpp
@@ -8,7 +8,9 @@
 
 #include "flang-rt/runtime/descriptor.h"
 #include "ISO_Fortran_util.h"
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 #include "memory.h"
+#endif
 #include "flang-rt/runtime/allocator-registry.h"
 #include "flang-rt/runtime/derived.h"
 #include "flang-rt/runtime/stat.h"
diff --git a/flang-rt/lib/runtime/edit-input.cpp b/flang-rt/lib/runtime/edit-input.cpp
index 436fc3894d902..32ff42a4d0221 100644
--- a/flang-rt/lib/runtime/edit-input.cpp
+++ b/flang-rt/lib/runtime/edit-input.cpp
@@ -569,7 +569,11 @@ static RT_API_ATTRS void RaiseFPExceptions(
 #ifdef feraisexcept // a macro in some environments; omit std::
 #define RAISE feraiseexcept
 #else
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 #define RAISE std::feraiseexcept
+#else
+#define RAISE(e) ((void)0) // GPU LLVM IR build: discard the argument, raise nothing
+#endif
 #endif
 #endif // !defined(RT_DEVICE_COMPILATION)
 
diff --git a/flang-rt/lib/runtime/environment.cpp b/flang-rt/lib/runtime/environment.cpp
index be4f7308ab027..f3f9a6834e5ab 100644
--- a/flang-rt/lib/runtime/environment.cpp
+++ b/flang-rt/lib/runtime/environment.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 #include "flang-rt/runtime/environment.h"
 #include "environment-default-list.h"
 #include "memory.h"
@@ -334,3 +335,4 @@ bool RTNAME(RegisterConfigureEnv)(
 } // extern "C"
 
 } // namespace Fortran::runtime
+#endif // not an embedded GPU LLVM IR build
diff --git a/flang-rt/lib/runtime/file.cpp b/flang-rt/lib/runtime/file.cpp
index 8255ec8691886..c3d9a5b8321a7 100644
--- a/flang-rt/lib/runtime/file.cpp
+++ b/flang-rt/lib/runtime/file.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 #include "flang-rt/runtime/file.h"
 #include "flang-rt/runtime/memory.h"
 #include "flang-rt/runtime/tools.h"
@@ -486,3 +487,4 @@ RT_API_ATTRS std::int64_t SizeInBytes(const char *path) {
 #endif // defined(RT_DEVICE_COMPILATION)
 
 } // namespace Fortran::runtime::io
+#endif // not an embedded GPU LLVM IR build
diff --git a/flang-rt/lib/runtime/io-api-minimal.cpp b/flang-rt/lib/runtime/io-api-minimal.cpp
index f84b62d63baa1..7077c20f41b4e 100644
--- a/flang-rt/lib/runtime/io-api-minimal.cpp
+++ b/flang-rt/lib/runtime/io-api-minimal.cpp
@@ -23,6 +23,7 @@ namespace Fortran::runtime::io {
 RT_EXT_API_GROUP_BEGIN
 #endif
 
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 Cookie IODEF(BeginExternalListOutput)(
     ExternalUnit unitNumber, const char *sourceFile, int sourceLine) {
   return BeginExternalListIO<Direction::Output, ExternalListIoStatementState>(
@@ -33,6 +34,7 @@ enum Iostat IODEF(EndIoStatement)(Cookie cookie) {
   IoStatementState &io{*cookie};
   return static_cast<enum Iostat>(io.EndIoStatement());
 }
+#endif
 
 template <int KIND, typename INT = CppTypeFor<TypeCategory::Integer, KIND>>
 inline RT_API_ATTRS bool FormattedScalarIntegerOutput(
@@ -45,6 +47,7 @@ inline RT_API_ATTRS bool FormattedScalarIntegerOutput(
   }
 }
 
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) {
   return FormattedScalarIntegerOutput<1>(*cookie, n, "OutputInteger8");
 }
@@ -60,6 +63,7 @@ bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) {
 bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) {
   return FormattedScalarIntegerOutput<8>(*cookie, n, "OutputInteger64");
 }
+#endif
 
 #ifdef __SIZEOF_INT128__
 bool IODEF(OutputInteger128)(Cookie cookie, common::int128_t n) {
@@ -79,6 +83,7 @@ inline RT_API_ATTRS bool FormattedScalarRealOutput(
   }
 }
 
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 bool IODEF(OutputReal32)(Cookie cookie, float x) {
   return FormattedScalarRealOutput<4>(*cookie, x, "OutputReal32");
 }
@@ -86,6 +91,7 @@ bool IODEF(OutputReal32)(Cookie cookie, float x) {
 bool IODEF(OutputReal64)(Cookie cookie, double x) {
   return FormattedScalarRealOutput<8>(*cookie, x, "OutputReal64");
 }
+#endif
 
 template <int KIND,
     typename REAL = typename RealOutputEditing<KIND>::BinaryFloatingPoint>
@@ -110,6 +116,7 @@ inline RT_API_ATTRS bool FormattedScalarComplexOutput(
   return false;
 }
 
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 bool IODEF(OutputComplex32)(Cookie cookie, float re, float im) {
   return FormattedScalarComplexOutput<4>(*cookie, re, im, "OutputComplex32");
 }
@@ -145,6 +152,7 @@ bool IODEF(OutputLogical)(Cookie cookie, bool truth) {
     return false;
   }
 }
+#endif
 
 } // namespace Fortran::runtime::io
 
diff --git a/flang-rt/lib/runtime/io-api.cpp b/flang-rt/lib/runtime/io-api.cpp
index 4c86fb9fdabf6..4eabde60a3a72 100644
--- a/flang-rt/lib/runtime/io-api.cpp
+++ b/flang-rt/lib/runtime/io-api.cpp
@@ -199,12 +199,14 @@ RT_API_ATTRS Cookie BeginExternalFormattedIO(const char *format,
   }
 }
 
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 Cookie IODEF(BeginExternalFormattedOutput)(const char *format,
     std::size_t formatLength, const Descriptor *formatDescriptor,
     ExternalUnit unitNumber, const char *sourceFile, int sourceLine) {
   return BeginExternalFormattedIO<Direction::Output>(format, formatLength,
       formatDescriptor, unitNumber, sourceFile, sourceLine);
 }
+#endif
 
 Cookie IODEF(BeginExternalFormattedInput)(const char *format,
     std::size_t formatLength, const Descriptor *formatDescriptor,
diff --git a/flang-rt/lib/runtime/stop.cpp b/flang-rt/lib/runtime/stop.cpp
index a12e9f14d90ec..87681dc8a1b1e 100644
--- a/flang-rt/lib/runtime/stop.cpp
+++ b/flang-rt/lib/runtime/stop.cpp
@@ -13,11 +13,15 @@
 #include "flang-rt/runtime/file.h"
 #include "flang-rt/runtime/io-error.h"
 #include "flang-rt/runtime/terminator.h"
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 #include "flang/Runtime/main.h"
+#endif
 #include <cfenv>
 #include <cstdio>
 #include <cstdlib>
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 #include <thread>
+#endif
 
 #ifdef HAVE_BACKTRACE
 #include BACKTRACE_HEADER
@@ -26,6 +30,7 @@
 extern "C" {
 
 [[maybe_unused]] static void DescribeIEEESignaledExceptions() {
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 #ifdef fetestexcept // a macro in some environments; omit std::
   auto excepts{fetestexcept(FE_ALL_EXCEPT)};
 #else
@@ -60,6 +65,7 @@ extern "C" {
 #endif
     std::fputc('\n', stderr);
   }
+#endif
 }
 
 static void CloseAllExternalUnits(const char *why) {
@@ -67,6 +73,7 @@ static void CloseAllExternalUnits(const char *why) {
   Fortran::runtime::io::ExternalFileUnit::CloseAll(handler);
 }
 
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 [[noreturn]] RT_API_ATTRS void RTNAME(StopStatement)(
     int code, bool isErrorStop, bool quiet) {
 #if defined(RT_DEVICE_COMPILATION)
@@ -103,7 +110,9 @@ static void CloseAllExternalUnits(const char *why) {
   std::exit(code);
 #endif
 }
+#endif
 
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 [[noreturn]] RT_API_ATTRS void RTNAME(StopStatementText)(
     const char *code, std::size_t length, bool isErrorStop, bool quiet) {
 #if defined(RT_DEVICE_COMPILATION)
@@ -136,6 +145,7 @@ static void CloseAllExternalUnits(const char *why) {
   }
 #endif
 }
+#endif
 
 static bool StartPause() {
   if (Fortran::runtime::io::IsATerminal(0)) {
@@ -218,13 +228,14 @@ static RT_NOINLINE_ATTR void PrintBacktrace() {
 #endif
 }
 
-
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 [[noreturn]] RT_OPTNONE_ATTR void RTNAME(Abort)() {
 #ifdef HAVE_BACKTRACE
   PrintBacktrace();
 #endif
   std::abort();
 }
+#endif
 
 RT_OPTNONE_ATTR void FORTRAN_PROCEDURE_NAME(backtrace)() { PrintBacktrace(); }
 
diff --git a/flang/include/flang/Runtime/extensions.h b/flang/include/flang/Runtime/extensions.h
index f2765a5987ea1..82392d2bb9aa1 100644
--- a/flang/include/flang/Runtime/extensions.h
+++ b/flang/include/flang/Runtime/extensions.h
@@ -18,10 +18,13 @@
 
 #define FORTRAN_PROCEDURE_NAME(name) name##_
 
-#ifdef _WIN32
+#if defined(_WIN32)
 // UID and GID don't exist on Windows, these exist to avoid errors.
 typedef std::uint32_t uid_t;
 typedef std::uint32_t gid_t;
+#elif (defined(__AMDGPU__) || defined(__NVPTX__)) && defined(EMBED_FLANG_RT_GPU_LLVM_IR)
+typedef std::uint32_t uid_t;
+typedef std::uint32_t gid_t;
 #else
 #include "sys/types.h" //pid_t
 #endif
diff --git a/flang/include/flang/Runtime/main.h b/flang/include/flang/Runtime/main.h
index 40f7693221b6a..696ce466e67fc 100644
--- a/flang/include/flang/Runtime/main.h
+++ b/flang/include/flang/Runtime/main.h
@@ -11,11 +11,15 @@
 
 #include "flang/Runtime/c-or-cpp.h"
 #include "flang/Runtime/entry-names.h"
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 #include <thread>
+#endif
 
 struct EnvironmentDefaultList;
 
+#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR)
 std::thread::id RTNAME(GetMainThreadId)();
+#endif
 
 FORTRAN_EXTERN_C_BEGIN
 void RTNAME(ProgramStart)(
diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt
index 4e6b4195a9c5e..46d799cbec114 100644
--- a/libc/CMakeLists.txt
+++ b/libc/CMakeLists.txt
@@ -67,6 +67,7 @@ if(NOT LIBC_NAMESPACE MATCHES "^__llvm_libc")
   message(FATAL_ERROR "Invalid LIBC_NAMESPACE. Must start with '__llvm_libc' was '${LIBC_NAMESPACE}'")
 endif()
 
+string(REPLACE "." "_" LIBC_NAMESPACE "${LIBC_NAMESPACE}")
 message(STATUS "Setting LIBC_NAMESPACE namespace to '${LIBC_NAMESPACE}'")
 add_compile_definitions(LIBC_NAMESPACE=${LIBC_NAMESPACE})
 
diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp
index 5028fc88fe46d..b9f8e01198c33 100644
--- a/libcxx/src/string.cpp
+++ b/libcxx/src/string.cpp
@@ -360,9 +360,11 @@ wstring to_wstring(unsigned long val) { return i_to_string<wstring>(val); }
 wstring to_wstring(unsigned long long val) { return i_to_string<wstring>(val); }
 #endif
 
+#if not defined(__AMDGPU__) && not defined(__NVPTX__)
 string to_string(float val) { return as_string(snprintf, initial_string< string>()(), "%f", val); }
 string to_string(double val) { return as_string(snprintf, initial_string< string>()(), "%f", val); }
 string to_string(long double val) { return as_string(snprintf, initial_string< string>()(), "%Lf", val); }
+#endif
 
 #if _LIBCPP_HAS_WIDE_CHARACTERS
 wstring to_wstring(float val) { return as_string(get_swprintf(), initial_string<wstring>()(), L"%f", val); }