Skip to content

recursion on gpu stream scheduler with any_sender_of #1022

@weilewei

Description

@weilewei

Hi,

I would like to know how we can do recursion with the GPU stream scheduler. I am implementing a stencil code on the GPU with the stdexec library, and in the future I plan to explore more task-based algorithms using senders on the GPU. I feel that enabling recursion with the GPU stream scheduler would be beneficial. This is an ongoing collaboration project with @brycelelbach, Jeff Larkin, the nvhpc premium service, and NERSC/Berkeley Lab.

I am still struggling with issue #995 on newer compilers (23.5, 23.7). The original feedback was that the code could be compiled on Compiler Explorer (https://godbolt.org/z/v4nvKsa77), but when I execute it on Compiler Explorer, it actually produces errors (attached below). Moreover, the compiler flag --experimental-stdpar, which enables the sender feature, was not supplied in the original godbolt link, yet the code still compiled. I think the environment on Compiler Explorer is different. If it works in your local environment, could you please share some insight into how to make such code work? Thanks.

Here are the source code, the CMake file, and the various methods I tried:

code:

// [[file:../../../async_control.org::*Simple Recursion][Simple Recursion:1]]
#include <cassert>
#include <exec/static_thread_pool.hpp>
#include <exec/any_sender_of.hpp>
#include <iostream>

#include <nvexec/stream_context.cuh>

// Shorthand for the `any_sender<>` type nested inside
// `exec::any_receiver_ref<stdexec::completion_signatures<Ts...>>`.
// `Ts...` is the list of completion signatures forwarded to
// `stdexec::completion_signatures` (e.g. `stdexec::set_value_t(int)`).
// NOTE(review): presumably this is stdexec's type-erased sender wrapper --
// confirm against exec/any_sender_of.hpp.
template <class... Ts>
using any_sender_of = typename exec::any_receiver_ref<
    stdexec::completion_signatures<Ts...>>::template any_sender<>;
// Simple Recursion:1 ends here

// [[file:../../../async_control.org::*Simple Recursion][Simple Recursion:2]]
// Sender type returned by `fac`. Its declared completion signatures are:
// a value completion carrying one `int`, a stopped completion, and an error
// completion carrying a `std::exception_ptr`.
using any_int_sender =
    any_sender_of<stdexec::set_value_t(int),
                  stdexec::set_stopped_t(),
                  stdexec::set_error_t(std::exception_ptr)>;

// Builds a sender describing the factorial of `n`, expressed recursively.
//
// - n == 0: returns `stdexec::just(1)`.
// - otherwise: starts from `just(n - 1)`, calls `fac` again from inside the
//   `let_value` callback, then multiplies the value coming out of that inner
//   sender by `n`.
//
// The recursion only terminates through the `n == 0` base case; negative `n`
// is not handled here.
auto fac(int n) -> any_int_sender {
    if (n == 0) {
        return stdexec::just(1);
    }

    // Recursive step: receives n - 1 and yields the sender for (n - 1)!.
    auto recurse = [](int smaller) { return fac(smaller); };
    // Final step: scale the inner result by the captured n.
    auto scale_by_n = [n](int inner_result) { return inner_result * n; };

    return stdexec::just(n - 1)
        | stdexec::let_value(recurse)
        | stdexec::then(scale_by_n);
}
// Simple Recursion:2 ends here

// [[file:../../../async_control.org::*Simple Recursion][Simple Recursion:3]]
int main() {
    nvexec::stream_context stream_ctx{};
    stdexec::scheduler auto sch = stream_ctx.get_scheduler();
    stdexec::sender auto begin = stdexec::schedule(sch);

// [[file:../../../async_control.org::*Simple Recursion][Simple Recursion:4]]
    int                  k = 10;
    stdexec::sender auto factorial =
        begin
        | stdexec::then([=]() { return k; })
        | stdexec::let_value([](int k) { return fac(k); });

    std::cout << "factorial built\n\n";

    auto [i] = stdexec::sync_wait(std::move(factorial)).value();
    std::cout << "factorial " << k << " = " << i << '\n';
// Simple Recursion:4 ends here

// [[file:../../../async_control.org::*Simple Recursion][Simple Recursion:5]]
    }
// Simple Recursion:5 ends here

cmake:

# Build script for the factorial_gpu reproduction: bootstraps CPM.cmake,
# fetches stdexec from GitHub, and builds a single executable against it.
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

project(stdexecExample)

# CPM.cmake release to download if it is not already present.
set(CPM_DOWNLOAD_VERSION 0.34.0)
# Append the compiler flags used for this experiment (the same flags as the
# manual nvc++ invocation). NOTE(review): the --gcc-toolchain path is specific
# to the author's machine -- adjust for other systems.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Minfo -std=c++20 --experimental-stdpar -stdpar=gpu --gcc-toolchain=/opt/cray/pe/gcc/12.2.0/bin/")

# Pick where CPM.cmake is stored: the CPM_SOURCE_CACHE CMake variable first,
# then the CPM_SOURCE_CACHE environment variable, else the build directory.
if(CPM_SOURCE_CACHE)
  set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
elseif(DEFINED ENV{CPM_SOURCE_CACHE})
  set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
else()
  set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
endif()

# Download CPM.cmake only when the file is not already at that location.
if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION}))
  message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}")
  file(DOWNLOAD
       https://github.com/TheLartians/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
       ${CPM_DOWNLOAD_LOCATION}
  )
endif()

include(${CPM_DOWNLOAD_LOCATION})

# Fetch stdexec with CUDA support enabled and its examples/tests disabled.
CPMAddPackage(
  NAME stdexec
  GITHUB_REPOSITORY NVIDIA/stdexec
  GIT_TAG main # This will always pull the latest code from the `main` branch. You may also use a specific release version or tag
  OPTIONS 
  "STDEXEC_ENABLE_CUDA ON"
  "STDEXEC_BUILD_EXAMPLES OFF" 
  "STDEXEC_BUILD_TESTS OFF" 
  "STDEXEC_ENABLE_IO_URING_TESTS OFF"
  "BUILD_TESTING OFF"
)

# The reproduction program itself, linked against the `stdexec` target.
add_executable(factorial_gpu factorial_gpu.cpp)
target_link_libraries(factorial_gpu stdexec)

method 1: nvhpc 23.5, using the stdexec library bundled with nvc++

  1. godbolt with code execution enabled: https://godbolt.org/z/51ovjvohh, and the error is as follows:
ASM generation compiler returned: 0

Killed - processing time exceeded
cleaning up after signal(15)...
Program terminated with signal: SIGKILL
Execution build compiler returned: 143
  2. My local build:
wwei@nid001013:~/src/test-gpu-recursion> nvc++ --version

nvc++ 23.5-0 64-bit target on x86-64 Linux -tp zen3 
NVIDIA Compilers and Tools
Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.

wwei@nid001013:~/src/test-gpu-recursion> echo $PATH
/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/bin:/opt/cray/pe/mpich/8.1.25/ofi/gnu/9.1/bin:/opt/cray/pe/mpich/8.1.25/bin:/opt/cray/pe/gcc/12.2.0/bin:/global/u2/w/wwei/.vscode-server/bin/97dec172d3256f8ca4bfb2143f3f76b503ca0534/bin/remote-cli:/global/common/software/nersc/bin:/opt/cray/pe/perftools/23.03.0/bin:/opt/cray/pe/papi/7.0.0.1/bin:/opt/cray/pe/craype/2.7.20/bin:/opt/cray/libfabric/1.15.2.0/bin:/usr/local/bin:/usr/bin:/bin:/usr/lib/mit/bin:/opt/cray/pe/bin

wwei@nid001013:~/src/test-gpu-recursion> echo $LD_LIBRARY_PATH
/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/cuda/lib64:/opt/cray/pe/gcc/12.2.0/snos/lib64:/opt/cray/pe/papi/7.0.0.1/lib64:/opt/cray/libfabric/1.15.2.0/lib64
wwei@nid001013:~/src/test-gpu-recursion> nvc++ -Minfo -std=c++20 --experimental-stdpar -stdpar=gpu --gcc-toolchain=/opt/cray/pe/gcc/12.2.0/bin/ factorial_gpu.cpp -o factorial_gpu
"/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/let_xxx.cuh", line 124: error: no instance of function template "stdexec::__connect::connect_t::operator()" matches the argument list
            argument types are: (result_sender_t, nvexec::_strm::propagate_receiver_t<nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>>::__t)
            object type is: const stdexec::__connect::connect_t
                  return stdexec::connect(
                         ^
          detected during:
            instantiation of "void nvexec::_strm::let_xxx::tag_invoke(_Tag, nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<stdexec::__id<nvexec::_strm::sync_wait::sync_wait_t::receiver_t<std::remove_reference<stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__let::let_value_t, lambda [](int)->any_int_sender>, stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__then::then_t, lambda []()->int>, stdexec::__tag_invoke::tag_invoke_result_t<stdexec::__schedule::schedule_t, nvexec::_strm::stream_scheduler &> &>> &>::type>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<std::decay<std::enable_if<true, int>::type>::type>>::__t &&, _As &&...) noexcept [with _Tag=stdexec::__receivers::set_value_t, _As=<int &>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            instantiation of "auto stdexec::__tag_invoke::tag_invoke_t::operator()(_Tag, _Args &&...) const->stdexec::__tag_invoke::tag_invoke_result_t<_Tag, _Args...> [with _Tag=stdexec::__receivers::set_value_t, _Args=<nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<int>>::__t, int &>]" at line 365 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/execution.hpp"
            instantiation of "void stdexec::__receivers::set_value_t::operator()(_Receiver &&, _As &&...) const noexcept [with _Receiver=nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<int>>::__t, _As=<int &>]" at line 419 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/common.cuh"
            instantiation of "void nvexec::_strm::operation_state_base_<OuterReceiverId>::__t::propagate_completion_signal(Tag, As &&...) noexcept [with OuterReceiverId=nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<int>>, Tag=stdexec::__receivers::set_value_t, As=<int &>]" at line 76 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/then.cuh"
            instantiation of "void nvexec::_strm::then::tag_invoke(stdexec::__receivers::set_value_t, nvexec::_strm::then::receiver_t<4UL, nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<stdexec::__id<nvexec::_strm::sync_wait::sync_wait_t::receiver_t<std::remove_reference<stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__let::let_value_t, lambda [](int)->any_int_sender>, stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__then::then_t, lambda []()->int>, stdexec::__tag_invoke::tag_invoke_result_t<stdexec::__schedule::schedule_t, nvexec::_strm::stream_scheduler &> &>> &>::type>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<std::decay<std::enable_if<true, int>::type>::type>>::__t::__id, lambda []()->int>::__t &&, As &&...) noexcept [with As=<>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            [ 11 instantiation contexts not shown ]
            instantiation of "void stdexec::__start::start_t::operator()(_Op &) const noexcept [with _Op=nvexec::_strm::let_xxx::__operation<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t>]" at line 501 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/common.cuh"
            instantiation of class "nvexec::_strm::operation_state_<CvrefSenderId, InnerReceiverId, OuterReceiverId>::__t [with CvrefSenderId=nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, InnerReceiverId=nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, OuterReceiverId=nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            instantiation of "auto stdexec::__tag_invoke::tag_invoke_t::operator()(_Tag, _Args &&...) const->stdexec::__tag_invoke::tag_invoke_result_t<_Tag, _Args...> [with _Tag=stdexec::__start::start_t, _Args=<nvexec::_strm::operation_state_<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>::__t &>]" at line 1224 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/execution.hpp"
            instantiation of "void stdexec::__start::start_t::operator()(_Op &) const noexcept [with _Op=nvexec::_strm::operation_state_<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>::__t]" at line 138 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/sync_wait.cuh"
            instantiation of "auto nvexec::_strm::sync_wait::sync_wait_t::operator()(nvexec::_strm::context_state_t, Sender &&) const->std::optional<nvexec::_strm::sync_wait::sync_wait_result_t<Sender>> [with Sender=nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>::__t]" at line 257 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream_context.cuh"

"/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/__detail/__meta.hpp", line 70: error: class "stdexec::__mdefer<stdexec::__q<stdexec::__call_result_>, lambda []()-><error-type>>" has no member "__t"
    using __t = typename _T::__t;
                             ^
          detected during:
            instantiation of type "stdexec::__t<stdexec::__mdefer<stdexec::__q<stdexec::__call_result_>, lambda []()-><error-type>>>" at line 548
            instantiation of type "stdexec::__call_result_t<lambda []()-><error-type>>" at line 569
            instantiation of class "stdexec::__conv<_Fn> [with _Fn=lambda []()-><error-type>]" at line 128 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/let_xxx.cuh"
            instantiation of "void nvexec::_strm::let_xxx::tag_invoke(_Tag, nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<stdexec::__id<nvexec::_strm::sync_wait::sync_wait_t::receiver_t<std::remove_reference<stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__let::let_value_t, lambda [](int)->any_int_sender>, stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__then::then_t, lambda []()->int>, stdexec::__tag_invoke::tag_invoke_result_t<stdexec::__schedule::schedule_t, nvexec::_strm::stream_scheduler &> &>> &>::type>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<std::decay<std::enable_if<true, int>::type>::type>>::__t &&, _As &&...) noexcept [with _Tag=stdexec::__receivers::set_value_t, _As=<int &>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            instantiation of "auto stdexec::__tag_invoke::tag_invoke_t::operator()(_Tag, _Args &&...) const->stdexec::__tag_invoke::tag_invoke_result_t<_Tag, _Args...> [with _Tag=stdexec::__receivers::set_value_t, _Args=<nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<int>>::__t, int &>]" at line 365 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/execution.hpp"
            [ 14 instantiation contexts not shown ]
            instantiation of "void stdexec::__start::start_t::operator()(_Op &) const noexcept [with _Op=nvexec::_strm::let_xxx::__operation<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t>]" at line 501 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/common.cuh"
            instantiation of class "nvexec::_strm::operation_state_<CvrefSenderId, InnerReceiverId, OuterReceiverId>::__t [with CvrefSenderId=nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, InnerReceiverId=nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, OuterReceiverId=nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            instantiation of "auto stdexec::__tag_invoke::tag_invoke_t::operator()(_Tag, _Args &&...) const->stdexec::__tag_invoke::tag_invoke_result_t<_Tag, _Args...> [with _Tag=stdexec::__start::start_t, _Args=<nvexec::_strm::operation_state_<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>::__t &>]" at line 1224 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/execution.hpp"
            instantiation of "void stdexec::__start::start_t::operator()(_Op &) const noexcept [with _Op=nvexec::_strm::operation_state_<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>::__t]" at line 138 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/sync_wait.cuh"
            instantiation of "auto nvexec::_strm::sync_wait::sync_wait_t::operator()(nvexec::_strm::context_state_t, Sender &&) const->std::optional<nvexec::_strm::sync_wait::sync_wait_result_t<Sender>> [with Sender=nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>::__t]" at line 257 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream_context.cuh"

"/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/let_xxx.cuh", line 122: error: no instance of overloaded function "std::variant<_Types...>::emplace [with _Types=<std::monostate, exec::__any::__operation<exec::__any::__sender<stdexec::completion_signatures<stdexec::__receivers::set_value_t (int), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr)>, stdexec::__types<>, stdexec::__types<>>::__t, nvexec::_strm::propagate_receiver_t<nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>>::__t, stdexec::__types<>>::__t>]" matches the argument list
            argument types are: (stdexec::__conv<lambda []()-><error-type>>)
            object type is: std::variant<std::monostate, exec::__any::__operation<exec::__any::__sender<stdexec::completion_signatures<stdexec::__receivers::set_value_t (int), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr)>, stdexec::__types<>, stdexec::__types<>>::__t, nvexec::_strm::propagate_receiver_t<nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>>::__t, stdexec::__types<>>::__t>
              auto& __op = __self.__op_state_->__op_state3_.template emplace<op_state_t>(
                                                                     ^
          detected during:
            instantiation of "void nvexec::_strm::let_xxx::tag_invoke(_Tag, nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<stdexec::__id<nvexec::_strm::sync_wait::sync_wait_t::receiver_t<std::remove_reference<stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__let::let_value_t, lambda [](int)->any_int_sender>, stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__then::then_t, lambda []()->int>, stdexec::__tag_invoke::tag_invoke_result_t<stdexec::__schedule::schedule_t, nvexec::_strm::stream_scheduler &> &>> &>::type>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<std::decay<std::enable_if<true, int>::type>::type>>::__t &&, _As &&...) noexcept [with _Tag=stdexec::__receivers::set_value_t, _As=<int &>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            instantiation of "auto stdexec::__tag_invoke::tag_invoke_t::operator()(_Tag, _Args &&...) const->stdexec::__tag_invoke::tag_invoke_result_t<_Tag, _Args...> [with _Tag=stdexec::__receivers::set_value_t, _Args=<nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<int>>::__t, int &>]" at line 365 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/execution.hpp"
            instantiation of "void stdexec::__receivers::set_value_t::operator()(_Receiver &&, _As &&...) const noexcept [with _Receiver=nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<int>>::__t, _As=<int &>]" at line 419 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/common.cuh"
            instantiation of "void nvexec::_strm::operation_state_base_<OuterReceiverId>::__t::propagate_completion_signal(Tag, As &&...) noexcept [with OuterReceiverId=nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<int>>, Tag=stdexec::__receivers::set_value_t, As=<int &>]" at line 76 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/then.cuh"
            instantiation of "void nvexec::_strm::then::tag_invoke(stdexec::__receivers::set_value_t, nvexec::_strm::then::receiver_t<4UL, nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<stdexec::__id<nvexec::_strm::sync_wait::sync_wait_t::receiver_t<std::remove_reference<stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__let::let_value_t, lambda [](int)->any_int_sender>, stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__then::then_t, lambda []()->int>, stdexec::__tag_invoke::tag_invoke_result_t<stdexec::__schedule::schedule_t, nvexec::_strm::stream_scheduler &> &>> &>::type>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<std::decay<std::enable_if<true, int>::type>::type>>::__t::__id, lambda []()->int>::__t &&, As &&...) noexcept [with As=<>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            [ 11 instantiation contexts not shown ]
            instantiation of "void stdexec::__start::start_t::operator()(_Op &) const noexcept [with _Op=nvexec::_strm::let_xxx::__operation<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t>]" at line 501 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/common.cuh"
            instantiation of class "nvexec::_strm::operation_state_<CvrefSenderId, InnerReceiverId, OuterReceiverId>::__t [with CvrefSenderId=nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, InnerReceiverId=nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, OuterReceiverId=nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            instantiation of "auto stdexec::__tag_invoke::tag_invoke_t::operator()(_Tag, _Args &&...) const->stdexec::__tag_invoke::tag_invoke_result_t<_Tag, _Args...> [with _Tag=stdexec::__start::start_t, _Args=<nvexec::_strm::operation_state_<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>::__t &>]" at line 1224 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/execution.hpp"
            instantiation of "void stdexec::__start::start_t::operator()(_Op &) const noexcept [with _Op=nvexec::_strm::operation_state_<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>::__t]" at line 138 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/sync_wait.cuh"
            instantiation of "auto nvexec::_strm::sync_wait::sync_wait_t::operator()(nvexec::_strm::context_state_t, Sender &&) const->std::optional<nvexec::_strm::sync_wait::sync_wait_result_t<Sender>> [with Sender=nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>::__t]" at line 257 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream_context.cuh"

"/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/let_xxx.cuh", line 129: error: no instance of function template "stdexec::__start::start_t::operator()" matches the argument list
            argument types are: (<error-type>)
            object type is: const stdexec::__start::start_t
              stdexec::start(__op);
              ^
          detected during:
            instantiation of "void nvexec::_strm::let_xxx::tag_invoke(_Tag, nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<stdexec::__id<nvexec::_strm::sync_wait::sync_wait_t::receiver_t<std::remove_reference<stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__let::let_value_t, lambda [](int)->any_int_sender>, stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__then::then_t, lambda []()->int>, stdexec::__tag_invoke::tag_invoke_result_t<stdexec::__schedule::schedule_t, nvexec::_strm::stream_scheduler &> &>> &>::type>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<std::decay<std::enable_if<true, int>::type>::type>>::__t &&, _As &&...) noexcept [with _Tag=stdexec::__receivers::set_value_t, _As=<int &>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            instantiation of "auto stdexec::__tag_invoke::tag_invoke_t::operator()(_Tag, _Args &&...) const->stdexec::__tag_invoke::tag_invoke_result_t<_Tag, _Args...> [with _Tag=stdexec::__receivers::set_value_t, _Args=<nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<int>>::__t, int &>]" at line 365 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/execution.hpp"
            instantiation of "void stdexec::__receivers::set_value_t::operator()(_Receiver &&, _As &&...) const noexcept [with _Receiver=nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<int>>::__t, _As=<int &>]" at line 419 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/common.cuh"
            instantiation of "void nvexec::_strm::operation_state_base_<OuterReceiverId>::__t::propagate_completion_signal(Tag, As &&...) noexcept [with OuterReceiverId=nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<int>>, Tag=stdexec::__receivers::set_value_t, As=<int &>]" at line 76 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/then.cuh"
            instantiation of "void nvexec::_strm::then::tag_invoke(stdexec::__receivers::set_value_t, nvexec::_strm::then::receiver_t<4UL, nvexec::_strm::let_xxx::__receiver_<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<stdexec::__id<nvexec::_strm::sync_wait::sync_wait_t::receiver_t<std::remove_reference<stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__let::let_value_t, lambda [](int)->any_int_sender>, stdexec::__call_result_t<stdexec::__closure::__binder_back<stdexec::__then::then_t, lambda []()->int>, stdexec::__tag_invoke::tag_invoke_result_t<stdexec::__schedule::schedule_t, nvexec::_strm::stream_scheduler &> &>> &>::type>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t, std::tuple<std::decay<std::enable_if<true, int>::type>::type>>::__t::__id, lambda []()->int>::__t &&, As &&...) noexcept [with As=<>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            [ 11 instantiation contexts not shown ]
            instantiation of "void stdexec::__start::start_t::operator()(_Op &) const noexcept [with _Op=nvexec::_strm::let_xxx::__operation<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, lambda [](int)->any_int_sender, stdexec::__receivers::set_value_t>]" at line 501 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/common.cuh"
            instantiation of class "nvexec::_strm::operation_state_<CvrefSenderId, InnerReceiverId, OuterReceiverId>::__t [with CvrefSenderId=nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, InnerReceiverId=nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, OuterReceiverId=nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>]" at line 106 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/functional.hpp"
            instantiation of "auto stdexec::__tag_invoke::tag_invoke_t::operator()(_Tag, _Args &&...) const->stdexec::__tag_invoke::tag_invoke_result_t<_Tag, _Args...> [with _Tag=stdexec::__start::start_t, _Args=<nvexec::_strm::operation_state_<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>::__t &>]" at line 1224 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/exec/../stdexec/execution.hpp"
            instantiation of "void stdexec::__start::start_t::operator()(_Op &) const noexcept [with _Op=nvexec::_strm::operation_state_<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>, nvexec::_strm::propagate_receiver_t<nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>, nvexec::_strm::sync_wait::receiver_t<nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>>>::__t]" at line 138 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream/sync_wait.cuh"
            instantiation of "auto nvexec::_strm::sync_wait::sync_wait_t::operator()(nvexec::_strm::context_state_t, Sender &&) const->std::optional<nvexec::_strm::sync_wait::sync_wait_result_t<Sender>> [with Sender=nvexec::_strm::let_sender_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_::__t::__id, lambda []()->int>, lambda [](int)->any_int_sender, stdexec::_X<stdexec::__receivers::set_value_t>::_T>::__t]" at line 257 of "/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/include-stdexec/experimental/nvexec/stream_context.cuh"

4 errors detected in the compilation of "factorial_gpu.cpp".

method 2: nvhpc/23.5, using the latest stdexec library (fetched from the main branch via CPM)

wwei@nid001013:~/src/test-gpu-recursion/build> CXX=/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/bin/nvc++ CC=/opt/cray/pe/gcc/12.2.0/bin/gcc cmake -DCMAKE_BUILD_TYPE=Debug ..
-- The C compiler identification is GNU 12.2.0
-- The CXX compiler identification is NVHPC 23.5.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /opt/cray/pe/gcc/12.2.0/bin/gcc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/bin/nvc++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Downloading CPM.cmake to /global/homes/w/wwei/src/test-gpu-recursion/build/cmake/CPM_0.34.0.cmake
-- CPM: adding package stdexec@ (main)
-- System           : Linux-5.14.21-150400.24.46_12.0.73-cray_shasta_c
-- System name      : Linux
-- System ver       : 5.14.21-150400.24.46_12.0.73-cray_shasta_c
-- 
-- Library ver      : 0.8.0
-- Build date       : 2023-08-10
-- Build year       : 2023
-- 
CMake Warning (dev) at build/cmake/CPM_0.35.6.cmake:37 (message):
  CPM: stdexec: A dependency is using a more recent CPM version (0.35.6) than
  the current project (0.34.0).  It is recommended to upgrade CPM to the most
  recent version.  See https://github.com/cpm-cmake/CPM.cmake for more
  information.
Call Stack (most recent call first):
  build/_deps/rapids-cmake-src/rapids-cmake/cpm/detail/download.cmake:85 (include)
  build/_deps/rapids-cmake-src/rapids-cmake/cpm/init.cmake:65 (rapids_cpm_download)
  build/_deps/stdexec-src/CMakeLists.txt:82 (rapids_cpm_init)
This warning is for project developers.  Use -Wno-dev to suppress it.

-- CPM: stdexec: adding package Catch2@2.13.6 (2.13.6)
CMake Warning (dev) at /global/common/software/nersc/pm-2022q4/spack/linux-sles15-zen/cmake-3.24.3-k5msymx/share/cmake-3.24/Modules/FetchContent.cmake:1267 (message):
  The DOWNLOAD_EXTRACT_TIMESTAMP option was not given and policy CMP0135 is
  not set.  The policy's OLD behavior will be used.  When using a URL
  download, the timestamps of extracted files should preferably be that of
  the time of extraction, otherwise code that depends on the extracted
  contents might not be rebuilt if the URL changes.  The OLD behavior
  preserves the timestamps from the archive instead, but this is usually not
  what you want.  Update your project to the NEW behavior or specify the
  DOWNLOAD_EXTRACT_TIMESTAMP option with a value of true to avoid this
  robustness issue.
Call Stack (most recent call first):
  build/cmake/CPM_0.34.0.cmake:780 (FetchContent_Declare)
  build/cmake/CPM_0.34.0.cmake:667 (cpm_declare_fetch)
  build/cmake/CPM_0.34.0.cmake:262 (CPMAddPackage)
  build/_deps/rapids-cmake-src/rapids-cmake/cpm/find.cmake:167 (CPMFindPackage)
  build/_deps/stdexec-src/CMakeLists.txt:88 (rapids_cpm_find)
This warning is for project developers.  Use -Wno-dev to suppress it.

-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
-- Looking for pthread_create in pthreads
-- Looking for pthread_create in pthreads - not found
-- Looking for pthread_create in pthread
-- Looking for pthread_create in pthread - found
-- Found Threads: TRUE  
-- Configuring done
-- Generating done
-- Build files have been written to: /global/homes/w/wwei/src/test-gpu-recursion/build
wwei@nid001013:~/src/test-gpu-recursion/build> make VERBOSE=1
/global/common/software/nersc/pm-2022q4/spack/linux-sles15-zen/cmake-3.24.3-k5msymx/bin/cmake -S/global/homes/w/wwei/src/test-gpu-recursion -B/global/homes/w/wwei/src/test-gpu-recursion/build --check-build-system CMakeFiles/Makefile.cmake 0
/global/common/software/nersc/pm-2022q4/spack/linux-sles15-zen/cmake-3.24.3-k5msymx/bin/cmake -E cmake_progress_start /global/homes/w/wwei/src/test-gpu-recursion/build/CMakeFiles /global/homes/w/wwei/src/test-gpu-recursion/build//CMakeFiles/progress.marks
make  -f CMakeFiles/Makefile2 all
make[1]: Entering directory '/global/u2/w/wwei/src/test-gpu-recursion/build'
make  -f CMakeFiles/factorial_gpu.dir/build.make CMakeFiles/factorial_gpu.dir/depend
make[2]: Entering directory '/global/u2/w/wwei/src/test-gpu-recursion/build'
cd /global/homes/w/wwei/src/test-gpu-recursion/build && /global/common/software/nersc/pm-2022q4/spack/linux-sles15-zen/cmake-3.24.3-k5msymx/bin/cmake -E cmake_depends "Unix Makefiles" /global/homes/w/wwei/src/test-gpu-recursion /global/homes/w/wwei/src/test-gpu-recursion /global/homes/w/wwei/src/test-gpu-recursion/build /global/homes/w/wwei/src/test-gpu-recursion/build /global/homes/w/wwei/src/test-gpu-recursion/build/CMakeFiles/factorial_gpu.dir/DependInfo.cmake --color=
make[2]: Leaving directory '/global/u2/w/wwei/src/test-gpu-recursion/build'
make  -f CMakeFiles/factorial_gpu.dir/build.make CMakeFiles/factorial_gpu.dir/build
make[2]: Entering directory '/global/u2/w/wwei/src/test-gpu-recursion/build'
[ 50%] Building CXX object CMakeFiles/factorial_gpu.dir/factorial_gpu.cpp.o
/pscratch/sd/w/wwei/nvhpc_23_5/Linux_x86_64/23.5/compilers/bin/nvc++  -I/global/homes/w/wwei/src/test-gpu-recursion/build/_deps/stdexec-src/include -Minfo -std=c++20 --experimental-stdpar -stdpar=gpu --gcc-toolchain=/opt/cray/pe/gcc/12.2.0/bin/ -g -O0 -std=gnu++20 -MD -MT CMakeFiles/factorial_gpu.dir/factorial_gpu.cpp.o -MF CMakeFiles/factorial_gpu.dir/factorial_gpu.cpp.o.d -o CMakeFiles/factorial_gpu.dir/factorial_gpu.cpp.o -c /global/homes/w/wwei/src/test-gpu-recursion/factorial_gpu.cpp
"/global/homes/w/wwei/src/test-gpu-recursion/build/_deps/stdexec-src/include/exec/any_sender_of.hpp", line 456: error: global or namespace scope variables such as "exec::__any::__null_storage_vtbl [with _ParentVTable=exec::__any::__sender<stdexec::completion_signatures<stdexec::__receivers::set_value_t (int), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr *)>, stdexec::__types<>, stdexec::__types<>>::__vtable, _StorageCPOs=<exec::__any::__delete_t (void (*)() noexcept), exec::__any::__move_construct_t (void (*)(exec::__any::__storage<exec::__any::__sender<stdexec::completion_signatures<stdexec::__receivers::set_value_t (int), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr *)>, stdexec::__types<>, stdexec::__types<>>::__vtable, std::allocator<std::byte>, false, 16UL, 24UL>::__t &&) noexcept)>]" (declared at line 222) cannot be accessed from device code
            function "exec::__any::__storage<_Vtable, _Allocator, _Copyable, _Alignment, _InlineSize>::__t::__reset [with _Vtable=exec::__any::__sender<stdexec::completion_signatures<stdexec::__receivers::set_value_t (int), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr *)>, stdexec::__types<>, stdexec::__types<>>::__vtable, _Allocator=std::allocator<std::byte>, _Copyable=false, _Alignment=16UL, _InlineSize=24UL]" is implicitly a device function because it is called from device function "exec::__any::__storage<_Vtable, _Allocator, _Copyable, _Alignment, _InlineSize>::__t::~__t [with _Vtable=exec::__any::__sender<stdexec::completion_signatures<stdexec::__receivers::set_value_t (int), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr *)>, stdexec::__types<>, stdexec::__types<>>::__vtable, _Allocator=std::allocator<std::byte>, _Copyable=false, _Alignment=16UL, _InlineSize=24UL]" (declared at line 449)
          __vtable_ = __default_storage_vtable((__vtable_t*) nullptr);
                      ^

1 error detected in the compilation of "/global/homes/w/wwei/src/test-gpu-recursion/factorial_gpu.cpp".
make[2]: *** [CMakeFiles/factorial_gpu.dir/build.make:76: CMakeFiles/factorial_gpu.dir/factorial_gpu.cpp.o] Error 2
make[2]: Leaving directory '/global/u2/w/wwei/src/test-gpu-recursion/build'
make[1]: *** [CMakeFiles/Makefile2:135: CMakeFiles/factorial_gpu.dir/all] Error 2
make[1]: Leaving directory '/global/u2/w/wwei/src/test-gpu-recursion/build'
make: *** [Makefile:156: all] Error 2

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions