Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libcudacxx/include/cuda/std/__pstl/cuda/shift_left.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ struct __pstl_dispatch<__pstl_algorithm::__shift_left, __execution_backend::__cu
auto __flag_iter = ::cuda::transform_iterator{
::cuda::counting_iterator<size_t>{0}, __shift_left_predicate{static_cast<size_t>(__num_shifted)}};

- auto __stream = ::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{cudaStreamPerThread}, __policy);
+ auto __stream = ::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{::cudaStream_t{}}, __policy);

// Determine temporary device storage requirements for DeviceSelect::Flagged
size_t __num_bytes = 0;
Expand Down
2 changes: 1 addition & 1 deletion libcudacxx/include/cuda/std/__pstl/cuda/shift_right.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ struct __pstl_dispatch<__pstl_algorithm::__shift_right, __execution_backend::__c
const auto __count_remaining = static_cast<_OffsetType>(__count - __num_shifted);
const auto __result = __first + __num_shifted;

- auto __stream = ::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{cudaStreamPerThread}, __policy);
+ auto __stream = ::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{::cudaStream_t{}}, __policy);

if (2 * __num_shifted > __count)
{ // There is no overlap between the source and destination, so we can just copy
Expand Down
4 changes: 2 additions & 2 deletions libcudacxx/include/cuda/std/__pstl/cuda/sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ struct __pstl_dispatch<__pstl_algorithm::__sort, __execution_backend::__cuda>
_CCCL_HOST_API static void __radix_sort_impl(const _Policy& __policy, _Tp* __first, _Tp* __last, _BinaryPredicate)
{
const auto __count = static_cast<size_t>(::cuda::std::distance(__first, __last));
- auto __stream = ::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{cudaStreamPerThread}, __policy);
+ auto __stream = ::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{::cudaStream_t{}}, __policy);

CUB_NS_QUALIFIER::DoubleBuffer<_Tp> __buffer{__first, nullptr};

Expand Down Expand Up @@ -145,7 +145,7 @@ struct __pstl_dispatch<__pstl_algorithm::__sort, __execution_backend::__cuda>
__merge_sort_impl(const _Policy& __policy, _InputIterator __first, _InputIterator __last, _BinaryPredicate __pred)
{
const auto __count = ::cuda::std::distance(__first, __last);
- auto __stream = ::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{cudaStreamPerThread}, __policy);
+ auto __stream = ::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{::cudaStream_t{}}, __policy);

// Run the kernel
_CCCL_TRY_CUDA_API(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ struct __pstl_dispatch<__pstl_algorithm::__stable_partition, __execution_backend

_OffsetType __num_selected;
const auto __count = static_cast<_OffsetType>(::cuda::std::distance(__first, __last));
- auto __stream = ::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{cudaStreamPerThread}, __policy);
+ auto __stream = ::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{::cudaStream_t{}}, __policy);

// Determine temporary device storage requirements for device_stable_partition
size_t __num_bytes = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class __temporary_storage
_CCCL_REQUIRES((sizeof...(_Sizes) == sizeof...(_StoredTypes)))
_CCCL_HOST_API
__temporary_storage(const _Policy& __policy, const size_t __num_bytes_storage, const _Sizes... __elements_stored)
- : __stream_(::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{cudaStreamPerThread}, __policy))
+ : __stream_(::cuda::__call_or(::cuda::get_stream, ::cuda::stream_ref{::cudaStream_t{}}, __policy))
, __resource_(__get_memory_resource_or(__policy))
, __total_bytes_allocated_(__get_total_bytes_allocated(__num_bytes_storage, __elements_stored...))
, __storage_(__get_storage(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ static_assert(::cuda::mr::resource<test_resource>);
template <class Policy>
void test(Policy pol)
{
- auto old_stream = cuda::__call_or(::cuda::get_stream, cuda::stream_ref{cudaStreamPerThread}, pol);
+ auto old_stream = cuda::__call_or(::cuda::get_stream, cuda::stream_ref{::cudaStream_t{}}, pol);
auto fallback_resource = ::cuda::device_default_memory_pool(cuda::device_ref{0});
{ // Ensure that the plain policy is not callable with get_memory_resource
assert(cuda::__call_or(::cuda::mr::get_memory_resource, fallback_resource, pol) == fallback_resource);
Expand All @@ -77,7 +77,7 @@ void test(Policy pol)
test_resource resource{42};
auto pol_with_resource = pol.with(cuda::mr::get_memory_resource, resource);
assert(cuda::mr::get_memory_resource(pol_with_resource) == resource);
- assert(cuda::__call_or(::cuda::get_stream, cuda::stream_ref{cudaStreamPerThread}, pol_with_resource) == old_stream);
+ assert(cuda::__call_or(::cuda::get_stream, cuda::stream_ref{::cudaStream_t{}}, pol_with_resource) == old_stream);

using policy_t = decltype(pol_with_resource);
static_assert(cuda::std::is_execution_policy_v<policy_t>);
Expand All @@ -87,15 +87,15 @@ void test(Policy pol)
test_resource resource{42};
auto pol_with_resource = pol.with(cuda::mr::get_memory_resource, resource);
assert(cuda::mr::get_memory_resource(pol_with_resource) == resource);
- assert(cuda::__call_or(::cuda::get_stream, cuda::stream_ref{cudaStreamPerThread}, pol_with_resource) == old_stream);
+ assert(cuda::__call_or(::cuda::get_stream, cuda::stream_ref{::cudaStream_t{}}, pol_with_resource) == old_stream);

test_resource other_resource{1337};
decltype(auto) pol_with_other_resource = pol_with_resource.with(cuda::mr::get_memory_resource, other_resource);

// The original resource is unchanged
assert(cuda::mr::get_memory_resource(pol_with_resource) == resource);
assert(cuda::mr::get_memory_resource(pol_with_other_resource) == other_resource);
- assert(cuda::__call_or(::cuda::get_stream, cuda::stream_ref{cudaStreamPerThread}, pol_with_resource) == old_stream);
+ assert(cuda::__call_or(::cuda::get_stream, cuda::stream_ref{::cudaStream_t{}}, pol_with_resource) == old_stream);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ void test(Policy pol)
{
namespace execution = cuda::std::execution;

- cuda::stream_ref default_stream{cudaStreamPerThread};
+ cuda::stream_ref default_stream{::cudaStream_t{}};
{ // Ensure that the plain policy does not provide a stream
assert(cuda::__call_or(::cuda::get_stream, default_stream, pol) == default_stream);
}
Expand Down
Loading