Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions c/parallel/include/cccl/c/binary_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ typedef struct cccl_device_binary_search_build_result_t
size_t cubin_size;
CUlibrary library;
CUkernel kernel;
// Lowered (mangled) kernel name, heap-allocated, freed by cccl_device_binary_search_cleanup():
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could do without these comments.

char* kernel_lowered_name;
} cccl_device_binary_search_build_result_t;

CCCL_C_API CUresult cccl_device_binary_search_build(
Expand Down Expand Up @@ -61,6 +63,23 @@ CCCL_C_API CUresult cccl_device_binary_search_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for device binary search; declared next to cccl_device_binary_search_load().
// Takes the build result by pointer (`build`), the search mode, the data/values/output
// iterators, the comparison operator `op`, cc_major/cc_minor (presumably the target
// compute capability), the cub/thrust/libcudacxx/ctk path strings and an optional-looking
// `config`. Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_binary_search_build_ex() and leaves loading to
// cccl_device_binary_search_load() -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_binary_search_compile(
cccl_device_binary_search_build_result_t* build,
cccl_binary_search_mode_t mode,
cccl_iterator_t d_data,
cccl_iterator_t d_values,
cccl_iterator_t d_out,
cccl_op_t op,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Load step for a device binary search build result; takes `build` by pointer and
// returns a CUresult status code.
// NOTE(review): presumably loads the compiled cubin into `build->library` and resolves
// `build->kernel` via `build->kernel_lowered_name` -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_binary_search_load(cccl_device_binary_search_build_result_t* build);

CCCL_C_API CUresult cccl_device_binary_search(
cccl_device_binary_search_build_result_t build,
cccl_iterator_t d_data,
Expand Down
25 changes: 25 additions & 0 deletions c/parallel/include/cccl/c/histogram.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ typedef struct cccl_device_histogram_build_result_t
CUkernel init_kernel;
CUkernel sweep_kernel;
void* runtime_policy;
size_t runtime_policy_size;
// Lowered (mangled) kernel names, heap-allocated, freed by cccl_device_histogram_cleanup():
char* init_kernel_lowered_name;
char* sweep_kernel_lowered_name;
} cccl_device_histogram_build_result_t;

CCCL_C_API CUresult cccl_device_histogram_build(
Expand Down Expand Up @@ -77,6 +81,27 @@ CCCL_C_API CUresult cccl_device_histogram_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for device histogram; declared next to cccl_device_histogram_load().
// Takes the build result by pointer (`build`), the channel counts, the sample and output
// histogram iterators, the level/row/stride arguments, the `is_evenly_segmented` flag,
// cc_major/cc_minor (presumably the target compute capability), the
// cub/thrust/libcudacxx/ctk path strings and `config`. Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_histogram_build_ex() and leaves loading to cccl_device_histogram_load()
// -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_histogram_compile(
cccl_device_histogram_build_result_t* build,
int num_channels,
int num_active_channels,
cccl_iterator_t d_samples,
int num_output_levels_val,
cccl_iterator_t d_output_histograms,
cccl_value_t lower_level,
int64_t num_rows,
int64_t row_stride_samples,
bool is_evenly_segmented,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Load step for a device histogram build result; takes `build` by pointer and returns
// a CUresult status code.
// NOTE(review): presumably loads the compiled code and resolves `init_kernel` and
// `sweep_kernel` from the stored lowered kernel names -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_histogram_load(cccl_device_histogram_build_result_t* build);

CCCL_C_API CUresult cccl_device_histogram_even(
cccl_device_histogram_build_result_t build,
void* d_temp_storage,
Expand Down
22 changes: 22 additions & 0 deletions c/parallel/include/cccl/c/merge_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ typedef struct cccl_device_merge_sort_build_result_t
CUkernel partition_kernel;
CUkernel merge_kernel;
void* runtime_policy;
size_t runtime_policy_size;
// Lowered (mangled) kernel names, heap-allocated, freed by cccl_device_merge_sort_cleanup():
char* block_sort_kernel_lowered_name;
char* partition_kernel_lowered_name;
char* merge_kernel_lowered_name;
} cccl_device_merge_sort_build_result_t;

CCCL_C_API CUresult cccl_device_merge_sort_build(
Expand Down Expand Up @@ -66,6 +71,23 @@ CCCL_C_API CUresult cccl_device_merge_sort_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for device merge sort; declared next to cccl_device_merge_sort_load().
// Takes the build result by pointer (`build`), the input/output key and item iterators,
// the comparison operator `op`, cc_major/cc_minor (presumably the target compute
// capability), the cub/thrust/libcudacxx/ctk path strings and `config`.
// Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_merge_sort_build_ex() and leaves loading to cccl_device_merge_sort_load()
// -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_merge_sort_compile(
cccl_device_merge_sort_build_result_t* build,
cccl_iterator_t d_in_keys,
cccl_iterator_t d_in_items,
cccl_iterator_t d_out_keys,
cccl_iterator_t d_out_items,
cccl_op_t op,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Load step for a device merge sort build result; takes `build` by pointer and returns
// a CUresult status code.
// NOTE(review): presumably loads the compiled code and resolves the block-sort, partition
// and merge kernels from the stored lowered kernel names -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_merge_sort_load(cccl_device_merge_sort_build_result_t* build);

CCCL_C_API CUresult cccl_device_merge_sort(
cccl_device_merge_sort_build_result_t build,
void* d_temp_storage,
Expand Down
28 changes: 28 additions & 0 deletions c/parallel/include/cccl/c/radix_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,17 @@ typedef struct cccl_device_radix_sort_build_result_t
CUkernel onesweep_kernel;
cccl_sort_order_t order;
void* runtime_policy;
size_t runtime_policy_size;
// Lowered (mangled) kernel names, heap-allocated, freed by cccl_device_radix_sort_cleanup():
char* single_tile_kernel_lowered_name;
char* upsweep_kernel_lowered_name;
char* alt_upsweep_kernel_lowered_name;
char* scan_bins_kernel_lowered_name;
char* downsweep_kernel_lowered_name;
char* alt_downsweep_kernel_lowered_name;
char* histogram_kernel_lowered_name;
char* exclusive_sum_kernel_lowered_name;
char* onesweep_kernel_lowered_name;
} cccl_device_radix_sort_build_result_t;

CCCL_C_API CUresult cccl_device_radix_sort_build(
Expand Down Expand Up @@ -74,6 +85,23 @@ CCCL_C_API CUresult cccl_device_radix_sort_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for device radix sort; declared next to cccl_device_radix_sort_load().
// Takes the build result by pointer (`build`), the sort order, the key and value input
// iterators, the `decomposer` operator with its return type name, cc_major/cc_minor
// (presumably the target compute capability), the cub/thrust/libcudacxx/ctk path strings
// and `config`. Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_radix_sort_build_ex() and leaves loading to cccl_device_radix_sort_load()
// -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_radix_sort_compile(
cccl_device_radix_sort_build_result_t* build,
cccl_sort_order_t sort_order,
cccl_iterator_t input_keys_it,
cccl_iterator_t input_values_it,
cccl_op_t decomposer,
const char* decomposer_return_type,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Load step for a device radix sort build result; takes `build` by pointer and returns
// a CUresult status code.
// NOTE(review): presumably loads the compiled code and resolves the radix sort kernels
// from the stored `*_kernel_lowered_name` strings -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_radix_sort_load(cccl_device_radix_sort_build_result_t* build);

CCCL_C_API CUresult cccl_device_radix_sort(
cccl_device_radix_sort_build_result_t build,
void* d_temp_storage,
Expand Down
23 changes: 23 additions & 0 deletions c/parallel/include/cccl/c/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ typedef struct cccl_device_reduce_build_result_t
CUkernel nondeterministic_atomic_kernel;
cccl_determinism_t determinism;
void* runtime_policy;
size_t runtime_policy_size;
// Lowered (mangled) kernel names, heap-allocated, freed by cccl_device_reduce_cleanup():
char* single_tile_kernel_lowered_name;
char* single_tile_second_kernel_lowered_name;
char* reduction_kernel_lowered_name;
char* nondeterministic_kernel_lowered_name;
} cccl_device_reduce_build_result_t;

// TODO return a union of nvtx/cuda/nvrtc errors or a string?
Expand Down Expand Up @@ -68,6 +74,23 @@ CCCL_C_API CUresult cccl_device_reduce_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for device reduce; declared next to cccl_device_reduce_load().
// Takes the build result by pointer (`build`), the input/output iterators, the reduction
// operator `op`, the initial value `init`, the requested determinism, cc_major/cc_minor
// (presumably the target compute capability), the cub/thrust/libcudacxx/ctk path strings
// and `config`. Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_reduce_build_ex() and leaves loading to cccl_device_reduce_load()
// -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_reduce_compile(
cccl_device_reduce_build_result_t* build,
cccl_iterator_t d_in,
cccl_iterator_t d_out,
cccl_op_t op,
cccl_value_t init,
cccl_determinism_t determinism,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Load step for a device reduce build result; takes `build` by pointer and returns
// a CUresult status code.
// NOTE(review): presumably loads the compiled code and resolves the reduce kernels from
// the stored `*_kernel_lowered_name` strings -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_reduce_load(cccl_device_reduce_build_result_t* build);

CCCL_C_API CUresult cccl_device_reduce(
cccl_device_reduce_build_result_t build,
void* d_temp_storage,
Expand Down
22 changes: 22 additions & 0 deletions c/parallel/include/cccl/c/scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ typedef struct cccl_device_scan_build_result_t
size_t description_bytes_per_tile;
size_t payload_bytes_per_tile;
void* runtime_policy;
size_t runtime_policy_size;
// Lowered (mangled) kernel names, heap-allocated, freed by cccl_device_scan_cleanup():
char* init_kernel_lowered_name;
char* scan_kernel_lowered_name;
} cccl_device_scan_build_result_t;

CCCL_C_API CUresult cccl_device_scan_build(
Expand Down Expand Up @@ -73,6 +77,24 @@ CCCL_C_API CUresult cccl_device_scan_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for device scan; declared next to cccl_device_scan_load().
// Takes the build result by pointer (`build_ptr`), the input/output iterators, the scan
// operator `op`, the type info of the initial value, the `force_inclusive` flag, the
// init kind, cc_major/cc_minor (presumably the target compute capability), the
// cub/thrust/libcudacxx/ctk path strings and `config`. Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_scan_build_ex() and leaves loading to cccl_device_scan_load()
// -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_scan_compile(
cccl_device_scan_build_result_t* build_ptr,
cccl_iterator_t d_in,
cccl_iterator_t d_out,
cccl_op_t op,
cccl_type_info init,
bool force_inclusive,
cccl_init_kind_t init_kind,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Load step for a device scan build result; takes `build_ptr` by pointer and returns
// a CUresult status code.
// NOTE(review): presumably loads the compiled code and resolves the init and scan kernels
// from the stored lowered kernel names -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_scan_load(cccl_device_scan_build_result_t* build_ptr);

CCCL_C_API CUresult cccl_device_exclusive_scan(
cccl_device_scan_build_result_t build,
void* d_temp_storage,
Expand Down
21 changes: 21 additions & 0 deletions c/parallel/include/cccl/c/segmented_reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ typedef struct cccl_device_segmented_reduce_build_result_t
uint64_t accumulator_size;
CUkernel segmented_reduce_kernel;
void* runtime_policy;
size_t runtime_policy_size;
// Lowered (mangled) kernel name, heap-allocated, freed by cccl_device_segmented_reduce_cleanup():
char* segmented_reduce_kernel_lowered_name;
} cccl_device_segmented_reduce_build_result_t;

// TODO return a union of nvtx/cuda/nvrtc errors or a string?
Expand Down Expand Up @@ -66,6 +69,24 @@ CCCL_C_API CUresult cccl_device_segmented_reduce_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for device segmented reduce; declared next to
// cccl_device_segmented_reduce_load().
// Takes the build result by pointer (`build`), the input/output iterators, the segment
// begin/end offset iterators, the reduction operator `op`, the initial value `init`,
// cc_major/cc_minor (presumably the target compute capability), the
// cub/thrust/libcudacxx/ctk path strings and `config`. Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_segmented_reduce_build_ex() and leaves loading to
// cccl_device_segmented_reduce_load() -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_segmented_reduce_compile(
cccl_device_segmented_reduce_build_result_t* build,
cccl_iterator_t d_in,
cccl_iterator_t d_out,
cccl_iterator_t begin_offset_in,
cccl_iterator_t end_offset_in,
cccl_op_t op,
cccl_value_t init,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Load step for a device segmented reduce build result; takes `build` by pointer and
// returns a CUresult status code.
// NOTE(review): presumably loads the compiled code and resolves `segmented_reduce_kernel`
// via `segmented_reduce_kernel_lowered_name` -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_segmented_reduce_load(cccl_device_segmented_reduce_build_result_t* build);

CCCL_C_API CUresult cccl_device_segmented_reduce(
cccl_device_segmented_reduce_build_result_t build,
void* d_temp_storage,
Expand Down
25 changes: 25 additions & 0 deletions c/parallel/include/cccl/c/segmented_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,16 @@ typedef struct cccl_device_segmented_sort_build_result_t
CUkernel three_way_partition_init_kernel;
CUkernel three_way_partition_kernel;
void* runtime_policy;
size_t runtime_policy_size;
void* partition_runtime_policy;
size_t partition_runtime_policy_size;
cccl_sort_order_t order;
// Lowered (mangled) kernel names, heap-allocated, freed by cccl_device_segmented_sort_cleanup():
char* segmented_sort_fallback_kernel_lowered_name;
char* segmented_sort_kernel_small_lowered_name;
char* segmented_sort_kernel_large_lowered_name;
char* three_way_partition_init_kernel_lowered_name;
char* three_way_partition_kernel_lowered_name;
} cccl_device_segmented_sort_build_result_t;

// TODO return a union of nvtx/cuda/nvrtc errors or a string?
Expand Down Expand Up @@ -74,6 +82,23 @@ CCCL_C_API CUresult cccl_device_segmented_sort_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for device segmented sort; declared next to
// cccl_device_segmented_sort_load().
// Takes the build result by pointer (`build`), the sort order, the key and value input
// iterators, the segment begin/end offset iterators, cc_major/cc_minor (presumably the
// target compute capability), the cub/thrust/libcudacxx/ctk path strings and `config`.
// Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_segmented_sort_build_ex() and leaves loading to
// cccl_device_segmented_sort_load() -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_segmented_sort_compile(
cccl_device_segmented_sort_build_result_t* build,
cccl_sort_order_t sort_order,
cccl_iterator_t d_keys_in,
cccl_iterator_t d_values_in,
cccl_iterator_t begin_offset_in,
cccl_iterator_t end_offset_in,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Load step for a device segmented sort build result; takes `build` by pointer and
// returns a CUresult status code.
// NOTE(review): presumably loads the compiled code and resolves the segmented sort and
// three-way partition kernels from the stored lowered kernel names -- confirm against
// the implementation.
CCCL_C_API CUresult cccl_device_segmented_sort_load(cccl_device_segmented_sort_build_result_t* build);

CCCL_C_API CUresult cccl_device_segmented_sort(
cccl_device_segmented_sort_build_result_t build,
void* d_temp_storage,
Expand Down
23 changes: 23 additions & 0 deletions c/parallel/include/cccl/c/three_way_partition.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ typedef struct cccl_device_three_way_partition_build_result_t
CUkernel three_way_partition_init_kernel;
CUkernel three_way_partition_kernel;
void* runtime_policy;
size_t runtime_policy_size;
// Lowered (mangled) kernel names, heap-allocated, freed by cccl_device_three_way_partition_cleanup():
char* three_way_partition_init_kernel_lowered_name;
char* three_way_partition_kernel_lowered_name;
} cccl_device_three_way_partition_build_result_t;

// TODO return a union of nvtx/cuda/nvrtc errors or a string?
Expand Down Expand Up @@ -68,6 +72,25 @@ CCCL_C_API CUresult cccl_device_three_way_partition_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for device three-way partition; declared next to
// cccl_device_three_way_partition_load().
// Takes the build result by pointer (`build`), the input iterator, the three output
// iterators (first part, second part, unselected), the selected-count output iterator,
// the two selection operators, cc_major/cc_minor (presumably the target compute
// capability), the cub/thrust/libcudacxx/ctk path strings and `config`.
// Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_three_way_partition_build_ex() and leaves loading to
// cccl_device_three_way_partition_load() -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_three_way_partition_compile(
cccl_device_three_way_partition_build_result_t* build,
cccl_iterator_t d_in,
cccl_iterator_t d_first_part_out,
cccl_iterator_t d_second_part_out,
cccl_iterator_t d_unselected_out,
cccl_iterator_t d_num_selected_out,
cccl_op_t select_first_part_op,
cccl_op_t select_second_part_op,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Load step for a device three-way partition build result; takes `build` by pointer and
// returns a CUresult status code.
// NOTE(review): presumably loads the compiled code and resolves the init and partition
// kernels from the stored lowered kernel names -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_three_way_partition_load(cccl_device_three_way_partition_build_result_t* build);

CCCL_C_API CUresult cccl_device_three_way_partition(
cccl_device_three_way_partition_build_result_t build,
void* d_temp_storage,
Expand Down
32 changes: 32 additions & 0 deletions c/parallel/include/cccl/c/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ typedef struct cccl_device_transform_build_result_t
CUkernel transform_kernel;
int loaded_bytes_per_iteration;
void* runtime_policy;
size_t runtime_policy_size;
void* cache;
// Lowered (mangled) kernel name, heap-allocated, freed by cccl_device_transform_cleanup():
char* transform_kernel_lowered_name;
} cccl_device_transform_build_result_t;

CCCL_C_API CUresult cccl_device_unary_transform_build(
Expand Down Expand Up @@ -59,6 +62,21 @@ CCCL_C_API CUresult cccl_device_unary_transform_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for the unary device transform; declared next to
// cccl_device_transform_load().
// Takes the build result by pointer (`build_ptr`), the input and output iterators, the
// transform operator `op`, cc_major/cc_minor (presumably the target compute capability),
// the cub/thrust/libcudacxx/ctk path strings and `config`. Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_unary_transform_build_ex() and leaves loading to
// cccl_device_transform_load() -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_unary_transform_compile(
cccl_device_transform_build_result_t* build_ptr,
cccl_iterator_t d_in,
cccl_iterator_t d_out,
cccl_op_t op,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

// Load step for a device transform build result; takes `build_ptr` by pointer and
// returns a CUresult status code. The parameter type is the same
// cccl_device_transform_build_result_t that both cccl_device_unary_transform_compile()
// and cccl_device_binary_transform_compile() accept.
// NOTE(review): presumably loads the compiled code and resolves `transform_kernel` via
// `transform_kernel_lowered_name` for either variant -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_transform_load(cccl_device_transform_build_result_t* build_ptr);

CCCL_C_API CUresult cccl_device_unary_transform(
cccl_device_transform_build_result_t build,
cccl_iterator_t d_in,
Expand Down Expand Up @@ -95,6 +113,20 @@ CCCL_C_API CUresult cccl_device_binary_transform_build_ex(
const char* ctk_path,
cccl_build_config* config);

// Compile step for the binary device transform.
// Takes the build result by pointer (`build_ptr`), the two input iterators, the output
// iterator, the transform operator `op`, cc_major/cc_minor (presumably the target compute
// capability), the cub/thrust/libcudacxx/ctk path strings and `config`.
// Returns a CUresult status code.
// NOTE(review): presumably performs only the compilation half of
// cccl_device_binary_transform_build_ex(), with loading done afterwards through
// cccl_device_transform_load() -- confirm against the implementation.
CCCL_C_API CUresult cccl_device_binary_transform_compile(
cccl_device_transform_build_result_t* build_ptr,
cccl_iterator_t d_in1,
cccl_iterator_t d_in2,
cccl_iterator_t d_out,
cccl_op_t op,
int cc_major,
int cc_minor,
const char* cub_path,
const char* thrust_path,
const char* libcudacxx_path,
const char* ctk_path,
cccl_build_config* config);

CCCL_C_API CUresult cccl_device_binary_transform(
cccl_device_transform_build_result_t build,
cccl_iterator_t d_in1,
Expand Down
Loading
Loading