Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- libcusolver-dev
- libcusparse-dev
- libgrpc >=1.78.0,<1.80.0a0
- libnvjitlink-dev
- libprotobuf
- libraft-headers==26.6.*,>=0.0.0a0
- librmm==26.6.*,>=0.0.0a0
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- libcusolver-dev
- libcusparse-dev
- libgrpc >=1.78.0,<1.80.0a0
- libnvjitlink-dev
- libprotobuf
- libraft-headers==26.6.*,>=0.0.0a0
- librmm==26.6.*,>=0.0.0a0
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-132_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- libcusolver-dev
- libcusparse-dev
- libgrpc >=1.78.0,<1.80.0a0
- libnvjitlink-dev
- libprotobuf
- libraft-headers==26.6.*,>=0.0.0a0
- librmm==26.6.*,>=0.0.0a0
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-132_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- libcusolver-dev
- libcusparse-dev
- libgrpc >=1.78.0,<1.80.0a0
- libnvjitlink-dev
- libprotobuf
- libraft-headers==26.6.*,>=0.0.0a0
- librmm==26.6.*,>=0.0.0a0
Expand Down
5 changes: 4 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,10 @@ set_target_properties(cuopt
CXX_SCAN_FOR_MODULES OFF
)

target_compile_definitions(cuopt PUBLIC "CUOPT_LOG_ACTIVE_LEVEL=RAPIDS_LOGGER_LOG_LEVEL_${LIBCUOPT_LOGGING_LEVEL}")
target_compile_definitions(cuopt
PUBLIC "CUOPT_LOG_ACTIVE_LEVEL=RAPIDS_LOGGER_LOG_LEVEL_${LIBCUOPT_LOGGING_LEVEL}"
PUBLIC CUSPARSE_ENABLE_EXPERIMENTAL_API
)

target_compile_options(cuopt
PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CUOPT_CXX_FLAGS}>"
Expand Down
152 changes: 152 additions & 0 deletions cpp/src/pdlp/cusparse_view.cu
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,90 @@ cusparse_dn_mat_descr_wrapper_t<f_t>::operator cusparseDnMatDescr_t() const
return descr_;
}

#if CUDA_VER_13_2_UP
cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t()
: descr_(nullptr), need_destruction_(false)
{
}

// Destroys the SpMVOp descriptor, but only when this wrapper is the owner.
// The non-throwing variant of the cuSPARSE check macro is used because this is a destructor.
cusparse_spmvop_descr_wrapper_t::~cusparse_spmvop_descr_wrapper_t()
{
  if (!need_destruction_) { return; }
  RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyDescr(descr_));
}

cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t(
const cusparse_spmvop_descr_wrapper_t& other)
: descr_(other.descr_), need_destruction_(false)
{
}

// Move assignment: releases the descriptor currently owned by *this (if any), then takes
// over both the handle and the ownership flag of `other`. Afterwards `other` still holds
// the same raw handle but no longer owns it, so only *this will destroy it.
cusparse_spmvop_descr_wrapper_t& cusparse_spmvop_descr_wrapper_t::operator=(
  cusparse_spmvop_descr_wrapper_t&& other)
{
  // Self-move must be a no-op: without this guard the owned descriptor would be destroyed
  // and *this would be left holding a dangling handle with ownership cleared.
  if (this == &other) { return *this; }

  if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyDescr(descr_)); }
  descr_                  = other.descr_;
  need_destruction_       = other.need_destruction_;
  other.need_destruction_ = false;
  return *this;
}

// (Re)creates the SpMVOp descriptor held by this wrapper.
//
// Any descriptor this wrapper already owns is destroyed first. All arguments are forwarded
// unchanged to cusparseSpMVOp_createDescr. `buffer` is only forwarded: the wrapper neither
// stores nor frees it, so the caller remains responsible for that memory.
// NOTE(review): presumably `buffer` must stay valid for as long as the descriptor is used;
// confirm against the cuSPARSE documentation.
//
// On success the wrapper owns the new descriptor and destroys it in its destructor.
void cusparse_spmvop_descr_wrapper_t::create(cusparseHandle_t handle,
                                             cusparseOperation_t opA,
                                             cusparseSpMatDescr_t matA,
                                             cusparseDnVecDescr_t vecX,
                                             cusparseDnVecDescr_t vecY,
                                             cusparseDnVecDescr_t vecZ,
                                             cudaDataType computeType,
                                             void* buffer)
{
  if (need_destruction_) {
    RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyDescr(descr_));
    // Drop ownership right away: if the creation below fails, the destructor must not
    // destroy the already-released descriptor a second time.
    descr_            = nullptr;
    need_destruction_ = false;
  }
  RAFT_CUSPARSE_TRY(
    cusparseSpMVOp_createDescr(handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer));
  need_destruction_ = true;
}

// Implicit conversion to the raw cuSPARSE handle so the wrapper can be passed directly to
// cuSPARSE calls. Returns the stored handle as-is (null until create() has been called).
cusparse_spmvop_descr_wrapper_t::operator cusparseSpMVOpDescr_t() const
{
  return descr_;
}

cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t()
: plan_(nullptr), need_destruction_(false)
{
}

// Destroys the SpMVOp plan, but only when this wrapper is the owner.
// The non-throwing variant of the cuSPARSE check macro is used because this is a destructor.
cusparse_spmvop_plan_wrapper_t::~cusparse_spmvop_plan_wrapper_t()
{
  if (!need_destruction_) { return; }
  RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyPlan(plan_));
}

cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t(
const cusparse_spmvop_plan_wrapper_t& other)
: plan_(other.plan_), need_destruction_(false)
{
}

// Move assignment: releases the plan currently owned by *this (if any), then takes over
// both the handle and the ownership flag of `other`. Afterwards `other` still holds the
// same raw handle but no longer owns it, so only *this will destroy it.
cusparse_spmvop_plan_wrapper_t& cusparse_spmvop_plan_wrapper_t::operator=(
  cusparse_spmvop_plan_wrapper_t&& other)
{
  // Self-move must be a no-op: without this guard the owned plan would be destroyed and
  // *this would be left holding a dangling handle with ownership cleared.
  if (this == &other) { return *this; }

  if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyPlan(plan_)); }
  plan_                   = other.plan_;
  need_destruction_       = other.need_destruction_;
  other.need_destruction_ = false;
  return *this;
}

// (Re)creates the SpMVOp plan held by this wrapper from an existing SpMVOp descriptor.
//
// Any plan this wrapper already owns is destroyed first. `lto_buffer` and `lto_buffer_size`
// are forwarded unchanged to cusparseSpMVOp_createPlan; the wrapper neither stores nor frees
// the buffer, so the caller keeps ownership of it.
// NOTE(review): the exact meaning of the LTO buffer (and whether a null buffer with size 0
// selects a default) should be confirmed against the cuSPARSE documentation.
//
// On success the wrapper owns the new plan and destroys it in its destructor.
void cusparse_spmvop_plan_wrapper_t::create(cusparseHandle_t handle,
                                            cusparseSpMVOpDescr_t descr,
                                            char* lto_buffer,
                                            size_t lto_buffer_size)
{
  if (need_destruction_) {
    RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyPlan(plan_));
    // Drop ownership right away: if the creation below fails, the destructor must not
    // destroy the already-released plan a second time.
    plan_             = nullptr;
    need_destruction_ = false;
  }
  RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle, descr, &plan_, lto_buffer, lto_buffer_size));
  need_destruction_ = true;
}

// Implicit conversion to the raw cuSPARSE plan handle so the wrapper can be passed directly
// to cuSPARSE calls. Returns the stored handle as-is (null until create() has been called).
cusparse_spmvop_plan_wrapper_t::operator cusparseSpMVOpPlan_t() const
{
  return plan_;
}

#endif

#if CUDA_VER_12_4_UP
struct dynamic_load_runtime {
static void* get_cusparse_runtime_handle()
Expand Down Expand Up @@ -304,6 +388,8 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(
A_T_indices_{op_problem_scaled.reverse_constraints},
buffer_non_transpose{0, handle_ptr->get_stream()},
buffer_transpose{0, handle_ptr->get_stream()},
buffer_non_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_batch{0, handle_ptr->get_stream()},
buffer_non_transpose_batch{0, handle_ptr->get_stream()},
buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()},
Expand Down Expand Up @@ -717,6 +803,8 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(
A_T_indices_{_A_T_indices},
buffer_non_transpose{0, handle_ptr->get_stream()},
buffer_transpose{0, handle_ptr->get_stream()},
buffer_non_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_batch{0, handle_ptr->get_stream()},
buffer_non_transpose_batch{0, handle_ptr->get_stream()},
buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()},
Expand Down Expand Up @@ -926,6 +1014,8 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(
tmp_dual(existing_cusparse_view.tmp_dual),
buffer_non_transpose{0, handle_ptr->get_stream()},
buffer_transpose{0, handle_ptr->get_stream()},
buffer_non_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_batch{0, handle_ptr->get_stream()},
buffer_non_transpose_batch{0, handle_ptr->get_stream()},
buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()},
Expand Down Expand Up @@ -1041,6 +1131,8 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(
: handle_ptr_(handle_ptr),
buffer_non_transpose{0, handle_ptr->get_stream()},
buffer_transpose{0, handle_ptr->get_stream()},
buffer_non_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_batch{0, handle_ptr->get_stream()},
buffer_non_transpose_batch{0, handle_ptr->get_stream()},
buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()},
Expand Down Expand Up @@ -1182,6 +1274,66 @@ bool is_cusparse_runtime_mixed_precision_supported()
return (major > 12) || (major == 12 && minor >= 5);
}

// Creates SpMVOp plans. Must be called after scale_problem() so plans use the scaled matrix.
//
// The descriptor/plan pair for the A_T product (x = dual_solution, y = z = current_AtY) is
// always built. The pair for the A product (x = reflected_primal_solution,
// y = z = dual_gradient) is built only when `is_reflected` is true. Each pair uses its own
// workspace (buffer_transpose_spmvop / buffer_non_transpose_spmvop), resized here to the
// size reported by cusparseSpMVOp_bufferSize.
//
// When CUDA_VER_13_2_UP is false the body is compiled out and the call does nothing.
//
// NOTE(review): the compute type is hard-coded to CUDA_R_64F even though the enclosing class
// is templated on f_t; confirm that non-double instantiations never reach this method.
template <typename i_t, typename f_t>
void cusparse_view_t<i_t, f_t>::create_spmv_op_plans(bool is_reflected)
{
#if CUDA_VER_13_2_UP
  cusparseHandle_t const handle = handle_ptr_->get_cusparse_handle();
  const auto stream             = handle_ptr_->get_stream();

  CUSPARSE_CHECK(cusparseSetStream(handle, stream));

  // No LTO buffer is supplied to plan creation: both plans receive a null pointer and size 0.
  char* const lto_buffer       = nullptr;
  const size_t lto_buffer_size = 0;

  // Prepare buffers for At_y SpMVOp
  size_t buffer_size_transpose = 0;
  RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle,
                                              CUSPARSE_OPERATION_NON_TRANSPOSE,
                                              A_T,
                                              dual_solution,
                                              current_AtY,
                                              current_AtY,
                                              CUDA_R_64F,
                                              &buffer_size_transpose));
  buffer_transpose_spmvop.resize(buffer_size_transpose, stream);

  spmv_op_descr_A_t_.create(handle,
                            CUSPARSE_OPERATION_NON_TRANSPOSE,
                            A_T,
                            dual_solution,
                            current_AtY,
                            current_AtY,
                            CUDA_R_64F,
                            buffer_transpose_spmvop.data());

  spmv_op_plan_A_t_.create(handle, spmv_op_descr_A_t_, lto_buffer, lto_buffer_size);

  // Only prepare buffers for A_x if we are using reflected_halpern
  if (!is_reflected) { return; }

  size_t buffer_size_non_transpose = 0;
  RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle,
                                              CUSPARSE_OPERATION_NON_TRANSPOSE,
                                              A,
                                              reflected_primal_solution,
                                              dual_gradient,
                                              dual_gradient,
                                              CUDA_R_64F,
                                              &buffer_size_non_transpose));
  buffer_non_transpose_spmvop.resize(buffer_size_non_transpose, stream);

  spmv_op_descr_A_.create(handle,
                          CUSPARSE_OPERATION_NON_TRANSPOSE,
                          A,
                          reflected_primal_solution,
                          dual_gradient,
                          dual_gradient,
                          CUDA_R_64F,
                          buffer_non_transpose_spmvop.data());

  spmv_op_plan_A_.create(handle, spmv_op_descr_A_, lto_buffer, lto_buffer_size);
#endif
}
Comment on lines +1277 to +1335
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Description: Check template instantiations of cusparse_view_t at end of file

# Check for explicit instantiations of cusparse_view_t and create_spmv_op_plans
echo "=== Explicit template instantiations in cusparse_view.cu ==="
rg -n -A2 'template class cusparse_view_t' cpp/src/pdlp/cusparse_view.cu

echo ""
echo "=== Any float instantiation that could call create_spmv_op_plans? ==="
rg -n 'PDLP_INSTANTIATE_FLOAT|MIP_INSTANTIATE_FLOAT' cpp/src/pdlp/cusparse_view.cu

echo ""
echo "=== Any calls to create_spmv_op_plans ==="
rg -n 'create_spmv_op_plans' cpp/src/pdlp/

Repository: NVIDIA/cuopt

Length of output: 786


Add compile-time guard to enforce double-only instantiation.

The create_spmv_op_plans method is templated on f_t and can be instantiated with float (line 1344 of cusparse_view.cu), but hardcodes CUDA_R_64F at lines 1294, 1304, 1321, and 1331. Since the feature is "double precision only," add a compile-time check:

template <typename i_t, typename f_t>
void cusparse_view_t<i_t, f_t>::create_spmv_op_plans(bool is_reflected)
{
#if CUDA_VER_13_2_UP
  if constexpr (std::is_same_v<f_t, double>) {
    // ... existing implementation
  }
#endif
}

This prevents incorrect behavior if a float instantiation is created and calls this method.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@cpp/src/pdlp/cusparse_view.cu` around lines 1280 - 1338, The method
create_spmv_op_plans is templated on f_t but always uses CUDA_R_64F, so add a
compile-time guard to only instantiate the double-precision path: wrap the
existing CUDA_VER_13_2_UP body in an if constexpr (std::is_same_v<f_t, double>)
{ ... } block so float instantiations skip this code; ensure <type_traits> is
available for std::is_same_v and reference the symbols used
(create_spmv_op_plans, f_t, CUDA_R_64F, A_T, A, spmv_op_descr_A_t_,
spmv_op_descr_A_, spmv_op_plan_A_t_, spmv_op_plan_A_) when making the change.


#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT
template class cusparse_sp_mat_descr_wrapper_t<int, float>;
template class cusparse_dn_vec_descr_wrapper_t<float>;
Expand Down
63 changes: 63 additions & 0 deletions cpp/src/pdlp/cusparse_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#include <cusparse_v2.h>

// Evaluates to true when CUDART_VERSION is at least 13020; used below to gate the SpMVOp
// wrapper classes and the members that depend on them.
#define CUDA_VER_13_2_UP (CUDART_VERSION >= 13020)

namespace cuopt::linear_programming::detail {

template <typename i_t, typename f_t>
Expand Down Expand Up @@ -79,6 +81,54 @@ class cusparse_dn_mat_descr_wrapper_t {
bool need_destruction_;
};

#if CUDA_VER_13_2_UP
// Holder for a cusparseSpMVOpDescr_t plus a flag recording whether this object is
// responsible for destroying it. Converts implicitly to the raw handle.
class cusparse_spmvop_descr_wrapper_t {
public:
cusparse_spmvop_descr_wrapper_t();
~cusparse_spmvop_descr_wrapper_t();

// Copy construction is declared while copy assignment is deleted; move assignment is
// provided. No move constructor is declared, so constructing from an rvalue selects the
// copy constructor.
cusparse_spmvop_descr_wrapper_t(const cusparse_spmvop_descr_wrapper_t& other);
cusparse_spmvop_descr_wrapper_t& operator=(cusparse_spmvop_descr_wrapper_t&& other);
cusparse_spmvop_descr_wrapper_t& operator=(const cusparse_spmvop_descr_wrapper_t& other) = delete;

// Creates the descriptor from the given cuSPARSE handle, operation, matrix/vector
// descriptors, compute type and workspace buffer.
// NOTE(review): `buffer` is a raw pointer whose ownership is not expressed by the type;
// presumably the caller keeps it alive while the descriptor is in use -- confirm.
void create(cusparseHandle_t handle,
cusparseOperation_t opA,
cusparseSpMatDescr_t matA,
cusparseDnVecDescr_t vecX,
cusparseDnVecDescr_t vecY,
cusparseDnVecDescr_t vecZ,
cudaDataType computeType,
void* buffer);

// Implicit conversion to the underlying cuSPARSE handle.
operator cusparseSpMVOpDescr_t() const;

private:
// Raw cuSPARSE SpMVOp descriptor handle.
cusparseSpMVOpDescr_t descr_;
// Whether this wrapper is responsible for destroying descr_.
bool need_destruction_;
};

class cusparse_spmvop_plan_wrapper_t {
public:
cusparse_spmvop_plan_wrapper_t();
~cusparse_spmvop_plan_wrapper_t();

cusparse_spmvop_plan_wrapper_t(const cusparse_spmvop_plan_wrapper_t& other);
cusparse_spmvop_plan_wrapper_t& operator=(cusparse_spmvop_plan_wrapper_t&& other);
cusparse_spmvop_plan_wrapper_t& operator=(const cusparse_spmvop_plan_wrapper_t& other) = delete;

void create(cusparseHandle_t handle,
cusparseSpMVOpDescr_t descr,
char* lto_buffer,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Passing around raw pointers and sizes is a bad code smell. Please document what these are used for and who owns the memory. At first glance it seems like we always pass NULL and zero? Same with buffer above.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, I will fix it

size_t lto_buffer_size);

operator cusparseSpMVOpPlan_t() const;

private:
cusparseSpMVOpPlan_t plan_;
bool need_destruction_;
};
#endif

template <typename i_t, typename f_t>
class cusparse_view_t {
public:
Expand Down Expand Up @@ -172,6 +222,17 @@ class cusparse_view_t {
rmm::device_uvector<uint8_t> buffer_non_transpose;
rmm::device_uvector<uint8_t> buffer_transpose;

// SpMVOp buffers for A and A_T
rmm::device_uvector<uint8_t> buffer_non_transpose_spmvop{0, handle_ptr_->get_stream()};
rmm::device_uvector<uint8_t> buffer_transpose_spmvop{0, handle_ptr_->get_stream()};

#if CUDA_VER_13_2_UP
// SpMVOp descriptors and plans for A and A_T (descr before plan so dtor destroys plan first)
cusparse_spmvop_descr_wrapper_t spmv_op_descr_A_;
cusparse_spmvop_plan_wrapper_t spmv_op_plan_A_;
cusparse_spmvop_descr_wrapper_t spmv_op_descr_A_t_;
cusparse_spmvop_plan_wrapper_t spmv_op_plan_A_t_;
#endif
// reuse buffers for cusparse spmm
rmm::device_uvector<uint8_t> buffer_transpose_batch;
rmm::device_uvector<uint8_t> buffer_non_transpose_batch;
Expand Down Expand Up @@ -212,6 +273,8 @@ class cusparse_view_t {
// Redirects the cuSPARSE CSR structure pointers from op_problem_scaled_ to the original problem
// so the duplicated row/column buffers can be freed.
void redirect_cusparse_csr_structure_pointers(const problem_t<i_t, f_t>& original_problem);
// Creates SpMVOp plans. Must be called after scale_problem() so plans use the scaled matrix.
void create_spmv_op_plans(bool is_reflected);
};

// Mixed precision SpMV: FP32 matrix with FP64 vectors and FP64 compute type
Expand Down
Loading
Loading