Merged

39 commits
09eb2c9  reduced memory footprint of problem_t inside pdlp. (Bubullzz, Apr 22, 2026)
e9f26a1  added is_cupdlpx bool and removed unscaled_primal_dual_avg_solution f… (Bubullzz, Apr 22, 2026)
e134ce8  free iteration_constraint_matrix_scaling_ and variables. passes all t… (Bubullzz, Apr 23, 2026)
9c83d56  removed useless data in pdlp_restart_strategy_t::weighted_average_sol… (Bubullzz, Apr 23, 2026)
f9e5c5f  only allocate infeasibility_information_t.primal/dual_slack if set to… (Bubullzz, Apr 23, 2026)
2ea5e4c  dont allocate device vectors in average_termination_strategy (Bubullzz, Apr 23, 2026)
a77eed8  removed primal_grqadient from saddlepoint in cupdlpx (Bubullzz, Apr 23, 2026)
660a4ae  forgot to push the last change >_< (Bubullzz, Apr 23, 2026)
abd82f3  cleaned (Bubullzz, Apr 28, 2026)
5e9d12b  better (Bubullzz, Apr 28, 2026)
f948df5  updated unscaled_primal/dual_avg_solution_ construction to also check… (Bubullzz, Apr 28, 2026)
798b51e  fixed use after free of combined bounds and ensure it is freed only i… (Bubullzz, Apr 28, 2026)
c1bbb42  made pdlp_lighten internal (Bubullzz, Apr 28, 2026)
7c2e414  minor fix (Bubullzz, Apr 28, 2026)
59b385f  fixes again (Bubullzz, Apr 28, 2026)
01d5b99  Merge branch 'main' into cupdlpx_memory_reduce (Bubullzz, Apr 28, 2026)
84daee4  linter (Bubullzz, Apr 28, 2026)
edb13ae  nit coderabbit1 : more robust never_restart_to_average handling (Bubullzz, Apr 29, 2026)
92d6d76  code rabbit 2-3 (Bubullzz, Apr 29, 2026)
cf541e9  added comment fro is_cupdlpx (Bubullzz, Apr 29, 2026)
a426bf4  removed default argument in saddle_point state and renamed it for mor… (Bubullzz, Apr 29, 2026)
03fd352  undid bug in current duality gap (Bubullzz, Apr 29, 2026)
6fc6a24  removed lighten_problem_for_pdlp (Bubullzz, Apr 30, 2026)
195ca23  Merge branch 'main' into cupdlpx_memory_reduce (Bubullzz, Apr 30, 2026)
453e6d9  renamed redirect_rows_and_cols to redirect_cusparse_csr_structure_poi… (Bubullzz, Apr 30, 2026)
3dcccdc  changed const bool need_primal_gradient) in saddle_point_state constr… (Bubullzz, Apr 30, 2026)
851be7c  removed is_cupdlpx bool (Bubullzz, Apr 30, 2026)
6981c1f  Merge branch 'main' into cupdlpx_memory_reduce (Bubullzz, May 4, 2026)
8f9a937  pre-commit-run (Bubullzz, May 4, 2026)
29bd4ca  revrted back some vectors to full size because non-trivial to remove (Bubullzz, May 4, 2026)
72afe0a  style (Bubullzz, May 4, 2026)
3f15944  Merge branch 'main' into cupdlpx_memory_reduce (Bubullzz, May 5, 2026)
cea749f  bump max-allowed-size-compressed (Bubullzz, May 5, 2026)
1e2b582  empty commit (Bubullzz, May 10, 2026)
61e4681  Merge branch 'main' into cupdlpx_memory_reduce (Bubullzz, May 11, 2026)
aab5a29  Merge branch 'main' into cupdlpx_memory_reduce (Bubullzz, May 12, 2026)
c0cacce  Merge branch 'main' into cupdlpx_memory_reduce (Bubullzz, May 12, 2026)
29a8b46  Merge branch 'main' into cupdlpx_memory_reduce (Bubullzz, May 12, 2026)
7801c26  Merge branch 'main' into cupdlpx_memory_reduce (Bubullzz, May 13, 2026)
2 changes: 1 addition & 1 deletion ci/validate_wheel.sh
@@ -26,7 +26,7 @@ if [[ "${package_dir}" == "python/libcuopt" ]]; then
     )
   else
     PYDISTCHECK_ARGS+=(
-      --max-allowed-size-compressed '510Mi'
+      --max-allowed-size-compressed '550Mi'
     )
   fi
 elif [[ "${package_dir}" != "python/cuopt" ]] && \
11 changes: 5 additions & 6 deletions cpp/src/mip_heuristics/problem/problem.cu
@@ -278,7 +278,8 @@ problem_t<i_t, f_t>::problem_t(const problem_t<i_t, f_t>& problem_, bool no_deep
     deterministic(problem_.deterministic),
     handle_ptr(problem_.handle_ptr),
     integer_fixed_problem(problem_.integer_fixed_problem),
-    integer_fixed_variable_map(problem_.n_variables, handle_ptr->get_stream()),
+    integer_fixed_variable_map((!no_deep_copy) ? 0 : problem_.n_variables,
+                               handle_ptr->get_stream()),
     n_variables(problem_.n_variables),
     n_constraints(problem_.n_constraints),
     n_binary_vars(problem_.n_binary_vars),
@@ -342,10 +343,7 @@ problem_t<i_t, f_t>::problem_t(const problem_t<i_t, f_t>& problem_, bool no_deep
       (!no_deep_copy)
         ? rmm::device_uvector<f_t>(problem_.combined_bounds, handle_ptr->get_stream())
         : rmm::device_uvector<f_t>(problem_.combined_bounds.size(), handle_ptr->get_stream())),
-    variable_types(
-      (!no_deep_copy)
-        ? rmm::device_uvector<var_t>(problem_.variable_types, handle_ptr->get_stream())
-        : rmm::device_uvector<var_t>(problem_.variable_types.size(), handle_ptr->get_stream())),
+    variable_types((!no_deep_copy) ? 0 : problem_.variable_types.size(), handle_ptr->get_stream()),
     integer_indices((!no_deep_copy) ? 0 : problem_.integer_indices.size(),
                     handle_ptr->get_stream()),
     binary_indices((!no_deep_copy) ? 0 : problem_.binary_indices.size(), handle_ptr->get_stream()),
@@ -354,7 +352,8 @@
     is_binary_variable((!no_deep_copy) ? 0 : problem_.is_binary_variable.size(),
                        handle_ptr->get_stream()),
     related_variables(problem_.related_variables, handle_ptr->get_stream()),
-    related_variables_offsets(problem_.related_variables_offsets, handle_ptr->get_stream()),
+    related_variables_offsets((!no_deep_copy) ? 0 : problem_.related_variables_offsets.size(),
+                              handle_ptr->get_stream()),
     var_names(problem_.var_names),
     row_names(problem_.row_names),
     objective_name(problem_.objective_name),
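
The constructor above repeatedly applies the same trick: when the copy does not need its own version of a buffer, the member is constructed with size zero instead of duplicating the source. A minimal sketch of the underlying RMM idiom (hedged; `copy_or_empty` is a hypothetical helper, not cuOpt code, and the branch sense in the real initializer list varies per member):

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

// Hypothetical helper: either deep-copy the source buffer on the given
// stream, or build a zero-sized vector that owns no device allocation.
template <typename T>
rmm::device_uvector<T> copy_or_empty(const rmm::device_uvector<T>& src,
                                     bool deep_copy,
                                     rmm::cuda_stream_view stream)
{
  return deep_copy ? rmm::device_uvector<T>(src, stream)  // device-to-device copy
                   : rmm::device_uvector<T>(0, stream);   // empty, nothing allocated
}
```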
38 changes: 36 additions & 2 deletions cpp/src/pdlp/cusparse_view.cu
@@ -407,8 +407,9 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(
                    _tmp_primal.data(),
                    CUSPARSE_ORDER_COL);

-  primal_gradient.create(op_problem_scaled.n_variables,
-                         current_saddle_point_state.get_primal_gradient().data());
+  primal_gradient.create(
+    current_saddle_point_state.get_primal_gradient().size(),  // It is 0 in cupdlpx
+    current_saddle_point_state.get_primal_gradient().data());
   dual_gradient.create(op_problem_scaled.n_constraints,
                        current_saddle_point_state.get_dual_gradient().data());

@@ -1082,6 +1083,39 @@ void cusparse_view_t<i_t, f_t>::update_mixed_precision_matrices()
   }
 }

+// Redirects the cuSPARSE CSR structure pointers from op_problem_scaled_ to the original problem
+// so the duplicated row/column buffers can be freed.
+template <typename i_t, typename f_t>
+void cusparse_view_t<i_t, f_t>::redirect_cusparse_csr_structure_pointers(
+  const problem_t<i_t, f_t>& original_problem)
+{
+  RAFT_CUSPARSE_TRY(cusparseCsrSetPointers(A,
+                                           const_cast<i_t*>(original_problem.offsets.data()),
+                                           const_cast<i_t*>(original_problem.variables.data()),
+                                           const_cast<f_t*>(A_.data())));
+
+  RAFT_CUSPARSE_TRY(
+    cusparseCsrSetPointers(A_T,
+                           const_cast<i_t*>(original_problem.reverse_offsets.data()),
+                           const_cast<i_t*>(original_problem.reverse_constraints.data()),
+                           const_cast<f_t*>(A_T_.data())));
+
+  if constexpr (std::is_same_v<f_t, double>) {
+    if (mixed_precision_enabled_) {
+      RAFT_CUSPARSE_TRY(cusparseCsrSetPointers(A_mixed_,
+                                               const_cast<i_t*>(original_problem.offsets.data()),
+                                               const_cast<i_t*>(original_problem.variables.data()),
+                                               A_float_.data()));
+
+      RAFT_CUSPARSE_TRY(
+        cusparseCsrSetPointers(A_T_mixed_,
+                               const_cast<i_t*>(original_problem.reverse_offsets.data()),
+                               const_cast<i_t*>(original_problem.reverse_constraints.data()),
+                               A_T_float_.data()));
+    }
+  }
+}
+
 // Mixed precision SpMV implementation: FP32 matrix with FP64 vectors and FP64 compute type
 size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle,
                                        cusparseOperation_t opA,
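
For context, `cusparseCsrSetPointers` swaps the row-offset, column-index, and value arrays that a `cusparseSpMatDescr_t` refers to without rebuilding the descriptor. Re-pointing is viable here because diagonal scaling changes only the nonzero values, so the scaled matrix shares its sparsity structure with the original. A minimal sketch under that assumption (buffer names are illustrative, not cuOpt's):

```cpp
#include <cusparse.h>

// Sketch: re-point an existing CSR descriptor at structure arrays shared
// with the original problem, keeping the scaled values, so the duplicated
// offset/index buffers can be freed afterwards. All arguments are device
// pointers; names are illustrative.
void repoint_csr(cusparseSpMatDescr_t mat,
                 int* shared_row_offsets,     // original problem's row offsets
                 int* shared_column_indices,  // original problem's column indices
                 double* scaled_values)       // values stay those of the scaled matrix
{
  // Dimensions, index types, and value type of the descriptor are unchanged;
  // only the three data pointers are swapped.
  cusparseCsrSetPointers(mat, shared_row_offsets, shared_column_indices, scaled_values);
}
```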
4 changes: 4 additions & 0 deletions cpp/src/pdlp/cusparse_view.hpp
@@ -208,6 +208,10 @@ class cusparse_view_t {

   // Update FP32 matrix copies after scaling (must be called after scale_problem())
   void update_mixed_precision_matrices();
+
+  // Redirects the cuSPARSE CSR structure pointers from op_problem_scaled_ to the original problem
+  // so the duplicated row/column buffers can be freed.
+  void redirect_cusparse_csr_structure_pointers(const problem_t<i_t, f_t>& original_problem);
 };

 // Mixed precision SpMV: FP32 matrix with FP64 vectors and FP64 compute type
3 changes: 3 additions & 0 deletions cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu
@@ -131,6 +131,9 @@ pdlp_initial_scaling_strategy_t<i_t, f_t>::pdlp_initial_scaling_strategy_t(
     f_t(1));

   compute_scaling_vectors(number_of_ruiz_iterations, alpha);
+
+  iteration_constraint_matrix_scaling_.resize(0, stream_view_);
+  iteration_variable_scaling_.resize(0, stream_view_);
 }

 template <typename i_t, typename f_t>
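
A hedged aside on the `resize(0, stream_view_)` calls above, assuming stock RMM semantics: `device_uvector::resize` to a smaller size only updates the logical size and keeps the capacity reserved, while `shrink_to_fit` is what returns the allocation to the memory resource, as in this sketch:

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

// Release a vector's device storage once it is no longer needed.
void release_storage(rmm::device_uvector<double>& vec, rmm::cuda_stream_view stream)
{
  vec.resize(0, stream);      // logical size -> 0, capacity unchanged
  vec.shrink_to_fit(stream);  // reallocates to fit size 0, freeing the buffer
}
```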
7 changes: 5 additions & 2 deletions cpp/src/pdlp/pdhg.cu
@@ -55,8 +55,11 @@ pdhg_solver_t<i_t, f_t>::pdhg_solver_t(
     problem_ptr(&op_problem_scaled),
     primal_size_h_(problem_ptr->n_variables),
     dual_size_h_(problem_ptr->n_constraints),
-    current_saddle_point_state_{
-      handle_ptr_, problem_ptr->n_variables, problem_ptr->n_constraints, climber_strategies.size()},
+    current_saddle_point_state_{handle_ptr_,
+                                problem_ptr->n_variables,
+                                problem_ptr->n_constraints,
+                                climber_strategies.size(),
+                                hyper_params},
     tmp_primal_{(climber_strategies.size() * problem_ptr->n_variables), stream_view_},
     tmp_dual_{(climber_strategies.size() * problem_ptr->n_constraints), stream_view_},
     potential_next_primal_solution_{(climber_strategies.size() * problem_ptr->n_variables),
30 changes: 19 additions & 11 deletions cpp/src/pdlp/pdlp.cu
@@ -233,16 +233,17 @@ pdlp_solver_t<i_t, f_t>::pdlp_solver_t(problem_t<i_t, f_t>& op_problem,
                       is_legacy_batch_mode,
                       climber_strategies_,
                       settings_.hyper_params},
-    average_termination_strategy_{handle_ptr_,
-                                  op_problem,
-                                  op_problem_scaled_,
-                                  average_op_problem_evaluation_cusparse_view_,
-                                  pdhg_solver_.get_cusparse_view(),
-                                  primal_size_h_,
-                                  dual_size_h_,
-                                  initial_scaling_strategy_,
-                                  settings_,
-                                  climber_strategies_},
+    average_termination_strategy_{
+      handle_ptr_,
+      op_problem,
+      op_problem_scaled_,
+      average_op_problem_evaluation_cusparse_view_,
+      pdhg_solver_.get_cusparse_view(),
+      settings_.hyper_params.never_restart_to_average ? 0 : primal_size_h_,
+      settings_.hyper_params.never_restart_to_average ? 0 : dual_size_h_,
+      initial_scaling_strategy_,
+      settings_,
+      climber_strategies_},
     current_termination_strategy_{handle_ptr_,
                                   op_problem,
                                   op_problem_scaled_,
@@ -349,7 +350,6 @@ pdlp_solver_t<i_t, f_t>::pdlp_solver_t(problem_t<i_t, f_t>& op_problem,
       ? -std::numeric_limits<f_t>::infinity()
       : std::numeric_limits<f_t>::infinity();
   op_problem.check_problem_representation(true, false);
-  op_problem_scaled_.check_problem_representation(true, false);

   if (batch_mode_) {
     batch_solution_to_return_.get_additional_termination_informations().resize(
@@ -2320,6 +2320,14 @@ optimization_problem_solution_t<i_t, f_t> pdlp_solver_t<i_t, f_t>::run_solver(co
   // Update FP32 matrix copies for mixed precision SpMV after scaling
   pdhg_solver_.get_cusparse_view().update_mixed_precision_matrices();

+  // Redirect cuSPARSE descriptors to use the original problem's structural data (offsets, indices),
+  // then free the duplicated structural vectors from the scaled copy to save device memory.
+  pdhg_solver_.get_cusparse_view().redirect_cusparse_csr_structure_pointers(*problem_ptr);
+  op_problem_scaled_.variables.resize(0, stream_view_);
+  op_problem_scaled_.offsets.resize(0, stream_view_);
+  op_problem_scaled_.reverse_constraints.resize(0, stream_view_);
+  op_problem_scaled_.reverse_offsets.resize(0, stream_view_);
+
   if (!settings_.hyper_params.compute_initial_step_size_before_scaling &&
       !settings_.get_initial_step_size().has_value())
     compute_initial_step_size();
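
The redirection above is presumably safe because PDLP's scaling multiplies the constraint matrix by diagonal factors, which rescales the nonzero values but leaves the CSR sparsity pattern intact: the scaled copy's offsets/variables and reverse_offsets/reverse_constraints arrays are duplicates of the original problem's, so once no cuSPARSE descriptor references them they can be dropped.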
4 changes: 2 additions & 2 deletions cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu
@@ -89,8 +89,8 @@ pdlp_restart_strategy_t<i_t, f_t>::pdlp_restart_strategy_t(
     restart_triggered_{0, stream_view_},
     candidate_is_avg_{0, stream_view_},
     avg_duality_gap_{handle_ptr_,
-                     is_cupdlpx_restart<i_t, f_t>(hyper_params) ? 0 : primal_size,
-                     is_cupdlpx_restart<i_t, f_t>(hyper_params) ? 0 : dual_size,
+                     hyper_params.never_restart_to_average ? 0 : primal_size,
+                     hyper_params.never_restart_to_average ? 0 : dual_size,
                      climber_strategies,
                      hyper_params},
     current_duality_gap_{handle_ptr_,
15 changes: 10 additions & 5 deletions cpp/src/pdlp/saddle_point.cu
@@ -7,6 +7,7 @@

 #include <cuopt/error.hpp>

+#include <pdlp/restart_strategy/pdlp_restart_strategy.cuh>
 #include <pdlp/saddle_point.hpp>
 #include <pdlp/swap_and_resize_helper.cuh>

@@ -17,18 +18,22 @@
 namespace cuopt::linear_programming::detail {

 template <typename i_t, typename f_t>
-saddle_point_state_t<i_t, f_t>::saddle_point_state_t(raft::handle_t const* handle_ptr,
-                                                     const i_t primal_size,
-                                                     const i_t dual_size,
-                                                     const size_t batch_size)
+saddle_point_state_t<i_t, f_t>::saddle_point_state_t(
+  raft::handle_t const* handle_ptr,
+  const i_t primal_size,
+  const i_t dual_size,
+  const size_t batch_size,
+  const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params)
   : primal_size_{primal_size},
     dual_size_{dual_size},
     primal_solution_{batch_size * primal_size, handle_ptr->get_stream()},
     dual_solution_{batch_size * dual_size, handle_ptr->get_stream()},
     delta_primal_{batch_size * primal_size, handle_ptr->get_stream()},
     delta_dual_{batch_size * dual_size, handle_ptr->get_stream()},
     // Primal gradient is only used in trust region restart mode which does not support batch mode
-    primal_gradient_{static_cast<size_t>(primal_size), handle_ptr->get_stream()},
+    primal_gradient_{
+      !is_cupdlpx_restart<i_t, f_t>(hyper_params) ? static_cast<size_t>(primal_size) : 0,
+      handle_ptr->get_stream()},
     dual_gradient_{batch_size * dual_size, handle_ptr->get_stream()},
     current_AtY_{batch_size * primal_size, handle_ptr->get_stream()},
     next_AtY_{batch_size * primal_size, handle_ptr->get_stream()}
5 changes: 4 additions & 1 deletion cpp/src/pdlp/saddle_point.hpp
@@ -7,6 +7,8 @@

 #pragma once

+#include <cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh>
+
 #include <raft/core/handle.hpp>

 #include <rmm/cuda_stream_view.hpp>
@@ -64,7 +66,8 @@ class saddle_point_state_t {
   saddle_point_state_t(raft::handle_t const* handle_ptr,
                        i_t primal_size,
                        i_t dual_size,
-                       size_t batch_size);
+                       size_t batch_size,
+                       const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params);

   /**
    * @brief Copies the values of the solutions in another saddle_point_state_t
@@ -81,11 +81,11 @@ infeasibility_information_t<i_t, f_t>::infeasibility_information_t(
       (!infeasibility_detection) ? 0 : static_cast<size_t>(dual_size_h_), stream_view_},
     homogenous_dual_upper_bounds_{
       (!infeasibility_detection) ? 0 : static_cast<size_t>(dual_size_h_), stream_view_},
-    primal_slack_{(is_cupdlpx_restart<i_t, f_t>(hyper_params))
+    primal_slack_{(is_cupdlpx_restart<i_t, f_t>(hyper_params) && infeasibility_detection)
                     ? static_cast<size_t>(dual_size_h_ * climber_strategies.size())
                     : 0,
                   stream_view_},
-    dual_slack_{(is_cupdlpx_restart<i_t, f_t>(hyper_params))
+    dual_slack_{(is_cupdlpx_restart<i_t, f_t>(hyper_params) && infeasibility_detection)
                   ? static_cast<size_t>(primal_size_h_ * climber_strategies.size())
                   : 0,
                 stream_view_},