diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index c051fde974..61b768b1d3 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -26,7 +26,7 @@ if [[ "${package_dir}" == "python/libcuopt" ]]; then ) else PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '510Mi' + --max-allowed-size-compressed '550Mi' ) fi elif [[ "${package_dir}" != "python/cuopt" ]] && \ diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index ce222e1192..d57bbb992f 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -278,7 +278,8 @@ problem_t::problem_t(const problem_t& problem_, bool no_deep deterministic(problem_.deterministic), handle_ptr(problem_.handle_ptr), integer_fixed_problem(problem_.integer_fixed_problem), - integer_fixed_variable_map(problem_.n_variables, handle_ptr->get_stream()), + integer_fixed_variable_map((!no_deep_copy) ? 0 : problem_.n_variables, + handle_ptr->get_stream()), n_variables(problem_.n_variables), n_constraints(problem_.n_constraints), n_binary_vars(problem_.n_binary_vars), @@ -342,10 +343,7 @@ problem_t::problem_t(const problem_t& problem_, bool no_deep (!no_deep_copy) ? rmm::device_uvector(problem_.combined_bounds, handle_ptr->get_stream()) : rmm::device_uvector(problem_.combined_bounds.size(), handle_ptr->get_stream())), - variable_types( - (!no_deep_copy) - ? rmm::device_uvector(problem_.variable_types, handle_ptr->get_stream()) - : rmm::device_uvector(problem_.variable_types.size(), handle_ptr->get_stream())), + variable_types((!no_deep_copy) ? 0 : problem_.variable_types.size(), handle_ptr->get_stream()), integer_indices((!no_deep_copy) ? 0 : problem_.integer_indices.size(), handle_ptr->get_stream()), binary_indices((!no_deep_copy) ? 0 : problem_.binary_indices.size(), handle_ptr->get_stream()), @@ -354,7 +352,8 @@ problem_t::problem_t(const problem_t& problem_, bool no_deep is_binary_variable((!no_deep_copy) ? 0 : problem_.is_binary_variable.size(), handle_ptr->get_stream()), related_variables(problem_.related_variables, handle_ptr->get_stream()), - related_variables_offsets(problem_.related_variables_offsets, handle_ptr->get_stream()), + related_variables_offsets((!no_deep_copy) ? 0 : problem_.related_variables_offsets.size(), + handle_ptr->get_stream()), var_names(problem_.var_names), row_names(problem_.row_names), objective_name(problem_.objective_name), diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 64ec44f5ef..359bb7e928 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -407,8 +407,9 @@ cusparse_view_t::cusparse_view_t( _tmp_primal.data(), CUSPARSE_ORDER_COL); - primal_gradient.create(op_problem_scaled.n_variables, - current_saddle_point_state.get_primal_gradient().data()); + primal_gradient.create( + current_saddle_point_state.get_primal_gradient().size(), // It is 0 in cupdlpx + current_saddle_point_state.get_primal_gradient().data()); dual_gradient.create(op_problem_scaled.n_constraints, current_saddle_point_state.get_dual_gradient().data()); @@ -1082,6 +1083,39 @@ void cusparse_view_t::update_mixed_precision_matrices() } } +// Redirects the cuSPARSE CSR structure pointers from op_problem_scaled_ to the original problem +// so the duplicated row/column buffers can be freed. +template +void cusparse_view_t::redirect_cusparse_csr_structure_pointers( + const problem_t& original_problem) +{ + RAFT_CUSPARSE_TRY(cusparseCsrSetPointers(A, + const_cast(original_problem.offsets.data()), + const_cast(original_problem.variables.data()), + const_cast(A_.data()))); + + RAFT_CUSPARSE_TRY( + cusparseCsrSetPointers(A_T, + const_cast(original_problem.reverse_offsets.data()), + const_cast(original_problem.reverse_constraints.data()), + const_cast(A_T_.data()))); + + if constexpr (std::is_same_v) { + if (mixed_precision_enabled_) { + RAFT_CUSPARSE_TRY(cusparseCsrSetPointers(A_mixed_, + const_cast(original_problem.offsets.data()), + const_cast(original_problem.variables.data()), + A_float_.data())); + + RAFT_CUSPARSE_TRY( + cusparseCsrSetPointers(A_T_mixed_, + const_cast(original_problem.reverse_offsets.data()), + const_cast(original_problem.reverse_constraints.data()), + A_T_float_.data())); + } + } +} + // Mixed precision SpMV implementation: FP32 matrix with FP64 vectors and FP64 compute type size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, cusparseOperation_t opA, diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index 416a0b1e5f..c6d0ddea61 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -208,6 +208,10 @@ class cusparse_view_t { // Update FP32 matrix copies after scaling (must be called after scale_problem()) void update_mixed_precision_matrices(); + + // Redirects the cuSPARSE CSR structure pointers from op_problem_scaled_ to the original problem + // so the duplicated row/column buffers can be freed. + void redirect_cusparse_csr_structure_pointers(const problem_t& original_problem); }; // Mixed precision SpMV: FP32 matrix with FP64 vectors and FP64 compute type diff --git a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu index ddcd78a1aa..c79249c45d 100644 --- a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu @@ -131,6 +131,9 @@ pdlp_initial_scaling_strategy_t::pdlp_initial_scaling_strategy_t( f_t(1)); compute_scaling_vectors(number_of_ruiz_iterations, alpha); + + iteration_constraint_matrix_scaling_.resize(0, stream_view_); + iteration_variable_scaling_.resize(0, stream_view_); } template diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index 7301443bad..d9dbb083f9 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -55,8 +55,11 @@ pdhg_solver_t::pdhg_solver_t( problem_ptr(&op_problem_scaled), primal_size_h_(problem_ptr->n_variables), dual_size_h_(problem_ptr->n_constraints), - current_saddle_point_state_{ - handle_ptr_, problem_ptr->n_variables, problem_ptr->n_constraints, climber_strategies.size()}, + current_saddle_point_state_{handle_ptr_, + problem_ptr->n_variables, + problem_ptr->n_constraints, + climber_strategies.size(), + hyper_params}, tmp_primal_{(climber_strategies.size() * problem_ptr->n_variables), stream_view_}, tmp_dual_{(climber_strategies.size() * problem_ptr->n_constraints), stream_view_}, potential_next_primal_solution_{(climber_strategies.size() * problem_ptr->n_variables), diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index f4b3d8b9bd..49c77e44dc 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -233,16 +233,17 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, is_legacy_batch_mode, climber_strategies_, settings_.hyper_params}, - average_termination_strategy_{handle_ptr_, - op_problem, - op_problem_scaled_, - average_op_problem_evaluation_cusparse_view_, - pdhg_solver_.get_cusparse_view(), - primal_size_h_, - dual_size_h_, - initial_scaling_strategy_, - settings_, - climber_strategies_}, + average_termination_strategy_{ + handle_ptr_, + op_problem, + op_problem_scaled_, + average_op_problem_evaluation_cusparse_view_, + pdhg_solver_.get_cusparse_view(), + settings_.hyper_params.never_restart_to_average ? 0 : primal_size_h_, + settings_.hyper_params.never_restart_to_average ? 0 : dual_size_h_, + initial_scaling_strategy_, + settings_, + climber_strategies_}, current_termination_strategy_{handle_ptr_, op_problem, op_problem_scaled_, @@ -349,7 +350,6 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, ? -std::numeric_limits::infinity() : std::numeric_limits::infinity(); op_problem.check_problem_representation(true, false); - op_problem_scaled_.check_problem_representation(true, false); if (batch_mode_) { batch_solution_to_return_.get_additional_termination_informations().resize( @@ -2320,6 +2320,14 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co // Update FP32 matrix copies for mixed precision SpMV after scaling pdhg_solver_.get_cusparse_view().update_mixed_precision_matrices(); + // Redirect cuSPARSE descriptors to use the original problem's structural data (offsets, indices), + // then free the duplicated structural vectors from the scaled copy to save device memory. + pdhg_solver_.get_cusparse_view().redirect_cusparse_csr_structure_pointers(*problem_ptr); + op_problem_scaled_.variables.resize(0, stream_view_); + op_problem_scaled_.offsets.resize(0, stream_view_); + op_problem_scaled_.reverse_constraints.resize(0, stream_view_); + op_problem_scaled_.reverse_offsets.resize(0, stream_view_); + if (!settings_.hyper_params.compute_initial_step_size_before_scaling && !settings_.get_initial_step_size().has_value()) compute_initial_step_size(); diff --git a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu index c0b84c8f72..17c7abcac5 100644 --- a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu @@ -89,8 +89,8 @@ pdlp_restart_strategy_t::pdlp_restart_strategy_t( restart_triggered_{0, stream_view_}, candidate_is_avg_{0, stream_view_}, avg_duality_gap_{handle_ptr_, - is_cupdlpx_restart(hyper_params) ? 0 : primal_size, - is_cupdlpx_restart(hyper_params) ? 0 : dual_size, + hyper_params.never_restart_to_average ? 0 : primal_size, + hyper_params.never_restart_to_average ? 0 : dual_size, climber_strategies, hyper_params}, current_duality_gap_{handle_ptr_, diff --git a/cpp/src/pdlp/saddle_point.cu b/cpp/src/pdlp/saddle_point.cu index 157e7fa389..f740176a3c 100644 --- a/cpp/src/pdlp/saddle_point.cu +++ b/cpp/src/pdlp/saddle_point.cu @@ -7,6 +7,7 @@ #include +#include #include #include @@ -17,10 +18,12 @@ namespace cuopt::linear_programming::detail { template -saddle_point_state_t::saddle_point_state_t(raft::handle_t const* handle_ptr, - const i_t primal_size, - const i_t dual_size, - const size_t batch_size) +saddle_point_state_t::saddle_point_state_t( + raft::handle_t const* handle_ptr, + const i_t primal_size, + const i_t dual_size, + const size_t batch_size, + const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params) : primal_size_{primal_size}, dual_size_{dual_size}, primal_solution_{batch_size * primal_size, handle_ptr->get_stream()}, @@ -28,7 +31,9 @@ saddle_point_state_t::saddle_point_state_t(raft::handle_t const* handl delta_primal_{batch_size * primal_size, handle_ptr->get_stream()}, delta_dual_{batch_size * dual_size, handle_ptr->get_stream()}, // Primal gradient is only used in trust region restart mode which does not support batch mode - primal_gradient_{static_cast(primal_size), handle_ptr->get_stream()}, + primal_gradient_{ + !is_cupdlpx_restart(hyper_params) ? static_cast(primal_size) : 0, + handle_ptr->get_stream()}, dual_gradient_{batch_size * dual_size, handle_ptr->get_stream()}, current_AtY_{batch_size * primal_size, handle_ptr->get_stream()}, next_AtY_{batch_size * primal_size, handle_ptr->get_stream()} diff --git a/cpp/src/pdlp/saddle_point.hpp b/cpp/src/pdlp/saddle_point.hpp index 7e8f87fa25..eb6b8025cf 100644 --- a/cpp/src/pdlp/saddle_point.hpp +++ b/cpp/src/pdlp/saddle_point.hpp @@ -7,6 +7,8 @@ #pragma once +#include + #include #include @@ -64,7 +66,8 @@ class saddle_point_state_t { saddle_point_state_t(raft::handle_t const* handle_ptr, i_t primal_size, i_t dual_size, - size_t batch_size); + size_t batch_size, + const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params); /** * @brief Copies the values of the solutions in another saddle_point_state_t diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index f795d2c4ca..9268e17910 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -81,11 +81,11 @@ infeasibility_information_t::infeasibility_information_t( (!infeasibility_detection) ? 0 : static_cast(dual_size_h_), stream_view_}, homogenous_dual_upper_bounds_{ (!infeasibility_detection) ? 0 : static_cast(dual_size_h_), stream_view_}, - primal_slack_{(is_cupdlpx_restart(hyper_params)) + primal_slack_{(is_cupdlpx_restart(hyper_params) && infeasibility_detection) ? static_cast(dual_size_h_ * climber_strategies.size()) : 0, stream_view_}, - dual_slack_{(is_cupdlpx_restart(hyper_params)) + dual_slack_{(is_cupdlpx_restart(hyper_params) && infeasibility_detection) ? static_cast(primal_size_h_ * climber_strategies.size()) : 0, stream_view_},