From c721556488391b8f80df4427aa2a58b71386c8b4 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 22 May 2026 14:48:27 -0700 Subject: [PATCH 01/10] Extend the barrier solver to SOCP problems This PR extends the 2x2 augmented system formulation of the barrier solver to solve SOCP problems. It supports both explicit cone variables as well as the cone rows in the Ax=b constraint. It uses Nesterov-Todd scaling to make the conic diagonal blocks symmetric. Co-authored-by: Yan Zaretskiy Co-authored-by: Yan Zaretskiy Co-authored-by: Yuwen Chen --- .../cuopt/linear_programming/constants.h | 4 + .../linear_programming/io/data_model_view.hpp | 6 +- .../linear_programming/io/mps_data_model.hpp | 20 +- .../optimization_problem_interface.hpp | 11 +- .../pdlp/solver_settings.hpp | 3 + cpp/src/barrier/barrier.cu | 1818 ++++++++++++----- cpp/src/barrier/barrier.hpp | 2 + cpp/src/barrier/dense_vector.hpp | 22 + cpp/src/dual_simplex/presolve.cpp | 560 +++-- cpp/src/dual_simplex/presolve.hpp | 10 +- cpp/src/dual_simplex/scaling.cpp | 225 +- cpp/src/dual_simplex/scaling.hpp | 8 +- .../dual_simplex/simplex_solver_settings.hpp | 4 +- cpp/src/dual_simplex/solve.cpp | 88 +- cpp/src/dual_simplex/user_problem.hpp | 2 + cpp/src/dual_simplex/vector_math.cuh | 21 +- cpp/src/io/lp_parser.cpp | 43 +- cpp/src/io/mps_data_model.cpp | 78 +- cpp/src/io/mps_parser.cpp | 370 +++- cpp/src/io/mps_writer.cpp | 40 +- cpp/src/math_optimization/solver_settings.cu | 3 + cpp/src/pdlp/solve.cu | 31 +- .../infeasibility_information.cu | 6 - cpp/src/pdlp/translate.hpp | 23 +- cpp/tests/dual_simplex/CMakeLists.txt | 3 +- .../dual_simplex/unit_tests/solve_barrier.cu | 819 +++++++- cpp/tests/linear_programming/parser_test.cpp | 40 +- cpp/tests/qp/unit_tests/no_constraints.cu | 2 + cpp/tests/qp/unit_tests/two_variable_test.cu | 1 + .../data_model/data_model.pxd | 18 + .../data_model/data_model.py | 86 + .../data_model/data_model_wrapper.pxd | 4 + .../data_model/data_model_wrapper.pyx | 140 +- .../cuopt/linear_programming/io/parser.pxd | 12 + .../linear_programming/io/parser_wrapper.pyx | 47 + .../cuopt/cuopt/linear_programming/problem.py | 226 +- .../linear_programming/test_python_API.py | 10 + 37 files changed, 3795 insertions(+), 1011 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 3342ba375c..1dfccef1ea 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -47,6 +47,10 @@ #define CUOPT_DUALIZE "dualize" #define CUOPT_ORDERING "ordering" #define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" +#define CUOPT_BARRIER_RELATIVE_FEASIBILITY_TOLERANCE "barrier_relative_feasibility_tolerance" +#define CUOPT_BARRIER_RELATIVE_OPTIMALITY_TOLERANCE "barrier_relative_optimality_tolerance" +#define CUOPT_BARRIER_RELATIVE_COMPLEMENTARITY_TOLERANCE \ + "barrier_relative_complementarity_tolerance" #define CUOPT_BARRIER_ITERATIVE_REFINEMENT "barrier_iterative_refinement" #define CUOPT_BARRIER_STEP_SCALE "barrier_step_scale" #define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" diff --git a/cpp/include/cuopt/linear_programming/io/data_model_view.hpp b/cpp/include/cuopt/linear_programming/io/data_model_view.hpp index ca2fd30393..aa76d122bf 100644 --- a/cpp/include/cuopt/linear_programming/io/data_model_view.hpp +++ b/cpp/include/cuopt/linear_programming/io/data_model_view.hpp @@ -434,9 +434,9 @@ class data_model_view_t { std::vector(qc.linear_values.begin(), qc.linear_values.end()), std::vector(qc.linear_indices.begin(), qc.linear_indices.end()), static_cast(qc.rhs_value), - std::vector(qc.quadratic_values.begin(), qc.quadratic_values.end()), - std::vector(qc.quadratic_indices.begin(), qc.quadratic_indices.end()), - std::vector(qc.quadratic_offsets.begin(), qc.quadratic_offsets.end())}); + std::vector(qc.quadratic_row_indices.begin(), qc.quadratic_row_indices.end()), + std::vector(qc.quadratic_col_indices.begin(), qc.quadratic_col_indices.end()), + std::vector(qc.quadratic_values.begin(), qc.quadratic_values.end())}); } } diff --git a/cpp/include/cuopt/linear_programming/io/mps_data_model.hpp b/cpp/include/cuopt/linear_programming/io/mps_data_model.hpp index 9828a00c0c..dee74a3a42 100644 --- a/cpp/include/cuopt/linear_programming/io/mps_data_model.hpp +++ b/cpp/include/cuopt/linear_programming/io/mps_data_model.hpp @@ -239,7 +239,7 @@ class mps_data_model_t { * - row identity and type (from ROWS), * - sparse linear coefficients (from COLUMNS), * - RHS value (from RHS), - * - quadratic matrix Q in CSR (from QCMATRIX). + * - quadratic matrix Q in COO (SoA: row, col, value) from QCMATRIX — one triplet per nonzero. */ struct quadratic_constraint_t { /** ROWS declaration index (among all constraint rows), not an index into the linear CSR. */ @@ -251,19 +251,21 @@ class mps_data_model_t { std::vector linear_values{}; std::vector linear_indices{}; f_t rhs_value{f_t(0)}; + /** Q nonzeros: parallel arrays, same length (COO / SoA). Sorted by (row, col) in append. */ + std::vector quadratic_row_indices{}; + std::vector quadratic_col_indices{}; std::vector quadratic_values{}; - std::vector quadratic_indices{}; - std::vector quadratic_offsets{}; }; /** * @brief Append one complete quadratic constraint (row + linear + rhs + quadratic Q). * @note All span inputs are host memory; the model copies this data. * @param linear_values, linear_indices Same nnz; can be empty for a purely quadratic row (rare). - * @param quadratic_values, quadratic_indices CSR nnz; may be empty if Q is empty. - * @param quadratic_offsets CSR row starts; must be non-empty. - * @param constraint_row_type MPS ROWS type; must be 'L'. 'G' and 'E' quadratic rows are not - * supported. + * @param quadratic_values, quadratic_row_indices, quadratic_col_indices COO triplets; same + * length; may all be empty if Q is empty. Stored sorted by (row, col). + * @param constraint_row_type MPS ROWS type: 'L' (<=) or 'G' (>=). Stored as given; 'G' rows are + * converted to '<=' form when building the SOCP for the barrier solver. Equality ('E') is + * not supported. */ void append_quadratic_constraint(i_t constraint_row_index, const std::string& constraint_row_name, @@ -272,8 +274,8 @@ class mps_data_model_t { std::span linear_indices, f_t rhs_value, std::span quadratic_values, - std::span quadratic_indices, - std::span quadratic_offsets); + std::span quadratic_row_indices, + std::span quadratic_col_indices); const std::vector& get_quadratic_constraints() const; diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp index aa164ca756..56b373c897 100644 --- a/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp +++ b/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp @@ -64,9 +64,10 @@ class optimization_problem_interface_t { std::vector linear_values{}; std::vector linear_indices{}; f_t rhs_value{f_t(0)}; + /** Q in COO: parallel arrays, same length. */ + std::vector quadratic_row_indices{}; + std::vector quadratic_col_indices{}; std::vector quadratic_values{}; - std::vector quadratic_indices{}; - std::vector quadratic_offsets{}; }; virtual ~optimization_problem_interface_t() = default; @@ -89,9 +90,9 @@ class optimization_problem_interface_t { std::vector(qc.linear_values.begin(), qc.linear_values.end()), std::vector(qc.linear_indices.begin(), qc.linear_indices.end()), static_cast(qc.rhs_value), - std::vector(qc.quadratic_values.begin(), qc.quadratic_values.end()), - std::vector(qc.quadratic_indices.begin(), qc.quadratic_indices.end()), - std::vector(qc.quadratic_offsets.begin(), qc.quadratic_offsets.end())}); + std::vector(qc.quadratic_row_indices.begin(), qc.quadratic_row_indices.end()), + std::vector(qc.quadratic_col_indices.begin(), qc.quadratic_col_indices.end()), + std::vector(qc.quadratic_values.begin(), qc.quadratic_values.end())}); } set_quadratic_constraints(std::move(converted_constraints)); } diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index b30286f9ce..3597b85016 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -280,6 +280,9 @@ class pdlp_solver_settings_t { i_t dualize{-1}; i_t ordering{-1}; i_t barrier_dual_initial_point{-1}; + f_t barrier_relative_feasibility_tolerance{1.0e-8}; + f_t barrier_relative_optimality_tolerance{1.0e-8}; + f_t barrier_relative_complementarity_tolerance{1.0e-8}; bool eliminate_dense_columns{true}; pdlp_precision_t pdlp_precision{pdlp_precision_t::DefaultPrecision}; bool barrier_iterative_refinement{true}; diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index e7604fb60e..fd6f9dbc6e 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -34,6 +35,8 @@ #include #include +#include +#include #include #include @@ -43,6 +46,7 @@ #include #include #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -61,6 +65,29 @@ namespace cuopt::linear_programming::dual_simplex { cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream.value()); } +// out[i] = is_native_free_linear[i] ? 0 : a[i] * b[i] +[[maybe_unused]] static void pairwise_multiply_skip_native_free_linear( + float* a, float* b, int* is_native_free_linear, float* out, int size, rmm::cuda_stream_view stream) +{ + cub::DeviceTransform::Transform( + cuda::std::make_tuple(a, b, is_native_free_linear), + out, + size, + [] __host__ __device__(float x_j, float d_j, int free_j) { return free_j ? 0.f : x_j * d_j; }, + stream.value()); +} + +[[maybe_unused]] static void pairwise_multiply_skip_native_free_linear( + double* a, double* b, int* is_native_free_linear, double* out, int size, rmm::cuda_stream_view stream) +{ + cub::DeviceTransform::Transform( + cuda::std::make_tuple(a, b, is_native_free_linear), + out, + size, + [] __host__ __device__(double x_j, double d_j, int free_j) { return free_j ? 0.0 : x_j * d_j; }, + stream.value()); +} + [[maybe_unused]] static void axpy( float alpha, float* x, float beta, float* y, float* out, int size, rmm::cuda_stream_view stream) { @@ -93,7 +120,7 @@ class iteration_data_t { public: iteration_data_t(const lp_problem_t& lp, i_t num_upper_bounds, - const std::vector& free_variable_indices, + const std::vector& native_free_linear_indices, const csc_matrix_t& Qin, const simplex_solver_settings_t& settings) : upper_bounds(num_upper_bounds), @@ -164,10 +191,12 @@ class iteration_data_t { d_inv_diag(lp.num_cols, lp.handle_ptr->get_stream()), d_cols_to_remove(0, lp.handle_ptr->get_stream()), d_augmented_diagonal_indices_(0, lp.handle_ptr->get_stream()), + d_cone_csr_indices_(0, lp.handle_ptr->get_stream()), + d_cone_Q_values_(0, lp.handle_ptr->get_stream()), use_augmented(false), has_factorization(false), - n_free_vars(0), - d_is_free_(0, lp.handle_ptr->get_stream()), + n_native_free_linear(0), + d_is_native_free_linear_(0, lp.handle_ptr->get_stream()), num_factorizations(0), has_solve_info(false), settings_(settings), @@ -209,9 +238,11 @@ class iteration_data_t { d_dw_residual_(0, lp.handle_ptr->get_stream()), d_wv_residual_(0, lp.handle_ptr->get_stream()), d_bound_rhs_(0, lp.handle_ptr->get_stream()), - d_complementarity_xz_rhs_(lp.num_cols, lp.handle_ptr->get_stream()), + d_complementarity_xz_rhs_(0, lp.handle_ptr->get_stream()), d_complementarity_wv_rhs_(0, lp.handle_ptr->get_stream()), d_dual_rhs_(lp.num_cols, lp.handle_ptr->get_stream()), + d_complementarity_target_(lp.num_cols, lp.handle_ptr->get_stream()), + d_cone_hessian_dx_(0, lp.handle_ptr->get_stream()), d_Q_diag_(0, lp.handle_ptr->get_stream()), d_Qx_(Qin.m, lp.handle_ptr->get_stream()), restrict_u_(0), @@ -219,192 +250,242 @@ class iteration_data_t { sum_reduce_helper_(lp.handle_ptr->get_stream()), indefinite_Q(false), Q_diagonal(false), - symbolic_status(0) + symbolic_status(0), + cone_combined_step_(false), + cone_sigma_mu_(f_t(0)) { raft::common::nvtx::range fun_scope("Barrier: LP Data Creation"); - // Set up free variable tracking for QPs - if (!free_variable_indices.empty()) { - n_free_vars = free_variable_indices.size(); - std::vector is_free_host(lp.num_cols, 0); - for (i_t j : free_variable_indices) { - is_free_host[j] = 1; + { + raft::common::nvtx::range scope("Barrier: LP Data: native free linear"); + // Set up native free linear variable tracking (linear columns only, j < cone_start). + if (!native_free_linear_indices.empty()) { + n_native_free_linear = native_free_linear_indices.size(); + std::vector is_native_free_linear_host(lp.num_cols, 0); + for (i_t j : native_free_linear_indices) { + is_native_free_linear_host[j] = 1; + } + d_is_native_free_linear_.resize(lp.num_cols, stream_view_); + raft::copy( + d_is_native_free_linear_.data(), is_native_free_linear_host.data(), lp.num_cols, stream_view_); + settings.log.printf("Native free linear (QP): %d\n", n_native_free_linear); } - d_is_free_.resize(lp.num_cols, stream_view_); - raft::copy(d_is_free_.data(), is_free_host.data(), lp.num_cols, stream_view_); - settings.log.printf("Free variables (QP) : %d\n", n_free_vars); } bool has_Q = Q.x.size() > 0; indefinite_Q = false; - if (has_Q) { - Qdiag.resize(lp.num_cols, 0.0); - - for (i_t j = 0; j < Q.n; j++) { - const i_t col_start = Q.col_start[j]; - const i_t col_end = Q.col_start[j + 1]; - for (i_t p = col_start; p < col_end; p++) { - const i_t i = Q.i[p]; - if (j == i) { - Qdiag[j] = Q.x[p]; - break; + { + raft::common::nvtx::range scope("Barrier: LP Data: Q setup"); + if (has_Q) { + Qdiag.resize(lp.num_cols, 0.0); + + for (i_t j = 0; j < Q.n; j++) { + const i_t col_start = Q.col_start[j]; + const i_t col_end = Q.col_start[j + 1]; + for (i_t p = col_start; p < col_end; p++) { + const i_t i = Q.i[p]; + if (j == i) { + Qdiag[j] = Q.x[p]; + break; + } } } - } - Q_diagonal = Q.is_diagonal(); + Q_diagonal = Q.is_diagonal(); - if (Q_diagonal) { - // Check to ensure that Q is positive semi-definite - for (i_t j = 0; j < lp.num_cols; j++) { - if (Qdiag[j] < 0.0) { - settings_.log.printf( - "Q is not positive semidefinite: Q(%d, %d) = %e\n", j, j, Qdiag[j]); - indefinite_Q = true; - return; + if (Q_diagonal) { + // Check to ensure that Q is positive semi-definite + for (i_t j = 0; j < lp.num_cols; j++) { + if (Qdiag[j] < 0.0) { + settings_.log.printf( + "Q is not positive semidefinite: Q(%d, %d) = %e\n", j, j, Qdiag[j]); + indefinite_Q = true; + return; + } } + } else if (settings.check_Q) { + // TODO: Check to ensure that Q is positive semi-definite + // This requires us to perform a Cholesky factorization. } - } else if (settings.check_Q) { - // TODO: Check to ensure that Q is positive semi-definite - // This requires us to perform a Cholesky factorization. + + d_Q_diag_.resize(lp.num_cols, stream_view_); + raft::copy(d_Q_diag_.data(), Qdiag.data(), Qdiag.size(), stream_view_); } + } - d_Q_diag_.resize(lp.num_cols, stream_view_); - raft::copy(d_Q_diag_.data(), Qdiag.data(), Qdiag.size(), stream_view_); + { + raft::common::nvtx::range scope("Barrier: LP Data: SOC setup"); + if (!lp.second_order_cone_dims.empty()) { + cone_var_start_ = lp.cone_var_start; + i_t total_cone_dim = + std::accumulate(lp.second_order_cone_dims.begin(), lp.second_order_cone_dims.end(), i_t(0)); + cuopt_assert(cone_var_start_ >= 0, "cone_var_start must be nonnegative"); + cuopt_assert(cone_var_start_ + total_cone_dim <= lp.num_cols, + "cone variables exceed problem dimension"); + cuopt_assert(cone_var_start_ + total_cone_dim == lp.num_cols, + "barrier expects [linear | cone] layout"); + cones_.emplace( + std::span(lp.second_order_cone_dims.data(), lp.second_order_cone_dims.size()), + raft::device_span{}, + raft::device_span{}, + stream_view_); + cuopt_assert(cone_count() > 0, "second-order cone topology must contain at least one cone"); + cuopt_assert(cone_entry_count() == total_cone_dim, "second-order cone entry count mismatch"); + } } - // Allocating GPU flag data for Form ADAT - RAFT_CUDA_TRY(cub::DeviceSelect::Flagged( - nullptr, - flag_buffer_size, - d_inv_diag_prime.data(), // Not the actual input but just to allcoate the memory - thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not{}), - d_inv_diag_prime.data(), - d_num_flag.data(), - inv_diag.size(), - stream_view_)); + { + raft::common::nvtx::range scope("Barrier: LP Data: complementarity buffers"); + const i_t linear_xz_rhs_size = linear_xz_size(lp.num_cols); + d_complementarity_xz_rhs_.resize(linear_xz_rhs_size, stream_view_); - d_flag_buffer.resize(flag_buffer_size, stream_view_); + // Allocating GPU flag data for Form ADAT + RAFT_CUDA_TRY(cub::DeviceSelect::Flagged( + nullptr, + flag_buffer_size, + d_inv_diag_prime.data(), // Not the actual input but just to allcoate the memory + thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not{}), + d_inv_diag_prime.data(), + d_num_flag.data(), + inv_diag.size(), + stream_view_)); - // Create the upper bounds vector - n_upper_bounds = 0; - for (i_t j = 0; j < lp.num_cols; j++) { - if (lp.upper[j] < inf) { upper_bounds[n_upper_bounds++] = j; } + d_flag_buffer.resize(flag_buffer_size, stream_view_); } - if (n_upper_bounds > 0) { - settings.log.printf("Upper bounds : %d\n", n_upper_bounds); + + { + raft::common::nvtx::range scope("Barrier: LP Data: upper bounds"); + // Create the upper bounds vector + n_upper_bounds = 0; + for (i_t j = 0; j < lp.num_cols; j++) { + if (lp.upper[j] < inf) { upper_bounds[n_upper_bounds++] = j; } + } + if (n_upper_bounds > 0) { + settings.log.printf("Upper bounds : %d\n", n_upper_bounds); + } } - // Decide if we are going to use the augmented system or not - n_dense_columns = 0; - i_t n_dense_rows = 0; - i_t max_row_nz = 0; - f_t estimated_nz_AAT = 0.0; std::vector dense_columns_unordered; + { + raft::common::nvtx::range scope("Barrier: LP Data: dense columns and augmented"); + // Decide if we are going to use the augmented system or not + n_dense_columns = 0; + i_t n_dense_rows = 0; + i_t max_row_nz = 0; + f_t estimated_nz_AAT = 0.0; + + const bool has_soc = has_cones(); + if (has_Q || has_soc) { + // QP and SOCP always use the augmented KKT; skip dense-column / ADAT heuristics. + use_augmented = true; + n_dense_columns = 0; + } else { + f_t start_column_density = tic(); - f_t start_column_density = tic(); - - // Do not look for dense columns if Q is not diagonal - if (!has_Q || Q_diagonal) { - find_dense_columns( - lp.A, settings, dense_columns_unordered, n_dense_rows, max_row_nz, estimated_nz_AAT); - } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + // Do not look for dense columns if Q is not diagonal + if (!has_Q || Q_diagonal) { + find_dense_columns( + lp.A, settings, dense_columns_unordered, n_dense_rows, max_row_nz, estimated_nz_AAT); + } + if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } #ifdef PRINT_INFO - for (i_t j : dense_columns_unordered) { - settings.log.printf("Dense column %6d\n", j); - } + for (i_t j : dense_columns_unordered) { + settings.log.printf("Dense column %6d\n", j); + } #endif - float64_t column_density_time = toc(start_column_density); - if (!settings.eliminate_dense_columns) { dense_columns_unordered.clear(); } - n_dense_columns = static_cast(dense_columns_unordered.size()); - if (n_dense_columns > 0) { - settings.log.printf("Dense columns : %d\n", n_dense_columns); - } - if (n_dense_rows > 0) { - settings.log.printf("Dense rows : %d\n", n_dense_rows); - } - settings.log.printf("Density estimator time : %.2fs\n", column_density_time); - if ((settings.augmented != 0) && - (n_dense_columns > 50 || n_dense_rows > 10 || - lp.A.m == 0 /* handle case with no constraints */ || - (max_row_nz > 5000 && estimated_nz_AAT > 1e10) || settings.augmented == 1)) { - use_augmented = true; - n_dense_columns = 0; - } - - if (has_Q && !use_augmented) { - // For now let's not deal with dense columns - n_dense_columns = 0; - use_augmented = !Q_diagonal; - } + float64_t column_density_time = toc(start_column_density); + if (!settings.eliminate_dense_columns) { dense_columns_unordered.clear(); } + n_dense_columns = static_cast(dense_columns_unordered.size()); + if (n_dense_columns > 0) { + settings.log.printf("Dense columns : %d\n", n_dense_columns); + } + if (n_dense_rows > 0) { + settings.log.printf("Dense rows : %d\n", n_dense_rows); + } + settings.log.printf("Density estimator time : %.2fs\n", column_density_time); + if ((settings.augmented != 0) && + (n_dense_columns > 50 || n_dense_rows > 10 || + lp.A.m == 0 /* handle case with no constraints */ || + (max_row_nz > 5000 && estimated_nz_AAT > 1e10) || settings.augmented == 1)) { + use_augmented = true; + n_dense_columns = 0; + } + } - if (use_augmented) { - settings.log.printf("Linear system : augmented\n"); - } else { - settings.log.printf("Linear system : ADAT\n"); + if (use_augmented) { + settings.log.printf("Linear system : augmented\n"); + } else { + settings.log.printf("Linear system : ADAT\n"); + } } - // D = I + EET - diag.set_scalar(1.0); - if (n_upper_bounds > 0) { - for (i_t k = 0; k < n_upper_bounds; k++) { - i_t j = upper_bounds[k]; - diag[j] = 2.0; + { + raft::common::nvtx::range scope("Barrier: LP Data: diag and inv_diag"); + // D = I + EET + diag.set_scalar(1.0); + if (n_upper_bounds > 0) { + for (i_t k = 0; k < n_upper_bounds; k++) { + i_t j = upper_bounds[k]; + diag[j] = 2.0; + } } - } - // D = I + EET + Q (if Q is diagonal) - if (has_Q && !use_augmented) { - // this means that Q is diagonal - for (i_t j = 0; j < Q.n; j++) { - diag[j] += Qdiag[j]; + // D = I + EET + Q (if Q is diagonal) + if (has_Q && !use_augmented) { + // this means that Q is diagonal + for (i_t j = 0; j < Q.n; j++) { + diag[j] += Qdiag[j]; + } } - } - inv_diag.set_scalar(1.0); - if (use_augmented) { diag.multiply_scalar(-1.0); } - if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { diag.inverse(inv_diag); } - // TMP diag and inv_diag should directly created and filled on the GPU - raft::copy(d_inv_diag.data(), inv_diag.data(), inv_diag.size(), stream_view_); - inv_sqrt_diag.set_scalar(1.0); - if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { inv_diag.sqrt(inv_sqrt_diag); } + inv_diag.set_scalar(1.0); + if (use_augmented) { diag.multiply_scalar(-1.0); } + if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { diag.inverse(inv_diag); } + // TMP diag and inv_diag should directly created and filled on the GPU + raft::copy(d_inv_diag.data(), inv_diag.data(), inv_diag.size(), stream_view_); + inv_sqrt_diag.set_scalar(1.0); + if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { inv_diag.sqrt(inv_sqrt_diag); } + } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } - // Copy A into AD - AD = lp.A; - if (!use_augmented && n_dense_columns > 0) { - cols_to_remove.resize(lp.num_cols, 0); - for (i_t k : dense_columns_unordered) { - cols_to_remove[k] = 1; - } - d_cols_to_remove.resize(cols_to_remove.size(), stream_view_); - raft::copy( - d_cols_to_remove.data(), cols_to_remove.data(), cols_to_remove.size(), stream_view_); - dense_columns.clear(); - dense_columns.reserve(n_dense_columns); - for (i_t j = 0; j < lp.num_cols; j++) { - if (cols_to_remove[j]) { dense_columns.push_back(j); } - } - AD.remove_columns(cols_to_remove); + { + raft::common::nvtx::range scope("Barrier: LP Data: AD matrix setup"); + // Copy A into AD + AD = lp.A; + if (!use_augmented && n_dense_columns > 0) { + cols_to_remove.resize(lp.num_cols, 0); + for (i_t k : dense_columns_unordered) { + cols_to_remove[k] = 1; + } + d_cols_to_remove.resize(cols_to_remove.size(), stream_view_); + raft::copy( + d_cols_to_remove.data(), cols_to_remove.data(), cols_to_remove.size(), stream_view_); + dense_columns.clear(); + dense_columns.reserve(n_dense_columns); + for (i_t j = 0; j < lp.num_cols; j++) { + if (cols_to_remove[j]) { dense_columns.push_back(j); } + } + AD.remove_columns(cols_to_remove); - sparse_mark.resize(lp.num_cols, 1); - for (i_t k : dense_columns) { - sparse_mark[k] = 0; - } + sparse_mark.resize(lp.num_cols, 1); + for (i_t k : dense_columns) { + sparse_mark[k] = 0; + } - A_dense.resize(AD.m, n_dense_columns); - i_t k = 0; - for (i_t j : dense_columns) { - A_dense.from_sparse(lp.A, j, k++); + A_dense.resize(AD.m, n_dense_columns); + i_t k = 0; + for (i_t j : dense_columns) { + A_dense.from_sparse(lp.A, j, k++); + } } - } - AD.transpose(AT); + AD.transpose(AT); + } // device_AD / device_A / ADAT path is only used when forming ADAT (!use_augmented). if (!use_augmented) { + raft::common::nvtx::range scope("Barrier: LP Data: device AD path"); device_AD.copy(AD, handle_ptr->get_stream()); d_original_A_values.resize(device_AD.x.size(), handle_ptr->get_stream()); raft::copy(d_original_A_values.data(), @@ -421,44 +502,185 @@ class iteration_data_t { } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } - i_t factorization_size = use_augmented ? lp.num_rows + lp.num_cols : lp.num_rows; - chol = - std::make_unique>(handle_ptr, settings, factorization_size); - chol->set_positive_definite(false); + { + raft::common::nvtx::range scope("Barrier: LP Data: Cholesky init"); + i_t factorization_size = use_augmented ? lp.num_rows + lp.num_cols : lp.num_rows; + chol = std::make_unique>( + handle_ptr, settings, factorization_size); + chol->set_positive_definite(false); + } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } - // Perform symbolic analysis - symbolic_status = 0; - if (use_augmented) { - // Build the sparsity pattern of the augmented system - form_augmented(true); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } - symbolic_status = chol->analyze(device_augmented); - } else { - form_adat(true); - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } - symbolic_status = chol->analyze(device_ADAT); + { + raft::common::nvtx::range scope("Barrier: LP Data: symbolic analysis"); + // Perform symbolic analysis + symbolic_status = 0; + if (use_augmented) { + { + raft::common::nvtx::range form_scope("Barrier: LP Data: form augmented"); + // Build the sparsity pattern of the augmented system + form_augmented(true); + } + if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + symbolic_status = chol->analyze(device_augmented); + } else { + { + raft::common::nvtx::range form_scope("Barrier: LP Data: form ADAT"); + form_adat(true); + } + if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } + symbolic_status = chol->analyze(device_ADAT); + } + } + } + + bool has_cones() const { return cones_.has_value(); } + + cone_data_t& cones() + { + cuopt_assert(cones_.has_value(), "second-order cone data is not initialized"); + return *cones_; + } + + const cone_data_t& cones() const + { + cuopt_assert(cones_.has_value(), "second-order cone data is not initialized"); + return *cones_; + } + + i_t cone_count() const { return has_cones() ? cones_->n_cones : i_t(0); } + + i_t cone_entry_count() const + { + return has_cones() ? static_cast(cones_->n_cone_entries) : i_t(0); + } + + i_t cone_start() const { return cone_var_start_; } + + i_t cone_end() const { return cone_start() + cone_entry_count(); } + + i_t linear_xz_size(std::size_t full_xz_size) const + { + return has_cones() ? cone_start() : static_cast(full_xz_size); + } + + bool is_cone_variable(i_t variable) const + { + return has_cones() && variable >= cone_start() && variable < cone_end(); + } + + f_t complementarity_degree(std::size_t num_primal_variables, i_t num_upper_bounds) const + { + const bool has_soc = has_cones(); + f_t degree = static_cast(num_primal_variables) + static_cast(num_upper_bounds); + // Native QP free variables (linear only): no x·z complementarity in the barrier degree. + degree -= static_cast(n_native_free_linear); + if (has_soc) { + degree -= static_cast(cone_entry_count()); + degree += static_cast(cone_count()); } + return degree; } void form_augmented(bool first_call = false) { - i_t n = A.n; - i_t m = A.m; - i_t nnzA = A.col_start[n]; - i_t nnzQ = Q.n > 0 ? Q.col_start[n] : 0; - i_t factorization_size = n + m; - const f_t dual_perturb = 0.0; - const f_t primal_perturb = 1e-6; + i_t n = A.n; + i_t m = A.m; + i_t nnzA = A.col_start[n]; + i_t nnzQ = Q.n > 0 ? Q.col_start[n] : 0; + i_t factorization_size = n + m; + + const bool has_soc = has_cones(); + const i_t m_c = cone_entry_count(); + i_t total_block_nnz = 0; + + std::vector cone_offsets_host; + std::vector cone_block_offsets_host; + if (first_call) { - i_t new_nnz = 2 * nnzA + n + m + nnzQ; + if (has_soc) { + raft::common::nvtx::range scope("Barrier: augmented: SOC offsets"); + // Initialize the cone block offsets + const i_t n_cones = cone_count(); + cone_offsets_host.resize(n_cones + 1); + cone_block_offsets_host.resize(n_cones + 1); + raft::copy( + cone_offsets_host.data(), cones().cone_offsets.data(), n_cones + 1, stream_view_); + handle_ptr->sync_stream(); + for (i_t k = 0; k < n_cones; ++k) { + const auto q_k = cone_offsets_host[k + 1] - cone_offsets_host[k]; + cone_block_offsets_host[k + 1] = cone_block_offsets_host[k] + q_k * q_k; + } + total_block_nnz = static_cast(cone_block_offsets_host[n_cones]); + } + + i_t new_nnz = 2 * nnzA + n + m + nnzQ + total_block_nnz; // conservative estimate of nnz csr_matrix_t augmented_CSR(n + m, n + m, new_nnz); std::vector augmented_diagonal_indices(n + m, -1); + std::vector cone_csr_indices_host(total_block_nnz, -1); + std::vector cone_Q_values_host(total_block_nnz, f_t(0)); i_t q = 0; i_t off_diag_Qnz = 0; - for (i_t i = 0; i < n; i++) { + { + raft::common::nvtx::range scope("Barrier: augmented: host CSR build"); + for (i_t i = 0; i < n; i++) { augmented_CSR.row_start[i] = q; - if (nnzQ == 0) { + + const bool is_cone_row = is_cone_variable(i); + + if (is_cone_row) { + // Determine which cone this variable belongs to and its local row + i_t local_idx = i - cone_start(); + i_t k = 0; + while (k + 1 < cone_count() && + cone_offsets_host[k + 1] <= static_cast(local_idx)) { + k++; + } + i_t local_r = + static_cast(static_cast(local_idx) - cone_offsets_host[k]); + i_t q_k = static_cast(cone_offsets_host[k + 1] - cone_offsets_host[k]); + i_t cone_col_start = cone_start() + static_cast(cone_offsets_host[k]); + i_t block_base = static_cast(cone_block_offsets_host[k]) + local_r * q_k; + + // Merge-join: Q entries (sorted) with dense cone block columns (contiguous) + i_t qp = (nnzQ > 0) ? Q.col_start[i] : 0; + i_t q_end = (nnzQ > 0) ? Q.col_start[i + 1] : 0; + + // Q entries before cone block + while (qp < q_end && Q.i[qp] < cone_col_start) { + augmented_CSR.j[q] = Q.i[qp]; + augmented_CSR.x[q++] = -Q.x[qp]; + off_diag_Qnz++; + qp++; + } + + // Dense cone block, absorbing any Q entries that fall inside + for (i_t c = 0; c < q_k; c++) { + i_t col = cone_col_start + c; + f_t q_contrib = f_t(0); + f_t initial_val = (c == local_r) ? f_t(-dual_perturb) + : f_t(0); // diagonal entry of the cone block column + + if (qp < q_end && Q.i[qp] == col) { + q_contrib = Q.x[qp]; + qp++; + } + + cone_csr_indices_host[block_base + c] = q; + cone_Q_values_host[block_base + c] = q_contrib; + if (col == i) { augmented_diagonal_indices[i] = q; } + augmented_CSR.j[q] = col; + augmented_CSR.x[q++] = initial_val - q_contrib; + } + + // Q entries after cone block + while (qp < q_end) { + augmented_CSR.j[q] = Q.i[qp]; + augmented_CSR.x[q++] = -Q.x[qp]; + off_diag_Qnz++; + qp++; + } + } else if (nnzQ == 0) { augmented_diagonal_indices[i] = q; augmented_CSR.j[q] = i; augmented_CSR.x[q++] = -diag[i] - dual_perturb; @@ -487,42 +709,61 @@ class iteration_data_t { // AT block, we can use A in csc directly const i_t col_beg = A.col_start[i]; const i_t col_end = A.col_start[i + 1]; - for (i_t p = col_beg; p < col_end; ++p) { - augmented_CSR.j[q] = A.i[p] + n; - augmented_CSR.x[q++] = A.x[p]; + for (i_t p = col_beg; p < col_end; ++p) { + augmented_CSR.j[q] = A.i[p] + n; + augmented_CSR.x[q++] = A.x[p]; + } + } + + for (i_t k = n; k < n + m; ++k) { + // A block, we can use AT in csc directly + augmented_CSR.row_start[k] = q; + const i_t l = k - n; + const i_t col_beg = AT.col_start[l]; + const i_t col_end = AT.col_start[l + 1]; + for (i_t p = col_beg; p < col_end; ++p) { + augmented_CSR.j[q] = AT.i[p]; + augmented_CSR.x[q++] = AT.x[p]; + } + augmented_diagonal_indices[k] = q; + augmented_CSR.j[q] = k; + augmented_CSR.x[q++] = primal_perturb; } + augmented_CSR.row_start[n + m] = q; + augmented_CSR.nz_max = q; + augmented_CSR.j.resize(q); + augmented_CSR.x.resize(q); + i_t expected_nnz = 2 * nnzA + (n - m_c) + total_block_nnz + m + off_diag_Qnz; + settings_.log.debug("augmented nz %d predicted %d\n", q, expected_nnz); + cuopt_assert(q == expected_nnz, "augmented nnz != predicted"); + cuopt_assert(A.col_start[n] == AT.col_start[m], "A nz != AT nz"); } - for (i_t k = n; k < n + m; ++k) { - // A block, we can use AT in csc directly - augmented_CSR.row_start[k] = q; - const i_t l = k - n; - const i_t col_beg = AT.col_start[l]; - const i_t col_end = AT.col_start[l + 1]; - for (i_t p = col_beg; p < col_end; ++p) { - augmented_CSR.j[q] = AT.i[p]; - augmented_CSR.x[q++] = AT.x[p]; + { + raft::common::nvtx::range scope("Barrier: augmented: device upload"); + device_augmented.copy(augmented_CSR, handle_ptr->get_stream()); + d_augmented_diagonal_indices_.resize(augmented_diagonal_indices.size(), + handle_ptr->get_stream()); + raft::copy(d_augmented_diagonal_indices_.data(), + augmented_diagonal_indices.data(), + augmented_diagonal_indices.size(), + handle_ptr->get_stream()); + + if (has_soc) { + d_cone_csr_indices_.resize(total_block_nnz, handle_ptr->get_stream()); + raft::copy(d_cone_csr_indices_.data(), + cone_csr_indices_host.data(), + total_block_nnz, + handle_ptr->get_stream()); + d_cone_Q_values_.resize(total_block_nnz, handle_ptr->get_stream()); + raft::copy(d_cone_Q_values_.data(), + cone_Q_values_host.data(), + total_block_nnz, + handle_ptr->get_stream()); } - augmented_diagonal_indices[k] = q; - augmented_CSR.j[q] = k; - augmented_CSR.x[q++] = primal_perturb; - } - augmented_CSR.row_start[n + m] = q; - augmented_CSR.nz_max = q; - augmented_CSR.j.resize(q); - augmented_CSR.x.resize(q); - settings_.log.debug("augmented nz %d predicted %d\n", q, off_diag_Qnz + nnzA + n); - cuopt_assert(q == 2 * nnzA + n + m + off_diag_Qnz, "augmented nnz != predicted"); - cuopt_assert(A.col_start[n] == AT.col_start[m], "A nz != AT nz"); - - device_augmented.copy(augmented_CSR, handle_ptr->get_stream()); - d_augmented_diagonal_indices_.resize(augmented_diagonal_indices.size(), - handle_ptr->get_stream()); - raft::copy(d_augmented_diagonal_indices_.data(), - augmented_diagonal_indices.data(), - augmented_diagonal_indices.size(), - handle_ptr->get_stream()); - handle_ptr->sync_stream(); + + handle_ptr->sync_stream(); + } #ifdef CHECK_SYMMETRY csc_matrix_t augmented_transpose(1, 1, 1); augmented.transpose(augmented_transpose); @@ -537,20 +778,26 @@ class iteration_data_t { cuopt_assert(error.norm1() <= 1e-2, "|| Aug - Aug^T ||_1 > 1e-2"); #endif } else { + const i_t linear_n = has_soc ? cone_start() : n; + + // Primal diagonal: linear block includes dual_perturb; SOC block is filled by scatter below. + // Native free variables use barrier D = 0 in augmented_multiply; omit span_diag[j] here so + // the factorized matrix matches the matvec (only -q_diag - dual_perturb on the diagonal). thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()), thrust::make_counting_iterator(0), - i_t(n), + linear_n, [span_x = cuopt::make_span(device_augmented.x), span_diag_indices = cuopt::make_span(d_augmented_diagonal_indices_), span_q_diag = cuopt::make_span(d_Q_diag_), span_diag = cuopt::make_span(d_diag_), + span_is_native_free_linear = cuopt::make_span(d_is_native_free_linear_), dual_perturb_value = dual_perturb] __device__(i_t j) { - f_t q_diag = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0; - span_x[span_diag_indices[j]] = - -q_diag - span_diag[j] - dual_perturb_value; + f_t q_diag = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0; + const f_t d_j = (span_is_native_free_linear.size() > 0 && span_is_native_free_linear[j]) ? f_t(0) : span_diag[j]; + span_x[span_diag_indices[j]] = -q_diag - d_j - dual_perturb_value; }); - RAFT_CHECK_CUDA(handle_ptr->get_stream()); + thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()), thrust::make_counting_iterator(n), i_t(m), @@ -560,6 +807,16 @@ class iteration_data_t { span_x[span_diag_indices[j]] = primal_perturb_value; }); RAFT_CHECK_CUDA(handle_ptr->get_stream()); + + if (has_soc) { + scatter_hessian_into_augmented(cones(), + device_augmented.x, + d_cone_csr_indices_, + d_cone_Q_values_, + handle_ptr->get_stream(), + dual_perturb); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); + } } } @@ -570,43 +827,53 @@ class iteration_data_t { float64_t start_form_adat = tic(); const i_t m = AD.m; - raft::copy(device_AD.x.data(), - d_original_A_values.data(), - d_original_A_values.size(), - handle_ptr->get_stream()); - if (n_dense_columns > 0) { - // Adjust inv_diag - d_inv_diag_prime.resize(AD.n, stream_view_); - // Copy If - cub::DeviceSelect::Flagged( - d_flag_buffer.data(), - flag_buffer_size, - d_inv_diag.data(), - thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not{}), - d_inv_diag_prime.data(), - d_num_flag.data(), - d_inv_diag.size(), - stream_view_); - RAFT_CHECK_CUDA(stream_view_); - } else { - d_inv_diag_prime.resize(inv_diag.size(), stream_view_); - raft::copy(d_inv_diag_prime.data(), d_inv_diag.data(), inv_diag.size(), stream_view_); + { + raft::common::nvtx::range scope("Barrier: Form ADAT: restore A"); + raft::copy(device_AD.x.data(), + d_original_A_values.data(), + d_original_A_values.size(), + handle_ptr->get_stream()); + } + { + raft::common::nvtx::range scope("Barrier: Form ADAT: inv_diag prime"); + if (n_dense_columns > 0) { + // Adjust inv_diag + d_inv_diag_prime.resize(AD.n, stream_view_); + // Copy If + cub::DeviceSelect::Flagged( + d_flag_buffer.data(), + flag_buffer_size, + d_inv_diag.data(), + thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not{}), + d_inv_diag_prime.data(), + d_num_flag.data(), + d_inv_diag.size(), + stream_view_); + RAFT_CHECK_CUDA(stream_view_); + } else { + d_inv_diag_prime.resize(inv_diag.size(), stream_view_); + raft::copy(d_inv_diag_prime.data(), d_inv_diag.data(), inv_diag.size(), stream_view_); + } } cuopt_assert(static_cast(d_inv_diag_prime.size()) == AD.n, "inv_diag_prime.size() != AD.n"); - thrust::for_each_n(rmm::exec_policy(stream_view_), - thrust::make_counting_iterator(0), - i_t(device_AD.x.size()), - [span_x = cuopt::make_span(device_AD.x), - span_scale = cuopt::make_span(d_inv_diag_prime), - span_col_ind = cuopt::make_span(device_AD.col_index)] __device__(i_t i) { - span_x[i] *= span_scale[span_col_ind[i]]; - }); - RAFT_CHECK_CUDA(stream_view_); + { + raft::common::nvtx::range scope("Barrier: Form ADAT: scale AD"); + thrust::for_each_n(rmm::exec_policy(stream_view_), + thrust::make_counting_iterator(0), + i_t(device_AD.x.size()), + [span_x = cuopt::make_span(device_AD.x), + span_scale = cuopt::make_span(d_inv_diag_prime), + span_col_ind = cuopt::make_span(device_AD.col_index)] __device__(i_t i) { + span_x[i] *= span_scale[span_col_ind[i]]; + }); + RAFT_CHECK_CUDA(stream_view_); + } if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; } if (first_call) { + raft::common::nvtx::range scope("Barrier: Form ADAT: cusparse init"); try { initialize_cusparse_data( handle_ptr, device_A, device_AD, device_ADAT, cusparse_info); @@ -617,8 +884,11 @@ class iteration_data_t { } if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; } - multiply_kernels(handle_ptr, device_A, device_AD, device_ADAT, cusparse_info); - handle_ptr->sync_stream(); + { + raft::common::nvtx::range scope("Barrier: Form ADAT: ADAT multiply"); + multiply_kernels(handle_ptr, device_A, device_AD, device_ADAT, cusparse_info); + handle_ptr->sync_stream(); + } auto adat_nnz = device_ADAT.row_start.element(device_ADAT.m, handle_ptr->get_stream()); float64_t adat_time = toc(start_form_adat); @@ -1436,8 +1706,9 @@ class iteration_data_t { f_t beta, rmm::device_uvector& y) { - const i_t m = A.m; - const i_t n = A.n; + const i_t m = A.m; + const i_t n = A.n; + const bool has_soc = has_cones(); rmm::device_uvector d_x1(n, handle_ptr->get_stream()); rmm::device_uvector d_x2(m, handle_ptr->get_stream()); @@ -1452,19 +1723,36 @@ class iteration_data_t { // y1 <- alpha ( -D * x_1 + A^T x_2) + beta * y1 rmm::device_uvector d_r1(n, handle_ptr->get_stream()); + thrust::fill_n(rmm::exec_policy(stream_view_), d_r1.begin(), n, f_t(0)); - // diag.pairwise_product(x1, r1); - // r1 <- D * x_1 - pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), n, stream_view_); + // r1 <- D * x_1 on linear indices; barrier D is zero on native free variables + const i_t linear_n = has_soc ? cone_start() : n; + if (n_native_free_linear > 0) { + pairwise_multiply_skip_native_free_linear( + d_x1.data(), d_diag_.data(), d_is_native_free_linear_.data(), d_r1.data(), linear_n, stream_view_); + } else { + pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), linear_n, stream_view_); + } + RAFT_CHECK_CUDA(stream_view_); - // r1 <- Q x1 + D x1 + // r1 <- D * x_1 + H x_1 (cone Hessian block H = S^T S; accumulate_cone_hessian_matvec) + if (has_soc) { + const i_t m_c = cone_entry_count(); + accumulate_cone_hessian_matvec(raft::device_span(d_x1.data() + cone_start(), m_c), + cones(), + raft::device_span(d_r1.data() + cone_start(), m_c), + stream_view_); + RAFT_CHECK_CUDA(stream_view_); + } + + // r1 <- Q x1 + D x1 + H x1 (cone: same H as above) if (Q.n > 0) { // matrix_vector_multiply(Q, 1.0, x1, 1.0, r1); cusparse_Q_view_.spmv(1.0, d_x1, 1.0, d_r1); } // y1 <- - alpha * r1 + beta * y1 - // y1.axpy(-alpha, r1, beta); + // flip the sign of r1 = (Q x1 + D x1 + H x1) axpy(-alpha, d_r1.data(), beta, d_y1.data(), d_y1.data(), n, stream_view_); // matrix_transpose_vector_multiply(A, alpha, x2, 1.0, y1); @@ -1554,13 +1842,22 @@ class iteration_data_t { std::vector Qdiag; bool Q_diagonal; rmm::device_uvector d_augmented_diagonal_indices_; + rmm::device_uvector d_cone_csr_indices_; + rmm::device_uvector d_cone_Q_values_; bool indefinite_Q; cusparse_view_t cusparse_Q_view_; + std::optional> cones_; + i_t cone_var_start_ = 0; + bool use_augmented; i_t symbolic_status; - i_t n_free_vars{0}; - rmm::device_uvector d_is_free_; // 1 if variable is free (QP only), 0 otherwise + i_t n_native_free_linear{0}; + rmm::device_uvector d_is_native_free_linear_; // 1 if native free linear (j < cone_start), else 0 + + // Adaptive regularization for the augmented system + f_t dual_perturb{1e-8}; + f_t primal_perturb{1e-8}; std::unique_ptr> chol; @@ -1656,6 +1953,8 @@ class iteration_data_t { rmm::device_uvector d_complementarity_xz_rhs_; rmm::device_uvector d_complementarity_wv_rhs_; rmm::device_uvector d_dual_rhs_; + rmm::device_uvector d_complementarity_target_; + rmm::device_uvector d_cone_hessian_dx_; rmm::device_uvector d_Q_diag_; rmm::device_uvector d_Qx_; @@ -1665,6 +1964,9 @@ class iteration_data_t { transform_reduce_helper_t transform_reduce_helper_; sum_reduce_helper_t sum_reduce_helper_; + bool cone_combined_step_; + f_t cone_sigma_mu_; + rmm::cuda_stream_view stream_view_; const simplex_solver_settings_t& settings_; @@ -1763,6 +2065,8 @@ int barrier_solver_t::initial_point(iteration_data_t& data) { raft::common::nvtx::range fun_scope("Barrier: initial_point"); const bool use_augmented = data.use_augmented; + const bool has_soc = data.has_cones(); + const bool has_native_free_linear = data.n_native_free_linear > 0; // Perform a numerical factorization i_t status; @@ -1892,6 +2196,50 @@ int barrier_solver_t::initial_point(iteration_data_t& data) dense_vector_t dual_res(lp.num_cols); float64_t epsilon_adjust = 10.0; + + // Data-dependent initial point for SOCP, following SeDuMi (Sturm, 1999). + // mu = sqrt((1 + ||b||_inf) * (1 + ||c||_inf)) + // primal and dual: x = mu * e_K, z = mu * e_K + // where e_K is the identity of the symmetric cone: + // LP block: e = 1, SOC block: e = (sqrt(2), 0, ..., 0) + if (has_soc) { + const i_t cs = data.cone_start(); + const f_t norm_b = vector_norm_inf(lp.rhs); + const f_t norm_c = vector_norm_inf(lp.objective); + const f_t mu = std::sqrt((1.0 + norm_b) * (1.0 + norm_c)); + const f_t sqrt2 = std::sqrt(2.0); + const f_t x_soc = mu * sqrt2; + const f_t z_soc = mu * sqrt2; + // Linear orthant + for (i_t j = 0; j < cs; ++j) { + data.x[j] = mu; + data.z[j] = mu; + } + if (has_native_free_linear) { + for (i_t j : presolve_info.native_free_linear_indices) { + if (j < cs) { data.z[j] = 0.0; } + } + } + // SOC blocks + i_t off = 0; + for (i_t k = 0; k < static_cast(lp.second_order_cone_dims.size()); ++k) { + i_t q_k = lp.second_order_cone_dims[k]; + data.x[cs + off] = x_soc; + data.z[cs + off] = z_soc; + for (i_t j = 1; j < q_k; ++j) { + data.x[cs + off + j] = 0.0; + data.z[cs + off + j] = 0.0; + } + off += q_k; + } + data.y.set_scalar(0.0); + if (data.n_upper_bounds > 0) { + data.w.set_scalar(mu); + data.v.set_scalar(mu); + } + return 0; + } + if (settings.barrier_dual_initial_point == -1 || settings.barrier_dual_initial_point == 0) { // Use the dual starting point suggested by the paper // On Implementing Mehrotra’s Predictor–Corrector Interior-Point Method for Linear Programming @@ -1924,8 +2272,10 @@ int barrier_solver_t::initial_point(iteration_data_t& data) data.v[k] = -c[j] + epsilon; } } - // Now hande the case with no upper bounds + // Now handle the case with no upper bounds (skip cone variables) + const i_t cone_end = data.cone_end(); for (i_t j = 0; j < lp.num_cols; j++) { + if (has_soc && j >= data.cone_start() && j < cone_end) continue; if (lp.upper[j] == inf) { if (c[j] > epsilon_adjust) { data.z[j] = c[j]; @@ -1935,8 +2285,8 @@ int barrier_solver_t::initial_point(iteration_data_t& data) } } // Free variables have z = 0 (no complementarity condition) - if (data.n_free_vars > 0) { - for (i_t j : presolve_info.free_variable_indices) { + if (has_native_free_linear) { + for (i_t j : presolve_info.native_free_linear_indices) { data.z[j] = 0.0; } } @@ -1960,7 +2310,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) data.v.multiply_scalar(-1.0); data.v.ensure_positive(epsilon_adjust); - data.z.ensure_positive(epsilon_adjust); + data.z.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count()); } else { // First compute rhs = A*Dinv*c dense_vector_t rhs(lp.num_rows); @@ -1984,7 +2334,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) data.gather_upper_bounds(data.z, data.v); data.v.multiply_scalar(-1.0); data.v.ensure_positive(epsilon_adjust); - data.z.ensure_positive(epsilon_adjust); + data.z.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count()); } // Verify A'*y + z - E*v - Q*x = c @@ -2001,24 +2351,35 @@ int barrier_solver_t::initial_point(iteration_data_t& data) settings.log.printf("||A^T y + z - E*v - Q*x - c ||: %e\n", vector_norm2(data.dual_residual)); #endif - // Make sure (w, x, v, z) > 0 - if (data.n_free_vars > 0) { + // Make sure (w, x, v, z) > 0. Cone blocks are not nonnegative orthant variables — skip them for + // box positivity; NT scaling keeps cone iterates feasible separately. + std::vector free_x_save; + if (has_native_free_linear) { + free_x_save.resize(data.n_native_free_linear); + for (i_t k = 0; k < data.n_native_free_linear; ++k) { + free_x_save[k] = data.x[presolve_info.native_free_linear_indices[k]]; + } + } + data.w.ensure_positive(epsilon_adjust); + if (has_soc) { + data.x.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count()); + } else if (has_native_free_linear) { std::vector nonnegative_variables(data.x.size(), 1); - for (i_t j : presolve_info.free_variable_indices) { + for (i_t j : presolve_info.native_free_linear_indices) { nonnegative_variables[j] = 0; } - data.x.ensure_positive(epsilon_adjust, nonnegative_variables); - - for (i_t j : presolve_info.free_variable_indices) { - data.z[j] = 0.0; - } - } else { data.x.ensure_positive(epsilon_adjust); } - data.w.ensure_positive(epsilon_adjust); - + // Native free variables (QP/SOCP): restore possibly negative x and set z = 0 + if (has_native_free_linear) { + for (i_t k = 0; k < data.n_native_free_linear; ++k) { + i_t j = presolve_info.native_free_linear_indices[k]; + data.x[j] = free_x_save[k]; + data.z[j] = 0.0; + } + } #ifdef PRINT_INFO settings.log.printf("min v %e min z %e\n", data.v.minimum(), data.z.minimum()); #endif @@ -2202,10 +2563,27 @@ void barrier_solver_t::gpu_compute_residual_norms(const rmm::device_uv primal_residual_norm = std::max(device_vector_norm_inf(data.d_primal_residual_, stream_view_), device_vector_norm_inf(data.d_bound_residual_, stream_view_)); - dual_residual_norm = device_vector_norm_inf(data.d_dual_residual_, stream_view_); + dual_residual_norm = device_vector_norm_inf(data.d_dual_residual_, stream_view_); + const bool has_soc = data.has_cones(); + const i_t linear_xz_size = data.linear_xz_size(data.d_complementarity_xz_residual_.size()); + auto linear_xz_span = + raft::device_span(data.d_complementarity_xz_residual_.data(), linear_xz_size); complementarity_residual_norm = - std::max(device_vector_norm_inf(data.d_complementarity_xz_residual_, stream_view_), + std::max(device_vector_norm_inf(linear_xz_span, stream_view_), device_vector_norm_inf(data.d_complementarity_wv_residual_, stream_view_)); + if (has_soc) { + f_t cone_complementarity_norm = f_t(0); + auto cone_dot = data.cones().scratch.template get_slot<0>(); + data.cones().segmented_sum( + data.d_complementarity_xz_residual_.data() + data.cone_start(), cone_dot, stream_view_); + cone_complementarity_norm = thrust::reduce(rmm::exec_policy(stream_view_), + cone_dot.begin(), + cone_dot.end(), + f_t(0), + thrust::maximum()); + complementarity_residual_norm = + std::max(complementarity_residual_norm, cone_complementarity_norm); + } } template @@ -2213,42 +2591,58 @@ f_t barrier_solver_t::gpu_max_step_to_boundary(iteration_data_t& x, const rmm::device_uvector& dx) { - // For x-sized vectors with free variables, skip free vars in ratio test - const bool has_free = data.n_free_vars > 0 && static_cast(x.size()) == lp.num_cols; - - if (has_free) { - auto is_free_ptr = data.d_is_free_.data(); - auto ratio_test_free = [is_free_ptr] HD(const thrust::tuple t) { - const f_t dx_val = thrust::get<0>(t); - const f_t x_val = thrust::get<1>(t); - const i_t is_free = thrust::get<2>(t); - if (is_free) return f_t(1.0); - if (dx_val < f_t(0.0)) return -x_val / dx_val; + const bool has_soc = data.has_cones() && static_cast(x.size()) >= data.cone_end(); + const bool has_native_free_linear = data.n_native_free_linear > 0 && static_cast(x.size()) == lp.num_cols; + + auto reduce_segment = [&](i_t start, i_t len) -> f_t { + if (len <= 0) { return f_t(1); } + if (has_native_free_linear) { + auto native_free_linear_ptr = data.d_is_native_free_linear_.data() + start; + // step size computation for nonnegative variables with free variables + auto ratio_test_free = [native_free_linear_ptr] HD(const thrust::tuple t) { + const f_t dx_val = thrust::get<0>(t); + const f_t x_val = thrust::get<1>(t); + const i_t is_native_free_linear = thrust::get<2>(t); + if (is_native_free_linear) return f_t(1.0); + if (dx_val < f_t(0.0)) return -x_val / dx_val; + return f_t(1.0); + }; + return data.transform_reduce_helper_.transform_reduce( + thrust::make_zip_iterator(dx.data() + start, x.data() + start, native_free_linear_ptr), + thrust::minimum(), + ratio_test_free, + f_t(1.0), + len, + stream_view_); + } + // step size computation for nonnegative variables without free variables + auto ratio_test = [] HD(const thrust::tuple t) { + const f_t dx = thrust::get<0>(t); + const f_t x = thrust::get<1>(t); + if (dx < f_t(0.0)) return -x / dx; return f_t(1.0); }; - return data.transform_reduce_helper_.transform_reduce( - thrust::make_zip_iterator(dx.data(), x.data(), is_free_ptr), + thrust::make_zip_iterator(dx.data() + start, x.data() + start), thrust::minimum(), - ratio_test_free, + ratio_test, f_t(1.0), - x.size(), + len, stream_view_); - } + }; - return data.transform_reduce_helper_.transform_reduce( - thrust::make_zip_iterator(dx.data(), x.data()), - thrust::minimum(), - [] HD(const thrust::tuple t) { - const f_t dx = thrust::get<0>(t); - const f_t x = thrust::get<1>(t); + // step size computation for when there are soc constraints + if (has_soc) { + const i_t cs = data.cone_start(); + const i_t mc = data.cone_entry_count(); + f_t alpha = reduce_segment(0, cs); + const i_t tail_start = cs + mc; + const i_t tail_size = static_cast(x.size()) - tail_start; + if (tail_size > 0) { alpha = std::min(alpha, reduce_segment(tail_start, tail_size)); } + return alpha; + } - if (dx < f_t(0.0)) return -x / dx; - return f_t(1.0); - }, - f_t(1.0), - x.size(), - stream_view_); + return reduce_segment(0, static_cast(x.size())); } template @@ -2258,98 +2652,136 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t& dy, pinned_dense_vector_t& dv, pinned_dense_vector_t& dz, + f_t& dual_perturb, + f_t& primal_perturb, f_t& max_residual) { raft::common::nvtx::range fun_scope("Barrier: compute_search_direction"); const bool debug = false; const bool use_augmented = data.use_augmented; + const bool has_soc = data.has_cones(); + const bool has_native_free_linear = data.n_native_free_linear > 0; + const i_t m_c = data.cone_entry_count(); + const i_t cone_var_start = data.cone_start(); + const i_t linear_size = data.linear_xz_size(lp.num_cols); + + // Linear (orthant) block only; SOC uses recover_cone_dz_from_target. + auto recover_linear_orthant_dz = [&](raft::device_span target, + raft::device_span z, + raft::device_span dx_span, + raft::device_span x, + raft::device_span dz_span) { + if (dz_span.empty()) return; + if (has_native_free_linear) { + cub::DeviceTransform::Transform( + cuda::std::make_tuple( + target.data(), z.data(), dx_span.data(), x.data(), data.d_is_native_free_linear_.data()), + dz_span.data(), + dz_span.size(), + [] HD(f_t target_val, f_t z_val, f_t dx_val, f_t x_val, i_t is_native_free_linear) { + if (is_native_free_linear) return f_t(0); + return target_val - (z_val * dx_val) / x_val; + }, + stream_view_.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(target.data(), z.data(), dx_span.data(), x.data()), + dz_span.data(), + dz_span.size(), + [] HD(f_t target_val, f_t z_val, f_t dx_val, f_t x_val) { + return target_val - (z_val * dx_val) / x_val; + }, + stream_view_.value()); + } + RAFT_CHECK_CUDA(stream_view_); + }; - { - raft::common::nvtx::range fun_scope("Barrier: GPU allocation and copies"); - - // TMP allocation and copy should happen only once where it's written in a first place - data.d_bound_rhs_.resize(data.bound_rhs.size(), stream_view_); - raft::copy( - data.d_bound_rhs_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream_view_); - data.d_x_.resize(data.x.size(), stream_view_); - raft::copy(data.d_x_.data(), data.x.data(), data.x.size(), stream_view_); - data.d_z_.resize(data.z.size(), stream_view_); - raft::copy(data.d_z_.data(), data.z.data(), data.z.size(), stream_view_); - data.d_w_.resize(data.w.size(), stream_view_); - raft::copy(data.d_w_.data(), data.w.data(), data.w.size(), stream_view_); - data.d_v_.resize(data.v.size(), stream_view_); - raft::copy(data.d_v_.data(), data.v.data(), data.v.size(), stream_view_); - data.d_upper_bounds_.resize(data.upper_bounds.size(), stream_view_); - raft::copy(data.d_upper_bounds_.data(), - data.upper_bounds.data(), - data.upper_bounds.size(), - stream_view_); - data.d_dy_.resize(dy.size(), stream_view_); - raft::copy(data.d_dy_.data(), dy.data(), dy.size(), stream_view_); - data.d_dx_.resize(dx.size(), stream_view_); - raft::copy(data.d_h_.data(), data.primal_rhs.data(), data.primal_rhs.size(), stream_view_); - raft::copy(data.d_dual_rhs_.data(), data.dual_rhs.data(), data.dual_rhs.size(), stream_view_); - data.d_dz_.resize(dz.size(), stream_view_); - data.d_dv_.resize(dv.size(), stream_view_); - data.d_dw_.resize(data.bound_rhs.size(), stream_view_); - raft::copy(data.d_dw_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream_view_); - data.d_dw_residual_.resize(data.n_upper_bounds, stream_view_); - data.d_wv_residual_.resize(data.d_complementarity_wv_rhs_.size(), stream_view_); - data.d_xz_residual_.resize(data.d_complementarity_xz_rhs_.size(), stream_view_); - data.d_primal_residual_.resize(lp.rhs.size(), stream_view_); - raft::copy(data.d_primal_residual_.data(), lp.rhs.data(), lp.rhs.size(), stream_view_); - data.d_bound_residual_.resize(data.bound_residual.size(), stream_view_); - data.d_upper_.resize(lp.upper.size(), stream_view_); - raft::copy(data.d_upper_.data(), lp.upper.data(), lp.upper.size(), stream_view_); - } + copy_step_rhs_to_device(data, stream_view_); + data.d_dx_.resize(dx.size(), stream_view_); + data.d_dy_.resize(dy.size(), stream_view_); + data.d_dz_.resize(dz.size(), stream_view_); + data.d_dv_.resize(dv.size(), stream_view_); // Solves the linear system // // dw dx dy dv dz - // [ 0 A 0 0 0 ] [ dw ] = [ rp ] - // [ I E' 0 0 0 ] [ dx ] [ rw ] - // [ 0 0 A' -E I ] [ dy ] [ rd ] - // [ 0 Z 0 0 X ] [ dv ] [ rxz ] - // [ V 0 0 W 0 ] [ dz ] [ rwv ] + // [ 0 A 0 0 0 ] [ dw ] = [ rp ] + // [ I E' 0 0 0 ] [ dx ] [ rw ] + // [ 0 0 A' -E I ] [ dy ] [ rd ] + // [ 0 Z(S) 0 0 X(S^-T)] [ dv ] [ rxz ] + // [ V 0 0 W 0 ] [ dz ] [ rwv ] + + // NT-scaling: + // \lambda = Sx = S^-T z + // Affine step: (\lambda + S \delta xa) \circ (\lambda + S^-T \delta za) = 0 + // S \delta xa + S^-T \delta za = - \lambda + // \delta za = -S^T (S \delta xa + \lambda) = - S^T S \delta xa -S^T \lambda= - S^T S \delta xa + // - z + if (has_soc && !data.cone_combined_step_) { + auto& cones = data.cones(); + cones.x = raft::device_span(data.d_x_.data() + cone_var_start, m_c); + cones.z = raft::device_span(data.d_z_.data() + cone_var_start, m_c); + launch_nt_scaling(cones, stream_view_); + } max_residual = 0.0; { raft::common::nvtx::range fun_scope("Barrier: GPU diag, inv diag and sqrt inv diag formation"); - // diag = z ./ x - // For native free variables (QP): use Q diagonal if available, otherwise a static regularizer - if (data.n_free_vars > 0) { - constexpr f_t free_var_reg = 1e-7; - if (data.Q.n > 0 && data.Q_diagonal) { + // diag = z ./ x on the linear (non-cone) coordinates; cone block is overwritten below. + if (has_native_free_linear) { + if (use_augmented) { + // Augmented KKT: D = 0 on native free linear vars (Q curvature is in the KKT block, not D). cub::DeviceTransform::Transform( cuda::std::make_tuple( - data.d_z_.data(), data.d_x_.data(), data.d_is_free_.data(), data.d_Q_diag_.data()), + data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data()), data.d_diag_.data(), - data.d_diag_.size(), - [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_free, f_t q_jj) { - if (!is_free) return z_j / x_j; - return (q_jj > f_t(0)) ? f_t(0) : free_var_reg; + linear_size, + [] HD(f_t z_j, f_t x_j, i_t is_native_free_linear) { + return is_native_free_linear ? f_t(0) : (z_j / x_j); }, stream_view_.value()); } else { - cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_free_.data()), - data.d_diag_.data(), - data.d_diag_.size(), - [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_free) { - return is_free ? free_var_reg : (z_j / x_j); - }, - stream_view_.value()); + constexpr f_t free_var_reg = 1e-7; + if (data.Q.n > 0 && data.Q_diagonal) { + cub::DeviceTransform::Transform( + cuda::std::make_tuple( + data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data(), data.d_Q_diag_.data()), + data.d_diag_.data(), + linear_size, + [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_native_free_linear, f_t q_jj) { + if (!is_native_free_linear) return z_j / x_j; + return (q_jj > f_t(0)) ? f_t(0) : free_var_reg; + }, + stream_view_.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data()), + data.d_diag_.data(), + linear_size, + [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_native_free_linear) { + return is_native_free_linear ? free_var_reg : (z_j / x_j); + }, + stream_view_.value()); + } } } else { cub::DeviceTransform::Transform(cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data()), data.d_diag_.data(), - data.d_diag_.size(), + linear_size, cuda::std::divides<>{}, stream_view_.value()); } RAFT_CHECK_CUDA(stream_view_); + + // TODO: need to double check this + // Set cone diagonal part to be 1 + if (has_soc) { + thrust::fill_n( + rmm::exec_policy(stream_view_), data.d_diag_.begin() + cone_var_start, m_c, f_t(1)); + } + // diag = z ./ x + E * (v ./ w) * E' if (data.n_upper_bounds > 0) { @@ -2365,8 +2797,9 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t 0 && data.Q_diagonal) { + // In ADAT mode, diagonal Q is folded into D. In augmented mode, Q is an explicit block in the + // KKT matrix, so adding it here would double count the quadratic objective. + if (!use_augmented && data.Q.n > 0 && data.Q_diagonal) { cub::DeviceTransform::Transform( cuda::std::make_tuple(data.d_Q_diag_.data(), data.d_diag_.data()), data.d_diag_.data(), @@ -2376,18 +2809,25 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t 0) { + if (data.n_upper_bounds > 0) { cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_inv_diag.data(), - data.d_tmp3_.data(), - data.d_complementarity_xz_rhs_.data(), - data.d_x_.data(), + cuda::std::make_tuple(data.d_bound_rhs_.data(), + data.d_v_.data(), + data.d_complementarity_wv_rhs_.data(), + data.d_w_.data()), + thrust::make_permutation_iterator(data.d_tmp3_.data(), data.d_upper_bounds_.data()), + data.n_upper_bounds, + [] HD(f_t bound_rhs, f_t v, f_t complementarity_wv_rhs, f_t w) { + return (complementarity_wv_rhs - v * bound_rhs) / w; + }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); + } + if (has_native_free_linear) { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_tmp3_.data(), + data.d_complementarity_target_.data(), data.d_dual_rhs_.data(), - data.d_is_free_.data()), - thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()), + data.d_is_native_free_linear_.data()), + data.d_tmp3_.data(), lp.num_cols, - [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs, i_t is_free) - -> thrust::tuple { - const f_t xz_term = is_free ? f_t(0) : (complementarity_xz_rhs / x); - const f_t tmp = tmp3 - xz_term + dual_rhs; - return {tmp, inv_diag * tmp}; + [] HD(f_t tmp3, f_t target, f_t dual_rhs, i_t is_native_free_linear) { + const f_t comp_term = is_native_free_linear ? f_t(0) : target; + return tmp3 + dual_rhs - comp_term; }, stream_view_.value()); } else { cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_inv_diag.data(), - data.d_tmp3_.data(), - data.d_complementarity_xz_rhs_.data(), - data.d_x_.data(), - data.d_dual_rhs_.data()), - thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()), + cuda::std::make_tuple( + data.d_tmp3_.data(), data.d_dual_rhs_.data(), data.d_complementarity_target_.data()), + data.d_tmp3_.data(), lp.num_cols, - [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs) - -> thrust::tuple { - const f_t tmp = tmp3 + -(complementarity_xz_rhs / x) + dual_rhs; - return {tmp, inv_diag * tmp}; - }, + [] HD(f_t tmp3, f_t dual_rhs, f_t target) { return tmp3 + dual_rhs - target; }, stream_view_.value()); } RAFT_CHECK_CUDA(stream_view_); raft::copy(data.d_r1_.data(), data.d_tmp3_.data(), data.d_tmp3_.size(), stream_view_); raft::copy(data.d_r1_prime_.data(), data.d_tmp3_.data(), data.d_tmp3_.size(), stream_view_); + } - // h <- A @ tmp4 .+ primal_rhs + if (!use_augmented) { + raft::common::nvtx::range fun_scope("Barrier: GPU compute H"); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_inv_diag.data(), data.d_tmp3_.data()), + data.d_tmp4_.data(), + lp.num_cols, + [] HD(f_t inv_diag, f_t tmp3) { return inv_diag * tmp3; }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); data.cusparse_view_.spmv(1, data.cusparse_tmp4_, 1, data.cusparse_h_); } if (use_augmented) { raft::common::nvtx::range fun_scope("Barrier: GPU augmented solve"); - // r1 <- dual_rhs -complementarity_xz_rhs ./ x + E * ((complementarity_wv_rhs - v .* bound_rhs) - // ./ w) + // Augmented RHS [dx; dy]: primal block is d_r1_ (assembled above). + // linear j: dual_rhs[j] - complementarity_target[j] + // + E_j*((complementarity_wv_rhs - v.*bound_rhs)./w) (target = xz_rhs/x; free: 0) + // cone j: dual_rhs[j] - complementarity_target[j] (NT target: -z or combined centering term) + // Constraint block: primal_rhs. + // TODO: d_augmented_rhs can be preallocated rmm::device_uvector d_augmented_rhs(lp.num_cols + lp.num_rows, stream_view_); raft::copy(d_augmented_rhs.data(), data.d_r1_.data(), lp.num_cols, stream_view_); raft::copy( @@ -2525,10 +2966,33 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t(op, d_augmented_rhs, d_augmented_soln); + const f_t solve_err = + iterative_refinement(op, d_augmented_rhs, d_augmented_soln); if (solve_err > 1e-1) { settings.log.printf("|| Aug (dx, dy) - aug_rhs || %e after IR\n", solve_err); } + + // Adaptive regularization: increase/decrease based on IR quality. + // Only adapt on calls where we actually (re)factorized — the affine step. + if (did_factorize) { + constexpr f_t min_perturb = 1e-8; + constexpr f_t max_perturb = 1e-1; + if (solve_err > 1e-2) { + f_t old_dp = dual_perturb; + dual_perturb = std::min(max_perturb, dual_perturb * 10.0); + primal_perturb = std::min(max_perturb, primal_perturb * 10.0); + settings.log.printf( + " reg UP: %e -> %e (solve_err=%e)\n", old_dp, dual_perturb, solve_err); + } else if (solve_err < 1e-4) { + f_t old_dp = dual_perturb; + dual_perturb = std::max(min_perturb, dual_perturb / 10.0); + primal_perturb = std::max(min_perturb, primal_perturb / 10.0); + if (old_dp != dual_perturb) { + settings.log.printf( + " reg DOWN: %e -> %e (solve_err=%e)\n", old_dp, dual_perturb, solve_err); + } + } + } } raft::copy(data.d_dx_.data(), d_augmented_soln.data(), lp.num_cols, stream_view_); @@ -2774,34 +3238,23 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t 0) { - cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(), - data.d_z_.data(), - data.d_dx_.data(), - data.d_x_.data(), - data.d_is_free_.data()), - data.d_dz_.data(), - data.d_dz_.size(), - [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x, i_t is_free) { - return is_free ? f_t(0) : ((complementarity_xz_rhs - z * dx) / x); - }, - stream_view_.value()); - } else { - cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(), - data.d_z_.data(), - data.d_dx_.data(), - data.d_x_.data()), - data.d_dz_.data(), - data.d_dz_.size(), - [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x) { - return (complementarity_xz_rhs - z * dx) / x; - }, - stream_view_.value()); + const i_t linear_dz_size = has_soc ? cone_var_start : static_cast(data.d_dz_.size()); + + if (has_soc) { + recover_cone_dz_from_target( + raft::device_span(data.d_dx_.data() + cone_var_start, m_c), + data.cones(), + raft::device_span(data.d_complementarity_target_.data() + cone_var_start, m_c), + raft::device_span(data.d_dz_.data() + cone_var_start, m_c), + stream_view_); } + + recover_linear_orthant_dz( + raft::device_span(data.d_complementarity_target_.data(), linear_dz_size), + raft::device_span(data.d_z_.data(), linear_dz_size), + raft::device_span(data.d_dx_.data(), linear_dz_size), + raft::device_span(data.d_x_.data(), linear_dz_size), + raft::device_span(data.d_dz_.data(), linear_dz_size)); RAFT_CHECK_CUDA(stream_view_); raft::copy(dz.data(), data.d_dz_.data(), data.d_dz_.size(), stream_view_); } @@ -2810,18 +3263,29 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t out, + raft::device_span rhs, + raft::device_span z, + raft::device_span dz_span, + raft::device_span dx_span, + raft::device_span x) { + if (out.empty()) return; + cub::DeviceTransform::Transform( + cuda::std::make_tuple(rhs.data(), z.data(), dz_span.data(), dx_span.data(), x.data()), + out.data(), + out.size(), + [] HD(f_t complementarity_xz_rhs, f_t z_val, f_t dz_val, f_t dx_val, f_t x_val) { + return z_val * dx_val + x_val * dz_val - complementarity_xz_rhs; + }, + stream_view_.value()); + }; + compute_linear_xz_residual( + raft::device_span(data.d_xz_residual_.data(), linear_size), + raft::device_span(data.d_complementarity_xz_rhs_.data(), linear_size), + raft::device_span(data.d_z_.data(), linear_size), + raft::device_span(data.d_dz_.data(), linear_size), + raft::device_span(data.d_dx_.data(), linear_size), + raft::device_span(data.d_x_.data(), linear_size)); RAFT_CHECK_CUDA(stream_view_); const f_t xz_residual_norm = device_vector_norm_inf(data.d_xz_residual_, stream_view_); @@ -2981,40 +3445,187 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t -void barrier_solver_t::compute_affine_rhs(iteration_data_t& data) +void copy_host_iterate_to_device(iteration_data_t& data, rmm::cuda_stream_view stream) { - raft::common::nvtx::range fun_scope("Barrier: compute_affine_rhs"); + raft::common::nvtx::range fun_scope("Barrier: copy_host_iterate_to_device"); + + data.d_x_.resize(data.x.size(), stream); + data.d_z_.resize(data.z.size(), stream); + raft::copy(data.d_x_.data(), data.x.data(), data.x.size(), stream); + raft::copy(data.d_z_.data(), data.z.data(), data.z.size(), stream); + + data.d_w_.resize(data.w.size(), stream); + data.d_v_.resize(data.v.size(), stream); + raft::copy(data.d_w_.data(), data.w.data(), data.w.size(), stream); + raft::copy(data.d_v_.data(), data.v.data(), data.v.size(), stream); + + data.d_y_.resize(data.y.size(), stream); + raft::copy(data.d_y_.data(), data.y.data(), data.y.size(), stream); + + data.d_upper_bounds_.resize(data.upper_bounds.size(), stream); + raft::copy(data.d_upper_bounds_.data(), + data.upper_bounds.data(), + data.upper_bounds.size(), + stream); +} - data.primal_rhs = data.primal_residual; - data.bound_rhs = data.bound_residual; - data.dual_rhs = data.dual_residual; +// One-time static problem data (constant across barrier iterations). +template +void copy_static_problem_data_to_device(const lp_problem_t& lp, + iteration_data_t& data, + rmm::cuda_stream_view stream) +{ + raft::common::nvtx::range fun_scope("Barrier: copy_static_problem_data_to_device"); - raft::copy(data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_residual_.data(), - data.d_complementarity_xz_residual_.size(), - stream_view_); - raft::copy(data.d_complementarity_wv_rhs_.data(), - data.d_complementarity_wv_residual_.data(), - data.d_complementarity_wv_residual_.size(), - stream_view_); + data.d_primal_residual_.resize(lp.rhs.size(), stream); + raft::copy(data.d_primal_residual_.data(), lp.rhs.data(), lp.rhs.size(), stream); + + data.d_upper_.resize(lp.upper.size(), stream); + raft::copy(data.d_upper_.data(), lp.upper.data(), lp.upper.size(), stream); + + data.d_bound_residual_.resize(data.bound_residual.size(), stream); + data.d_dw_residual_.resize(data.n_upper_bounds, stream); +} + +// Per Mehrotra step: affine/corrector RHS only (iterate stays on device from initial sync / next_iterate). +template +void copy_step_rhs_to_device(iteration_data_t& data, rmm::cuda_stream_view stream) +{ + raft::common::nvtx::range fun_scope("Barrier: copy_step_rhs_to_device"); + + data.d_bound_rhs_.resize(data.bound_rhs.size(), stream); + raft::copy( + data.d_bound_rhs_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream); + + raft::copy(data.d_h_.data(), data.primal_rhs.data(), data.primal_rhs.size(), stream); + raft::copy(data.d_dual_rhs_.data(), data.dual_rhs.data(), data.dual_rhs.size(), stream); + + data.d_dw_.resize(data.bound_rhs.size(), stream); + raft::copy(data.d_dw_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream); + + data.d_xz_residual_.resize(data.d_complementarity_xz_rhs_.size(), stream); + data.d_wv_residual_.resize(data.d_complementarity_wv_rhs_.size(), stream); +} + +template +void fill_linear_complementarity_target(iteration_data_t& data, + raft::device_span target, + raft::device_span xz_rhs, + raft::device_span x, + rmm::cuda_stream_view stream) +{ + if (target.empty()) return; + const bool has_native_free_linear = data.n_native_free_linear > 0; + if (has_native_free_linear) { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(xz_rhs.data(), x.data(), data.d_is_native_free_linear_.data()), + target.data(), + target.size(), + [] HD(f_t complementarity_xz_rhs, f_t x_val, i_t is_native_free_linear) { + if (is_native_free_linear) return f_t(0); + return complementarity_xz_rhs / x_val; + }, + stream.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(xz_rhs.data(), x.data()), + target.data(), + target.size(), + [] HD(f_t complementarity_xz_rhs, f_t x_val) { return complementarity_xz_rhs / x_val; }, + stream.value()); + } + RAFT_CHECK_CUDA(stream); +} - // x.*z -> -x .* z +template +void fill_affine_cone_complementarity_target(iteration_data_t& data, + i_t cone_var_start, + i_t m_c, + rmm::cuda_stream_view stream) +{ + if (m_c == 0) return; + auto& cones = data.cones(); + cones.x = raft::device_span(data.d_x_.data() + cone_var_start, m_c); + cones.z = raft::device_span(data.d_z_.data() + cone_var_start, m_c); + auto cone_target = + raft::device_span(data.d_complementarity_target_.data() + cone_var_start, m_c); cub::DeviceTransform::Transform( - data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_rhs_.size(), - [] HD(f_t xz_rhs) { return -xz_rhs; }, - stream_view_.value()); - RAFT_CHECK_CUDA(stream_view_); + cones.z.data(), + cone_target.data(), + m_c, + [] HD(f_t z_val) { return -z_val; }, + stream.value()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + RAFT_CHECK_CUDA(stream); +} + +template +void fill_corrector_cone_complementarity_target(iteration_data_t& data, + i_t cone_var_start, + i_t m_c, + f_t sigma_mu, + rmm::cuda_stream_view stream) +{ + if (m_c == 0) return; + auto& cones = data.cones(); + cones.x = raft::device_span(data.d_x_.data() + cone_var_start, m_c); + cones.z = raft::device_span(data.d_z_.data() + cone_var_start, m_c); + auto cone_target = + raft::device_span(data.d_complementarity_target_.data() + cone_var_start, m_c); + compute_combined_cone_rhs_term(raft::device_span(data.d_dx_aff_.data() + cone_var_start, m_c), + raft::device_span(data.d_dz_aff_.data() + cone_var_start, m_c), + cones, + sigma_mu, + cone_target, + stream); +} + +template +void barrier_solver_t::compute_affine_rhs(iteration_data_t& data) +{ + raft::common::nvtx::range fun_scope("Barrier: compute_affine_rhs"); + const bool has_soc = data.has_cones(); + const i_t linear_size = data.linear_xz_size(lp.num_cols); + const i_t cone_var_start = data.cone_start(); + const i_t m_c = data.cone_entry_count(); + + data.primal_rhs = data.primal_residual; + data.bound_rhs = data.bound_residual; + data.dual_rhs = data.dual_residual; + data.cone_combined_step_ = false; + data.cone_sigma_mu_ = f_t(0); + + auto negate_complementarity_rhs = [&](raft::device_span out, + raft::device_span residual) { + if (out.empty()) return; + cub::DeviceTransform::Transform( + residual.data(), + out.data(), + out.size(), + [] HD(f_t rhs) { return -rhs; }, + stream_view_.value()); + }; + // xz -> -x .* z + negate_complementarity_rhs( + raft::device_span(data.d_complementarity_xz_rhs_.data(), linear_size), + raft::device_span(data.d_complementarity_xz_residual_.data(), linear_size)); // w.*v -> -w .* v - cub::DeviceTransform::Transform( - data.d_complementarity_wv_rhs_.data(), - data.d_complementarity_wv_rhs_.data(), - data.d_complementarity_wv_rhs_.size(), - [] HD(f_t wv_rhs) { return -wv_rhs; }, - stream_view_.value()); + negate_complementarity_rhs(cuopt::make_span(data.d_complementarity_wv_rhs_), + cuopt::make_span(data.d_complementarity_wv_residual_)); RAFT_CHECK_CUDA(stream_view_); + + fill_linear_complementarity_target( + data, + raft::device_span(data.d_complementarity_target_.data(), linear_size), + raft::device_span(data.d_complementarity_xz_rhs_.data(), linear_size), + raft::device_span(data.d_x_.data(), linear_size), + stream_view_); + if (has_soc) { + cuopt_assert(cone_var_start + m_c == lp.num_cols, "barrier expects [linear | cone] layout"); + fill_affine_cone_complementarity_target(data, cone_var_start, m_c, stream_view_); + } } template @@ -3022,6 +3633,7 @@ void barrier_solver_t::compute_target_mu( iteration_data_t& data, f_t mu, f_t& mu_aff, f_t& sigma, f_t& new_mu) { raft::common::nvtx::range fun_scope("Barrier: compute_target_mu"); + const bool has_soc = data.has_cones(); f_t complementarity_aff_sum = 0.0; // TMP no copy and data should always be on the GPU @@ -3040,7 +3652,23 @@ void barrier_solver_t::compute_target_mu( f_t step_dual_aff = std::min(gpu_max_step_to_boundary(data, data.d_v_, data.d_dv_aff_), gpu_max_step_to_boundary(data, data.d_z_, data.d_dz_aff_)); - if (data.Q.n > 0) { step_primal_aff = step_dual_aff = std::min(step_primal_aff, step_dual_aff); } + if (has_soc) { + i_t cs = data.cone_start(); + i_t mc = data.cone_entry_count(); + auto [cone_p, cone_d] = + compute_cone_step_length(data.cones(), + raft::device_span(data.d_dx_aff_.data() + cs, mc), + raft::device_span(data.d_dz_aff_.data() + cs, mc), + std::min(step_primal_aff, step_dual_aff), + stream_view_); + f_t cone_aff = std::min(cone_p, cone_d); + step_primal_aff = std::min(step_primal_aff, cone_aff); + step_dual_aff = step_primal_aff; + } + + if (data.Q.n > 0 || has_soc) { + step_primal_aff = step_dual_aff = std::min(step_primal_aff, step_dual_aff); + } // Compute complementarity_xz_aff_sum = sum(x_aff * z_aff), // where x_aff = x + step_primal_aff * dx_aff and z_aff = z + step_dual_aff * dz_aff @@ -3091,55 +3719,63 @@ void barrier_solver_t::compute_target_mu( stream_view_); complementarity_aff_sum = complementarity_xz_aff_sum + complementarity_wv_aff_sum; - f_t mu_denom = static_cast(data.x.size()) + static_cast(data.n_upper_bounds); - mu_denom -= static_cast(data.n_free_vars); - mu_denom = std::max(mu_denom, f_t(1.0)); - mu_aff = complementarity_aff_sum / mu_denom; - sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); - new_mu = sigma * mu_aff; -} - -template -static void fill_linear_cc_rhs(iteration_data_t& data, - f_t new_mu, - raft::device_span out, - raft::device_span dx_aff, - raft::device_span dz_aff, - rmm::cuda_stream_view stream_view) -{ - if (out.empty()) return; - if (data.n_free_vars > 0) { - auto is_free_ptr = data.d_is_free_.data(); - cub::DeviceTransform::Transform( - cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), is_free_ptr), - out.data(), - out.size(), - [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_free) { - return is_free ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu); - }, - stream_view.value()); - } else { - cub::DeviceTransform::Transform( - cuda::std::make_tuple(dx_aff.data(), dz_aff.data()), - out.data(), - out.size(), - [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val) { return -(dx_aff_val * dz_aff_val) + new_mu; }, - stream_view.value()); - } + const f_t mu_denom = data.complementarity_degree(data.x.size(), data.n_upper_bounds); + mu_aff = complementarity_aff_sum / mu_denom; + sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); + new_mu = sigma * mu; } template void barrier_solver_t::compute_cc_rhs(iteration_data_t& data, f_t& new_mu) { raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs"); - - fill_linear_cc_rhs(data, - new_mu, - cuopt::make_span(data.d_complementarity_xz_rhs_), - cuopt::make_span(data.d_dx_aff_), - cuopt::make_span(data.d_dz_aff_), - stream_view_); + const bool has_soc = data.has_cones(); + const bool has_native_free_linear = data.n_native_free_linear > 0; + const i_t linear_size = data.linear_xz_size(lp.num_cols); + + auto fill_linear_cc_rhs = [&](raft::device_span out, + raft::device_span dx_aff, + raft::device_span dz_aff) { + if (out.empty()) return; + if (has_native_free_linear) { + auto native_free_linear_ptr = data.d_is_native_free_linear_.data(); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), native_free_linear_ptr), + out.data(), + out.size(), + [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_native_free_linear) { + return is_native_free_linear ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu); + }, + stream_view_.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(dx_aff.data(), dz_aff.data()), + out.data(), + out.size(), + [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val) { return -(dx_aff_val * dz_aff_val) + new_mu; }, + stream_view_.value()); + } + }; + fill_linear_cc_rhs(raft::device_span(data.d_complementarity_xz_rhs_.data(), linear_size), + raft::device_span(data.d_dx_aff_.data(), linear_size), + raft::device_span(data.d_dz_aff_.data(), linear_size)); RAFT_CHECK_CUDA(stream_view_); + + const i_t cone_var_start = data.cone_start(); + const i_t m_c = data.cone_entry_count(); + + fill_linear_complementarity_target( + data, + raft::device_span(data.d_complementarity_target_.data(), linear_size), + raft::device_span(data.d_complementarity_xz_rhs_.data(), linear_size), + raft::device_span(data.d_x_.data(), linear_size), + stream_view_); + if (has_soc) { + cuopt_assert(cone_var_start + m_c == lp.num_cols, "barrier expects [linear | cone] layout"); + fill_corrector_cone_complementarity_target( + data, cone_var_start, m_c, new_mu, stream_view_); + } + cub::DeviceTransform::Transform( cuda::std::make_tuple(data.d_dw_aff_.data(), data.d_dv_aff_.data()), data.d_complementarity_wv_rhs_.data(), @@ -3147,21 +3783,19 @@ void barrier_solver_t::compute_cc_rhs(iteration_data_t& data [new_mu] HD(f_t dw_aff, f_t dv_aff) { return -(dw_aff * dv_aff) + new_mu; }, stream_view_.value()); RAFT_CHECK_CUDA(stream_view_); - // TMP should be CPU to 0 if CPU and GPU to 0 if GPU data.primal_rhs.set_scalar(0.0); data.bound_rhs.set_scalar(0.0); data.dual_rhs.set_scalar(0.0); + data.cone_combined_step_ = has_soc; + data.cone_sigma_mu_ = has_soc ? new_mu : f_t(0); } template void barrier_solver_t::compute_final_direction(iteration_data_t& data) { raft::common::nvtx::range fun_scope("Barrier: compute_final_direction"); - // TODO Nicolas: Redundant copies - data.d_y_.resize(data.y.size(), stream_view_); data.d_dy_aff_.resize(data.dy_aff.size(), stream_view_); - raft::copy(data.d_y_.data(), data.y.data(), data.y.size(), stream_view_); raft::copy(data.d_dy_aff_.data(), data.dy_aff.data(), data.dy_aff.size(), stream_view_); #ifdef FINITE_CHECK @@ -3224,6 +3858,8 @@ void barrier_solver_t::compute_primal_dual_step_length(iteration_data_ f_t& step_dual) { raft::common::nvtx::range fun_scope("Barrier: compute_primal_dual_step_length"); + const bool has_soc = data.has_cones(); + f_t max_step_primal = 0.0; f_t max_step_dual = 0.0; max_step_primal = std::min(gpu_max_step_to_boundary(data, data.d_w_, data.d_dw_), @@ -3231,10 +3867,23 @@ void barrier_solver_t::compute_primal_dual_step_length(iteration_data_ max_step_dual = std::min(gpu_max_step_to_boundary(data, data.d_v_, data.d_dv_), gpu_max_step_to_boundary(data, data.d_z_, data.d_dz_)); + if (has_soc) { + i_t cs = data.cone_start(); + i_t mc = data.cone_entry_count(); + auto [cone_primal, cone_dual] = + compute_cone_step_length(data.cones(), + raft::device_span(data.d_dx_.data() + cs, mc), + raft::device_span(data.d_dz_.data() + cs, mc), + f_t(1), + stream_view_); + max_step_primal = std::min(max_step_primal, cone_primal); + max_step_dual = std::min(max_step_dual, cone_dual); + } + step_primal = step_scale * max_step_primal; step_dual = step_scale * max_step_dual; - if (data.Q.n > 0) { step_primal = step_dual = std::min(step_primal, step_dual); } + if (data.Q.n > 0 || has_soc) { step_primal = step_dual = std::min(step_primal, step_dual); } } template @@ -3325,11 +3974,8 @@ void barrier_solver_t::compute_mu(iteration_data_t& data, f_ { raft::common::nvtx::range fun_scope("Barrier: compute_mu"); - f_t mu_denom = static_cast(data.x.size()) + static_cast(data.n_upper_bounds); - mu_denom -= static_cast(data.n_free_vars); // free vars don't contribute to mu - mu_denom = std::max(mu_denom, f_t(1.0)); - - mu = (data.sum_reduce_helper_.sum(data.d_complementarity_xz_residual_.begin(), + const f_t mu_denom = data.complementarity_degree(data.x.size(), data.n_upper_bounds); + mu = (data.sum_reduce_helper_.sum(data.d_complementarity_xz_residual_.begin(), data.d_complementarity_xz_residual_.size(), stream_view_) + data.sum_reduce_helper_.sum(data.d_complementarity_wv_residual_.begin(), @@ -3595,6 +4241,10 @@ lp_status_t barrier_solver_t::solve(f_t start_time, lp_solution_t 0) { settings.log.printf("Quadratic objective matrix: %d nonzeros\n", lp.Q.row_start[lp.Q.n]); } + if (lp.second_order_cone_dims.size() > 0) { + settings.log.printf("Second-order cones: %d\n", + static_cast(lp.second_order_cone_dims.size())); + } settings.log.printf("\n"); // Compute the number of free variables @@ -3613,7 +4263,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, lp_solution_t 0) { create_Q(lp, Q); } iteration_data_t data( - lp, num_upper_bounds, presolve_info.free_variable_indices, Q, settings); + lp, num_upper_bounds, presolve_info.native_free_linear_indices, Q, settings); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; @@ -3651,17 +4301,42 @@ lp_status_t barrier_solver_t::solve(f_t start_time, lp_solution_t(data, stream_view_); + copy_static_problem_data_to_device(lp, data, stream_view_); + compute_residuals>(data.w, data.x, data.y, data.v, data.z, data); f_t primal_residual_norm = std::max(vector_norm_inf(data.primal_residual, stream_view_), vector_norm_inf(data.bound_residual, stream_view_)); - f_t dual_residual_norm = vector_norm_inf(data.dual_residual, stream_view_); + f_t dual_residual_norm = vector_norm_inf(data.dual_residual, stream_view_); + const bool has_soc = data.has_cones(); + const i_t linear_xz_size = data.linear_xz_size(data.complementarity_xz_residual.size()); + auto linear_xz_span = + raft::host_span(data.complementarity_xz_residual.data(), linear_xz_size); + // compute complementarity residual norm for nonnegativity constraints f_t complementarity_residual_norm = - std::max(vector_norm_inf(data.complementarity_xz_residual, stream_view_), + std::max(vector_norm_inf(linear_xz_span, stream_view_), vector_norm_inf(data.complementarity_wv_residual, stream_view_)); - f_t mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / - (static_cast(n) + static_cast(num_upper_bounds)); + // compute complementarity residual norm for second-order cones + if (has_soc) { + f_t cone_complementarity_norm = f_t(0); + i_t off = data.cone_start(); + for (auto q_k : lp.second_order_cone_dims) { + f_t cone_dot = f_t(0); + for (i_t j = 0; j < q_k; ++j) { + cone_dot += data.complementarity_xz_residual[off + j]; + } + cone_complementarity_norm = std::max(cone_complementarity_norm, cone_dot); + off += q_k; + } + complementarity_residual_norm = + std::max(complementarity_residual_norm, cone_complementarity_norm); + } + f_t mu_denom = data.complementarity_degree(n, num_upper_bounds); + f_t mu = + (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / mu_denom; f_t norm_b = vector_norm_inf(data.b, stream_view_); f_t norm_c = vector_norm_inf(data.c, stream_view_); @@ -3686,6 +4361,11 @@ lp_status_t barrier_solver_t::solve(f_t start_time, lp_solution_t::solve(f_t start_time, lp_solution_t::solve(f_t start_time, lp_solution_t::solve(f_t start_time, lp_solution_t::solve(f_t start_time, lp_solution_t::solve(f_t start_time, lp_solution_t::solve(f_t start_time, lp_solution_t::solve(f_t start_time, lp_solution_t& dy, pinned_dense_vector_t& dv, pinned_dense_vector_t& dz, + f_t& dual_perturb, + f_t& primal_perturb, f_t& max_residual); private: diff --git a/cpp/src/barrier/dense_vector.hpp b/cpp/src/barrier/dense_vector.hpp index 5d6f2b12ec..2d5d0b335c 100644 --- a/cpp/src/barrier/dense_vector.hpp +++ b/cpp/src/barrier/dense_vector.hpp @@ -184,6 +184,28 @@ class dense_vector_t : public std::vector { } } + void ensure_positive_skip_range(f_t epsilon_adjust, i_t skip_start, i_t skip_count) + { + if (skip_count == 0) { + ensure_positive(epsilon_adjust); + return; + } + const i_t n = this->size(); + const i_t skip_end = skip_start + skip_count; + f_t min_val = std::numeric_limits::max(); + for (i_t i = 0; i < n; i++) { + if (i >= skip_start && i < skip_end) continue; + min_val = std::min(min_val, (*this)[i]); + } + if (min_val <= 0.0) { + const f_t delta = -min_val + epsilon_adjust; + for (i_t i = 0; i < n; i++) { + if (i >= skip_start && i < skip_end) continue; + (*this)[i] += delta; + } + } + } + void ensure_positive(f_t epsilon_adjust, const std::vector& mask) { f_t min_x = inf; diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index a193bddded..721465d2a1 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -16,10 +16,16 @@ #include #include #include -#include namespace cuopt::linear_programming::dual_simplex { +template +/** Number of leading linear columns; SOCP cone variables occupy [linear_cols, num_cols). */ +static i_t linear_var_count(const lp_problem_t& problem) +{ + return problem.second_order_cone_dims.empty() ? problem.num_cols : problem.cone_var_start; +} + template i_t remove_empty_cols(lp_problem_t& problem, i_t& num_empty_cols, @@ -27,26 +33,18 @@ i_t remove_empty_cols(lp_problem_t& problem, { constexpr bool verbose = false; if (verbose) { printf("Removing %d empty columns\n", num_empty_cols); } - // We have a variable x_j that does not appear in any rows - // The cost function - // sum_{k != j} c_k * x_k + c_j * x_j - // becomes - // sum_{k != j} c_k * x_k + c_j * l_j if c_j > 0 - // or - // sum_{k != j} c_k * x_k + c_j * u_j if c_j < 0 presolve_info.removed_variables.reserve(num_empty_cols); presolve_info.removed_values.reserve(num_empty_cols); presolve_info.removed_reduced_costs.reserve(num_empty_cols); - // Check to see if a variable participates in a quadratic objective std::vector has_quadratic_term(problem.num_cols, false); + i_t linear_cols = linear_var_count(problem); if (problem.Q.n > 0) { - for (i_t j = 0; j < problem.num_cols; ++j) { + for (i_t j = 0; j < linear_cols; ++j) { const i_t row_start = problem.Q.row_start[j]; const i_t row_end = problem.Q.row_start[j + 1]; if (row_end - row_start == 0) { continue; } - // Q is symmetric, so its sufficient to check only the row size has_quadratic_term[j] = true; } } @@ -55,12 +53,13 @@ i_t remove_empty_cols(lp_problem_t& problem, i_t new_cols = 0; for (i_t j = 0; j < problem.num_cols; ++j) { bool remove_var = false; - if ((problem.A.col_start[j + 1] - problem.A.col_start[j]) == 0) { - if (problem.objective[j] >= 0 && problem.lower[j] > -inf && !has_quadratic_term[j]) { + if (j < linear_cols && (problem.A.col_start[j + 1] - problem.A.col_start[j]) == 0) { + bool non_removable = has_quadratic_term[j]; + if (problem.objective[j] >= 0 && problem.lower[j] > -inf && !non_removable) { presolve_info.removed_values.push_back(problem.lower[j]); problem.obj_constant += problem.objective[j] * problem.lower[j]; remove_var = true; - } else if (problem.objective[j] <= 0 && problem.upper[j] < inf && !has_quadratic_term[j]) { + } else if (problem.objective[j] <= 0 && problem.upper[j] < inf && !non_removable) { presolve_info.removed_values.push_back(problem.upper[j]); problem.obj_constant += problem.objective[j] * problem.upper[j]; remove_var = true; @@ -123,6 +122,12 @@ i_t remove_empty_cols(lp_problem_t& problem, problem.Q.check_matrix("After removing empty columns"); } + if (!problem.second_order_cone_dims.empty()) { + i_t new_cone_start = col_old_to_new[problem.cone_var_start]; + assert(new_cone_start != -1); + problem.cone_var_start = new_cone_start; + } + problem.objective = objective; problem.lower = lower; problem.upper = upper; @@ -272,6 +277,113 @@ i_t convert_less_than_to_equal(const user_problem_t& user_problem, // We must convert rows in the form: a_i^T x <= beta // into: a_i^T x + s_i = beta, s_i >= 0 + if (!problem.second_order_cone_dims.empty()) { + const i_t old_num_cols = problem.num_cols; + const i_t linear_cols = linear_var_count(problem); + const i_t num_slacks = less_rows; + const i_t num_cols = old_num_cols + num_slacks; + const i_t old_nnz = problem.A.col_start[old_num_cols]; + const i_t nnz = old_nnz + num_slacks; + const i_t new_cone_start = linear_cols + num_slacks; + + auto old_A = problem.A; + csc_matrix_t expanded_A(problem.A.m, num_cols, nnz); + + std::vector objective(num_cols, 0.0); + std::vector lower(num_cols, 0.0); + std::vector upper(num_cols, INFINITY); + std::vector old_to_new(old_num_cols, -1); + + for (i_t j = 0; j < linear_cols; ++j) { + old_to_new[j] = j; + objective[j] = problem.objective[j]; + lower[j] = problem.lower[j]; + upper[j] = problem.upper[j]; + } + for (i_t j = linear_cols; j < old_num_cols; ++j) { + old_to_new[j] = j + num_slacks; + objective[old_to_new[j]] = problem.objective[j]; + lower[old_to_new[j]] = problem.lower[j]; + upper[old_to_new[j]] = problem.upper[j]; + } + + i_t nz = 0; + for (i_t j = 0; j < linear_cols; ++j) { + expanded_A.col_start[j] = nz; + for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { + expanded_A.i[nz] = old_A.i[p]; + expanded_A.x[nz] = old_A.x[p]; + ++nz; + } + } + + i_t slack_col = linear_cols; + for (i_t i = 0; i < problem.num_rows; i++) { + if (row_sense[i] == 'L') { + expanded_A.col_start[slack_col] = nz; + expanded_A.i[nz] = i; + expanded_A.x[nz] = 1.0; + new_slacks.push_back(slack_col); + row_sense[i] = 'E'; + ++slack_col; + ++nz; + --less_rows; + } + } + + for (i_t j = linear_cols; j < old_num_cols; ++j) { + i_t new_j = old_to_new[j]; + expanded_A.col_start[new_j] = nz; + for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { + expanded_A.i[nz] = old_A.i[p]; + expanded_A.x[nz] = old_A.x[p]; + ++nz; + } + } + expanded_A.col_start[num_cols] = nz; + assert(less_rows == 0); + assert(slack_col == new_cone_start); + assert(nz == nnz); + + if (problem.Q.n > 0) { + const auto old_Q = problem.Q; + const i_t q_nnz = old_Q.row_start[old_num_cols]; + + problem.Q.row_start.assign(num_cols + 1, 0); + for (i_t row = 0; row < old_num_cols; ++row) { + i_t new_row = old_to_new[row]; + problem.Q.row_start[new_row + 1] = old_Q.row_start[row + 1] - old_Q.row_start[row]; + } + for (i_t row = 0; row < num_cols; ++row) { + problem.Q.row_start[row + 1] += problem.Q.row_start[row]; + } + + problem.Q.j.resize(q_nnz); + problem.Q.x.resize(q_nnz); + auto row_starts = problem.Q.row_start; + for (i_t row = 0; row < old_num_cols; ++row) { + i_t new_row = old_to_new[row]; + for (i_t p = old_Q.row_start[row]; p < old_Q.row_start[row + 1]; ++p) { + problem.Q.j[row_starts[new_row]] = old_to_new[old_Q.j[p]]; + problem.Q.x[row_starts[new_row]] = old_Q.x[p]; + ++row_starts[new_row]; + } + } + problem.Q.m = num_cols; + problem.Q.n = num_cols; + problem.Q.nz_max = q_nnz; + } + + problem.A = expanded_A; + problem.A.n = num_cols; + problem.objective = objective; + problem.lower = lower; + problem.upper = upper; + problem.num_cols = num_cols; + problem.cone_var_start = new_cone_start; + return 0; + } + i_t num_cols = problem.num_cols + less_rows; i_t nnz = problem.A.col_start[problem.num_cols] + less_rows; problem.A.col_start.resize(num_cols + 1); @@ -571,16 +683,18 @@ void convert_user_problem(const user_problem_t& user_problem, } // Copy info from user_problem to problem - problem.num_rows = user_problem.num_rows; - problem.num_cols = user_problem.num_cols; - problem.A = user_problem.A; - problem.objective = user_problem.objective; - problem.obj_scale = user_problem.obj_scale; - problem.obj_constant = user_problem.obj_constant; - problem.objective_is_integral = user_problem.objective_is_integral; - problem.rhs = user_problem.rhs; - problem.lower = user_problem.lower; - problem.upper = user_problem.upper; + problem.num_rows = user_problem.num_rows; + problem.num_cols = user_problem.num_cols; + problem.A = user_problem.A; + problem.objective = user_problem.objective; + problem.obj_scale = user_problem.obj_scale; + problem.obj_constant = user_problem.obj_constant; + problem.objective_is_integral = user_problem.objective_is_integral; + problem.rhs = user_problem.rhs; + problem.lower = user_problem.lower; + problem.upper = user_problem.upper; + problem.cone_var_start = user_problem.cone_var_start; + problem.second_order_cone_dims = user_problem.second_order_cone_dims; // Make a copy of row_sense so we can modify it std::vector row_sense = user_problem.row_sense; @@ -637,6 +751,7 @@ void convert_user_problem(const user_problem_t& user_problem, settings.log.debug( "equality rows %d less rows %d columns %d\n", equal_rows, less_rows, problem.num_cols); if (settings.barrier && settings.dualize != 0 && user_problem.Q_values.size() == 0 && + problem.second_order_cone_dims.empty() && (settings.dualize == 1 || (settings.dualize == -1 && less_rows > 1.2 * problem.num_cols && equal_rows < 2e4))) { settings.log.debug("Dualizing in presolve\n"); @@ -808,8 +923,8 @@ void convert_user_problem(const user_problem_t& user_problem, problem.Q.x = user_problem.Q_values; } - // Add artifical variables - if (!settings.barrier_presolve) { + // Artificial vars break [linear | cone]; only for barrier_presolve-off LP-style layouts. + if (!settings.barrier_presolve && problem.second_order_cone_dims.empty()) { add_artifical_variables(problem, user_problem.range_rows, equality_rows, new_slacks); } } @@ -821,13 +936,17 @@ i_t presolve(const lp_problem_t& original, presolve_info_t& presolve_info) { problem = original; + const i_t linear_cols = linear_var_count(problem); + const bool has_cones = !problem.second_order_cone_dims.empty(); std::vector row_sense(problem.num_rows, '='); - // Check for free variables + + // Check for free variables (linear block only; cone columns are handled by the barrier SOC layout) i_t free_variables = 0; - for (i_t j = 0; j < problem.num_cols; j++) { + for (i_t j = 0; j < linear_cols; j++) { if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; } } + // Barrier presolve phase 1: bound free linear variables from equality rows. if (settings.barrier_presolve && free_variables > 0) { // Try to remove free variables std::vector constraints_to_check; @@ -835,7 +954,7 @@ i_t presolve(const lp_problem_t& original, std::vector row_marked(problem.num_rows, 0); current_free_variables.reserve(problem.num_cols); constraints_to_check.reserve(problem.num_rows); - for (i_t j = 0; j < problem.num_cols; j++) { + for (i_t j = 0; j < linear_cols; j++) { if (problem.lower[j] == -inf && problem.upper[j] == inf) { current_free_variables.push_back(j); const i_t col_start = problem.A.col_start[j]; @@ -859,10 +978,30 @@ i_t presolve(const lp_problem_t& original, std::vector upper_bound_coefficient(problem.num_cols, 0.0); if (!constraints_to_check.empty()) { - // Check if the constraints are feasible csr_matrix_t Arow(0, 0, 0); problem.A.to_compressed_row(Arow); + // Keep only rows safe for phase-1 bound inference: no cone columns, exactly one free linear. + { + std::vector safe_constraints; + safe_constraints.reserve(constraints_to_check.size()); + for (i_t i : constraints_to_check) { + bool touches_cone = false; + i_t free_linear_in_row = 0; + for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) { + const i_t j = Arow.j[p]; + if (j >= linear_cols) { + touches_cone = true; + continue; + } + if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_linear_in_row++; } + } + if (touches_cone || free_linear_in_row != 1) { continue; } + safe_constraints.push_back(i); + } + constraints_to_check.swap(safe_constraints); + } + // The constraints are in the form: // sum_j a_j x_j = beta for (i_t i : constraints_to_check) { @@ -876,6 +1015,7 @@ i_t presolve(const lp_problem_t& original, f_t last_free_coeff_i = 0.0; for (i_t p = row_start; p < row_end; p++) { const i_t j = Arow.j[p]; + if (j >= linear_cols) { continue; } const f_t aij = Arow.x[p]; const f_t lower_j = problem.lower[j]; const f_t upper_j = problem.upper[j]; @@ -929,6 +1069,7 @@ i_t presolve(const lp_problem_t& original, // x_j <= 1/a_ij * (rhs - upper_activity_i) const i_t j = last_free_i; const f_t a_ij = last_free_coeff_i; + if (a_ij == 0) { continue; } const f_t max_bound = 1e10; bool bounded = false; if (a_ij > 0) { @@ -986,6 +1127,9 @@ i_t presolve(const lp_problem_t& original, problem.lower[j] = -inf; } } + if (!(problem.lower[j] == -inf && problem.upper[j] == inf)) { + presolve_info.phase1_bounded_linear_indices.push_back(j); + } } // Record bounded free variables for dual correction in uncrush. @@ -1016,11 +1160,12 @@ i_t presolve(const lp_problem_t& original, } } + // Barrier presolve phase 2: negate one-sided bounds (-inf < x <= u -> -u <= x < inf). // The original problem may have a variable without a lower bound // but a finite upper bound - // -inf < x_j <= u_j + // -inf < x_j <= u_j (linear variables only) i_t no_lower_bound = 0; - for (i_t j = 0; j < problem.num_cols; j++) { + for (i_t j = 0; j < linear_cols; j++) { if (problem.lower[j] == -inf && problem.upper[j] < inf) { no_lower_bound++; } } @@ -1031,7 +1176,7 @@ i_t presolve(const lp_problem_t& original, // Handle -inf < x_j <= u_j by substituting x'_j = -x_j, giving -u_j <= x'_j < inf if (settings.barrier_presolve && no_lower_bound > 0) { presolve_info.negated_variables.reserve(no_lower_bound); - for (i_t j = 0; j < problem.num_cols; j++) { + for (i_t j = 0; j < linear_cols; j++) { if (problem.lower[j] == -inf && problem.upper[j] < inf) { presolve_info.negated_variables.push_back(j); @@ -1066,10 +1211,10 @@ i_t presolve(const lp_problem_t& original, } } - // The original problem may have nonzero lower bounds - // 0 != l_j <= x_j <= u_j + // Barrier presolve phase 3: shift nonzero lower bounds to zero (cone/stack columns not shifted). + // 0 != l_j <= x_j <= u_j (linear variables only; cone/stack columns are not shifted) i_t nonzero_lower_bounds = 0; - for (i_t j = 0; j < problem.num_cols; j++) { + for (i_t j = 0; j < linear_cols; j++) { if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) { nonzero_lower_bounds++; } } if (settings.barrier_presolve && nonzero_lower_bounds > 0) { @@ -1092,7 +1237,7 @@ i_t presolve(const lp_problem_t& original, // so we get the constant term c_j * l_j std::vector lower_bounds_removed(problem.num_cols, false); - for (i_t j = 0; j < problem.num_cols; j++) { + for (i_t j = 0; j < linear_cols; j++) { if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) { lower_bounds_removed[j] = true; presolve_info.removed_lower_bounds[j] = problem.lower[j]; @@ -1101,7 +1246,7 @@ i_t presolve(const lp_problem_t& original, auto old_objective = problem.objective; if (problem.Q.n > 0) { - for (i_t row = 0; row < problem.num_cols; row++) { + for (i_t row = 0; row < linear_cols; row++) { i_t row_start = problem.Q.row_start[row]; i_t row_end = problem.Q.row_start[row + 1]; for (i_t p = row_start; p < row_end; p++) { @@ -1122,7 +1267,7 @@ i_t presolve(const lp_problem_t& original, } std::vector kahan_compensation(problem.num_rows, 0.0); - for (i_t j = 0; j < problem.num_cols; j++) { + for (i_t j = 0; j < linear_cols; j++) { if (lower_bounds_removed[j]) { i_t col_start = problem.A.col_start[j]; i_t col_end = problem.A.col_start[j + 1]; @@ -1142,7 +1287,7 @@ i_t presolve(const lp_problem_t& original, } } - // Check for empty rows + // Barrier presolve phase 4: remove empty rows and empty linear columns. i_t num_empty_rows = 0; { csr_matrix_t Arow(0, 0, 0); @@ -1160,7 +1305,7 @@ i_t presolve(const lp_problem_t& original, // Check for empty cols i_t num_empty_cols = 0; { - for (i_t j = 0; j < problem.num_cols; ++j) { + for (i_t j = 0; j < linear_cols; ++j) { if ((problem.A.col_start[j + 1] - problem.A.col_start[j]) == 0) { num_empty_cols++; } } } @@ -1169,19 +1314,33 @@ i_t presolve(const lp_problem_t& original, remove_empty_cols(problem, num_empty_cols, presolve_info); } + // Check for free variables (exclude cone variables — they are naturally unbounded) + free_variables = 0; + for (i_t j = 0; j < linear_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; } + } problem.Q.check_matrix("Before free variable expansion"); - // For QPs, keep free variables as-is rather than - // splitting x = v - w. The barrier solver handles them natively with a - // static regularizer on the diagonal instead of z/x complementarity terms. - if (settings.barrier_presolve && free_variables > 0 && problem.Q.n > 0) { - presolve_info.free_variable_indices.clear(); - for (i_t j = 0; j < problem.num_cols; j++) { + // Barrier presolve phase 5: free linear variables — 5a native (QP/SOCP) or 5b v-w split (LP). + // QP/SOCP augmented system: keep free linears as-is (no x = v - w); barrier uses D = 0 on them. + const bool register_native_free_linear = free_variables > 0 && (problem.Q.n > 0 || has_cones); + if (register_native_free_linear) { + presolve_info.free_variable_pairs.clear(); + presolve_info.native_free_linear_indices.clear(); + // Only linear decision variables can be "native" free variables; cone/stack columns + // are unbounded by construction and must not be counted here. + i_t native_free_count = 0; + for (i_t j = 0; j < linear_cols; j++) { if (problem.lower[j] == -inf && problem.upper[j] == inf) { - presolve_info.free_variable_indices.push_back(j); + presolve_info.native_free_linear_indices.push_back(j); + native_free_count++; } } - } else if (settings.barrier_presolve && free_variables > 0) { + settings.log.printf( + "Keeping %d native free linear variables for augmented-system barrier (QP/SOCP)\n", + native_free_count); + } else if (settings.barrier_presolve && !has_cones && free_variables > 0) { + // Phase 5b: x_j = v - w with v, w >= 0 (LP without cones; SOCP/QP use phase 5a). // We have a variable x_j: with -inf < x_j < inf // we create new variables v and w with 0 <= v, w and x_j = v - w // Constraints @@ -1194,123 +1353,163 @@ i_t presolve(const lp_problem_t& original, // becomes // sum_{k != j} c_k x_k + c_j v - c_j w - std::vector pair_index(problem.num_cols, -1); - i_t num_cols = problem.num_cols + free_variables; - i_t nnz = problem.A.col_start[problem.num_cols]; - for (i_t j = 0; j < problem.num_cols; j++) { - if (problem.lower[j] == -inf && problem.upper[j] == inf) { - nnz += (problem.A.col_start[j + 1] - problem.A.col_start[j]); + const i_t old_num_cols = problem.num_cols; + const i_t new_cone_start = + problem.second_order_cone_dims.empty() ? 0 : linear_cols + free_variables; + const i_t num_cols = old_num_cols + free_variables; + + auto old_A = problem.A; + auto old_Q = problem.Q; + auto old_objective = problem.objective; + auto old_lower = problem.lower; + auto old_upper = problem.upper; + + std::vector partner_index(old_num_cols, -1); + std::vector orig_to_new(old_num_cols, -1); + std::vector is_free(old_num_cols, false); + + i_t next_partner = linear_cols; + for (i_t j = 0; j < linear_cols; ++j) { + orig_to_new[j] = j; + if (old_lower[j] == -inf && old_upper[j] == inf) { + is_free[j] = true; + partner_index[j] = next_partner++; } } + for (i_t j = linear_cols; j < old_num_cols; ++j) { + orig_to_new[j] = j + free_variables; + } + assert(next_partner == new_cone_start || problem.second_order_cone_dims.empty()); - problem.A.col_start.resize(num_cols + 1); - problem.A.i.resize(nnz); - problem.A.x.resize(nnz); - problem.lower.resize(num_cols); - problem.upper.resize(num_cols); - problem.objective.resize(num_cols); + i_t nnz = old_A.col_start[old_num_cols]; + for (i_t j = 0; j < linear_cols; ++j) { + if (is_free[j]) { nnz += old_A.col_start[j + 1] - old_A.col_start[j]; } + } - presolve_info.free_variable_pairs.resize(free_variables * 2); - i_t pair_count = 0; - i_t q = problem.A.col_start[problem.num_cols]; - i_t col = problem.num_cols; - for (i_t j = 0; j < problem.num_cols; j++) { - if (problem.lower[j] == -inf && problem.upper[j] == inf) { - for (i_t p = problem.A.col_start[j]; p < problem.A.col_start[j + 1]; p++) { - i_t i = problem.A.i[p]; - f_t aij = problem.A.x[p]; - problem.A.i[q] = i; - problem.A.x[q] = -aij; - q++; + csc_matrix_t expanded_A(problem.A.m, num_cols, nnz); + i_t nz = 0; + for (i_t j = 0; j < linear_cols; ++j) { + expanded_A.col_start[j] = nz; + for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { + expanded_A.i[nz] = old_A.i[p]; + expanded_A.x[nz] = old_A.x[p]; + ++nz; + } + } + for (i_t j = 0; j < linear_cols; ++j) { + if (partner_index[j] != -1) { + expanded_A.col_start[partner_index[j]] = nz; + for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { + expanded_A.i[nz] = old_A.i[p]; + expanded_A.x[nz] = -old_A.x[p]; + ++nz; } - problem.lower[col] = 0.0; - problem.upper[col] = inf; - problem.objective[col] = -problem.objective[j]; - presolve_info.free_variable_pairs[pair_count++] = j; - presolve_info.free_variable_pairs[pair_count++] = col; - pair_index[j] = col; - problem.A.col_start[++col] = q; - problem.lower[j] = 0.0; } } + for (i_t j = linear_cols; j < old_num_cols; ++j) { + i_t new_j = orig_to_new[j]; + expanded_A.col_start[new_j] = nz; + for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { + expanded_A.i[nz] = old_A.i[p]; + expanded_A.x[nz] = old_A.x[p]; + ++nz; + } + } + expanded_A.col_start[num_cols] = nz; + + std::vector objective(num_cols); + std::vector lower(num_cols, -INFINITY); + std::vector upper(num_cols, INFINITY); + presolve_info.free_variable_pairs.clear(); + presolve_info.free_variable_pairs.reserve(free_variables * 2); + + for (i_t j = 0; j < linear_cols; ++j) { + objective[j] = old_objective[j]; + if (is_free[j]) { + lower[j] = 0.0; + upper[j] = inf; + objective[partner_index[j]] = -old_objective[j]; + lower[partner_index[j]] = 0.0; + upper[partner_index[j]] = inf; + presolve_info.free_variable_pairs.push_back(j); + presolve_info.free_variable_pairs.push_back(partner_index[j]); + } else { + lower[j] = old_lower[j]; + upper[j] = old_upper[j]; + } + } + for (i_t j = linear_cols; j < old_num_cols; ++j) { + i_t new_j = orig_to_new[j]; + objective[new_j] = old_objective[j]; + lower[new_j] = old_lower[j]; + upper[new_j] = old_upper[j]; + } - if (problem.Q.n > 0) { + if (old_Q.n > 0) { std::vector row_counts(num_cols, 0); - i_t nz_count = problem.Q.row_start[problem.num_cols]; - for (i_t row = 0; row < problem.Q.n; row++) { - i_t q_start = problem.Q.row_start[row]; - i_t q_end = problem.Q.row_start[row + 1]; - row_counts[row] = q_end - q_start; - for (i_t qj = q_start; qj < q_end; qj++) { - i_t col = problem.Q.j[qj]; - if (pair_index[row] != -1 && pair_index[col] != -1) { - assert(pair_index[row] >= problem.num_cols); - assert(pair_index[col] >= problem.num_cols); - row_counts[row]++; - row_counts[pair_index[row]] += 2; - nz_count += 3; - } else if (pair_index[col] != -1) { - assert(pair_index[col] >= problem.num_cols); - row_counts[row]++; + i_t nz_count = 0; + for (i_t row = 0; row < old_Q.n; ++row) { + i_t new_row = orig_to_new[row]; + i_t partner_row = partner_index[row]; + i_t q_start = old_Q.row_start[row]; + i_t q_end = old_Q.row_start[row + 1]; + for (i_t qj = q_start; qj < q_end; ++qj) { + i_t col = old_Q.j[qj]; + i_t partner_col = partner_index[col]; + row_counts[new_row]++; + nz_count++; + if (partner_col != -1) { + row_counts[new_row]++; nz_count++; - } else if (pair_index[row] != -1) { - assert(pair_index[row] >= problem.num_cols); - row_counts[pair_index[row]]++; + } + if (partner_row != -1) { + row_counts[partner_row]++; nz_count++; + if (partner_col != -1) { + row_counts[partner_row]++; + nz_count++; + } } } } std::vector Q_row_start(num_cols + 1); Q_row_start[0] = 0; - for (i_t row = 0; row < num_cols; row++) { + for (i_t row = 0; row < num_cols; ++row) { Q_row_start[row + 1] = Q_row_start[row] + row_counts[row]; } std::vector Q_j(nz_count); std::vector Q_x(nz_count); auto row_starts = Q_row_start; - // First copy the original Q ma - for (i_t row = 0; row < problem.Q.n; row++) { - i_t q_start = problem.Q.row_start[row]; - i_t q_end = problem.Q.row_start[row + 1]; - i_t q_nz = Q_row_start[row]; - for (i_t qj = q_start; qj < q_end; qj++) { - i_t col = problem.Q.j[qj]; - f_t qij = problem.Q.x[qj]; - Q_j[q_nz] = col; - Q_x[q_nz] = qij; - q_nz++; - } - row_starts[row] = q_nz; - } - - // Expand the Q matrix for the free variables - for (i_t row = 0; row < problem.Q.n; row++) { - i_t q_start = problem.Q.row_start[row]; - i_t q_end = problem.Q.row_start[row + 1]; - for (i_t qj = q_start; qj < q_end; qj++) { - i_t col = problem.Q.j[qj]; - f_t qij = problem.Q.x[qj]; - if (pair_index[row] != -1 && pair_index[col] != -1) { - Q_j[row_starts[row]] = pair_index[col]; - Q_x[row_starts[row]] = -qij; - row_starts[row]++; - - Q_j[row_starts[pair_index[row]]] = col; - Q_x[row_starts[pair_index[row]]] = -qij; - row_starts[pair_index[row]]++; - - Q_j[row_starts[pair_index[row]]] = pair_index[col]; - Q_x[row_starts[pair_index[row]]] = qij; - row_starts[pair_index[row]]++; - } else if (pair_index[col] != -1) { - Q_j[row_starts[row]] = pair_index[col]; - Q_x[row_starts[row]] = -qij; - row_starts[row]++; - } else if (pair_index[row] != -1) { - Q_j[row_starts[pair_index[row]]] = col; - Q_x[row_starts[pair_index[row]]] = -qij; - row_starts[pair_index[row]]++; + for (i_t row = 0; row < old_Q.n; ++row) { + i_t new_row = orig_to_new[row]; + i_t partner_row = partner_index[row]; + i_t q_start = old_Q.row_start[row]; + i_t q_end = old_Q.row_start[row + 1]; + for (i_t qj = q_start; qj < q_end; ++qj) { + i_t col = old_Q.j[qj]; + f_t qij = old_Q.x[qj]; + i_t new_col = orig_to_new[col]; + i_t partner_col = partner_index[col]; + + Q_j[row_starts[new_row]] = new_col; + Q_x[row_starts[new_row]] = qij; + row_starts[new_row]++; + + if (partner_col != -1) { + Q_j[row_starts[new_row]] = partner_col; + Q_x[row_starts[new_row]] = -qij; + row_starts[new_row]++; + } + if (partner_row != -1) { + Q_j[row_starts[partner_row]] = new_col; + Q_x[row_starts[partner_row]] = -qij; + row_starts[partner_row]++; + if (partner_col != -1) { + Q_j[row_starts[partner_row]] = partner_col; + Q_x[row_starts[partner_row]] = qij; + row_starts[partner_row]++; + } } } } @@ -1323,12 +1522,17 @@ i_t presolve(const lp_problem_t& original, problem.Q.check_matrix("After free variable expansion"); } - // assert(problem.A.p[num_cols] == nnz); - problem.A.n = num_cols; - problem.num_cols = num_cols; + problem.A = expanded_A; + problem.A.n = num_cols; + problem.objective = objective; + problem.lower = lower; + problem.upper = upper; + problem.num_cols = num_cols; + if (!problem.second_order_cone_dims.empty()) { problem.cone_var_start = new_cone_start; } } - if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0) { + if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0 && + problem.second_order_cone_dims.empty()) { folding(problem, settings, presolve_info); } @@ -1406,7 +1610,7 @@ void crush_primal_solution(const user_problem_t& user_problem, // including previously added slacks, are reset before writing new values. solution.assign(problem.num_cols, 0.0); for (i_t j = 0; j < user_problem.num_cols; j++) { - solution[j] = user_solution[j]; + solution[user_col_to_problem_col(user_problem, problem, j)] = user_solution[j]; } std::vector primal_residual(problem.num_rows); @@ -1446,7 +1650,7 @@ void crush_primal_solution_with_slack(const user_problem_t& user_probl // Re-crush can be called with a reused output vector; clear stale entries first. solution.assign(problem.num_cols, 0.0); for (i_t j = 0; j < user_problem.num_cols; j++) { - solution[j] = user_solution[j]; + solution[user_col_to_problem_col(user_problem, problem, j)] = user_solution[j]; } std::vector primal_residual(problem.num_rows); @@ -1493,9 +1697,9 @@ f_t crush_dual_solution(const user_problem_t& user_problem, for (i_t i = 0; i < user_problem.num_rows; i++) { y[i] = user_y[i]; } - z.resize(problem.num_cols); + z.assign(problem.num_cols, 0.0); for (i_t j = 0; j < user_problem.num_cols; j++) { - z[j] = user_z[j]; + z[user_col_to_problem_col(user_problem, problem, j)] = user_z[j]; } std::vector is_range_row(problem.num_rows, false); @@ -1562,6 +1766,17 @@ f_t crush_dual_solution(const user_problem_t& user_problem, return dual_res_inf; } +template +static i_t user_col_to_problem_col(const user_problem_t& user_problem, + const lp_problem_t& problem, + i_t user_col) +{ + if (user_problem.second_order_cone_dims.empty()) { return user_col; } + if (problem.cone_var_start <= user_problem.cone_var_start) { return user_col; } + if (user_col < user_problem.cone_var_start) { return user_col; } + return problem.cone_var_start + (user_col - user_problem.cone_var_start); +} + template void uncrush_primal_solution(const user_problem_t& user_problem, const lp_problem_t& problem, @@ -1571,9 +1786,9 @@ void uncrush_primal_solution(const user_problem_t& user_problem, user_solution.resize(user_problem.num_cols); assert(problem.num_cols >= user_problem.num_cols); assert(solution.size() >= user_problem.num_cols); - std::copy(solution.begin(), - solution.begin() + std::min((i_t)solution.size(), user_problem.num_cols), - user_solution.data()); + for (i_t j = 0; j < user_problem.num_cols; ++j) { + user_solution[j] = solution[user_col_to_problem_col(user_problem, problem, j)]; + } } template @@ -1677,13 +1892,25 @@ void uncrush_solution(const presolve_info_t& presolve_info, if (num_free_variables > 0) { settings.log.printf("Post-solve: Handling free variables %d\n", num_free_variables); // We added free variables so we need to map the crushed solution back to the original variables + std::vector remove_partner(input_x.size(), false); for (i_t k = 0; k < 2 * num_free_variables; k += 2) { const i_t u = free_variable_pairs[k]; const i_t v = free_variable_pairs[k + 1]; input_x[u] -= input_x[v]; + remove_partner[v] = true; + } + std::vector compact_x; + std::vector compact_z; + compact_x.reserve(input_x.size() - num_free_variables); + compact_z.reserve(input_z.size() - num_free_variables); + for (i_t j = 0; j < static_cast(input_x.size()); ++j) { + if (!remove_partner[j]) { + compact_x.push_back(input_x[j]); + compact_z.push_back(input_z[j]); + } } - input_z.resize(input_z.size() - num_free_variables); - input_x.resize(input_x.size() - num_free_variables); + input_x = compact_x; + input_z = compact_z; } if (presolve_info.removed_variables.size() > 0) { @@ -1754,6 +1981,11 @@ void uncrush_solution(const presolve_info_t& presolve_info, } } + if (!presolve_info.phase1_bounded_linear_indices.empty()) { + settings.log.printf("Post-solve: %d linear column(s) had phase-1 bound tightening (x unchanged)\n", + static_cast(presolve_info.phase1_bounded_linear_indices.size())); + } + // Dual correction for originally free variables that received implied bounds. // Barrier produced (y, z) with z_j != 0 satisfying A^T y + z = c + Qx. // We need corrected (y_bar, z_bar) with z_bar_j = 0 for all j in F_b where @@ -1772,6 +2004,8 @@ void uncrush_solution(const presolve_info_t& presolve_info, settings.log.printf("Post-solve: Correcting duals for %d bounded free variables\n", static_cast(presolve_info.bounded_free_variables.size())); const csc_matrix_t& A = original_problem.A; + csr_matrix_t A_row(A.m, A.n, A.nnz()); + A.to_compressed_row(A_row); // Traverse in reverse order, to ensure that all z_j = 0 after the correction for (auto it = presolve_info.bounded_free_variables.rbegin(); it != presolve_info.bounded_free_variables.rend(); @@ -1781,15 +2015,9 @@ void uncrush_solution(const presolve_info_t& presolve_info, if (w_j == 0.0) { continue; } const f_t du = w_j / bfv.coefficient; input_y[bfv.constraint] += du; - for (i_t j = 0; j < A.n; j++) { - const i_t col_start = A.col_start[j]; - const i_t col_end = A.col_start[j + 1]; - for (i_t p = col_start; p < col_end; p++) { - if (A.i[p] == bfv.constraint) { - input_z[j] -= A.x[p] * du; - break; - } - } + const auto [row_start, row_end] = A_row.get_constraint_range(bfv.constraint); + for (i_t p = row_start; p < row_end; ++p) { + input_z[A_row.j[p]] -= A_row.x[p] * du; } } } diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index c050454b33..bc67627c21 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -49,6 +49,8 @@ struct lp_problem_t { f_t obj_constant; f_t obj_scale; // 1.0 for min, -1.0 for max bool objective_is_integral{false}; + i_t cone_var_start{0}; + std::vector second_order_cone_dims; void write_mps(const std::string& path) const { @@ -162,10 +164,14 @@ struct presolve_info_t { // Variables that were negated to handle -inf < x_j <= u_j std::vector negated_variables; + + // Free variable indices for QP/SOCP augmented barrier (not split, handled natively) + std::vector native_free_linear_indices; + + // Linear columns where phase 1 tightened a fully free var to a one-sided bound (same x index) + std::vector phase1_bounded_linear_indices; // Originally-free variables that received implied bounds, with the constraint used std::vector> bounded_free_variables; - // Free variable indices for QP augmented system (not split, handled natively) - std::vector free_variable_indices; }; template diff --git a/cpp/src/dual_simplex/scaling.cpp b/cpp/src/dual_simplex/scaling.cpp index 1531c91486..2db6c75b43 100644 --- a/cpp/src/dual_simplex/scaling.cpp +++ b/cpp/src/dual_simplex/scaling.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -16,21 +16,174 @@ template i_t column_scaling(const lp_problem_t& unscaled, const simplex_solver_settings_t& settings, lp_problem_t& scaled, - std::vector& column_scaling) + std::vector& col_scale, + std::vector& row_scale) { scaled = unscaled; i_t m = scaled.num_rows; i_t n = scaled.num_cols; - if (!settings.scale_columns || unscaled.Q.n > 0) { + row_scale.assign(m, 1.0); + + // ========================================================================= + // Ruiz equilibration for SOCP (and QP) problems + // ========================================================================= + // For SOCP problems, apply Ruiz equilibration: alternating row and column + // infinity-norm scaling to bring the constraint matrix close to equilibrium. + // This dramatically improves the conditioning of the augmented KKT system. + // Ruiz equilibration for SOCP (and QP) problems, applied only when the + // constraint matrix has a large row-norm imbalance. + if (!unscaled.second_order_cone_dims.empty() || unscaled.Q.n > 0) { + col_scale.assign(n, 1.0); + + // Decide whether Ruiz scaling is needed by checking row-norm imbalance. + // If max_row_norm / min_row_norm is small, the matrix is already well-conditioned + // and scaling can hurt (e.g. by amplifying tiny noise coefficients). + csr_matrix_t Arow_check(0, 0, 0); + scaled.A.to_compressed_row(Arow_check); + f_t max_row_norm = 0; + f_t min_row_norm = std::numeric_limits::max(); + for (i_t i = 0; i < m; ++i) { + f_t row_norm = 0; + for (i_t p = Arow_check.row_start[i]; p < Arow_check.row_start[i + 1]; ++p) { + f_t a = std::abs(Arow_check.x[p]); + if (a > row_norm) row_norm = a; + } + if (row_norm > 0) { + max_row_norm = std::max(max_row_norm, row_norm); + min_row_norm = std::min(min_row_norm, row_norm); + } + } + f_t row_norm_ratio = (min_row_norm > 0) ? max_row_norm / min_row_norm : 1.0; + + if (row_norm_ratio < 100.0) { + settings.log.printf("Skipping Ruiz equilibration (row norm ratio %.1f < 100)\n", + row_norm_ratio); + return 0; + } + + // Apply Ruiz equilibration + csr_matrix_t Arow(0, 0, 0); + scaled.A.to_compressed_row(Arow); + + constexpr i_t max_ruiz_iterations = 10; + for (i_t iter = 0; iter < max_ruiz_iterations; ++iter) { + f_t max_deviation = 0.0; + + // --- Row scaling: scale each row by 1/sqrt(max|a_ij|) --- + std::vector r(m); + for (i_t i = 0; i < m; ++i) { + f_t rm = 0.0; + for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) { + f_t a = std::abs(Arow.x[p]); + if (a > rm) rm = a; + } + r[i] = rm > 0 ? 1.0 / std::sqrt(rm) : 1.0; + max_deviation = std::max(max_deviation, std::abs(rm - 1.0)); + } + for (i_t j = 0; j < n; ++j) { + for (i_t p = scaled.A.col_start[j]; p < scaled.A.col_start[j + 1]; ++p) { + scaled.A.x[p] *= r[scaled.A.i[p]]; + } + } + for (i_t i = 0; i < m; ++i) { + for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) { + Arow.x[p] *= r[i]; + } + scaled.rhs[i] *= r[i]; + row_scale[i] *= r[i]; + } + + // --- Column scaling: scale each column by 1/sqrt(max|a_ij|) --- + // For cone variables, use a uniform scale per cone block to preserve SOC structure. + std::vector c(n); + const i_t cone_start = unscaled.second_order_cone_dims.empty() ? n : unscaled.cone_var_start; + + // Linear columns: scale independently + for (i_t j = 0; j < cone_start; ++j) { + f_t cm = 0.0; + for (i_t p = scaled.A.col_start[j]; p < scaled.A.col_start[j + 1]; ++p) { + f_t a = std::abs(scaled.A.x[p]); + if (a > cm) cm = a; + } + c[j] = cm > 0 ? 1.0 / std::sqrt(cm) : 1.0; + max_deviation = std::max(max_deviation, std::abs(cm - 1.0)); + } + + // Cone columns: uniform scale per cone block + i_t cone_off = cone_start; + for (i_t k = 0; k < static_cast(unscaled.second_order_cone_dims.size()); ++k) { + i_t q_k = unscaled.second_order_cone_dims[k]; + // Find max column inf-norm across all columns in this cone + f_t cone_max = 0.0; + for (i_t j = cone_off; j < cone_off + q_k; ++j) { + for (i_t p = scaled.A.col_start[j]; p < scaled.A.col_start[j + 1]; ++p) { + f_t a = std::abs(scaled.A.x[p]); + if (a > cone_max) cone_max = a; + } + } + f_t cone_scale = cone_max > 0 ? 1.0 / std::sqrt(cone_max) : 1.0; + max_deviation = std::max(max_deviation, std::abs(cone_max - 1.0)); + for (i_t j = cone_off; j < cone_off + q_k; ++j) { + c[j] = cone_scale; + } + cone_off += q_k; + } + for (i_t j = 0; j < n; ++j) { + for (i_t p = scaled.A.col_start[j]; p < scaled.A.col_start[j + 1]; ++p) { + scaled.A.x[p] *= c[j]; + } + } + for (i_t i = 0; i < m; ++i) { + for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) { + Arow.x[p] *= c[Arow.j[p]]; + } + } + for (i_t j = 0; j < n; ++j) { + scaled.objective[j] *= c[j]; + col_scale[j] *= c[j]; + } + // Bounds use +/-inf for unbounded sides (see types.hpp). Use +/-1e20 as a practical + // sentinel: we do not expect finite bounds beyond this magnitude, and skipping scale + // on |bound| >= 1e20 avoids overflow when dividing very large limits by small c[j]. + for (i_t j = 0; j < n; ++j) { + if (scaled.lower[j] > -1e20) scaled.lower[j] /= c[j]; + if (scaled.upper[j] < 1e20) scaled.upper[j] /= c[j]; + } + if (scaled.Q.n > 0) { + for (i_t row = 0; row < scaled.Q.m; ++row) { + for (i_t p = scaled.Q.row_start[row]; p < scaled.Q.row_start[row + 1]; ++p) { + i_t col = scaled.Q.j[p]; + scaled.Q.x[p] *= c[row] * c[col]; + } + } + } + if (max_deviation < 0.1) break; + } + + f_t a_min = std::numeric_limits::max(); + f_t a_max = 0; + for (i_t p = 0; p < scaled.A.col_start[n]; ++p) { + f_t a = std::abs(scaled.A.x[p]); + if (a > 0) { + a_min = std::min(a_min, a); + a_max = std::max(a_max, a); + } + } + settings.log.printf("Ruiz equilibration: coefficient range [%e, %e]\n", a_min, a_max); + return 0; + } + + if (!settings.scale_columns) { settings.log.printf("Skipping column scaling\n"); - column_scaling.resize(n, 1.0); + col_scale.resize(n, 1.0); return 0; } - column_scaling.resize(n); - f_t max = 0; - f_t min = std::numeric_limits::max(); + col_scale.resize(n); + + f_t max_col_norm = 0; + f_t min_col_norm = std::numeric_limits::max(); for (i_t j = 0; j < n; ++j) { const i_t col_start = scaled.A.col_start[j]; const i_t col_end = scaled.A.col_start[j + 1]; @@ -39,57 +192,63 @@ i_t column_scaling(const lp_problem_t& unscaled, const f_t x = scaled.A.x[p]; sum += x * x; } - f_t col_norm_j = column_scaling[j] = sum > 0 ? std::sqrt(sum) : 1.0; - max = std::max(col_norm_j, max); - min = std::min(col_norm_j, min); + const f_t col_norm = sum > 0 ? std::sqrt(sum) : 1.0; + col_scale[j] = f_t(1) / col_norm; + max_col_norm = std::max(col_norm, max_col_norm); + min_col_norm = std::min(col_norm, min_col_norm); } - settings.log.printf("Scaling matrix. Maximum column norm %e, minimum column norm %e\n", max, min); - // C(j, j) = 1/column_scaling(j) + settings.log.printf("Scaling matrix. Maximum column norm %e, minimum column norm %e\n", + max_col_norm, + min_col_norm); // scaled_A = unscaled_A * C for (i_t j = 0; j < n; ++j) { const i_t col_start = scaled.A.col_start[j]; const i_t col_end = scaled.A.col_start[j + 1]; for (i_t p = col_start; p < col_end; ++p) { - scaled.A.x[p] /= column_scaling[j]; + scaled.A.x[p] *= col_scale[j]; } - } - // scaled_obj = C*unscaled_obj - for (i_t j = 0; j < n; ++j) { - scaled.objective[j] /= column_scaling[j]; - } - // scaled_lower = C^{-1} * unscaled_lower - // scaled_upper = C^{-1} * unscaled_upper - for (i_t j = 0; j < n; ++j) { - scaled.lower[j] *= column_scaling[j]; - scaled.upper[j] *= column_scaling[j]; + scaled.objective[j] *= col_scale[j]; + if (scaled.lower[j] > -1e20) scaled.lower[j] /= col_scale[j]; + if (scaled.upper[j] < 1e20) scaled.upper[j] /= col_scale[j]; } for (i_t i = 0; i < unscaled.Q.n; ++i) { const i_t row_start = unscaled.Q.row_start[i]; const i_t row_end = unscaled.Q.row_start[i + 1]; - i_t row = i; for (i_t p = row_start; p < row_end; ++p) { - i_t col = unscaled.Q.j[p]; - scaled.Q.x[p] = unscaled.Q.x[p] / (column_scaling[row] * column_scaling[col]); + const i_t col = unscaled.Q.j[p]; + scaled.Q.x[p] = unscaled.Q.x[p] * col_scale[i] * col_scale[col]; } } return 0; } template -void unscale_solution(const std::vector& column_scaling, +void unscale_solution(const std::vector& col_scale, + const std::vector& row_scale, const std::vector& scaled_x, + const std::vector& scaled_y, const std::vector& scaled_z, std::vector& unscaled_x, + std::vector& unscaled_y, std::vector& unscaled_z) { const i_t n = scaled_x.size(); unscaled_x.resize(n); unscaled_z.resize(n); for (i_t j = 0; j < n; ++j) { - unscaled_x[j] = scaled_x[j] / column_scaling[j]; - unscaled_z[j] = scaled_z[j] * column_scaling[j]; + // column_scaling multiplies A(:,j) and c_j by col_scale[j], divides bounds: + // x_orig = col_scale .* x_scaled, z_orig = z_scaled / col_scale. + unscaled_x[j] = scaled_x[j] * col_scale[j]; + unscaled_z[j] = scaled_z[j] / col_scale[j]; + } + + const i_t m = scaled_y.size(); + unscaled_y.resize(m); + for (i_t i = 0; i < m; ++i) { + // y_unscaled = R * y_scaled (row scaling: constraint was scaled by R) + unscaled_y[i] = scaled_y[i] * row_scale[i]; } } @@ -98,12 +257,16 @@ void unscale_solution(const std::vector& column_scaling, template int column_scaling(const lp_problem_t& unscaled, const simplex_solver_settings_t& settings, lp_problem_t& scaled, - std::vector& column_scaling); + std::vector& col_scale, + std::vector& row_scale); -template void unscale_solution(const std::vector& column_scaling, +template void unscale_solution(const std::vector& col_scale, + const std::vector& row_scale, const std::vector& scaled_x, + const std::vector& scaled_y, const std::vector& scaled_z, std::vector& unscaled_x, + std::vector& unscaled_y, std::vector& unscaled_z); #endif diff --git a/cpp/src/dual_simplex/scaling.hpp b/cpp/src/dual_simplex/scaling.hpp index 120660c765..94aca2055d 100644 --- a/cpp/src/dual_simplex/scaling.hpp +++ b/cpp/src/dual_simplex/scaling.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -20,13 +20,17 @@ template i_t column_scaling(const lp_problem_t& unscaled, const simplex_solver_settings_t& settings, lp_problem_t& scaled, - std::vector& column_scaling); + std::vector& column_scaling, + std::vector& row_scaling); template void unscale_solution(const std::vector& column_scaling, + const std::vector& row_scaling, const std::vector& scaled_x, + const std::vector& scaled_y, const std::vector& scaled_z, std::vector& unscaled_x, + std::vector& unscaled_y, std::vector& unscaled_z); } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 500822e1bd..35510c37b9 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -79,7 +79,7 @@ struct simplex_solver_settings_t { use_left_looking_lu(false), eliminate_singletons(true), print_presolve_stats(true), - barrier_presolve(false), + barrier_presolve(true), cudss_deterministic(false), deterministic(false), barrier(false), @@ -163,7 +163,7 @@ struct simplex_solver_settings_t { use_left_looking_lu; // true to use left looking LU factorization, false to use right looking bool eliminate_singletons; // true to eliminate singletons from the basis bool print_presolve_stats; // true to print presolve stats - bool barrier_presolve; // true to use barrier presolve + bool barrier_presolve; // bound shifts, free-var bounding; LP v-w split / folding when no cones bool cudss_deterministic; // true to use cuDSS deterministic mode, false for non-deterministic bool barrier; // true to use barrier method, false to use dual simplex method bool deterministic; // true to use B&B deterministic mode, false to use non-deterministic mode diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 9d8a62efc3..cf39c99e20 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -61,6 +61,42 @@ void write_matlab(const std::string& filename, const dual_simplex::lp_problem_t< fclose(fid); } +template +bool validate_barrier_cone_layout(const lp_problem_t& problem, + const simplex_solver_settings_t& settings) +{ + if (problem.second_order_cone_dims.empty()) { return true; } + + i_t cone_end = problem.cone_var_start; + for (auto q_k : problem.second_order_cone_dims) { + if (q_k <= 1) { + settings.log.printf( + "Error: second-order cone dimensions must be at least 2; use linear variables instead of " + "Q^1\n"); + return false; + } + cone_end += q_k; + } + + if (cone_end != problem.num_cols) { + settings.log.printf("Error: conic variables must form a trailing block [linear | cone]\n"); + return false; + } + + for (i_t j = problem.cone_var_start; j < cone_end; ++j) { + if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) { + settings.log.printf("Error: explicit lower bound on conic variable %d is not supported\n", j); + return false; + } + if (problem.upper[j] < inf) { + settings.log.printf("Error: explicit upper bound on conic variable %d is not supported\n", j); + return false; + } + } + + return true; +} + } // namespace template @@ -170,9 +206,10 @@ lp_status_t solve_linear_program_with_advanced_basis( presolved_lp.num_cols, presolved_lp.A.col_start[presolved_lp.num_cols]); std::vector column_scales; + std::vector row_scales_simplex; { raft::common::nvtx::range scope_scaling("DualSimplex::scaling"); - column_scaling(presolved_lp, settings, lp, column_scales); + column_scaling(presolved_lp, settings, lp, column_scales, row_scales_simplex); } assert(presolved_lp.num_cols == lp.num_cols); lp_problem_t phase1_problem(original_lp.handle_ptr, 1, 1, 1); @@ -293,13 +330,21 @@ lp_status_t solve_linear_program_with_advanced_basis( } if (status == dual::status_t::OPTIMAL) { std::vector unscaled_x(lp.num_cols); + std::vector unscaled_y(lp.num_rows); std::vector unscaled_z(lp.num_cols); - unscale_solution(column_scales, solution.x, solution.z, unscaled_x, unscaled_z); + unscale_solution(column_scales, + row_scales_simplex, + solution.x, + solution.y, + solution.z, + unscaled_x, + unscaled_y, + unscaled_z); uncrush_solution(presolve_info, settings, original_lp, unscaled_x, - solution.y, + unscaled_y, unscaled_z, original_solution.x, original_solution.y, @@ -349,16 +394,18 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us // Convert the user problem to a linear program with only equality constraints std::vector new_slacks; - simplex_solver_settings_t barrier_settings = settings; - barrier_settings.barrier_presolve = true; dualize_info_t dualize_info; - convert_user_problem(user_problem, barrier_settings, original_lp, new_slacks, dualize_info); + convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info); + if (!validate_barrier_cone_layout(original_lp, settings)) { + return lp_status_t::NUMERICAL_ISSUES; + } + lp_solution_t lp_solution(original_lp.num_rows, original_lp.num_cols); // Presolve the linear program presolve_info_t presolve_info; lp_problem_t presolved_lp(user_problem.handle_ptr, 1, 1, 1); - const i_t ok = presolve(original_lp, barrier_settings, presolved_lp, presolve_info); + const i_t ok = presolve(original_lp, settings, presolved_lp, presolve_info); if (ok == CONCURRENT_HALT_RETURN) { return lp_status_t::CONCURRENT_LIMIT; } if (ok == TIME_LIMIT_RETURN) { return lp_status_t::TIME_LIMIT; } if (ok == -1) { return lp_status_t::INFEASIBLE; } @@ -369,12 +416,13 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us presolved_lp.num_cols, presolved_lp.A.col_start[presolved_lp.num_cols]); std::vector column_scales; - column_scaling(presolved_lp, barrier_settings, barrier_lp, column_scales); + std::vector row_scales; + column_scaling(presolved_lp, settings, barrier_lp, column_scales, row_scales); // Solve using barrier lp_solution_t barrier_solution(barrier_lp.num_rows, barrier_lp.num_cols); - barrier_solver_t barrier_solver(barrier_lp, presolve_info, barrier_settings); + barrier_solver_t barrier_solver(barrier_lp, presolve_info, settings); lp_status_t barrier_status = barrier_solver.solve(start_time, barrier_solution); if (barrier_status == lp_status_t::OPTIMAL) { #ifdef COMPUTE_SCALED_RESIDUALS @@ -394,9 +442,16 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us #endif // Unscale the solution std::vector unscaled_x(barrier_lp.num_cols); + std::vector unscaled_y(barrier_lp.num_rows); std::vector unscaled_z(barrier_lp.num_cols); - unscale_solution( - column_scales, barrier_solution.x, barrier_solution.z, unscaled_x, unscaled_z); + unscale_solution(column_scales, + row_scales, + barrier_solution.x, + barrier_solution.y, + barrier_solution.z, + unscaled_x, + unscaled_y, + unscaled_z); std::vector residual = presolved_lp.rhs; matrix_vector_multiply(presolved_lp.A, 1.0, unscaled_x, -1.0, residual); @@ -410,7 +465,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us unscaled_dual_residual[j] -= presolved_lp.objective[j]; } matrix_transpose_vector_multiply( - presolved_lp.A, 1.0, barrier_solution.y, 1.0, unscaled_dual_residual); + presolved_lp.A, 1.0, unscaled_y, 1.0, unscaled_dual_residual); f_t unscaled_dual_residual_norm = vector_norm_inf(unscaled_dual_residual); settings.log.printf( "Unscaled Dual infeasibility (abs/rel): %.2e/%.2e\n", @@ -420,10 +475,10 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us // Undo presolve uncrush_solution(presolve_info, - barrier_settings, + settings, original_lp, unscaled_x, - barrier_solution.y, + unscaled_y, unscaled_z, lp_solution.x, lp_solution.y, @@ -564,7 +619,8 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us uncrush_primal_solution(user_problem, original_lp, lp_solution.x, solution.x); uncrush_dual_solution( user_problem, original_lp, lp_solution.y, lp_solution.z, solution.y, solution.z); - solution.objective = barrier_solution.objective; + solution.objective = + barrier_solution.user_objective / user_problem.obj_scale - user_problem.obj_constant; solution.user_objective = barrier_solution.user_objective; solution.l2_primal_residual = barrier_solution.l2_primal_residual; solution.l2_dual_residual = barrier_solution.l2_dual_residual; @@ -641,7 +697,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us lp_solution_t crossover_solution(original_lp.num_rows, original_lp.num_cols); std::vector vstatus(original_lp.num_cols); crossover_status_t crossover_status = crossover( - original_lp, barrier_settings, lp_solution, start_time, crossover_solution, vstatus); + original_lp, settings, lp_solution, start_time, crossover_solution, vstatus); settings.log.printf("Crossover status: %d\n", crossover_status); if (crossover_status == crossover_status_t::OPTIMAL) { barrier_status = lp_status_t::OPTIMAL; } } diff --git a/cpp/src/dual_simplex/user_problem.hpp b/cpp/src/dual_simplex/user_problem.hpp index 73c4c391be..8b0588064c 100644 --- a/cpp/src/dual_simplex/user_problem.hpp +++ b/cpp/src/dual_simplex/user_problem.hpp @@ -52,6 +52,8 @@ struct user_problem_t { std::vector Q_offsets; std::vector Q_indices; std::vector Q_values; + i_t cone_var_start{0}; + std::vector second_order_cone_dims; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/vector_math.cuh b/cpp/src/dual_simplex/vector_math.cuh index abc7263858..32c75ea366 100644 --- a/cpp/src/dual_simplex/vector_math.cuh +++ b/cpp/src/dual_simplex/vector_math.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -9,6 +9,10 @@ #include +#include +#include +#include + #include #include @@ -28,6 +32,7 @@ struct norm_inf_max { template f_t device_custom_vector_norm_inf(InputIteratorT in, i_t size, rmm::cuda_stream_view stream_view) { + if (size == 0) { return 0; } // FIXME: Tmp storage stored in vector_math class. auto d_out = rmm::device_scalar(stream_view); rmm::device_uvector d_temp_storage(0, stream_view); @@ -62,6 +67,12 @@ f_t device_vector_norm_inf(const rmm::device_uvector& in, rmm::cuda_stream_ return device_custom_vector_norm_inf(in.data(), in.size(), stream_view); } +template +f_t device_vector_norm_inf(raft::device_span in, rmm::cuda_stream_view stream_view) +{ + return device_custom_vector_norm_inf(in.data(), in.size(), stream_view); +} + // TMP we should just have a CPU and GPU version to do the comparison // Should never have to norm inf a CPU vector if we are using the GPU template @@ -71,4 +82,12 @@ f_t vector_norm_inf(const std::vector& x, rmm::cuda_stream_view return device_vector_norm_inf(d_x, stream_view); } +template +f_t vector_norm_inf(raft::host_span x, rmm::cuda_stream_view stream_view) +{ + rmm::device_uvector d_x(x.size(), stream_view); + raft::copy(d_x.data(), x.data(), x.size(), stream_view); + return device_vector_norm_inf(d_x, stream_view); +} + } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/io/lp_parser.cpp b/cpp/src/io/lp_parser.cpp index 84daa101b4..81ec63f323 100644 --- a/cpp/src/io/lp_parser.cpp +++ b/cpp/src/io/lp_parser.cpp @@ -118,41 +118,42 @@ bool is_free_keyword(std::string_view lower) { return lower == "free"; } bool is_infinity_text(std::string_view lower) { return lower == "inf" || lower == "infinity"; } -// Builds the symmetric Q in CSR from LP-format raw upper-triangular triples. +// Builds the symmetric Q in COO from LP-format raw upper-triangular triples. // Each input triple (i, j, c) with i <= j represents `c * x_i * x_j` in the // LP source. The output Q satisfies x^T Q x = sum of those terms. // Diagonal (i == j): Q[i,i] = c (one entry). // Off-diagonal (i != j): Q[i,j] = Q[j,i] = c/2 (two entries; symmetric split). template -void build_symmetric_q_csr(const std::vector>& raw_triples, +void build_symmetric_q_coo(const std::vector>& raw_triples, i_t n_vars, - std::vector& out_values, - std::vector& out_indices, - std::vector& out_offsets) + std::vector& out_row_indices, + std::vector& out_col_indices, + std::vector& out_values) { - std::vector>> row_data(n_vars); + std::vector>> row_data(static_cast(n_vars)); for (const auto& [i, j, c] : raw_triples) { if (i == j) { - row_data[i].emplace_back(i, c); + row_data[static_cast(i)].emplace_back(i, c); } else { - row_data[i].emplace_back(j, c / f_t(2)); - row_data[j].emplace_back(i, c / f_t(2)); + row_data[static_cast(i)].emplace_back(j, c / f_t(2)); + row_data[static_cast(j)].emplace_back(i, c / f_t(2)); } } for (auto& row : row_data) { std::sort(row.begin(), row.end()); } - out_offsets.clear(); - out_indices.clear(); + out_row_indices.clear(); + out_col_indices.clear(); out_values.clear(); - out_offsets.reserve(static_cast(n_vars) + 1); - out_offsets.push_back(0); + out_row_indices.reserve(raw_triples.size() * 2); + out_col_indices.reserve(raw_triples.size() * 2); + out_values.reserve(raw_triples.size() * 2); for (i_t r = 0; r < n_vars; ++r) { - for (const auto& [col, val] : row_data[r]) { + for (const auto& [col, val] : row_data[static_cast(r)]) { + out_row_indices.push_back(r); + out_col_indices.push_back(col); out_values.push_back(val); - out_indices.push_back(col); } - out_offsets.push_back(static_cast(out_values.size())); } } @@ -1507,10 +1508,10 @@ void flush_quadratic_constraints(mps_data_model_t& problem, const i_t linear_row_count = static_cast(parser.row_names.size()); i_t k = 0; for (const auto& block : parser.quadratic_constraint_blocks) { + std::vector q_row_indices; + std::vector q_col_indices; std::vector q_values; - std::vector q_indices; - std::vector q_offsets; - build_symmetric_q_csr(block.quad_triples, n_vars, q_values, q_indices, q_offsets); + build_symmetric_q_coo(block.quad_triples, n_vars, q_row_indices, q_col_indices, q_values); problem.append_quadratic_constraint(linear_row_count + k, block.row_name, static_cast(block.row_type), @@ -1518,8 +1519,8 @@ void flush_quadratic_constraints(mps_data_model_t& problem, block.linear_indices, block.rhs_value, q_values, - q_indices, - q_offsets); + q_row_indices, + q_col_indices); ++k; } } diff --git a/cpp/src/io/mps_data_model.cpp b/cpp/src/io/mps_data_model.cpp index 7ae359e450..0172918c62 100644 --- a/cpp/src/io/mps_data_model.cpp +++ b/cpp/src/io/mps_data_model.cpp @@ -9,6 +9,7 @@ #include #include +#include #include namespace cuopt::linear_programming::io { @@ -139,32 +140,51 @@ void mps_data_model_t::set_quadratic_objective_matrix(std::span -void mps_data_model_t::append_quadratic_constraint(i_t constraint_row_index, - const std::string& constraint_row_name, - char constraint_row_type, - std::span linear_values, - std::span linear_indices, - f_t rhs_value, - std::span quadratic_values, - std::span quadratic_indices, - std::span quadratic_offsets) +void mps_data_model_t::append_quadratic_constraint( + i_t constraint_row_index, + const std::string& constraint_row_name, + char constraint_row_type, + std::span linear_values, + std::span linear_indices, + f_t rhs_value, + std::span quadratic_values, + std::span quadratic_row_indices, + std::span quadratic_col_indices) { mps_parser_expects(constraint_row_index >= 0, error_type_t::ValidationError, "constraint_row_index must be non-negative"); - mps_parser_expects(constraint_row_type == 'L', + mps_parser_expects(constraint_row_type == 'L' || constraint_row_type == 'G', error_type_t::ValidationError, - "Quadratic constraint ROWS type must be 'L' (less-or-equal); got '%c'. " - "Only 'L' is supported for convex quadratic constraints.", + "Quadratic constraint ROWS type must be 'L' (<=) or 'G' (>=); got '%c'.", constraint_row_type); mps_parser_expects(linear_values.size() == linear_indices.size(), error_type_t::ValidationError, "linear_values and linear_indices must have the same nnz count"); - mps_parser_expects( - !quadratic_offsets.empty(), error_type_t::ValidationError, "quadratic_offsets cannot be empty"); + const size_t q_nnz = quadratic_values.size(); + mps_parser_expects(q_nnz == quadratic_row_indices.size(), + error_type_t::ValidationError, + "quadratic_values and quadratic_row_indices must have the same length"); + mps_parser_expects(q_nnz == quadratic_col_indices.size(), + error_type_t::ValidationError, + "quadratic_values and quadratic_col_indices must have the same length"); + + if (!linear_values.empty()) { + mps_parser_expects(linear_values.data() != nullptr && linear_indices.data() != nullptr, + error_type_t::ValidationError, + "linear_values and linear_indices cannot be null when non-empty"); + } + + if (q_nnz > 0) { + mps_parser_expects(quadratic_values.data() != nullptr && + quadratic_row_indices.data() != nullptr && + quadratic_col_indices.data() != nullptr, + error_type_t::ValidationError, + "Q COO spans cannot be null when nnz > 0"); + } quadratic_constraint_t qc; qc.constraint_row_index = constraint_row_index; @@ -173,9 +193,33 @@ void mps_data_model_t::append_quadratic_constraint(i_t constraint_row_ qc.rhs_value = rhs_value; qc.linear_values.assign(linear_values.begin(), linear_values.end()); qc.linear_indices.assign(linear_indices.begin(), linear_indices.end()); - qc.quadratic_values.assign(quadratic_values.begin(), quadratic_values.end()); - qc.quadratic_indices.assign(quadratic_indices.begin(), quadratic_indices.end()); - qc.quadratic_offsets.assign(quadratic_offsets.begin(), quadratic_offsets.end()); + + if (q_nnz == 0) { + qc.quadratic_row_indices.clear(); + qc.quadratic_col_indices.clear(); + qc.quadratic_values.clear(); + } else { + std::vector wr(quadratic_row_indices.begin(), quadratic_row_indices.end()); + std::vector wc(quadratic_col_indices.begin(), quadratic_col_indices.end()); + std::vector wv(quadratic_values.begin(), quadratic_values.end()); + + std::vector perm(q_nnz); + std::iota(perm.begin(), perm.end(), size_t{0}); + std::stable_sort(perm.begin(), perm.end(), [&](size_t a, size_t b) { + if (wr[a] != wr[b]) { return wr[a] < wr[b]; } + return wc[a] < wc[b]; + }); + + qc.quadratic_row_indices.resize(q_nnz); + qc.quadratic_col_indices.resize(q_nnz); + qc.quadratic_values.resize(q_nnz); + for (size_t t = 0; t < q_nnz; ++t) { + const size_t ix = perm[t]; + qc.quadratic_row_indices[t] = wr[ix]; + qc.quadratic_col_indices[t] = wc[ix]; + qc.quadratic_values[t] = wv[ix]; + } + } quadratic_constraints_.push_back(std::move(qc)); } diff --git a/cpp/src/io/mps_parser.cpp b/cpp/src/io/mps_parser.cpp index 535938b09c..40b47b03bd 100644 --- a/cpp/src/io/mps_parser.cpp +++ b/cpp/src/io/mps_parser.cpp @@ -17,14 +17,18 @@ #include #include #include +#include +#include #include #include +#include #include #include namespace { using cuopt::linear_programming::io::error_type_t; using cuopt::linear_programming::io::mps_parser_expects; +using cuopt::linear_programming::io::mps_parser_expects_fatal; std::vector string_to_buffer(std::string_view input) { @@ -34,6 +38,229 @@ std::vector string_to_buffer(std::string_view input) } } // end namespace +namespace { + +/** @brief ceil(log2(x)) for x >= 1; used for sparse-vs-dense CSR heuristic only. */ +inline size_t ceil_log2_size_t(size_t x) +{ + size_t b = 0; + size_t y = (x <= 1) ? 0 : (x - 1); + while (y > 0) { + y >>= 1; + ++b; + } + return (b == 0) ? 1 : b; +} + +/** + * @brief Reusable scratch for converting (row,col,value) triples to CSR via CSC (double transpose). + * + * Avoids `std::vector>` per column/row, which is allocation-heavy for large Q + * blocks (e.g. many QCMATRIX sections in portfolio models). + */ +template +struct double_transpose_scratch_t { + std::vector col_nnz{}; + std::vector col_off{}; + std::vector col_wr{}; + std::vector csc_rows{}; + std::vector csc_vals{}; + std::vector row_nnz{}; + std::vector row_off{}; + std::vector row_wr{}; + /** Indices 0..nnz-1 for sort-by-(row,col) sparse CSR path (no symmetrization). */ + std::vector sort_perm{}; +}; + +/** + * @brief CSR from triples without full O(num_cols) column sweeps: count per row, sort by (row,col), + * scatter. Same numeric result ordering as the dense double-transpose path for non-symmetrized + * inputs when column order within a row is the stable sort order for equal keys (see std::sort). + * + * Used for QCMATRIX / QMATRIX when nnz is small enough vs matrix dimension that sorting beats + * scanning every column index twice. + */ +template +void triples_to_csr_sparse_sorted_flat(const std::vector>& entries, + i_t num_rows, + i_t num_cols, + f_t value_scale, + double_transpose_scratch_t& scratch, + std::vector& out_values, + std::vector& out_indices, + std::vector& out_offsets) +{ + static_cast(num_cols); // square Q paths pass num_cols == num_rows + const size_t nnz = entries.size(); + const size_t nr = static_cast(num_rows); + + scratch.row_nnz.assign(nr, 0); + for (const auto& e : entries) { + ++scratch.row_nnz[static_cast(std::get<0>(e))]; + } + + scratch.row_off.resize(nr + 1); + scratch.row_off[0] = 0; + for (size_t r = 0; r < nr; ++r) { + scratch.row_off[r + 1] = scratch.row_off[r] + scratch.row_nnz[r]; + } + const size_t csr_nnz = static_cast(scratch.row_off[nr]); + + out_values.resize(csr_nnz); + out_indices.resize(csr_nnz); + + scratch.sort_perm.resize(nnz); + std::iota(scratch.sort_perm.begin(), scratch.sort_perm.end(), size_t{0}); + std::stable_sort(scratch.sort_perm.begin(), scratch.sort_perm.end(), [&](size_t a, size_t b) { + const auto& ta = entries[a]; + const auto& tb = entries[b]; + const i_t ra = std::get<0>(ta); + const i_t rb = std::get<0>(tb); + if (ra != rb) { return ra < rb; } + return std::get<1>(ta) < std::get<1>(tb); + }); + + scratch.row_wr.resize(nr); + std::copy(scratch.row_off.begin(), scratch.row_off.begin() + nr, scratch.row_wr.begin()); + + for (size_t k = 0; k < nnz; ++k) { + const size_t ix = scratch.sort_perm[k]; + const auto& e = entries[ix]; + const i_t row = std::get<0>(e); + const i_t col = std::get<1>(e); + const f_t val = std::get<2>(e); + const size_t sr = static_cast(row); + const i_t w = scratch.row_wr[sr]++; + out_indices[static_cast(w)] = col; + out_values[static_cast(w)] = val * value_scale; + } + + out_offsets = std::move(scratch.row_off); +} + +/** + * @brief Build CSR from coordinate triples using the same double-transpose semantics as the + * previous nested-vector implementation: CSC column-major buckets, then CSR with columns sorted + * within each row (column index ascending). + * + * @param symmetrize_upper_triangular If true (QUADOBJ), each off-diagonal (r,c) also adds (c,r). + */ +template +void triples_to_csr_double_transpose_flat(const std::vector>& entries, + i_t num_rows, + i_t num_cols, + bool symmetrize_upper_triangular, + f_t value_scale, + double_transpose_scratch_t& scratch, + std::vector& out_values, + std::vector& out_indices, + std::vector& out_offsets) +{ + if (entries.empty()) { + out_values.clear(); + out_indices.clear(); + out_offsets.assign(static_cast(num_rows) + 1, 0); + return; + } + + const size_t nc = static_cast(num_cols); + const size_t nr = static_cast(num_rows); + const size_t nnz = entries.size(); + const size_t n_dim = std::max(nr, nc); + + // Without symmetrization (QCMATRIX, QMATRIX), avoid O(n) full-column sweeps when blocks are + // sparse: sort triples by (row,col) and scatter in one pass. QUADOBJ keeps the dense transpose + // path because symmetrization duplicates off-diagonal entries. + if (!symmetrize_upper_triangular) { + const size_t lg = ceil_log2_size_t(nnz); + // Roughly: sort cost ~ nnz·lg vs ~O(n) column iterations in dense path; 48 is a tunable fudge. + constexpr size_t k_sparse_vs_dense_heuristic = 96; + if (nnz * lg < k_sparse_vs_dense_heuristic * n_dim) { + triples_to_csr_sparse_sorted_flat( + entries, num_rows, num_cols, value_scale, scratch, out_values, out_indices, out_offsets); + return; + } + } + + scratch.col_nnz.assign(nc, 0); + for (const auto& entry : entries) { + const i_t r = std::get<0>(entry); + const i_t c = std::get<1>(entry); + scratch.col_nnz[static_cast(c)]++; + if (symmetrize_upper_triangular && r != c) { scratch.col_nnz[static_cast(r)]++; } + } + + scratch.col_off.resize(nc + 1); + scratch.col_off[0] = 0; + for (size_t c = 0; c < nc; ++c) { + scratch.col_off[c + 1] = scratch.col_off[c] + scratch.col_nnz[c]; + } + const i_t csc_nnz = scratch.col_off[nc]; + scratch.csc_rows.resize(static_cast(csc_nnz)); + scratch.csc_vals.resize(static_cast(csc_nnz)); + scratch.col_wr.resize(nc); + std::copy(scratch.col_off.begin(), scratch.col_off.begin() + nc, scratch.col_wr.begin()); + + for (const auto& entry : entries) { + const i_t r = std::get<0>(entry); + const i_t c = std::get<1>(entry); + const f_t v = std::get<2>(entry); + { + const size_t sc = static_cast(c); + const i_t p = scratch.col_wr[sc]++; + scratch.csc_rows[static_cast(p)] = r; + scratch.csc_vals[static_cast(p)] = v; + } + if (symmetrize_upper_triangular && r != c) { + const size_t sr = static_cast(r); + const i_t p = scratch.col_wr[sr]++; + scratch.csc_rows[static_cast(p)] = c; + scratch.csc_vals[static_cast(p)] = v; + } + } + + scratch.row_nnz.assign(nr, 0); + for (i_t cc = 0; cc < num_cols; ++cc) { + const size_t col_z = static_cast(cc); + const i_t lo = scratch.col_off[col_z]; + const i_t hi = scratch.col_off[col_z + 1]; + for (i_t t = lo; t < hi; ++t) { + const i_t row = scratch.csc_rows[static_cast(t)]; + scratch.row_nnz[static_cast(row)]++; + } + } + + scratch.row_off.resize(nr + 1); + scratch.row_off[0] = 0; + for (size_t r = 0; r < nr; ++r) { + scratch.row_off[r + 1] = scratch.row_off[r] + scratch.row_nnz[r]; + } + const size_t csr_nnz = static_cast(scratch.row_off[nr]); + + out_values.resize(csr_nnz); + out_indices.resize(csr_nnz); + scratch.row_wr.resize(nr); + std::copy(scratch.row_off.begin(), scratch.row_off.begin() + nr, scratch.row_wr.begin()); + + for (i_t cc = 0; cc < num_cols; ++cc) { + const size_t col_z = static_cast(cc); + const i_t lo = scratch.col_off[col_z]; + const i_t hi = scratch.col_off[col_z + 1]; + for (i_t t = lo; t < hi; ++t) { + const i_t row = scratch.csc_rows[static_cast(t)]; + const f_t val = scratch.csc_vals[static_cast(t)]; + const size_t sr = static_cast(row); + const i_t w = scratch.row_wr[sr]++; + out_indices[static_cast(w)] = cc; + out_values[static_cast(w)] = val * value_scale; + } + } + + out_offsets = std::move(scratch.row_off); +} + +} // namespace + namespace cuopt::linear_programming::io { template @@ -288,115 +515,72 @@ void mps_parser_t::fill_problem(mps_data_model_t& problem) problem.set_variable_types(std::move(var_types)); problem.set_maximize(maximize); - // Helper function to build CSR format using double transpose (O(m+n+nnz) instead of - // O(nnz*log(nnz))) For QUADOBJ: handles upper triangular input by expanding to full symmetric - // matrix. + // Build CSR from (row,col,value) triples via double transpose (CSC then CSR). Uses flat buffers + // plus reusable scratch to avoid per-column/per-row `std::vector` allocations — important for + // large Q blocks and many QCMATRIX sections. // - // @p value_scale: - // QUADOBJ/QMATRIX use 0.5 (MPS ½ xᵀQx vs internal xᵀQx); - // QCMATRIX uses 1.0 (symmetric Q defines xᵀQx directly in the constraint). - auto build_csr_via_transpose = [](const std::vector>& entries, - i_t num_rows, - i_t num_cols, - bool symmetrize_upper_triangular, - f_t value_scale) { - struct CSRResult { - std::vector values; - std::vector indices; - std::vector offsets; - }; - - if (entries.empty()) { - CSRResult result; - result.offsets.resize(num_rows + 1, 0); - return result; - } - - // First transpose: build CSC format (entries sorted by column) - std::vector>> csc_data(num_cols); - for (const auto& entry : entries) { - i_t row = std::get<0>(entry); - i_t col = std::get<1>(entry); - f_t val = std::get<2>(entry); - - // For QUADOBJ (upper triangular), add both (row,col) and (col,row) if off-diagonal - csc_data[col].emplace_back(row, val); - if (symmetrize_upper_triangular && row != col) { csc_data[row].emplace_back(col, val); } - } - - // Second transpose: convert CSC to CSR (entries sorted by row, columns within rows sorted) - std::vector>> csr_data(num_rows); - for (i_t col = 0; col < num_cols; ++col) { - for (const auto& [row, val] : csc_data[col]) { - csr_data[row].emplace_back(col, val); - } - } - - // Build final CSR format - CSRResult result; - result.offsets.reserve(num_rows + 1); - result.offsets.push_back(0); - - for (i_t row = 0; row < num_rows; ++row) { - for (const auto& [col, val] : csr_data[row]) { - // While the mps format expects to optimize for 0.5 xT Q x, cuopt optimizes for xT Q xExpand - // commentComment on line L488 so we have to multiply the value by value_scale=0.5 to get - // the correct value. - result.values.push_back(val * value_scale); - result.indices.push_back(col); - } - result.offsets.push_back(result.values.size()); - } - - return result; - }; + // value_scale: QUADOBJ/QMATRIX use 0.5 (MPS ½ xᵀQx vs internal xᵀQx); QCMATRIX uses 1.0. + double_transpose_scratch_t triple_csr_scratch{}; + std::vector quad_csr_values{}; + std::vector quad_csr_indices{}; + std::vector quad_csr_offsets{}; // Process QUADOBJ data if present (upper triangular format) if (!quadobj_entries.empty()) { - // Convert quadratic objective entries to CSR format using double transpose - // QUADOBJ stores upper triangular elements, so we expand to full symmetric matrix constexpr f_t k_mps_quad_half_scale = f_t(0.5); // MPS ½ xᵀQx vs internal xᵀQx - auto csr_result = build_csr_via_transpose( - quadobj_entries, num_vars_for_quad, num_vars_for_quad, true, k_mps_quad_half_scale); - - // Use optimized double transpose method - O(m+n+nnz) instead of O(nnz*log(nnz)) - problem.set_quadratic_objective_matrix( - csr_result.values, csr_result.indices, csr_result.offsets); + triples_to_csr_double_transpose_flat(quadobj_entries, + num_vars_for_quad, + num_vars_for_quad, + true, + k_mps_quad_half_scale, + triple_csr_scratch, + quad_csr_values, + quad_csr_indices, + quad_csr_offsets); + problem.set_quadratic_objective_matrix(quad_csr_values, quad_csr_indices, quad_csr_offsets); } else if (!qmatrix_entries.empty()) { - // Convert quadratic objective entries to CSR format using double transpose - // QMATRIX stores full symmetric matrix constexpr f_t k_mps_quad_half_scale = f_t(0.5); - auto csr_result = build_csr_via_transpose( - qmatrix_entries, num_vars_for_quad, num_vars_for_quad, false, k_mps_quad_half_scale); - - // Use optimized double transpose method - O(m+n+nnz) instead of O(nnz*log(nnz)) - problem.set_quadratic_objective_matrix( - csr_result.values, csr_result.indices, csr_result.offsets); + triples_to_csr_double_transpose_flat(qmatrix_entries, + num_vars_for_quad, + num_vars_for_quad, + false, + k_mps_quad_half_scale, + triple_csr_scratch, + quad_csr_values, + quad_csr_indices, + quad_csr_offsets); + problem.set_quadratic_objective_matrix(quad_csr_values, quad_csr_indices, quad_csr_offsets); } // QCMATRIX: one symmetric Q per constraint row (no extra ½ factor vs file coeffs). - // Bundle row metadata, row-linear coefficients (from COLUMNS), rhs, and quadratic part together. + // Store Q in COO (row, col, value) per block — avoids O(n) CSR row-pointer materialization. constexpr f_t k_qcmatrix_value_scale = f_t(1); - const i_t linear_row_count = static_cast(row_types.size() - quadratic_row_ids.size()); - i_t quadratic_row_id = 0; for (const auto& block : qcmatrix_blocks_) { - auto csr_result = build_csr_via_transpose( - block.entries, num_vars_for_quad, num_vars_for_quad, false, k_qcmatrix_value_scale); const i_t row_id = block.constraint_row_id; mps_parser_expects(row_id >= 0 && row_id < static_cast(row_types.size()), error_type_t::ValidationError, "QCMATRIX row index %d is out of range for constraints", static_cast(row_id)); - problem.append_quadratic_constraint(linear_row_count + quadratic_row_id, - row_names[row_id], - static_cast(row_types[row_id]), - A_values[row_id], - A_indices[row_id], - b_values[row_id], - csr_result.values, - csr_result.indices, - csr_result.offsets); - ++quadratic_row_id; + const i_t nnz = static_cast(block.entries.size()); + std::vector qc_rows(static_cast(nnz)); + std::vector qc_cols(static_cast(nnz)); + std::vector qc_vals(static_cast(nnz)); + for (i_t e = 0; e < nnz; ++e) { + qc_rows[static_cast(e)] = std::get<0>(block.entries[static_cast(e)]); + qc_cols[static_cast(e)] = std::get<1>(block.entries[static_cast(e)]); + qc_vals[static_cast(e)] = + std::get<2>(block.entries[static_cast(e)]) * k_qcmatrix_value_scale; + } + problem.append_quadratic_constraint( + row_id, + row_names[row_id], + static_cast(row_types[row_id]), + std::span(A_values[row_id].data(), A_values[row_id].size()), + std::span(A_indices[row_id].data(), A_indices[row_id].size()), + b_values[row_id], + std::span(qc_vals.data(), qc_vals.size()), + std::span(qc_rows.data(), qc_rows.size()), + std::span(qc_cols.data(), qc_cols.size())); } if (!quadratic_row_ids.empty()) { @@ -694,9 +878,7 @@ mps_parser_t::mps_parser_t(mps_data_model_t& problem, // raft::common::nvtx::range fun_scope("mps parser"); std::vector buf = detail::file_to_string(file); - parse_string(buf.data()); - fill_problem(problem); } diff --git a/cpp/src/io/mps_writer.cpp b/cpp/src/io/mps_writer.cpp index 73489277ce..dae7d75a9a 100644 --- a/cpp/src/io/mps_writer.cpp +++ b/cpp/src/io/mps_writer.cpp @@ -157,7 +157,15 @@ void mps_writer_t::write(const std::string& mps_file_path) std::vector constraint_bounds(problem_.get_constraint_bounds().size()); std::vector variable_lower_bounds(problem_.get_variable_lower_bounds().size()); std::vector variable_upper_bounds(problem_.get_variable_upper_bounds().size()); - std::vector variable_types(problem_.get_variable_types().size()); + // Default unset variable types to continuous ('C'); API models often omit set_variable_types. + std::vector variable_types(static_cast(n_variables), 'C'); + { + const auto& src_types = problem_.get_variable_types(); + const size_t n_copy = std::min(src_types.size(), variable_types.size()); + for (size_t j = 0; j < n_copy; ++j) { + variable_types[j] = src_types[j]; + } + } std::vector row_types(problem_.get_row_types().size()); std::vector constraint_matrix_offsets(problem_.get_constraint_matrix_offsets().size()); std::vector constraint_matrix_indices(problem_.get_constraint_matrix_indices().size()); @@ -178,9 +186,6 @@ void mps_writer_t::write(const std::string& mps_file_path) problem_.get_variable_upper_bounds().data(), problem_.get_variable_upper_bounds().data() + problem_.get_variable_upper_bounds().size(), variable_upper_bounds.data()); - std::copy(problem_.get_variable_types().data(), - problem_.get_variable_types().data() + problem_.get_variable_types().size(), - variable_types.data()); std::copy(problem_.get_row_types().data(), problem_.get_row_types().data() + problem_.get_row_types().size(), row_types.data()); @@ -243,8 +248,8 @@ void mps_writer_t::write(const std::string& mps_file_path) const auto& qc = quadratic_constraints[q]; std::string row_name = qc.constraint_row_name.empty() ? "QC" + std::to_string(q) : qc.constraint_row_name; - // Quadratic rows are currently restricted to MPS 'L' (<=). - mps_file << " L " << row_name << "\n"; + char const type = qc.constraint_row_type; + mps_file << " " << type << " " << row_name << "\n"; } // COLUMNS section @@ -497,20 +502,19 @@ void mps_writer_t::write(const std::string& mps_file_path) if (problem_.has_quadratic_constraints()) { for (const auto& qc : problem_.get_quadratic_constraints()) { mps_file << "QCMATRIX " << qc.constraint_row_name << "\n"; - const i_t n_quad_rows = static_cast(qc.quadratic_offsets.size()) - 1; - for (i_t i = 0; i < n_quad_rows; ++i) { + const i_t nnz = static_cast(qc.quadratic_values.size()); + for (i_t p = 0; p < nnz; ++p) { + const i_t i = qc.quadratic_row_indices[static_cast(p)]; + const i_t j = qc.quadratic_col_indices[static_cast(p)]; + f_t v = qc.quadratic_values[static_cast(p)]; std::string row_var_name = static_cast(i) < problem_.get_variable_names().size() - ? problem_.get_variable_names()[i] + ? problem_.get_variable_names()[static_cast(i)] : "C" + std::to_string(i); - for (i_t p = qc.quadratic_offsets[i]; p < qc.quadratic_offsets[i + 1]; ++p) { - i_t j = qc.quadratic_indices[p]; - f_t v = qc.quadratic_values[p]; - std::string col_var_name = static_cast(j) < problem_.get_variable_names().size() - ? problem_.get_variable_names()[j] - : "C" + std::to_string(j); - if (v != f_t(0)) { - mps_file << " " << row_var_name << " " << col_var_name << " " << v << "\n"; - } + std::string col_var_name = static_cast(j) < problem_.get_variable_names().size() + ? problem_.get_variable_names()[static_cast(j)] + : "C" + std::to_string(j); + if (v != f_t(0)) { + mps_file << " " << row_var_name << " " << col_var_name << " " << v << "\n"; } } } diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 0b4c360b10..505b4e02f7 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -98,6 +98,9 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-5)}, {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits::infinity(), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_BARRIER_RELATIVE_FEASIBILITY_TOLERANCE, &pdlp_settings.barrier_relative_feasibility_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)}, + {CUOPT_BARRIER_RELATIVE_OPTIMALITY_TOLERANCE, &pdlp_settings.barrier_relative_optimality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)}, + {CUOPT_BARRIER_RELATIVE_COMPLEMENTARITY_TOLERANCE, &pdlp_settings.barrier_relative_complementarity_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)}, {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, f_t(-1.0), std::numeric_limits::infinity(), f_t(-1.0)}, diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 43d507b2f9..cf741eee0b 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -507,9 +507,14 @@ run_barrier(dual_simplex::user_problem_t& user_problem, barrier_settings.barrier = true; barrier_settings.crossover = settings.crossover; barrier_settings.eliminate_dense_columns = settings.eliminate_dense_columns; - barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement; - barrier_settings.barrier_step_scale = settings.barrier_step_scale; + barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement; + barrier_settings.barrier_step_scale = settings.barrier_step_scale; barrier_settings.cudss_deterministic = settings.cudss_deterministic; + barrier_settings.barrier_relative_feasibility_tol = + settings.barrier_relative_feasibility_tolerance; + barrier_settings.barrier_relative_optimality_tol = settings.barrier_relative_optimality_tolerance; + barrier_settings.barrier_relative_complementarity_tol = + settings.barrier_relative_complementarity_tolerance; barrier_settings.barrier_relaxed_feasibility_tol = settings.tolerances.relative_primal_tolerance; barrier_settings.barrier_relaxed_optimality_tol = settings.tolerances.relative_dual_tolerance; barrier_settings.barrier_relaxed_complementarity_tol = settings.tolerances.relative_gap_tolerance; @@ -1749,6 +1754,10 @@ optimization_problem_solution_t solve_qp(optimization_problem_t(op_problem.get_quadratic_constraints().size())); + } if (settings.user_problem_file != "") { CUOPT_LOG_INFO("Writing user problem to file: %s", settings.user_problem_file.c_str()); op_problem.write_to_mps(settings.user_problem_file); @@ -1803,6 +1812,24 @@ optimization_problem_solution_t solve_lp( // This needs to be called before pdlp is initialized init_handler(op_problem.get_handle_ptr()); + if (op_problem.has_quadratic_objective() || op_problem.has_quadratic_constraints()) { + if (op_problem.has_quadratic_objective()) { + CUOPT_LOG_INFO("Problem has a quadratic objective. Using Barrier."); + } + if (op_problem.has_quadratic_constraints()) { + CUOPT_LOG_INFO("Problem has %d quadratic constraints. Using Barrier with SOC conversion.", + static_cast(op_problem.get_quadratic_constraints().size())); + } + settings.method = method_t::Barrier; + settings.presolver = presolver_t::None; + // Quadratic objective support is minimization-only. + if (op_problem.has_quadratic_objective() && op_problem.get_sense()) { + CUOPT_LOG_ERROR("Quadratic problems must be minimized"); + return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError, + op_problem.get_handle_ptr()->get_stream()); + } + } + raft::common::nvtx::range fun_scope("Running solver"); if (problem_checking) { diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index 9268e17910..96de3a788d 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -26,13 +26,7 @@ #include #include -#include -#include -#include #include -#include -#include -#include namespace cuopt::linear_programming::detail { template diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp index 7d78da80eb..2bfaa127ed 100644 --- a/cpp/src/pdlp/translate.hpp +++ b/cpp/src/pdlp/translate.hpp @@ -12,10 +12,16 @@ #include #include +#include + +#include #include #include +#include +#include +#include namespace cuopt::linear_programming { @@ -111,8 +117,6 @@ static dual_simplex::user_problem_t cuopt_problem_to_user_problem( csr_A.j = std::vector(cuopt::host_copy(model.variables, handle_ptr->get_stream())); csr_A.row_start = std::vector(cuopt::host_copy(model.offsets, handle_ptr->get_stream())); - csr_A.to_compressed_col(user_problem.A); - user_problem.rhs.resize(m); user_problem.row_sense.resize(m); user_problem.range_rows.clear(); @@ -186,6 +190,13 @@ static dual_simplex::user_problem_t cuopt_problem_to_user_problem( user_problem.Q_indices = model.Q_indices; user_problem.Q_values = model.Q_values; + if (model.original_problem_ptr->has_quadratic_constraints()) { + detail::apply_soc_qcmatrix_conversion_for_simplex( + n, model.original_problem_ptr->get_quadratic_constraints(), csr_A, user_problem); + } + + csr_A.to_compressed_col(user_problem.A); + return user_problem; } @@ -211,7 +222,6 @@ static dual_simplex::user_problem_t cuopt_optimization_problem_to_user csr_A.row_start.resize(1); csr_A.row_start[0] = 0; } - csr_A.to_compressed_col(user_problem.A); user_problem.rhs.resize(m); user_problem.row_sense.resize(m); @@ -291,6 +301,13 @@ static dual_simplex::user_problem_t cuopt_optimization_problem_to_user user_problem.Q_indices = model.get_quadratic_objective_indices(); user_problem.Q_values = model.get_quadratic_objective_values(); + if (model.has_quadratic_constraints()) { + detail::apply_soc_qcmatrix_conversion_for_simplex( + static_cast(n), model.get_quadratic_constraints(), csr_A, user_problem); + } + + csr_A.to_compressed_col(user_problem.A); + return user_problem; } diff --git a/cpp/tests/dual_simplex/CMakeLists.txt b/cpp/tests/dual_simplex/CMakeLists.txt index dc4ab35b73..f6dff93227 100644 --- a/cpp/tests/dual_simplex/CMakeLists.txt +++ b/cpp/tests/dual_simplex/CMakeLists.txt @@ -6,4 +6,5 @@ ConfigureTest(DUAL_SIMPLEX_TEST ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solve_barrier.cu - LABELS numopt) + ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/second_order_cone_kernels.cu +) diff --git a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu index b0cbe624dc..ae4efb9b86 100644 --- a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu +++ b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu @@ -5,8 +5,6 @@ */ /* clang-format on */ -#include - #include #include @@ -16,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -146,7 +145,6 @@ TEST(barrier, dual_variable_greater_than) user_problem.A.x[nnz++] = 1.0; user_problem.A.i[nnz] = 1; user_problem.A.x[nnz++] = 2.0; - user_problem.A.print_matrix(); EXPECT_EQ(nnz, nz); user_problem.rhs.resize(m); @@ -181,6 +179,821 @@ TEST(barrier, dual_variable_greater_than) EXPECT_NEAR(solution.z[1], 0.0, 1e-5); } +TEST(barrier, cone_metadata_reindexed_when_slack_is_inserted_before_cones) +{ + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 1; + constexpr int n = 5; + constexpr int nz = 5; + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective.assign(n, 0.0); + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start.resize(n + 1); + for (int j = 0; j < n; ++j) { + user_problem.A.col_start[j] = j; + user_problem.A.i[j] = 0; + user_problem.A.x[j] = 1.0; + } + user_problem.A.col_start[n] = nz; + user_problem.rhs = {1.0}; + user_problem.row_sense = {'L'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + user_problem.num_range_rows = 0; + user_problem.second_order_cone_dims = {2, 2}; + user_problem.cone_var_start = 1; + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = false; + + std::vector new_slacks; + dualize_info_t dualize_info; + lp_problem_t original_lp(user_problem.handle_ptr, 1, 1, 1); + convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info); + + ASSERT_EQ(new_slacks.size(), 1); + EXPECT_EQ(new_slacks[0], 1); + EXPECT_EQ(original_lp.num_cols, 6); + EXPECT_EQ(original_lp.second_order_cone_dims, user_problem.second_order_cone_dims); + EXPECT_EQ(original_lp.cone_var_start, 2); + + lp_problem_t barrier_lp(user_problem.handle_ptr, + original_lp.num_rows, + original_lp.num_cols, + original_lp.A.col_start[original_lp.num_cols]); + std::vector column_scales; + std::vector row_scales; + column_scaling(original_lp, settings, barrier_lp, column_scales, row_scales); + + EXPECT_EQ(barrier_lp.second_order_cone_dims, user_problem.second_order_cone_dims); + EXPECT_EQ(barrier_lp.cone_var_start, 2); +} + +TEST(barrier, presolve_reindexes_cone_start_after_empty_column_removal) +{ + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 1; + constexpr int n = 4; + constexpr int nz = 3; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {1.0, 0.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start = {0, 0, 1, 2, 3}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 0; + user_problem.A.x[1] = -1.0; + user_problem.A.i[2] = 0; + user_problem.A.x[2] = 0.5; + + user_problem.rhs = {1.0}; + user_problem.row_sense = {'E'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + user_problem.num_range_rows = 0; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = false; + + std::vector new_slacks; + dualize_info_t dualize_info; + lp_problem_t original_lp(user_problem.handle_ptr, 1, 1, 1); + convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info); + + presolve_info_t presolve_info; + lp_problem_t presolved_lp(user_problem.handle_ptr, 1, 1, 1); + ASSERT_EQ(presolve(original_lp, settings, presolved_lp, presolve_info), 0); + + EXPECT_EQ(presolved_lp.num_cols, 3); + EXPECT_EQ(presolved_lp.second_order_cone_dims, std::vector({3})); + EXPECT_EQ(presolved_lp.cone_var_start, 0); + + lp_problem_t barrier_lp(user_problem.handle_ptr, + presolved_lp.num_rows, + presolved_lp.num_cols, + presolved_lp.A.col_start[presolved_lp.num_cols]); + std::vector column_scales; + std::vector row_scales; + ASSERT_EQ(column_scaling(presolved_lp, settings, barrier_lp, column_scales, row_scales), 0); + EXPECT_EQ(barrier_lp.cone_var_start, 0); +} + +TEST(barrier, presolve_keeps_native_free_variables_before_cones) +{ + // Layout: [x0, x1 | cone x2, x3, x4] with x0, x1 free and a 3-dimensional SOC block. + // SOCP barrier presolve keeps native free variables (no x = v - w split); cone_var_start + // and column count stay unchanged. + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 1; + constexpr int n = 5; + constexpr int nz = 5; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 0.0, 0.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start = {0, 1, 2, 3, 4, 5}; + for (int j = 0; j < n; ++j) { + user_problem.A.i[j] = 0; + user_problem.A.x[j] = 1.0; + } + + user_problem.rhs = {1.0}; + user_problem.row_sense = {'E'}; + user_problem.lower = {-inf, -inf, 0.0, 0.0, 0.0}; + user_problem.upper.assign(n, inf); + user_problem.num_range_rows = 0; + user_problem.cone_var_start = 2; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = false; + + std::vector new_slacks; + dualize_info_t dualize_info; + lp_problem_t original_lp(user_problem.handle_ptr, 1, 1, 1); + convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info); + + presolve_info_t presolve_info; + lp_problem_t presolved_lp(user_problem.handle_ptr, 1, 1, 1); + ASSERT_EQ(presolve(original_lp, settings, presolved_lp, presolve_info), 0); + + EXPECT_EQ(presolved_lp.num_cols, 5); + EXPECT_EQ(presolved_lp.cone_var_start, 2); + EXPECT_EQ(presolved_lp.second_order_cone_dims, std::vector({3})); + EXPECT_TRUE(presolve_info.free_variable_pairs.empty()); + ASSERT_EQ(presolve_info.native_free_linear_indices.size(), 2); + EXPECT_EQ(presolve_info.native_free_linear_indices[0], 0); + EXPECT_EQ(presolve_info.native_free_linear_indices[1], 1); + EXPECT_EQ(presolved_lp.lower[0], -inf); + EXPECT_EQ(presolved_lp.lower[1], -inf); + EXPECT_EQ(presolved_lp.upper[0], inf); + EXPECT_EQ(presolved_lp.upper[1], inf); +} + +TEST(barrier, uncrush_solution_removes_non_tail_free_variable_partner) +{ + using namespace cuopt::linear_programming::dual_simplex; + + raft::handle_t handle{}; + init_handler(&handle); + lp_problem_t original_lp(&handle, 0, 3, 0); + + presolve_info_t presolve_info; + presolve_info.free_variable_pairs = {0, 1}; + + simplex_solver_settings_t settings; + std::vector crushed_x{5.0, 2.0, 9.0, 8.0}; + std::vector crushed_y{}; + std::vector crushed_z{7.0, 11.0, 13.0, 17.0}; + std::vector uncrushed_x(3); + std::vector uncrushed_y(0); + std::vector uncrushed_z(3); + + uncrush_solution(presolve_info, + settings, + original_lp, + crushed_x, + crushed_y, + crushed_z, + uncrushed_x, + uncrushed_y, + uncrushed_z); + + EXPECT_EQ(uncrushed_x, std::vector({3.0, 9.0, 8.0})); + EXPECT_EQ(uncrushed_z, std::vector({7.0, 13.0, 17.0})); +} + +TEST(barrier, rejects_middle_cone_input_before_barrier) +{ + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 3; + constexpr int n = 5; + constexpr int nz = 3; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {1.0, 0.0, 0.0, 0.0, 1.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start = {0, 1, 1, 2, 2, 3}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 1; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 2; + user_problem.A.x[2] = 1.0; + + user_problem.rhs = {2.0, 1.0, 3.0}; + user_problem.row_sense = {'E', 'E', 'E'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + user_problem.num_range_rows = 0; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + lp_solution_t solution(m, n); + + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + EXPECT_EQ(status, lp_status_t::NUMERICAL_ISSUES); +} + +TEST(barrier, socp_min_x0_subject_to_norm_constraint) +{ + // minimize x_0 + // subject to x_1 = 1 + // (x_0, x_1, x_2) in Q^3 + // + // Optimal: x* = (1, 1, 0), obj* = 1 + + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 1; + constexpr int n = 3; + constexpr int nz = 1; + + user_problem.num_rows = m; + user_problem.num_cols = n; + + user_problem.objective = {1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start = {0, 0, 1, 1}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + + user_problem.rhs = {1.0}; + user_problem.row_sense = {'E'}; + + user_problem.lower = {0.0, 0.0, 0.0}; + user_problem.upper = {inf, inf, inf}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "socp_norm_cone"; + + user_problem.cone_var_start = 0; + user_problem.second_order_cone_dims = {3}; + + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.0, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[2]), 0.0, 1e-4); +} + +TEST(barrier, mixed_linear_and_soc_block) +{ + // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3. + // + // minimize l + // subject to l - t = 0 + // u = 1 + // (t, u, v) in Q^3 + // + // Optimal: l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1. + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 2; + constexpr int n = 4; + constexpr int nz = 4; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {1.0, 0.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l, t, u, v + user_problem.A.col_start = {0, 1, 2, 3, 3}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 0; + user_problem.A.x[1] = -1.0; + user_problem.A.i[2] = 1; + user_problem.A.x[2] = 1.0; + + user_problem.rhs = {0.0, 1.0}; + user_problem.row_sense = {'E', 'E'}; + + user_problem.lower = {0.0, 0.0, 0.0, 0.0}; + user_problem.upper = {inf, inf, inf, inf}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "mixed_linear_and_soc_block"; + + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.0, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4); +} + +TEST(barrier, mixed_linear_and_soc_tail_coupling) +{ + // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3. + // + // minimize t + // subject to l - u = 0 + // l + u = 2 + // (t, u, v) in Q^3 + // + // Optimal: l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1. + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 2; + constexpr int n = 4; + constexpr int nz = 4; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l, t, u, v + user_problem.A.col_start = {0, 2, 2, 4, 4}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 1; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 0; + user_problem.A.x[2] = -1.0; + user_problem.A.i[3] = 1; + user_problem.A.x[3] = 1.0; + + user_problem.rhs = {0.0, 2.0}; + user_problem.row_sense = {'E', 'E'}; + user_problem.lower = {0.0, 0.0, 0.0, 0.0}; + user_problem.upper = {inf, inf, inf, inf}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "mixed_linear_and_soc_tail_coupling"; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = true; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.0, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4); +} + +TEST(barrier, mixed_linear_and_soc_tail_coupling_with_inequality) +{ + // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3. + // + // minimize t + // subject to l - u = 0 + // l + u >= 2 + // (t, u, v) in Q^3 + // + // Optimal: l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1. + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 2; + constexpr int n = 4; + constexpr int nz = 4; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l, t, u, v + user_problem.A.col_start = {0, 2, 2, 4, 4}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 1; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 0; + user_problem.A.x[2] = -1.0; + user_problem.A.i[3] = 1; + user_problem.A.x[3] = 1.0; + + user_problem.rhs = {0.0, 2.0}; + user_problem.row_sense = {'E', 'G'}; + user_problem.lower = {0.0, 0.0, 0.0, 0.0}; + user_problem.upper = {inf, inf, inf, inf}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "mixed_linear_and_soc_tail_coupling_with_inequality"; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = true; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.0, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4); +} + +TEST(barrier, mixed_linear_and_two_soc_blocks) +{ + // Variables ordered as [l1, l2 | t1, u1, v1 | t2, u2, v2], + // where (t1, u1, v1), (t2, u2, v2) \in Q^3. + // + // minimize t1 + t2 + // subject to l1 - u1 = 0 + // l2 - u2 = 0 + // l1 + l2 = 3 + // l1 - l2 = 1 + // + // Optimal: l1* = 2, l2* = 1, t1* = 2, u1* = 2, v1* = 0, + // t2* = 1, u2* = 1, v2* = 0, obj* = 3. + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 4; + constexpr int n = 8; + constexpr int nz = 8; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l1, l2, t1, u1, v1, t2, u2, v2 + user_problem.A.col_start = {0, 3, 6, 6, 7, 7, 7, 8, 8}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 2; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 3; + user_problem.A.x[2] = 1.0; + user_problem.A.i[3] = 1; + user_problem.A.x[3] = 1.0; + user_problem.A.i[4] = 2; + user_problem.A.x[4] = 1.0; + user_problem.A.i[5] = 3; + user_problem.A.x[5] = -1.0; + user_problem.A.i[6] = 0; + user_problem.A.x[6] = -1.0; + user_problem.A.i[7] = 1; + user_problem.A.x[7] = -1.0; + + user_problem.rhs = {0.0, 0.0, 3.0, 1.0}; + user_problem.row_sense = {'E', 'E', 'E', 'E'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + + user_problem.num_range_rows = 0; + user_problem.problem_name = "mixed_linear_and_two_soc_blocks"; + user_problem.cone_var_start = 2; + user_problem.second_order_cone_dims = {3, 3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 3.0, 1e-4); + EXPECT_NEAR(solution.x[0], 2.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 2.0, 1e-4); + EXPECT_NEAR(solution.x[3], 2.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[4]), 0.0, 1e-4); + EXPECT_NEAR(solution.x[5], 1.0, 1e-4); + EXPECT_NEAR(solution.x[6], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[7]), 0.0, 1e-4); +} + +TEST(barrier, mixed_linear_and_two_soc_blocks_with_inequality) +{ + // Variables ordered as [l1, l2 | t1, u1, v1 | t2, u2, v2], + // where (t1, u1, v1), (t2, u2, v2) \in Q^3. + // + // minimize t1 + t2 + // subject to l1 - u1 = 0 + // l2 - u2 = 0 + // l1 + l2 >= 3 + // l1 - l2 = 1 + // + // Optimal: l1* = 2, l2* = 1, t1* = 2, u1* = 2, v1* = 0, + // t2* = 1, u2* = 1, v2* = 0, obj* = 3. + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 4; + constexpr int n = 8; + constexpr int nz = 8; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l1, l2, t1, u1, v1, t2, u2, v2 + user_problem.A.col_start = {0, 3, 6, 6, 7, 7, 7, 8, 8}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 2; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 3; + user_problem.A.x[2] = 1.0; + user_problem.A.i[3] = 1; + user_problem.A.x[3] = 1.0; + user_problem.A.i[4] = 2; + user_problem.A.x[4] = 1.0; + user_problem.A.i[5] = 3; + user_problem.A.x[5] = -1.0; + user_problem.A.i[6] = 0; + user_problem.A.x[6] = -1.0; + user_problem.A.i[7] = 1; + user_problem.A.x[7] = -1.0; + + user_problem.rhs = {0.0, 0.0, 3.0, 1.0}; + user_problem.row_sense = {'E', 'E', 'G', 'E'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + + user_problem.num_range_rows = 0; + user_problem.problem_name = "mixed_linear_and_two_soc_blocks_with_inequality"; + user_problem.cone_var_start = 2; + user_problem.second_order_cone_dims = {3, 3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = true; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 3.0, 1e-4); + EXPECT_NEAR(solution.x[0], 2.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 2.0, 1e-4); + EXPECT_NEAR(solution.x[3], 2.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[4]), 0.0, 1e-4); + EXPECT_NEAR(solution.x[5], 1.0, 1e-4); + EXPECT_NEAR(solution.x[6], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[7]), 0.0, 1e-4); +} + +TEST(barrier, free_linear_prefix_is_uncrushed_correctly_with_soc_block) +{ + // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3 and l is free. + // + // minimize t + // subject to l - u = 0 + // u = 1 + // (t, u, v) in Q^3 + // + // Native free variable l is kept through presolve; end-to-end solve returns + // l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1. + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 2; + constexpr int n = 4; + constexpr int nz = 3; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l, t, u, v + user_problem.A.col_start = {0, 1, 1, 3, 3}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 0; + user_problem.A.x[1] = -1.0; + user_problem.A.i[2] = 1; + user_problem.A.x[2] = 1.0; + + user_problem.rhs = {0.0, 1.0}; + user_problem.row_sense = {'E', 'E'}; + user_problem.lower = {-inf, 0.0, 0.0, 0.0}; + user_problem.upper = {inf, inf, inf, inf}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "free_linear_prefix_is_uncrushed_correctly_with_soc_block"; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.0, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4); +} + +TEST(barrier, qp_with_soc_block) +{ + // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3. + // + // minimize 0.5 l^2 + t + // subject to l + u = 2 + // (t, u, v) in Q^3 + // + // Since t >= |u| and u = 2 - l with l >= 0, the objective becomes + // 0.5 l^2 + |2 - l|, which is minimized at l* = 1, u* = 1, t* = 1, v* = 0. + raft::handle_t handle{}; + init_handler(&handle); + + using namespace cuopt::linear_programming::dual_simplex; + user_problem_t user_problem(&handle); + + constexpr int m = 1; + constexpr int n = 4; + constexpr int nz = 2; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = {0.0, 1.0, 0.0, 0.0}; + + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + // Columns: l, t, u, v + user_problem.A.col_start = {0, 1, 1, 2, 2}; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 0; + user_problem.A.x[1] = 1.0; + + user_problem.rhs = {2.0}; + user_problem.row_sense = {'E'}; + user_problem.lower.assign(n, 0.0); + user_problem.upper.assign(n, inf); + + user_problem.Q_offsets = {0, 1, 1, 1, 1}; + user_problem.Q_indices = {0}; + user_problem.Q_values = {1.0}; + + user_problem.num_range_rows = 0; + user_problem.problem_name = "qp_with_soc_block"; + user_problem.cone_var_start = 1; + user_problem.second_order_cone_dims = {3}; + user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); + + simplex_solver_settings_t settings; + settings.barrier = true; + settings.dualize = 0; + + lp_solution_t solution(m, n); + auto status = solve_linear_program_with_barrier(user_problem, settings, solution); + + EXPECT_EQ(status, lp_status_t::OPTIMAL); + EXPECT_NEAR(solution.objective, 1.5, 1e-4); + EXPECT_NEAR(solution.x[0], 1.0, 1e-4); + EXPECT_NEAR(solution.x[1], 1.0, 1e-4); + EXPECT_NEAR(solution.x[2], 1.0, 1e-4); + EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4); +} + TEST(barrier, min_x_squared_free_variable_dual_correction) { // minimize x^2 (Q = [2.0], so 0.5 * x^T Q x = x^2) diff --git a/cpp/tests/linear_programming/parser_test.cpp b/cpp/tests/linear_programming/parser_test.cpp index 4263a594ce..086432e7b3 100644 --- a/cpp/tests/linear_programming/parser_test.cpp +++ b/cpp/tests/linear_programming/parser_test.cpp @@ -2155,8 +2155,9 @@ End EXPECT_EQ(qc.constraint_row_type, static_cast(LesserThanOrEqual)); EXPECT_NEAR(qc.rhs_value, 10.0, tolerance); EXPECT_TRUE(qc.linear_indices.empty()); - // Q = diag(1, 1). CSR: offsets=[0, 1, 2], indices=[0, 1], values=[1, 1]. - EXPECT_EQ(qc.quadratic_offsets, (std::vector{0, 1, 2})); + // Q = diag(1, 1) stored as COO triplets (row, col, value). + EXPECT_EQ(qc.quadratic_row_indices, (std::vector{0, 1})); + EXPECT_EQ(qc.quadratic_col_indices, (std::vector{0, 1})); ASSERT_EQ(qc.quadratic_values.size(), 2u); EXPECT_NEAR(qc.quadratic_values[0], 1.0, tolerance); EXPECT_NEAR(qc.quadratic_values[1], 1.0, tolerance); @@ -2165,7 +2166,7 @@ End TEST(lp_parser, qc_cross_term_splits_symmetrically) { // `4 x*y` in the LP source means coefficient on x_i * x_j = 4 in the - // symmetric x^T Q x. Split into Q[x,y] = Q[y,x] = 2 in the CSR. + // symmetric x^T Q x. Split into Q[x,y] = Q[y,x] = 2 in the COO storage. auto m = parse_lp_string(R"LP( Minimize x + y @@ -2175,8 +2176,9 @@ End )LP"); ASSERT_EQ(m.get_quadratic_constraints().size(), 1u); const auto& qc = nth_qc(m, 0); - // Q has 4 entries (all of [[1,2],[2,1]]). - EXPECT_EQ(qc.quadratic_offsets, (std::vector{0, 2, 4})); + // Q has 4 entries (all of [[1,2],[2,1]]) stored as COO triplets. + EXPECT_EQ(qc.quadratic_row_indices, (std::vector{0, 0, 1, 1})); + EXPECT_EQ(qc.quadratic_col_indices, (std::vector{0, 1, 0, 1})); ASSERT_EQ(qc.quadratic_values.size(), 4u); EXPECT_NEAR(qc.quadratic_values[0], 1.0, tolerance); // (0, 0) EXPECT_NEAR(qc.quadratic_values[1], 2.0, tolerance); // (0, 1) @@ -2730,16 +2732,16 @@ TEST(qps_parser, qcmatrix_append_api) model_t::quadratic_constraint_t default_qcm; EXPECT_EQ(0, default_qcm.constraint_row_index); EXPECT_TRUE(default_qcm.quadratic_values.empty()); - EXPECT_TRUE(default_qcm.quadratic_indices.empty()); - EXPECT_TRUE(default_qcm.quadratic_offsets.empty()); + EXPECT_TRUE(default_qcm.quadratic_row_indices.empty()); + EXPECT_TRUE(default_qcm.quadratic_col_indices.empty()); EXPECT_TRUE(default_qcm.linear_values.empty()); EXPECT_TRUE(default_qcm.linear_indices.empty()); EXPECT_EQ(0.0, default_qcm.rhs_value); // QC0: [[10, 2], [2, 2]] const std::vector qc0_values = {10.0, 2.0, 2.0, 2.0}; - const std::vector qc0_indices = {0, 1, 0, 1}; - const std::vector qc0_offsets = {0, 2, 4}; + const std::vector qc0_row_indices = {0, 0, 1, 1}; + const std::vector qc0_col_indices = {0, 1, 0, 1}; const std::vector qc0_linear_values = {1.0, 1.0}; const std::vector qc0_linear_indices = {0, 1}; model.append_quadratic_constraint(0, @@ -2749,13 +2751,13 @@ TEST(qps_parser, qcmatrix_append_api) qc0_linear_indices, 5.0, qc0_values, - qc0_indices, - qc0_offsets); + qc0_row_indices, + qc0_col_indices); // QC1: [[4, 1], [1, 6]] const std::vector qc1_values = {4.0, 1.0, 1.0, 6.0}; - const std::vector qc1_indices = {0, 1, 0, 1}; - const std::vector qc1_offsets = {0, 2, 4}; + const std::vector qc1_row_indices = {0, 0, 1, 1}; + const std::vector qc1_col_indices = {0, 1, 0, 1}; const std::vector qc1_linear_values = {3.0, 1.0}; const std::vector qc1_linear_indices = {0, 1}; model.append_quadratic_constraint(1, @@ -2765,8 +2767,8 @@ TEST(qps_parser, qcmatrix_append_api) qc1_linear_indices, 10.0, qc1_values, - qc1_indices, - qc1_offsets); + qc1_row_indices, + qc1_col_indices); ASSERT_TRUE(model.has_quadratic_constraints()); const auto& qcs = model.get_quadratic_constraints(); @@ -2779,8 +2781,8 @@ TEST(qps_parser, qcmatrix_append_api) EXPECT_EQ(qc0_linear_indices, qcs[0].linear_indices); EXPECT_EQ(5.0, qcs[0].rhs_value); EXPECT_EQ(qc0_values, qcs[0].quadratic_values); - EXPECT_EQ(qc0_indices, qcs[0].quadratic_indices); - EXPECT_EQ(qc0_offsets, qcs[0].quadratic_offsets); + EXPECT_EQ(qc0_row_indices, qcs[0].quadratic_row_indices); + EXPECT_EQ(qc0_col_indices, qcs[0].quadratic_col_indices); EXPECT_EQ(1, qcs[1].constraint_row_index); EXPECT_EQ("QC1", qcs[1].constraint_row_name); @@ -2789,8 +2791,8 @@ TEST(qps_parser, qcmatrix_append_api) EXPECT_EQ(qc1_linear_indices, qcs[1].linear_indices); EXPECT_EQ(10.0, qcs[1].rhs_value); EXPECT_EQ(qc1_values, qcs[1].quadratic_values); - EXPECT_EQ(qc1_indices, qcs[1].quadratic_indices); - EXPECT_EQ(qc1_offsets, qcs[1].quadratic_offsets); + EXPECT_EQ(qc1_row_indices, qcs[1].quadratic_row_indices); + EXPECT_EQ(qc1_col_indices, qcs[1].quadratic_col_indices); } // QCQP MPS: each quadratic constraint bundles row + linear + rhs + quadratic. diff --git a/cpp/tests/qp/unit_tests/no_constraints.cu b/cpp/tests/qp/unit_tests/no_constraints.cu index 75190b37fc..dc99a2ce5c 100644 --- a/cpp/tests/qp/unit_tests/no_constraints.cu +++ b/cpp/tests/qp/unit_tests/no_constraints.cu @@ -52,6 +52,8 @@ TEST(no_constraints_test, simple_test) op_problem.set_quadratic_objective_matrix(Q_values_host, 2, Q_indices_host, 2, Q_offsets_host, 3); auto settings = cuopt::linear_programming::pdlp_solver_settings_t(); + // Tighter than default: unconstrained QP needs small x at the origin, not just small objective. + settings.barrier_relative_complementarity_tolerance = 1e-12; auto solution = cuopt::linear_programming::solve_lp(op_problem, settings); EXPECT_EQ(solution.get_termination_status(), diff --git a/cpp/tests/qp/unit_tests/two_variable_test.cu b/cpp/tests/qp/unit_tests/two_variable_test.cu index aac618994a..f1fb4852ec 100644 --- a/cpp/tests/qp/unit_tests/two_variable_test.cu +++ b/cpp/tests/qp/unit_tests/two_variable_test.cu @@ -59,6 +59,7 @@ TEST(two_variable_test, simple_test) op_problem.set_quadratic_objective_matrix(Q_values_host, 2, Q_indices_host, 2, Q_offsets_host, 3); auto settings = cuopt::linear_programming::pdlp_solver_settings_t(); + settings.barrier_relative_complementarity_tolerance = 1e-12; auto solution = cuopt::linear_programming::solve_lp(op_problem, settings); EXPECT_EQ(solution.get_termination_status(), diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd b/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd index 4a83f3a058..b25ba76e3a 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd @@ -7,11 +7,27 @@ # cython: embedsignature = True # cython: language_level = 3 +from libc.stddef cimport size_t from libcpp cimport bool from libcpp.string cimport string from libcpp.vector cimport vector +cdef extern from "cuopt/linear_programming/io/mps_data_model.hpp" namespace "cuopt::linear_programming::io" nogil: # noqa + + cdef cppclass mps_data_model_t[i_t, f_t]: + cppclass quadratic_constraint_t: + int constraint_row_index + string constraint_row_name + char constraint_row_type + vector[double] linear_values + vector[int] linear_indices + double rhs_value + vector[int] quadratic_row_indices + vector[int] quadratic_col_indices + vector[double] quadratic_values + + cdef extern from "cuopt/linear_programming/io/data_model_view.hpp" namespace "cuopt::linear_programming::io" nogil: # noqa cdef cppclass data_model_view_t[i_t, f_t]: @@ -54,6 +70,8 @@ cdef extern from "cuopt/linear_programming/io/data_model_view.hpp" namespace "cu void set_row_names(const vector[string] row_names) except + void set_problem_name(const string problem_name) except + void set_objective_name(const string objective_name) except + + void set_quadratic_constraints( + vector[mps_data_model_t[i_t, f_t].quadratic_constraint_t] constraints) except + cdef extern from "cuopt/linear_programming/io/writer.hpp" namespace "cuopt::linear_programming::io" nogil: # noqa diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model.py b/python/cuopt/cuopt/linear_programming/data_model/data_model.py index 648809eac1..5e6fd6744d 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model.py +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model.py @@ -4,6 +4,8 @@ import os import time +import numpy as np + from . import data_model_wrapper from .utilities import catch_cuopt_exception @@ -288,6 +290,90 @@ def set_quadratic_objective_matrix(self, Q_values, Q_indices, Q_offsets): """ super().set_quadratic_objective_matrix(Q_values, Q_indices, Q_offsets) + def get_quadratic_constraints(self): + """ + Return quadratic (QCMATRIX) constraints appended to this model. + + Each entry is a dict with keys including ``constraint_row_index``, + ``constraint_row_name``, ``constraint_row_type``, COO arrays, and ``rhs_value``. + """ + return self.quadratic_constraints + + @catch_cuopt_exception + def clear_quadratic_constraints(self): + """ + Remove all quadratic (QCMATRIX) constraints from the model. + + Quadratic constraints are converted to second-order cone form internally + and solved with the barrier method. + """ + super().clear_quadratic_constraints() + + @catch_cuopt_exception + def add_quadratic_constraint( + self, + constraint_row_index, + constraint_row_name="", + linear_values=None, + linear_indices=None, + rhs_value=0.0, + quadratic_values=None, + quadratic_row_indices=None, + quadratic_col_indices=None, + sense="L", + ): + """ + Add one quadratic constraint in MPS QCMATRIX form. + + Each constraint has a linear part (optional) and a quadratic part in COO + format. Call multiple times to add several QCMATRIX rows; each call must + use a distinct ``constraint_row_index``. + + Parameters + ---------- + constraint_row_index : int + ROWS declaration index for this quadratic row (among all rows). + constraint_row_name : str, optional + Optional row name (for MPS export). + linear_values, linear_indices : array-like, optional + Sparse linear coefficients on the same variable index space. + rhs_value : float, optional + Right-hand side of the quadratic row. + quadratic_values, quadratic_row_indices, quadratic_col_indices : array-like + COO triplets for the quadratic matrix Q in + ``linear^T x + x^T Q x {sense} rhs_value``. + sense : str, optional + MPS row type: ``'L'`` (default, ``<=``) or ``'G'`` (``>=``), same values as + :meth:`set_row_types`. Rows are stored with the given sense; ``'G'`` is converted to + ``'L'`` when the barrier solver builds second-order cones from QCMATRIX data. + Equality (``'E'``) is not supported. + + Notes + ----- + When any quadratic constraint is present, cuOpt selects the barrier + solver and converts QCMATRIX rows to second-order cones. + """ + if hasattr(sense, "value"): + sense = sense.value + if sense == "E": + raise ValueError("Equality constraints are not supported.") + if sense not in ("L", "G"): + raise ValueError( + f"Invalid sense {sense!r}; use 'L' or 'G' like set_row_types " + "(equality 'E' is not supported)." + ) + super().add_quadratic_constraint( + constraint_row_index, + constraint_row_name, + linear_values, + linear_indices, + rhs_value, + quadratic_values, + quadratic_row_indices, + quadratic_col_indices, + constraint_row_type=sense, + ) + @catch_cuopt_exception def set_variable_lower_bounds(self, variable_lower_bounds): """ diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd index 6c798a0f6a..9f9f5ba5cf 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd @@ -12,3 +12,7 @@ from libcpp.memory cimport unique_ptr cdef class DataModel: cdef unique_ptr[data_model_view_t[int, double]] c_data_model_view + + cdef void _set_cpp_quadratic_constraints( + self, data_model_view_t[int, double]* c_data_model_view + ) diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx index 7722fb2437..7c764b1413 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx @@ -7,7 +7,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from .data_model cimport data_model_view_t, write_mps +from .data_model cimport data_model_view_t, mps_data_model_t, write_mps import warnings @@ -17,10 +17,14 @@ from cuopt.utilities import get_data_ptr from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr +from libcpp.string cimport string from libcpp.utility cimport move +from libcpp.vector cimport vector def type_cast(np_obj, np_type, name): + if not isinstance(np_obj, np.ndarray): + np_obj = np.asarray(np_obj) obj_type = np_obj.dtype if ((np.issubdtype(np_type, np.floating) and @@ -62,6 +66,79 @@ cdef class DataModel: self.variable_types = np.array([]) self.variable_names = np.array([]) self.row_names = np.array([]) + self.quadratic_constraints = [] + + def clear_quadratic_constraints(self): + self.quadratic_constraints = [] + + def add_quadratic_constraint( + self, + constraint_row_index, + constraint_row_name="", + linear_values=None, + linear_indices=None, + rhs_value=0.0, + quadratic_values=None, + quadratic_row_indices=None, + quadratic_col_indices=None, + constraint_row_type="L", + ): + linear_values = ( + np.array([], dtype=np.float64) + if linear_values is None + else type_cast(linear_values, np.float64, "linear_values") + ) + linear_indices = ( + np.array([], dtype=np.int32) + if linear_indices is None + else type_cast(linear_indices, np.int32, "linear_indices") + ) + if linear_values.shape[0] != linear_indices.shape[0]: + raise ValueError("linear_values and linear_indices must have the same length") + quadratic_values = ( + np.array([], dtype=np.float64) + if quadratic_values is None + else type_cast(quadratic_values, np.float64, "quadratic_values") + ) + quadratic_row_indices = ( + np.array([], dtype=np.int32) + if quadratic_row_indices is None + else type_cast(quadratic_row_indices, np.int32, "quadratic_row_indices") + ) + quadratic_col_indices = ( + np.array([], dtype=np.int32) + if quadratic_col_indices is None + else type_cast(quadratic_col_indices, np.int32, "quadratic_col_indices") + ) + if not ( + quadratic_values.shape[0] + == quadratic_row_indices.shape[0] + == quadratic_col_indices.shape[0] + ): + raise ValueError( + "quadratic_values, quadratic_row_indices, and " + "quadratic_col_indices must have the same length" + ) + row_type = str(constraint_row_type) + if row_type == "E": + raise ValueError("Equality constraints are not supported.") + if row_type not in ("L", "G"): + raise ValueError( + f"Invalid constraint_row_type {row_type!r}; use 'L' or 'G' like set_row_types." + ) + self.quadratic_constraints.append( + { + "constraint_row_index": int(constraint_row_index), + "constraint_row_name": str(constraint_row_name), + "constraint_row_type": row_type, + "linear_values": linear_values, + "linear_indices": linear_indices, + "rhs_value": float(rhs_value), + "quadratic_values": quadratic_values, + "quadratic_row_indices": quadratic_row_indices, + "quadratic_col_indices": quadratic_col_indices, + } + ) def set_maximize(self, maximize): self.maximize = maximize @@ -379,10 +456,65 @@ cdef class DataModel: self.get_initial_dual_solution().shape[0] ) + if self.quadratic_constraints: + self._set_cpp_quadratic_constraints(c_data_model_view) + + cdef void _set_cpp_quadratic_constraints( + self, data_model_view_t[int, double]* c_data_model_view + ): + cdef vector[mps_data_model_t[int, double].quadratic_constraint_t] constraints + cdef mps_data_model_t[int, double].quadratic_constraint_t qc + cdef dict item + cdef size_t i + cdef uintptr_t c_linear_values + cdef uintptr_t c_linear_indices + cdef uintptr_t c_quadratic_values + cdef uintptr_t c_quadratic_row_indices + cdef uintptr_t c_quadratic_col_indices + cdef size_t linear_nnz + cdef size_t quadratic_nnz + + for item in self.quadratic_constraints: + qc.constraint_row_index = item["constraint_row_index"] + qc.constraint_row_name = item["constraint_row_name"].encode("utf-8") + qc.constraint_row_type = ord(item.get("constraint_row_type", "L")) + qc.rhs_value = item["rhs_value"] + + linear_nnz = item["linear_values"].shape[0] + qc.linear_values.resize(linear_nnz) + qc.linear_indices.resize(linear_nnz) + if linear_nnz > 0: + c_linear_values = get_data_ptr(item["linear_values"]) + c_linear_indices = get_data_ptr(item["linear_indices"]) + for i in range(linear_nnz): + qc.linear_values[i] = (c_linear_values)[i] + qc.linear_indices[i] = (c_linear_indices)[i] + + quadratic_nnz = item["quadratic_values"].shape[0] + qc.quadratic_values.resize(quadratic_nnz) + qc.quadratic_row_indices.resize(quadratic_nnz) + qc.quadratic_col_indices.resize(quadratic_nnz) + if quadratic_nnz > 0: + c_quadratic_values = get_data_ptr(item["quadratic_values"]) + c_quadratic_row_indices = get_data_ptr(item["quadratic_row_indices"]) + c_quadratic_col_indices = get_data_ptr(item["quadratic_col_indices"]) + for i in range(quadratic_nnz): + qc.quadratic_values[i] = (c_quadratic_values)[i] + qc.quadratic_row_indices[i] = (c_quadratic_row_indices)[i] + qc.quadratic_col_indices[i] = (c_quadratic_col_indices)[i] + + constraints.push_back(qc) + + c_data_model_view.set_quadratic_constraints(constraints) + def writeMPS(self, user_problem_file): - self.variable_types = type_cast( - self.variable_types, "S1", "variable_types" - ) + n_vars = self.get_variable_lower_bounds().shape[0] + if self.variable_types.shape[0] == 0 and n_vars > 0: + self.variable_types = np.array(["C"] * n_vars, dtype="S1") + else: + self.variable_types = type_cast( + self.variable_types, "S1", "variable_types" + ) self.set_data_model_view() write_mps(self.c_data_model_view.get()[0], user_problem_file.encode('utf-8')) diff --git a/python/cuopt/cuopt/linear_programming/io/parser.pxd b/python/cuopt/cuopt/linear_programming/io/parser.pxd index 402873f9ab..b4bea47d3c 100644 --- a/python/cuopt/cuopt/linear_programming/io/parser.pxd +++ b/python/cuopt/cuopt/linear_programming/io/parser.pxd @@ -15,6 +15,17 @@ from libcpp.vector cimport vector cdef extern from "cuopt/linear_programming/io/mps_data_model.hpp" namespace "cuopt::linear_programming::io": # noqa cdef cppclass mps_data_model_t[i_t, f_t]: + cppclass quadratic_constraint_t: + int constraint_row_index + string constraint_row_name + char constraint_row_type + vector[double] linear_values + vector[int] linear_indices + double rhs_value + vector[int] quadratic_row_indices + vector[int] quadratic_col_indices + vector[double] quadratic_values + bool maximize_ vector[f_t] A_ vector[i_t] A_indices_ @@ -36,6 +47,7 @@ cdef extern from "cuopt/linear_programming/io/mps_data_model.hpp" namespace "cuo vector[char] row_types_ string objective_name_ string problem_name_ + const vector[quadratic_constraint_t]& get_quadratic_constraints() const cdef extern from "cuopt/linear_programming/io/utilities/cython_parser.hpp" namespace "cuopt::cython": # noqa diff --git a/python/cuopt/cuopt/linear_programming/io/parser_wrapper.pyx b/python/cuopt/cuopt/linear_programming/io/parser_wrapper.pyx index 61e10b1864..3cc0e1a110 100644 --- a/python/cuopt/cuopt/linear_programming/io/parser_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/io/parser_wrapper.pyx @@ -135,6 +135,53 @@ cdef _marshal_data_model(mps_data_model_t[int, double]* dm, data_model): data_model.set_objective_name(dm.objective_name_.decode()) data_model.set_problem_name(dm.problem_name_.decode()) + cdef size_t qi + cdef size_t n_qc = dm.get_quadratic_constraints().size() + cdef mps_data_model_t[int, double].quadratic_constraint_t qc + cdef size_t linear_nnz, quadratic_nnz + cdef double[:] linear_values_view + cdef int[:] linear_indices_view + cdef double[:] quadratic_values_view + cdef int[:] quadratic_row_indices_view + cdef int[:] quadratic_col_indices_view + + for qi in range(n_qc): + qc = dm.get_quadratic_constraints()[qi] + linear_nnz = qc.linear_values.size() + if linear_nnz > 0: + linear_values_view = qc.linear_values.data() + linear_values = np.asarray(linear_values_view).copy() + linear_indices_view = qc.linear_indices.data() + linear_indices = np.asarray(linear_indices_view).copy() + else: + linear_values = None + linear_indices = None + + quadratic_nnz = qc.quadratic_values.size() + if quadratic_nnz > 0: + quadratic_values_view = qc.quadratic_values.data() + quadratic_values = np.asarray(quadratic_values_view).copy() + quadratic_row_indices_view = qc.quadratic_row_indices.data() + quadratic_row_indices = np.asarray(quadratic_row_indices_view).copy() + quadratic_col_indices_view = qc.quadratic_col_indices.data() + quadratic_col_indices = np.asarray(quadratic_col_indices_view).copy() + else: + quadratic_values = None + quadratic_row_indices = None + quadratic_col_indices = None + + data_model.add_quadratic_constraint( + qc.constraint_row_index, + qc.constraint_row_name.decode("utf-8"), + linear_values=linear_values, + linear_indices=linear_indices, + rhs_value=qc.rhs_value, + quadratic_values=quadratic_values, + quadratic_row_indices=quadratic_row_indices, + quadratic_col_indices=quadratic_col_indices, + sense=chr(qc.constraint_row_type), + ) + return data_model diff --git a/python/cuopt/cuopt/linear_programming/problem.py b/python/cuopt/cuopt/linear_programming/problem.py index 25d9d634da..84ca43c5b5 100644 --- a/python/cuopt/cuopt/linear_programming/problem.py +++ b/python/cuopt/cuopt/linear_programming/problem.py @@ -256,8 +256,10 @@ def __mul__(self, other): qvars1 = [self] * len(other.vars) qvars2 = other.vars qcoeffs = other.coefficients - vars = [self] - coeffs = [other.constant] + vars, coeffs = [], [] + if other.constant != 0.0: + vars = [self] + coeffs = [other.constant] return QuadraticExpression( qvars1=qvars1, qvars2=qvars2, @@ -283,6 +285,8 @@ def __le__(self, other): # var1 <= var2 -> var1 - var2 <= 0 expr = self - other return Constraint(expr, LE, 0.0) + case QuadraticExpression(): + return Constraint(self - other, LE, 0.0) case _: raise ValueError("Unsupported operation") @@ -295,6 +299,8 @@ def __ge__(self, other): # var1 >= var2 -> var1 - var2 >= 0 expr = self - other return Constraint(expr, GE, 0.0) + case QuadraticExpression(): + return Constraint(self - other, GE, 0.0) case _: raise ValueError("Unsupported operation") @@ -314,8 +320,9 @@ def __eq__(self, other): class QuadraticExpression: """ QuadraticExpressions contain quadratic terms, linear terms, and a constant. - QuadraticExpressions can be used to create quadratic objectives in - the Problem. + Use them for quadratic objectives (``Problem.setObjective``) or quadratic + constraints via ``<=`` or ``>=`` comparisons passed to + :py:meth:`Problem.addConstraint` (equality is not supported). QuadraticExpressions can be added and subtracted with other QuadraticExpressions, LinearExpressions, and Variables, and can also be multiplied and divided by scalars. @@ -734,6 +741,9 @@ def __rsub__(self, other): # other - self -> other + self * -1.0 return other + self * -1.0 + def __neg__(self): + return self * -1.0 + def __imul__(self, other): # Compute expr *= constant match other: @@ -834,13 +844,91 @@ def __truediv__(self, other): ) def __le__(self, other): - raise Exception("Quadratic constraints not supported") + match other: + case int() | float(): + return Constraint(self, LE, float(other)) + case Variable() | LinearExpression() | QuadraticExpression(): + return Constraint(self - other, LE, 0.0) + case _: + raise ValueError( + "Can't compare QuadraticExpression with type %s" + % type(other).__name__ + ) def __ge__(self, other): - raise Exception("Quadratic constraints not supported") + match other: + case int() | float(): + return Constraint(self, GE, float(other)) + case Variable() | LinearExpression() | QuadraticExpression(): + return Constraint(self - other, GE, 0.0) + case _: + raise ValueError( + "Can't compare QuadraticExpression with type %s" + % type(other).__name__ + ) def __eq__(self, other): - raise Exception("Quadratic constraints not supported") + raise ValueError("Equality constraints are not supported.") + + +def _quadratic_expression_to_qcmatrix(expr, rhs): + """Build QCMATRIX COO data for a quadratic row ``expr`` sense ``rhs``. + + Used for both ``<=`` (``LE``) and ``>=`` (``GE``); row sense is stored on + ``Constraint.Sense``, not in this helper. The constant term is moved to + ``rhs_value``. + + Duplicate linear variable indices and duplicate Q (row, col) triplets are + merged by summing coefficients, matching linear ``Constraint`` behavior. + """ + rhs_value = float(rhs) - expr.constant + + linear_coeff = {} + for var, coeff in zip(expr.vars, expr.coefficients): + if coeff == 0.0: + continue + idx = var.index + linear_coeff[idx] = linear_coeff.get(idx, 0.0) + coeff + + linear_indices = [] + linear_values = [] + for idx in sorted(linear_coeff): + coeff = linear_coeff[idx] + if coeff != 0.0: + linear_indices.append(idx) + linear_values.append(coeff) + + quad_coeff = {} + for var1, var2, coeff in zip(expr.qvars1, expr.qvars2, expr.qcoefficients): + if coeff == 0.0: + continue + key = (var1.index, var2.index) + quad_coeff[key] = quad_coeff.get(key, 0.0) + coeff + if expr.qmatrix is not None: + q_coo = expr.qmatrix.tocoo() + for row, col, value in zip(q_coo.row, q_coo.col, q_coo.data): + if value == 0.0: + continue + key = (expr.qvars[row].index, expr.qvars[col].index) + quad_coeff[key] = quad_coeff.get(key, 0.0) + value + + quadratic_row_indices = [] + quadratic_col_indices = [] + quadratic_values = [] + for (row, col), value in sorted(quad_coeff.items()): + if value != 0.0: + quadratic_row_indices.append(row) + quadratic_col_indices.append(col) + quadratic_values.append(value) + + return ( + linear_values, + linear_indices, + quadratic_values, + quadratic_row_indices, + quadratic_col_indices, + rhs_value, + ) class LinearExpression: @@ -1150,6 +1238,8 @@ def __le__(self, other): # expr1 <= expr2 -> expr1 - expr2 <= 0 expr = self - other return Constraint(expr, LE, 0.0) + case QuadraticExpression(): + return Constraint(self - other, LE, 0.0) def __ge__(self, other): match other: @@ -1159,6 +1249,8 @@ def __ge__(self, other): # expr1 >= expr2 -> expr1 - expr2 >= 0 expr = self - other return Constraint(expr, GE, 0.0) + case QuadraticExpression(): + return Constraint(self - other, GE, 0.0) def __eq__(self, other): match other: @@ -1172,16 +1264,15 @@ def __eq__(self, other): class Constraint: """ - cuOpt constraint object containing a linear expression, - the sense of the constraint, and the right-hand side of - the constraint. - Constraints are associated with a problem and can be - created using :py:meth:`Problem.addConstraint`. + cuOpt constraint object containing a linear or quadratic (QCMATRIX) + expression, the sense of the constraint, and the right-hand side. + Constraints are associated with a problem and can be created using + :py:meth:`Problem.addConstraint`. Parameters ---------- - expr : LinearExpression - Linear expression corresponding to a problem. + expr : LinearExpression or QuadraticExpression + Expression corresponding to the constraint. sense : enum Sense of the constraint. Either LE for <=, GE for >= or EQ for == . @@ -1197,7 +1288,9 @@ class Constraint: Sense : LE, GE or EQ Row sense. LE for <=, GE for >= or EQ for == . RHS : float - Constraint right-hand side value. + Constraint right-hand side value (linear rows). + is_quadratic : bool + True when the row is exported as a QCMATRIX quadratic constraint. Slack : float Computed LHS - RHS with current solution. DualValue : float @@ -1205,10 +1298,41 @@ class Constraint: """ def __init__(self, expr, sense, rhs, name=""): + self.index = -1 + self.Sense = sense + self.ConstraintName = name + self.DualValue = float("nan") + self.Slack = float("nan") + + if isinstance(expr, QuadraticExpression): + self.is_quadratic = True + ( + linear_values, + linear_indices, + quadratic_values, + quadratic_row_indices, + quadratic_col_indices, + rhs_value, + ) = _quadratic_expression_to_qcmatrix(expr, rhs) + self.linear_values = np.array(linear_values, dtype=np.float64) + self.linear_indices = np.array(linear_indices, dtype=np.int32) + self.quadratic_values = np.array(quadratic_values, dtype=np.float64) + self.quadratic_row_indices = np.array( + quadratic_row_indices, dtype=np.int32 + ) + self.quadratic_col_indices = np.array( + quadratic_col_indices, dtype=np.int32 + ) + self.rhs_value = rhs_value + self.RHS = rhs_value + self.vindex_coeff_dict = {} + self.vars = expr.vars + return + + self.is_quadratic = False self.vindex_coeff_dict = {} nz = len(expr) self.vars = expr.vars - self.index = -1 for i in range(nz): v_idx = expr.vars[i].index v_coeff = expr.coefficients[i] @@ -1217,11 +1341,7 @@ def __init__(self, expr, sense, rhs, name=""): if v_idx in self.vindex_coeff_dict else v_coeff ) - self.Sense = sense self.RHS = rhs - expr.getConstant() - self.ConstraintName = name - self.DualValue = float("nan") - self.Slack = float("nan") def __len__(self): return len(self.vindex_coeff_dict) @@ -1254,9 +1374,12 @@ def getCoefficient(self, var): def compute_slack(self): # Computes the constraint Slack in the current solution. - lhs = 0.0 - for var in self.vars: - lhs += var.Value * self.vindex_coeff_dict[var.index] + index_to_var = {var.index: var for var in self.vars} + lhs = sum( + index_to_var[v_idx].Value * coeff + for v_idx, coeff in self.vindex_coeff_dict.items() + ) + return self.RHS - lhs @@ -1401,6 +1524,8 @@ def _to_data_model(self): "values": [], } for constr in self.constrs: + if constr.is_quadratic: + continue csr_dict["column_indices"].extend( list(constr.vindex_coeff_dict.keys()) ) @@ -1420,6 +1545,8 @@ def _to_data_model(self): else: for constr in self.constrs: + if constr.is_quadratic: + continue self.rhs.append(constr.RHS) self.row_sense.append(constr.Sense) @@ -1463,6 +1590,27 @@ def _to_data_model(self): dm.set_row_names(self.row_names) dm.set_problem_name(self.Name) + linear_constr_count = sum(1 for c in self.constrs if not c.is_quadratic) + quad_index = 0 + for constr in self.constrs: + if not constr.is_quadratic: + continue + row_name = constr.ConstraintName + if row_name == "": + row_name = "Q" + str(constr.index) + dm.add_quadratic_constraint( + constraint_row_index=linear_constr_count + quad_index, + constraint_row_name=row_name, + linear_values=constr.linear_values, + linear_indices=constr.linear_indices, + rhs_value=constr.rhs_value, + quadratic_values=constr.quadratic_values, + quadratic_row_indices=constr.quadratic_row_indices, + quadratic_col_indices=constr.quadratic_col_indices, + sense=constr.Sense, + ) + quad_index += 1 + self.model = dm def update(self): @@ -1530,15 +1678,15 @@ def addVariable( def addConstraint(self, constr, name=""): """ Adds a constraint to the problem defined by constraint object - and name. A constraint is generated using LinearExpression, - Sense and RHS. + and name. A constraint is generated using LinearExpression or + QuadraticExpression comparisons (``<=``, ``>=``, or ``==``). Parameters ---------- constr : :py:class:`Constraint` - Constructed using LinearExpressions (See Examples) + Constructed using expression comparisons (see Examples). name : string - Name of the variable. Optional. + Name of the constraint. Optional. Examples -------- @@ -1548,6 +1696,7 @@ def addConstraint(self, constr, name=""): >>> problem.addConstraint(2*x - 3*y <= 10, name="Constr1") >>> expr = 3*x + y >>> problem.addConstraint(expr + x == 20, name="Constr2") + >>> problem.addConstraint(-x*x + y*y <= 0, name="soc") """ if self.solved: self.reset_solved_values() # Reset all solved values @@ -1587,6 +1736,10 @@ def updateConstraint(self, constr, coeffs=[], rhs=None): """ self.reset_solved_values() if isinstance(constr, Constraint): + if constr.is_quadratic: + raise ValueError( + "updateConstraint applies to linear constraints only" + ) if isinstance(coeffs, dict): coeffs = coeffs.items() for var, coeff in coeffs: @@ -1827,6 +1980,12 @@ def NumConstraints(self): # Returns number of contraints in the problem. return len(self.constrs) + def getQuadraticConstraints(self): + """ + Returns all quadratic (QCMATRIX) constraints in the problem. + """ + return [c for c in self.constrs if c.is_quadratic] + @property def NumNZs(self): # Returns number of non-zeros in the problem. @@ -1873,6 +2032,8 @@ def getCSR(self): return self.dict_to_object(self.constraint_csr_matrix) csr_dict = {"row_pointers": [0], "column_indices": [], "values": []} for constr in self.constrs: + if constr.is_quadratic: + continue csr_dict["column_indices"].extend( list(constr.vindex_coeff_dict.keys()) ) @@ -1949,10 +2110,14 @@ def populate_solution(self, solution): dual_sol = None if not IsMIP: dual_sol = solution.get_dual_solution() - for i, constr in enumerate(self.constrs): - if dual_sol is not None and len(dual_sol) > 0: - constr.DualValue = dual_sol[i] + linear_row = 0 + for constr in self.constrs: + if constr.is_quadratic: + continue + if dual_sol is not None and len(dual_sol) > linear_row: + constr.DualValue = dual_sol[linear_row] constr.Slack = constr.compute_slack() + linear_row += 1 self.solved = True def solve(self, settings=solver_settings.SolverSettings()): @@ -1977,3 +2142,4 @@ def solve(self, settings=solver_settings.SolverSettings()): solution = solver.Solve(self.model, settings) # Post Solve self.populate_solution(solution) + return solution diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index 66242550aa..749601926d 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -150,6 +150,16 @@ def test_model(): assert prob.ObjValue == pytest.approx(5 * x.Value + 3 * y.Value + 70) +def test_constraint_duplicate_terms_slack(): + """Merged coeffs in vindex_coeff_dict must not be double-counted in slack.""" + prob = Problem() + x = prob.addVariable() + c = prob.addConstraint(5 * x + 7 * x <= 18) + assert c.getCoefficient(x) == 12 + x.Value = 1.0 + assert c.compute_slack() == pytest.approx(6.0) + + def test_semi_continuous_variable(): prob = Problem("Semi-continuous") x = prob.addVariable(lb=5.0, ub=10.0, vtype=SEMI_CONTINUOUS, name="x") From 3e7a526a269b928f57e878f59d8268f37103cf32 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 22 May 2026 15:00:27 -0700 Subject: [PATCH 02/10] Add label back to test --- cpp/tests/dual_simplex/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/dual_simplex/CMakeLists.txt b/cpp/tests/dual_simplex/CMakeLists.txt index f6dff93227..2df648377d 100644 --- a/cpp/tests/dual_simplex/CMakeLists.txt +++ b/cpp/tests/dual_simplex/CMakeLists.txt @@ -7,4 +7,4 @@ ConfigureTest(DUAL_SIMPLEX_TEST ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solve_barrier.cu ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/second_order_cone_kernels.cu -) + LABELS numopt) From 911cb044094ea3678d2b76b7dacf545e54782d67 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 22 May 2026 15:25:20 -0700 Subject: [PATCH 03/10] Remove barrier specific tolerances. These should not be exposed to the user --- .../cuopt/linear_programming/constants.h | 4 ---- .../pdlp/solver_settings.hpp | 3 --- cpp/src/barrier/barrier.cu | 8 +++----- .../dual_simplex/simplex_solver_settings.hpp | 2 +- cpp/src/dual_simplex/solve.cpp | 20 ++++++++++--------- cpp/src/math_optimization/solver_settings.cu | 3 --- cpp/src/pdlp/solve.cu | 5 ----- cpp/tests/qp/unit_tests/no_constraints.cu | 2 -- cpp/tests/qp/unit_tests/two_variable_test.cu | 1 - 9 files changed, 15 insertions(+), 33 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 1dfccef1ea..3342ba375c 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -47,10 +47,6 @@ #define CUOPT_DUALIZE "dualize" #define CUOPT_ORDERING "ordering" #define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" -#define CUOPT_BARRIER_RELATIVE_FEASIBILITY_TOLERANCE "barrier_relative_feasibility_tolerance" -#define CUOPT_BARRIER_RELATIVE_OPTIMALITY_TOLERANCE "barrier_relative_optimality_tolerance" -#define CUOPT_BARRIER_RELATIVE_COMPLEMENTARITY_TOLERANCE \ - "barrier_relative_complementarity_tolerance" #define CUOPT_BARRIER_ITERATIVE_REFINEMENT "barrier_iterative_refinement" #define CUOPT_BARRIER_STEP_SCALE "barrier_step_scale" #define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index 3597b85016..b30286f9ce 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -280,9 +280,6 @@ class pdlp_solver_settings_t { i_t dualize{-1}; i_t ordering{-1}; i_t barrier_dual_initial_point{-1}; - f_t barrier_relative_feasibility_tolerance{1.0e-8}; - f_t barrier_relative_optimality_tolerance{1.0e-8}; - f_t barrier_relative_complementarity_tolerance{1.0e-8}; bool eliminate_dense_columns{true}; pdlp_precision_t pdlp_precision{pdlp_precision_t::DefaultPrecision}; bool barrier_iterative_refinement{true}; diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index fd6f9dbc6e..b39d75c6ae 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -4382,11 +4382,9 @@ lp_status_t barrier_solver_t::solve(f_t start_time, lp_solution_t::solve(f_t start_time, lp_solution_t lp_status_t solve_linear_program_with_barrier(const user_problem_t& user_problem, - const simplex_solver_settings_t& settings, - f_t start_time, - lp_solution_t& solution) + const simplex_solver_settings_t& settings, + f_t start_time, + lp_solution_t& solution) { lp_status_t status = lp_status_t::UNSET; lp_problem_t original_lp(user_problem.handle_ptr, 1, 1, 1); // Convert the user problem to a linear program with only equality constraints std::vector new_slacks; + simplex_solver_settings_t barrier_settings = settings; + barrier_settings.barrier_presolve = true; dualize_info_t dualize_info; - convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info); - if (!validate_barrier_cone_layout(original_lp, settings)) { + convert_user_problem(user_problem, barrier_settings, original_lp, new_slacks, dualize_info); + if (!validate_barrier_cone_layout(original_lp, barrier_settings)) { return lp_status_t::NUMERICAL_ISSUES; } @@ -405,7 +407,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us // Presolve the linear program presolve_info_t presolve_info; lp_problem_t presolved_lp(user_problem.handle_ptr, 1, 1, 1); - const i_t ok = presolve(original_lp, settings, presolved_lp, presolve_info); + const i_t ok = presolve(original_lp, barrier_settings, presolved_lp, presolve_info); if (ok == CONCURRENT_HALT_RETURN) { return lp_status_t::CONCURRENT_LIMIT; } if (ok == TIME_LIMIT_RETURN) { return lp_status_t::TIME_LIMIT; } if (ok == -1) { return lp_status_t::INFEASIBLE; } @@ -417,12 +419,12 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us presolved_lp.A.col_start[presolved_lp.num_cols]); std::vector column_scales; std::vector row_scales; - column_scaling(presolved_lp, settings, barrier_lp, column_scales, row_scales); + column_scaling(presolved_lp, barrier_settings, barrier_lp, column_scales, row_scales); // Solve using barrier lp_solution_t barrier_solution(barrier_lp.num_rows, barrier_lp.num_cols); - barrier_solver_t barrier_solver(barrier_lp, presolve_info, settings); + barrier_solver_t barrier_solver(barrier_lp, presolve_info, barrier_settings); lp_status_t barrier_status = barrier_solver.solve(start_time, barrier_solution); if (barrier_status == lp_status_t::OPTIMAL) { #ifdef COMPUTE_SCALED_RESIDUALS @@ -475,7 +477,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us // Undo presolve uncrush_solution(presolve_info, - settings, + barrier_settings, original_lp, unscaled_x, unscaled_y, diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 505b4e02f7..0b4c360b10 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -98,9 +98,6 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-5)}, {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits::infinity(), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, f_t(0.0), f_t(1e-1), f_t(1e-4)}, - {CUOPT_BARRIER_RELATIVE_FEASIBILITY_TOLERANCE, &pdlp_settings.barrier_relative_feasibility_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)}, - {CUOPT_BARRIER_RELATIVE_OPTIMALITY_TOLERANCE, &pdlp_settings.barrier_relative_optimality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)}, - {CUOPT_BARRIER_RELATIVE_COMPLEMENTARITY_TOLERANCE, &pdlp_settings.barrier_relative_complementarity_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)}, {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, f_t(-1.0), std::numeric_limits::infinity(), f_t(-1.0)}, diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index cf741eee0b..4bbd2613d3 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -510,11 +510,6 @@ run_barrier(dual_simplex::user_problem_t& user_problem, barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement; barrier_settings.barrier_step_scale = settings.barrier_step_scale; barrier_settings.cudss_deterministic = settings.cudss_deterministic; - barrier_settings.barrier_relative_feasibility_tol = - settings.barrier_relative_feasibility_tolerance; - barrier_settings.barrier_relative_optimality_tol = settings.barrier_relative_optimality_tolerance; - barrier_settings.barrier_relative_complementarity_tol = - settings.barrier_relative_complementarity_tolerance; barrier_settings.barrier_relaxed_feasibility_tol = settings.tolerances.relative_primal_tolerance; barrier_settings.barrier_relaxed_optimality_tol = settings.tolerances.relative_dual_tolerance; barrier_settings.barrier_relaxed_complementarity_tol = settings.tolerances.relative_gap_tolerance; diff --git a/cpp/tests/qp/unit_tests/no_constraints.cu b/cpp/tests/qp/unit_tests/no_constraints.cu index dc99a2ce5c..75190b37fc 100644 --- a/cpp/tests/qp/unit_tests/no_constraints.cu +++ b/cpp/tests/qp/unit_tests/no_constraints.cu @@ -52,8 +52,6 @@ TEST(no_constraints_test, simple_test) op_problem.set_quadratic_objective_matrix(Q_values_host, 2, Q_indices_host, 2, Q_offsets_host, 3); auto settings = cuopt::linear_programming::pdlp_solver_settings_t(); - // Tighter than default: unconstrained QP needs small x at the origin, not just small objective. - settings.barrier_relative_complementarity_tolerance = 1e-12; auto solution = cuopt::linear_programming::solve_lp(op_problem, settings); EXPECT_EQ(solution.get_termination_status(), diff --git a/cpp/tests/qp/unit_tests/two_variable_test.cu b/cpp/tests/qp/unit_tests/two_variable_test.cu index f1fb4852ec..aac618994a 100644 --- a/cpp/tests/qp/unit_tests/two_variable_test.cu +++ b/cpp/tests/qp/unit_tests/two_variable_test.cu @@ -59,7 +59,6 @@ TEST(two_variable_test, simple_test) op_problem.set_quadratic_objective_matrix(Q_values_host, 2, Q_indices_host, 2, Q_offsets_host, 3); auto settings = cuopt::linear_programming::pdlp_solver_settings_t(); - settings.barrier_relative_complementarity_tolerance = 1e-12; auto solution = cuopt::linear_programming::solve_lp(op_problem, settings); EXPECT_EQ(solution.get_termination_status(), From 6b2c011d1142cafd7ef567fc473dff13110db3bf Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 22 May 2026 16:37:26 -0700 Subject: [PATCH 04/10] Clean up barrier and presolve. Native -> Direct free variables. --- cpp/src/barrier/barrier.cu | 202 ++++++------ cpp/src/barrier/dense_vector.hpp | 22 -- cpp/src/dual_simplex/presolve.cpp | 295 +++++------------- cpp/src/dual_simplex/presolve.hpp | 7 +- cpp/src/pdlp/solve.cu | 1 + .../dual_simplex/unit_tests/solve_barrier.cu | 12 +- 6 files changed, 187 insertions(+), 352 deletions(-) diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index b39d75c6ae..a4c4b5b9bd 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -65,23 +65,23 @@ namespace cuopt::linear_programming::dual_simplex { cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream.value()); } -// out[i] = is_native_free_linear[i] ? 0 : a[i] * b[i] -[[maybe_unused]] static void pairwise_multiply_skip_native_free_linear( - float* a, float* b, int* is_native_free_linear, float* out, int size, rmm::cuda_stream_view stream) +// out[i] = is_direct_free_linear[i] ? 0 : a[i] * b[i] +[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear( + float* a, float* b, int* is_direct_free_linear, float* out, int size, rmm::cuda_stream_view stream) { cub::DeviceTransform::Transform( - cuda::std::make_tuple(a, b, is_native_free_linear), + cuda::std::make_tuple(a, b, is_direct_free_linear), out, size, [] __host__ __device__(float x_j, float d_j, int free_j) { return free_j ? 0.f : x_j * d_j; }, stream.value()); } -[[maybe_unused]] static void pairwise_multiply_skip_native_free_linear( - double* a, double* b, int* is_native_free_linear, double* out, int size, rmm::cuda_stream_view stream) +[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear( + double* a, double* b, int* is_direct_free_linear, double* out, int size, rmm::cuda_stream_view stream) { cub::DeviceTransform::Transform( - cuda::std::make_tuple(a, b, is_native_free_linear), + cuda::std::make_tuple(a, b, is_direct_free_linear), out, size, [] __host__ __device__(double x_j, double d_j, int free_j) { return free_j ? 0.0 : x_j * d_j; }, @@ -120,7 +120,7 @@ class iteration_data_t { public: iteration_data_t(const lp_problem_t& lp, i_t num_upper_bounds, - const std::vector& native_free_linear_indices, + const std::vector& direct_free_variables, const csc_matrix_t& Qin, const simplex_solver_settings_t& settings) : upper_bounds(num_upper_bounds), @@ -195,8 +195,8 @@ class iteration_data_t { d_cone_Q_values_(0, lp.handle_ptr->get_stream()), use_augmented(false), has_factorization(false), - n_native_free_linear(0), - d_is_native_free_linear_(0, lp.handle_ptr->get_stream()), + n_direct_free_linear(0), + d_is_direct_free_linear_(0, lp.handle_ptr->get_stream()), num_factorizations(0), has_solve_info(false), settings_(settings), @@ -257,18 +257,18 @@ class iteration_data_t { raft::common::nvtx::range fun_scope("Barrier: LP Data Creation"); { - raft::common::nvtx::range scope("Barrier: LP Data: native free linear"); - // Set up native free linear variable tracking (linear columns only, j < cone_start). - if (!native_free_linear_indices.empty()) { - n_native_free_linear = native_free_linear_indices.size(); - std::vector is_native_free_linear_host(lp.num_cols, 0); - for (i_t j : native_free_linear_indices) { - is_native_free_linear_host[j] = 1; + raft::common::nvtx::range scope("Barrier: LP Data: direct free linear"); + // Set up direct free linear variable tracking (linear columns only, j < cone_start). + if (!direct_free_variables.empty()) { + n_direct_free_linear = direct_free_variables.size(); + std::vector is_direct_free_linear_host(lp.num_cols, 0); + for (i_t j : direct_free_variables) { + is_direct_free_linear_host[j] = 1; } - d_is_native_free_linear_.resize(lp.num_cols, stream_view_); + d_is_direct_free_linear_.resize(lp.num_cols, stream_view_); raft::copy( - d_is_native_free_linear_.data(), is_native_free_linear_host.data(), lp.num_cols, stream_view_); - settings.log.printf("Native free linear (QP): %d\n", n_native_free_linear); + d_is_direct_free_linear_.data(), is_direct_free_linear_host.data(), lp.num_cols, stream_view_); + settings.log.printf("Direct free linear (QP): %d\n", n_direct_free_linear); } } @@ -572,8 +572,8 @@ class iteration_data_t { { const bool has_soc = has_cones(); f_t degree = static_cast(num_primal_variables) + static_cast(num_upper_bounds); - // Native QP free variables (linear only): no x·z complementarity in the barrier degree. - degree -= static_cast(n_native_free_linear); + // Direct QP free variables (linear only): no x·z complementarity in the barrier degree. + degree -= static_cast(n_direct_free_linear); if (has_soc) { degree -= static_cast(cone_entry_count()); degree += static_cast(cone_count()); @@ -781,7 +781,7 @@ class iteration_data_t { const i_t linear_n = has_soc ? cone_start() : n; // Primal diagonal: linear block includes dual_perturb; SOC block is filled by scatter below. - // Native free variables use barrier D = 0 in augmented_multiply; omit span_diag[j] here so + // Direct free variables use barrier D = 0 in augmented_multiply; omit span_diag[j] here so // the factorized matrix matches the matvec (only -q_diag - dual_perturb on the diagonal). thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()), thrust::make_counting_iterator(0), @@ -790,10 +790,10 @@ class iteration_data_t { span_diag_indices = cuopt::make_span(d_augmented_diagonal_indices_), span_q_diag = cuopt::make_span(d_Q_diag_), span_diag = cuopt::make_span(d_diag_), - span_is_native_free_linear = cuopt::make_span(d_is_native_free_linear_), + span_is_direct_free_linear = cuopt::make_span(d_is_direct_free_linear_), dual_perturb_value = dual_perturb] __device__(i_t j) { f_t q_diag = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0; - const f_t d_j = (span_is_native_free_linear.size() > 0 && span_is_native_free_linear[j]) ? f_t(0) : span_diag[j]; + const f_t d_j = (span_is_direct_free_linear.size() > 0 && span_is_direct_free_linear[j]) ? f_t(0) : span_diag[j]; span_x[span_diag_indices[j]] = -q_diag - d_j - dual_perturb_value; }); RAFT_CHECK_CUDA(handle_ptr->get_stream()); @@ -1725,11 +1725,11 @@ class iteration_data_t { rmm::device_uvector d_r1(n, handle_ptr->get_stream()); thrust::fill_n(rmm::exec_policy(stream_view_), d_r1.begin(), n, f_t(0)); - // r1 <- D * x_1 on linear indices; barrier D is zero on native free variables + // r1 <- D * x_1 on linear indices; barrier D is zero on direct free variables const i_t linear_n = has_soc ? cone_start() : n; - if (n_native_free_linear > 0) { - pairwise_multiply_skip_native_free_linear( - d_x1.data(), d_diag_.data(), d_is_native_free_linear_.data(), d_r1.data(), linear_n, stream_view_); + if (n_direct_free_linear > 0) { + pairwise_multiply_skip_direct_free_linear( + d_x1.data(), d_diag_.data(), d_is_direct_free_linear_.data(), d_r1.data(), linear_n, stream_view_); } else { pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), linear_n, stream_view_); } @@ -1852,8 +1852,8 @@ class iteration_data_t { bool use_augmented; i_t symbolic_status; - i_t n_native_free_linear{0}; - rmm::device_uvector d_is_native_free_linear_; // 1 if native free linear (j < cone_start), else 0 + i_t n_direct_free_linear{0}; + rmm::device_uvector d_is_direct_free_linear_; // 1 if direct free linear (j < cone_start), else 0 // Adaptive regularization for the augmented system f_t dual_perturb{1e-8}; @@ -2066,7 +2066,18 @@ int barrier_solver_t::initial_point(iteration_data_t& data) raft::common::nvtx::range fun_scope("Barrier: initial_point"); const bool use_augmented = data.use_augmented; const bool has_soc = data.has_cones(); - const bool has_native_free_linear = data.n_native_free_linear > 0; + const bool has_direct_free_linear = data.n_direct_free_linear > 0; + + // Mask used by the two ADAT/augmented branches below to enforce z > 0 on the linear block + // while leaving the SOC cone block (kept feasible by NT scaling) alone. + std::vector nonnegative_z(lp.num_cols, 1); + if (has_soc) { + const i_t cone_start = data.cone_start(); + const i_t cone_end = data.cone_end(); + for (i_t i = cone_start; i < cone_end; ++i) { + nonnegative_z[i] = 0; + } + } // Perform a numerical factorization i_t status; @@ -2215,8 +2226,8 @@ int barrier_solver_t::initial_point(iteration_data_t& data) data.x[j] = mu; data.z[j] = mu; } - if (has_native_free_linear) { - for (i_t j : presolve_info.native_free_linear_indices) { + if (has_direct_free_linear) { + for (i_t j : presolve_info.direct_free_variables) { if (j < cs) { data.z[j] = 0.0; } } } @@ -2285,8 +2296,8 @@ int barrier_solver_t::initial_point(iteration_data_t& data) } } // Free variables have z = 0 (no complementarity condition) - if (has_native_free_linear) { - for (i_t j : presolve_info.native_free_linear_indices) { + if (has_direct_free_linear) { + for (i_t j : presolve_info.direct_free_variables) { data.z[j] = 0.0; } } @@ -2310,7 +2321,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) data.v.multiply_scalar(-1.0); data.v.ensure_positive(epsilon_adjust); - data.z.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count()); + data.z.ensure_positive(epsilon_adjust, nonnegative_z); } else { // First compute rhs = A*Dinv*c dense_vector_t rhs(lp.num_rows); @@ -2334,7 +2345,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) data.gather_upper_bounds(data.z, data.v); data.v.multiply_scalar(-1.0); data.v.ensure_positive(epsilon_adjust); - data.z.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count()); + data.z.ensure_positive(epsilon_adjust, nonnegative_z); } // Verify A'*y + z - E*v - Q*x = c @@ -2351,32 +2362,25 @@ int barrier_solver_t::initial_point(iteration_data_t& data) settings.log.printf("||A^T y + z - E*v - Q*x - c ||: %e\n", vector_norm2(data.dual_residual)); #endif - // Make sure (w, x, v, z) > 0. Cone blocks are not nonnegative orthant variables — skip them for - // box positivity; NT scaling keeps cone iterates feasible separately. - std::vector free_x_save; - if (has_native_free_linear) { - free_x_save.resize(data.n_native_free_linear); - for (i_t k = 0; k < data.n_native_free_linear; ++k) { - free_x_save[k] = data.x[presolve_info.native_free_linear_indices[k]]; - } - } + // Make sure (w, x, v, z) > 0. Skip cone variables and free variables being handled directly. data.w.ensure_positive(epsilon_adjust); + std::vector nonnegative_variables(data.x.size(), 1); if (has_soc) { - data.x.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count()); - } else if (has_native_free_linear) { - std::vector nonnegative_variables(data.x.size(), 1); - for (i_t j : presolve_info.native_free_linear_indices) { + const i_t cone_start = data.cone_start(); + const i_t cone_end = data.cone_end(); + for (i_t i = cone_start; i < cone_end; ++i) { + nonnegative_variables[i] = 0; + } + } + if (has_direct_free_linear) { + for (i_t j : presolve_info.direct_free_variables) { nonnegative_variables[j] = 0; } - data.x.ensure_positive(epsilon_adjust, nonnegative_variables); - } else { - data.x.ensure_positive(epsilon_adjust); } - // Native free variables (QP/SOCP): restore possibly negative x and set z = 0 - if (has_native_free_linear) { - for (i_t k = 0; k < data.n_native_free_linear; ++k) { - i_t j = presolve_info.native_free_linear_indices[k]; - data.x[j] = free_x_save[k]; + data.x.ensure_positive(epsilon_adjust, nonnegative_variables); + // Direct free variables: reduced cost z = 0 (no complementarity condition). + if (has_direct_free_linear) { + for (i_t j : presolve_info.direct_free_variables) { data.z[j] = 0.0; } } @@ -2592,23 +2596,23 @@ f_t barrier_solver_t::gpu_max_step_to_boundary(iteration_data_t& dx) { const bool has_soc = data.has_cones() && static_cast(x.size()) >= data.cone_end(); - const bool has_native_free_linear = data.n_native_free_linear > 0 && static_cast(x.size()) == lp.num_cols; + const bool has_direct_free_linear = data.n_direct_free_linear > 0 && static_cast(x.size()) == lp.num_cols; auto reduce_segment = [&](i_t start, i_t len) -> f_t { if (len <= 0) { return f_t(1); } - if (has_native_free_linear) { - auto native_free_linear_ptr = data.d_is_native_free_linear_.data() + start; + if (has_direct_free_linear) { + auto direct_free_linear_ptr = data.d_is_direct_free_linear_.data() + start; // step size computation for nonnegative variables with free variables - auto ratio_test_free = [native_free_linear_ptr] HD(const thrust::tuple t) { + auto ratio_test_free = [direct_free_linear_ptr] HD(const thrust::tuple t) { const f_t dx_val = thrust::get<0>(t); const f_t x_val = thrust::get<1>(t); - const i_t is_native_free_linear = thrust::get<2>(t); - if (is_native_free_linear) return f_t(1.0); + const i_t is_direct_free_linear = thrust::get<2>(t); + if (is_direct_free_linear) return f_t(1.0); if (dx_val < f_t(0.0)) return -x_val / dx_val; return f_t(1.0); }; return data.transform_reduce_helper_.transform_reduce( - thrust::make_zip_iterator(dx.data() + start, x.data() + start, native_free_linear_ptr), + thrust::make_zip_iterator(dx.data() + start, x.data() + start, direct_free_linear_ptr), thrust::minimum(), ratio_test_free, f_t(1.0), @@ -2661,7 +2665,7 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t 0; + const bool has_direct_free_linear = data.n_direct_free_linear > 0; const i_t m_c = data.cone_entry_count(); const i_t cone_var_start = data.cone_start(); const i_t linear_size = data.linear_xz_size(lp.num_cols); @@ -2673,14 +2677,14 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t x, raft::device_span dz_span) { if (dz_span.empty()) return; - if (has_native_free_linear) { + if (has_direct_free_linear) { cub::DeviceTransform::Transform( cuda::std::make_tuple( - target.data(), z.data(), dx_span.data(), x.data(), data.d_is_native_free_linear_.data()), + target.data(), z.data(), dx_span.data(), x.data(), data.d_is_direct_free_linear_.data()), dz_span.data(), dz_span.size(), - [] HD(f_t target_val, f_t z_val, f_t dx_val, f_t x_val, i_t is_native_free_linear) { - if (is_native_free_linear) return f_t(0); + [] HD(f_t target_val, f_t z_val, f_t dx_val, f_t x_val, i_t is_direct_free_linear) { + if (is_direct_free_linear) return f_t(0); return target_val - (z_val * dx_val) / x_val; }, stream_view_.value()); @@ -2730,16 +2734,16 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t 0 && data.Q_diagonal) { cub::DeviceTransform::Transform( cuda::std::make_tuple( - data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data(), data.d_Q_diag_.data()), + data.d_z_.data(), data.d_x_.data(), data.d_is_direct_free_linear_.data(), data.d_Q_diag_.data()), data.d_diag_.data(), linear_size, - [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_native_free_linear, f_t q_jj) { - if (!is_native_free_linear) return z_j / x_j; + [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_direct_free_linear, f_t q_jj) { + if (!is_direct_free_linear) return z_j / x_j; return (q_jj > f_t(0)) ? f_t(0) : free_var_reg; }, stream_view_.value()); } else { cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data()), + cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_direct_free_linear_.data()), data.d_diag_.data(), linear_size, - [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_native_free_linear) { - return is_native_free_linear ? free_var_reg : (z_j / x_j); + [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_direct_free_linear) { + return is_direct_free_linear ? free_var_reg : (z_j / x_j); }, stream_view_.value()); } @@ -2874,7 +2878,7 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t& data, rmm::cuda_stream_view stream) { if (target.empty()) return; - const bool has_native_free_linear = data.n_native_free_linear > 0; - if (has_native_free_linear) { + const bool has_direct_free_linear = data.n_direct_free_linear > 0; + if (has_direct_free_linear) { cub::DeviceTransform::Transform( - cuda::std::make_tuple(xz_rhs.data(), x.data(), data.d_is_native_free_linear_.data()), + cuda::std::make_tuple(xz_rhs.data(), x.data(), data.d_is_direct_free_linear_.data()), target.data(), target.size(), - [] HD(f_t complementarity_xz_rhs, f_t x_val, i_t is_native_free_linear) { - if (is_native_free_linear) return f_t(0); + [] HD(f_t complementarity_xz_rhs, f_t x_val, i_t is_direct_free_linear) { + if (is_direct_free_linear) return f_t(0); return complementarity_xz_rhs / x_val; }, stream.value()); @@ -3730,21 +3734,21 @@ void barrier_solver_t::compute_cc_rhs(iteration_data_t& data { raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs"); const bool has_soc = data.has_cones(); - const bool has_native_free_linear = data.n_native_free_linear > 0; + const bool has_direct_free_linear = data.n_direct_free_linear > 0; const i_t linear_size = data.linear_xz_size(lp.num_cols); auto fill_linear_cc_rhs = [&](raft::device_span out, raft::device_span dx_aff, raft::device_span dz_aff) { if (out.empty()) return; - if (has_native_free_linear) { - auto native_free_linear_ptr = data.d_is_native_free_linear_.data(); + if (has_direct_free_linear) { + auto direct_free_linear_ptr = data.d_is_direct_free_linear_.data(); cub::DeviceTransform::Transform( - cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), native_free_linear_ptr), + cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), direct_free_linear_ptr), out.data(), out.size(), - [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_native_free_linear) { - return is_native_free_linear ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu); + [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_direct_free_linear) { + return is_direct_free_linear ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu); }, stream_view_.value()); } else { @@ -4263,7 +4267,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, lp_solution_t 0) { create_Q(lp, Q); } iteration_data_t data( - lp, num_upper_bounds, presolve_info.native_free_linear_indices, Q, settings); + lp, num_upper_bounds, presolve_info.direct_free_variables, Q, settings); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; diff --git a/cpp/src/barrier/dense_vector.hpp b/cpp/src/barrier/dense_vector.hpp index 2d5d0b335c..5d6f2b12ec 100644 --- a/cpp/src/barrier/dense_vector.hpp +++ b/cpp/src/barrier/dense_vector.hpp @@ -184,28 +184,6 @@ class dense_vector_t : public std::vector { } } - void ensure_positive_skip_range(f_t epsilon_adjust, i_t skip_start, i_t skip_count) - { - if (skip_count == 0) { - ensure_positive(epsilon_adjust); - return; - } - const i_t n = this->size(); - const i_t skip_end = skip_start + skip_count; - f_t min_val = std::numeric_limits::max(); - for (i_t i = 0; i < n; i++) { - if (i >= skip_start && i < skip_end) continue; - min_val = std::min(min_val, (*this)[i]); - } - if (min_val <= 0.0) { - const f_t delta = -min_val + epsilon_adjust; - for (i_t i = 0; i < n; i++) { - if (i >= skip_start && i < skip_end) continue; - (*this)[i] += delta; - } - } - } - void ensure_positive(f_t epsilon_adjust, const std::vector& mask) { f_t min_x = inf; diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 721465d2a1..6676660c29 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -33,10 +33,18 @@ i_t remove_empty_cols(lp_problem_t& problem, { constexpr bool verbose = false; if (verbose) { printf("Removing %d empty columns\n", num_empty_cols); } + // We have a variable x_j that does not appear in any rows + // The cost function + // sum_{k != j} c_k * x_k + c_j * x_j + // becomes + // sum_{k != j} c_k * x_k + c_j * l_j if c_j > 0 + // or + // sum_{k != j} c_k * x_k + c_j * u_j if c_j < 0 presolve_info.removed_variables.reserve(num_empty_cols); presolve_info.removed_values.reserve(num_empty_cols); presolve_info.removed_reduced_costs.reserve(num_empty_cols); + // Check to see if a variable participates in a quadratic objective std::vector has_quadratic_term(problem.num_cols, false); i_t linear_cols = linear_var_count(problem); @@ -45,6 +53,7 @@ i_t remove_empty_cols(lp_problem_t& problem, const i_t row_start = problem.Q.row_start[j]; const i_t row_end = problem.Q.row_start[j + 1]; if (row_end - row_start == 0) { continue; } + // Q is symmetric, so its sufficient to check only the row size has_quadratic_term[j] = true; } } @@ -923,8 +932,8 @@ void convert_user_problem(const user_problem_t& user_problem, problem.Q.x = user_problem.Q_values; } - // Artificial vars break [linear | cone]; only for barrier_presolve-off LP-style layouts. - if (!settings.barrier_presolve && problem.second_order_cone_dims.empty()) { + // Add artifical variables + if (!settings.barrier_presolve) { add_artifical_variables(problem, user_problem.range_rows, equality_rows, new_slacks); } } @@ -946,7 +955,6 @@ i_t presolve(const lp_problem_t& original, if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; } } - // Barrier presolve phase 1: bound free linear variables from equality rows. if (settings.barrier_presolve && free_variables > 0) { // Try to remove free variables std::vector constraints_to_check; @@ -978,25 +986,25 @@ i_t presolve(const lp_problem_t& original, std::vector upper_bound_coefficient(problem.num_cols, 0.0); if (!constraints_to_check.empty()) { + // Check if the constraints are feasible csr_matrix_t Arow(0, 0, 0); problem.A.to_compressed_row(Arow); - // Keep only rows safe for phase-1 bound inference: no cone columns, exactly one free linear. + // Keep only rows safe for bound inference: no cone columns + if (has_cones) { std::vector safe_constraints; safe_constraints.reserve(constraints_to_check.size()); for (i_t i : constraints_to_check) { bool touches_cone = false; - i_t free_linear_in_row = 0; for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) { const i_t j = Arow.j[p]; if (j >= linear_cols) { touches_cone = true; continue; } - if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_linear_in_row++; } } - if (touches_cone || free_linear_in_row != 1) { continue; } + if (touches_cone) { continue; } safe_constraints.push_back(i); } constraints_to_check.swap(safe_constraints); @@ -1127,9 +1135,6 @@ i_t presolve(const lp_problem_t& original, problem.lower[j] = -inf; } } - if (!(problem.lower[j] == -inf && problem.upper[j] == inf)) { - presolve_info.phase1_bounded_linear_indices.push_back(j); - } } // Record bounded free variables for dual correction in uncrush. @@ -1160,7 +1165,6 @@ i_t presolve(const lp_problem_t& original, } } - // Barrier presolve phase 2: negate one-sided bounds (-inf < x <= u -> -u <= x < inf). // The original problem may have a variable without a lower bound // but a finite upper bound // -inf < x_j <= u_j (linear variables only) @@ -1211,8 +1215,8 @@ i_t presolve(const lp_problem_t& original, } } - // Barrier presolve phase 3: shift nonzero lower bounds to zero (cone/stack columns not shifted). - // 0 != l_j <= x_j <= u_j (linear variables only; cone/stack columns are not shifted) + // The original problem may have nonzero lower bounds + // 0 != l_j <= x_j <= u_j i_t nonzero_lower_bounds = 0; for (i_t j = 0; j < linear_cols; j++) { if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) { nonzero_lower_bounds++; } @@ -1287,7 +1291,7 @@ i_t presolve(const lp_problem_t& original, } } - // Barrier presolve phase 4: remove empty rows and empty linear columns. + // Check for empty rows i_t num_empty_rows = 0; { csr_matrix_t Arow(0, 0, 0); @@ -1321,26 +1325,24 @@ i_t presolve(const lp_problem_t& original, } problem.Q.check_matrix("Before free variable expansion"); - // Barrier presolve phase 5: free linear variables — 5a native (QP/SOCP) or 5b v-w split (LP). - // QP/SOCP augmented system: keep free linears as-is (no x = v - w); barrier uses D = 0 on them. - const bool register_native_free_linear = free_variables > 0 && (problem.Q.n > 0 || has_cones); - if (register_native_free_linear) { + // Free linear variables. We handle them directly in QP/SOCP or split them in LP. + const bool direct_free_linear = settings.barrier_presolve && free_variables > 0 && (problem.Q.n > 0 || has_cones); + if (direct_free_linear) { presolve_info.free_variable_pairs.clear(); - presolve_info.native_free_linear_indices.clear(); - // Only linear decision variables can be "native" free variables; cone/stack columns + presolve_info.direct_free_variables.clear(); + // Only free linear decision variables need to be handled; cone/stack columns // are unbounded by construction and must not be counted here. - i_t native_free_count = 0; + i_t direct_free_count = 0; for (i_t j = 0; j < linear_cols; j++) { if (problem.lower[j] == -inf && problem.upper[j] == inf) { - presolve_info.native_free_linear_indices.push_back(j); - native_free_count++; + presolve_info.direct_free_variables.push_back(j); + direct_free_count++; } } - settings.log.printf( - "Keeping %d native free linear variables for augmented-system barrier (QP/SOCP)\n", - native_free_count); + settings.log.printf("Handling %d free variables directly in augmented system\n", + direct_free_count); } else if (settings.barrier_presolve && !has_cones && free_variables > 0) { - // Phase 5b: x_j = v - w with v, w >= 0 (LP without cones; SOCP/QP use phase 5a). + // For pure LP problems (Q is empty and there are no cones in this branch) // We have a variable x_j: with -inf < x_j < inf // we create new variables v and w with 0 <= v, w and x_j = v - w // Constraints @@ -1353,191 +1355,55 @@ i_t presolve(const lp_problem_t& original, // becomes // sum_{k != j} c_k x_k + c_j v - c_j w - const i_t old_num_cols = problem.num_cols; - const i_t new_cone_start = - problem.second_order_cone_dims.empty() ? 0 : linear_cols + free_variables; - const i_t num_cols = old_num_cols + free_variables; - - auto old_A = problem.A; - auto old_Q = problem.Q; - auto old_objective = problem.objective; - auto old_lower = problem.lower; - auto old_upper = problem.upper; - - std::vector partner_index(old_num_cols, -1); - std::vector orig_to_new(old_num_cols, -1); - std::vector is_free(old_num_cols, false); - - i_t next_partner = linear_cols; - for (i_t j = 0; j < linear_cols; ++j) { - orig_to_new[j] = j; - if (old_lower[j] == -inf && old_upper[j] == inf) { - is_free[j] = true; - partner_index[j] = next_partner++; - } - } - for (i_t j = linear_cols; j < old_num_cols; ++j) { - orig_to_new[j] = j + free_variables; - } - assert(next_partner == new_cone_start || problem.second_order_cone_dims.empty()); - - i_t nnz = old_A.col_start[old_num_cols]; - for (i_t j = 0; j < linear_cols; ++j) { - if (is_free[j]) { nnz += old_A.col_start[j + 1] - old_A.col_start[j]; } - } - - csc_matrix_t expanded_A(problem.A.m, num_cols, nnz); - i_t nz = 0; - for (i_t j = 0; j < linear_cols; ++j) { - expanded_A.col_start[j] = nz; - for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { - expanded_A.i[nz] = old_A.i[p]; - expanded_A.x[nz] = old_A.x[p]; - ++nz; - } - } - for (i_t j = 0; j < linear_cols; ++j) { - if (partner_index[j] != -1) { - expanded_A.col_start[partner_index[j]] = nz; - for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { - expanded_A.i[nz] = old_A.i[p]; - expanded_A.x[nz] = -old_A.x[p]; - ++nz; - } - } - } - for (i_t j = linear_cols; j < old_num_cols; ++j) { - i_t new_j = orig_to_new[j]; - expanded_A.col_start[new_j] = nz; - for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) { - expanded_A.i[nz] = old_A.i[p]; - expanded_A.x[nz] = old_A.x[p]; - ++nz; - } - } - expanded_A.col_start[num_cols] = nz; - - std::vector objective(num_cols); - std::vector lower(num_cols, -INFINITY); - std::vector upper(num_cols, INFINITY); - presolve_info.free_variable_pairs.clear(); - presolve_info.free_variable_pairs.reserve(free_variables * 2); - - for (i_t j = 0; j < linear_cols; ++j) { - objective[j] = old_objective[j]; - if (is_free[j]) { - lower[j] = 0.0; - upper[j] = inf; - objective[partner_index[j]] = -old_objective[j]; - lower[partner_index[j]] = 0.0; - upper[partner_index[j]] = inf; - presolve_info.free_variable_pairs.push_back(j); - presolve_info.free_variable_pairs.push_back(partner_index[j]); - } else { - lower[j] = old_lower[j]; - upper[j] = old_upper[j]; + i_t num_cols = problem.num_cols + free_variables; + i_t nnz = problem.A.col_start[problem.num_cols]; + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] == inf) { + nnz += (problem.A.col_start[j + 1] - problem.A.col_start[j]); } } - for (i_t j = linear_cols; j < old_num_cols; ++j) { - i_t new_j = orig_to_new[j]; - objective[new_j] = old_objective[j]; - lower[new_j] = old_lower[j]; - upper[new_j] = old_upper[j]; - } - if (old_Q.n > 0) { - std::vector row_counts(num_cols, 0); - i_t nz_count = 0; - for (i_t row = 0; row < old_Q.n; ++row) { - i_t new_row = orig_to_new[row]; - i_t partner_row = partner_index[row]; - i_t q_start = old_Q.row_start[row]; - i_t q_end = old_Q.row_start[row + 1]; - for (i_t qj = q_start; qj < q_end; ++qj) { - i_t col = old_Q.j[qj]; - i_t partner_col = partner_index[col]; - row_counts[new_row]++; - nz_count++; - if (partner_col != -1) { - row_counts[new_row]++; - nz_count++; - } - if (partner_row != -1) { - row_counts[partner_row]++; - nz_count++; - if (partner_col != -1) { - row_counts[partner_row]++; - nz_count++; - } - } - } - } + problem.A.col_start.resize(num_cols + 1); + problem.A.i.resize(nnz); + problem.A.x.resize(nnz); + problem.lower.resize(num_cols); + problem.upper.resize(num_cols); + problem.objective.resize(num_cols); - std::vector Q_row_start(num_cols + 1); - Q_row_start[0] = 0; - for (i_t row = 0; row < num_cols; ++row) { - Q_row_start[row + 1] = Q_row_start[row] + row_counts[row]; - } - std::vector Q_j(nz_count); - std::vector Q_x(nz_count); - auto row_starts = Q_row_start; - for (i_t row = 0; row < old_Q.n; ++row) { - i_t new_row = orig_to_new[row]; - i_t partner_row = partner_index[row]; - i_t q_start = old_Q.row_start[row]; - i_t q_end = old_Q.row_start[row + 1]; - for (i_t qj = q_start; qj < q_end; ++qj) { - i_t col = old_Q.j[qj]; - f_t qij = old_Q.x[qj]; - i_t new_col = orig_to_new[col]; - i_t partner_col = partner_index[col]; - - Q_j[row_starts[new_row]] = new_col; - Q_x[row_starts[new_row]] = qij; - row_starts[new_row]++; - - if (partner_col != -1) { - Q_j[row_starts[new_row]] = partner_col; - Q_x[row_starts[new_row]] = -qij; - row_starts[new_row]++; - } - if (partner_row != -1) { - Q_j[row_starts[partner_row]] = new_col; - Q_x[row_starts[partner_row]] = -qij; - row_starts[partner_row]++; - if (partner_col != -1) { - Q_j[row_starts[partner_row]] = partner_col; - Q_x[row_starts[partner_row]] = qij; - row_starts[partner_row]++; - } - } + presolve_info.free_variable_pairs.resize(free_variables * 2); + i_t pair_count = 0; + i_t q = problem.A.col_start[problem.num_cols]; + i_t col = problem.num_cols; + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] == inf) { + for (i_t p = problem.A.col_start[j]; p < problem.A.col_start[j + 1]; p++) { + i_t i = problem.A.i[p]; + f_t aij = problem.A.x[p]; + problem.A.i[q] = i; + problem.A.x[q] = -aij; + q++; } + problem.lower[col] = 0.0; + problem.upper[col] = inf; + problem.objective[col] = -problem.objective[j]; + presolve_info.free_variable_pairs[pair_count++] = j; + presolve_info.free_variable_pairs[pair_count++] = col; + problem.A.col_start[++col] = q; + problem.lower[j] = 0.0; } - - problem.Q.m = problem.Q.n = num_cols; - problem.Q.nz_max = Q_row_start[num_cols]; - problem.Q.row_start = Q_row_start; - problem.Q.j = Q_j; - problem.Q.x = Q_x; - problem.Q.check_matrix("After free variable expansion"); } - problem.A = expanded_A; - problem.A.n = num_cols; - problem.objective = objective; - problem.lower = lower; - problem.upper = upper; - problem.num_cols = num_cols; - if (!problem.second_order_cone_dims.empty()) { problem.cone_var_start = new_cone_start; } + problem.A.n = num_cols; + problem.num_cols = num_cols; } if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0 && - problem.second_order_cone_dims.empty()) { + !has_cones) { folding(problem, settings, presolve_info); } // Check for dependent rows - bool check_dependent_rows = false; // settings.barrier; + bool check_dependent_rows = false; if (check_dependent_rows) { std::vector dependent_rows; constexpr i_t kOk = -1; @@ -1892,25 +1758,13 @@ void uncrush_solution(const presolve_info_t& presolve_info, if (num_free_variables > 0) { settings.log.printf("Post-solve: Handling free variables %d\n", num_free_variables); // We added free variables so we need to map the crushed solution back to the original variables - std::vector remove_partner(input_x.size(), false); for (i_t k = 0; k < 2 * num_free_variables; k += 2) { const i_t u = free_variable_pairs[k]; const i_t v = free_variable_pairs[k + 1]; input_x[u] -= input_x[v]; - remove_partner[v] = true; - } - std::vector compact_x; - std::vector compact_z; - compact_x.reserve(input_x.size() - num_free_variables); - compact_z.reserve(input_z.size() - num_free_variables); - for (i_t j = 0; j < static_cast(input_x.size()); ++j) { - if (!remove_partner[j]) { - compact_x.push_back(input_x[j]); - compact_z.push_back(input_z[j]); - } } - input_x = compact_x; - input_z = compact_z; + input_z.resize(input_z.size() - num_free_variables); + input_x.resize(input_x.size() - num_free_variables); } if (presolve_info.removed_variables.size() > 0) { @@ -1981,11 +1835,6 @@ void uncrush_solution(const presolve_info_t& presolve_info, } } - if (!presolve_info.phase1_bounded_linear_indices.empty()) { - settings.log.printf("Post-solve: %d linear column(s) had phase-1 bound tightening (x unchanged)\n", - static_cast(presolve_info.phase1_bounded_linear_indices.size())); - } - // Dual correction for originally free variables that received implied bounds. // Barrier produced (y, z) with z_j != 0 satisfying A^T y + z = c + Qx. // We need corrected (y_bar, z_bar) with z_bar_j = 0 for all j in F_b where @@ -2004,8 +1853,6 @@ void uncrush_solution(const presolve_info_t& presolve_info, settings.log.printf("Post-solve: Correcting duals for %d bounded free variables\n", static_cast(presolve_info.bounded_free_variables.size())); const csc_matrix_t& A = original_problem.A; - csr_matrix_t A_row(A.m, A.n, A.nnz()); - A.to_compressed_row(A_row); // Traverse in reverse order, to ensure that all z_j = 0 after the correction for (auto it = presolve_info.bounded_free_variables.rbegin(); it != presolve_info.bounded_free_variables.rend(); @@ -2015,9 +1862,15 @@ void uncrush_solution(const presolve_info_t& presolve_info, if (w_j == 0.0) { continue; } const f_t du = w_j / bfv.coefficient; input_y[bfv.constraint] += du; - const auto [row_start, row_end] = A_row.get_constraint_range(bfv.constraint); - for (i_t p = row_start; p < row_end; ++p) { - input_z[A_row.j[p]] -= A_row.x[p] * du; + for (i_t j = 0; j < A.n; j++) { + const i_t col_start = A.col_start[j]; + const i_t col_end = A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; p++) { + if (A.i[p] == bfv.constraint) { + input_z[j] -= A.x[p] * du; + break; + } + } } } } diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index bc67627c21..add97eafce 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -165,11 +165,10 @@ struct presolve_info_t { // Variables that were negated to handle -inf < x_j <= u_j std::vector negated_variables; - // Free variable indices for QP/SOCP augmented barrier (not split, handled natively) - std::vector native_free_linear_indices; + // Free variable indices that the barrier solver handles directly in the augmented system + // (not split into v - w). Used for QP/SOCP. + std::vector direct_free_variables; - // Linear columns where phase 1 tightened a fully free var to a one-sided bound (same x index) - std::vector phase1_bounded_linear_indices; // Originally-free variables that received implied bounds, with the constraint used std::vector> bounded_free_variables; }; diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 4bbd2613d3..b747203061 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -505,6 +505,7 @@ run_barrier(dual_simplex::user_problem_t& user_problem, barrier_settings.ordering = settings.ordering; barrier_settings.barrier_dual_initial_point = settings.barrier_dual_initial_point; barrier_settings.barrier = true; + barrier_settings.barrier_presolve = true; barrier_settings.crossover = settings.crossover; barrier_settings.eliminate_dense_columns = settings.eliminate_dense_columns; barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement; diff --git a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu index ae4efb9b86..4058bf3aa9 100644 --- a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu +++ b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu @@ -305,10 +305,10 @@ TEST(barrier, presolve_reindexes_cone_start_after_empty_column_removal) EXPECT_EQ(barrier_lp.cone_var_start, 0); } -TEST(barrier, presolve_keeps_native_free_variables_before_cones) +TEST(barrier, presolve_keeps_direct_free_variables_before_cones) { // Layout: [x0, x1 | cone x2, x3, x4] with x0, x1 free and a 3-dimensional SOC block. - // SOCP barrier presolve keeps native free variables (no x = v - w split); cone_var_start + // SOCP barrier presolve keeps direct free variables (no x = v - w split); cone_var_start // and column count stay unchanged. raft::handle_t handle{}; init_handler(&handle); @@ -361,9 +361,9 @@ TEST(barrier, presolve_keeps_native_free_variables_before_cones) EXPECT_EQ(presolved_lp.cone_var_start, 2); EXPECT_EQ(presolved_lp.second_order_cone_dims, std::vector({3})); EXPECT_TRUE(presolve_info.free_variable_pairs.empty()); - ASSERT_EQ(presolve_info.native_free_linear_indices.size(), 2); - EXPECT_EQ(presolve_info.native_free_linear_indices[0], 0); - EXPECT_EQ(presolve_info.native_free_linear_indices[1], 1); + ASSERT_EQ(presolve_info.direct_free_variables.size(), 2); + EXPECT_EQ(presolve_info.direct_free_variables[0], 0); + EXPECT_EQ(presolve_info.direct_free_variables[1], 1); EXPECT_EQ(presolved_lp.lower[0], -inf); EXPECT_EQ(presolved_lp.lower[1], -inf); EXPECT_EQ(presolved_lp.upper[0], inf); @@ -874,7 +874,7 @@ TEST(barrier, free_linear_prefix_is_uncrushed_correctly_with_soc_block) // u = 1 // (t, u, v) in Q^3 // - // Native free variable l is kept through presolve; end-to-end solve returns + // Direct free variable l is kept through presolve; end-to-end solve returns // l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1. raft::handle_t handle{}; init_handler(&handle); From 03918bdc1bcd918869ad32addb00fc6e008271c3 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 22 May 2026 16:39:44 -0700 Subject: [PATCH 05/10] Style fixes --- cpp/src/barrier/barrier.cu | 309 ++++++++++-------- cpp/src/dual_simplex/presolve.cpp | 22 +- cpp/src/dual_simplex/scaling.cpp | 5 +- .../dual_simplex/simplex_solver_settings.hpp | 4 +- cpp/src/dual_simplex/solve.cpp | 10 +- cpp/src/pdlp/solve.cu | 4 +- .../dual_simplex/unit_tests/solve_barrier.cu | 54 +-- .../data_model/data_model.py | 1 - .../data_model/data_model_wrapper.pxd | 2 +- .../cuopt/cuopt/linear_programming/problem.py | 10 +- 10 files changed, 221 insertions(+), 200 deletions(-) diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index a4c4b5b9bd..375d1b760e 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -66,8 +66,12 @@ namespace cuopt::linear_programming::dual_simplex { } // out[i] = is_direct_free_linear[i] ? 0 : a[i] * b[i] -[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear( - float* a, float* b, int* is_direct_free_linear, float* out, int size, rmm::cuda_stream_view stream) +[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear(float* a, + float* b, + int* is_direct_free_linear, + float* out, + int size, + rmm::cuda_stream_view stream) { cub::DeviceTransform::Transform( cuda::std::make_tuple(a, b, is_direct_free_linear), @@ -77,8 +81,12 @@ namespace cuopt::linear_programming::dual_simplex { stream.value()); } -[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear( - double* a, double* b, int* is_direct_free_linear, double* out, int size, rmm::cuda_stream_view stream) +[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear(double* a, + double* b, + int* is_direct_free_linear, + double* out, + int size, + rmm::cuda_stream_view stream) { cub::DeviceTransform::Transform( cuda::std::make_tuple(a, b, is_direct_free_linear), @@ -266,8 +274,10 @@ class iteration_data_t { is_direct_free_linear_host[j] = 1; } d_is_direct_free_linear_.resize(lp.num_cols, stream_view_); - raft::copy( - d_is_direct_free_linear_.data(), is_direct_free_linear_host.data(), lp.num_cols, stream_view_); + raft::copy(d_is_direct_free_linear_.data(), + is_direct_free_linear_host.data(), + lp.num_cols, + stream_view_); settings.log.printf("Direct free linear (QP): %d\n", n_direct_free_linear); } } @@ -316,9 +326,9 @@ class iteration_data_t { { raft::common::nvtx::range scope("Barrier: LP Data: SOC setup"); if (!lp.second_order_cone_dims.empty()) { - cone_var_start_ = lp.cone_var_start; - i_t total_cone_dim = - std::accumulate(lp.second_order_cone_dims.begin(), lp.second_order_cone_dims.end(), i_t(0)); + cone_var_start_ = lp.cone_var_start; + i_t total_cone_dim = std::accumulate( + lp.second_order_cone_dims.begin(), lp.second_order_cone_dims.end(), i_t(0)); cuopt_assert(cone_var_start_ >= 0, "cone_var_start must be nonnegative"); cuopt_assert(cone_var_start_ + total_cone_dim <= lp.num_cols, "cone variables exceed problem dimension"); @@ -330,7 +340,8 @@ class iteration_data_t { raft::device_span{}, stream_view_); cuopt_assert(cone_count() > 0, "second-order cone topology must contain at least one cone"); - cuopt_assert(cone_entry_count() == total_cone_dim, "second-order cone entry count mismatch"); + cuopt_assert(cone_entry_count() == total_cone_dim, + "second-order cone entry count mismatch"); } } @@ -505,7 +516,7 @@ class iteration_data_t { { raft::common::nvtx::range scope("Barrier: LP Data: Cholesky init"); i_t factorization_size = use_augmented ? lp.num_rows + lp.num_cols : lp.num_rows; - chol = std::make_unique>( + chol = std::make_unique>( handle_ptr, settings, factorization_size); chol->set_positive_definite(false); } @@ -624,91 +635,91 @@ class iteration_data_t { { raft::common::nvtx::range scope("Barrier: augmented: host CSR build"); for (i_t i = 0; i < n; i++) { - augmented_CSR.row_start[i] = q; - - const bool is_cone_row = is_cone_variable(i); - - if (is_cone_row) { - // Determine which cone this variable belongs to and its local row - i_t local_idx = i - cone_start(); - i_t k = 0; - while (k + 1 < cone_count() && - cone_offsets_host[k + 1] <= static_cast(local_idx)) { - k++; - } - i_t local_r = - static_cast(static_cast(local_idx) - cone_offsets_host[k]); - i_t q_k = static_cast(cone_offsets_host[k + 1] - cone_offsets_host[k]); - i_t cone_col_start = cone_start() + static_cast(cone_offsets_host[k]); - i_t block_base = static_cast(cone_block_offsets_host[k]) + local_r * q_k; - - // Merge-join: Q entries (sorted) with dense cone block columns (contiguous) - i_t qp = (nnzQ > 0) ? Q.col_start[i] : 0; - i_t q_end = (nnzQ > 0) ? Q.col_start[i + 1] : 0; - - // Q entries before cone block - while (qp < q_end && Q.i[qp] < cone_col_start) { - augmented_CSR.j[q] = Q.i[qp]; - augmented_CSR.x[q++] = -Q.x[qp]; - off_diag_Qnz++; - qp++; - } + augmented_CSR.row_start[i] = q; - // Dense cone block, absorbing any Q entries that fall inside - for (i_t c = 0; c < q_k; c++) { - i_t col = cone_col_start + c; - f_t q_contrib = f_t(0); - f_t initial_val = (c == local_r) ? f_t(-dual_perturb) - : f_t(0); // diagonal entry of the cone block column + const bool is_cone_row = is_cone_variable(i); - if (qp < q_end && Q.i[qp] == col) { - q_contrib = Q.x[qp]; + if (is_cone_row) { + // Determine which cone this variable belongs to and its local row + i_t local_idx = i - cone_start(); + i_t k = 0; + while (k + 1 < cone_count() && + cone_offsets_host[k + 1] <= static_cast(local_idx)) { + k++; + } + i_t local_r = + static_cast(static_cast(local_idx) - cone_offsets_host[k]); + i_t q_k = static_cast(cone_offsets_host[k + 1] - cone_offsets_host[k]); + i_t cone_col_start = cone_start() + static_cast(cone_offsets_host[k]); + i_t block_base = static_cast(cone_block_offsets_host[k]) + local_r * q_k; + + // Merge-join: Q entries (sorted) with dense cone block columns (contiguous) + i_t qp = (nnzQ > 0) ? Q.col_start[i] : 0; + i_t q_end = (nnzQ > 0) ? Q.col_start[i + 1] : 0; + + // Q entries before cone block + while (qp < q_end && Q.i[qp] < cone_col_start) { + augmented_CSR.j[q] = Q.i[qp]; + augmented_CSR.x[q++] = -Q.x[qp]; + off_diag_Qnz++; qp++; } - cone_csr_indices_host[block_base + c] = q; - cone_Q_values_host[block_base + c] = q_contrib; - if (col == i) { augmented_diagonal_indices[i] = q; } - augmented_CSR.j[q] = col; - augmented_CSR.x[q++] = initial_val - q_contrib; - } + // Dense cone block, absorbing any Q entries that fall inside + for (i_t c = 0; c < q_k; c++) { + i_t col = cone_col_start + c; + f_t q_contrib = f_t(0); + f_t initial_val = (c == local_r) ? f_t(-dual_perturb) + : f_t(0); // diagonal entry of the cone block column + + if (qp < q_end && Q.i[qp] == col) { + q_contrib = Q.x[qp]; + qp++; + } + + cone_csr_indices_host[block_base + c] = q; + cone_Q_values_host[block_base + c] = q_contrib; + if (col == i) { augmented_diagonal_indices[i] = q; } + augmented_CSR.j[q] = col; + augmented_CSR.x[q++] = initial_val - q_contrib; + } - // Q entries after cone block - while (qp < q_end) { - augmented_CSR.j[q] = Q.i[qp]; - augmented_CSR.x[q++] = -Q.x[qp]; - off_diag_Qnz++; - qp++; - } - } else if (nnzQ == 0) { - augmented_diagonal_indices[i] = q; - augmented_CSR.j[q] = i; - augmented_CSR.x[q++] = -diag[i] - dual_perturb; - } else { - // Q is symmetric - const i_t q_col_beg = Q.col_start[i]; - const i_t q_col_end = Q.col_start[i + 1]; - bool has_diagonal = false; - for (i_t p = q_col_beg; p < q_col_end; ++p) { - augmented_CSR.j[q] = Q.i[p]; - if (Q.i[p] == i) { - has_diagonal = true; - augmented_diagonal_indices[i] = q; - augmented_CSR.x[q++] = -Q.x[p] - diag[i] - dual_perturb; - } else { + // Q entries after cone block + while (qp < q_end) { + augmented_CSR.j[q] = Q.i[qp]; + augmented_CSR.x[q++] = -Q.x[qp]; off_diag_Qnz++; - augmented_CSR.x[q++] = -Q.x[p]; + qp++; } - } - if (!has_diagonal) { + } else if (nnzQ == 0) { augmented_diagonal_indices[i] = q; augmented_CSR.j[q] = i; augmented_CSR.x[q++] = -diag[i] - dual_perturb; + } else { + // Q is symmetric + const i_t q_col_beg = Q.col_start[i]; + const i_t q_col_end = Q.col_start[i + 1]; + bool has_diagonal = false; + for (i_t p = q_col_beg; p < q_col_end; ++p) { + augmented_CSR.j[q] = Q.i[p]; + if (Q.i[p] == i) { + has_diagonal = true; + augmented_diagonal_indices[i] = q; + augmented_CSR.x[q++] = -Q.x[p] - diag[i] - dual_perturb; + } else { + off_diag_Qnz++; + augmented_CSR.x[q++] = -Q.x[p]; + } + } + if (!has_diagonal) { + augmented_diagonal_indices[i] = q; + augmented_CSR.j[q] = i; + augmented_CSR.x[q++] = -diag[i] - dual_perturb; + } } - } - // AT block, we can use A in csc directly - const i_t col_beg = A.col_start[i]; - const i_t col_end = A.col_start[i + 1]; + // AT block, we can use A in csc directly + const i_t col_beg = A.col_start[i]; + const i_t col_end = A.col_start[i + 1]; for (i_t p = col_beg; p < col_end; ++p) { augmented_CSR.j[q] = A.i[p] + n; augmented_CSR.x[q++] = A.x[p]; @@ -783,19 +794,22 @@ class iteration_data_t { // Primal diagonal: linear block includes dual_perturb; SOC block is filled by scatter below. // Direct free variables use barrier D = 0 in augmented_multiply; omit span_diag[j] here so // the factorized matrix matches the matvec (only -q_diag - dual_perturb on the diagonal). - thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()), - thrust::make_counting_iterator(0), - linear_n, - [span_x = cuopt::make_span(device_augmented.x), - span_diag_indices = cuopt::make_span(d_augmented_diagonal_indices_), - span_q_diag = cuopt::make_span(d_Q_diag_), - span_diag = cuopt::make_span(d_diag_), - span_is_direct_free_linear = cuopt::make_span(d_is_direct_free_linear_), - dual_perturb_value = dual_perturb] __device__(i_t j) { - f_t q_diag = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0; - const f_t d_j = (span_is_direct_free_linear.size() > 0 && span_is_direct_free_linear[j]) ? f_t(0) : span_diag[j]; - span_x[span_diag_indices[j]] = -q_diag - d_j - dual_perturb_value; - }); + thrust::for_each_n( + rmm::exec_policy(handle_ptr->get_stream()), + thrust::make_counting_iterator(0), + linear_n, + [span_x = cuopt::make_span(device_augmented.x), + span_diag_indices = cuopt::make_span(d_augmented_diagonal_indices_), + span_q_diag = cuopt::make_span(d_Q_diag_), + span_diag = cuopt::make_span(d_diag_), + span_is_direct_free_linear = cuopt::make_span(d_is_direct_free_linear_), + dual_perturb_value = dual_perturb] __device__(i_t j) { + f_t q_diag = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0; + const f_t d_j = (span_is_direct_free_linear.size() > 0 && span_is_direct_free_linear[j]) + ? f_t(0) + : span_diag[j]; + span_x[span_diag_indices[j]] = -q_diag - d_j - dual_perturb_value; + }); RAFT_CHECK_CUDA(handle_ptr->get_stream()); thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()), @@ -1728,8 +1742,12 @@ class iteration_data_t { // r1 <- D * x_1 on linear indices; barrier D is zero on direct free variables const i_t linear_n = has_soc ? cone_start() : n; if (n_direct_free_linear > 0) { - pairwise_multiply_skip_direct_free_linear( - d_x1.data(), d_diag_.data(), d_is_direct_free_linear_.data(), d_r1.data(), linear_n, stream_view_); + pairwise_multiply_skip_direct_free_linear(d_x1.data(), + d_diag_.data(), + d_is_direct_free_linear_.data(), + d_r1.data(), + linear_n, + stream_view_); } else { pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), linear_n, stream_view_); } @@ -1853,7 +1871,8 @@ class iteration_data_t { bool use_augmented; i_t symbolic_status; i_t n_direct_free_linear{0}; - rmm::device_uvector d_is_direct_free_linear_; // 1 if direct free linear (j < cone_start), else 0 + rmm::device_uvector + d_is_direct_free_linear_; // 1 if direct free linear (j < cone_start), else 0 // Adaptive regularization for the augmented system f_t dual_perturb{1e-8}; @@ -2064,9 +2083,9 @@ template int barrier_solver_t::initial_point(iteration_data_t& data) { raft::common::nvtx::range fun_scope("Barrier: initial_point"); - const bool use_augmented = data.use_augmented; - const bool has_soc = data.has_cones(); - const bool has_direct_free_linear = data.n_direct_free_linear > 0; + const bool use_augmented = data.use_augmented; + const bool has_soc = data.has_cones(); + const bool has_direct_free_linear = data.n_direct_free_linear > 0; // Mask used by the two ADAT/augmented branches below to enforce z > 0 on the linear block // while leaving the SOC cone block (kept feasible by NT scaling) alone. @@ -2595,8 +2614,9 @@ f_t barrier_solver_t::gpu_max_step_to_boundary(iteration_data_t& x, const rmm::device_uvector& dx) { - const bool has_soc = data.has_cones() && static_cast(x.size()) >= data.cone_end(); - const bool has_direct_free_linear = data.n_direct_free_linear > 0 && static_cast(x.size()) == lp.num_cols; + const bool has_soc = data.has_cones() && static_cast(x.size()) >= data.cone_end(); + const bool has_direct_free_linear = + data.n_direct_free_linear > 0 && static_cast(x.size()) == lp.num_cols; auto reduce_segment = [&](i_t start, i_t len) -> f_t { if (len <= 0) { return f_t(1); } @@ -2604,8 +2624,8 @@ f_t barrier_solver_t::gpu_max_step_to_boundary(iteration_data_t t) { - const f_t dx_val = thrust::get<0>(t); - const f_t x_val = thrust::get<1>(t); + const f_t dx_val = thrust::get<0>(t); + const f_t x_val = thrust::get<1>(t); const i_t is_direct_free_linear = thrust::get<2>(t); if (is_direct_free_linear) return f_t(1.0); if (dx_val < f_t(0.0)) return -x_val / dx_val; @@ -2662,13 +2682,13 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t 0; - const i_t m_c = data.cone_entry_count(); - const i_t cone_var_start = data.cone_start(); - const i_t linear_size = data.linear_xz_size(lp.num_cols); + const bool debug = false; + const bool use_augmented = data.use_augmented; + const bool has_soc = data.has_cones(); + const bool has_direct_free_linear = data.n_direct_free_linear > 0; + const i_t m_c = data.cone_entry_count(); + const i_t cone_var_start = data.cone_start(); + const i_t linear_size = data.linear_xz_size(lp.num_cols); // Linear (orthant) block only; SOC uses recover_cone_dz_from_target. auto recover_linear_orthant_dz = [&](raft::device_span target, @@ -2750,8 +2770,10 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t 0 && data.Q_diagonal) { cub::DeviceTransform::Transform( - cuda::std::make_tuple( - data.d_z_.data(), data.d_x_.data(), data.d_is_direct_free_linear_.data(), data.d_Q_diag_.data()), + cuda::std::make_tuple(data.d_z_.data(), + data.d_x_.data(), + data.d_is_direct_free_linear_.data(), + data.d_Q_diag_.data()), data.d_diag_.data(), linear_size, [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_direct_free_linear, f_t q_jj) { @@ -2761,7 +2783,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t& data, rmm::cuda_str raft::copy(data.d_y_.data(), data.y.data(), data.y.size(), stream); data.d_upper_bounds_.resize(data.upper_bounds.size(), stream); - raft::copy(data.d_upper_bounds_.data(), - data.upper_bounds.data(), - data.upper_bounds.size(), - stream); + raft::copy( + data.d_upper_bounds_.data(), data.upper_bounds.data(), data.upper_bounds.size(), stream); } // One-time static problem data (constant across barrier iterations). @@ -3493,15 +3515,15 @@ void copy_static_problem_data_to_device(const lp_problem_t& lp, data.d_dw_residual_.resize(data.n_upper_bounds, stream); } -// Per Mehrotra step: affine/corrector RHS only (iterate stays on device from initial sync / next_iterate). +// Per Mehrotra step: affine/corrector RHS only (iterate stays on device from initial sync / +// next_iterate). template void copy_step_rhs_to_device(iteration_data_t& data, rmm::cuda_stream_view stream) { raft::common::nvtx::range fun_scope("Barrier: copy_step_rhs_to_device"); data.d_bound_rhs_.resize(data.bound_rhs.size(), stream); - raft::copy( - data.d_bound_rhs_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream); + raft::copy(data.d_bound_rhs_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream); raft::copy(data.d_h_.data(), data.primal_rhs.data(), data.primal_rhs.size(), stream); raft::copy(data.d_dual_rhs_.data(), data.dual_rhs.data(), data.dual_rhs.size(), stream); @@ -3556,11 +3578,7 @@ void fill_affine_cone_complementarity_target(iteration_data_t& data, auto cone_target = raft::device_span(data.d_complementarity_target_.data() + cone_var_start, m_c); cub::DeviceTransform::Transform( - cones.z.data(), - cone_target.data(), - m_c, - [] HD(f_t z_val) { return -z_val; }, - stream.value()); + cones.z.data(), cone_target.data(), m_c, [] HD(f_t z_val) { return -z_val; }, stream.value()); RAFT_CUDA_TRY(cudaPeekAtLastError()); RAFT_CHECK_CUDA(stream); } @@ -3578,20 +3596,21 @@ void fill_corrector_cone_complementarity_target(iteration_data_t& data cones.z = raft::device_span(data.d_z_.data() + cone_var_start, m_c); auto cone_target = raft::device_span(data.d_complementarity_target_.data() + cone_var_start, m_c); - compute_combined_cone_rhs_term(raft::device_span(data.d_dx_aff_.data() + cone_var_start, m_c), - raft::device_span(data.d_dz_aff_.data() + cone_var_start, m_c), - cones, - sigma_mu, - cone_target, - stream); + compute_combined_cone_rhs_term( + raft::device_span(data.d_dx_aff_.data() + cone_var_start, m_c), + raft::device_span(data.d_dz_aff_.data() + cone_var_start, m_c), + cones, + sigma_mu, + cone_target, + stream); } template void barrier_solver_t::compute_affine_rhs(iteration_data_t& data) { raft::common::nvtx::range fun_scope("Barrier: compute_affine_rhs"); - const bool has_soc = data.has_cones(); - const i_t linear_size = data.linear_xz_size(lp.num_cols); + const bool has_soc = data.has_cones(); + const i_t linear_size = data.linear_xz_size(lp.num_cols); const i_t cone_var_start = data.cone_start(); const i_t m_c = data.cone_entry_count(); @@ -3723,19 +3742,19 @@ void barrier_solver_t::compute_target_mu( stream_view_); complementarity_aff_sum = complementarity_xz_aff_sum + complementarity_wv_aff_sum; - const f_t mu_denom = data.complementarity_degree(data.x.size(), data.n_upper_bounds); - mu_aff = complementarity_aff_sum / mu_denom; - sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); - new_mu = sigma * mu; + const f_t mu_denom = data.complementarity_degree(data.x.size(), data.n_upper_bounds); + mu_aff = complementarity_aff_sum / mu_denom; + sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); + new_mu = sigma * mu; } template void barrier_solver_t::compute_cc_rhs(iteration_data_t& data, f_t& new_mu) { raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs"); - const bool has_soc = data.has_cones(); - const bool has_direct_free_linear = data.n_direct_free_linear > 0; - const i_t linear_size = data.linear_xz_size(lp.num_cols); + const bool has_soc = data.has_cones(); + const bool has_direct_free_linear = data.n_direct_free_linear > 0; + const i_t linear_size = data.linear_xz_size(lp.num_cols); auto fill_linear_cc_rhs = [&](raft::device_span out, raft::device_span dx_aff, diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 6676660c29..de295c0c20 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -944,12 +944,13 @@ i_t presolve(const lp_problem_t& original, lp_problem_t& problem, presolve_info_t& presolve_info) { - problem = original; + problem = original; const i_t linear_cols = linear_var_count(problem); const bool has_cones = !problem.second_order_cone_dims.empty(); std::vector row_sense(problem.num_rows, '='); - // Check for free variables (linear block only; cone columns are handled by the barrier SOC layout) + // Check for free variables (linear block only; cone columns are handled by the barrier SOC + // layout) i_t free_variables = 0; for (i_t j = 0; j < linear_cols; j++) { if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; } @@ -991,12 +992,11 @@ i_t presolve(const lp_problem_t& original, problem.A.to_compressed_row(Arow); // Keep only rows safe for bound inference: no cone columns - if (has_cones) - { + if (has_cones) { std::vector safe_constraints; safe_constraints.reserve(constraints_to_check.size()); for (i_t i : constraints_to_check) { - bool touches_cone = false; + bool touches_cone = false; for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) { const i_t j = Arow.j[p]; if (j >= linear_cols) { @@ -1022,7 +1022,7 @@ i_t presolve(const lp_problem_t& original, i_t last_free_i = -1; f_t last_free_coeff_i = 0.0; for (i_t p = row_start; p < row_end; p++) { - const i_t j = Arow.j[p]; + const i_t j = Arow.j[p]; if (j >= linear_cols) { continue; } const f_t aij = Arow.x[p]; const f_t lower_j = problem.lower[j]; @@ -1075,8 +1075,8 @@ i_t presolve(const lp_problem_t& original, // And we can derive two bounds from this: // x_j >= 1/a_ij * (rhs - lower_activity_i) // x_j <= 1/a_ij * (rhs - upper_activity_i) - const i_t j = last_free_i; - const f_t a_ij = last_free_coeff_i; + const i_t j = last_free_i; + const f_t a_ij = last_free_coeff_i; if (a_ij == 0) { continue; } const f_t max_bound = 1e10; bool bounded = false; @@ -1326,7 +1326,8 @@ i_t presolve(const lp_problem_t& original, problem.Q.check_matrix("Before free variable expansion"); // Free linear variables. We handle them directly in QP/SOCP or split them in LP. - const bool direct_free_linear = settings.barrier_presolve && free_variables > 0 && (problem.Q.n > 0 || has_cones); + const bool direct_free_linear = + settings.barrier_presolve && free_variables > 0 && (problem.Q.n > 0 || has_cones); if (direct_free_linear) { presolve_info.free_variable_pairs.clear(); presolve_info.direct_free_variables.clear(); @@ -1397,8 +1398,7 @@ i_t presolve(const lp_problem_t& original, problem.num_cols = num_cols; } - if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0 && - !has_cones) { + if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0 && !has_cones) { folding(problem, settings, presolve_info); } diff --git a/cpp/src/dual_simplex/scaling.cpp b/cpp/src/dual_simplex/scaling.cpp index 2db6c75b43..852c19e2e1 100644 --- a/cpp/src/dual_simplex/scaling.cpp +++ b/cpp/src/dual_simplex/scaling.cpp @@ -197,9 +197,8 @@ i_t column_scaling(const lp_problem_t& unscaled, max_col_norm = std::max(col_norm, max_col_norm); min_col_norm = std::min(col_norm, min_col_norm); } - settings.log.printf("Scaling matrix. Maximum column norm %e, minimum column norm %e\n", - max_col_norm, - min_col_norm); + settings.log.printf( + "Scaling matrix. Maximum column norm %e, minimum column norm %e\n", max_col_norm, min_col_norm); // scaled_A = unscaled_A * C for (i_t j = 0; j < n; ++j) { diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 66fa94bfc1..5328cf3fe1 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -164,8 +164,8 @@ struct simplex_solver_settings_t { bool eliminate_singletons; // true to eliminate singletons from the basis bool print_presolve_stats; // true to print presolve stats bool barrier_presolve; // bound shifts, free-var bounding; LP v-w split / folding when no cones - bool cudss_deterministic; // true to use cuDSS deterministic mode, false for non-deterministic - bool barrier; // true to use barrier method, false to use dual simplex method + bool cudss_deterministic; // true to use cuDSS deterministic mode, false for non-deterministic + bool barrier; // true to use barrier method, false to use dual simplex method bool deterministic; // true to use B&B deterministic mode, false to use non-deterministic mode bool eliminate_dense_columns; // true to eliminate dense columns from A*D*A^T bool barrier_iterative_refinement; // true to use iterative refinement for barrier method diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 7c23784802..15d51041f7 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -385,9 +385,9 @@ lp_status_t solve_linear_program_with_advanced_basis( template lp_status_t solve_linear_program_with_barrier(const user_problem_t& user_problem, - const simplex_solver_settings_t& settings, - f_t start_time, - lp_solution_t& solution) + const simplex_solver_settings_t& settings, + f_t start_time, + lp_solution_t& solution) { lp_status_t status = lp_status_t::UNSET; lp_problem_t original_lp(user_problem.handle_ptr, 1, 1, 1); @@ -698,8 +698,8 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us // Run crossover lp_solution_t crossover_solution(original_lp.num_rows, original_lp.num_cols); std::vector vstatus(original_lp.num_cols); - crossover_status_t crossover_status = crossover( - original_lp, settings, lp_solution, start_time, crossover_solution, vstatus); + crossover_status_t crossover_status = + crossover(original_lp, settings, lp_solution, start_time, crossover_solution, vstatus); settings.log.printf("Crossover status: %d\n", crossover_status); if (crossover_status == crossover_status_t::OPTIMAL) { barrier_status = lp_status_t::OPTIMAL; } } diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index b747203061..5c90e65cbc 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -508,8 +508,8 @@ run_barrier(dual_simplex::user_problem_t& user_problem, barrier_settings.barrier_presolve = true; barrier_settings.crossover = settings.crossover; barrier_settings.eliminate_dense_columns = settings.eliminate_dense_columns; - barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement; - barrier_settings.barrier_step_scale = settings.barrier_step_scale; + barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement; + barrier_settings.barrier_step_scale = settings.barrier_step_scale; barrier_settings.cudss_deterministic = settings.cudss_deterministic; barrier_settings.barrier_relaxed_feasibility_tol = settings.tolerances.relative_primal_tolerance; barrier_settings.barrier_relaxed_optimality_tol = settings.tolerances.relative_dual_tolerance; diff --git a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu index 4058bf3aa9..45fcd297e8 100644 --- a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu +++ b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu @@ -213,9 +213,9 @@ TEST(barrier, cone_metadata_reindexed_when_slack_is_inserted_before_cones) user_problem.cone_var_start = 1; simplex_solver_settings_t settings; - settings.barrier = true; - settings.dualize = 0; - settings.scale_columns = false; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = false; std::vector new_slacks; dualize_info_t dualize_info; @@ -278,9 +278,9 @@ TEST(barrier, presolve_reindexes_cone_start_after_empty_column_removal) user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); simplex_solver_settings_t settings; - settings.barrier = true; - settings.dualize = 0; - settings.scale_columns = false; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = false; std::vector new_slacks; dualize_info_t dualize_info; @@ -336,7 +336,7 @@ TEST(barrier, presolve_keeps_direct_free_variables_before_cones) user_problem.rhs = {1.0}; user_problem.row_sense = {'E'}; - user_problem.lower = {-inf, -inf, 0.0, 0.0, 0.0}; + user_problem.lower = {-inf, -inf, 0.0, 0.0, 0.0}; user_problem.upper.assign(n, inf); user_problem.num_range_rows = 0; user_problem.cone_var_start = 2; @@ -344,9 +344,9 @@ TEST(barrier, presolve_keeps_direct_free_variables_before_cones) user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); simplex_solver_settings_t settings; - settings.barrier = true; - settings.dualize = 0; - settings.scale_columns = false; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = false; std::vector new_slacks; dualize_info_t dualize_info; @@ -441,8 +441,8 @@ TEST(barrier, rejects_middle_cone_input_before_barrier) user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); simplex_solver_settings_t settings; - settings.barrier = true; - settings.dualize = 0; + settings.barrier = true; + settings.dualize = 0; lp_solution_t solution(m, n); auto status = solve_linear_program_with_barrier(user_problem, settings, solution); @@ -495,8 +495,8 @@ TEST(barrier, socp_min_x0_subject_to_norm_constraint) user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); simplex_solver_settings_t settings; - settings.barrier = true; - settings.dualize = 0; + settings.barrier = true; + settings.dualize = 0; lp_solution_t solution(m, n); auto status = solve_linear_program_with_barrier(user_problem, settings, solution); @@ -558,8 +558,8 @@ TEST(barrier, mixed_linear_and_soc_block) user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); simplex_solver_settings_t settings; - settings.barrier = true; - settings.dualize = 0; + settings.barrier = true; + settings.dualize = 0; lp_solution_t solution(m, n); auto status = solve_linear_program_with_barrier(user_problem, settings, solution); @@ -623,9 +623,9 @@ TEST(barrier, mixed_linear_and_soc_tail_coupling) user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); simplex_solver_settings_t settings; - settings.barrier = true; - settings.dualize = 0; - settings.scale_columns = true; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = true; lp_solution_t solution(m, n); auto status = solve_linear_program_with_barrier(user_problem, settings, solution); @@ -689,9 +689,9 @@ TEST(barrier, mixed_linear_and_soc_tail_coupling_with_inequality) user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); simplex_solver_settings_t settings; - settings.barrier = true; - settings.dualize = 0; - settings.scale_columns = true; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = true; lp_solution_t solution(m, n); auto status = solve_linear_program_with_barrier(user_problem, settings, solution); @@ -766,8 +766,8 @@ TEST(barrier, mixed_linear_and_two_soc_blocks) user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); simplex_solver_settings_t settings; - settings.barrier = true; - settings.dualize = 0; + settings.barrier = true; + settings.dualize = 0; lp_solution_t solution(m, n); auto status = solve_linear_program_with_barrier(user_problem, settings, solution); @@ -846,9 +846,9 @@ TEST(barrier, mixed_linear_and_two_soc_blocks_with_inequality) user_problem.var_types.assign(n, variable_type_t::CONTINUOUS); simplex_solver_settings_t settings; - settings.barrier = true; - settings.dualize = 0; - settings.scale_columns = true; + settings.barrier = true; + settings.dualize = 0; + settings.scale_columns = true; lp_solution_t solution(m, n); auto status = solve_linear_program_with_barrier(user_problem, settings, solution); diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model.py b/python/cuopt/cuopt/linear_programming/data_model/data_model.py index 5e6fd6744d..f8b6dd609f 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model.py +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model.py @@ -4,7 +4,6 @@ import os import time -import numpy as np from . import data_model_wrapper from .utilities import catch_cuopt_exception diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd index 9f9f5ba5cf..6c401b59f5 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa # SPDX-License-Identifier: Apache-2.0 from .data_model cimport * diff --git a/python/cuopt/cuopt/linear_programming/problem.py b/python/cuopt/cuopt/linear_programming/problem.py index 84ca43c5b5..84fa1b27d7 100644 --- a/python/cuopt/cuopt/linear_programming/problem.py +++ b/python/cuopt/cuopt/linear_programming/problem.py @@ -1316,7 +1316,9 @@ def __init__(self, expr, sense, rhs, name=""): ) = _quadratic_expression_to_qcmatrix(expr, rhs) self.linear_values = np.array(linear_values, dtype=np.float64) self.linear_indices = np.array(linear_indices, dtype=np.int32) - self.quadratic_values = np.array(quadratic_values, dtype=np.float64) + self.quadratic_values = np.array( + quadratic_values, dtype=np.float64 + ) self.quadratic_row_indices = np.array( quadratic_row_indices, dtype=np.int32 ) @@ -1379,7 +1381,7 @@ def compute_slack(self): index_to_var[v_idx].Value * coeff for v_idx, coeff in self.vindex_coeff_dict.items() ) - + return self.RHS - lhs @@ -1590,7 +1592,9 @@ def _to_data_model(self): dm.set_row_names(self.row_names) dm.set_problem_name(self.Name) - linear_constr_count = sum(1 for c in self.constrs if not c.is_quadratic) + linear_constr_count = sum( + 1 for c in self.constrs if not c.is_quadratic + ) quad_index = 0 for constr in self.constrs: if not constr.is_quadratic: From 068ce73ea463923ee74edef6def7c97c17e868f4 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 22 May 2026 16:41:18 -0700 Subject: [PATCH 06/10] Revert unnecessary changes to simplex_solver_settings.hpp --- cpp/src/dual_simplex/simplex_solver_settings.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 5328cf3fe1..500822e1bd 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -163,9 +163,9 @@ struct simplex_solver_settings_t { use_left_looking_lu; // true to use left looking LU factorization, false to use right looking bool eliminate_singletons; // true to eliminate singletons from the basis bool print_presolve_stats; // true to print presolve stats - bool barrier_presolve; // bound shifts, free-var bounding; LP v-w split / folding when no cones - bool cudss_deterministic; // true to use cuDSS deterministic mode, false for non-deterministic - bool barrier; // true to use barrier method, false to use dual simplex method + bool barrier_presolve; // true to use barrier presolve + bool cudss_deterministic; // true to use cuDSS deterministic mode, false for non-deterministic + bool barrier; // true to use barrier method, false to use dual simplex method bool deterministic; // true to use B&B deterministic mode, false to use non-deterministic mode bool eliminate_dense_columns; // true to eliminate dense columns from A*D*A^T bool barrier_iterative_refinement; // true to use iterative refinement for barrier method From 380396105558154ee5417061b15a203be29d88db Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 22 May 2026 16:51:52 -0700 Subject: [PATCH 07/10] Revert unintentional change to match 26.06 --- .../pdlp/termination_strategy/infeasibility_information.cu | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index 96de3a788d..9268e17910 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -26,7 +26,13 @@ #include #include +#include +#include +#include #include +#include +#include +#include namespace cuopt::linear_programming::detail { template From b94ae44bfe4dfd0543bdf21208b36922efc0f73e Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 22 May 2026 17:10:36 -0700 Subject: [PATCH 08/10] apply_soc_qcmatrix_conversion_for_simplex -> convert_quadratic_constraints_to_second_order_cones --- cpp/src/pdlp/translate.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp index 2bfaa127ed..774582a555 100644 --- a/cpp/src/pdlp/translate.hpp +++ b/cpp/src/pdlp/translate.hpp @@ -191,7 +191,7 @@ static dual_simplex::user_problem_t cuopt_problem_to_user_problem( user_problem.Q_values = model.Q_values; if (model.original_problem_ptr->has_quadratic_constraints()) { - detail::apply_soc_qcmatrix_conversion_for_simplex( + detail::convert_quadratic_constraints_to_second_order_cones( n, model.original_problem_ptr->get_quadratic_constraints(), csr_A, user_problem); } @@ -302,7 +302,7 @@ static dual_simplex::user_problem_t cuopt_optimization_problem_to_user user_problem.Q_values = model.get_quadratic_objective_values(); if (model.has_quadratic_constraints()) { - detail::apply_soc_qcmatrix_conversion_for_simplex( + detail::convert_quadratic_constraints_to_second_order_cones( static_cast(n), model.get_quadratic_constraints(), csr_A, user_problem); } From 3f2f3fcbf196f523fb279b4844e704292ff16499 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 22 May 2026 17:18:36 -0700 Subject: [PATCH 09/10] Clean up scaling and prints --- cpp/src/dual_simplex/scaling.cpp | 66 ++++++++++++------- cpp/src/dual_simplex/scaling.hpp | 10 +-- cpp/src/dual_simplex/solve.cpp | 4 +- cpp/src/pdlp/solve.cu | 4 +- .../dual_simplex/unit_tests/solve_barrier.cu | 4 +- 5 files changed, 52 insertions(+), 36 deletions(-) diff --git a/cpp/src/dual_simplex/scaling.cpp b/cpp/src/dual_simplex/scaling.cpp index 852c19e2e1..30aec6c782 100644 --- a/cpp/src/dual_simplex/scaling.cpp +++ b/cpp/src/dual_simplex/scaling.cpp @@ -13,11 +13,11 @@ namespace cuopt::linear_programming::dual_simplex { template -i_t column_scaling(const lp_problem_t& unscaled, - const simplex_solver_settings_t& settings, - lp_problem_t& scaled, - std::vector& col_scale, - std::vector& row_scale) +i_t scaling(const lp_problem_t& unscaled, + const simplex_solver_settings_t& settings, + lp_problem_t& scaled, + std::vector& col_scale, + std::vector& row_scale) { scaled = unscaled; i_t m = scaled.num_rows; @@ -161,6 +161,16 @@ i_t column_scaling(const lp_problem_t& unscaled, if (max_deviation < 0.1) break; } + // Ruiz col_scale accumulates reciprocals (c[j] = 1/sqrt(norm)), so invert + // to store column norms: C(j,j) = 1/col_scale[j]. + for (i_t j = 0; j < n; ++j) { + col_scale[j] = f_t(1) / col_scale[j]; + } + // Same for row_scale: invert so R(i,i) = 1/row_scale[i]. + for (i_t i = 0; i < m; ++i) { + row_scale[i] = f_t(1) / row_scale[i]; + } + f_t a_min = std::numeric_limits::max(); f_t a_max = 0; for (i_t p = 0; p < scaled.A.col_start[n]; ++p) { @@ -192,24 +202,31 @@ i_t column_scaling(const lp_problem_t& unscaled, const f_t x = scaled.A.x[p]; sum += x * x; } - const f_t col_norm = sum > 0 ? std::sqrt(sum) : 1.0; - col_scale[j] = f_t(1) / col_norm; - max_col_norm = std::max(col_norm, max_col_norm); - min_col_norm = std::min(col_norm, min_col_norm); + f_t col_norm_j = col_scale[j] = sum > 0 ? std::sqrt(sum) : 1.0; + max_col_norm = std::max(col_norm_j, max_col_norm); + min_col_norm = std::min(col_norm_j, min_col_norm); } settings.log.printf( "Scaling matrix. Maximum column norm %e, minimum column norm %e\n", max_col_norm, min_col_norm); + // C(j, j) = 1/col_scale(j) // scaled_A = unscaled_A * C for (i_t j = 0; j < n; ++j) { const i_t col_start = scaled.A.col_start[j]; const i_t col_end = scaled.A.col_start[j + 1]; for (i_t p = col_start; p < col_end; ++p) { - scaled.A.x[p] *= col_scale[j]; + scaled.A.x[p] /= col_scale[j]; } - scaled.objective[j] *= col_scale[j]; - if (scaled.lower[j] > -1e20) scaled.lower[j] /= col_scale[j]; - if (scaled.upper[j] < 1e20) scaled.upper[j] /= col_scale[j]; + } + // scaled_obj = C*unscaled_obj + for (i_t j = 0; j < n; ++j) { + scaled.objective[j] /= col_scale[j]; + } + // scaled_lower = C^{-1} * unscaled_lower + // scaled_upper = C^{-1} * unscaled_upper + for (i_t j = 0; j < n; ++j) { + scaled.lower[j] *= col_scale[j]; + scaled.upper[j] *= col_scale[j]; } for (i_t i = 0; i < unscaled.Q.n; ++i) { @@ -217,7 +234,7 @@ i_t column_scaling(const lp_problem_t& unscaled, const i_t row_end = unscaled.Q.row_start[i + 1]; for (i_t p = row_start; p < row_end; ++p) { const i_t col = unscaled.Q.j[p]; - scaled.Q.x[p] = unscaled.Q.x[p] * col_scale[i] * col_scale[col]; + scaled.Q.x[p] = unscaled.Q.x[p] / (col_scale[i] * col_scale[col]); } } return 0; @@ -236,28 +253,27 @@ void unscale_solution(const std::vector& col_scale, const i_t n = scaled_x.size(); unscaled_x.resize(n); unscaled_z.resize(n); + // C(j,j) = 1/col_scale[j], so x_orig = x_scaled / col_scale, z_orig = z_scaled * col_scale for (i_t j = 0; j < n; ++j) { - // column_scaling multiplies A(:,j) and c_j by col_scale[j], divides bounds: - // x_orig = col_scale .* x_scaled, z_orig = z_scaled / col_scale. - unscaled_x[j] = scaled_x[j] * col_scale[j]; - unscaled_z[j] = scaled_z[j] / col_scale[j]; + unscaled_x[j] = scaled_x[j] / col_scale[j]; + unscaled_z[j] = scaled_z[j] * col_scale[j]; } const i_t m = scaled_y.size(); unscaled_y.resize(m); + // R(i,i) = 1/row_scale[i], so y_orig = y_scaled / row_scale for (i_t i = 0; i < m; ++i) { - // y_unscaled = R * y_scaled (row scaling: constraint was scaled by R) - unscaled_y[i] = scaled_y[i] * row_scale[i]; + unscaled_y[i] = scaled_y[i] / row_scale[i]; } } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE -template int column_scaling(const lp_problem_t& unscaled, - const simplex_solver_settings_t& settings, - lp_problem_t& scaled, - std::vector& col_scale, - std::vector& row_scale); +template int scaling(const lp_problem_t& unscaled, + const simplex_solver_settings_t& settings, + lp_problem_t& scaled, + std::vector& col_scale, + std::vector& row_scale); template void unscale_solution(const std::vector& col_scale, const std::vector& row_scale, diff --git a/cpp/src/dual_simplex/scaling.hpp b/cpp/src/dual_simplex/scaling.hpp index 94aca2055d..df0bf4d845 100644 --- a/cpp/src/dual_simplex/scaling.hpp +++ b/cpp/src/dual_simplex/scaling.hpp @@ -17,11 +17,11 @@ namespace cuopt::linear_programming::dual_simplex { template -i_t column_scaling(const lp_problem_t& unscaled, - const simplex_solver_settings_t& settings, - lp_problem_t& scaled, - std::vector& column_scaling, - std::vector& row_scaling); +i_t scaling(const lp_problem_t& unscaled, + const simplex_solver_settings_t& settings, + lp_problem_t& scaled, + std::vector& column_scaling, + std::vector& row_scaling); template void unscale_solution(const std::vector& column_scaling, diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 15d51041f7..26f399ac86 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -209,7 +209,7 @@ lp_status_t solve_linear_program_with_advanced_basis( std::vector row_scales_simplex; { raft::common::nvtx::range scope_scaling("DualSimplex::scaling"); - column_scaling(presolved_lp, settings, lp, column_scales, row_scales_simplex); + scaling(presolved_lp, settings, lp, column_scales, row_scales_simplex); } assert(presolved_lp.num_cols == lp.num_cols); lp_problem_t phase1_problem(original_lp.handle_ptr, 1, 1, 1); @@ -419,7 +419,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us presolved_lp.A.col_start[presolved_lp.num_cols]); std::vector column_scales; std::vector row_scales; - column_scaling(presolved_lp, barrier_settings, barrier_lp, column_scales, row_scales); + scaling(presolved_lp, barrier_settings, barrier_lp, column_scales, row_scales); // Solve using barrier lp_solution_t barrier_solution(barrier_lp.num_rows, barrier_lp.num_cols); diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 5c90e65cbc..d9be010ebb 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -1751,7 +1751,7 @@ optimization_problem_solution_t solve_qp(optimization_problem_t(op_problem.get_quadratic_constraints().size())); } if (settings.user_problem_file != "") { @@ -1813,7 +1813,7 @@ optimization_problem_solution_t solve_lp( CUOPT_LOG_INFO("Problem has a quadratic objective. Using Barrier."); } if (op_problem.has_quadratic_constraints()) { - CUOPT_LOG_INFO("Problem has %d quadratic constraints. Using Barrier with SOC conversion.", + CUOPT_LOG_INFO("Problem has %d quadratic constraints. Converting to second-order cones and solving with barrier.", static_cast(op_problem.get_quadratic_constraints().size())); } settings.method = method_t::Barrier; diff --git a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu index 45fcd297e8..a5b48fe5a5 100644 --- a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu +++ b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu @@ -234,7 +234,7 @@ TEST(barrier, cone_metadata_reindexed_when_slack_is_inserted_before_cones) original_lp.A.col_start[original_lp.num_cols]); std::vector column_scales; std::vector row_scales; - column_scaling(original_lp, settings, barrier_lp, column_scales, row_scales); + scaling(original_lp, settings, barrier_lp, column_scales, row_scales); EXPECT_EQ(barrier_lp.second_order_cone_dims, user_problem.second_order_cone_dims); EXPECT_EQ(barrier_lp.cone_var_start, 2); @@ -301,7 +301,7 @@ TEST(barrier, presolve_reindexes_cone_start_after_empty_column_removal) presolved_lp.A.col_start[presolved_lp.num_cols]); std::vector column_scales; std::vector row_scales; - ASSERT_EQ(column_scaling(presolved_lp, settings, barrier_lp, column_scales, row_scales), 0); + ASSERT_EQ(scaling(presolved_lp, settings, barrier_lp, column_scales, row_scales), 0); EXPECT_EQ(barrier_lp.cone_var_start, 0); } From b128480352de884b9b9a8a39ea40c9272de43b3d Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 22 May 2026 17:23:20 -0700 Subject: [PATCH 10/10] More fixes for scaling --- cpp/src/dual_simplex/scaling.cpp | 80 ++++++++++++++++---------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/cpp/src/dual_simplex/scaling.cpp b/cpp/src/dual_simplex/scaling.cpp index 30aec6c782..92b4d3377d 100644 --- a/cpp/src/dual_simplex/scaling.cpp +++ b/cpp/src/dual_simplex/scaling.cpp @@ -16,14 +16,14 @@ template i_t scaling(const lp_problem_t& unscaled, const simplex_solver_settings_t& settings, lp_problem_t& scaled, - std::vector& col_scale, - std::vector& row_scale) + std::vector& column_scaling, + std::vector& row_scaling) { scaled = unscaled; i_t m = scaled.num_rows; i_t n = scaled.num_cols; - row_scale.assign(m, 1.0); + row_scaling.assign(m, 1.0); // ========================================================================= // Ruiz equilibration for SOCP (and QP) problems @@ -31,10 +31,10 @@ i_t scaling(const lp_problem_t& unscaled, // For SOCP problems, apply Ruiz equilibration: alternating row and column // infinity-norm scaling to bring the constraint matrix close to equilibrium. // This dramatically improves the conditioning of the augmented KKT system. - // Ruiz equilibration for SOCP (and QP) problems, applied only when the - // constraint matrix has a large row-norm imbalance. + // Applied only when the constraint matrix has a large row-norm imbalance. if (!unscaled.second_order_cone_dims.empty() || unscaled.Q.n > 0) { - col_scale.assign(n, 1.0); + // col_scale and row_scale accumulate reciprocal scale factors during Ruiz iterations. + std::vector col_scale(n, 1.0); // Decide whether Ruiz scaling is needed by checking row-norm imbalance. // If max_row_norm / min_row_norm is small, the matrix is already well-conditioned @@ -59,6 +59,7 @@ i_t scaling(const lp_problem_t& unscaled, if (row_norm_ratio < 100.0) { settings.log.printf("Skipping Ruiz equilibration (row norm ratio %.1f < 100)\n", row_norm_ratio); + column_scaling.assign(n, 1.0); return 0; } @@ -91,7 +92,7 @@ i_t scaling(const lp_problem_t& unscaled, Arow.x[p] *= r[i]; } scaled.rhs[i] *= r[i]; - row_scale[i] *= r[i]; + row_scaling[i] *= r[i]; } // --- Column scaling: scale each column by 1/sqrt(max|a_ij|) --- @@ -161,14 +162,15 @@ i_t scaling(const lp_problem_t& unscaled, if (max_deviation < 0.1) break; } - // Ruiz col_scale accumulates reciprocals (c[j] = 1/sqrt(norm)), so invert - // to store column norms: C(j,j) = 1/col_scale[j]. + // Ruiz col_scale/row_scaling accumulate reciprocals (c[j] = 1/sqrt(norm)). + // Invert to match the output convention: C(j,j) = 1/column_scaling[j], + // R(i,i) = 1/row_scaling[i]. + column_scaling.resize(n); for (i_t j = 0; j < n; ++j) { - col_scale[j] = f_t(1) / col_scale[j]; + column_scaling[j] = f_t(1) / col_scale[j]; } - // Same for row_scale: invert so R(i,i) = 1/row_scale[i]. for (i_t i = 0; i < m; ++i) { - row_scale[i] = f_t(1) / row_scale[i]; + row_scaling[i] = f_t(1) / row_scaling[i]; } f_t a_min = std::numeric_limits::max(); @@ -186,14 +188,13 @@ i_t scaling(const lp_problem_t& unscaled, if (!settings.scale_columns) { settings.log.printf("Skipping column scaling\n"); - col_scale.resize(n, 1.0); + column_scaling.resize(n, 1.0); return 0; } - col_scale.resize(n); - - f_t max_col_norm = 0; - f_t min_col_norm = std::numeric_limits::max(); + column_scaling.resize(n); + f_t max = 0; + f_t min = std::numeric_limits::max(); for (i_t j = 0; j < n; ++j) { const i_t col_start = scaled.A.col_start[j]; const i_t col_end = scaled.A.col_start[j + 1]; @@ -202,47 +203,47 @@ i_t scaling(const lp_problem_t& unscaled, const f_t x = scaled.A.x[p]; sum += x * x; } - f_t col_norm_j = col_scale[j] = sum > 0 ? std::sqrt(sum) : 1.0; - max_col_norm = std::max(col_norm_j, max_col_norm); - min_col_norm = std::min(col_norm_j, min_col_norm); + f_t col_norm_j = column_scaling[j] = sum > 0 ? std::sqrt(sum) : 1.0; + max = std::max(col_norm_j, max); + min = std::min(col_norm_j, min); } - settings.log.printf( - "Scaling matrix. Maximum column norm %e, minimum column norm %e\n", max_col_norm, min_col_norm); - // C(j, j) = 1/col_scale(j) + settings.log.printf("Scaling matrix. Maximum column norm %e, minimum column norm %e\n", max, min); + // C(j, j) = 1/column_scaling(j) // scaled_A = unscaled_A * C for (i_t j = 0; j < n; ++j) { const i_t col_start = scaled.A.col_start[j]; const i_t col_end = scaled.A.col_start[j + 1]; for (i_t p = col_start; p < col_end; ++p) { - scaled.A.x[p] /= col_scale[j]; + scaled.A.x[p] /= column_scaling[j]; } } // scaled_obj = C*unscaled_obj for (i_t j = 0; j < n; ++j) { - scaled.objective[j] /= col_scale[j]; + scaled.objective[j] /= column_scaling[j]; } // scaled_lower = C^{-1} * unscaled_lower // scaled_upper = C^{-1} * unscaled_upper for (i_t j = 0; j < n; ++j) { - scaled.lower[j] *= col_scale[j]; - scaled.upper[j] *= col_scale[j]; + scaled.lower[j] *= column_scaling[j]; + scaled.upper[j] *= column_scaling[j]; } for (i_t i = 0; i < unscaled.Q.n; ++i) { const i_t row_start = unscaled.Q.row_start[i]; const i_t row_end = unscaled.Q.row_start[i + 1]; + i_t row = i; for (i_t p = row_start; p < row_end; ++p) { - const i_t col = unscaled.Q.j[p]; - scaled.Q.x[p] = unscaled.Q.x[p] / (col_scale[i] * col_scale[col]); + i_t col = unscaled.Q.j[p]; + scaled.Q.x[p] = unscaled.Q.x[p] / (column_scaling[row] * column_scaling[col]); } } return 0; } template -void unscale_solution(const std::vector& col_scale, - const std::vector& row_scale, +void unscale_solution(const std::vector& column_scaling, + const std::vector& row_scaling, const std::vector& scaled_x, const std::vector& scaled_y, const std::vector& scaled_z, @@ -253,17 +254,16 @@ void unscale_solution(const std::vector& col_scale, const i_t n = scaled_x.size(); unscaled_x.resize(n); unscaled_z.resize(n); - // C(j,j) = 1/col_scale[j], so x_orig = x_scaled / col_scale, z_orig = z_scaled * col_scale for (i_t j = 0; j < n; ++j) { - unscaled_x[j] = scaled_x[j] / col_scale[j]; - unscaled_z[j] = scaled_z[j] * col_scale[j]; + unscaled_x[j] = scaled_x[j] / column_scaling[j]; + unscaled_z[j] = scaled_z[j] * column_scaling[j]; } const i_t m = scaled_y.size(); unscaled_y.resize(m); - // R(i,i) = 1/row_scale[i], so y_orig = y_scaled / row_scale + // R(i,i) = 1/row_scaling[i], so y_orig = y_scaled / row_scaling for (i_t i = 0; i < m; ++i) { - unscaled_y[i] = scaled_y[i] / row_scale[i]; + unscaled_y[i] = scaled_y[i] / row_scaling[i]; } } @@ -272,11 +272,11 @@ void unscale_solution(const std::vector& col_scale, template int scaling(const lp_problem_t& unscaled, const simplex_solver_settings_t& settings, lp_problem_t& scaled, - std::vector& col_scale, - std::vector& row_scale); + std::vector& column_scaling, + std::vector& row_scaling); -template void unscale_solution(const std::vector& col_scale, - const std::vector& row_scale, +template void unscale_solution(const std::vector& column_scaling, + const std::vector& row_scaling, const std::vector& scaled_x, const std::vector& scaled_y, const std::vector& scaled_z,