From c721556488391b8f80df4427aa2a58b71386c8b4 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 22 May 2026 14:48:27 -0700
Subject: [PATCH 01/10] Extend the barrier solver to SOCP problems

This PR extends the 2x2 augmented system formulation of the barrier
solver to solve SOCP problems. It supports both explicit cone
variables as well as the cone rows in the Ax=b constraint. It uses
Nesterov-Todd scaling to make the conic diagonal blocks symmetric.

Co-authored-by: Yan Zaretskiy <yan@fastmail.com>
Co-authored-by: Yan Zaretskiy <yzaretskiy@nvidia.com>
Co-authored-by: Yuwen Chen <yuwchen@nvidia.com>
---
 .../cuopt/linear_programming/constants.h      |    4 +
 .../linear_programming/io/data_model_view.hpp |    6 +-
 .../linear_programming/io/mps_data_model.hpp  |   20 +-
 .../optimization_problem_interface.hpp        |   11 +-
 .../pdlp/solver_settings.hpp                  |    3 +
 cpp/src/barrier/barrier.cu                    | 1818 ++++++++++++-----
 cpp/src/barrier/barrier.hpp                   |    2 +
 cpp/src/barrier/dense_vector.hpp              |   22 +
 cpp/src/dual_simplex/presolve.cpp             |  560 +++--
 cpp/src/dual_simplex/presolve.hpp             |   10 +-
 cpp/src/dual_simplex/scaling.cpp              |  225 +-
 cpp/src/dual_simplex/scaling.hpp              |    8 +-
 .../dual_simplex/simplex_solver_settings.hpp  |    4 +-
 cpp/src/dual_simplex/solve.cpp                |   88 +-
 cpp/src/dual_simplex/user_problem.hpp         |    2 +
 cpp/src/dual_simplex/vector_math.cuh          |   21 +-
 cpp/src/io/lp_parser.cpp                      |   43 +-
 cpp/src/io/mps_data_model.cpp                 |   78 +-
 cpp/src/io/mps_parser.cpp                     |  370 +++-
 cpp/src/io/mps_writer.cpp                     |   40 +-
 cpp/src/math_optimization/solver_settings.cu  |    3 +
 cpp/src/pdlp/solve.cu                         |   31 +-
 .../infeasibility_information.cu              |    6 -
 cpp/src/pdlp/translate.hpp                    |   23 +-
 cpp/tests/dual_simplex/CMakeLists.txt         |    3 +-
 .../dual_simplex/unit_tests/solve_barrier.cu  |  819 +++++++-
 cpp/tests/linear_programming/parser_test.cpp  |   40 +-
 cpp/tests/qp/unit_tests/no_constraints.cu     |    2 +
 cpp/tests/qp/unit_tests/two_variable_test.cu  |    1 +
 .../data_model/data_model.pxd                 |   18 +
 .../data_model/data_model.py                  |   86 +
 .../data_model/data_model_wrapper.pxd         |    4 +
 .../data_model/data_model_wrapper.pyx         |  140 +-
 .../cuopt/linear_programming/io/parser.pxd    |   12 +
 .../linear_programming/io/parser_wrapper.pyx  |   47 +
 .../cuopt/cuopt/linear_programming/problem.py |  226 +-
 .../linear_programming/test_python_API.py     |   10 +
 37 files changed, 3795 insertions(+), 1011 deletions(-)

diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h
index 3342ba375c..1dfccef1ea 100644
--- a/cpp/include/cuopt/linear_programming/constants.h
+++ b/cpp/include/cuopt/linear_programming/constants.h
@@ -47,6 +47,10 @@
 #define CUOPT_DUALIZE                              "dualize"
 #define CUOPT_ORDERING                             "ordering"
 #define CUOPT_BARRIER_DUAL_INITIAL_POINT           "barrier_dual_initial_point"
+#define CUOPT_BARRIER_RELATIVE_FEASIBILITY_TOLERANCE "barrier_relative_feasibility_tolerance"
+#define CUOPT_BARRIER_RELATIVE_OPTIMALITY_TOLERANCE  "barrier_relative_optimality_tolerance"
+#define CUOPT_BARRIER_RELATIVE_COMPLEMENTARITY_TOLERANCE \
+  "barrier_relative_complementarity_tolerance"
 #define CUOPT_BARRIER_ITERATIVE_REFINEMENT         "barrier_iterative_refinement"
 #define CUOPT_BARRIER_STEP_SCALE                   "barrier_step_scale"
 #define CUOPT_ELIMINATE_DENSE_COLUMNS              "eliminate_dense_columns"
diff --git a/cpp/include/cuopt/linear_programming/io/data_model_view.hpp b/cpp/include/cuopt/linear_programming/io/data_model_view.hpp
index ca2fd30393..aa76d122bf 100644
--- a/cpp/include/cuopt/linear_programming/io/data_model_view.hpp
+++ b/cpp/include/cuopt/linear_programming/io/data_model_view.hpp
@@ -434,9 +434,9 @@ class data_model_view_t {
          std::vector<f_t>(qc.linear_values.begin(), qc.linear_values.end()),
          std::vector<i_t>(qc.linear_indices.begin(), qc.linear_indices.end()),
          static_cast<f_t>(qc.rhs_value),
-         std::vector<f_t>(qc.quadratic_values.begin(), qc.quadratic_values.end()),
-         std::vector<i_t>(qc.quadratic_indices.begin(), qc.quadratic_indices.end()),
-         std::vector<i_t>(qc.quadratic_offsets.begin(), qc.quadratic_offsets.end())});
+         std::vector<i_t>(qc.quadratic_row_indices.begin(), qc.quadratic_row_indices.end()),
+         std::vector<i_t>(qc.quadratic_col_indices.begin(), qc.quadratic_col_indices.end()),
+         std::vector<f_t>(qc.quadratic_values.begin(), qc.quadratic_values.end())});
     }
   }
 
diff --git a/cpp/include/cuopt/linear_programming/io/mps_data_model.hpp b/cpp/include/cuopt/linear_programming/io/mps_data_model.hpp
index 9828a00c0c..dee74a3a42 100644
--- a/cpp/include/cuopt/linear_programming/io/mps_data_model.hpp
+++ b/cpp/include/cuopt/linear_programming/io/mps_data_model.hpp
@@ -239,7 +239,7 @@ class mps_data_model_t {
    * - row identity and type (from ROWS),
    * - sparse linear coefficients (from COLUMNS),
    * - RHS value (from RHS),
-   * - quadratic matrix Q in CSR (from QCMATRIX).
+   * - quadratic matrix Q in COO (SoA: row, col, value) from QCMATRIX — one triplet per nonzero.
    */
   struct quadratic_constraint_t {
     /** ROWS declaration index (among all constraint rows), not an index into the linear CSR. */
@@ -251,19 +251,21 @@ class mps_data_model_t {
     std::vector<f_t> linear_values{};
     std::vector<i_t> linear_indices{};
     f_t rhs_value{f_t(0)};
+    /** Q nonzeros: parallel arrays, same length (COO / SoA). Sorted by (row, col) in append. */
+    std::vector<i_t> quadratic_row_indices{};
+    std::vector<i_t> quadratic_col_indices{};
     std::vector<f_t> quadratic_values{};
-    std::vector<i_t> quadratic_indices{};
-    std::vector<i_t> quadratic_offsets{};
   };
 
   /**
    * @brief Append one complete quadratic constraint (row + linear + rhs + quadratic Q).
    * @note All span inputs are host memory; the model copies this data.
    * @param linear_values, linear_indices Same nnz; can be empty for a purely quadratic row (rare).
-   * @param quadratic_values, quadratic_indices CSR nnz; may be empty if Q is empty.
-   * @param quadratic_offsets CSR row starts; must be non-empty.
-   * @param constraint_row_type MPS ROWS type; must be 'L'. 'G' and 'E' quadratic rows are not
-   *        supported.
+   * @param quadratic_values, quadratic_row_indices, quadratic_col_indices COO triplets; same
+   *        length; may all be empty if Q is empty. Stored sorted by (row, col).
+   * @param constraint_row_type MPS ROWS type: 'L' (<=) or 'G' (>=). Stored as given; 'G' rows are
+   *        converted to '<=' form when building the SOCP for the barrier solver. Equality ('E') is
+   *        not supported.
    */
   void append_quadratic_constraint(i_t constraint_row_index,
                                    const std::string& constraint_row_name,
@@ -272,8 +274,8 @@ class mps_data_model_t {
                                    std::span<const i_t> linear_indices,
                                    f_t rhs_value,
                                    std::span<const f_t> quadratic_values,
-                                   std::span<const i_t> quadratic_indices,
-                                   std::span<const i_t> quadratic_offsets);
+                                   std::span<const i_t> quadratic_row_indices,
+                                   std::span<const i_t> quadratic_col_indices);
 
   const std::vector<quadratic_constraint_t>& get_quadratic_constraints() const;
 
diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp
index aa164ca756..56b373c897 100644
--- a/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp
+++ b/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp
@@ -64,9 +64,10 @@ class optimization_problem_interface_t {
     std::vector<f_t> linear_values{};
     std::vector<i_t> linear_indices{};
     f_t rhs_value{f_t(0)};
+    /** Q in COO: parallel arrays, same length. */
+    std::vector<i_t> quadratic_row_indices{};
+    std::vector<i_t> quadratic_col_indices{};
     std::vector<f_t> quadratic_values{};
-    std::vector<i_t> quadratic_indices{};
-    std::vector<i_t> quadratic_offsets{};
   };
 
   virtual ~optimization_problem_interface_t() = default;
@@ -89,9 +90,9 @@ class optimization_problem_interface_t {
          std::vector<f_t>(qc.linear_values.begin(), qc.linear_values.end()),
          std::vector<i_t>(qc.linear_indices.begin(), qc.linear_indices.end()),
          static_cast<f_t>(qc.rhs_value),
-         std::vector<f_t>(qc.quadratic_values.begin(), qc.quadratic_values.end()),
-         std::vector<i_t>(qc.quadratic_indices.begin(), qc.quadratic_indices.end()),
-         std::vector<i_t>(qc.quadratic_offsets.begin(), qc.quadratic_offsets.end())});
+         std::vector<i_t>(qc.quadratic_row_indices.begin(), qc.quadratic_row_indices.end()),
+         std::vector<i_t>(qc.quadratic_col_indices.begin(), qc.quadratic_col_indices.end()),
+         std::vector<f_t>(qc.quadratic_values.begin(), qc.quadratic_values.end())});
     }
     set_quadratic_constraints(std::move(converted_constraints));
   }
diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
index b30286f9ce..3597b85016 100644
--- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
+++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
@@ -280,6 +280,9 @@ class pdlp_solver_settings_t {
   i_t dualize{-1};
   i_t ordering{-1};
   i_t barrier_dual_initial_point{-1};
+  f_t barrier_relative_feasibility_tolerance{1.0e-8};
+  f_t barrier_relative_optimality_tolerance{1.0e-8};
+  f_t barrier_relative_complementarity_tolerance{1.0e-8};
   bool eliminate_dense_columns{true};
   pdlp_precision_t pdlp_precision{pdlp_precision_t::DefaultPrecision};
   bool barrier_iterative_refinement{true};
diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu
index e7604fb60e..fd6f9dbc6e 100644
--- a/cpp/src/barrier/barrier.cu
+++ b/cpp/src/barrier/barrier.cu
@@ -14,6 +14,7 @@
 #include <barrier/dense_vector.hpp>
 #include <barrier/device_sparse_matrix.cuh>
 #include <barrier/iterative_refinement.hpp>
+#include <barrier/second_order_cone_kernels.cuh>
 #include <barrier/sparse_cholesky.cuh>
 #include <barrier/sparse_matrix_kernels.cuh>
 
@@ -34,6 +35,8 @@
 #include <utilities/macros.cuh>
 
 #include <numeric>
+#include <optional>
+#include <span>
 
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/core/nvtx.hpp>
@@ -43,6 +46,7 @@
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/transform_output_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
+#include <thrust/reduce.h>
 
 namespace cuopt::linear_programming::dual_simplex {
 
@@ -61,6 +65,29 @@ namespace cuopt::linear_programming::dual_simplex {
     cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream.value());
 }
 
+// out[i] = is_native_free_linear[i] ? 0 : a[i] * b[i]
+[[maybe_unused]] static void pairwise_multiply_skip_native_free_linear(
+  float* a, float* b, int* is_native_free_linear, float* out, int size, rmm::cuda_stream_view stream)
+{
+  cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(a, b, is_native_free_linear),
+    out,
+    size,
+    [] __host__ __device__(float x_j, float d_j, int free_j) { return free_j ? 0.f : x_j * d_j; },
+    stream.value());
+}
+
+[[maybe_unused]] static void pairwise_multiply_skip_native_free_linear(
+  double* a, double* b, int* is_native_free_linear, double* out, int size, rmm::cuda_stream_view stream)
+{
+  cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(a, b, is_native_free_linear),
+    out,
+    size,
+    [] __host__ __device__(double x_j, double d_j, int free_j) { return free_j ? 0.0 : x_j * d_j; },
+    stream.value());
+}
+
 [[maybe_unused]] static void axpy(
   float alpha, float* x, float beta, float* y, float* out, int size, rmm::cuda_stream_view stream)
 {
@@ -93,7 +120,7 @@ class iteration_data_t {
  public:
   iteration_data_t(const lp_problem_t<i_t, f_t>& lp,
                    i_t num_upper_bounds,
-                   const std::vector<i_t>& free_variable_indices,
+                   const std::vector<i_t>& native_free_linear_indices,
                    const csc_matrix_t<i_t, f_t>& Qin,
                    const simplex_solver_settings_t<i_t, f_t>& settings)
     : upper_bounds(num_upper_bounds),
@@ -164,10 +191,12 @@ class iteration_data_t {
       d_inv_diag(lp.num_cols, lp.handle_ptr->get_stream()),
       d_cols_to_remove(0, lp.handle_ptr->get_stream()),
       d_augmented_diagonal_indices_(0, lp.handle_ptr->get_stream()),
+      d_cone_csr_indices_(0, lp.handle_ptr->get_stream()),
+      d_cone_Q_values_(0, lp.handle_ptr->get_stream()),
       use_augmented(false),
       has_factorization(false),
-      n_free_vars(0),
-      d_is_free_(0, lp.handle_ptr->get_stream()),
+      n_native_free_linear(0),
+      d_is_native_free_linear_(0, lp.handle_ptr->get_stream()),
       num_factorizations(0),
       has_solve_info(false),
       settings_(settings),
@@ -209,9 +238,11 @@ class iteration_data_t {
       d_dw_residual_(0, lp.handle_ptr->get_stream()),
       d_wv_residual_(0, lp.handle_ptr->get_stream()),
       d_bound_rhs_(0, lp.handle_ptr->get_stream()),
-      d_complementarity_xz_rhs_(lp.num_cols, lp.handle_ptr->get_stream()),
+      d_complementarity_xz_rhs_(0, lp.handle_ptr->get_stream()),
       d_complementarity_wv_rhs_(0, lp.handle_ptr->get_stream()),
       d_dual_rhs_(lp.num_cols, lp.handle_ptr->get_stream()),
+      d_complementarity_target_(lp.num_cols, lp.handle_ptr->get_stream()),
+      d_cone_hessian_dx_(0, lp.handle_ptr->get_stream()),
       d_Q_diag_(0, lp.handle_ptr->get_stream()),
       d_Qx_(Qin.m, lp.handle_ptr->get_stream()),
       restrict_u_(0),
@@ -219,192 +250,242 @@ class iteration_data_t {
       sum_reduce_helper_(lp.handle_ptr->get_stream()),
       indefinite_Q(false),
       Q_diagonal(false),
-      symbolic_status(0)
+      symbolic_status(0),
+      cone_combined_step_(false),
+      cone_sigma_mu_(f_t(0))
   {
     raft::common::nvtx::range fun_scope("Barrier: LP Data Creation");
 
-    // Set up free variable tracking for QPs
-    if (!free_variable_indices.empty()) {
-      n_free_vars = free_variable_indices.size();
-      std::vector<i_t> is_free_host(lp.num_cols, 0);
-      for (i_t j : free_variable_indices) {
-        is_free_host[j] = 1;
+    {
+      raft::common::nvtx::range scope("Barrier: LP Data: native free linear");
+      // Set up native free linear variable tracking (linear columns only, j < cone_start).
+      if (!native_free_linear_indices.empty()) {
+        n_native_free_linear = native_free_linear_indices.size();
+        std::vector<i_t> is_native_free_linear_host(lp.num_cols, 0);
+        for (i_t j : native_free_linear_indices) {
+          is_native_free_linear_host[j] = 1;
+        }
+        d_is_native_free_linear_.resize(lp.num_cols, stream_view_);
+        raft::copy(
+          d_is_native_free_linear_.data(), is_native_free_linear_host.data(), lp.num_cols, stream_view_);
+        settings.log.printf("Native free linear (QP): %d\n", n_native_free_linear);
       }
-      d_is_free_.resize(lp.num_cols, stream_view_);
-      raft::copy(d_is_free_.data(), is_free_host.data(), lp.num_cols, stream_view_);
-      settings.log.printf("Free variables (QP)  : %d\n", n_free_vars);
     }
 
     bool has_Q   = Q.x.size() > 0;
     indefinite_Q = false;
-    if (has_Q) {
-      Qdiag.resize(lp.num_cols, 0.0);
-
-      for (i_t j = 0; j < Q.n; j++) {
-        const i_t col_start = Q.col_start[j];
-        const i_t col_end   = Q.col_start[j + 1];
-        for (i_t p = col_start; p < col_end; p++) {
-          const i_t i = Q.i[p];
-          if (j == i) {
-            Qdiag[j] = Q.x[p];
-            break;
+    {
+      raft::common::nvtx::range scope("Barrier: LP Data: Q setup");
+      if (has_Q) {
+        Qdiag.resize(lp.num_cols, 0.0);
+
+        for (i_t j = 0; j < Q.n; j++) {
+          const i_t col_start = Q.col_start[j];
+          const i_t col_end   = Q.col_start[j + 1];
+          for (i_t p = col_start; p < col_end; p++) {
+            const i_t i = Q.i[p];
+            if (j == i) {
+              Qdiag[j] = Q.x[p];
+              break;
+            }
           }
         }
-      }
 
-      Q_diagonal = Q.is_diagonal();
+        Q_diagonal = Q.is_diagonal();
 
-      if (Q_diagonal) {
-        // Check to ensure that Q is positive semi-definite
-        for (i_t j = 0; j < lp.num_cols; j++) {
-          if (Qdiag[j] < 0.0) {
-            settings_.log.printf(
-              "Q is not positive semidefinite: Q(%d, %d) = %e\n", j, j, Qdiag[j]);
-            indefinite_Q = true;
-            return;
+        if (Q_diagonal) {
+          // Check to ensure that Q is positive semi-definite
+          for (i_t j = 0; j < lp.num_cols; j++) {
+            if (Qdiag[j] < 0.0) {
+              settings_.log.printf(
+                "Q is not positive semidefinite: Q(%d, %d) = %e\n", j, j, Qdiag[j]);
+              indefinite_Q = true;
+              return;
+            }
           }
+        } else if (settings.check_Q) {
+          // TODO: Check to ensure that Q is positive semi-definite
+          // This requires us to perform a Cholesky factorization.
         }
-      } else if (settings.check_Q) {
-        // TODO: Check to ensure that Q is positive semi-definite
-        // This requires us to perform a Cholesky factorization.
+
+        d_Q_diag_.resize(lp.num_cols, stream_view_);
+        raft::copy(d_Q_diag_.data(), Qdiag.data(), Qdiag.size(), stream_view_);
       }
+    }
 
-      d_Q_diag_.resize(lp.num_cols, stream_view_);
-      raft::copy(d_Q_diag_.data(), Qdiag.data(), Qdiag.size(), stream_view_);
+    {
+      raft::common::nvtx::range scope("Barrier: LP Data: SOC setup");
+      if (!lp.second_order_cone_dims.empty()) {
+        cone_var_start_ = lp.cone_var_start;
+        i_t total_cone_dim =
+          std::accumulate(lp.second_order_cone_dims.begin(), lp.second_order_cone_dims.end(), i_t(0));
+        cuopt_assert(cone_var_start_ >= 0, "cone_var_start must be nonnegative");
+        cuopt_assert(cone_var_start_ + total_cone_dim <= lp.num_cols,
+                     "cone variables exceed problem dimension");
+        cuopt_assert(cone_var_start_ + total_cone_dim == lp.num_cols,
+                     "barrier expects [linear | cone] layout");
+        cones_.emplace(
+          std::span<const i_t>(lp.second_order_cone_dims.data(), lp.second_order_cone_dims.size()),
+          raft::device_span<f_t>{},
+          raft::device_span<f_t>{},
+          stream_view_);
+        cuopt_assert(cone_count() > 0, "second-order cone topology must contain at least one cone");
+        cuopt_assert(cone_entry_count() == total_cone_dim, "second-order cone entry count mismatch");
+      }
     }
 
-    // Allocating GPU flag data for Form ADAT
-    RAFT_CUDA_TRY(cub::DeviceSelect::Flagged(
-      nullptr,
-      flag_buffer_size,
-      d_inv_diag_prime.data(),  // Not the actual input but just to allcoate the memory
-      thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not<i_t>{}),
-      d_inv_diag_prime.data(),
-      d_num_flag.data(),
-      inv_diag.size(),
-      stream_view_));
+    {
+      raft::common::nvtx::range scope("Barrier: LP Data: complementarity buffers");
+      const i_t linear_xz_rhs_size = linear_xz_size(lp.num_cols);
+      d_complementarity_xz_rhs_.resize(linear_xz_rhs_size, stream_view_);
 
-    d_flag_buffer.resize(flag_buffer_size, stream_view_);
+      // Allocating GPU flag data for Form ADAT
+      RAFT_CUDA_TRY(cub::DeviceSelect::Flagged(
+        nullptr,
+        flag_buffer_size,
+        d_inv_diag_prime.data(),  // Not the actual input but just to allcoate the memory
+        thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not<i_t>{}),
+        d_inv_diag_prime.data(),
+        d_num_flag.data(),
+        inv_diag.size(),
+        stream_view_));
 
-    // Create the upper bounds vector
-    n_upper_bounds = 0;
-    for (i_t j = 0; j < lp.num_cols; j++) {
-      if (lp.upper[j] < inf) { upper_bounds[n_upper_bounds++] = j; }
+      d_flag_buffer.resize(flag_buffer_size, stream_view_);
     }
-    if (n_upper_bounds > 0) {
-      settings.log.printf("Upper bounds                : %d\n", n_upper_bounds);
+
+    {
+      raft::common::nvtx::range scope("Barrier: LP Data: upper bounds");
+      // Create the upper bounds vector
+      n_upper_bounds = 0;
+      for (i_t j = 0; j < lp.num_cols; j++) {
+        if (lp.upper[j] < inf) { upper_bounds[n_upper_bounds++] = j; }
+      }
+      if (n_upper_bounds > 0) {
+        settings.log.printf("Upper bounds                : %d\n", n_upper_bounds);
+      }
     }
 
-    // Decide if we are going to use the augmented system or not
-    n_dense_columns      = 0;
-    i_t n_dense_rows     = 0;
-    i_t max_row_nz       = 0;
-    f_t estimated_nz_AAT = 0.0;
     std::vector<i_t> dense_columns_unordered;
+    {
+      raft::common::nvtx::range scope("Barrier: LP Data: dense columns and augmented");
+      // Decide if we are going to use the augmented system or not
+      n_dense_columns      = 0;
+      i_t n_dense_rows     = 0;
+      i_t max_row_nz       = 0;
+      f_t estimated_nz_AAT = 0.0;
+
+      const bool has_soc = has_cones();
+      if (has_Q || has_soc) {
+        // QP and SOCP always use the augmented KKT; skip dense-column / ADAT heuristics.
+        use_augmented   = true;
+        n_dense_columns = 0;
+      } else {
+        f_t start_column_density = tic();
 
-    f_t start_column_density = tic();
-
-    // Do not look for dense columns if Q is not diagonal
-    if (!has_Q || Q_diagonal) {
-      find_dense_columns(
-        lp.A, settings, dense_columns_unordered, n_dense_rows, max_row_nz, estimated_nz_AAT);
-    }
-    if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
+        // Do not look for dense columns if Q is not diagonal
+        if (!has_Q || Q_diagonal) {
+          find_dense_columns(
+            lp.A, settings, dense_columns_unordered, n_dense_rows, max_row_nz, estimated_nz_AAT);
+        }
+        if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
 #ifdef PRINT_INFO
-    for (i_t j : dense_columns_unordered) {
-      settings.log.printf("Dense column %6d\n", j);
-    }
+        for (i_t j : dense_columns_unordered) {
+          settings.log.printf("Dense column %6d\n", j);
+        }
 #endif
-    float64_t column_density_time = toc(start_column_density);
-    if (!settings.eliminate_dense_columns) { dense_columns_unordered.clear(); }
-    n_dense_columns = static_cast<i_t>(dense_columns_unordered.size());
-    if (n_dense_columns > 0) {
-      settings.log.printf("Dense columns               : %d\n", n_dense_columns);
-    }
-    if (n_dense_rows > 0) {
-      settings.log.printf("Dense rows                  : %d\n", n_dense_rows);
-    }
-    settings.log.printf("Density estimator time      : %.2fs\n", column_density_time);
-    if ((settings.augmented != 0) &&
-        (n_dense_columns > 50 || n_dense_rows > 10 ||
-         lp.A.m == 0 /* handle case with no constraints */ ||
-         (max_row_nz > 5000 && estimated_nz_AAT > 1e10) || settings.augmented == 1)) {
-      use_augmented   = true;
-      n_dense_columns = 0;
-    }
-
-    if (has_Q && !use_augmented) {
-      // For now let's not deal with dense columns
-      n_dense_columns = 0;
-      use_augmented   = !Q_diagonal;
-    }
+        float64_t column_density_time = toc(start_column_density);
+        if (!settings.eliminate_dense_columns) { dense_columns_unordered.clear(); }
+        n_dense_columns = static_cast<i_t>(dense_columns_unordered.size());
+        if (n_dense_columns > 0) {
+          settings.log.printf("Dense columns               : %d\n", n_dense_columns);
+        }
+        if (n_dense_rows > 0) {
+          settings.log.printf("Dense rows                  : %d\n", n_dense_rows);
+        }
+        settings.log.printf("Density estimator time      : %.2fs\n", column_density_time);
+        if ((settings.augmented != 0) &&
+            (n_dense_columns > 50 || n_dense_rows > 10 ||
+             lp.A.m == 0 /* handle case with no constraints */ ||
+             (max_row_nz > 5000 && estimated_nz_AAT > 1e10) || settings.augmented == 1)) {
+          use_augmented   = true;
+          n_dense_columns = 0;
+        }
+      }
 
-    if (use_augmented) {
-      settings.log.printf("Linear system               : augmented\n");
-    } else {
-      settings.log.printf("Linear system               : ADAT\n");
+      if (use_augmented) {
+        settings.log.printf("Linear system               : augmented\n");
+      } else {
+        settings.log.printf("Linear system               : ADAT\n");
+      }
     }
 
-    // D = I + EET
-    diag.set_scalar(1.0);
-    if (n_upper_bounds > 0) {
-      for (i_t k = 0; k < n_upper_bounds; k++) {
-        i_t j   = upper_bounds[k];
-        diag[j] = 2.0;
+    {
+      raft::common::nvtx::range scope("Barrier: LP Data: diag and inv_diag");
+      // D = I + EET
+      diag.set_scalar(1.0);
+      if (n_upper_bounds > 0) {
+        for (i_t k = 0; k < n_upper_bounds; k++) {
+          i_t j   = upper_bounds[k];
+          diag[j] = 2.0;
+        }
       }
-    }
 
-    // D = I + EET + Q (if Q is diagonal)
-    if (has_Q && !use_augmented) {
-      // this means that Q is diagonal
-      for (i_t j = 0; j < Q.n; j++) {
-        diag[j] += Qdiag[j];
+      // D = I + EET + Q (if Q is diagonal)
+      if (has_Q && !use_augmented) {
+        // this means that Q is diagonal
+        for (i_t j = 0; j < Q.n; j++) {
+          diag[j] += Qdiag[j];
+        }
       }
-    }
 
-    inv_diag.set_scalar(1.0);
-    if (use_augmented) { diag.multiply_scalar(-1.0); }
-    if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { diag.inverse(inv_diag); }
-    // TMP diag and inv_diag should directly created and filled on the GPU
-    raft::copy(d_inv_diag.data(), inv_diag.data(), inv_diag.size(), stream_view_);
-    inv_sqrt_diag.set_scalar(1.0);
-    if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { inv_diag.sqrt(inv_sqrt_diag); }
+      inv_diag.set_scalar(1.0);
+      if (use_augmented) { diag.multiply_scalar(-1.0); }
+      if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { diag.inverse(inv_diag); }
+      // TMP diag and inv_diag should directly created and filled on the GPU
+      raft::copy(d_inv_diag.data(), inv_diag.data(), inv_diag.size(), stream_view_);
+      inv_sqrt_diag.set_scalar(1.0);
+      if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { inv_diag.sqrt(inv_sqrt_diag); }
+    }
 
     if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
 
-    // Copy A into AD
-    AD = lp.A;
-    if (!use_augmented && n_dense_columns > 0) {
-      cols_to_remove.resize(lp.num_cols, 0);
-      for (i_t k : dense_columns_unordered) {
-        cols_to_remove[k] = 1;
-      }
-      d_cols_to_remove.resize(cols_to_remove.size(), stream_view_);
-      raft::copy(
-        d_cols_to_remove.data(), cols_to_remove.data(), cols_to_remove.size(), stream_view_);
-      dense_columns.clear();
-      dense_columns.reserve(n_dense_columns);
-      for (i_t j = 0; j < lp.num_cols; j++) {
-        if (cols_to_remove[j]) { dense_columns.push_back(j); }
-      }
-      AD.remove_columns(cols_to_remove);
+    {
+      raft::common::nvtx::range scope("Barrier: LP Data: AD matrix setup");
+      // Copy A into AD
+      AD = lp.A;
+      if (!use_augmented && n_dense_columns > 0) {
+        cols_to_remove.resize(lp.num_cols, 0);
+        for (i_t k : dense_columns_unordered) {
+          cols_to_remove[k] = 1;
+        }
+        d_cols_to_remove.resize(cols_to_remove.size(), stream_view_);
+        raft::copy(
+          d_cols_to_remove.data(), cols_to_remove.data(), cols_to_remove.size(), stream_view_);
+        dense_columns.clear();
+        dense_columns.reserve(n_dense_columns);
+        for (i_t j = 0; j < lp.num_cols; j++) {
+          if (cols_to_remove[j]) { dense_columns.push_back(j); }
+        }
+        AD.remove_columns(cols_to_remove);
 
-      sparse_mark.resize(lp.num_cols, 1);
-      for (i_t k : dense_columns) {
-        sparse_mark[k] = 0;
-      }
+        sparse_mark.resize(lp.num_cols, 1);
+        for (i_t k : dense_columns) {
+          sparse_mark[k] = 0;
+        }
 
-      A_dense.resize(AD.m, n_dense_columns);
-      i_t k = 0;
-      for (i_t j : dense_columns) {
-        A_dense.from_sparse(lp.A, j, k++);
+        A_dense.resize(AD.m, n_dense_columns);
+        i_t k = 0;
+        for (i_t j : dense_columns) {
+          A_dense.from_sparse(lp.A, j, k++);
+        }
       }
-    }
 
-    AD.transpose(AT);
+      AD.transpose(AT);
+    }
 
     // device_AD / device_A / ADAT path is only used when forming ADAT (!use_augmented).
     if (!use_augmented) {
+      raft::common::nvtx::range scope("Barrier: LP Data: device AD path");
       device_AD.copy(AD, handle_ptr->get_stream());
       d_original_A_values.resize(device_AD.x.size(), handle_ptr->get_stream());
       raft::copy(d_original_A_values.data(),
@@ -421,44 +502,185 @@ class iteration_data_t {
     }
 
     if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
-    i_t factorization_size = use_augmented ? lp.num_rows + lp.num_cols : lp.num_rows;
-    chol =
-      std::make_unique<sparse_cholesky_cudss_t<i_t, f_t>>(handle_ptr, settings, factorization_size);
-    chol->set_positive_definite(false);
+    {
+      raft::common::nvtx::range scope("Barrier: LP Data: Cholesky init");
+      i_t factorization_size = use_augmented ? lp.num_rows + lp.num_cols : lp.num_rows;
+      chol = std::make_unique<sparse_cholesky_cudss_t<i_t, f_t>>(
+        handle_ptr, settings, factorization_size);
+      chol->set_positive_definite(false);
+    }
     if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
-    // Perform symbolic analysis
-    symbolic_status = 0;
-    if (use_augmented) {
-      // Build the sparsity pattern of the augmented system
-      form_augmented(true);
-      if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
-      symbolic_status = chol->analyze(device_augmented);
-    } else {
-      form_adat(true);
-      if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
-      symbolic_status = chol->analyze(device_ADAT);
+    {
+      raft::common::nvtx::range scope("Barrier: LP Data: symbolic analysis");
+      // Perform symbolic analysis
+      symbolic_status = 0;
+      if (use_augmented) {
+        {
+          raft::common::nvtx::range form_scope("Barrier: LP Data: form augmented");
+          // Build the sparsity pattern of the augmented system
+          form_augmented(true);
+        }
+        if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
+        symbolic_status = chol->analyze(device_augmented);
+      } else {
+        {
+          raft::common::nvtx::range form_scope("Barrier: LP Data: form ADAT");
+          form_adat(true);
+        }
+        if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; }
+        symbolic_status = chol->analyze(device_ADAT);
+      }
+    }
+  }
+
+  bool has_cones() const { return cones_.has_value(); }
+
+  cone_data_t<i_t, f_t>& cones()
+  {
+    cuopt_assert(cones_.has_value(), "second-order cone data is not initialized");
+    return *cones_;
+  }
+
+  const cone_data_t<i_t, f_t>& cones() const
+  {
+    cuopt_assert(cones_.has_value(), "second-order cone data is not initialized");
+    return *cones_;
+  }
+
+  i_t cone_count() const { return has_cones() ? cones_->n_cones : i_t(0); }
+
+  i_t cone_entry_count() const
+  {
+    return has_cones() ? static_cast<i_t>(cones_->n_cone_entries) : i_t(0);
+  }
+
+  i_t cone_start() const { return cone_var_start_; }
+
+  i_t cone_end() const { return cone_start() + cone_entry_count(); }
+
+  i_t linear_xz_size(std::size_t full_xz_size) const
+  {
+    return has_cones() ? cone_start() : static_cast<i_t>(full_xz_size);
+  }
+
+  bool is_cone_variable(i_t variable) const
+  {
+    return has_cones() && variable >= cone_start() && variable < cone_end();
+  }
+
+  f_t complementarity_degree(std::size_t num_primal_variables, i_t num_upper_bounds) const
+  {
+    const bool has_soc = has_cones();
+    f_t degree = static_cast<f_t>(num_primal_variables) + static_cast<f_t>(num_upper_bounds);
+    // Native QP free variables (linear only): no x·z complementarity in the barrier degree.
+    degree -= static_cast<f_t>(n_native_free_linear);
+    if (has_soc) {
+      degree -= static_cast<f_t>(cone_entry_count());
+      degree += static_cast<f_t>(cone_count());
     }
+    return degree;
   }
 
   void form_augmented(bool first_call = false)
   {
-    i_t n                    = A.n;
-    i_t m                    = A.m;
-    i_t nnzA                 = A.col_start[n];
-    i_t nnzQ                 = Q.n > 0 ? Q.col_start[n] : 0;
-    i_t factorization_size   = n + m;
-    const f_t dual_perturb   = 0.0;
-    const f_t primal_perturb = 1e-6;
+    i_t n                  = A.n;
+    i_t m                  = A.m;
+    i_t nnzA               = A.col_start[n];
+    i_t nnzQ               = Q.n > 0 ? Q.col_start[n] : 0;
+    i_t factorization_size = n + m;
+
+    const bool has_soc  = has_cones();
+    const i_t m_c       = cone_entry_count();
+    i_t total_block_nnz = 0;
+
+    std::vector<std::size_t> cone_offsets_host;
+    std::vector<std::size_t> cone_block_offsets_host;
+
     if (first_call) {
-      i_t new_nnz = 2 * nnzA + n + m + nnzQ;
+      if (has_soc) {
+        raft::common::nvtx::range scope("Barrier: augmented: SOC offsets");
+        // Initialize the cone block offsets
+        const i_t n_cones = cone_count();
+        cone_offsets_host.resize(n_cones + 1);
+        cone_block_offsets_host.resize(n_cones + 1);
+        raft::copy(
+          cone_offsets_host.data(), cones().cone_offsets.data(), n_cones + 1, stream_view_);
+        handle_ptr->sync_stream();
+        for (i_t k = 0; k < n_cones; ++k) {
+          const auto q_k                 = cone_offsets_host[k + 1] - cone_offsets_host[k];
+          cone_block_offsets_host[k + 1] = cone_block_offsets_host[k] + q_k * q_k;
+        }
+        total_block_nnz = static_cast<i_t>(cone_block_offsets_host[n_cones]);
+      }
+
+      i_t new_nnz = 2 * nnzA + n + m + nnzQ + total_block_nnz;  // conservative estimate of nnz
       csr_matrix_t<i_t, f_t> augmented_CSR(n + m, n + m, new_nnz);
       std::vector<i_t> augmented_diagonal_indices(n + m, -1);
+      std::vector<i_t> cone_csr_indices_host(total_block_nnz, -1);
+      std::vector<f_t> cone_Q_values_host(total_block_nnz, f_t(0));
       i_t q            = 0;
       i_t off_diag_Qnz = 0;
 
-      for (i_t i = 0; i < n; i++) {
+      {
+        raft::common::nvtx::range scope("Barrier: augmented: host CSR build");
+        for (i_t i = 0; i < n; i++) {
         augmented_CSR.row_start[i] = q;
-        if (nnzQ == 0) {
+
+        const bool is_cone_row = is_cone_variable(i);
+
+        if (is_cone_row) {
+          // Determine which cone this variable belongs to and its local row
+          i_t local_idx = i - cone_start();
+          i_t k         = 0;
+          while (k + 1 < cone_count() &&
+                 cone_offsets_host[k + 1] <= static_cast<std::size_t>(local_idx)) {
+            k++;
+          }
+          i_t local_r =
+            static_cast<i_t>(static_cast<std::size_t>(local_idx) - cone_offsets_host[k]);
+          i_t q_k            = static_cast<i_t>(cone_offsets_host[k + 1] - cone_offsets_host[k]);
+          i_t cone_col_start = cone_start() + static_cast<i_t>(cone_offsets_host[k]);
+          i_t block_base     = static_cast<i_t>(cone_block_offsets_host[k]) + local_r * q_k;
+
+          // Merge-join: Q entries (sorted) with dense cone block columns (contiguous)
+          i_t qp    = (nnzQ > 0) ? Q.col_start[i] : 0;
+          i_t q_end = (nnzQ > 0) ? Q.col_start[i + 1] : 0;
+
+          // Q entries before cone block
+          while (qp < q_end && Q.i[qp] < cone_col_start) {
+            augmented_CSR.j[q]   = Q.i[qp];
+            augmented_CSR.x[q++] = -Q.x[qp];
+            off_diag_Qnz++;
+            qp++;
+          }
+
+          // Dense cone block, absorbing any Q entries that fall inside
+          for (i_t c = 0; c < q_k; c++) {
+            i_t col         = cone_col_start + c;
+            f_t q_contrib   = f_t(0);
+            f_t initial_val = (c == local_r) ? f_t(-dual_perturb)
+                                             : f_t(0);  // diagonal entry of the cone block column
+
+            if (qp < q_end && Q.i[qp] == col) {
+              q_contrib = Q.x[qp];
+              qp++;
+            }
+
+            cone_csr_indices_host[block_base + c] = q;
+            cone_Q_values_host[block_base + c]    = q_contrib;
+            if (col == i) { augmented_diagonal_indices[i] = q; }
+            augmented_CSR.j[q]   = col;
+            augmented_CSR.x[q++] = initial_val - q_contrib;
+          }
+
+          // Q entries after cone block
+          while (qp < q_end) {
+            augmented_CSR.j[q]   = Q.i[qp];
+            augmented_CSR.x[q++] = -Q.x[qp];
+            off_diag_Qnz++;
+            qp++;
+          }
+        } else if (nnzQ == 0) {
           augmented_diagonal_indices[i] = q;
           augmented_CSR.j[q]            = i;
           augmented_CSR.x[q++]          = -diag[i] - dual_perturb;
@@ -487,42 +709,61 @@ class iteration_data_t {
         // AT block, we can use A in csc directly
         const i_t col_beg = A.col_start[i];
         const i_t col_end = A.col_start[i + 1];
-        for (i_t p = col_beg; p < col_end; ++p) {
-          augmented_CSR.j[q]   = A.i[p] + n;
-          augmented_CSR.x[q++] = A.x[p];
+          for (i_t p = col_beg; p < col_end; ++p) {
+            augmented_CSR.j[q]   = A.i[p] + n;
+            augmented_CSR.x[q++] = A.x[p];
+          }
+        }
+
+        for (i_t k = n; k < n + m; ++k) {
+          // A block, we can use AT in csc directly
+          augmented_CSR.row_start[k] = q;
+          const i_t l                = k - n;
+          const i_t col_beg          = AT.col_start[l];
+          const i_t col_end          = AT.col_start[l + 1];
+          for (i_t p = col_beg; p < col_end; ++p) {
+            augmented_CSR.j[q]   = AT.i[p];
+            augmented_CSR.x[q++] = AT.x[p];
+          }
+          augmented_diagonal_indices[k] = q;
+          augmented_CSR.j[q]            = k;
+          augmented_CSR.x[q++]          = primal_perturb;
         }
+        augmented_CSR.row_start[n + m] = q;
+        augmented_CSR.nz_max           = q;
+        augmented_CSR.j.resize(q);
+        augmented_CSR.x.resize(q);
+        i_t expected_nnz = 2 * nnzA + (n - m_c) + total_block_nnz + m + off_diag_Qnz;
+        settings_.log.debug("augmented nz %d predicted %d\n", q, expected_nnz);
+        cuopt_assert(q == expected_nnz, "augmented nnz != predicted");
+        cuopt_assert(A.col_start[n] == AT.col_start[m], "A nz != AT nz");
       }
 
-      for (i_t k = n; k < n + m; ++k) {
-        // A block, we can use AT in csc directly
-        augmented_CSR.row_start[k] = q;
-        const i_t l                = k - n;
-        const i_t col_beg          = AT.col_start[l];
-        const i_t col_end          = AT.col_start[l + 1];
-        for (i_t p = col_beg; p < col_end; ++p) {
-          augmented_CSR.j[q]   = AT.i[p];
-          augmented_CSR.x[q++] = AT.x[p];
+      {
+        raft::common::nvtx::range scope("Barrier: augmented: device upload");
+        device_augmented.copy(augmented_CSR, handle_ptr->get_stream());
+        d_augmented_diagonal_indices_.resize(augmented_diagonal_indices.size(),
+                                             handle_ptr->get_stream());
+        raft::copy(d_augmented_diagonal_indices_.data(),
+                   augmented_diagonal_indices.data(),
+                   augmented_diagonal_indices.size(),
+                   handle_ptr->get_stream());
+
+        if (has_soc) {
+          d_cone_csr_indices_.resize(total_block_nnz, handle_ptr->get_stream());
+          raft::copy(d_cone_csr_indices_.data(),
+                     cone_csr_indices_host.data(),
+                     total_block_nnz,
+                     handle_ptr->get_stream());
+          d_cone_Q_values_.resize(total_block_nnz, handle_ptr->get_stream());
+          raft::copy(d_cone_Q_values_.data(),
+                     cone_Q_values_host.data(),
+                     total_block_nnz,
+                     handle_ptr->get_stream());
         }
-        augmented_diagonal_indices[k] = q;
-        augmented_CSR.j[q]            = k;
-        augmented_CSR.x[q++]          = primal_perturb;
-      }
-      augmented_CSR.row_start[n + m] = q;
-      augmented_CSR.nz_max           = q;
-      augmented_CSR.j.resize(q);
-      augmented_CSR.x.resize(q);
-      settings_.log.debug("augmented nz %d predicted %d\n", q, off_diag_Qnz + nnzA + n);
-      cuopt_assert(q == 2 * nnzA + n + m + off_diag_Qnz, "augmented nnz != predicted");
-      cuopt_assert(A.col_start[n] == AT.col_start[m], "A nz != AT nz");
-
-      device_augmented.copy(augmented_CSR, handle_ptr->get_stream());
-      d_augmented_diagonal_indices_.resize(augmented_diagonal_indices.size(),
-                                           handle_ptr->get_stream());
-      raft::copy(d_augmented_diagonal_indices_.data(),
-                 augmented_diagonal_indices.data(),
-                 augmented_diagonal_indices.size(),
-                 handle_ptr->get_stream());
-      handle_ptr->sync_stream();
+
+        handle_ptr->sync_stream();
+      }
 #ifdef CHECK_SYMMETRY
       csc_matrix_t<i_t, f_t> augmented_transpose(1, 1, 1);
       augmented.transpose(augmented_transpose);
@@ -537,20 +778,26 @@ class iteration_data_t {
       cuopt_assert(error.norm1() <= 1e-2, "|| Aug - Aug^T ||_1 > 1e-2");
 #endif
     } else {
+      const i_t linear_n = has_soc ? cone_start() : n;
+
+      // Primal diagonal: linear block includes dual_perturb; SOC block is filled by scatter below.
+      // Native free variables use barrier D = 0 in augmented_multiply; omit span_diag[j] here so
+      // the factorized matrix matches the matvec (only -q_diag - dual_perturb on the diagonal).
       thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()),
                          thrust::make_counting_iterator<i_t>(0),
-                         i_t(n),
+                         linear_n,
                          [span_x             = cuopt::make_span(device_augmented.x),
                           span_diag_indices  = cuopt::make_span(d_augmented_diagonal_indices_),
                           span_q_diag        = cuopt::make_span(d_Q_diag_),
                           span_diag          = cuopt::make_span(d_diag_),
+                          span_is_native_free_linear       = cuopt::make_span(d_is_native_free_linear_),
                           dual_perturb_value = dual_perturb] __device__(i_t j) {
-                           f_t q_diag = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0;
-                           span_x[span_diag_indices[j]] =
-                             -q_diag - span_diag[j] - dual_perturb_value;
+                           f_t q_diag     = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0;
+                           const f_t d_j  = (span_is_native_free_linear.size() > 0 && span_is_native_free_linear[j]) ? f_t(0) : span_diag[j];
+                           span_x[span_diag_indices[j]] = -q_diag - d_j - dual_perturb_value;
                          });
-
       RAFT_CHECK_CUDA(handle_ptr->get_stream());
+
       thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()),
                          thrust::make_counting_iterator<i_t>(n),
                          i_t(m),
@@ -560,6 +807,16 @@ class iteration_data_t {
                            span_x[span_diag_indices[j]] = primal_perturb_value;
                          });
       RAFT_CHECK_CUDA(handle_ptr->get_stream());
+
+      if (has_soc) {
+        scatter_hessian_into_augmented(cones(),
+                                       device_augmented.x,
+                                       d_cone_csr_indices_,
+                                       d_cone_Q_values_,
+                                       handle_ptr->get_stream(),
+                                       dual_perturb);
+        RAFT_CHECK_CUDA(handle_ptr->get_stream());
+      }
     }
   }
 
@@ -570,43 +827,53 @@ class iteration_data_t {
     float64_t start_form_adat = tic();
     const i_t m               = AD.m;
 
-    raft::copy(device_AD.x.data(),
-               d_original_A_values.data(),
-               d_original_A_values.size(),
-               handle_ptr->get_stream());
-    if (n_dense_columns > 0) {
-      // Adjust inv_diag
-      d_inv_diag_prime.resize(AD.n, stream_view_);
-      // Copy If
-      cub::DeviceSelect::Flagged(
-        d_flag_buffer.data(),
-        flag_buffer_size,
-        d_inv_diag.data(),
-        thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not<i_t>{}),
-        d_inv_diag_prime.data(),
-        d_num_flag.data(),
-        d_inv_diag.size(),
-        stream_view_);
-      RAFT_CHECK_CUDA(stream_view_);
-    } else {
-      d_inv_diag_prime.resize(inv_diag.size(), stream_view_);
-      raft::copy(d_inv_diag_prime.data(), d_inv_diag.data(), inv_diag.size(), stream_view_);
+    {
+      raft::common::nvtx::range scope("Barrier: Form ADAT: restore A");
+      raft::copy(device_AD.x.data(),
+                 d_original_A_values.data(),
+                 d_original_A_values.size(),
+                 handle_ptr->get_stream());
+    }
+    {
+      raft::common::nvtx::range scope("Barrier: Form ADAT: inv_diag prime");
+      if (n_dense_columns > 0) {
+        // Adjust inv_diag
+        d_inv_diag_prime.resize(AD.n, stream_view_);
+        // Copy If
+        cub::DeviceSelect::Flagged(
+          d_flag_buffer.data(),
+          flag_buffer_size,
+          d_inv_diag.data(),
+          thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not<i_t>{}),
+          d_inv_diag_prime.data(),
+          d_num_flag.data(),
+          d_inv_diag.size(),
+          stream_view_);
+        RAFT_CHECK_CUDA(stream_view_);
+      } else {
+        d_inv_diag_prime.resize(inv_diag.size(), stream_view_);
+        raft::copy(d_inv_diag_prime.data(), d_inv_diag.data(), inv_diag.size(), stream_view_);
+      }
     }
 
     cuopt_assert(static_cast<i_t>(d_inv_diag_prime.size()) == AD.n,
                  "inv_diag_prime.size() != AD.n");
 
-    thrust::for_each_n(rmm::exec_policy(stream_view_),
-                       thrust::make_counting_iterator<i_t>(0),
-                       i_t(device_AD.x.size()),
-                       [span_x       = cuopt::make_span(device_AD.x),
-                        span_scale   = cuopt::make_span(d_inv_diag_prime),
-                        span_col_ind = cuopt::make_span(device_AD.col_index)] __device__(i_t i) {
-                         span_x[i] *= span_scale[span_col_ind[i]];
-                       });
-    RAFT_CHECK_CUDA(stream_view_);
+    {
+      raft::common::nvtx::range scope("Barrier: Form ADAT: scale AD");
+      thrust::for_each_n(rmm::exec_policy(stream_view_),
+                         thrust::make_counting_iterator<i_t>(0),
+                         i_t(device_AD.x.size()),
+                         [span_x       = cuopt::make_span(device_AD.x),
+                          span_scale   = cuopt::make_span(d_inv_diag_prime),
+                          span_col_ind = cuopt::make_span(device_AD.col_index)] __device__(i_t i) {
+                           span_x[i] *= span_scale[span_col_ind[i]];
+                         });
+      RAFT_CHECK_CUDA(stream_view_);
+    }
     if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; }
     if (first_call) {
+      raft::common::nvtx::range scope("Barrier: Form ADAT: cusparse init");
       try {
         initialize_cusparse_data<i_t, f_t>(
           handle_ptr, device_A, device_AD, device_ADAT, cusparse_info);
@@ -617,8 +884,11 @@ class iteration_data_t {
     }
     if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; }
 
-    multiply_kernels<i_t, f_t>(handle_ptr, device_A, device_AD, device_ADAT, cusparse_info);
-    handle_ptr->sync_stream();
+    {
+      raft::common::nvtx::range scope("Barrier: Form ADAT: ADAT multiply");
+      multiply_kernels<i_t, f_t>(handle_ptr, device_A, device_AD, device_ADAT, cusparse_info);
+      handle_ptr->sync_stream();
+    }
 
     auto adat_nnz       = device_ADAT.row_start.element(device_ADAT.m, handle_ptr->get_stream());
     float64_t adat_time = toc(start_form_adat);
@@ -1436,8 +1706,9 @@ class iteration_data_t {
                           f_t beta,
                           rmm::device_uvector<f_t>& y)
   {
-    const i_t m = A.m;
-    const i_t n = A.n;
+    const i_t m        = A.m;
+    const i_t n        = A.n;
+    const bool has_soc = has_cones();
 
     rmm::device_uvector<f_t> d_x1(n, handle_ptr->get_stream());
     rmm::device_uvector<f_t> d_x2(m, handle_ptr->get_stream());
@@ -1452,19 +1723,36 @@ class iteration_data_t {
     // y1 <- alpha ( -D * x_1 + A^T x_2) + beta * y1
 
     rmm::device_uvector<f_t> d_r1(n, handle_ptr->get_stream());
+    thrust::fill_n(rmm::exec_policy(stream_view_), d_r1.begin(), n, f_t(0));
 
-    // diag.pairwise_product(x1, r1);
-    // r1 <- D * x_1
-    pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), n, stream_view_);
+    // r1 <- D * x_1 on linear indices; barrier D is zero on native free variables
+    const i_t linear_n = has_soc ? cone_start() : n;
+    if (n_native_free_linear > 0) {
+      pairwise_multiply_skip_native_free_linear(
+        d_x1.data(), d_diag_.data(), d_is_native_free_linear_.data(), d_r1.data(), linear_n, stream_view_);
+    } else {
+      pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), linear_n, stream_view_);
+    }
+    RAFT_CHECK_CUDA(stream_view_);
 
-    // r1 <- Q x1 + D x1
+    // r1 <- D * x_1 + H x_1  (cone Hessian block H = S^T S; accumulate_cone_hessian_matvec)
+    if (has_soc) {
+      const i_t m_c = cone_entry_count();
+      accumulate_cone_hessian_matvec(raft::device_span<const f_t>(d_x1.data() + cone_start(), m_c),
+                                     cones(),
+                                     raft::device_span<f_t>(d_r1.data() + cone_start(), m_c),
+                                     stream_view_);
+      RAFT_CHECK_CUDA(stream_view_);
+    }
+
+    // r1 <- Q x1 + D x1 + H x1  (cone: same H as above)
     if (Q.n > 0) {
       // matrix_vector_multiply(Q, 1.0, x1, 1.0, r1);
       cusparse_Q_view_.spmv(1.0, d_x1, 1.0, d_r1);
     }
 
     // y1 <- - alpha * r1 + beta * y1
-    // y1.axpy(-alpha, r1, beta);
+    // flip the sign of r1 = (Q x1 + D x1 + H x1)
     axpy(-alpha, d_r1.data(), beta, d_y1.data(), d_y1.data(), n, stream_view_);
 
     // matrix_transpose_vector_multiply(A, alpha, x2, 1.0, y1);
@@ -1554,13 +1842,22 @@ class iteration_data_t {
   std::vector<f_t> Qdiag;
   bool Q_diagonal;
   rmm::device_uvector<i_t> d_augmented_diagonal_indices_;
+  rmm::device_uvector<i_t> d_cone_csr_indices_;
+  rmm::device_uvector<f_t> d_cone_Q_values_;
   bool indefinite_Q;
   cusparse_view_t<i_t, f_t> cusparse_Q_view_;
 
+  std::optional<cone_data_t<i_t, f_t>> cones_;
+  i_t cone_var_start_ = 0;
+
   bool use_augmented;
   i_t symbolic_status;
-  i_t n_free_vars{0};
-  rmm::device_uvector<i_t> d_is_free_;  // 1 if variable is free (QP only), 0 otherwise
+  i_t n_native_free_linear{0};
+  rmm::device_uvector<i_t> d_is_native_free_linear_;  // 1 if native free linear (j < cone_start), else 0
+
+  // Adaptive regularization for the augmented system
+  f_t dual_perturb{1e-8};
+  f_t primal_perturb{1e-8};
 
   std::unique_ptr<sparse_cholesky_base_t<i_t, f_t>> chol;
 
@@ -1656,6 +1953,8 @@ class iteration_data_t {
   rmm::device_uvector<f_t> d_complementarity_xz_rhs_;
   rmm::device_uvector<f_t> d_complementarity_wv_rhs_;
   rmm::device_uvector<f_t> d_dual_rhs_;
+  rmm::device_uvector<f_t> d_complementarity_target_;
+  rmm::device_uvector<f_t> d_cone_hessian_dx_;
 
   rmm::device_uvector<f_t> d_Q_diag_;
   rmm::device_uvector<f_t> d_Qx_;
@@ -1665,6 +1964,9 @@ class iteration_data_t {
   transform_reduce_helper_t<f_t> transform_reduce_helper_;
   sum_reduce_helper_t<f_t> sum_reduce_helper_;
 
+  bool cone_combined_step_;
+  f_t cone_sigma_mu_;
+
   rmm::cuda_stream_view stream_view_;
 
   const simplex_solver_settings_t<i_t, f_t>& settings_;
@@ -1763,6 +2065,8 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
 {
   raft::common::nvtx::range fun_scope("Barrier: initial_point");
   const bool use_augmented = data.use_augmented;
+  const bool has_soc       = data.has_cones();
+  const bool has_native_free_linear      = data.n_native_free_linear > 0;
 
   // Perform a numerical factorization
   i_t status;
@@ -1892,6 +2196,50 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
 
   dense_vector_t<i_t, f_t> dual_res(lp.num_cols);
   float64_t epsilon_adjust = 10.0;
+
+  // Data-dependent initial point for SOCP, following SeDuMi (Sturm, 1999).
+  //   mu = sqrt((1 + ||b||_inf) * (1 + ||c||_inf))
+  //   primal and dual: x = mu * e_K,  z = mu * e_K
+  // where e_K is the identity of the symmetric cone:
+  //   LP block: e = 1,  SOC block: e = (sqrt(2), 0, ..., 0)
+  if (has_soc) {
+    const i_t cs     = data.cone_start();
+    const f_t norm_b = vector_norm_inf<i_t, f_t>(lp.rhs);
+    const f_t norm_c = vector_norm_inf<i_t, f_t>(lp.objective);
+    const f_t mu     = std::sqrt((1.0 + norm_b) * (1.0 + norm_c));
+    const f_t sqrt2  = std::sqrt(2.0);
+    const f_t x_soc  = mu * sqrt2;
+    const f_t z_soc  = mu * sqrt2;
+    // Linear orthant
+    for (i_t j = 0; j < cs; ++j) {
+      data.x[j] = mu;
+      data.z[j] = mu;
+    }
+    if (has_native_free_linear) {
+      for (i_t j : presolve_info.native_free_linear_indices) {
+        if (j < cs) { data.z[j] = 0.0; }
+      }
+    }
+    // SOC blocks
+    i_t off = 0;
+    for (i_t k = 0; k < static_cast<i_t>(lp.second_order_cone_dims.size()); ++k) {
+      i_t q_k          = lp.second_order_cone_dims[k];
+      data.x[cs + off] = x_soc;
+      data.z[cs + off] = z_soc;
+      for (i_t j = 1; j < q_k; ++j) {
+        data.x[cs + off + j] = 0.0;
+        data.z[cs + off + j] = 0.0;
+      }
+      off += q_k;
+    }
+    data.y.set_scalar(0.0);
+    if (data.n_upper_bounds > 0) {
+      data.w.set_scalar(mu);
+      data.v.set_scalar(mu);
+    }
+    return 0;
+  }
+
   if (settings.barrier_dual_initial_point == -1 || settings.barrier_dual_initial_point == 0) {
     // Use the dual starting point suggested by the paper
     // On Implementing Mehrotra’s Predictor–Corrector Interior-Point Method for Linear Programming
@@ -1924,8 +2272,10 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
         data.v[k] = -c[j] + epsilon;
       }
     }
-    // Now hande the case with no upper bounds
+    // Now handle the case with no upper bounds (skip cone variables)
+    const i_t cone_end = data.cone_end();
     for (i_t j = 0; j < lp.num_cols; j++) {
+      if (has_soc && j >= data.cone_start() && j < cone_end) continue;
       if (lp.upper[j] == inf) {
         if (c[j] > epsilon_adjust) {
           data.z[j] = c[j];
@@ -1935,8 +2285,8 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
       }
     }
     // Free variables have z = 0 (no complementarity condition)
-    if (data.n_free_vars > 0) {
-      for (i_t j : presolve_info.free_variable_indices) {
+    if (has_native_free_linear) {
+      for (i_t j : presolve_info.native_free_linear_indices) {
         data.z[j] = 0.0;
       }
     }
@@ -1960,7 +2310,7 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
     data.v.multiply_scalar(-1.0);
 
     data.v.ensure_positive(epsilon_adjust);
-    data.z.ensure_positive(epsilon_adjust);
+    data.z.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count());
   } else {
     // First compute rhs = A*Dinv*c
     dense_vector_t<i_t, f_t> rhs(lp.num_rows);
@@ -1984,7 +2334,7 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
     data.gather_upper_bounds(data.z, data.v);
     data.v.multiply_scalar(-1.0);
     data.v.ensure_positive(epsilon_adjust);
-    data.z.ensure_positive(epsilon_adjust);
+    data.z.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count());
   }
 
   // Verify A'*y + z - E*v  - Q*x = c
@@ -2001,24 +2351,35 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
   settings.log.printf("||A^T y + z - E*v - Q*x - c ||: %e\n",
                       vector_norm2<i_t, f_t>(data.dual_residual));
 #endif
-  // Make sure (w, x, v, z) > 0
-  if (data.n_free_vars > 0) {
+  // Make sure (w, x, v, z) > 0. Cone blocks are not nonnegative orthant variables — skip them for
+  // box positivity; NT scaling keeps cone iterates feasible separately.
+  std::vector<f_t> free_x_save;
+  if (has_native_free_linear) {
+    free_x_save.resize(data.n_native_free_linear);
+    for (i_t k = 0; k < data.n_native_free_linear; ++k) {
+      free_x_save[k] = data.x[presolve_info.native_free_linear_indices[k]];
+    }
+  }
+  data.w.ensure_positive(epsilon_adjust);
+  if (has_soc) {
+    data.x.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count());
+  } else if (has_native_free_linear) {
     std::vector<i_t> nonnegative_variables(data.x.size(), 1);
-    for (i_t j : presolve_info.free_variable_indices) {
+    for (i_t j : presolve_info.native_free_linear_indices) {
       nonnegative_variables[j] = 0;
     }
-
     data.x.ensure_positive(epsilon_adjust, nonnegative_variables);
-
-    for (i_t j : presolve_info.free_variable_indices) {
-      data.z[j] = 0.0;
-    }
-
   } else {
     data.x.ensure_positive(epsilon_adjust);
   }
-  data.w.ensure_positive(epsilon_adjust);
-
+  // Native free variables (QP/SOCP): restore possibly negative x and set z = 0
+  if (has_native_free_linear) {
+    for (i_t k = 0; k < data.n_native_free_linear; ++k) {
+      i_t j     = presolve_info.native_free_linear_indices[k];
+      data.x[j] = free_x_save[k];
+      data.z[j] = 0.0;
+    }
+  }
 #ifdef PRINT_INFO
   settings.log.printf("min v %e min z %e\n", data.v.minimum(), data.z.minimum());
 #endif
@@ -2202,10 +2563,27 @@ void barrier_solver_t<i_t, f_t>::gpu_compute_residual_norms(const rmm::device_uv
   primal_residual_norm =
     std::max(device_vector_norm_inf<i_t, f_t>(data.d_primal_residual_, stream_view_),
              device_vector_norm_inf<i_t, f_t>(data.d_bound_residual_, stream_view_));
-  dual_residual_norm = device_vector_norm_inf<i_t, f_t>(data.d_dual_residual_, stream_view_);
+  dual_residual_norm       = device_vector_norm_inf<i_t, f_t>(data.d_dual_residual_, stream_view_);
+  const bool has_soc       = data.has_cones();
+  const i_t linear_xz_size = data.linear_xz_size(data.d_complementarity_xz_residual_.size());
+  auto linear_xz_span =
+    raft::device_span<const f_t>(data.d_complementarity_xz_residual_.data(), linear_xz_size);
   complementarity_residual_norm =
-    std::max(device_vector_norm_inf<i_t, f_t>(data.d_complementarity_xz_residual_, stream_view_),
+    std::max(device_vector_norm_inf<i_t, f_t>(linear_xz_span, stream_view_),
              device_vector_norm_inf<i_t, f_t>(data.d_complementarity_wv_residual_, stream_view_));
+  if (has_soc) {
+    f_t cone_complementarity_norm = f_t(0);
+    auto cone_dot                 = data.cones().scratch.template get_slot<0>();
+    data.cones().segmented_sum(
+      data.d_complementarity_xz_residual_.data() + data.cone_start(), cone_dot, stream_view_);
+    cone_complementarity_norm = thrust::reduce(rmm::exec_policy(stream_view_),
+                                               cone_dot.begin(),
+                                               cone_dot.end(),
+                                               f_t(0),
+                                               thrust::maximum<f_t>());
+    complementarity_residual_norm =
+      std::max(complementarity_residual_norm, cone_complementarity_norm);
+  }
 }
 
 template <typename i_t, typename f_t>
@@ -2213,42 +2591,58 @@ f_t barrier_solver_t<i_t, f_t>::gpu_max_step_to_boundary(iteration_data_t<i_t, f
                                                          const rmm::device_uvector<f_t>& x,
                                                          const rmm::device_uvector<f_t>& dx)
 {
-  // For x-sized vectors with free variables, skip free vars in ratio test
-  const bool has_free = data.n_free_vars > 0 && static_cast<i_t>(x.size()) == lp.num_cols;
-
-  if (has_free) {
-    auto is_free_ptr     = data.d_is_free_.data();
-    auto ratio_test_free = [is_free_ptr] HD(const thrust::tuple<f_t, f_t, i_t> t) {
-      const f_t dx_val  = thrust::get<0>(t);
-      const f_t x_val   = thrust::get<1>(t);
-      const i_t is_free = thrust::get<2>(t);
-      if (is_free) return f_t(1.0);
-      if (dx_val < f_t(0.0)) return -x_val / dx_val;
+  const bool has_soc  = data.has_cones() && static_cast<i_t>(x.size()) >= data.cone_end();
+  const bool has_native_free_linear = data.n_native_free_linear > 0 && static_cast<i_t>(x.size()) == lp.num_cols;
+
+  auto reduce_segment = [&](i_t start, i_t len) -> f_t {
+    if (len <= 0) { return f_t(1); }
+    if (has_native_free_linear) {
+      auto native_free_linear_ptr = data.d_is_native_free_linear_.data() + start;
+      // step size computation for nonnegative variables with free variables
+      auto ratio_test_free = [native_free_linear_ptr] HD(const thrust::tuple<f_t, f_t, i_t> t) {
+        const f_t dx_val  = thrust::get<0>(t);
+        const f_t x_val   = thrust::get<1>(t);
+        const i_t is_native_free_linear = thrust::get<2>(t);
+        if (is_native_free_linear) return f_t(1.0);
+        if (dx_val < f_t(0.0)) return -x_val / dx_val;
+        return f_t(1.0);
+      };
+      return data.transform_reduce_helper_.transform_reduce(
+        thrust::make_zip_iterator(dx.data() + start, x.data() + start, native_free_linear_ptr),
+        thrust::minimum<f_t>(),
+        ratio_test_free,
+        f_t(1.0),
+        len,
+        stream_view_);
+    }
+    // step size computation for nonnegative variables without free variables
+    auto ratio_test = [] HD(const thrust::tuple<f_t, f_t> t) {
+      const f_t dx = thrust::get<0>(t);
+      const f_t x  = thrust::get<1>(t);
+      if (dx < f_t(0.0)) return -x / dx;
       return f_t(1.0);
     };
-
     return data.transform_reduce_helper_.transform_reduce(
-      thrust::make_zip_iterator(dx.data(), x.data(), is_free_ptr),
+      thrust::make_zip_iterator(dx.data() + start, x.data() + start),
       thrust::minimum<f_t>(),
-      ratio_test_free,
+      ratio_test,
       f_t(1.0),
-      x.size(),
+      len,
       stream_view_);
-  }
+  };
 
-  return data.transform_reduce_helper_.transform_reduce(
-    thrust::make_zip_iterator(dx.data(), x.data()),
-    thrust::minimum<f_t>(),
-    [] HD(const thrust::tuple<f_t, f_t> t) {
-      const f_t dx = thrust::get<0>(t);
-      const f_t x  = thrust::get<1>(t);
+  // step size computation for when there are soc constraints
+  if (has_soc) {
+    const i_t cs         = data.cone_start();
+    const i_t mc         = data.cone_entry_count();
+    f_t alpha            = reduce_segment(0, cs);
+    const i_t tail_start = cs + mc;
+    const i_t tail_size  = static_cast<i_t>(x.size()) - tail_start;
+    if (tail_size > 0) { alpha = std::min(alpha, reduce_segment(tail_start, tail_size)); }
+    return alpha;
+  }
 
-      if (dx < f_t(0.0)) return -x / dx;
-      return f_t(1.0);
-    },
-    f_t(1.0),
-    x.size(),
-    stream_view_);
+  return reduce_segment(0, static_cast<i_t>(x.size()));
 }
 
 template <typename i_t, typename f_t>
@@ -2258,98 +2652,136 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
                                                              pinned_dense_vector_t<i_t, f_t>& dy,
                                                              pinned_dense_vector_t<i_t, f_t>& dv,
                                                              pinned_dense_vector_t<i_t, f_t>& dz,
+                                                             f_t& dual_perturb,
+                                                             f_t& primal_perturb,
                                                              f_t& max_residual)
 {
   raft::common::nvtx::range fun_scope("Barrier: compute_search_direction");
 
   const bool debug         = false;
   const bool use_augmented = data.use_augmented;
+  const bool has_soc       = data.has_cones();
+  const bool has_native_free_linear      = data.n_native_free_linear > 0;
+  const i_t m_c            = data.cone_entry_count();
+  const i_t cone_var_start = data.cone_start();
+  const i_t linear_size    = data.linear_xz_size(lp.num_cols);
+
+  // Linear (orthant) block only; SOC uses recover_cone_dz_from_target.
+  auto recover_linear_orthant_dz = [&](raft::device_span<const f_t> target,
+                                       raft::device_span<const f_t> z,
+                                       raft::device_span<const f_t> dx_span,
+                                       raft::device_span<const f_t> x,
+                                       raft::device_span<f_t> dz_span) {
+    if (dz_span.empty()) return;
+    if (has_native_free_linear) {
+      cub::DeviceTransform::Transform(
+        cuda::std::make_tuple(
+          target.data(), z.data(), dx_span.data(), x.data(), data.d_is_native_free_linear_.data()),
+        dz_span.data(),
+        dz_span.size(),
+        [] HD(f_t target_val, f_t z_val, f_t dx_val, f_t x_val, i_t is_native_free_linear) {
+          if (is_native_free_linear) return f_t(0);
+          return target_val - (z_val * dx_val) / x_val;
+        },
+        stream_view_.value());
+    } else {
+      cub::DeviceTransform::Transform(
+        cuda::std::make_tuple(target.data(), z.data(), dx_span.data(), x.data()),
+        dz_span.data(),
+        dz_span.size(),
+        [] HD(f_t target_val, f_t z_val, f_t dx_val, f_t x_val) {
+          return target_val - (z_val * dx_val) / x_val;
+        },
+        stream_view_.value());
+    }
+    RAFT_CHECK_CUDA(stream_view_);
+  };
 
-  {
-    raft::common::nvtx::range fun_scope("Barrier: GPU allocation and copies");
-
-    // TMP allocation and copy should happen only once where it's written in a first place
-    data.d_bound_rhs_.resize(data.bound_rhs.size(), stream_view_);
-    raft::copy(
-      data.d_bound_rhs_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream_view_);
-    data.d_x_.resize(data.x.size(), stream_view_);
-    raft::copy(data.d_x_.data(), data.x.data(), data.x.size(), stream_view_);
-    data.d_z_.resize(data.z.size(), stream_view_);
-    raft::copy(data.d_z_.data(), data.z.data(), data.z.size(), stream_view_);
-    data.d_w_.resize(data.w.size(), stream_view_);
-    raft::copy(data.d_w_.data(), data.w.data(), data.w.size(), stream_view_);
-    data.d_v_.resize(data.v.size(), stream_view_);
-    raft::copy(data.d_v_.data(), data.v.data(), data.v.size(), stream_view_);
-    data.d_upper_bounds_.resize(data.upper_bounds.size(), stream_view_);
-    raft::copy(data.d_upper_bounds_.data(),
-               data.upper_bounds.data(),
-               data.upper_bounds.size(),
-               stream_view_);
-    data.d_dy_.resize(dy.size(), stream_view_);
-    raft::copy(data.d_dy_.data(), dy.data(), dy.size(), stream_view_);
-    data.d_dx_.resize(dx.size(), stream_view_);
-    raft::copy(data.d_h_.data(), data.primal_rhs.data(), data.primal_rhs.size(), stream_view_);
-    raft::copy(data.d_dual_rhs_.data(), data.dual_rhs.data(), data.dual_rhs.size(), stream_view_);
-    data.d_dz_.resize(dz.size(), stream_view_);
-    data.d_dv_.resize(dv.size(), stream_view_);
-    data.d_dw_.resize(data.bound_rhs.size(), stream_view_);
-    raft::copy(data.d_dw_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream_view_);
-    data.d_dw_residual_.resize(data.n_upper_bounds, stream_view_);
-    data.d_wv_residual_.resize(data.d_complementarity_wv_rhs_.size(), stream_view_);
-    data.d_xz_residual_.resize(data.d_complementarity_xz_rhs_.size(), stream_view_);
-    data.d_primal_residual_.resize(lp.rhs.size(), stream_view_);
-    raft::copy(data.d_primal_residual_.data(), lp.rhs.data(), lp.rhs.size(), stream_view_);
-    data.d_bound_residual_.resize(data.bound_residual.size(), stream_view_);
-    data.d_upper_.resize(lp.upper.size(), stream_view_);
-    raft::copy(data.d_upper_.data(), lp.upper.data(), lp.upper.size(), stream_view_);
-  }
+  copy_step_rhs_to_device<i_t, f_t>(data, stream_view_);
+  data.d_dx_.resize(dx.size(), stream_view_);
+  data.d_dy_.resize(dy.size(), stream_view_);
+  data.d_dz_.resize(dz.size(), stream_view_);
+  data.d_dv_.resize(dv.size(), stream_view_);
 
   // Solves the linear system
   //
   //  dw dx dy dv dz
-  // [ 0 A  0   0  0 ] [ dw ] = [ rp  ]
-  // [ I E' 0   0  0 ] [ dx ]   [ rw  ]
-  // [ 0 0  A' -E  I ] [ dy ]   [ rd  ]
-  // [ 0 Z  0   0  X ] [ dv ]   [ rxz ]
-  // [ V 0  0   W  0 ] [ dz ]   [ rwv ]
+  // [ 0 A    0   0  0      ]  [ dw ]  = [ rp  ]
+  // [ I E'   0   0  0      ]  [ dx ]    [ rw  ]
+  // [ 0 0    A' -E  I      ]  [ dy ]    [ rd  ]
+  // [ 0 Z(S) 0   0  X(S^-T)]  [ dv ]    [ rxz ]
+  // [ V 0    0   W  0      ]  [ dz ]    [ rwv ]
+
+  // NT-scaling:
+  //  \lambda = Sx = S^-T z
+  //  Affine step: (\lambda + S \delta xa) \circ (\lambda + S^-T \delta za) = 0
+  //  S \delta xa + S^-T \delta za = - \lambda
+  //  \delta za = -S^T (S \delta xa + \lambda) = - S^T S \delta xa -S^T \lambda=  - S^T S \delta xa
+  //  - z
+  if (has_soc && !data.cone_combined_step_) {
+    auto& cones = data.cones();
+    cones.x     = raft::device_span<f_t>(data.d_x_.data() + cone_var_start, m_c);
+    cones.z     = raft::device_span<f_t>(data.d_z_.data() + cone_var_start, m_c);
+    launch_nt_scaling(cones, stream_view_);
+  }
 
   max_residual = 0.0;
   {
     raft::common::nvtx::range fun_scope("Barrier: GPU diag, inv diag and sqrt inv diag formation");
 
-    // diag = z ./ x
-    // For native free variables (QP): use Q diagonal if available, otherwise a static regularizer
-    if (data.n_free_vars > 0) {
-      constexpr f_t free_var_reg = 1e-7;
-      if (data.Q.n > 0 && data.Q_diagonal) {
+    // diag = z ./ x on the linear (non-cone) coordinates; cone block is overwritten below.
+    if (has_native_free_linear) {
+      if (use_augmented) {
+        // Augmented KKT: D = 0 on native free linear vars (Q curvature is in the KKT block, not D).
         cub::DeviceTransform::Transform(
           cuda::std::make_tuple(
-            data.d_z_.data(), data.d_x_.data(), data.d_is_free_.data(), data.d_Q_diag_.data()),
+            data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data()),
           data.d_diag_.data(),
-          data.d_diag_.size(),
-          [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_free, f_t q_jj) {
-            if (!is_free) return z_j / x_j;
-            return (q_jj > f_t(0)) ? f_t(0) : free_var_reg;
+          linear_size,
+          [] HD(f_t z_j, f_t x_j, i_t is_native_free_linear) {
+            return is_native_free_linear ? f_t(0) : (z_j / x_j);
           },
           stream_view_.value());
       } else {
-        cub::DeviceTransform::Transform(
-          cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_free_.data()),
-          data.d_diag_.data(),
-          data.d_diag_.size(),
-          [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_free) {
-            return is_free ? free_var_reg : (z_j / x_j);
-          },
-          stream_view_.value());
+        constexpr f_t free_var_reg = 1e-7;
+        if (data.Q.n > 0 && data.Q_diagonal) {
+          cub::DeviceTransform::Transform(
+            cuda::std::make_tuple(
+              data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data(), data.d_Q_diag_.data()),
+            data.d_diag_.data(),
+            linear_size,
+            [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_native_free_linear, f_t q_jj) {
+              if (!is_native_free_linear) return z_j / x_j;
+              return (q_jj > f_t(0)) ? f_t(0) : free_var_reg;
+            },
+            stream_view_.value());
+        } else {
+          cub::DeviceTransform::Transform(
+            cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data()),
+            data.d_diag_.data(),
+            linear_size,
+            [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_native_free_linear) {
+              return is_native_free_linear ? free_var_reg : (z_j / x_j);
+            },
+            stream_view_.value());
+        }
       }
     } else {
       cub::DeviceTransform::Transform(cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data()),
                                       data.d_diag_.data(),
-                                      data.d_diag_.size(),
+                                      linear_size,
                                       cuda::std::divides<>{},
                                       stream_view_.value());
     }
     RAFT_CHECK_CUDA(stream_view_);
+
+    // TODO: need to double check this
+    //  Set cone diagonal part to be 1
+    if (has_soc) {
+      thrust::fill_n(
+        rmm::exec_policy(stream_view_), data.d_diag_.begin() + cone_var_start, m_c, f_t(1));
+    }
+
     // diag = z ./ x + E * (v ./ w) * E'
 
     if (data.n_upper_bounds > 0) {
@@ -2365,8 +2797,9 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
       RAFT_CHECK_CUDA(stream_view_);
     }
 
-    // diag = z ./ x + E * (v ./ w) * E' + Q (if Q is diagonal)
-    if (data.Q.n > 0 && data.Q_diagonal) {
+    // In ADAT mode, diagonal Q is folded into D. In augmented mode, Q is an explicit block in the
+    // KKT matrix, so adding it here would double count the quadratic objective.
+    if (!use_augmented && data.Q.n > 0 && data.Q_diagonal) {
       cub::DeviceTransform::Transform(
         cuda::std::make_tuple(data.d_Q_diag_.data(), data.d_diag_.data()),
         data.d_diag_.data(),
@@ -2376,18 +2809,25 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
       RAFT_CHECK_CUDA(stream_view_);
     }
 
-    // inv_diag = 1.0 ./ diag
-    cub::DeviceTransform::Transform(
-      data.d_diag_.data(),
-      data.d_inv_diag.data(),
-      data.d_diag_.size(),
-      [] HD(f_t diag) { return f_t(1) / diag; },
-      stream_view_.value());
-    RAFT_CHECK_CUDA(stream_view_);
     raft::copy(data.diag.data(), data.d_diag_.data(), data.d_diag_.size(), stream_view_);
-    raft::copy(data.inv_diag.data(), data.d_inv_diag.data(), data.d_inv_diag.size(), stream_view_);
+
+    // inv_diag and h = A*inv_diag*... are only used for the ADAT solve path.
+    if (!use_augmented) {
+      cub::DeviceTransform::Transform(
+        data.d_diag_.data(),
+        data.d_inv_diag.data(),
+        data.d_diag_.size(),
+        [] HD(f_t diag) { return f_t(1) / diag; },
+        stream_view_.value());
+      RAFT_CHECK_CUDA(stream_view_);
+      raft::copy(
+        data.inv_diag.data(), data.d_inv_diag.data(), data.d_inv_diag.size(), stream_view_);
+    }
   }
 
+  // Track whether we (re)factorize on this call.
+  const bool did_factorize = !data.has_factorization;
+
   // Form A*D*A' or the augmented system and factorize it
   if (!data.has_factorization) {
     raft::common::nvtx::range fun_scope("Barrier: ADAT");
@@ -2395,6 +2835,8 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
     i_t status;
     if (use_augmented) {
       RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_));
+      data.dual_perturb   = dual_perturb;
+      data.primal_perturb = primal_perturb;
       data.form_augmented();
       // Check halt after form_augmented (synchronous) and before factorize (~1s).
       // If halt was set while form_augmented ran, we catch it here and skip the
@@ -2431,76 +2873,75 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
     }
   }
 
-  // Compute h = primal_rhs + A*inv_diag*(dual_rhs - complementarity_xz_rhs ./ x +
-  // E*((complementarity_wv_rhs - v .* bound_rhs) ./ w) )
-  // TMP shouldn't be allocated when in GPU mode
-
+  // Primal RHS: dual_rhs - complementarity_target + E*((wv_rhs - v.*bound_rhs)./w)
+  // (linear: target = xz_rhs/x; native free: no xz term). Used as d_r1_ (augmented) and
+  // unscaled input to ADAT's h = primal_rhs + A*inv_diag*tmp3.
   {
-    raft::common::nvtx::range fun_scope("Barrier: GPU compute H");
-    // tmp3 <- E * ((complementarity_wv_rhs .- v .* bound_rhs) ./ w)
+    raft::common::nvtx::range fun_scope("Barrier: GPU assemble primal RHS");
     RAFT_CUDA_TRY(
       cudaMemsetAsync(data.d_tmp3_.data(), 0, sizeof(f_t) * data.d_tmp3_.size(), stream_view_));
-    cub::DeviceTransform::Transform(
-      cuda::std::make_tuple(data.d_bound_rhs_.data(),
-                            data.d_v_.data(),
-                            data.d_complementarity_wv_rhs_.data(),
-                            data.d_w_.data()),
-      thrust::make_permutation_iterator(data.d_tmp3_.data(), data.d_upper_bounds_.data()),
-      data.n_upper_bounds,
-      [] HD(f_t bound_rhs, f_t v, f_t complementarity_wv_rhs, f_t w) {
-        return (complementarity_wv_rhs - v * bound_rhs) / w;
-      },
-      stream_view_.value());
-    RAFT_CHECK_CUDA(stream_view_);
-    // tmp3 <- tmp3 .+ -(complementarity_xz_rhs ./ x) .+ dual_rhs
-    // tmp4 <- inv_diag .* tmp3
-    // For free variables, skip the complementarity_xz_rhs / x term
-    if (data.n_free_vars > 0) {
+    if (data.n_upper_bounds > 0) {
       cub::DeviceTransform::Transform(
-        cuda::std::make_tuple(data.d_inv_diag.data(),
-                              data.d_tmp3_.data(),
-                              data.d_complementarity_xz_rhs_.data(),
-                              data.d_x_.data(),
+        cuda::std::make_tuple(data.d_bound_rhs_.data(),
+                              data.d_v_.data(),
+                              data.d_complementarity_wv_rhs_.data(),
+                              data.d_w_.data()),
+        thrust::make_permutation_iterator(data.d_tmp3_.data(), data.d_upper_bounds_.data()),
+        data.n_upper_bounds,
+        [] HD(f_t bound_rhs, f_t v, f_t complementarity_wv_rhs, f_t w) {
+          return (complementarity_wv_rhs - v * bound_rhs) / w;
+        },
+        stream_view_.value());
+      RAFT_CHECK_CUDA(stream_view_);
+    }
+    if (has_native_free_linear) {
+      cub::DeviceTransform::Transform(
+        cuda::std::make_tuple(data.d_tmp3_.data(),
+                              data.d_complementarity_target_.data(),
                               data.d_dual_rhs_.data(),
-                              data.d_is_free_.data()),
-        thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()),
+                              data.d_is_native_free_linear_.data()),
+        data.d_tmp3_.data(),
         lp.num_cols,
-        [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs, i_t is_free)
-          -> thrust::tuple<f_t, f_t> {
-          const f_t xz_term = is_free ? f_t(0) : (complementarity_xz_rhs / x);
-          const f_t tmp     = tmp3 - xz_term + dual_rhs;
-          return {tmp, inv_diag * tmp};
+        [] HD(f_t tmp3, f_t target, f_t dual_rhs, i_t is_native_free_linear) {
+          const f_t comp_term = is_native_free_linear ? f_t(0) : target;
+          return tmp3 + dual_rhs - comp_term;
         },
         stream_view_.value());
     } else {
       cub::DeviceTransform::Transform(
-        cuda::std::make_tuple(data.d_inv_diag.data(),
-                              data.d_tmp3_.data(),
-                              data.d_complementarity_xz_rhs_.data(),
-                              data.d_x_.data(),
-                              data.d_dual_rhs_.data()),
-        thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()),
+        cuda::std::make_tuple(
+          data.d_tmp3_.data(), data.d_dual_rhs_.data(), data.d_complementarity_target_.data()),
+        data.d_tmp3_.data(),
         lp.num_cols,
-        [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs)
-          -> thrust::tuple<f_t, f_t> {
-          const f_t tmp = tmp3 + -(complementarity_xz_rhs / x) + dual_rhs;
-          return {tmp, inv_diag * tmp};
-        },
+        [] HD(f_t tmp3, f_t dual_rhs, f_t target) { return tmp3 + dual_rhs - target; },
         stream_view_.value());
     }
     RAFT_CHECK_CUDA(stream_view_);
     raft::copy(data.d_r1_.data(), data.d_tmp3_.data(), data.d_tmp3_.size(), stream_view_);
     raft::copy(data.d_r1_prime_.data(), data.d_tmp3_.data(), data.d_tmp3_.size(), stream_view_);
+  }
 
-    // h <- A @ tmp4 .+ primal_rhs
+  if (!use_augmented) {
+    raft::common::nvtx::range fun_scope("Barrier: GPU compute H");
+    cub::DeviceTransform::Transform(
+      cuda::std::make_tuple(data.d_inv_diag.data(), data.d_tmp3_.data()),
+      data.d_tmp4_.data(),
+      lp.num_cols,
+      [] HD(f_t inv_diag, f_t tmp3) { return inv_diag * tmp3; },
+      stream_view_.value());
+    RAFT_CHECK_CUDA(stream_view_);
     data.cusparse_view_.spmv(1, data.cusparse_tmp4_, 1, data.cusparse_h_);
   }
 
   if (use_augmented) {
     raft::common::nvtx::range fun_scope("Barrier: GPU augmented solve");
-    // r1 <- dual_rhs -complementarity_xz_rhs ./ x +  E * ((complementarity_wv_rhs - v .* bound_rhs)
-    // ./ w)
+    // Augmented RHS [dx; dy]: primal block is d_r1_ (assembled above).
+    //   linear j: dual_rhs[j] - complementarity_target[j]
+    //             + E_j*((complementarity_wv_rhs - v.*bound_rhs)./w)  (target = xz_rhs/x; free: 0)
+    //   cone j:   dual_rhs[j] - complementarity_target[j]  (NT target: -z or combined centering term)
+    // Constraint block: primal_rhs.
 
+    // TODO: d_augmented_rhs can be preallocated
     rmm::device_uvector<f_t> d_augmented_rhs(lp.num_cols + lp.num_rows, stream_view_);
     raft::copy(d_augmented_rhs.data(), data.d_r1_.data(), lp.num_cols, stream_view_);
     raft::copy(
@@ -2525,10 +2966,33 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
       }
     } op(data);
     if (settings.barrier_iterative_refinement) {
-      auto solve_err = iterative_refinement<i_t, f_t, op_t>(op, d_augmented_rhs, d_augmented_soln);
+      const f_t solve_err =
+        iterative_refinement<i_t, f_t, op_t>(op, d_augmented_rhs, d_augmented_soln);
       if (solve_err > 1e-1) {
         settings.log.printf("|| Aug (dx, dy) - aug_rhs || %e after IR\n", solve_err);
       }
+
+      // Adaptive regularization: increase/decrease based on IR quality.
+      // Only adapt on calls where we actually (re)factorized — the affine step.
+      if (did_factorize) {
+        constexpr f_t min_perturb = 1e-8;
+        constexpr f_t max_perturb = 1e-1;
+        if (solve_err > 1e-2) {
+          f_t old_dp     = dual_perturb;
+          dual_perturb   = std::min(max_perturb, dual_perturb * 10.0);
+          primal_perturb = std::min(max_perturb, primal_perturb * 10.0);
+          settings.log.printf(
+            "  reg UP: %e -> %e (solve_err=%e)\n", old_dp, dual_perturb, solve_err);
+        } else if (solve_err < 1e-4) {
+          f_t old_dp     = dual_perturb;
+          dual_perturb   = std::max(min_perturb, dual_perturb / 10.0);
+          primal_perturb = std::max(min_perturb, primal_perturb / 10.0);
+          if (old_dp != dual_perturb) {
+            settings.log.printf(
+              "  reg DOWN: %e -> %e (solve_err=%e)\n", old_dp, dual_perturb, solve_err);
+          }
+        }
+      }
     }
 
     raft::copy(data.d_dx_.data(), d_augmented_soln.data(), lp.num_cols, stream_view_);
@@ -2774,34 +3238,23 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
   {
     raft::common::nvtx::range fun_scope("Barrier: dz formation GPU");
 
-    // dz = (complementarity_xz_rhs - z.* dx) ./ x;
-    // For free variables, dz = 0
-    if (data.n_free_vars > 0) {
-      cub::DeviceTransform::Transform(
-        cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(),
-                              data.d_z_.data(),
-                              data.d_dx_.data(),
-                              data.d_x_.data(),
-                              data.d_is_free_.data()),
-        data.d_dz_.data(),
-        data.d_dz_.size(),
-        [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x, i_t is_free) {
-          return is_free ? f_t(0) : ((complementarity_xz_rhs - z * dx) / x);
-        },
-        stream_view_.value());
-    } else {
-      cub::DeviceTransform::Transform(
-        cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(),
-                              data.d_z_.data(),
-                              data.d_dx_.data(),
-                              data.d_x_.data()),
-        data.d_dz_.data(),
-        data.d_dz_.size(),
-        [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x) {
-          return (complementarity_xz_rhs - z * dx) / x;
-        },
-        stream_view_.value());
+    const i_t linear_dz_size = has_soc ? cone_var_start : static_cast<i_t>(data.d_dz_.size());
+
+    if (has_soc) {
+      recover_cone_dz_from_target(
+        raft::device_span<const f_t>(data.d_dx_.data() + cone_var_start, m_c),
+        data.cones(),
+        raft::device_span<const f_t>(data.d_complementarity_target_.data() + cone_var_start, m_c),
+        raft::device_span<f_t>(data.d_dz_.data() + cone_var_start, m_c),
+        stream_view_);
     }
+
+    recover_linear_orthant_dz(
+      raft::device_span<const f_t>(data.d_complementarity_target_.data(), linear_dz_size),
+      raft::device_span<const f_t>(data.d_z_.data(), linear_dz_size),
+      raft::device_span<const f_t>(data.d_dx_.data(), linear_dz_size),
+      raft::device_span<const f_t>(data.d_x_.data(), linear_dz_size),
+      raft::device_span<f_t>(data.d_dz_.data(), linear_dz_size));
     RAFT_CHECK_CUDA(stream_view_);
     raft::copy(dz.data(), data.d_dz_.data(), data.d_dz_.size(), stream_view_);
   }
@@ -2810,18 +3263,29 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
     raft::common::nvtx::range fun_scope("Barrier: xz_residual GPU");
 
     // xz_residual <- z .* dx + x .* dz - complementarity_xz_rhs
-    cub::DeviceTransform::Transform(
-      cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(),
-                            data.d_z_.data(),
-                            data.d_dz_.data(),
-                            data.d_dx_.data(),
-                            data.d_x_.data()),
-      data.d_xz_residual_.data(),
-      data.d_xz_residual_.size(),
-      [] HD(f_t complementarity_xz_rhs, f_t z, f_t dz, f_t dx, f_t x) {
-        return z * dx + x * dz - complementarity_xz_rhs;
-      },
-      stream_view_.value());
+    auto compute_linear_xz_residual = [&](raft::device_span<f_t> out,
+                                          raft::device_span<const f_t> rhs,
+                                          raft::device_span<const f_t> z,
+                                          raft::device_span<const f_t> dz_span,
+                                          raft::device_span<const f_t> dx_span,
+                                          raft::device_span<const f_t> x) {
+      if (out.empty()) return;
+      cub::DeviceTransform::Transform(
+        cuda::std::make_tuple(rhs.data(), z.data(), dz_span.data(), dx_span.data(), x.data()),
+        out.data(),
+        out.size(),
+        [] HD(f_t complementarity_xz_rhs, f_t z_val, f_t dz_val, f_t dx_val, f_t x_val) {
+          return z_val * dx_val + x_val * dz_val - complementarity_xz_rhs;
+        },
+        stream_view_.value());
+    };
+    compute_linear_xz_residual(
+      raft::device_span<f_t>(data.d_xz_residual_.data(), linear_size),
+      raft::device_span<const f_t>(data.d_complementarity_xz_rhs_.data(), linear_size),
+      raft::device_span<const f_t>(data.d_z_.data(), linear_size),
+      raft::device_span<const f_t>(data.d_dz_.data(), linear_size),
+      raft::device_span<const f_t>(data.d_dx_.data(), linear_size),
+      raft::device_span<const f_t>(data.d_x_.data(), linear_size));
     RAFT_CHECK_CUDA(stream_view_);
     const f_t xz_residual_norm =
       device_vector_norm_inf<i_t, f_t>(data.d_xz_residual_, stream_view_);
@@ -2981,40 +3445,187 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
   return 0;
 }
 
+// One-time host → device sync of the primal-dual iterate after initial_point (host-only).
 template <typename i_t, typename f_t>
-void barrier_solver_t<i_t, f_t>::compute_affine_rhs(iteration_data_t<i_t, f_t>& data)
+void copy_host_iterate_to_device(iteration_data_t<i_t, f_t>& data, rmm::cuda_stream_view stream)
 {
-  raft::common::nvtx::range fun_scope("Barrier: compute_affine_rhs");
+  raft::common::nvtx::range fun_scope("Barrier: copy_host_iterate_to_device");
+
+  data.d_x_.resize(data.x.size(), stream);
+  data.d_z_.resize(data.z.size(), stream);
+  raft::copy(data.d_x_.data(), data.x.data(), data.x.size(), stream);
+  raft::copy(data.d_z_.data(), data.z.data(), data.z.size(), stream);
+
+  data.d_w_.resize(data.w.size(), stream);
+  data.d_v_.resize(data.v.size(), stream);
+  raft::copy(data.d_w_.data(), data.w.data(), data.w.size(), stream);
+  raft::copy(data.d_v_.data(), data.v.data(), data.v.size(), stream);
+
+  data.d_y_.resize(data.y.size(), stream);
+  raft::copy(data.d_y_.data(), data.y.data(), data.y.size(), stream);
+
+  data.d_upper_bounds_.resize(data.upper_bounds.size(), stream);
+  raft::copy(data.d_upper_bounds_.data(),
+             data.upper_bounds.data(),
+             data.upper_bounds.size(),
+             stream);
+}
 
-  data.primal_rhs = data.primal_residual;
-  data.bound_rhs  = data.bound_residual;
-  data.dual_rhs   = data.dual_residual;
+// One-time static problem data (constant across barrier iterations).
+template <typename i_t, typename f_t>
+void copy_static_problem_data_to_device(const lp_problem_t<i_t, f_t>& lp,
+                                        iteration_data_t<i_t, f_t>& data,
+                                        rmm::cuda_stream_view stream)
+{
+  raft::common::nvtx::range fun_scope("Barrier: copy_static_problem_data_to_device");
 
-  raft::copy(data.d_complementarity_xz_rhs_.data(),
-             data.d_complementarity_xz_residual_.data(),
-             data.d_complementarity_xz_residual_.size(),
-             stream_view_);
-  raft::copy(data.d_complementarity_wv_rhs_.data(),
-             data.d_complementarity_wv_residual_.data(),
-             data.d_complementarity_wv_residual_.size(),
-             stream_view_);
+  data.d_primal_residual_.resize(lp.rhs.size(), stream);
+  raft::copy(data.d_primal_residual_.data(), lp.rhs.data(), lp.rhs.size(), stream);
+
+  data.d_upper_.resize(lp.upper.size(), stream);
+  raft::copy(data.d_upper_.data(), lp.upper.data(), lp.upper.size(), stream);
+
+  data.d_bound_residual_.resize(data.bound_residual.size(), stream);
+  data.d_dw_residual_.resize(data.n_upper_bounds, stream);
+}
+
+// Per Mehrotra step: affine/corrector RHS only (iterate stays on device from initial sync / next_iterate).
+template <typename i_t, typename f_t>
+void copy_step_rhs_to_device(iteration_data_t<i_t, f_t>& data, rmm::cuda_stream_view stream)
+{
+  raft::common::nvtx::range fun_scope("Barrier: copy_step_rhs_to_device");
+
+  data.d_bound_rhs_.resize(data.bound_rhs.size(), stream);
+  raft::copy(
+    data.d_bound_rhs_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream);
+
+  raft::copy(data.d_h_.data(), data.primal_rhs.data(), data.primal_rhs.size(), stream);
+  raft::copy(data.d_dual_rhs_.data(), data.dual_rhs.data(), data.dual_rhs.size(), stream);
+
+  data.d_dw_.resize(data.bound_rhs.size(), stream);
+  raft::copy(data.d_dw_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream);
+
+  data.d_xz_residual_.resize(data.d_complementarity_xz_rhs_.size(), stream);
+  data.d_wv_residual_.resize(data.d_complementarity_wv_rhs_.size(), stream);
+}
+
+template <typename i_t, typename f_t>
+void fill_linear_complementarity_target(iteration_data_t<i_t, f_t>& data,
+                                        raft::device_span<f_t> target,
+                                        raft::device_span<const f_t> xz_rhs,
+                                        raft::device_span<const f_t> x,
+                                        rmm::cuda_stream_view stream)
+{
+  if (target.empty()) return;
+  const bool has_native_free_linear = data.n_native_free_linear > 0;
+  if (has_native_free_linear) {
+    cub::DeviceTransform::Transform(
+      cuda::std::make_tuple(xz_rhs.data(), x.data(), data.d_is_native_free_linear_.data()),
+      target.data(),
+      target.size(),
+      [] HD(f_t complementarity_xz_rhs, f_t x_val, i_t is_native_free_linear) {
+        if (is_native_free_linear) return f_t(0);
+        return complementarity_xz_rhs / x_val;
+      },
+      stream.value());
+  } else {
+    cub::DeviceTransform::Transform(
+      cuda::std::make_tuple(xz_rhs.data(), x.data()),
+      target.data(),
+      target.size(),
+      [] HD(f_t complementarity_xz_rhs, f_t x_val) { return complementarity_xz_rhs / x_val; },
+      stream.value());
+  }
+  RAFT_CHECK_CUDA(stream);
+}
 
-  // x.*z ->  -x .* z
+template <typename i_t, typename f_t>
+void fill_affine_cone_complementarity_target(iteration_data_t<i_t, f_t>& data,
+                                             i_t cone_var_start,
+                                             i_t m_c,
+                                             rmm::cuda_stream_view stream)
+{
+  if (m_c == 0) return;
+  auto& cones = data.cones();
+  cones.x     = raft::device_span<f_t>(data.d_x_.data() + cone_var_start, m_c);
+  cones.z     = raft::device_span<f_t>(data.d_z_.data() + cone_var_start, m_c);
+  auto cone_target =
+    raft::device_span<f_t>(data.d_complementarity_target_.data() + cone_var_start, m_c);
   cub::DeviceTransform::Transform(
-    data.d_complementarity_xz_rhs_.data(),
-    data.d_complementarity_xz_rhs_.data(),
-    data.d_complementarity_xz_rhs_.size(),
-    [] HD(f_t xz_rhs) { return -xz_rhs; },
-    stream_view_.value());
-  RAFT_CHECK_CUDA(stream_view_);
+    cones.z.data(),
+    cone_target.data(),
+    m_c,
+    [] HD(f_t z_val) { return -z_val; },
+    stream.value());
+  RAFT_CUDA_TRY(cudaPeekAtLastError());
+  RAFT_CHECK_CUDA(stream);
+}
+
+template <typename i_t, typename f_t>
+void fill_corrector_cone_complementarity_target(iteration_data_t<i_t, f_t>& data,
+                                                i_t cone_var_start,
+                                                i_t m_c,
+                                                f_t sigma_mu,
+                                                rmm::cuda_stream_view stream)
+{
+  if (m_c == 0) return;
+  auto& cones = data.cones();
+  cones.x     = raft::device_span<f_t>(data.d_x_.data() + cone_var_start, m_c);
+  cones.z     = raft::device_span<f_t>(data.d_z_.data() + cone_var_start, m_c);
+  auto cone_target =
+    raft::device_span<f_t>(data.d_complementarity_target_.data() + cone_var_start, m_c);
+  compute_combined_cone_rhs_term(raft::device_span<const f_t>(data.d_dx_aff_.data() + cone_var_start, m_c),
+                                 raft::device_span<const f_t>(data.d_dz_aff_.data() + cone_var_start, m_c),
+                                 cones,
+                                 sigma_mu,
+                                 cone_target,
+                                 stream);
+}
+
+template <typename i_t, typename f_t>
+void barrier_solver_t<i_t, f_t>::compute_affine_rhs(iteration_data_t<i_t, f_t>& data)
+{
+  raft::common::nvtx::range fun_scope("Barrier: compute_affine_rhs");
+  const bool has_soc    = data.has_cones();
+  const i_t linear_size = data.linear_xz_size(lp.num_cols);
+  const i_t cone_var_start = data.cone_start();
+  const i_t m_c            = data.cone_entry_count();
+
+  data.primal_rhs          = data.primal_residual;
+  data.bound_rhs           = data.bound_residual;
+  data.dual_rhs            = data.dual_residual;
+  data.cone_combined_step_ = false;
+  data.cone_sigma_mu_      = f_t(0);
+
+  auto negate_complementarity_rhs = [&](raft::device_span<f_t> out,
+                                        raft::device_span<const f_t> residual) {
+    if (out.empty()) return;
+    cub::DeviceTransform::Transform(
+      residual.data(),
+      out.data(),
+      out.size(),
+      [] HD(f_t rhs) { return -rhs; },
+      stream_view_.value());
+  };
+  // xz -> -x .* z
+  negate_complementarity_rhs(
+    raft::device_span<f_t>(data.d_complementarity_xz_rhs_.data(), linear_size),
+    raft::device_span<const f_t>(data.d_complementarity_xz_residual_.data(), linear_size));
   // w.*v -> -w .* v
-  cub::DeviceTransform::Transform(
-    data.d_complementarity_wv_rhs_.data(),
-    data.d_complementarity_wv_rhs_.data(),
-    data.d_complementarity_wv_rhs_.size(),
-    [] HD(f_t wv_rhs) { return -wv_rhs; },
-    stream_view_.value());
+  negate_complementarity_rhs(cuopt::make_span(data.d_complementarity_wv_rhs_),
+                             cuopt::make_span(data.d_complementarity_wv_residual_));
   RAFT_CHECK_CUDA(stream_view_);
+
+  fill_linear_complementarity_target<i_t, f_t>(
+    data,
+    raft::device_span<f_t>(data.d_complementarity_target_.data(), linear_size),
+    raft::device_span<const f_t>(data.d_complementarity_xz_rhs_.data(), linear_size),
+    raft::device_span<const f_t>(data.d_x_.data(), linear_size),
+    stream_view_);
+  if (has_soc) {
+    cuopt_assert(cone_var_start + m_c == lp.num_cols, "barrier expects [linear | cone] layout");
+    fill_affine_cone_complementarity_target<i_t, f_t>(data, cone_var_start, m_c, stream_view_);
+  }
 }
 
 template <typename i_t, typename f_t>
@@ -3022,6 +3633,7 @@ void barrier_solver_t<i_t, f_t>::compute_target_mu(
   iteration_data_t<i_t, f_t>& data, f_t mu, f_t& mu_aff, f_t& sigma, f_t& new_mu)
 {
   raft::common::nvtx::range fun_scope("Barrier: compute_target_mu");
+  const bool has_soc = data.has_cones();
 
   f_t complementarity_aff_sum = 0.0;
   // TMP no copy and data should always be on the GPU
@@ -3040,7 +3652,23 @@ void barrier_solver_t<i_t, f_t>::compute_target_mu(
   f_t step_dual_aff   = std::min(gpu_max_step_to_boundary(data, data.d_v_, data.d_dv_aff_),
                                gpu_max_step_to_boundary(data, data.d_z_, data.d_dz_aff_));
 
-  if (data.Q.n > 0) { step_primal_aff = step_dual_aff = std::min(step_primal_aff, step_dual_aff); }
+  if (has_soc) {
+    i_t cs = data.cone_start();
+    i_t mc = data.cone_entry_count();
+    auto [cone_p, cone_d] =
+      compute_cone_step_length(data.cones(),
+                               raft::device_span<const f_t>(data.d_dx_aff_.data() + cs, mc),
+                               raft::device_span<const f_t>(data.d_dz_aff_.data() + cs, mc),
+                               std::min(step_primal_aff, step_dual_aff),
+                               stream_view_);
+    f_t cone_aff    = std::min(cone_p, cone_d);
+    step_primal_aff = std::min(step_primal_aff, cone_aff);
+    step_dual_aff   = step_primal_aff;
+  }
+
+  if (data.Q.n > 0 || has_soc) {
+    step_primal_aff = step_dual_aff = std::min(step_primal_aff, step_dual_aff);
+  }
 
   // Compute complementarity_xz_aff_sum = sum(x_aff * z_aff),
   // where x_aff = x + step_primal_aff * dx_aff and z_aff = z + step_dual_aff * dz_aff
@@ -3091,55 +3719,63 @@ void barrier_solver_t<i_t, f_t>::compute_target_mu(
     stream_view_);
 
   complementarity_aff_sum = complementarity_xz_aff_sum + complementarity_wv_aff_sum;
-  f_t mu_denom            = static_cast<f_t>(data.x.size()) + static_cast<f_t>(data.n_upper_bounds);
-  mu_denom -= static_cast<f_t>(data.n_free_vars);
-  mu_denom = std::max(mu_denom, f_t(1.0));
-  mu_aff   = complementarity_aff_sum / mu_denom;
-  sigma    = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0)));
-  new_mu   = sigma * mu_aff;
-}
-
-template <typename i_t, typename f_t>
-static void fill_linear_cc_rhs(iteration_data_t<i_t, f_t>& data,
-                               f_t new_mu,
-                               raft::device_span<f_t> out,
-                               raft::device_span<f_t> dx_aff,
-                               raft::device_span<f_t> dz_aff,
-                               rmm::cuda_stream_view stream_view)
-{
-  if (out.empty()) return;
-  if (data.n_free_vars > 0) {
-    auto is_free_ptr = data.d_is_free_.data();
-    cub::DeviceTransform::Transform(
-      cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), is_free_ptr),
-      out.data(),
-      out.size(),
-      [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_free) {
-        return is_free ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu);
-      },
-      stream_view.value());
-  } else {
-    cub::DeviceTransform::Transform(
-      cuda::std::make_tuple(dx_aff.data(), dz_aff.data()),
-      out.data(),
-      out.size(),
-      [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val) { return -(dx_aff_val * dz_aff_val) + new_mu; },
-      stream_view.value());
-  }
+  const f_t mu_denom = data.complementarity_degree(data.x.size(), data.n_upper_bounds);
+  mu_aff             = complementarity_aff_sum / mu_denom;
+  sigma              = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0)));
+  new_mu             = sigma * mu;
 }
 
 template <typename i_t, typename f_t>
 void barrier_solver_t<i_t, f_t>::compute_cc_rhs(iteration_data_t<i_t, f_t>& data, f_t& new_mu)
 {
   raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs");
-
-  fill_linear_cc_rhs(data,
-                     new_mu,
-                     cuopt::make_span(data.d_complementarity_xz_rhs_),
-                     cuopt::make_span(data.d_dx_aff_),
-                     cuopt::make_span(data.d_dz_aff_),
-                     stream_view_);
+  const bool has_soc    = data.has_cones();
+  const bool has_native_free_linear   = data.n_native_free_linear > 0;
+  const i_t linear_size = data.linear_xz_size(lp.num_cols);
+
+  auto fill_linear_cc_rhs = [&](raft::device_span<f_t> out,
+                                raft::device_span<const f_t> dx_aff,
+                                raft::device_span<const f_t> dz_aff) {
+    if (out.empty()) return;
+    if (has_native_free_linear) {
+      auto native_free_linear_ptr = data.d_is_native_free_linear_.data();
+      cub::DeviceTransform::Transform(
+        cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), native_free_linear_ptr),
+        out.data(),
+        out.size(),
+        [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_native_free_linear) {
+          return is_native_free_linear ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu);
+        },
+        stream_view_.value());
+    } else {
+      cub::DeviceTransform::Transform(
+        cuda::std::make_tuple(dx_aff.data(), dz_aff.data()),
+        out.data(),
+        out.size(),
+        [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val) { return -(dx_aff_val * dz_aff_val) + new_mu; },
+        stream_view_.value());
+    }
+  };
+  fill_linear_cc_rhs(raft::device_span<f_t>(data.d_complementarity_xz_rhs_.data(), linear_size),
+                     raft::device_span<const f_t>(data.d_dx_aff_.data(), linear_size),
+                     raft::device_span<const f_t>(data.d_dz_aff_.data(), linear_size));
   RAFT_CHECK_CUDA(stream_view_);
+
+  const i_t cone_var_start = data.cone_start();
+  const i_t m_c            = data.cone_entry_count();
+
+  fill_linear_complementarity_target<i_t, f_t>(
+    data,
+    raft::device_span<f_t>(data.d_complementarity_target_.data(), linear_size),
+    raft::device_span<const f_t>(data.d_complementarity_xz_rhs_.data(), linear_size),
+    raft::device_span<const f_t>(data.d_x_.data(), linear_size),
+    stream_view_);
+  if (has_soc) {
+    cuopt_assert(cone_var_start + m_c == lp.num_cols, "barrier expects [linear | cone] layout");
+    fill_corrector_cone_complementarity_target<i_t, f_t>(
+      data, cone_var_start, m_c, new_mu, stream_view_);
+  }
+
   cub::DeviceTransform::Transform(
     cuda::std::make_tuple(data.d_dw_aff_.data(), data.d_dv_aff_.data()),
     data.d_complementarity_wv_rhs_.data(),
@@ -3147,21 +3783,19 @@ void barrier_solver_t<i_t, f_t>::compute_cc_rhs(iteration_data_t<i_t, f_t>& data
     [new_mu] HD(f_t dw_aff, f_t dv_aff) { return -(dw_aff * dv_aff) + new_mu; },
     stream_view_.value());
   RAFT_CHECK_CUDA(stream_view_);
-
   // TMP should be CPU to 0 if CPU and GPU to 0 if GPU
   data.primal_rhs.set_scalar(0.0);
   data.bound_rhs.set_scalar(0.0);
   data.dual_rhs.set_scalar(0.0);
+  data.cone_combined_step_ = has_soc;
+  data.cone_sigma_mu_      = has_soc ? new_mu : f_t(0);
 }
 
 template <typename i_t, typename f_t>
 void barrier_solver_t<i_t, f_t>::compute_final_direction(iteration_data_t<i_t, f_t>& data)
 {
   raft::common::nvtx::range fun_scope("Barrier: compute_final_direction");
-  // TODO Nicolas: Redundant copies
-  data.d_y_.resize(data.y.size(), stream_view_);
   data.d_dy_aff_.resize(data.dy_aff.size(), stream_view_);
-  raft::copy(data.d_y_.data(), data.y.data(), data.y.size(), stream_view_);
   raft::copy(data.d_dy_aff_.data(), data.dy_aff.data(), data.dy_aff.size(), stream_view_);
 
 #ifdef FINITE_CHECK
@@ -3224,6 +3858,8 @@ void barrier_solver_t<i_t, f_t>::compute_primal_dual_step_length(iteration_data_
                                                                  f_t& step_dual)
 {
   raft::common::nvtx::range fun_scope("Barrier: compute_primal_dual_step_length");
+  const bool has_soc = data.has_cones();
+
   f_t max_step_primal = 0.0;
   f_t max_step_dual   = 0.0;
   max_step_primal     = std::min(gpu_max_step_to_boundary(data, data.d_w_, data.d_dw_),
@@ -3231,10 +3867,23 @@ void barrier_solver_t<i_t, f_t>::compute_primal_dual_step_length(iteration_data_
   max_step_dual       = std::min(gpu_max_step_to_boundary(data, data.d_v_, data.d_dv_),
                            gpu_max_step_to_boundary(data, data.d_z_, data.d_dz_));
 
+  if (has_soc) {
+    i_t cs = data.cone_start();
+    i_t mc = data.cone_entry_count();
+    auto [cone_primal, cone_dual] =
+      compute_cone_step_length(data.cones(),
+                               raft::device_span<const f_t>(data.d_dx_.data() + cs, mc),
+                               raft::device_span<const f_t>(data.d_dz_.data() + cs, mc),
+                               f_t(1),
+                               stream_view_);
+    max_step_primal = std::min(max_step_primal, cone_primal);
+    max_step_dual   = std::min(max_step_dual, cone_dual);
+  }
+
   step_primal = step_scale * max_step_primal;
   step_dual   = step_scale * max_step_dual;
 
-  if (data.Q.n > 0) { step_primal = step_dual = std::min(step_primal, step_dual); }
+  if (data.Q.n > 0 || has_soc) { step_primal = step_dual = std::min(step_primal, step_dual); }
 }
 
 template <typename i_t, typename f_t>
@@ -3325,11 +3974,8 @@ void barrier_solver_t<i_t, f_t>::compute_mu(iteration_data_t<i_t, f_t>& data, f_
 {
   raft::common::nvtx::range fun_scope("Barrier: compute_mu");
 
-  f_t mu_denom = static_cast<f_t>(data.x.size()) + static_cast<f_t>(data.n_upper_bounds);
-  mu_denom -= static_cast<f_t>(data.n_free_vars);  // free vars don't contribute to mu
-  mu_denom = std::max(mu_denom, f_t(1.0));
-
-  mu = (data.sum_reduce_helper_.sum(data.d_complementarity_xz_residual_.begin(),
+  const f_t mu_denom = data.complementarity_degree(data.x.size(), data.n_upper_bounds);
+  mu                 = (data.sum_reduce_helper_.sum(data.d_complementarity_xz_residual_.begin(),
                                     data.d_complementarity_xz_residual_.size(),
                                     stream_view_) +
         data.sum_reduce_helper_.sum(data.d_complementarity_wv_residual_.begin(),
@@ -3595,6 +4241,10 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
     if (lp.Q.n > 0) {
       settings.log.printf("Quadratic objective matrix: %d nonzeros\n", lp.Q.row_start[lp.Q.n]);
     }
+    if (lp.second_order_cone_dims.size() > 0) {
+      settings.log.printf("Second-order cones: %d\n",
+                          static_cast<int>(lp.second_order_cone_dims.size()));
+    }
     settings.log.printf("\n");
 
     // Compute the number of free variables
@@ -3613,7 +4263,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
     if (lp.Q.n > 0) { create_Q(lp, Q); }
 
     iteration_data_t<i_t, f_t> data(
-      lp, num_upper_bounds, presolve_info.free_variable_indices, Q, settings);
+      lp, num_upper_bounds, presolve_info.native_free_linear_indices, Q, settings);
     if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
       settings.log.printf("Barrier solver halted\n");
       return lp_status_t::CONCURRENT_LIMIT;
@@ -3651,17 +4301,42 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
       settings.log.printf("Unable to compute initial point\n");
       return lp_status_t::NUMERICAL_ISSUES;
     }
+
+    copy_host_iterate_to_device<i_t, f_t>(data, stream_view_);
+    copy_static_problem_data_to_device<i_t, f_t>(lp, data, stream_view_);
+
     compute_residuals<PinnedHostAllocator<f_t>>(data.w, data.x, data.y, data.v, data.z, data);
 
     f_t primal_residual_norm =
       std::max(vector_norm_inf<i_t, f_t>(data.primal_residual, stream_view_),
                vector_norm_inf<i_t, f_t>(data.bound_residual, stream_view_));
-    f_t dual_residual_norm = vector_norm_inf<i_t, f_t>(data.dual_residual, stream_view_);
+    f_t dual_residual_norm   = vector_norm_inf<i_t, f_t>(data.dual_residual, stream_view_);
+    const bool has_soc       = data.has_cones();
+    const i_t linear_xz_size = data.linear_xz_size(data.complementarity_xz_residual.size());
+    auto linear_xz_span =
+      raft::host_span<const f_t>(data.complementarity_xz_residual.data(), linear_xz_size);
+    // compute complementarity residual norm for nonnegativity constraints
     f_t complementarity_residual_norm =
-      std::max(vector_norm_inf<i_t, f_t>(data.complementarity_xz_residual, stream_view_),
+      std::max(vector_norm_inf<i_t, f_t>(linear_xz_span, stream_view_),
                vector_norm_inf<i_t, f_t>(data.complementarity_wv_residual, stream_view_));
-    f_t mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) /
-             (static_cast<f_t>(n) + static_cast<f_t>(num_upper_bounds));
+    // compute complementarity residual norm for second-order cones
+    if (has_soc) {
+      f_t cone_complementarity_norm = f_t(0);
+      i_t off                       = data.cone_start();
+      for (auto q_k : lp.second_order_cone_dims) {
+        f_t cone_dot = f_t(0);
+        for (i_t j = 0; j < q_k; ++j) {
+          cone_dot += data.complementarity_xz_residual[off + j];
+        }
+        cone_complementarity_norm = std::max(cone_complementarity_norm, cone_dot);
+        off += q_k;
+      }
+      complementarity_residual_norm =
+        std::max(complementarity_residual_norm, cone_complementarity_norm);
+    }
+    f_t mu_denom = data.complementarity_degree(n, num_upper_bounds);
+    f_t mu =
+      (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / mu_denom;
 
     f_t norm_b = vector_norm_inf<i_t, f_t>(data.b, stream_view_);
     f_t norm_c = vector_norm_inf<i_t, f_t>(data.c, stream_view_);
@@ -3686,6 +4361,11 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
     f_t dual_objective =
       data.b.inner_product(data.y) - data.restrict_u_.inner_product(data.v) - quad_objective;
 
+    f_t duality_gap_abs = std::abs(primal_objective - dual_objective);
+    f_t duality_gap_rel =
+      duality_gap_abs /
+      std::max(f_t(1), std::min(std::abs(primal_objective), std::abs(dual_objective)));
+
     i_t iter = 0;
     settings.log.printf("\n");
     settings.log.printf(
@@ -3702,15 +4382,21 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
                         relative_complementarity_residual,
                         elapsed_time);
 
+    // TODO: should polish termination condition,
     bool converged = primal_residual_norm < settings.barrier_relative_feasibility_tol &&
-                     dual_residual_norm < settings.barrier_relative_optimality_tol &&
-                     complementarity_residual_norm < settings.barrier_relative_complementarity_tol;
+                     dual_residual_norm < settings.barrier_relative_feasibility_tol &&
+                     (duality_gap_abs < settings.barrier_relative_complementarity_tol ||
+                      duality_gap_rel < settings.barrier_relative_complementarity_tol);
+
+    const i_t linear_xz_rhs_size = data.linear_xz_size(data.complementarity_xz_rhs.size());
 
     data.d_complementarity_xz_residual_.resize(data.complementarity_xz_residual.size(),
                                                stream_view_);
     data.d_complementarity_wv_residual_.resize(data.complementarity_wv_residual.size(),
                                                stream_view_);
-    data.d_complementarity_xz_rhs_.resize(data.complementarity_xz_rhs.size(), stream_view_);
+
+    // Resize the complementarity residual for x-z pairs (only nonnegativity constraints)
+    data.d_complementarity_xz_rhs_.resize(linear_xz_rhs_size, stream_view_);
     data.d_complementarity_wv_rhs_.resize(data.complementarity_wv_rhs.size(), stream_view_);
     raft::copy(data.d_complementarity_xz_residual_.data(),
                data.complementarity_xz_residual.data(),
@@ -3722,7 +4408,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
                stream_view_);
     raft::copy(data.d_complementarity_xz_rhs_.data(),
                data.complementarity_xz_rhs.data(),
-               data.complementarity_xz_rhs.size(),
+               linear_xz_rhs_size,
                stream_view_);
     raft::copy(data.d_complementarity_wv_rhs_.data(),
                data.complementarity_wv_rhs.data(),
@@ -3737,6 +4423,10 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
 
     const i_t iteration_limit = settings.iteration_limit;
 
+    // Adaptive regularization for the augmented system.
+    f_t dual_perturb   = 1e-8;
+    f_t primal_perturb = 1e-8;
+
     while (iter < iteration_limit) {
       raft::common::nvtx::range fun_scope("Barrier: iteration");
 
@@ -3749,12 +4439,21 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
         return lp_status_t::CONCURRENT_LIMIT;
       }
 
-      // Compute the affine step
+      // Compute the affine step. This is the call that (re)factorizes the
+      // augmented system, so the IR residual here drives the adaptation of
+      // dual_perturb / primal_perturb for the next iteration's matrix.
       compute_affine_rhs(data);
       f_t max_affine_residual = 0.0;
 
-      i_t status = gpu_compute_search_direction(
-        data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff, max_affine_residual);
+      i_t status = gpu_compute_search_direction(data,
+                                                data.dw_aff,
+                                                data.dx_aff,
+                                                data.dy_aff,
+                                                data.dv_aff,
+                                                data.dz_aff,
+                                                dual_perturb,
+                                                primal_perturb,
+                                                max_affine_residual);
       if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
         settings.log.printf("Barrier solver halted\n");
         return lp_status_t::CONCURRENT_LIMIT;
@@ -3789,10 +4488,19 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
 
       compute_cc_rhs(data, new_mu);
 
+      // Corrector / centering step: reuses the factorization built by the
+      // affine call above, so the perturbation is fixed for this solve
       f_t max_corrector_residual = 0.0;
 
-      status = gpu_compute_search_direction(
-        data, data.dw, data.dx, data.dy, data.dv, data.dz, max_corrector_residual);
+      status = gpu_compute_search_direction(data,
+                                            data.dw,
+                                            data.dx,
+                                            data.dy,
+                                            data.dv,
+                                            data.dz,
+                                            dual_perturb,
+                                            primal_perturb,
+                                            max_corrector_residual);
       if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
         settings.log.printf("Barrier solver halted\n");
         return lp_status_t::CONCURRENT_LIMIT;
@@ -3850,15 +4558,13 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
             relative_dual_residual < data.relative_dual_residual_save &&
             relative_complementarity_residual < data.relative_complementarity_residual_save &&
             primal_objective == primal_objective && dual_objective == dual_objective) {
-          settings.log.debug(
-            "Saving solution: feasibility %.2e (%.2e), optimality %.2e (%.2e), complementarity "
-            "%.2e (%.2e)\n",
+          settings.log.printf(
+            "Saving solution at iter %d: feasibility %.2e, optimality %.2e, complementarity "
+            "%.2e\n",
+            iter,
             relative_primal_residual,
-            primal_residual_norm,
             relative_dual_residual,
-            dual_residual_norm,
-            relative_complementarity_residual,
-            complementarity_residual_norm);
+            relative_complementarity_residual);
           data.w_save                                 = data.w;
           data.x_save                                 = data.x;
           data.y_save                                 = data.y;
@@ -3901,7 +4607,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
                           elapsed_time);
 
       bool primal_feasible = relative_primal_residual < settings.barrier_relative_feasibility_tol;
-      bool dual_feasible   = relative_dual_residual < settings.barrier_relative_optimality_tol;
+      bool dual_feasible   = relative_dual_residual < settings.barrier_relative_feasibility_tol;
       bool small_gap =
         relative_complementarity_residual < settings.barrier_relative_complementarity_tol;
 
diff --git a/cpp/src/barrier/barrier.hpp b/cpp/src/barrier/barrier.hpp
index 4d25fa930e..bec25bc255 100644
--- a/cpp/src/barrier/barrier.hpp
+++ b/cpp/src/barrier/barrier.hpp
@@ -101,6 +101,8 @@ class barrier_solver_t {
                                    pinned_dense_vector_t<i_t, f_t>& dy,
                                    pinned_dense_vector_t<i_t, f_t>& dv,
                                    pinned_dense_vector_t<i_t, f_t>& dz,
+                                   f_t& dual_perturb,
+                                   f_t& primal_perturb,
                                    f_t& max_residual);
 
  private:
diff --git a/cpp/src/barrier/dense_vector.hpp b/cpp/src/barrier/dense_vector.hpp
index 5d6f2b12ec..2d5d0b335c 100644
--- a/cpp/src/barrier/dense_vector.hpp
+++ b/cpp/src/barrier/dense_vector.hpp
@@ -184,6 +184,28 @@ class dense_vector_t : public std::vector<f_t, Allocator> {
     }
   }
 
+  void ensure_positive_skip_range(f_t epsilon_adjust, i_t skip_start, i_t skip_count)
+  {
+    if (skip_count == 0) {
+      ensure_positive(epsilon_adjust);
+      return;
+    }
+    const i_t n        = this->size();
+    const i_t skip_end = skip_start + skip_count;
+    f_t min_val        = std::numeric_limits<f_t>::max();
+    for (i_t i = 0; i < n; i++) {
+      if (i >= skip_start && i < skip_end) continue;
+      min_val = std::min(min_val, (*this)[i]);
+    }
+    if (min_val <= 0.0) {
+      const f_t delta = -min_val + epsilon_adjust;
+      for (i_t i = 0; i < n; i++) {
+        if (i >= skip_start && i < skip_end) continue;
+        (*this)[i] += delta;
+      }
+    }
+  }
+
   void ensure_positive(f_t epsilon_adjust, const std::vector<i_t>& mask)
   {
     f_t min_x   = inf;
diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp
index a193bddded..721465d2a1 100644
--- a/cpp/src/dual_simplex/presolve.cpp
+++ b/cpp/src/dual_simplex/presolve.cpp
@@ -16,10 +16,16 @@
 #include <algorithm>
 #include <cmath>
 #include <iostream>
-#include <numeric>
 
 namespace cuopt::linear_programming::dual_simplex {
 
+template <typename i_t, typename f_t>
+/** Number of leading linear columns; SOCP cone variables occupy [linear_cols, num_cols). */
+static i_t linear_var_count(const lp_problem_t<i_t, f_t>& problem)
+{
+  return problem.second_order_cone_dims.empty() ? problem.num_cols : problem.cone_var_start;
+}
+
 template <typename i_t, typename f_t>
 i_t remove_empty_cols(lp_problem_t<i_t, f_t>& problem,
                       i_t& num_empty_cols,
@@ -27,26 +33,18 @@ i_t remove_empty_cols(lp_problem_t<i_t, f_t>& problem,
 {
   constexpr bool verbose = false;
   if (verbose) { printf("Removing %d empty columns\n", num_empty_cols); }
-  // We have a variable x_j that does not appear in any rows
-  // The cost function
-  // sum_{k != j} c_k * x_k + c_j * x_j
-  // becomes
-  // sum_{k != j} c_k * x_k + c_j * l_j if c_j > 0
-  // or
-  // sum_{k != j} c_k * x_k + c_j * u_j if c_j < 0
   presolve_info.removed_variables.reserve(num_empty_cols);
   presolve_info.removed_values.reserve(num_empty_cols);
   presolve_info.removed_reduced_costs.reserve(num_empty_cols);
 
-  // Check to see if a variable participates in a quadratic objective
   std::vector<bool> has_quadratic_term(problem.num_cols, false);
+  i_t linear_cols = linear_var_count(problem);
 
   if (problem.Q.n > 0) {
-    for (i_t j = 0; j < problem.num_cols; ++j) {
+    for (i_t j = 0; j < linear_cols; ++j) {
       const i_t row_start = problem.Q.row_start[j];
       const i_t row_end   = problem.Q.row_start[j + 1];
       if (row_end - row_start == 0) { continue; }
-      // Q is symmetric, so its sufficient to check only the row size
       has_quadratic_term[j] = true;
     }
   }
@@ -55,12 +53,13 @@ i_t remove_empty_cols(lp_problem_t<i_t, f_t>& problem,
   i_t new_cols = 0;
   for (i_t j = 0; j < problem.num_cols; ++j) {
     bool remove_var = false;
-    if ((problem.A.col_start[j + 1] - problem.A.col_start[j]) == 0) {
-      if (problem.objective[j] >= 0 && problem.lower[j] > -inf && !has_quadratic_term[j]) {
+    if (j < linear_cols && (problem.A.col_start[j + 1] - problem.A.col_start[j]) == 0) {
+      bool non_removable = has_quadratic_term[j];
+      if (problem.objective[j] >= 0 && problem.lower[j] > -inf && !non_removable) {
         presolve_info.removed_values.push_back(problem.lower[j]);
         problem.obj_constant += problem.objective[j] * problem.lower[j];
         remove_var = true;
-      } else if (problem.objective[j] <= 0 && problem.upper[j] < inf && !has_quadratic_term[j]) {
+      } else if (problem.objective[j] <= 0 && problem.upper[j] < inf && !non_removable) {
         presolve_info.removed_values.push_back(problem.upper[j]);
         problem.obj_constant += problem.objective[j] * problem.upper[j];
         remove_var = true;
@@ -123,6 +122,12 @@ i_t remove_empty_cols(lp_problem_t<i_t, f_t>& problem,
     problem.Q.check_matrix("After removing empty columns");
   }
 
+  if (!problem.second_order_cone_dims.empty()) {
+    i_t new_cone_start = col_old_to_new[problem.cone_var_start];
+    assert(new_cone_start != -1);
+    problem.cone_var_start = new_cone_start;
+  }
+
   problem.objective = objective;
   problem.lower     = lower;
   problem.upper     = upper;
@@ -272,6 +277,113 @@ i_t convert_less_than_to_equal(const user_problem_t<i_t, f_t>& user_problem,
   // We must convert rows in the form: a_i^T x <= beta
   // into: a_i^T x + s_i = beta, s_i >= 0
 
+  if (!problem.second_order_cone_dims.empty()) {
+    const i_t old_num_cols   = problem.num_cols;
+    const i_t linear_cols    = linear_var_count(problem);
+    const i_t num_slacks     = less_rows;
+    const i_t num_cols       = old_num_cols + num_slacks;
+    const i_t old_nnz        = problem.A.col_start[old_num_cols];
+    const i_t nnz            = old_nnz + num_slacks;
+    const i_t new_cone_start = linear_cols + num_slacks;
+
+    auto old_A = problem.A;
+    csc_matrix_t<i_t, f_t> expanded_A(problem.A.m, num_cols, nnz);
+
+    std::vector<f_t> objective(num_cols, 0.0);
+    std::vector<f_t> lower(num_cols, 0.0);
+    std::vector<f_t> upper(num_cols, INFINITY);
+    std::vector<i_t> old_to_new(old_num_cols, -1);
+
+    for (i_t j = 0; j < linear_cols; ++j) {
+      old_to_new[j] = j;
+      objective[j]  = problem.objective[j];
+      lower[j]      = problem.lower[j];
+      upper[j]      = problem.upper[j];
+    }
+    for (i_t j = linear_cols; j < old_num_cols; ++j) {
+      old_to_new[j]            = j + num_slacks;
+      objective[old_to_new[j]] = problem.objective[j];
+      lower[old_to_new[j]]     = problem.lower[j];
+      upper[old_to_new[j]]     = problem.upper[j];
+    }
+
+    i_t nz = 0;
+    for (i_t j = 0; j < linear_cols; ++j) {
+      expanded_A.col_start[j] = nz;
+      for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) {
+        expanded_A.i[nz] = old_A.i[p];
+        expanded_A.x[nz] = old_A.x[p];
+        ++nz;
+      }
+    }
+
+    i_t slack_col = linear_cols;
+    for (i_t i = 0; i < problem.num_rows; i++) {
+      if (row_sense[i] == 'L') {
+        expanded_A.col_start[slack_col] = nz;
+        expanded_A.i[nz]                = i;
+        expanded_A.x[nz]                = 1.0;
+        new_slacks.push_back(slack_col);
+        row_sense[i] = 'E';
+        ++slack_col;
+        ++nz;
+        --less_rows;
+      }
+    }
+
+    for (i_t j = linear_cols; j < old_num_cols; ++j) {
+      i_t new_j                   = old_to_new[j];
+      expanded_A.col_start[new_j] = nz;
+      for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) {
+        expanded_A.i[nz] = old_A.i[p];
+        expanded_A.x[nz] = old_A.x[p];
+        ++nz;
+      }
+    }
+    expanded_A.col_start[num_cols] = nz;
+    assert(less_rows == 0);
+    assert(slack_col == new_cone_start);
+    assert(nz == nnz);
+
+    if (problem.Q.n > 0) {
+      const auto old_Q = problem.Q;
+      const i_t q_nnz  = old_Q.row_start[old_num_cols];
+
+      problem.Q.row_start.assign(num_cols + 1, 0);
+      for (i_t row = 0; row < old_num_cols; ++row) {
+        i_t new_row                      = old_to_new[row];
+        problem.Q.row_start[new_row + 1] = old_Q.row_start[row + 1] - old_Q.row_start[row];
+      }
+      for (i_t row = 0; row < num_cols; ++row) {
+        problem.Q.row_start[row + 1] += problem.Q.row_start[row];
+      }
+
+      problem.Q.j.resize(q_nnz);
+      problem.Q.x.resize(q_nnz);
+      auto row_starts = problem.Q.row_start;
+      for (i_t row = 0; row < old_num_cols; ++row) {
+        i_t new_row = old_to_new[row];
+        for (i_t p = old_Q.row_start[row]; p < old_Q.row_start[row + 1]; ++p) {
+          problem.Q.j[row_starts[new_row]] = old_to_new[old_Q.j[p]];
+          problem.Q.x[row_starts[new_row]] = old_Q.x[p];
+          ++row_starts[new_row];
+        }
+      }
+      problem.Q.m      = num_cols;
+      problem.Q.n      = num_cols;
+      problem.Q.nz_max = q_nnz;
+    }
+
+    problem.A              = expanded_A;
+    problem.A.n            = num_cols;
+    problem.objective      = objective;
+    problem.lower          = lower;
+    problem.upper          = upper;
+    problem.num_cols       = num_cols;
+    problem.cone_var_start = new_cone_start;
+    return 0;
+  }
+
   i_t num_cols = problem.num_cols + less_rows;
   i_t nnz      = problem.A.col_start[problem.num_cols] + less_rows;
   problem.A.col_start.resize(num_cols + 1);
@@ -571,16 +683,18 @@ void convert_user_problem(const user_problem_t<i_t, f_t>& user_problem,
   }
 
   // Copy info from user_problem to problem
-  problem.num_rows              = user_problem.num_rows;
-  problem.num_cols              = user_problem.num_cols;
-  problem.A                     = user_problem.A;
-  problem.objective             = user_problem.objective;
-  problem.obj_scale             = user_problem.obj_scale;
-  problem.obj_constant          = user_problem.obj_constant;
-  problem.objective_is_integral = user_problem.objective_is_integral;
-  problem.rhs                   = user_problem.rhs;
-  problem.lower                 = user_problem.lower;
-  problem.upper                 = user_problem.upper;
+  problem.num_rows               = user_problem.num_rows;
+  problem.num_cols               = user_problem.num_cols;
+  problem.A                      = user_problem.A;
+  problem.objective              = user_problem.objective;
+  problem.obj_scale              = user_problem.obj_scale;
+  problem.obj_constant           = user_problem.obj_constant;
+  problem.objective_is_integral  = user_problem.objective_is_integral;
+  problem.rhs                    = user_problem.rhs;
+  problem.lower                  = user_problem.lower;
+  problem.upper                  = user_problem.upper;
+  problem.cone_var_start         = user_problem.cone_var_start;
+  problem.second_order_cone_dims = user_problem.second_order_cone_dims;
 
   // Make a copy of row_sense so we can modify it
   std::vector<char> row_sense = user_problem.row_sense;
@@ -637,6 +751,7 @@ void convert_user_problem(const user_problem_t<i_t, f_t>& user_problem,
   settings.log.debug(
     "equality rows %d less rows %d columns %d\n", equal_rows, less_rows, problem.num_cols);
   if (settings.barrier && settings.dualize != 0 && user_problem.Q_values.size() == 0 &&
+      problem.second_order_cone_dims.empty() &&
       (settings.dualize == 1 ||
        (settings.dualize == -1 && less_rows > 1.2 * problem.num_cols && equal_rows < 2e4))) {
     settings.log.debug("Dualizing in presolve\n");
@@ -808,8 +923,8 @@ void convert_user_problem(const user_problem_t<i_t, f_t>& user_problem,
     problem.Q.x = user_problem.Q_values;
   }
 
-  // Add artifical variables
-  if (!settings.barrier_presolve) {
+  // Artificial vars break [linear | cone]; only for barrier_presolve-off LP-style layouts.
+  if (!settings.barrier_presolve && problem.second_order_cone_dims.empty()) {
     add_artifical_variables(problem, user_problem.range_rows, equality_rows, new_slacks);
   }
 }
@@ -821,13 +936,17 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
              presolve_info_t<i_t, f_t>& presolve_info)
 {
   problem = original;
+  const i_t linear_cols = linear_var_count(problem);
+  const bool has_cones  = !problem.second_order_cone_dims.empty();
   std::vector<char> row_sense(problem.num_rows, '=');
-  // Check for free variables
+
+  // Check for free variables (linear block only; cone columns are handled by the barrier SOC layout)
   i_t free_variables = 0;
-  for (i_t j = 0; j < problem.num_cols; j++) {
+  for (i_t j = 0; j < linear_cols; j++) {
     if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; }
   }
 
+  // Barrier presolve phase 1: bound free linear variables from equality rows.
   if (settings.barrier_presolve && free_variables > 0) {
     // Try to remove free variables
     std::vector<i_t> constraints_to_check;
@@ -835,7 +954,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     std::vector<i_t> row_marked(problem.num_rows, 0);
     current_free_variables.reserve(problem.num_cols);
     constraints_to_check.reserve(problem.num_rows);
-    for (i_t j = 0; j < problem.num_cols; j++) {
+    for (i_t j = 0; j < linear_cols; j++) {
       if (problem.lower[j] == -inf && problem.upper[j] == inf) {
         current_free_variables.push_back(j);
         const i_t col_start = problem.A.col_start[j];
@@ -859,10 +978,30 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     std::vector<f_t> upper_bound_coefficient(problem.num_cols, 0.0);
 
     if (!constraints_to_check.empty()) {
-      // Check if the constraints are feasible
       csr_matrix_t<i_t, f_t> Arow(0, 0, 0);
       problem.A.to_compressed_row(Arow);
 
+      // Keep only rows safe for phase-1 bound inference: no cone columns, exactly one free linear.
+      {
+        std::vector<i_t> safe_constraints;
+        safe_constraints.reserve(constraints_to_check.size());
+        for (i_t i : constraints_to_check) {
+          bool touches_cone        = false;
+          i_t free_linear_in_row = 0;
+          for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) {
+            const i_t j = Arow.j[p];
+            if (j >= linear_cols) {
+              touches_cone = true;
+              continue;
+            }
+            if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_linear_in_row++; }
+          }
+          if (touches_cone || free_linear_in_row != 1) { continue; }
+          safe_constraints.push_back(i);
+        }
+        constraints_to_check.swap(safe_constraints);
+      }
+
       // The constraints are in the form:
       // sum_j a_j x_j = beta
       for (i_t i : constraints_to_check) {
@@ -876,6 +1015,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
         f_t last_free_coeff_i = 0.0;
         for (i_t p = row_start; p < row_end; p++) {
           const i_t j       = Arow.j[p];
+          if (j >= linear_cols) { continue; }
           const f_t aij     = Arow.x[p];
           const f_t lower_j = problem.lower[j];
           const f_t upper_j = problem.upper[j];
@@ -929,6 +1069,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
         // x_j <= 1/a_ij * (rhs - upper_activity_i)
         const i_t j         = last_free_i;
         const f_t a_ij      = last_free_coeff_i;
+        if (a_ij == 0) { continue; }
         const f_t max_bound = 1e10;
         bool bounded        = false;
         if (a_ij > 0) {
@@ -986,6 +1127,9 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
           problem.lower[j] = -inf;
         }
       }
+      if (!(problem.lower[j] == -inf && problem.upper[j] == inf)) {
+        presolve_info.phase1_bounded_linear_indices.push_back(j);
+      }
     }
 
     // Record bounded free variables for dual correction in uncrush.
@@ -1016,11 +1160,12 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     }
   }
 
+  // Barrier presolve phase 2: negate one-sided bounds (-inf < x <= u -> -u <= x < inf).
   // The original problem may have a variable without a lower bound
   // but a finite upper bound
-  // -inf < x_j <= u_j
+  // -inf < x_j <= u_j (linear variables only)
   i_t no_lower_bound = 0;
-  for (i_t j = 0; j < problem.num_cols; j++) {
+  for (i_t j = 0; j < linear_cols; j++) {
     if (problem.lower[j] == -inf && problem.upper[j] < inf) { no_lower_bound++; }
   }
 
@@ -1031,7 +1176,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
   // Handle -inf < x_j <= u_j by substituting x'_j = -x_j, giving -u_j <= x'_j < inf
   if (settings.barrier_presolve && no_lower_bound > 0) {
     presolve_info.negated_variables.reserve(no_lower_bound);
-    for (i_t j = 0; j < problem.num_cols; j++) {
+    for (i_t j = 0; j < linear_cols; j++) {
       if (problem.lower[j] == -inf && problem.upper[j] < inf) {
         presolve_info.negated_variables.push_back(j);
 
@@ -1066,10 +1211,10 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     }
   }
 
-  // The original problem may have nonzero lower bounds
-  // 0 != l_j <= x_j <= u_j
+  // Barrier presolve phase 3: shift nonzero lower bounds to zero (cone/stack columns not shifted).
+  // 0 != l_j <= x_j <= u_j (linear variables only; cone/stack columns are not shifted)
   i_t nonzero_lower_bounds = 0;
-  for (i_t j = 0; j < problem.num_cols; j++) {
+  for (i_t j = 0; j < linear_cols; j++) {
     if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) { nonzero_lower_bounds++; }
   }
   if (settings.barrier_presolve && nonzero_lower_bounds > 0) {
@@ -1092,7 +1237,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     // so we get the constant term c_j * l_j
 
     std::vector<bool> lower_bounds_removed(problem.num_cols, false);
-    for (i_t j = 0; j < problem.num_cols; j++) {
+    for (i_t j = 0; j < linear_cols; j++) {
       if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) {
         lower_bounds_removed[j]               = true;
         presolve_info.removed_lower_bounds[j] = problem.lower[j];
@@ -1101,7 +1246,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
 
     auto old_objective = problem.objective;
     if (problem.Q.n > 0) {
-      for (i_t row = 0; row < problem.num_cols; row++) {
+      for (i_t row = 0; row < linear_cols; row++) {
         i_t row_start = problem.Q.row_start[row];
         i_t row_end   = problem.Q.row_start[row + 1];
         for (i_t p = row_start; p < row_end; p++) {
@@ -1122,7 +1267,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     }
 
     std::vector<f_t> kahan_compensation(problem.num_rows, 0.0);
-    for (i_t j = 0; j < problem.num_cols; j++) {
+    for (i_t j = 0; j < linear_cols; j++) {
       if (lower_bounds_removed[j]) {
         i_t col_start = problem.A.col_start[j];
         i_t col_end   = problem.A.col_start[j + 1];
@@ -1142,7 +1287,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     }
   }
 
-  // Check for empty rows
+  // Barrier presolve phase 4: remove empty rows and empty linear columns.
   i_t num_empty_rows = 0;
   {
     csr_matrix_t<i_t, f_t> Arow(0, 0, 0);
@@ -1160,7 +1305,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
   // Check for empty cols
   i_t num_empty_cols = 0;
   {
-    for (i_t j = 0; j < problem.num_cols; ++j) {
+    for (i_t j = 0; j < linear_cols; ++j) {
       if ((problem.A.col_start[j + 1] - problem.A.col_start[j]) == 0) { num_empty_cols++; }
     }
   }
@@ -1169,19 +1314,33 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     remove_empty_cols(problem, num_empty_cols, presolve_info);
   }
 
+  // Check for free variables (exclude cone variables — they are naturally unbounded)
+  free_variables = 0;
+  for (i_t j = 0; j < linear_cols; j++) {
+    if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; }
+  }
   problem.Q.check_matrix("Before free variable expansion");
 
-  // For QPs, keep free variables as-is rather than
-  // splitting x = v - w. The barrier solver handles them natively with a
-  // static regularizer on the diagonal instead of z/x complementarity terms.
-  if (settings.barrier_presolve && free_variables > 0 && problem.Q.n > 0) {
-    presolve_info.free_variable_indices.clear();
-    for (i_t j = 0; j < problem.num_cols; j++) {
+  // Barrier presolve phase 5: free linear variables — 5a native (QP/SOCP) or 5b v-w split (LP).
+  // QP/SOCP augmented system: keep free linears as-is (no x = v - w); barrier uses D = 0 on them.
+  const bool register_native_free_linear = free_variables > 0 && (problem.Q.n > 0 || has_cones);
+  if (register_native_free_linear) {
+    presolve_info.free_variable_pairs.clear();
+    presolve_info.native_free_linear_indices.clear();
+    // Only linear decision variables can be "native" free variables; cone/stack columns
+    // are unbounded by construction and must not be counted here.
+    i_t native_free_count = 0;
+    for (i_t j = 0; j < linear_cols; j++) {
       if (problem.lower[j] == -inf && problem.upper[j] == inf) {
-        presolve_info.free_variable_indices.push_back(j);
+        presolve_info.native_free_linear_indices.push_back(j);
+        native_free_count++;
       }
     }
-  } else if (settings.barrier_presolve && free_variables > 0) {
+    settings.log.printf(
+      "Keeping %d native free linear variables for augmented-system barrier (QP/SOCP)\n",
+      native_free_count);
+  } else if (settings.barrier_presolve && !has_cones && free_variables > 0) {
+    // Phase 5b: x_j = v - w with v, w >= 0 (LP without cones; SOCP/QP use phase 5a).
     // We have a variable x_j: with -inf < x_j < inf
     // we create new variables v and w with 0 <= v, w and x_j = v - w
     // Constraints
@@ -1194,123 +1353,163 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     // becomes
     // sum_{k != j} c_k x_k + c_j v - c_j w
 
-    std::vector<i_t> pair_index(problem.num_cols, -1);
-    i_t num_cols = problem.num_cols + free_variables;
-    i_t nnz      = problem.A.col_start[problem.num_cols];
-    for (i_t j = 0; j < problem.num_cols; j++) {
-      if (problem.lower[j] == -inf && problem.upper[j] == inf) {
-        nnz += (problem.A.col_start[j + 1] - problem.A.col_start[j]);
+    const i_t old_num_cols = problem.num_cols;
+    const i_t new_cone_start =
+      problem.second_order_cone_dims.empty() ? 0 : linear_cols + free_variables;
+    const i_t num_cols = old_num_cols + free_variables;
+
+    auto old_A         = problem.A;
+    auto old_Q         = problem.Q;
+    auto old_objective = problem.objective;
+    auto old_lower     = problem.lower;
+    auto old_upper     = problem.upper;
+
+    std::vector<i_t> partner_index(old_num_cols, -1);
+    std::vector<i_t> orig_to_new(old_num_cols, -1);
+    std::vector<bool> is_free(old_num_cols, false);
+
+    i_t next_partner = linear_cols;
+    for (i_t j = 0; j < linear_cols; ++j) {
+      orig_to_new[j] = j;
+      if (old_lower[j] == -inf && old_upper[j] == inf) {
+        is_free[j]       = true;
+        partner_index[j] = next_partner++;
       }
     }
+    for (i_t j = linear_cols; j < old_num_cols; ++j) {
+      orig_to_new[j] = j + free_variables;
+    }
+    assert(next_partner == new_cone_start || problem.second_order_cone_dims.empty());
 
-    problem.A.col_start.resize(num_cols + 1);
-    problem.A.i.resize(nnz);
-    problem.A.x.resize(nnz);
-    problem.lower.resize(num_cols);
-    problem.upper.resize(num_cols);
-    problem.objective.resize(num_cols);
+    i_t nnz = old_A.col_start[old_num_cols];
+    for (i_t j = 0; j < linear_cols; ++j) {
+      if (is_free[j]) { nnz += old_A.col_start[j + 1] - old_A.col_start[j]; }
+    }
 
-    presolve_info.free_variable_pairs.resize(free_variables * 2);
-    i_t pair_count = 0;
-    i_t q          = problem.A.col_start[problem.num_cols];
-    i_t col        = problem.num_cols;
-    for (i_t j = 0; j < problem.num_cols; j++) {
-      if (problem.lower[j] == -inf && problem.upper[j] == inf) {
-        for (i_t p = problem.A.col_start[j]; p < problem.A.col_start[j + 1]; p++) {
-          i_t i          = problem.A.i[p];
-          f_t aij        = problem.A.x[p];
-          problem.A.i[q] = i;
-          problem.A.x[q] = -aij;
-          q++;
+    csc_matrix_t<i_t, f_t> expanded_A(problem.A.m, num_cols, nnz);
+    i_t nz = 0;
+    for (i_t j = 0; j < linear_cols; ++j) {
+      expanded_A.col_start[j] = nz;
+      for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) {
+        expanded_A.i[nz] = old_A.i[p];
+        expanded_A.x[nz] = old_A.x[p];
+        ++nz;
+      }
+    }
+    for (i_t j = 0; j < linear_cols; ++j) {
+      if (partner_index[j] != -1) {
+        expanded_A.col_start[partner_index[j]] = nz;
+        for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) {
+          expanded_A.i[nz] = old_A.i[p];
+          expanded_A.x[nz] = -old_A.x[p];
+          ++nz;
         }
-        problem.lower[col]                              = 0.0;
-        problem.upper[col]                              = inf;
-        problem.objective[col]                          = -problem.objective[j];
-        presolve_info.free_variable_pairs[pair_count++] = j;
-        presolve_info.free_variable_pairs[pair_count++] = col;
-        pair_index[j]                                   = col;
-        problem.A.col_start[++col]                      = q;
-        problem.lower[j]                                = 0.0;
       }
     }
+    for (i_t j = linear_cols; j < old_num_cols; ++j) {
+      i_t new_j                   = orig_to_new[j];
+      expanded_A.col_start[new_j] = nz;
+      for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) {
+        expanded_A.i[nz] = old_A.i[p];
+        expanded_A.x[nz] = old_A.x[p];
+        ++nz;
+      }
+    }
+    expanded_A.col_start[num_cols] = nz;
+
+    std::vector<f_t> objective(num_cols);
+    std::vector<f_t> lower(num_cols, -INFINITY);
+    std::vector<f_t> upper(num_cols, INFINITY);
+    presolve_info.free_variable_pairs.clear();
+    presolve_info.free_variable_pairs.reserve(free_variables * 2);
+
+    for (i_t j = 0; j < linear_cols; ++j) {
+      objective[j] = old_objective[j];
+      if (is_free[j]) {
+        lower[j]                    = 0.0;
+        upper[j]                    = inf;
+        objective[partner_index[j]] = -old_objective[j];
+        lower[partner_index[j]]     = 0.0;
+        upper[partner_index[j]]     = inf;
+        presolve_info.free_variable_pairs.push_back(j);
+        presolve_info.free_variable_pairs.push_back(partner_index[j]);
+      } else {
+        lower[j] = old_lower[j];
+        upper[j] = old_upper[j];
+      }
+    }
+    for (i_t j = linear_cols; j < old_num_cols; ++j) {
+      i_t new_j        = orig_to_new[j];
+      objective[new_j] = old_objective[j];
+      lower[new_j]     = old_lower[j];
+      upper[new_j]     = old_upper[j];
+    }
 
-    if (problem.Q.n > 0) {
+    if (old_Q.n > 0) {
       std::vector<i_t> row_counts(num_cols, 0);
-      i_t nz_count = problem.Q.row_start[problem.num_cols];
-      for (i_t row = 0; row < problem.Q.n; row++) {
-        i_t q_start     = problem.Q.row_start[row];
-        i_t q_end       = problem.Q.row_start[row + 1];
-        row_counts[row] = q_end - q_start;
-        for (i_t qj = q_start; qj < q_end; qj++) {
-          i_t col = problem.Q.j[qj];
-          if (pair_index[row] != -1 && pair_index[col] != -1) {
-            assert(pair_index[row] >= problem.num_cols);
-            assert(pair_index[col] >= problem.num_cols);
-            row_counts[row]++;
-            row_counts[pair_index[row]] += 2;
-            nz_count += 3;
-          } else if (pair_index[col] != -1) {
-            assert(pair_index[col] >= problem.num_cols);
-            row_counts[row]++;
+      i_t nz_count = 0;
+      for (i_t row = 0; row < old_Q.n; ++row) {
+        i_t new_row     = orig_to_new[row];
+        i_t partner_row = partner_index[row];
+        i_t q_start     = old_Q.row_start[row];
+        i_t q_end       = old_Q.row_start[row + 1];
+        for (i_t qj = q_start; qj < q_end; ++qj) {
+          i_t col         = old_Q.j[qj];
+          i_t partner_col = partner_index[col];
+          row_counts[new_row]++;
+          nz_count++;
+          if (partner_col != -1) {
+            row_counts[new_row]++;
             nz_count++;
-          } else if (pair_index[row] != -1) {
-            assert(pair_index[row] >= problem.num_cols);
-            row_counts[pair_index[row]]++;
+          }
+          if (partner_row != -1) {
+            row_counts[partner_row]++;
             nz_count++;
+            if (partner_col != -1) {
+              row_counts[partner_row]++;
+              nz_count++;
+            }
           }
         }
       }
 
       std::vector<i_t> Q_row_start(num_cols + 1);
       Q_row_start[0] = 0;
-      for (i_t row = 0; row < num_cols; row++) {
+      for (i_t row = 0; row < num_cols; ++row) {
         Q_row_start[row + 1] = Q_row_start[row] + row_counts[row];
       }
       std::vector<i_t> Q_j(nz_count);
       std::vector<f_t> Q_x(nz_count);
       auto row_starts = Q_row_start;
-      // First copy the original Q ma
-      for (i_t row = 0; row < problem.Q.n; row++) {
-        i_t q_start = problem.Q.row_start[row];
-        i_t q_end   = problem.Q.row_start[row + 1];
-        i_t q_nz    = Q_row_start[row];
-        for (i_t qj = q_start; qj < q_end; qj++) {
-          i_t col   = problem.Q.j[qj];
-          f_t qij   = problem.Q.x[qj];
-          Q_j[q_nz] = col;
-          Q_x[q_nz] = qij;
-          q_nz++;
-        }
-        row_starts[row] = q_nz;
-      }
-
-      // Expand the Q matrix for the free variables
-      for (i_t row = 0; row < problem.Q.n; row++) {
-        i_t q_start = problem.Q.row_start[row];
-        i_t q_end   = problem.Q.row_start[row + 1];
-        for (i_t qj = q_start; qj < q_end; qj++) {
-          i_t col = problem.Q.j[qj];
-          f_t qij = problem.Q.x[qj];
-          if (pair_index[row] != -1 && pair_index[col] != -1) {
-            Q_j[row_starts[row]] = pair_index[col];
-            Q_x[row_starts[row]] = -qij;
-            row_starts[row]++;
-
-            Q_j[row_starts[pair_index[row]]] = col;
-            Q_x[row_starts[pair_index[row]]] = -qij;
-            row_starts[pair_index[row]]++;
-
-            Q_j[row_starts[pair_index[row]]] = pair_index[col];
-            Q_x[row_starts[pair_index[row]]] = qij;
-            row_starts[pair_index[row]]++;
-          } else if (pair_index[col] != -1) {
-            Q_j[row_starts[row]] = pair_index[col];
-            Q_x[row_starts[row]] = -qij;
-            row_starts[row]++;
-          } else if (pair_index[row] != -1) {
-            Q_j[row_starts[pair_index[row]]] = col;
-            Q_x[row_starts[pair_index[row]]] = -qij;
-            row_starts[pair_index[row]]++;
+      for (i_t row = 0; row < old_Q.n; ++row) {
+        i_t new_row     = orig_to_new[row];
+        i_t partner_row = partner_index[row];
+        i_t q_start     = old_Q.row_start[row];
+        i_t q_end       = old_Q.row_start[row + 1];
+        for (i_t qj = q_start; qj < q_end; ++qj) {
+          i_t col         = old_Q.j[qj];
+          f_t qij         = old_Q.x[qj];
+          i_t new_col     = orig_to_new[col];
+          i_t partner_col = partner_index[col];
+
+          Q_j[row_starts[new_row]] = new_col;
+          Q_x[row_starts[new_row]] = qij;
+          row_starts[new_row]++;
+
+          if (partner_col != -1) {
+            Q_j[row_starts[new_row]] = partner_col;
+            Q_x[row_starts[new_row]] = -qij;
+            row_starts[new_row]++;
+          }
+          if (partner_row != -1) {
+            Q_j[row_starts[partner_row]] = new_col;
+            Q_x[row_starts[partner_row]] = -qij;
+            row_starts[partner_row]++;
+            if (partner_col != -1) {
+              Q_j[row_starts[partner_row]] = partner_col;
+              Q_x[row_starts[partner_row]] = qij;
+              row_starts[partner_row]++;
+            }
           }
         }
       }
@@ -1323,12 +1522,17 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
       problem.Q.check_matrix("After free variable expansion");
     }
 
-    // assert(problem.A.p[num_cols] == nnz);
-    problem.A.n      = num_cols;
-    problem.num_cols = num_cols;
+    problem.A         = expanded_A;
+    problem.A.n       = num_cols;
+    problem.objective = objective;
+    problem.lower     = lower;
+    problem.upper     = upper;
+    problem.num_cols  = num_cols;
+    if (!problem.second_order_cone_dims.empty()) { problem.cone_var_start = new_cone_start; }
   }
 
-  if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0) {
+  if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0 &&
+      problem.second_order_cone_dims.empty()) {
     folding(problem, settings, presolve_info);
   }
 
@@ -1406,7 +1610,7 @@ void crush_primal_solution(const user_problem_t<i_t, f_t>& user_problem,
   // including previously added slacks, are reset before writing new values.
   solution.assign(problem.num_cols, 0.0);
   for (i_t j = 0; j < user_problem.num_cols; j++) {
-    solution[j] = user_solution[j];
+    solution[user_col_to_problem_col(user_problem, problem, j)] = user_solution[j];
   }
 
   std::vector<f_t> primal_residual(problem.num_rows);
@@ -1446,7 +1650,7 @@ void crush_primal_solution_with_slack(const user_problem_t<i_t, f_t>& user_probl
   // Re-crush can be called with a reused output vector; clear stale entries first.
   solution.assign(problem.num_cols, 0.0);
   for (i_t j = 0; j < user_problem.num_cols; j++) {
-    solution[j] = user_solution[j];
+    solution[user_col_to_problem_col(user_problem, problem, j)] = user_solution[j];
   }
 
   std::vector<f_t> primal_residual(problem.num_rows);
@@ -1493,9 +1697,9 @@ f_t crush_dual_solution(const user_problem_t<i_t, f_t>& user_problem,
   for (i_t i = 0; i < user_problem.num_rows; i++) {
     y[i] = user_y[i];
   }
-  z.resize(problem.num_cols);
+  z.assign(problem.num_cols, 0.0);
   for (i_t j = 0; j < user_problem.num_cols; j++) {
-    z[j] = user_z[j];
+    z[user_col_to_problem_col(user_problem, problem, j)] = user_z[j];
   }
 
   std::vector<bool> is_range_row(problem.num_rows, false);
@@ -1562,6 +1766,17 @@ f_t crush_dual_solution(const user_problem_t<i_t, f_t>& user_problem,
   return dual_res_inf;
 }
 
+template <typename i_t, typename f_t>
+static i_t user_col_to_problem_col(const user_problem_t<i_t, f_t>& user_problem,
+                                   const lp_problem_t<i_t, f_t>& problem,
+                                   i_t user_col)
+{
+  if (user_problem.second_order_cone_dims.empty()) { return user_col; }
+  if (problem.cone_var_start <= user_problem.cone_var_start) { return user_col; }
+  if (user_col < user_problem.cone_var_start) { return user_col; }
+  return problem.cone_var_start + (user_col - user_problem.cone_var_start);
+}
+
 template <typename i_t, typename f_t>
 void uncrush_primal_solution(const user_problem_t<i_t, f_t>& user_problem,
                              const lp_problem_t<i_t, f_t>& problem,
@@ -1571,9 +1786,9 @@ void uncrush_primal_solution(const user_problem_t<i_t, f_t>& user_problem,
   user_solution.resize(user_problem.num_cols);
   assert(problem.num_cols >= user_problem.num_cols);
   assert(solution.size() >= user_problem.num_cols);
-  std::copy(solution.begin(),
-            solution.begin() + std::min((i_t)solution.size(), user_problem.num_cols),
-            user_solution.data());
+  for (i_t j = 0; j < user_problem.num_cols; ++j) {
+    user_solution[j] = solution[user_col_to_problem_col(user_problem, problem, j)];
+  }
 }
 
 template <typename i_t, typename f_t>
@@ -1677,13 +1892,25 @@ void uncrush_solution(const presolve_info_t<i_t, f_t>& presolve_info,
   if (num_free_variables > 0) {
     settings.log.printf("Post-solve: Handling free variables %d\n", num_free_variables);
     // We added free variables so we need to map the crushed solution back to the original variables
+    std::vector<bool> remove_partner(input_x.size(), false);
     for (i_t k = 0; k < 2 * num_free_variables; k += 2) {
       const i_t u = free_variable_pairs[k];
       const i_t v = free_variable_pairs[k + 1];
       input_x[u] -= input_x[v];
+      remove_partner[v] = true;
+    }
+    std::vector<f_t> compact_x;
+    std::vector<f_t> compact_z;
+    compact_x.reserve(input_x.size() - num_free_variables);
+    compact_z.reserve(input_z.size() - num_free_variables);
+    for (i_t j = 0; j < static_cast<i_t>(input_x.size()); ++j) {
+      if (!remove_partner[j]) {
+        compact_x.push_back(input_x[j]);
+        compact_z.push_back(input_z[j]);
+      }
     }
-    input_z.resize(input_z.size() - num_free_variables);
-    input_x.resize(input_x.size() - num_free_variables);
+    input_x = compact_x;
+    input_z = compact_z;
   }
 
   if (presolve_info.removed_variables.size() > 0) {
@@ -1754,6 +1981,11 @@ void uncrush_solution(const presolve_info_t<i_t, f_t>& presolve_info,
     }
   }
 
+  if (!presolve_info.phase1_bounded_linear_indices.empty()) {
+    settings.log.printf("Post-solve: %d linear column(s) had phase-1 bound tightening (x unchanged)\n",
+                        static_cast<int>(presolve_info.phase1_bounded_linear_indices.size()));
+  }
+
   // Dual correction for originally free variables that received implied bounds.
   // Barrier produced (y, z) with z_j != 0 satisfying A^T y + z = c + Qx.
   // We need corrected (y_bar, z_bar) with z_bar_j = 0 for all j in F_b where
@@ -1772,6 +2004,8 @@ void uncrush_solution(const presolve_info_t<i_t, f_t>& presolve_info,
     settings.log.printf("Post-solve: Correcting duals for %d bounded free variables\n",
                         static_cast<i_t>(presolve_info.bounded_free_variables.size()));
     const csc_matrix_t<i_t, f_t>& A = original_problem.A;
+    csr_matrix_t<i_t, f_t> A_row(A.m, A.n, A.nnz());
+    A.to_compressed_row(A_row);
     // Traverse in reverse order, to ensure that all z_j = 0 after the correction
     for (auto it = presolve_info.bounded_free_variables.rbegin();
          it != presolve_info.bounded_free_variables.rend();
@@ -1781,15 +2015,9 @@ void uncrush_solution(const presolve_info_t<i_t, f_t>& presolve_info,
       if (w_j == 0.0) { continue; }
       const f_t du = w_j / bfv.coefficient;
       input_y[bfv.constraint] += du;
-      for (i_t j = 0; j < A.n; j++) {
-        const i_t col_start = A.col_start[j];
-        const i_t col_end   = A.col_start[j + 1];
-        for (i_t p = col_start; p < col_end; p++) {
-          if (A.i[p] == bfv.constraint) {
-            input_z[j] -= A.x[p] * du;
-            break;
-          }
-        }
+      const auto [row_start, row_end] = A_row.get_constraint_range(bfv.constraint);
+      for (i_t p = row_start; p < row_end; ++p) {
+        input_z[A_row.j[p]] -= A_row.x[p] * du;
       }
     }
   }
diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp
index c050454b33..bc67627c21 100644
--- a/cpp/src/dual_simplex/presolve.hpp
+++ b/cpp/src/dual_simplex/presolve.hpp
@@ -49,6 +49,8 @@ struct lp_problem_t {
   f_t obj_constant;
   f_t obj_scale;  // 1.0 for min, -1.0 for max
   bool objective_is_integral{false};
+  i_t cone_var_start{0};
+  std::vector<i_t> second_order_cone_dims;
 
   void write_mps(const std::string& path) const
   {
@@ -162,10 +164,14 @@ struct presolve_info_t {
 
   // Variables that were negated to handle -inf < x_j <= u_j
   std::vector<i_t> negated_variables;
+
+  // Free variable indices for QP/SOCP augmented barrier (not split, handled natively)
+  std::vector<i_t> native_free_linear_indices;
+
+  // Linear columns where phase 1 tightened a fully free var to a one-sided bound (same x index)
+  std::vector<i_t> phase1_bounded_linear_indices;
   // Originally-free variables that received implied bounds, with the constraint used
   std::vector<bounded_free_var_t<i_t, f_t>> bounded_free_variables;
-  // Free variable indices for QP augmented system (not split, handled natively)
-  std::vector<i_t> free_variable_indices;
 };
 
 template <typename i_t, typename f_t>
diff --git a/cpp/src/dual_simplex/scaling.cpp b/cpp/src/dual_simplex/scaling.cpp
index 1531c91486..2db6c75b43 100644
--- a/cpp/src/dual_simplex/scaling.cpp
+++ b/cpp/src/dual_simplex/scaling.cpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -16,21 +16,174 @@ template <typename i_t, typename f_t>
 i_t column_scaling(const lp_problem_t<i_t, f_t>& unscaled,
                    const simplex_solver_settings_t<i_t, f_t>& settings,
                    lp_problem_t<i_t, f_t>& scaled,
-                   std::vector<f_t>& column_scaling)
+                   std::vector<f_t>& col_scale,
+                   std::vector<f_t>& row_scale)
 {
   scaled = unscaled;
   i_t m  = scaled.num_rows;
   i_t n  = scaled.num_cols;
 
-  if (!settings.scale_columns || unscaled.Q.n > 0) {
+  row_scale.assign(m, 1.0);
+
+  // =========================================================================
+  // Ruiz equilibration for SOCP (and QP) problems
+  // =========================================================================
+  // For SOCP problems, apply Ruiz equilibration: alternating row and column
+  // infinity-norm scaling to bring the constraint matrix close to equilibrium.
+  // This dramatically improves the conditioning of the augmented KKT system.
+  // Ruiz equilibration for SOCP (and QP) problems, applied only when the
+  // constraint matrix has a large row-norm imbalance.
+  if (!unscaled.second_order_cone_dims.empty() || unscaled.Q.n > 0) {
+    col_scale.assign(n, 1.0);
+
+    // Decide whether Ruiz scaling is needed by checking row-norm imbalance.
+    // If max_row_norm / min_row_norm is small, the matrix is already well-conditioned
+    // and scaling can hurt (e.g. by amplifying tiny noise coefficients).
+    csr_matrix_t<i_t, f_t> Arow_check(0, 0, 0);
+    scaled.A.to_compressed_row(Arow_check);
+    f_t max_row_norm = 0;
+    f_t min_row_norm = std::numeric_limits<f_t>::max();
+    for (i_t i = 0; i < m; ++i) {
+      f_t row_norm = 0;
+      for (i_t p = Arow_check.row_start[i]; p < Arow_check.row_start[i + 1]; ++p) {
+        f_t a = std::abs(Arow_check.x[p]);
+        if (a > row_norm) row_norm = a;
+      }
+      if (row_norm > 0) {
+        max_row_norm = std::max(max_row_norm, row_norm);
+        min_row_norm = std::min(min_row_norm, row_norm);
+      }
+    }
+    f_t row_norm_ratio = (min_row_norm > 0) ? max_row_norm / min_row_norm : 1.0;
+
+    if (row_norm_ratio < 100.0) {
+      settings.log.printf("Skipping Ruiz equilibration (row norm ratio %.1f < 100)\n",
+                          row_norm_ratio);
+      return 0;
+    }
+
+    // Apply Ruiz equilibration
+    csr_matrix_t<i_t, f_t> Arow(0, 0, 0);
+    scaled.A.to_compressed_row(Arow);
+
+    constexpr i_t max_ruiz_iterations = 10;
+    for (i_t iter = 0; iter < max_ruiz_iterations; ++iter) {
+      f_t max_deviation = 0.0;
+
+      // --- Row scaling: scale each row by 1/sqrt(max|a_ij|) ---
+      std::vector<f_t> r(m);
+      for (i_t i = 0; i < m; ++i) {
+        f_t rm = 0.0;
+        for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) {
+          f_t a = std::abs(Arow.x[p]);
+          if (a > rm) rm = a;
+        }
+        r[i]          = rm > 0 ? 1.0 / std::sqrt(rm) : 1.0;
+        max_deviation = std::max(max_deviation, std::abs(rm - 1.0));
+      }
+      for (i_t j = 0; j < n; ++j) {
+        for (i_t p = scaled.A.col_start[j]; p < scaled.A.col_start[j + 1]; ++p) {
+          scaled.A.x[p] *= r[scaled.A.i[p]];
+        }
+      }
+      for (i_t i = 0; i < m; ++i) {
+        for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) {
+          Arow.x[p] *= r[i];
+        }
+        scaled.rhs[i] *= r[i];
+        row_scale[i] *= r[i];
+      }
+
+      // --- Column scaling: scale each column by 1/sqrt(max|a_ij|) ---
+      // For cone variables, use a uniform scale per cone block to preserve SOC structure.
+      std::vector<f_t> c(n);
+      const i_t cone_start = unscaled.second_order_cone_dims.empty() ? n : unscaled.cone_var_start;
+
+      // Linear columns: scale independently
+      for (i_t j = 0; j < cone_start; ++j) {
+        f_t cm = 0.0;
+        for (i_t p = scaled.A.col_start[j]; p < scaled.A.col_start[j + 1]; ++p) {
+          f_t a = std::abs(scaled.A.x[p]);
+          if (a > cm) cm = a;
+        }
+        c[j]          = cm > 0 ? 1.0 / std::sqrt(cm) : 1.0;
+        max_deviation = std::max(max_deviation, std::abs(cm - 1.0));
+      }
+
+      // Cone columns: uniform scale per cone block
+      i_t cone_off = cone_start;
+      for (i_t k = 0; k < static_cast<i_t>(unscaled.second_order_cone_dims.size()); ++k) {
+        i_t q_k = unscaled.second_order_cone_dims[k];
+        // Find max column inf-norm across all columns in this cone
+        f_t cone_max = 0.0;
+        for (i_t j = cone_off; j < cone_off + q_k; ++j) {
+          for (i_t p = scaled.A.col_start[j]; p < scaled.A.col_start[j + 1]; ++p) {
+            f_t a = std::abs(scaled.A.x[p]);
+            if (a > cone_max) cone_max = a;
+          }
+        }
+        f_t cone_scale = cone_max > 0 ? 1.0 / std::sqrt(cone_max) : 1.0;
+        max_deviation  = std::max(max_deviation, std::abs(cone_max - 1.0));
+        for (i_t j = cone_off; j < cone_off + q_k; ++j) {
+          c[j] = cone_scale;
+        }
+        cone_off += q_k;
+      }
+      for (i_t j = 0; j < n; ++j) {
+        for (i_t p = scaled.A.col_start[j]; p < scaled.A.col_start[j + 1]; ++p) {
+          scaled.A.x[p] *= c[j];
+        }
+      }
+      for (i_t i = 0; i < m; ++i) {
+        for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) {
+          Arow.x[p] *= c[Arow.j[p]];
+        }
+      }
+      for (i_t j = 0; j < n; ++j) {
+        scaled.objective[j] *= c[j];
+        col_scale[j] *= c[j];
+      }
+      // Bounds use +/-inf for unbounded sides (see types.hpp). Use +/-1e20 as a practical
+      // sentinel: we do not expect finite bounds beyond this magnitude, and skipping scale
+      // on |bound| >= 1e20 avoids overflow when dividing very large limits by small c[j].
+      for (i_t j = 0; j < n; ++j) {
+        if (scaled.lower[j] > -1e20) scaled.lower[j] /= c[j];
+        if (scaled.upper[j] < 1e20) scaled.upper[j] /= c[j];
+      }
+      if (scaled.Q.n > 0) {
+        for (i_t row = 0; row < scaled.Q.m; ++row) {
+          for (i_t p = scaled.Q.row_start[row]; p < scaled.Q.row_start[row + 1]; ++p) {
+            i_t col = scaled.Q.j[p];
+            scaled.Q.x[p] *= c[row] * c[col];
+          }
+        }
+      }
+      if (max_deviation < 0.1) break;
+    }
+
+    f_t a_min = std::numeric_limits<f_t>::max();
+    f_t a_max = 0;
+    for (i_t p = 0; p < scaled.A.col_start[n]; ++p) {
+      f_t a = std::abs(scaled.A.x[p]);
+      if (a > 0) {
+        a_min = std::min(a_min, a);
+        a_max = std::max(a_max, a);
+      }
+    }
+    settings.log.printf("Ruiz equilibration: coefficient range [%e, %e]\n", a_min, a_max);
+    return 0;
+  }
+
+  if (!settings.scale_columns) {
     settings.log.printf("Skipping column scaling\n");
-    column_scaling.resize(n, 1.0);
+    col_scale.resize(n, 1.0);
     return 0;
   }
 
-  column_scaling.resize(n);
-  f_t max = 0;
-  f_t min = std::numeric_limits<f_t>::max();
+  col_scale.resize(n);
+
+  f_t max_col_norm = 0;
+  f_t min_col_norm = std::numeric_limits<f_t>::max();
   for (i_t j = 0; j < n; ++j) {
     const i_t col_start = scaled.A.col_start[j];
     const i_t col_end   = scaled.A.col_start[j + 1];
@@ -39,57 +192,63 @@ i_t column_scaling(const lp_problem_t<i_t, f_t>& unscaled,
       const f_t x = scaled.A.x[p];
       sum += x * x;
     }
-    f_t col_norm_j = column_scaling[j] = sum > 0 ? std::sqrt(sum) : 1.0;
-    max                                = std::max(col_norm_j, max);
-    min                                = std::min(col_norm_j, min);
+    const f_t col_norm = sum > 0 ? std::sqrt(sum) : 1.0;
+    col_scale[j]       = f_t(1) / col_norm;
+    max_col_norm       = std::max(col_norm, max_col_norm);
+    min_col_norm       = std::min(col_norm, min_col_norm);
   }
-  settings.log.printf("Scaling matrix. Maximum column norm %e, minimum column norm %e\n", max, min);
-  // C(j, j) = 1/column_scaling(j)
+  settings.log.printf("Scaling matrix. Maximum column norm %e, minimum column norm %e\n",
+                      max_col_norm,
+                      min_col_norm);
 
   // scaled_A = unscaled_A * C
   for (i_t j = 0; j < n; ++j) {
     const i_t col_start = scaled.A.col_start[j];
     const i_t col_end   = scaled.A.col_start[j + 1];
     for (i_t p = col_start; p < col_end; ++p) {
-      scaled.A.x[p] /= column_scaling[j];
+      scaled.A.x[p] *= col_scale[j];
     }
-  }
-  // scaled_obj = C*unscaled_obj
-  for (i_t j = 0; j < n; ++j) {
-    scaled.objective[j] /= column_scaling[j];
-  }
-  // scaled_lower = C^{-1} * unscaled_lower
-  // scaled_upper = C^{-1} * unscaled_upper
-  for (i_t j = 0; j < n; ++j) {
-    scaled.lower[j] *= column_scaling[j];
-    scaled.upper[j] *= column_scaling[j];
+    scaled.objective[j] *= col_scale[j];
+    if (scaled.lower[j] > -1e20) scaled.lower[j] /= col_scale[j];
+    if (scaled.upper[j] < 1e20) scaled.upper[j] /= col_scale[j];
   }
 
   for (i_t i = 0; i < unscaled.Q.n; ++i) {
     const i_t row_start = unscaled.Q.row_start[i];
     const i_t row_end   = unscaled.Q.row_start[i + 1];
-    i_t row             = i;
     for (i_t p = row_start; p < row_end; ++p) {
-      i_t col       = unscaled.Q.j[p];
-      scaled.Q.x[p] = unscaled.Q.x[p] / (column_scaling[row] * column_scaling[col]);
+      const i_t col = unscaled.Q.j[p];
+      scaled.Q.x[p] = unscaled.Q.x[p] * col_scale[i] * col_scale[col];
     }
   }
   return 0;
 }
 
 template <typename i_t, typename f_t>
-void unscale_solution(const std::vector<f_t>& column_scaling,
+void unscale_solution(const std::vector<f_t>& col_scale,
+                      const std::vector<f_t>& row_scale,
                       const std::vector<f_t>& scaled_x,
+                      const std::vector<f_t>& scaled_y,
                       const std::vector<f_t>& scaled_z,
                       std::vector<f_t>& unscaled_x,
+                      std::vector<f_t>& unscaled_y,
                       std::vector<f_t>& unscaled_z)
 {
   const i_t n = scaled_x.size();
   unscaled_x.resize(n);
   unscaled_z.resize(n);
   for (i_t j = 0; j < n; ++j) {
-    unscaled_x[j] = scaled_x[j] / column_scaling[j];
-    unscaled_z[j] = scaled_z[j] * column_scaling[j];
+    // column_scaling multiplies A(:,j) and c_j by col_scale[j], divides bounds:
+    // x_orig = col_scale .* x_scaled, z_orig = z_scaled / col_scale.
+    unscaled_x[j] = scaled_x[j] * col_scale[j];
+    unscaled_z[j] = scaled_z[j] / col_scale[j];
+  }
+
+  const i_t m = scaled_y.size();
+  unscaled_y.resize(m);
+  for (i_t i = 0; i < m; ++i) {
+    // y_unscaled = R * y_scaled  (row scaling: constraint was scaled by R)
+    unscaled_y[i] = scaled_y[i] * row_scale[i];
   }
 }
 
@@ -98,12 +257,16 @@ void unscale_solution(const std::vector<f_t>& column_scaling,
 template int column_scaling<int, double>(const lp_problem_t<int, double>& unscaled,
                                          const simplex_solver_settings_t<int, double>& settings,
                                          lp_problem_t<int, double>& scaled,
-                                         std::vector<double>& column_scaling);
+                                         std::vector<double>& col_scale,
+                                         std::vector<double>& row_scale);
 
-template void unscale_solution<int, double>(const std::vector<double>& column_scaling,
+template void unscale_solution<int, double>(const std::vector<double>& col_scale,
+                                            const std::vector<double>& row_scale,
                                             const std::vector<double>& scaled_x,
+                                            const std::vector<double>& scaled_y,
                                             const std::vector<double>& scaled_z,
                                             std::vector<double>& unscaled_x,
+                                            std::vector<double>& unscaled_y,
                                             std::vector<double>& unscaled_z);
 
 #endif
diff --git a/cpp/src/dual_simplex/scaling.hpp b/cpp/src/dual_simplex/scaling.hpp
index 120660c765..94aca2055d 100644
--- a/cpp/src/dual_simplex/scaling.hpp
+++ b/cpp/src/dual_simplex/scaling.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -20,13 +20,17 @@ template <typename i_t, typename f_t>
 i_t column_scaling(const lp_problem_t<i_t, f_t>& unscaled,
                    const simplex_solver_settings_t<i_t, f_t>& settings,
                    lp_problem_t<i_t, f_t>& scaled,
-                   std::vector<f_t>& column_scaling);
+                   std::vector<f_t>& column_scaling,
+                   std::vector<f_t>& row_scaling);
 
 template <typename i_t, typename f_t>
 void unscale_solution(const std::vector<f_t>& column_scaling,
+                      const std::vector<f_t>& row_scaling,
                       const std::vector<f_t>& scaled_x,
+                      const std::vector<f_t>& scaled_y,
                       const std::vector<f_t>& scaled_z,
                       std::vector<f_t>& unscaled_x,
+                      std::vector<f_t>& unscaled_y,
                       std::vector<f_t>& unscaled_z);
 
 }  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp
index 500822e1bd..35510c37b9 100644
--- a/cpp/src/dual_simplex/simplex_solver_settings.hpp
+++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp
@@ -79,7 +79,7 @@ struct simplex_solver_settings_t {
       use_left_looking_lu(false),
       eliminate_singletons(true),
       print_presolve_stats(true),
-      barrier_presolve(false),
+      barrier_presolve(true),
       cudss_deterministic(false),
       deterministic(false),
       barrier(false),
@@ -163,7 +163,7 @@ struct simplex_solver_settings_t {
     use_left_looking_lu;  // true to use left looking LU factorization, false to use right looking
   bool eliminate_singletons;  // true to eliminate singletons from the basis
   bool print_presolve_stats;  // true to print presolve stats
-  bool barrier_presolve;      // true to use barrier presolve
+  bool barrier_presolve;  // bound shifts, free-var bounding; LP v-w split / folding when no cones
   bool cudss_deterministic;   // true to use cuDSS deterministic mode, false for non-deterministic
   bool barrier;               // true to use barrier method, false to use dual simplex method
   bool deterministic;  // true to use B&B deterministic mode, false to use non-deterministic mode
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 9d8a62efc3..cf39c99e20 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -61,6 +61,42 @@ void write_matlab(const std::string& filename, const dual_simplex::lp_problem_t<
   fclose(fid);
 }
 
+template <typename i_t, typename f_t>
+bool validate_barrier_cone_layout(const lp_problem_t<i_t, f_t>& problem,
+                                  const simplex_solver_settings_t<i_t, f_t>& settings)
+{
+  if (problem.second_order_cone_dims.empty()) { return true; }
+
+  i_t cone_end = problem.cone_var_start;
+  for (auto q_k : problem.second_order_cone_dims) {
+    if (q_k <= 1) {
+      settings.log.printf(
+        "Error: second-order cone dimensions must be at least 2; use linear variables instead of "
+        "Q^1\n");
+      return false;
+    }
+    cone_end += q_k;
+  }
+
+  if (cone_end != problem.num_cols) {
+    settings.log.printf("Error: conic variables must form a trailing block [linear | cone]\n");
+    return false;
+  }
+
+  for (i_t j = problem.cone_var_start; j < cone_end; ++j) {
+    if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) {
+      settings.log.printf("Error: explicit lower bound on conic variable %d is not supported\n", j);
+      return false;
+    }
+    if (problem.upper[j] < inf) {
+      settings.log.printf("Error: explicit upper bound on conic variable %d is not supported\n", j);
+      return false;
+    }
+  }
+
+  return true;
+}
+
 }  // namespace
 
 template <typename i_t, typename f_t>
@@ -170,9 +206,10 @@ lp_status_t solve_linear_program_with_advanced_basis(
                             presolved_lp.num_cols,
                             presolved_lp.A.col_start[presolved_lp.num_cols]);
   std::vector<f_t> column_scales;
+  std::vector<f_t> row_scales_simplex;
   {
     raft::common::nvtx::range scope_scaling("DualSimplex::scaling");
-    column_scaling(presolved_lp, settings, lp, column_scales);
+    column_scaling(presolved_lp, settings, lp, column_scales, row_scales_simplex);
   }
   assert(presolved_lp.num_cols == lp.num_cols);
   lp_problem_t<i_t, f_t> phase1_problem(original_lp.handle_ptr, 1, 1, 1);
@@ -293,13 +330,21 @@ lp_status_t solve_linear_program_with_advanced_basis(
     }
     if (status == dual::status_t::OPTIMAL) {
       std::vector<f_t> unscaled_x(lp.num_cols);
+      std::vector<f_t> unscaled_y(lp.num_rows);
       std::vector<f_t> unscaled_z(lp.num_cols);
-      unscale_solution<i_t, f_t>(column_scales, solution.x, solution.z, unscaled_x, unscaled_z);
+      unscale_solution<i_t, f_t>(column_scales,
+                                 row_scales_simplex,
+                                 solution.x,
+                                 solution.y,
+                                 solution.z,
+                                 unscaled_x,
+                                 unscaled_y,
+                                 unscaled_z);
       uncrush_solution(presolve_info,
                        settings,
                        original_lp,
                        unscaled_x,
-                       solution.y,
+                       unscaled_y,
                        unscaled_z,
                        original_solution.x,
                        original_solution.y,
@@ -349,16 +394,18 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
 
   // Convert the user problem to a linear program with only equality constraints
   std::vector<i_t> new_slacks;
-  simplex_solver_settings_t<i_t, f_t> barrier_settings = settings;
-  barrier_settings.barrier_presolve                    = true;
   dualize_info_t<i_t, f_t> dualize_info;
-  convert_user_problem(user_problem, barrier_settings, original_lp, new_slacks, dualize_info);
+  convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info);
+  if (!validate_barrier_cone_layout(original_lp, settings)) {
+    return lp_status_t::NUMERICAL_ISSUES;
+  }
+
   lp_solution_t<i_t, f_t> lp_solution(original_lp.num_rows, original_lp.num_cols);
 
   // Presolve the linear program
   presolve_info_t<i_t, f_t> presolve_info;
   lp_problem_t<i_t, f_t> presolved_lp(user_problem.handle_ptr, 1, 1, 1);
-  const i_t ok = presolve(original_lp, barrier_settings, presolved_lp, presolve_info);
+  const i_t ok = presolve(original_lp, settings, presolved_lp, presolve_info);
   if (ok == CONCURRENT_HALT_RETURN) { return lp_status_t::CONCURRENT_LIMIT; }
   if (ok == TIME_LIMIT_RETURN) { return lp_status_t::TIME_LIMIT; }
   if (ok == -1) { return lp_status_t::INFEASIBLE; }
@@ -369,12 +416,13 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
                                     presolved_lp.num_cols,
                                     presolved_lp.A.col_start[presolved_lp.num_cols]);
   std::vector<f_t> column_scales;
-  column_scaling(presolved_lp, barrier_settings, barrier_lp, column_scales);
+  std::vector<f_t> row_scales;
+  column_scaling(presolved_lp, settings, barrier_lp, column_scales, row_scales);
 
   // Solve using barrier
   lp_solution_t<i_t, f_t> barrier_solution(barrier_lp.num_rows, barrier_lp.num_cols);
 
-  barrier_solver_t<i_t, f_t> barrier_solver(barrier_lp, presolve_info, barrier_settings);
+  barrier_solver_t<i_t, f_t> barrier_solver(barrier_lp, presolve_info, settings);
   lp_status_t barrier_status = barrier_solver.solve(start_time, barrier_solution);
   if (barrier_status == lp_status_t::OPTIMAL) {
 #ifdef COMPUTE_SCALED_RESIDUALS
@@ -394,9 +442,16 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
 #endif
     // Unscale the solution
     std::vector<f_t> unscaled_x(barrier_lp.num_cols);
+    std::vector<f_t> unscaled_y(barrier_lp.num_rows);
     std::vector<f_t> unscaled_z(barrier_lp.num_cols);
-    unscale_solution<i_t, f_t>(
-      column_scales, barrier_solution.x, barrier_solution.z, unscaled_x, unscaled_z);
+    unscale_solution<i_t, f_t>(column_scales,
+                               row_scales,
+                               barrier_solution.x,
+                               barrier_solution.y,
+                               barrier_solution.z,
+                               unscaled_x,
+                               unscaled_y,
+                               unscaled_z);
 
     std::vector<f_t> residual = presolved_lp.rhs;
     matrix_vector_multiply(presolved_lp.A, 1.0, unscaled_x, -1.0, residual);
@@ -410,7 +465,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
         unscaled_dual_residual[j] -= presolved_lp.objective[j];
       }
       matrix_transpose_vector_multiply(
-        presolved_lp.A, 1.0, barrier_solution.y, 1.0, unscaled_dual_residual);
+        presolved_lp.A, 1.0, unscaled_y, 1.0, unscaled_dual_residual);
       f_t unscaled_dual_residual_norm = vector_norm_inf<i_t, f_t>(unscaled_dual_residual);
       settings.log.printf(
         "Unscaled Dual infeasibility     (abs/rel): %.2e/%.2e\n",
@@ -420,10 +475,10 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
 
     // Undo presolve
     uncrush_solution(presolve_info,
-                     barrier_settings,
+                     settings,
                      original_lp,
                      unscaled_x,
-                     barrier_solution.y,
+                     unscaled_y,
                      unscaled_z,
                      lp_solution.x,
                      lp_solution.y,
@@ -564,7 +619,8 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
     uncrush_primal_solution(user_problem, original_lp, lp_solution.x, solution.x);
     uncrush_dual_solution(
       user_problem, original_lp, lp_solution.y, lp_solution.z, solution.y, solution.z);
-    solution.objective          = barrier_solution.objective;
+    solution.objective =
+      barrier_solution.user_objective / user_problem.obj_scale - user_problem.obj_constant;
     solution.user_objective     = barrier_solution.user_objective;
     solution.l2_primal_residual = barrier_solution.l2_primal_residual;
     solution.l2_dual_residual   = barrier_solution.l2_dual_residual;
@@ -641,7 +697,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
     lp_solution_t<i_t, f_t> crossover_solution(original_lp.num_rows, original_lp.num_cols);
     std::vector<variable_status_t> vstatus(original_lp.num_cols);
     crossover_status_t crossover_status = crossover(
-      original_lp, barrier_settings, lp_solution, start_time, crossover_solution, vstatus);
+      original_lp, settings, lp_solution, start_time, crossover_solution, vstatus);
     settings.log.printf("Crossover status: %d\n", crossover_status);
     if (crossover_status == crossover_status_t::OPTIMAL) { barrier_status = lp_status_t::OPTIMAL; }
   }
diff --git a/cpp/src/dual_simplex/user_problem.hpp b/cpp/src/dual_simplex/user_problem.hpp
index 73c4c391be..8b0588064c 100644
--- a/cpp/src/dual_simplex/user_problem.hpp
+++ b/cpp/src/dual_simplex/user_problem.hpp
@@ -52,6 +52,8 @@ struct user_problem_t {
   std::vector<i_t> Q_offsets;
   std::vector<i_t> Q_indices;
   std::vector<f_t> Q_values;
+  i_t cone_var_start{0};
+  std::vector<i_t> second_order_cone_dims;
 };
 
 }  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/dual_simplex/vector_math.cuh b/cpp/src/dual_simplex/vector_math.cuh
index abc7263858..32c75ea366 100644
--- a/cpp/src/dual_simplex/vector_math.cuh
+++ b/cpp/src/dual_simplex/vector_math.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -9,6 +9,10 @@
 
 #include <cub/cub.cuh>
 
+#include <raft/core/copy.hpp>
+#include <raft/core/device_span.hpp>
+#include <raft/core/host_span.hpp>
+
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
@@ -28,6 +32,7 @@ struct norm_inf_max {
 template <typename i_t, typename f_t, typename InputIteratorT>
 f_t device_custom_vector_norm_inf(InputIteratorT in, i_t size, rmm::cuda_stream_view stream_view)
 {
+  if (size == 0) { return 0; }
   // FIXME: Tmp storage stored in vector_math class.
   auto d_out = rmm::device_scalar<f_t>(stream_view);
   rmm::device_uvector<uint8_t> d_temp_storage(0, stream_view);
@@ -62,6 +67,12 @@ f_t device_vector_norm_inf(const rmm::device_uvector<f_t>& in, rmm::cuda_stream_
   return device_custom_vector_norm_inf<i_t, f_t>(in.data(), in.size(), stream_view);
 }
 
+template <typename i_t, typename f_t>
+f_t device_vector_norm_inf(raft::device_span<const f_t> in, rmm::cuda_stream_view stream_view)
+{
+  return device_custom_vector_norm_inf<i_t, f_t>(in.data(), in.size(), stream_view);
+}
+
 // TMP we should just have a CPU and GPU version to do the comparison
 // Should never have to norm inf a CPU vector if we are using the GPU
 template <typename i_t, typename f_t, typename Allocator>
@@ -71,4 +82,12 @@ f_t vector_norm_inf(const std::vector<f_t, Allocator>& x, rmm::cuda_stream_view
   return device_vector_norm_inf<i_t, f_t>(d_x, stream_view);
 }
 
+template <typename i_t, typename f_t>
+f_t vector_norm_inf(raft::host_span<const f_t> x, rmm::cuda_stream_view stream_view)
+{
+  rmm::device_uvector<f_t> d_x(x.size(), stream_view);
+  raft::copy(d_x.data(), x.data(), x.size(), stream_view);
+  return device_vector_norm_inf<i_t, f_t>(d_x, stream_view);
+}
+
 }  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/io/lp_parser.cpp b/cpp/src/io/lp_parser.cpp
index 84daa101b4..81ec63f323 100644
--- a/cpp/src/io/lp_parser.cpp
+++ b/cpp/src/io/lp_parser.cpp
@@ -118,41 +118,42 @@ bool is_free_keyword(std::string_view lower) { return lower == "free"; }
 
 bool is_infinity_text(std::string_view lower) { return lower == "inf" || lower == "infinity"; }
 
-// Builds the symmetric Q in CSR from LP-format raw upper-triangular triples.
+// Builds the symmetric Q in COO from LP-format raw upper-triangular triples.
 // Each input triple (i, j, c) with i <= j represents `c * x_i * x_j` in the
 // LP source. The output Q satisfies x^T Q x = sum of those terms.
 //   Diagonal (i == j): Q[i,i] = c (one entry).
 //   Off-diagonal (i != j): Q[i,j] = Q[j,i] = c/2 (two entries; symmetric split).
 template <typename i_t, typename f_t>
-void build_symmetric_q_csr(const std::vector<std::tuple<i_t, i_t, f_t>>& raw_triples,
+void build_symmetric_q_coo(const std::vector<std::tuple<i_t, i_t, f_t>>& raw_triples,
                            i_t n_vars,
-                           std::vector<f_t>& out_values,
-                           std::vector<i_t>& out_indices,
-                           std::vector<i_t>& out_offsets)
+                           std::vector<i_t>& out_row_indices,
+                           std::vector<i_t>& out_col_indices,
+                           std::vector<f_t>& out_values)
 {
-  std::vector<std::vector<std::pair<i_t, f_t>>> row_data(n_vars);
+  std::vector<std::vector<std::pair<i_t, f_t>>> row_data(static_cast<size_t>(n_vars));
   for (const auto& [i, j, c] : raw_triples) {
     if (i == j) {
-      row_data[i].emplace_back(i, c);
+      row_data[static_cast<size_t>(i)].emplace_back(i, c);
     } else {
-      row_data[i].emplace_back(j, c / f_t(2));
-      row_data[j].emplace_back(i, c / f_t(2));
+      row_data[static_cast<size_t>(i)].emplace_back(j, c / f_t(2));
+      row_data[static_cast<size_t>(j)].emplace_back(i, c / f_t(2));
     }
   }
   for (auto& row : row_data) {
     std::sort(row.begin(), row.end());
   }
-  out_offsets.clear();
-  out_indices.clear();
+  out_row_indices.clear();
+  out_col_indices.clear();
   out_values.clear();
-  out_offsets.reserve(static_cast<size_t>(n_vars) + 1);
-  out_offsets.push_back(0);
+  out_row_indices.reserve(raw_triples.size() * 2);
+  out_col_indices.reserve(raw_triples.size() * 2);
+  out_values.reserve(raw_triples.size() * 2);
   for (i_t r = 0; r < n_vars; ++r) {
-    for (const auto& [col, val] : row_data[r]) {
+    for (const auto& [col, val] : row_data[static_cast<size_t>(r)]) {
+      out_row_indices.push_back(r);
+      out_col_indices.push_back(col);
       out_values.push_back(val);
-      out_indices.push_back(col);
     }
-    out_offsets.push_back(static_cast<i_t>(out_values.size()));
   }
 }
 
@@ -1507,10 +1508,10 @@ void flush_quadratic_constraints(mps_data_model_t<i_t, f_t>& problem,
   const i_t linear_row_count = static_cast<i_t>(parser.row_names.size());
   i_t k                      = 0;
   for (const auto& block : parser.quadratic_constraint_blocks) {
+    std::vector<i_t> q_row_indices;
+    std::vector<i_t> q_col_indices;
     std::vector<f_t> q_values;
-    std::vector<i_t> q_indices;
-    std::vector<i_t> q_offsets;
-    build_symmetric_q_csr(block.quad_triples, n_vars, q_values, q_indices, q_offsets);
+    build_symmetric_q_coo(block.quad_triples, n_vars, q_row_indices, q_col_indices, q_values);
     problem.append_quadratic_constraint(linear_row_count + k,
                                         block.row_name,
                                         static_cast<char>(block.row_type),
@@ -1518,8 +1519,8 @@ void flush_quadratic_constraints(mps_data_model_t<i_t, f_t>& problem,
                                         block.linear_indices,
                                         block.rhs_value,
                                         q_values,
-                                        q_indices,
-                                        q_offsets);
+                                        q_row_indices,
+                                        q_col_indices);
     ++k;
   }
 }
diff --git a/cpp/src/io/mps_data_model.cpp b/cpp/src/io/mps_data_model.cpp
index 7ae359e450..0172918c62 100644
--- a/cpp/src/io/mps_data_model.cpp
+++ b/cpp/src/io/mps_data_model.cpp
@@ -9,6 +9,7 @@
 #include <utilities/error.hpp>
 
 #include <algorithm>
+#include <numeric>
 #include <utility>
 
 namespace cuopt::linear_programming::io {
@@ -139,32 +140,51 @@ void mps_data_model_t<i_t, f_t>::set_quadratic_objective_matrix(std::span<const
 }
 
 template <typename i_t, typename f_t>
-void mps_data_model_t<i_t, f_t>::append_quadratic_constraint(i_t constraint_row_index,
-                                                             const std::string& constraint_row_name,
-                                                             char constraint_row_type,
-                                                             std::span<const f_t> linear_values,
-                                                             std::span<const i_t> linear_indices,
-                                                             f_t rhs_value,
-                                                             std::span<const f_t> quadratic_values,
-                                                             std::span<const i_t> quadratic_indices,
-                                                             std::span<const i_t> quadratic_offsets)
+void mps_data_model_t<i_t, f_t>::append_quadratic_constraint(
+  i_t constraint_row_index,
+  const std::string& constraint_row_name,
+  char constraint_row_type,
+  std::span<const f_t> linear_values,
+  std::span<const i_t> linear_indices,
+  f_t rhs_value,
+  std::span<const f_t> quadratic_values,
+  std::span<const i_t> quadratic_row_indices,
+  std::span<const i_t> quadratic_col_indices)
 {
   mps_parser_expects(constraint_row_index >= 0,
                      error_type_t::ValidationError,
                      "constraint_row_index must be non-negative");
 
-  mps_parser_expects(constraint_row_type == 'L',
+  mps_parser_expects(constraint_row_type == 'L' || constraint_row_type == 'G',
                      error_type_t::ValidationError,
-                     "Quadratic constraint ROWS type must be 'L' (less-or-equal); got '%c'. "
-                     "Only 'L' is supported for convex quadratic constraints.",
+                     "Quadratic constraint ROWS type must be 'L' (<=) or 'G' (>=); got '%c'.",
                      constraint_row_type);
 
   mps_parser_expects(linear_values.size() == linear_indices.size(),
                      error_type_t::ValidationError,
                      "linear_values and linear_indices must have the same nnz count");
 
-  mps_parser_expects(
-    !quadratic_offsets.empty(), error_type_t::ValidationError, "quadratic_offsets cannot be empty");
+  const size_t q_nnz = quadratic_values.size();
+  mps_parser_expects(q_nnz == quadratic_row_indices.size(),
+                     error_type_t::ValidationError,
+                     "quadratic_values and quadratic_row_indices must have the same length");
+  mps_parser_expects(q_nnz == quadratic_col_indices.size(),
+                     error_type_t::ValidationError,
+                     "quadratic_values and quadratic_col_indices must have the same length");
+
+  if (!linear_values.empty()) {
+    mps_parser_expects(linear_values.data() != nullptr && linear_indices.data() != nullptr,
+                       error_type_t::ValidationError,
+                       "linear_values and linear_indices cannot be null when non-empty");
+  }
+
+  if (q_nnz > 0) {
+    mps_parser_expects(quadratic_values.data() != nullptr &&
+                         quadratic_row_indices.data() != nullptr &&
+                         quadratic_col_indices.data() != nullptr,
+                       error_type_t::ValidationError,
+                       "Q COO spans cannot be null when nnz > 0");
+  }
 
   quadratic_constraint_t qc;
   qc.constraint_row_index = constraint_row_index;
@@ -173,9 +193,33 @@ void mps_data_model_t<i_t, f_t>::append_quadratic_constraint(i_t constraint_row_
   qc.rhs_value            = rhs_value;
   qc.linear_values.assign(linear_values.begin(), linear_values.end());
   qc.linear_indices.assign(linear_indices.begin(), linear_indices.end());
-  qc.quadratic_values.assign(quadratic_values.begin(), quadratic_values.end());
-  qc.quadratic_indices.assign(quadratic_indices.begin(), quadratic_indices.end());
-  qc.quadratic_offsets.assign(quadratic_offsets.begin(), quadratic_offsets.end());
+
+  if (q_nnz == 0) {
+    qc.quadratic_row_indices.clear();
+    qc.quadratic_col_indices.clear();
+    qc.quadratic_values.clear();
+  } else {
+    std::vector<i_t> wr(quadratic_row_indices.begin(), quadratic_row_indices.end());
+    std::vector<i_t> wc(quadratic_col_indices.begin(), quadratic_col_indices.end());
+    std::vector<f_t> wv(quadratic_values.begin(), quadratic_values.end());
+
+    std::vector<size_t> perm(q_nnz);
+    std::iota(perm.begin(), perm.end(), size_t{0});
+    std::stable_sort(perm.begin(), perm.end(), [&](size_t a, size_t b) {
+      if (wr[a] != wr[b]) { return wr[a] < wr[b]; }
+      return wc[a] < wc[b];
+    });
+
+    qc.quadratic_row_indices.resize(q_nnz);
+    qc.quadratic_col_indices.resize(q_nnz);
+    qc.quadratic_values.resize(q_nnz);
+    for (size_t t = 0; t < q_nnz; ++t) {
+      const size_t ix             = perm[t];
+      qc.quadratic_row_indices[t] = wr[ix];
+      qc.quadratic_col_indices[t] = wc[ix];
+      qc.quadratic_values[t]      = wv[ix];
+    }
+  }
 
   quadratic_constraints_.push_back(std::move(qc));
 }
diff --git a/cpp/src/io/mps_parser.cpp b/cpp/src/io/mps_parser.cpp
index 535938b09c..40b47b03bd 100644
--- a/cpp/src/io/mps_parser.cpp
+++ b/cpp/src/io/mps_parser.cpp
@@ -17,14 +17,18 @@
 #include <fstream>
 #include <limits>
 #include <memory>
+#include <numeric>
+#include <span>
 #include <sstream>
 #include <string>
+#include <tuple>
 #include <unordered_set>
 #include <utility>
 
 namespace {
 using cuopt::linear_programming::io::error_type_t;
 using cuopt::linear_programming::io::mps_parser_expects;
+using cuopt::linear_programming::io::mps_parser_expects_fatal;
 
 std::vector<char> string_to_buffer(std::string_view input)
 {
@@ -34,6 +38,229 @@ std::vector<char> string_to_buffer(std::string_view input)
 }
 }  // end namespace
 
+namespace {
+
+/** @brief ceil(log2(x)) for x >= 1; used for sparse-vs-dense CSR heuristic only. */
+inline size_t ceil_log2_size_t(size_t x)
+{
+  size_t b = 0;
+  size_t y = (x <= 1) ? 0 : (x - 1);
+  while (y > 0) {
+    y >>= 1;
+    ++b;
+  }
+  return (b == 0) ? 1 : b;
+}
+
+/**
+ * @brief Reusable scratch for converting (row,col,value) triples to CSR via CSC (double transpose).
+ *
+ * Avoids `std::vector<std::vector<...>>` per column/row, which is allocation-heavy for large Q
+ * blocks (e.g. many QCMATRIX sections in portfolio models).
+ */
+template <typename i_t, typename f_t>
+struct double_transpose_scratch_t {
+  std::vector<i_t> col_nnz{};
+  std::vector<i_t> col_off{};
+  std::vector<i_t> col_wr{};
+  std::vector<i_t> csc_rows{};
+  std::vector<f_t> csc_vals{};
+  std::vector<i_t> row_nnz{};
+  std::vector<i_t> row_off{};
+  std::vector<i_t> row_wr{};
+  /** Indices 0..nnz-1 for sort-by-(row,col) sparse CSR path (no symmetrization). */
+  std::vector<size_t> sort_perm{};
+};
+
+/**
+ * @brief CSR from triples without full O(num_cols) column sweeps: count per row, sort by (row,col),
+ * scatter. Same numeric result ordering as the dense double-transpose path for non-symmetrized
+ * inputs when column order within a row is the stable sort order for equal keys (see std::sort).
+ *
+ * Used for QCMATRIX / QMATRIX when nnz is small enough vs matrix dimension that sorting beats
+ * scanning every column index twice.
+ */
+template <typename i_t, typename f_t>
+void triples_to_csr_sparse_sorted_flat(const std::vector<std::tuple<i_t, i_t, f_t>>& entries,
+                                       i_t num_rows,
+                                       i_t num_cols,
+                                       f_t value_scale,
+                                       double_transpose_scratch_t<i_t, f_t>& scratch,
+                                       std::vector<f_t>& out_values,
+                                       std::vector<i_t>& out_indices,
+                                       std::vector<i_t>& out_offsets)
+{
+  static_cast<void>(num_cols);  // square Q paths pass num_cols == num_rows
+  const size_t nnz = entries.size();
+  const size_t nr  = static_cast<size_t>(num_rows);
+
+  scratch.row_nnz.assign(nr, 0);
+  for (const auto& e : entries) {
+    ++scratch.row_nnz[static_cast<size_t>(std::get<0>(e))];
+  }
+
+  scratch.row_off.resize(nr + 1);
+  scratch.row_off[0] = 0;
+  for (size_t r = 0; r < nr; ++r) {
+    scratch.row_off[r + 1] = scratch.row_off[r] + scratch.row_nnz[r];
+  }
+  const size_t csr_nnz = static_cast<size_t>(scratch.row_off[nr]);
+
+  out_values.resize(csr_nnz);
+  out_indices.resize(csr_nnz);
+
+  scratch.sort_perm.resize(nnz);
+  std::iota(scratch.sort_perm.begin(), scratch.sort_perm.end(), size_t{0});
+  std::stable_sort(scratch.sort_perm.begin(), scratch.sort_perm.end(), [&](size_t a, size_t b) {
+    const auto& ta = entries[a];
+    const auto& tb = entries[b];
+    const i_t ra   = std::get<0>(ta);
+    const i_t rb   = std::get<0>(tb);
+    if (ra != rb) { return ra < rb; }
+    return std::get<1>(ta) < std::get<1>(tb);
+  });
+
+  scratch.row_wr.resize(nr);
+  std::copy(scratch.row_off.begin(), scratch.row_off.begin() + nr, scratch.row_wr.begin());
+
+  for (size_t k = 0; k < nnz; ++k) {
+    const size_t ix                     = scratch.sort_perm[k];
+    const auto& e                       = entries[ix];
+    const i_t row                       = std::get<0>(e);
+    const i_t col                       = std::get<1>(e);
+    const f_t val                       = std::get<2>(e);
+    const size_t sr                     = static_cast<size_t>(row);
+    const i_t w                         = scratch.row_wr[sr]++;
+    out_indices[static_cast<size_t>(w)] = col;
+    out_values[static_cast<size_t>(w)]  = val * value_scale;
+  }
+
+  out_offsets = std::move(scratch.row_off);
+}
+
+/**
+ * @brief Build CSR from coordinate triples using the same double-transpose semantics as the
+ * previous nested-vector implementation: CSC column-major buckets, then CSR with columns sorted
+ * within each row (column index ascending).
+ *
+ * @param symmetrize_upper_triangular If true (QUADOBJ), each off-diagonal (r,c) also adds (c,r).
+ */
+template <typename i_t, typename f_t>
+void triples_to_csr_double_transpose_flat(const std::vector<std::tuple<i_t, i_t, f_t>>& entries,
+                                          i_t num_rows,
+                                          i_t num_cols,
+                                          bool symmetrize_upper_triangular,
+                                          f_t value_scale,
+                                          double_transpose_scratch_t<i_t, f_t>& scratch,
+                                          std::vector<f_t>& out_values,
+                                          std::vector<i_t>& out_indices,
+                                          std::vector<i_t>& out_offsets)
+{
+  if (entries.empty()) {
+    out_values.clear();
+    out_indices.clear();
+    out_offsets.assign(static_cast<size_t>(num_rows) + 1, 0);
+    return;
+  }
+
+  const size_t nc    = static_cast<size_t>(num_cols);
+  const size_t nr    = static_cast<size_t>(num_rows);
+  const size_t nnz   = entries.size();
+  const size_t n_dim = std::max(nr, nc);
+
+  // Without symmetrization (QCMATRIX, QMATRIX), avoid O(n) full-column sweeps when blocks are
+  // sparse: sort triples by (row,col) and scatter in one pass. QUADOBJ keeps the dense transpose
+  // path because symmetrization duplicates off-diagonal entries.
+  if (!symmetrize_upper_triangular) {
+    const size_t lg = ceil_log2_size_t(nnz);
+    // Roughly: sort cost ~ nnz·lg vs ~O(n) column iterations in dense path; 48 is a tunable fudge.
+    constexpr size_t k_sparse_vs_dense_heuristic = 96;
+    if (nnz * lg < k_sparse_vs_dense_heuristic * n_dim) {
+      triples_to_csr_sparse_sorted_flat(
+        entries, num_rows, num_cols, value_scale, scratch, out_values, out_indices, out_offsets);
+      return;
+    }
+  }
+
+  scratch.col_nnz.assign(nc, 0);
+  for (const auto& entry : entries) {
+    const i_t r = std::get<0>(entry);
+    const i_t c = std::get<1>(entry);
+    scratch.col_nnz[static_cast<size_t>(c)]++;
+    if (symmetrize_upper_triangular && r != c) { scratch.col_nnz[static_cast<size_t>(r)]++; }
+  }
+
+  scratch.col_off.resize(nc + 1);
+  scratch.col_off[0] = 0;
+  for (size_t c = 0; c < nc; ++c) {
+    scratch.col_off[c + 1] = scratch.col_off[c] + scratch.col_nnz[c];
+  }
+  const i_t csc_nnz = scratch.col_off[nc];
+  scratch.csc_rows.resize(static_cast<size_t>(csc_nnz));
+  scratch.csc_vals.resize(static_cast<size_t>(csc_nnz));
+  scratch.col_wr.resize(nc);
+  std::copy(scratch.col_off.begin(), scratch.col_off.begin() + nc, scratch.col_wr.begin());
+
+  for (const auto& entry : entries) {
+    const i_t r = std::get<0>(entry);
+    const i_t c = std::get<1>(entry);
+    const f_t v = std::get<2>(entry);
+    {
+      const size_t sc                          = static_cast<size_t>(c);
+      const i_t p                              = scratch.col_wr[sc]++;
+      scratch.csc_rows[static_cast<size_t>(p)] = r;
+      scratch.csc_vals[static_cast<size_t>(p)] = v;
+    }
+    if (symmetrize_upper_triangular && r != c) {
+      const size_t sr                          = static_cast<size_t>(r);
+      const i_t p                              = scratch.col_wr[sr]++;
+      scratch.csc_rows[static_cast<size_t>(p)] = c;
+      scratch.csc_vals[static_cast<size_t>(p)] = v;
+    }
+  }
+
+  scratch.row_nnz.assign(nr, 0);
+  for (i_t cc = 0; cc < num_cols; ++cc) {
+    const size_t col_z = static_cast<size_t>(cc);
+    const i_t lo       = scratch.col_off[col_z];
+    const i_t hi       = scratch.col_off[col_z + 1];
+    for (i_t t = lo; t < hi; ++t) {
+      const i_t row = scratch.csc_rows[static_cast<size_t>(t)];
+      scratch.row_nnz[static_cast<size_t>(row)]++;
+    }
+  }
+
+  scratch.row_off.resize(nr + 1);
+  scratch.row_off[0] = 0;
+  for (size_t r = 0; r < nr; ++r) {
+    scratch.row_off[r + 1] = scratch.row_off[r] + scratch.row_nnz[r];
+  }
+  const size_t csr_nnz = static_cast<size_t>(scratch.row_off[nr]);
+
+  out_values.resize(csr_nnz);
+  out_indices.resize(csr_nnz);
+  scratch.row_wr.resize(nr);
+  std::copy(scratch.row_off.begin(), scratch.row_off.begin() + nr, scratch.row_wr.begin());
+
+  for (i_t cc = 0; cc < num_cols; ++cc) {
+    const size_t col_z = static_cast<size_t>(cc);
+    const i_t lo       = scratch.col_off[col_z];
+    const i_t hi       = scratch.col_off[col_z + 1];
+    for (i_t t = lo; t < hi; ++t) {
+      const i_t row                       = scratch.csc_rows[static_cast<size_t>(t)];
+      const f_t val                       = scratch.csc_vals[static_cast<size_t>(t)];
+      const size_t sr                     = static_cast<size_t>(row);
+      const i_t w                         = scratch.row_wr[sr]++;
+      out_indices[static_cast<size_t>(w)] = cc;
+      out_values[static_cast<size_t>(w)]  = val * value_scale;
+    }
+  }
+
+  out_offsets = std::move(scratch.row_off);
+}
+
+}  // namespace
+
 namespace cuopt::linear_programming::io {
 
 template <typename i_t>
@@ -288,115 +515,72 @@ void mps_parser_t<i_t, f_t>::fill_problem(mps_data_model_t<i_t, f_t>& problem)
   problem.set_variable_types(std::move(var_types));
   problem.set_maximize(maximize);
 
-  // Helper function to build CSR format using double transpose (O(m+n+nnz) instead of
-  // O(nnz*log(nnz))) For QUADOBJ: handles upper triangular input by expanding to full symmetric
-  // matrix.
+  // Build CSR from (row,col,value) triples via double transpose (CSC then CSR). Uses flat buffers
+  // plus reusable scratch to avoid per-column/per-row `std::vector` allocations — important for
+  // large Q blocks and many QCMATRIX sections.
   //
-  // @p value_scale:
-  // QUADOBJ/QMATRIX use 0.5 (MPS ½ xᵀQx vs internal xᵀQx);
-  // QCMATRIX uses 1.0 (symmetric Q defines xᵀQx directly in the constraint).
-  auto build_csr_via_transpose = [](const std::vector<std::tuple<i_t, i_t, f_t>>& entries,
-                                    i_t num_rows,
-                                    i_t num_cols,
-                                    bool symmetrize_upper_triangular,
-                                    f_t value_scale) {
-    struct CSRResult {
-      std::vector<f_t> values;
-      std::vector<i_t> indices;
-      std::vector<i_t> offsets;
-    };
-
-    if (entries.empty()) {
-      CSRResult result;
-      result.offsets.resize(num_rows + 1, 0);
-      return result;
-    }
-
-    // First transpose: build CSC format (entries sorted by column)
-    std::vector<std::vector<std::pair<i_t, f_t>>> csc_data(num_cols);
-    for (const auto& entry : entries) {
-      i_t row = std::get<0>(entry);
-      i_t col = std::get<1>(entry);
-      f_t val = std::get<2>(entry);
-
-      // For QUADOBJ (upper triangular), add both (row,col) and (col,row) if off-diagonal
-      csc_data[col].emplace_back(row, val);
-      if (symmetrize_upper_triangular && row != col) { csc_data[row].emplace_back(col, val); }
-    }
-
-    // Second transpose: convert CSC to CSR (entries sorted by row, columns within rows sorted)
-    std::vector<std::vector<std::pair<i_t, f_t>>> csr_data(num_rows);
-    for (i_t col = 0; col < num_cols; ++col) {
-      for (const auto& [row, val] : csc_data[col]) {
-        csr_data[row].emplace_back(col, val);
-      }
-    }
-
-    // Build final CSR format
-    CSRResult result;
-    result.offsets.reserve(num_rows + 1);
-    result.offsets.push_back(0);
-
-    for (i_t row = 0; row < num_rows; ++row) {
-      for (const auto& [col, val] : csr_data[row]) {
-        // While the mps format expects to optimize for 0.5 xT Q x, cuopt optimizes for xT Q xExpand
-        // commentComment on line L488 so we have to multiply the value by value_scale=0.5 to get
-        // the correct value.
-        result.values.push_back(val * value_scale);
-        result.indices.push_back(col);
-      }
-      result.offsets.push_back(result.values.size());
-    }
-
-    return result;
-  };
+  // value_scale: QUADOBJ/QMATRIX use 0.5 (MPS ½ xᵀQx vs internal xᵀQx); QCMATRIX uses 1.0.
+  double_transpose_scratch_t<i_t, f_t> triple_csr_scratch{};
+  std::vector<f_t> quad_csr_values{};
+  std::vector<i_t> quad_csr_indices{};
+  std::vector<i_t> quad_csr_offsets{};
 
   // Process QUADOBJ data if present (upper triangular format)
   if (!quadobj_entries.empty()) {
-    // Convert quadratic objective entries to CSR format using double transpose
-    // QUADOBJ stores upper triangular elements, so we expand to full symmetric matrix
     constexpr f_t k_mps_quad_half_scale = f_t(0.5);  // MPS ½ xᵀQx vs internal xᵀQx
-    auto csr_result                     = build_csr_via_transpose(
-      quadobj_entries, num_vars_for_quad, num_vars_for_quad, true, k_mps_quad_half_scale);
-
-    // Use optimized double transpose method - O(m+n+nnz) instead of O(nnz*log(nnz))
-    problem.set_quadratic_objective_matrix(
-      csr_result.values, csr_result.indices, csr_result.offsets);
+    triples_to_csr_double_transpose_flat(quadobj_entries,
+                                         num_vars_for_quad,
+                                         num_vars_for_quad,
+                                         true,
+                                         k_mps_quad_half_scale,
+                                         triple_csr_scratch,
+                                         quad_csr_values,
+                                         quad_csr_indices,
+                                         quad_csr_offsets);
+    problem.set_quadratic_objective_matrix(quad_csr_values, quad_csr_indices, quad_csr_offsets);
   } else if (!qmatrix_entries.empty()) {
-    // Convert quadratic objective entries to CSR format using double transpose
-    // QMATRIX stores full symmetric matrix
     constexpr f_t k_mps_quad_half_scale = f_t(0.5);
-    auto csr_result                     = build_csr_via_transpose(
-      qmatrix_entries, num_vars_for_quad, num_vars_for_quad, false, k_mps_quad_half_scale);
-
-    // Use optimized double transpose method - O(m+n+nnz) instead of O(nnz*log(nnz))
-    problem.set_quadratic_objective_matrix(
-      csr_result.values, csr_result.indices, csr_result.offsets);
+    triples_to_csr_double_transpose_flat(qmatrix_entries,
+                                         num_vars_for_quad,
+                                         num_vars_for_quad,
+                                         false,
+                                         k_mps_quad_half_scale,
+                                         triple_csr_scratch,
+                                         quad_csr_values,
+                                         quad_csr_indices,
+                                         quad_csr_offsets);
+    problem.set_quadratic_objective_matrix(quad_csr_values, quad_csr_indices, quad_csr_offsets);
   }
 
   // QCMATRIX: one symmetric Q per constraint row (no extra ½ factor vs file coeffs).
-  // Bundle row metadata, row-linear coefficients (from COLUMNS), rhs, and quadratic part together.
+  // Store Q in COO (row, col, value) per block — avoids O(n) CSR row-pointer materialization.
   constexpr f_t k_qcmatrix_value_scale = f_t(1);
-  const i_t linear_row_count = static_cast<i_t>(row_types.size() - quadratic_row_ids.size());
-  i_t quadratic_row_id       = 0;
   for (const auto& block : qcmatrix_blocks_) {
-    auto csr_result = build_csr_via_transpose(
-      block.entries, num_vars_for_quad, num_vars_for_quad, false, k_qcmatrix_value_scale);
     const i_t row_id = block.constraint_row_id;
     mps_parser_expects(row_id >= 0 && row_id < static_cast<i_t>(row_types.size()),
                        error_type_t::ValidationError,
                        "QCMATRIX row index %d is out of range for constraints",
                        static_cast<int>(row_id));
-    problem.append_quadratic_constraint(linear_row_count + quadratic_row_id,
-                                        row_names[row_id],
-                                        static_cast<char>(row_types[row_id]),
-                                        A_values[row_id],
-                                        A_indices[row_id],
-                                        b_values[row_id],
-                                        csr_result.values,
-                                        csr_result.indices,
-                                        csr_result.offsets);
-    ++quadratic_row_id;
+    const i_t nnz = static_cast<i_t>(block.entries.size());
+    std::vector<i_t> qc_rows(static_cast<size_t>(nnz));
+    std::vector<i_t> qc_cols(static_cast<size_t>(nnz));
+    std::vector<f_t> qc_vals(static_cast<size_t>(nnz));
+    for (i_t e = 0; e < nnz; ++e) {
+      qc_rows[static_cast<size_t>(e)] = std::get<0>(block.entries[static_cast<size_t>(e)]);
+      qc_cols[static_cast<size_t>(e)] = std::get<1>(block.entries[static_cast<size_t>(e)]);
+      qc_vals[static_cast<size_t>(e)] =
+        std::get<2>(block.entries[static_cast<size_t>(e)]) * k_qcmatrix_value_scale;
+    }
+    problem.append_quadratic_constraint(
+      row_id,
+      row_names[row_id],
+      static_cast<char>(row_types[row_id]),
+      std::span<const f_t>(A_values[row_id].data(), A_values[row_id].size()),
+      std::span<const i_t>(A_indices[row_id].data(), A_indices[row_id].size()),
+      b_values[row_id],
+      std::span<const f_t>(qc_vals.data(), qc_vals.size()),
+      std::span<const i_t>(qc_rows.data(), qc_rows.size()),
+      std::span<const i_t>(qc_cols.data(), qc_cols.size()));
   }
 
   if (!quadratic_row_ids.empty()) {
@@ -694,9 +878,7 @@ mps_parser_t<i_t, f_t>::mps_parser_t(mps_data_model_t<i_t, f_t>& problem,
   // raft::common::nvtx::range fun_scope("mps parser");
 
   std::vector<char> buf = detail::file_to_string(file);
-
   parse_string(buf.data());
-
   fill_problem(problem);
 }
 
diff --git a/cpp/src/io/mps_writer.cpp b/cpp/src/io/mps_writer.cpp
index 73489277ce..dae7d75a9a 100644
--- a/cpp/src/io/mps_writer.cpp
+++ b/cpp/src/io/mps_writer.cpp
@@ -157,7 +157,15 @@ void mps_writer_t<i_t, f_t>::write(const std::string& mps_file_path)
   std::vector<f_t> constraint_bounds(problem_.get_constraint_bounds().size());
   std::vector<f_t> variable_lower_bounds(problem_.get_variable_lower_bounds().size());
   std::vector<f_t> variable_upper_bounds(problem_.get_variable_upper_bounds().size());
-  std::vector<char> variable_types(problem_.get_variable_types().size());
+  // Default unset variable types to continuous ('C'); API models often omit set_variable_types.
+  std::vector<char> variable_types(static_cast<size_t>(n_variables), 'C');
+  {
+    const auto& src_types = problem_.get_variable_types();
+    const size_t n_copy   = std::min(src_types.size(), variable_types.size());
+    for (size_t j = 0; j < n_copy; ++j) {
+      variable_types[j] = src_types[j];
+    }
+  }
   std::vector<char> row_types(problem_.get_row_types().size());
   std::vector<i_t> constraint_matrix_offsets(problem_.get_constraint_matrix_offsets().size());
   std::vector<i_t> constraint_matrix_indices(problem_.get_constraint_matrix_indices().size());
@@ -178,9 +186,6 @@ void mps_writer_t<i_t, f_t>::write(const std::string& mps_file_path)
     problem_.get_variable_upper_bounds().data(),
     problem_.get_variable_upper_bounds().data() + problem_.get_variable_upper_bounds().size(),
     variable_upper_bounds.data());
-  std::copy(problem_.get_variable_types().data(),
-            problem_.get_variable_types().data() + problem_.get_variable_types().size(),
-            variable_types.data());
   std::copy(problem_.get_row_types().data(),
             problem_.get_row_types().data() + problem_.get_row_types().size(),
             row_types.data());
@@ -243,8 +248,8 @@ void mps_writer_t<i_t, f_t>::write(const std::string& mps_file_path)
     const auto& qc = quadratic_constraints[q];
     std::string row_name =
       qc.constraint_row_name.empty() ? "QC" + std::to_string(q) : qc.constraint_row_name;
-    // Quadratic rows are currently restricted to MPS 'L' (<=).
-    mps_file << " L  " << row_name << "\n";
+    char const type = qc.constraint_row_type;
+    mps_file << " " << type << "  " << row_name << "\n";
   }
 
   // COLUMNS section
@@ -497,20 +502,19 @@ void mps_writer_t<i_t, f_t>::write(const std::string& mps_file_path)
   if (problem_.has_quadratic_constraints()) {
     for (const auto& qc : problem_.get_quadratic_constraints()) {
       mps_file << "QCMATRIX   " << qc.constraint_row_name << "\n";
-      const i_t n_quad_rows = static_cast<i_t>(qc.quadratic_offsets.size()) - 1;
-      for (i_t i = 0; i < n_quad_rows; ++i) {
+      const i_t nnz = static_cast<i_t>(qc.quadratic_values.size());
+      for (i_t p = 0; p < nnz; ++p) {
+        const i_t i              = qc.quadratic_row_indices[static_cast<size_t>(p)];
+        const i_t j              = qc.quadratic_col_indices[static_cast<size_t>(p)];
+        f_t v                    = qc.quadratic_values[static_cast<size_t>(p)];
         std::string row_var_name = static_cast<size_t>(i) < problem_.get_variable_names().size()
-                                     ? problem_.get_variable_names()[i]
+                                     ? problem_.get_variable_names()[static_cast<size_t>(i)]
                                      : "C" + std::to_string(i);
-        for (i_t p = qc.quadratic_offsets[i]; p < qc.quadratic_offsets[i + 1]; ++p) {
-          i_t j                    = qc.quadratic_indices[p];
-          f_t v                    = qc.quadratic_values[p];
-          std::string col_var_name = static_cast<size_t>(j) < problem_.get_variable_names().size()
-                                       ? problem_.get_variable_names()[j]
-                                       : "C" + std::to_string(j);
-          if (v != f_t(0)) {
-            mps_file << "    " << row_var_name << " " << col_var_name << " " << v << "\n";
-          }
+        std::string col_var_name = static_cast<size_t>(j) < problem_.get_variable_names().size()
+                                     ? problem_.get_variable_names()[static_cast<size_t>(j)]
+                                     : "C" + std::to_string(j);
+        if (v != f_t(0)) {
+          mps_file << "    " << row_var_name << " " << col_var_name << " " << v << "\n";
         }
       }
     }
diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu
index 0b4c360b10..505b4e02f7 100644
--- a/cpp/src/math_optimization/solver_settings.cu
+++ b/cpp/src/math_optimization/solver_settings.cu
@@ -98,6 +98,9 @@ solver_settings_t<i_t, f_t>::solver_settings_t() : pdlp_settings(), mip_settings
     {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-5)},
     {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits<f_t>::infinity(), std::max(f_t(1e-10), std::numeric_limits<f_t>::epsilon())},
     {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, f_t(0.0), f_t(1e-1), f_t(1e-4)},
+    {CUOPT_BARRIER_RELATIVE_FEASIBILITY_TOLERANCE, &pdlp_settings.barrier_relative_feasibility_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)},
+    {CUOPT_BARRIER_RELATIVE_OPTIMALITY_TOLERANCE, &pdlp_settings.barrier_relative_optimality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)},
+    {CUOPT_BARRIER_RELATIVE_COMPLEMENTARITY_TOLERANCE, &pdlp_settings.barrier_relative_complementarity_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)},
     {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits<f_t>::epsilon())},
     {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits<f_t>::epsilon())},
     {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, f_t(-1.0), std::numeric_limits<f_t>::infinity(), f_t(-1.0)},
diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu
index 43d507b2f9..cf741eee0b 100644
--- a/cpp/src/pdlp/solve.cu
+++ b/cpp/src/pdlp/solve.cu
@@ -507,9 +507,14 @@ run_barrier(dual_simplex::user_problem_t<i_t, f_t>& user_problem,
   barrier_settings.barrier                         = true;
   barrier_settings.crossover                       = settings.crossover;
   barrier_settings.eliminate_dense_columns         = settings.eliminate_dense_columns;
-  barrier_settings.barrier_iterative_refinement    = settings.barrier_iterative_refinement;
-  barrier_settings.barrier_step_scale              = settings.barrier_step_scale;
+  barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement;
+  barrier_settings.barrier_step_scale           = settings.barrier_step_scale;
   barrier_settings.cudss_deterministic             = settings.cudss_deterministic;
+  barrier_settings.barrier_relative_feasibility_tol =
+    settings.barrier_relative_feasibility_tolerance;
+  barrier_settings.barrier_relative_optimality_tol = settings.barrier_relative_optimality_tolerance;
+  barrier_settings.barrier_relative_complementarity_tol =
+    settings.barrier_relative_complementarity_tolerance;
   barrier_settings.barrier_relaxed_feasibility_tol = settings.tolerances.relative_primal_tolerance;
   barrier_settings.barrier_relaxed_optimality_tol  = settings.tolerances.relative_dual_tolerance;
   barrier_settings.barrier_relaxed_complementarity_tol = settings.tolerances.relative_gap_tolerance;
@@ -1749,6 +1754,10 @@ optimization_problem_solution_t<i_t, f_t> solve_qp(optimization_problem_t<i_t, f
     auto qp_timer = cuopt::timer_t(settings.time_limit);
 
     raft::common::nvtx::range fun_scope("Running QP solver");
+    if (op_problem.has_quadratic_constraints()) {
+      CUOPT_LOG_INFO("Problem has %d quadratic constraints. Using Barrier with SOC conversion.",
+                     static_cast<int>(op_problem.get_quadratic_constraints().size()));
+    }
     if (settings.user_problem_file != "") {
       CUOPT_LOG_INFO("Writing user problem to file: %s", settings.user_problem_file.c_str());
       op_problem.write_to_mps(settings.user_problem_file);
@@ -1803,6 +1812,24 @@ optimization_problem_solution_t<i_t, f_t> solve_lp(
     // This needs to be called before pdlp is initialized
     init_handler(op_problem.get_handle_ptr());
 
+    if (op_problem.has_quadratic_objective() || op_problem.has_quadratic_constraints()) {
+      if (op_problem.has_quadratic_objective()) {
+        CUOPT_LOG_INFO("Problem has a quadratic objective. Using Barrier.");
+      }
+      if (op_problem.has_quadratic_constraints()) {
+        CUOPT_LOG_INFO("Problem has %d quadratic constraints. Using Barrier with SOC conversion.",
+                       static_cast<int>(op_problem.get_quadratic_constraints().size()));
+      }
+      settings.method    = method_t::Barrier;
+      settings.presolver = presolver_t::None;
+      // Quadratic objective support is minimization-only.
+      if (op_problem.has_quadratic_objective() && op_problem.get_sense()) {
+        CUOPT_LOG_ERROR("Quadratic problems must be minimized");
+        return optimization_problem_solution_t<i_t, f_t>(pdlp_termination_status_t::NumericalError,
+                                                         op_problem.get_handle_ptr()->get_stream());
+      }
+    }
+
     raft::common::nvtx::range fun_scope("Running solver");
 
     if (problem_checking) {
diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu
index 9268e17910..96de3a788d 100644
--- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu
+++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu
@@ -26,13 +26,7 @@
 #include <raft/linalg/unary_op.cuh>
 #include <raft/util/cuda_utils.cuh>
 
-#include <thrust/device_ptr.h>
-#include <thrust/extrema.h>
-#include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/transform_output_iterator.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/transform_reduce.h>
-#include <thrust/tuple.h>
 
 namespace cuopt::linear_programming::detail {
 template <typename i_t, typename f_t>
diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp
index 7d78da80eb..2bfaa127ed 100644
--- a/cpp/src/pdlp/translate.hpp
+++ b/cpp/src/pdlp/translate.hpp
@@ -12,10 +12,16 @@
 
 #include <dual_simplex/presolve.hpp>
 #include <dual_simplex/sparse_matrix.hpp>
+#include <dual_simplex/sparse_vector.hpp>
+
+#include <barrier/translate_soc.hpp>
 
 #include <utilities/copy_helpers.hpp>
 
 #include <limits>
+#include <tuple>
+#include <utility>
+#include <vector>
 
 namespace cuopt::linear_programming {
 
@@ -111,8 +117,6 @@ static dual_simplex::user_problem_t<i_t, f_t> cuopt_problem_to_user_problem(
   csr_A.j = std::vector<i_t>(cuopt::host_copy(model.variables, handle_ptr->get_stream()));
   csr_A.row_start = std::vector<i_t>(cuopt::host_copy(model.offsets, handle_ptr->get_stream()));
 
-  csr_A.to_compressed_col(user_problem.A);
-
   user_problem.rhs.resize(m);
   user_problem.row_sense.resize(m);
   user_problem.range_rows.clear();
@@ -186,6 +190,13 @@ static dual_simplex::user_problem_t<i_t, f_t> cuopt_problem_to_user_problem(
   user_problem.Q_indices = model.Q_indices;
   user_problem.Q_values  = model.Q_values;
 
+  if (model.original_problem_ptr->has_quadratic_constraints()) {
+    detail::apply_soc_qcmatrix_conversion_for_simplex<i_t, f_t>(
+      n, model.original_problem_ptr->get_quadratic_constraints(), csr_A, user_problem);
+  }
+
+  csr_A.to_compressed_col(user_problem.A);
+
   return user_problem;
 }
 
@@ -211,7 +222,6 @@ static dual_simplex::user_problem_t<i_t, f_t> cuopt_optimization_problem_to_user
     csr_A.row_start.resize(1);
     csr_A.row_start[0] = 0;
   }
-  csr_A.to_compressed_col(user_problem.A);
 
   user_problem.rhs.resize(m);
   user_problem.row_sense.resize(m);
@@ -291,6 +301,13 @@ static dual_simplex::user_problem_t<i_t, f_t> cuopt_optimization_problem_to_user
   user_problem.Q_indices = model.get_quadratic_objective_indices();
   user_problem.Q_values  = model.get_quadratic_objective_values();
 
+  if (model.has_quadratic_constraints()) {
+    detail::apply_soc_qcmatrix_conversion_for_simplex<i_t, f_t>(
+      static_cast<int>(n), model.get_quadratic_constraints(), csr_A, user_problem);
+  }
+
+  csr_A.to_compressed_col(user_problem.A);
+
   return user_problem;
 }
 
diff --git a/cpp/tests/dual_simplex/CMakeLists.txt b/cpp/tests/dual_simplex/CMakeLists.txt
index dc4ab35b73..f6dff93227 100644
--- a/cpp/tests/dual_simplex/CMakeLists.txt
+++ b/cpp/tests/dual_simplex/CMakeLists.txt
@@ -6,4 +6,5 @@
 ConfigureTest(DUAL_SIMPLEX_TEST
     ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solve.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solve_barrier.cu
-    LABELS numopt)
+    ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/second_order_cone_kernels.cu
+)
diff --git a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
index b0cbe624dc..ae4efb9b86 100644
--- a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
+++ b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
@@ -5,8 +5,6 @@
  */
 /* clang-format on */
 
-#include <cstdio>
-
 #include <utilities/common_utils.hpp>
 #include <utilities/copy_helpers.hpp>
 
@@ -16,6 +14,7 @@
 #include <cuopt/linear_programming/pdlp/solver_settings.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <dual_simplex/presolve.hpp>
+#include <dual_simplex/scaling.hpp>
 #include <dual_simplex/solve.hpp>
 #include <dual_simplex/tic_toc.hpp>
 #include <dual_simplex/user_problem.hpp>
@@ -146,7 +145,6 @@ TEST(barrier, dual_variable_greater_than)
   user_problem.A.x[nnz++] = 1.0;
   user_problem.A.i[nnz]   = 1;
   user_problem.A.x[nnz++] = 2.0;
-  user_problem.A.print_matrix();
   EXPECT_EQ(nnz, nz);
 
   user_problem.rhs.resize(m);
@@ -181,6 +179,821 @@ TEST(barrier, dual_variable_greater_than)
   EXPECT_NEAR(solution.z[1], 0.0, 1e-5);
 }
 
+TEST(barrier, cone_metadata_reindexed_when_slack_is_inserted_before_cones)
+{
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m       = 1;
+  constexpr int n       = 5;
+  constexpr int nz      = 5;
+  user_problem.num_rows = m;
+  user_problem.num_cols = n;
+  user_problem.objective.assign(n, 0.0);
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  user_problem.A.col_start.resize(n + 1);
+  for (int j = 0; j < n; ++j) {
+    user_problem.A.col_start[j] = j;
+    user_problem.A.i[j]         = 0;
+    user_problem.A.x[j]         = 1.0;
+  }
+  user_problem.A.col_start[n] = nz;
+  user_problem.rhs            = {1.0};
+  user_problem.row_sense      = {'L'};
+  user_problem.lower.assign(n, 0.0);
+  user_problem.upper.assign(n, inf);
+  user_problem.num_range_rows         = 0;
+  user_problem.second_order_cone_dims = {2, 2};
+  user_problem.cone_var_start         = 1;
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier          = true;
+  settings.dualize          = 0;
+  settings.scale_columns    = false;
+
+  std::vector<int> new_slacks;
+  dualize_info_t<int, double> dualize_info;
+  lp_problem_t<int, double> original_lp(user_problem.handle_ptr, 1, 1, 1);
+  convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info);
+
+  ASSERT_EQ(new_slacks.size(), 1);
+  EXPECT_EQ(new_slacks[0], 1);
+  EXPECT_EQ(original_lp.num_cols, 6);
+  EXPECT_EQ(original_lp.second_order_cone_dims, user_problem.second_order_cone_dims);
+  EXPECT_EQ(original_lp.cone_var_start, 2);
+
+  lp_problem_t<int, double> barrier_lp(user_problem.handle_ptr,
+                                       original_lp.num_rows,
+                                       original_lp.num_cols,
+                                       original_lp.A.col_start[original_lp.num_cols]);
+  std::vector<double> column_scales;
+  std::vector<double> row_scales;
+  column_scaling(original_lp, settings, barrier_lp, column_scales, row_scales);
+
+  EXPECT_EQ(barrier_lp.second_order_cone_dims, user_problem.second_order_cone_dims);
+  EXPECT_EQ(barrier_lp.cone_var_start, 2);
+}
+
+TEST(barrier, presolve_reindexes_cone_start_after_empty_column_removal)
+{
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 1;
+  constexpr int n  = 4;
+  constexpr int nz = 3;
+
+  user_problem.num_rows  = m;
+  user_problem.num_cols  = n;
+  user_problem.objective = {1.0, 0.0, 0.0, 0.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  user_problem.A.col_start = {0, 0, 1, 2, 3};
+  user_problem.A.i[0]      = 0;
+  user_problem.A.x[0]      = 1.0;
+  user_problem.A.i[1]      = 0;
+  user_problem.A.x[1]      = -1.0;
+  user_problem.A.i[2]      = 0;
+  user_problem.A.x[2]      = 0.5;
+
+  user_problem.rhs       = {1.0};
+  user_problem.row_sense = {'E'};
+  user_problem.lower.assign(n, 0.0);
+  user_problem.upper.assign(n, inf);
+  user_problem.num_range_rows         = 0;
+  user_problem.cone_var_start         = 1;
+  user_problem.second_order_cone_dims = {3};
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier          = true;
+  settings.dualize          = 0;
+  settings.scale_columns    = false;
+
+  std::vector<int> new_slacks;
+  dualize_info_t<int, double> dualize_info;
+  lp_problem_t<int, double> original_lp(user_problem.handle_ptr, 1, 1, 1);
+  convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info);
+
+  presolve_info_t<int, double> presolve_info;
+  lp_problem_t<int, double> presolved_lp(user_problem.handle_ptr, 1, 1, 1);
+  ASSERT_EQ(presolve(original_lp, settings, presolved_lp, presolve_info), 0);
+
+  EXPECT_EQ(presolved_lp.num_cols, 3);
+  EXPECT_EQ(presolved_lp.second_order_cone_dims, std::vector<int>({3}));
+  EXPECT_EQ(presolved_lp.cone_var_start, 0);
+
+  lp_problem_t<int, double> barrier_lp(user_problem.handle_ptr,
+                                       presolved_lp.num_rows,
+                                       presolved_lp.num_cols,
+                                       presolved_lp.A.col_start[presolved_lp.num_cols]);
+  std::vector<double> column_scales;
+  std::vector<double> row_scales;
+  ASSERT_EQ(column_scaling(presolved_lp, settings, barrier_lp, column_scales, row_scales), 0);
+  EXPECT_EQ(barrier_lp.cone_var_start, 0);
+}
+
+TEST(barrier, presolve_keeps_native_free_variables_before_cones)
+{
+  // Layout: [x0, x1 | cone x2, x3, x4] with x0, x1 free and a 3-dimensional SOC block.
+  // SOCP barrier presolve keeps native free variables (no x = v - w split); cone_var_start
+  // and column count stay unchanged.
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 1;
+  constexpr int n  = 5;
+  constexpr int nz = 5;
+
+  user_problem.num_rows  = m;
+  user_problem.num_cols  = n;
+  user_problem.objective = {0.0, 0.0, 0.0, 0.0, 0.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  user_problem.A.col_start = {0, 1, 2, 3, 4, 5};
+  for (int j = 0; j < n; ++j) {
+    user_problem.A.i[j] = 0;
+    user_problem.A.x[j] = 1.0;
+  }
+
+  user_problem.rhs       = {1.0};
+  user_problem.row_sense = {'E'};
+  user_problem.lower = {-inf, -inf, 0.0, 0.0, 0.0};
+  user_problem.upper.assign(n, inf);
+  user_problem.num_range_rows         = 0;
+  user_problem.cone_var_start         = 2;
+  user_problem.second_order_cone_dims = {3};
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier          = true;
+  settings.dualize          = 0;
+  settings.scale_columns    = false;
+
+  std::vector<int> new_slacks;
+  dualize_info_t<int, double> dualize_info;
+  lp_problem_t<int, double> original_lp(user_problem.handle_ptr, 1, 1, 1);
+  convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info);
+
+  presolve_info_t<int, double> presolve_info;
+  lp_problem_t<int, double> presolved_lp(user_problem.handle_ptr, 1, 1, 1);
+  ASSERT_EQ(presolve(original_lp, settings, presolved_lp, presolve_info), 0);
+
+  EXPECT_EQ(presolved_lp.num_cols, 5);
+  EXPECT_EQ(presolved_lp.cone_var_start, 2);
+  EXPECT_EQ(presolved_lp.second_order_cone_dims, std::vector<int>({3}));
+  EXPECT_TRUE(presolve_info.free_variable_pairs.empty());
+  ASSERT_EQ(presolve_info.native_free_linear_indices.size(), 2);
+  EXPECT_EQ(presolve_info.native_free_linear_indices[0], 0);
+  EXPECT_EQ(presolve_info.native_free_linear_indices[1], 1);
+  EXPECT_EQ(presolved_lp.lower[0], -inf);
+  EXPECT_EQ(presolved_lp.lower[1], -inf);
+  EXPECT_EQ(presolved_lp.upper[0], inf);
+  EXPECT_EQ(presolved_lp.upper[1], inf);
+}
+
+TEST(barrier, uncrush_solution_removes_non_tail_free_variable_partner)
+{
+  using namespace cuopt::linear_programming::dual_simplex;
+
+  raft::handle_t handle{};
+  init_handler(&handle);
+  lp_problem_t<int, double> original_lp(&handle, 0, 3, 0);
+
+  presolve_info_t<int, double> presolve_info;
+  presolve_info.free_variable_pairs = {0, 1};
+
+  simplex_solver_settings_t<int, double> settings;
+  std::vector<double> crushed_x{5.0, 2.0, 9.0, 8.0};
+  std::vector<double> crushed_y{};
+  std::vector<double> crushed_z{7.0, 11.0, 13.0, 17.0};
+  std::vector<double> uncrushed_x(3);
+  std::vector<double> uncrushed_y(0);
+  std::vector<double> uncrushed_z(3);
+
+  uncrush_solution(presolve_info,
+                   settings,
+                   original_lp,
+                   crushed_x,
+                   crushed_y,
+                   crushed_z,
+                   uncrushed_x,
+                   uncrushed_y,
+                   uncrushed_z);
+
+  EXPECT_EQ(uncrushed_x, std::vector<double>({3.0, 9.0, 8.0}));
+  EXPECT_EQ(uncrushed_z, std::vector<double>({7.0, 13.0, 17.0}));
+}
+
+TEST(barrier, rejects_middle_cone_input_before_barrier)
+{
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 3;
+  constexpr int n  = 5;
+  constexpr int nz = 3;
+
+  user_problem.num_rows  = m;
+  user_problem.num_cols  = n;
+  user_problem.objective = {1.0, 0.0, 0.0, 0.0, 1.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  user_problem.A.col_start = {0, 1, 1, 2, 2, 3};
+  user_problem.A.i[0]      = 0;
+  user_problem.A.x[0]      = 1.0;
+  user_problem.A.i[1]      = 1;
+  user_problem.A.x[1]      = 1.0;
+  user_problem.A.i[2]      = 2;
+  user_problem.A.x[2]      = 1.0;
+
+  user_problem.rhs       = {2.0, 1.0, 3.0};
+  user_problem.row_sense = {'E', 'E', 'E'};
+  user_problem.lower.assign(n, 0.0);
+  user_problem.upper.assign(n, inf);
+  user_problem.num_range_rows         = 0;
+  user_problem.cone_var_start         = 1;
+  user_problem.second_order_cone_dims = {3};
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier          = true;
+  settings.dualize          = 0;
+  lp_solution_t<int, double> solution(m, n);
+
+  auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
+  EXPECT_EQ(status, lp_status_t::NUMERICAL_ISSUES);
+}
+
+TEST(barrier, socp_min_x0_subject_to_norm_constraint)
+{
+  // minimize x_0
+  // subject to x_1 = 1
+  //            (x_0, x_1, x_2) in Q^3
+  //
+  // Optimal: x* = (1, 1, 0), obj* = 1
+
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 1;
+  constexpr int n  = 3;
+  constexpr int nz = 1;
+
+  user_problem.num_rows = m;
+  user_problem.num_cols = n;
+
+  user_problem.objective = {1.0, 0.0, 0.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  user_problem.A.col_start = {0, 0, 1, 1};
+  user_problem.A.i[0]      = 0;
+  user_problem.A.x[0]      = 1.0;
+
+  user_problem.rhs       = {1.0};
+  user_problem.row_sense = {'E'};
+
+  user_problem.lower = {0.0, 0.0, 0.0};
+  user_problem.upper = {inf, inf, inf};
+
+  user_problem.num_range_rows = 0;
+  user_problem.problem_name   = "socp_norm_cone";
+
+  user_problem.cone_var_start         = 0;
+  user_problem.second_order_cone_dims = {3};
+
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier          = true;
+  settings.dualize          = 0;
+
+  lp_solution_t<int, double> solution(m, n);
+  auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
+  EXPECT_EQ(status, lp_status_t::OPTIMAL);
+  EXPECT_NEAR(solution.objective, 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[0], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[1], 1.0, 1e-4);
+  EXPECT_NEAR(std::abs(solution.x[2]), 0.0, 1e-4);
+}
+
+TEST(barrier, mixed_linear_and_soc_block)
+{
+  // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3.
+  //
+  // minimize   l
+  // subject to l - t = 0
+  //            u     = 1
+  //            (t, u, v) in Q^3
+  //
+  // Optimal: l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1.
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 2;
+  constexpr int n  = 4;
+  constexpr int nz = 4;
+
+  user_problem.num_rows  = m;
+  user_problem.num_cols  = n;
+  user_problem.objective = {1.0, 0.0, 0.0, 0.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  // Columns: l, t, u, v
+  user_problem.A.col_start = {0, 1, 2, 3, 3};
+  user_problem.A.i[0]      = 0;
+  user_problem.A.x[0]      = 1.0;
+  user_problem.A.i[1]      = 0;
+  user_problem.A.x[1]      = -1.0;
+  user_problem.A.i[2]      = 1;
+  user_problem.A.x[2]      = 1.0;
+
+  user_problem.rhs       = {0.0, 1.0};
+  user_problem.row_sense = {'E', 'E'};
+
+  user_problem.lower = {0.0, 0.0, 0.0, 0.0};
+  user_problem.upper = {inf, inf, inf, inf};
+
+  user_problem.num_range_rows = 0;
+  user_problem.problem_name   = "mixed_linear_and_soc_block";
+
+  user_problem.cone_var_start         = 1;
+  user_problem.second_order_cone_dims = {3};
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier          = true;
+  settings.dualize          = 0;
+
+  lp_solution_t<int, double> solution(m, n);
+  auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
+
+  EXPECT_EQ(status, lp_status_t::OPTIMAL);
+  EXPECT_NEAR(solution.objective, 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[0], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[1], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[2], 1.0, 1e-4);
+  EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4);
+}
+
+TEST(barrier, mixed_linear_and_soc_tail_coupling)
+{
+  // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3.
+  //
+  // minimize   t
+  // subject to l - u = 0
+  //            l + u = 2
+  //            (t, u, v) in Q^3
+  //
+  // Optimal: l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1.
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 2;
+  constexpr int n  = 4;
+  constexpr int nz = 4;
+
+  user_problem.num_rows  = m;
+  user_problem.num_cols  = n;
+  user_problem.objective = {0.0, 1.0, 0.0, 0.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  // Columns: l, t, u, v
+  user_problem.A.col_start = {0, 2, 2, 4, 4};
+  user_problem.A.i[0]      = 0;
+  user_problem.A.x[0]      = 1.0;
+  user_problem.A.i[1]      = 1;
+  user_problem.A.x[1]      = 1.0;
+  user_problem.A.i[2]      = 0;
+  user_problem.A.x[2]      = -1.0;
+  user_problem.A.i[3]      = 1;
+  user_problem.A.x[3]      = 1.0;
+
+  user_problem.rhs       = {0.0, 2.0};
+  user_problem.row_sense = {'E', 'E'};
+  user_problem.lower     = {0.0, 0.0, 0.0, 0.0};
+  user_problem.upper     = {inf, inf, inf, inf};
+
+  user_problem.num_range_rows         = 0;
+  user_problem.problem_name           = "mixed_linear_and_soc_tail_coupling";
+  user_problem.cone_var_start         = 1;
+  user_problem.second_order_cone_dims = {3};
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier          = true;
+  settings.dualize          = 0;
+  settings.scale_columns    = true;
+
+  lp_solution_t<int, double> solution(m, n);
+  auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
+
+  EXPECT_EQ(status, lp_status_t::OPTIMAL);
+  EXPECT_NEAR(solution.objective, 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[0], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[1], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[2], 1.0, 1e-4);
+  EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4);
+}
+
+TEST(barrier, mixed_linear_and_soc_tail_coupling_with_inequality)
+{
+  // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3.
+  //
+  // minimize   t
+  // subject to l - u = 0
+  //            l + u >= 2
+  //            (t, u, v) in Q^3
+  //
+  // Optimal: l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1.
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 2;
+  constexpr int n  = 4;
+  constexpr int nz = 4;
+
+  user_problem.num_rows  = m;
+  user_problem.num_cols  = n;
+  user_problem.objective = {0.0, 1.0, 0.0, 0.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  // Columns: l, t, u, v
+  user_problem.A.col_start = {0, 2, 2, 4, 4};
+  user_problem.A.i[0]      = 0;
+  user_problem.A.x[0]      = 1.0;
+  user_problem.A.i[1]      = 1;
+  user_problem.A.x[1]      = 1.0;
+  user_problem.A.i[2]      = 0;
+  user_problem.A.x[2]      = -1.0;
+  user_problem.A.i[3]      = 1;
+  user_problem.A.x[3]      = 1.0;
+
+  user_problem.rhs       = {0.0, 2.0};
+  user_problem.row_sense = {'E', 'G'};
+  user_problem.lower     = {0.0, 0.0, 0.0, 0.0};
+  user_problem.upper     = {inf, inf, inf, inf};
+
+  user_problem.num_range_rows         = 0;
+  user_problem.problem_name           = "mixed_linear_and_soc_tail_coupling_with_inequality";
+  user_problem.cone_var_start         = 1;
+  user_problem.second_order_cone_dims = {3};
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier          = true;
+  settings.dualize          = 0;
+  settings.scale_columns    = true;
+
+  lp_solution_t<int, double> solution(m, n);
+  auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
+
+  EXPECT_EQ(status, lp_status_t::OPTIMAL);
+  EXPECT_NEAR(solution.objective, 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[0], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[1], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[2], 1.0, 1e-4);
+  EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4);
+}
+
+TEST(barrier, mixed_linear_and_two_soc_blocks)
+{
+  // Variables ordered as [l1, l2 | t1, u1, v1 | t2, u2, v2],
+  // where (t1, u1, v1), (t2, u2, v2) \in Q^3.
+  //
+  // minimize   t1 + t2
+  // subject to l1 - u1 = 0
+  //            l2 - u2 = 0
+  //            l1 + l2 = 3
+  //            l1 - l2 = 1
+  //
+  // Optimal: l1* = 2, l2* = 1, t1* = 2, u1* = 2, v1* = 0,
+  //          t2* = 1, u2* = 1, v2* = 0, obj* = 3.
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 4;
+  constexpr int n  = 8;
+  constexpr int nz = 8;
+
+  user_problem.num_rows  = m;
+  user_problem.num_cols  = n;
+  user_problem.objective = {0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  // Columns: l1, l2, t1, u1, v1, t2, u2, v2
+  user_problem.A.col_start = {0, 3, 6, 6, 7, 7, 7, 8, 8};
+  user_problem.A.i[0]      = 0;
+  user_problem.A.x[0]      = 1.0;
+  user_problem.A.i[1]      = 2;
+  user_problem.A.x[1]      = 1.0;
+  user_problem.A.i[2]      = 3;
+  user_problem.A.x[2]      = 1.0;
+  user_problem.A.i[3]      = 1;
+  user_problem.A.x[3]      = 1.0;
+  user_problem.A.i[4]      = 2;
+  user_problem.A.x[4]      = 1.0;
+  user_problem.A.i[5]      = 3;
+  user_problem.A.x[5]      = -1.0;
+  user_problem.A.i[6]      = 0;
+  user_problem.A.x[6]      = -1.0;
+  user_problem.A.i[7]      = 1;
+  user_problem.A.x[7]      = -1.0;
+
+  user_problem.rhs       = {0.0, 0.0, 3.0, 1.0};
+  user_problem.row_sense = {'E', 'E', 'E', 'E'};
+  user_problem.lower.assign(n, 0.0);
+  user_problem.upper.assign(n, inf);
+
+  user_problem.num_range_rows         = 0;
+  user_problem.problem_name           = "mixed_linear_and_two_soc_blocks";
+  user_problem.cone_var_start         = 2;
+  user_problem.second_order_cone_dims = {3, 3};
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier          = true;
+  settings.dualize          = 0;
+
+  lp_solution_t<int, double> solution(m, n);
+  auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
+
+  EXPECT_EQ(status, lp_status_t::OPTIMAL);
+  EXPECT_NEAR(solution.objective, 3.0, 1e-4);
+  EXPECT_NEAR(solution.x[0], 2.0, 1e-4);
+  EXPECT_NEAR(solution.x[1], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[2], 2.0, 1e-4);
+  EXPECT_NEAR(solution.x[3], 2.0, 1e-4);
+  EXPECT_NEAR(std::abs(solution.x[4]), 0.0, 1e-4);
+  EXPECT_NEAR(solution.x[5], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[6], 1.0, 1e-4);
+  EXPECT_NEAR(std::abs(solution.x[7]), 0.0, 1e-4);
+}
+
+TEST(barrier, mixed_linear_and_two_soc_blocks_with_inequality)
+{
+  // Variables ordered as [l1, l2 | t1, u1, v1 | t2, u2, v2],
+  // where (t1, u1, v1), (t2, u2, v2) \in Q^3.
+  //
+  // minimize   t1 + t2
+  // subject to l1 - u1 = 0
+  //            l2 - u2 = 0
+  //            l1 + l2 >= 3
+  //            l1 - l2 = 1
+  //
+  // Optimal: l1* = 2, l2* = 1, t1* = 2, u1* = 2, v1* = 0,
+  //          t2* = 1, u2* = 1, v2* = 0, obj* = 3.
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 4;
+  constexpr int n  = 8;
+  constexpr int nz = 8;
+
+  user_problem.num_rows  = m;
+  user_problem.num_cols  = n;
+  user_problem.objective = {0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  // Columns: l1, l2, t1, u1, v1, t2, u2, v2
+  user_problem.A.col_start = {0, 3, 6, 6, 7, 7, 7, 8, 8};
+  user_problem.A.i[0]      = 0;
+  user_problem.A.x[0]      = 1.0;
+  user_problem.A.i[1]      = 2;
+  user_problem.A.x[1]      = 1.0;
+  user_problem.A.i[2]      = 3;
+  user_problem.A.x[2]      = 1.0;
+  user_problem.A.i[3]      = 1;
+  user_problem.A.x[3]      = 1.0;
+  user_problem.A.i[4]      = 2;
+  user_problem.A.x[4]      = 1.0;
+  user_problem.A.i[5]      = 3;
+  user_problem.A.x[5]      = -1.0;
+  user_problem.A.i[6]      = 0;
+  user_problem.A.x[6]      = -1.0;
+  user_problem.A.i[7]      = 1;
+  user_problem.A.x[7]      = -1.0;
+
+  user_problem.rhs       = {0.0, 0.0, 3.0, 1.0};
+  user_problem.row_sense = {'E', 'E', 'G', 'E'};
+  user_problem.lower.assign(n, 0.0);
+  user_problem.upper.assign(n, inf);
+
+  user_problem.num_range_rows         = 0;
+  user_problem.problem_name           = "mixed_linear_and_two_soc_blocks_with_inequality";
+  user_problem.cone_var_start         = 2;
+  user_problem.second_order_cone_dims = {3, 3};
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier          = true;
+  settings.dualize          = 0;
+  settings.scale_columns    = true;
+
+  lp_solution_t<int, double> solution(m, n);
+  auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
+
+  EXPECT_EQ(status, lp_status_t::OPTIMAL);
+  EXPECT_NEAR(solution.objective, 3.0, 1e-4);
+  EXPECT_NEAR(solution.x[0], 2.0, 1e-4);
+  EXPECT_NEAR(solution.x[1], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[2], 2.0, 1e-4);
+  EXPECT_NEAR(solution.x[3], 2.0, 1e-4);
+  EXPECT_NEAR(std::abs(solution.x[4]), 0.0, 1e-4);
+  EXPECT_NEAR(solution.x[5], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[6], 1.0, 1e-4);
+  EXPECT_NEAR(std::abs(solution.x[7]), 0.0, 1e-4);
+}
+
+TEST(barrier, free_linear_prefix_is_uncrushed_correctly_with_soc_block)
+{
+  // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3 and l is free.
+  //
+  // minimize   t
+  // subject to l - u = 0
+  //            u     = 1
+  //            (t, u, v) in Q^3
+  //
+  // Native free variable l is kept through presolve; end-to-end solve returns
+  // l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1.
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 2;
+  constexpr int n  = 4;
+  constexpr int nz = 3;
+
+  user_problem.num_rows  = m;
+  user_problem.num_cols  = n;
+  user_problem.objective = {0.0, 1.0, 0.0, 0.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  // Columns: l, t, u, v
+  user_problem.A.col_start = {0, 1, 1, 3, 3};
+  user_problem.A.i[0]      = 0;
+  user_problem.A.x[0]      = 1.0;
+  user_problem.A.i[1]      = 0;
+  user_problem.A.x[1]      = -1.0;
+  user_problem.A.i[2]      = 1;
+  user_problem.A.x[2]      = 1.0;
+
+  user_problem.rhs       = {0.0, 1.0};
+  user_problem.row_sense = {'E', 'E'};
+  user_problem.lower     = {-inf, 0.0, 0.0, 0.0};
+  user_problem.upper     = {inf, inf, inf, inf};
+
+  user_problem.num_range_rows         = 0;
+  user_problem.problem_name           = "free_linear_prefix_is_uncrushed_correctly_with_soc_block";
+  user_problem.cone_var_start         = 1;
+  user_problem.second_order_cone_dims = {3};
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier = true;
+  settings.dualize = 0;
+
+  lp_solution_t<int, double> solution(m, n);
+  auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
+
+  EXPECT_EQ(status, lp_status_t::OPTIMAL);
+  EXPECT_NEAR(solution.objective, 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[0], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[1], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[2], 1.0, 1e-4);
+  EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4);
+}
+
+TEST(barrier, qp_with_soc_block)
+{
+  // Variables ordered as [l | t, u, v], where (t, u, v) \in Q^3.
+  //
+  // minimize   0.5 l^2 + t
+  // subject to l + u = 2
+  //            (t, u, v) in Q^3
+  //
+  // Since t >= |u| and u = 2 - l with l >= 0, the objective becomes
+  // 0.5 l^2 + |2 - l|, which is minimized at l* = 1, u* = 1, t* = 1, v* = 0.
+  raft::handle_t handle{};
+  init_handler(&handle);
+
+  using namespace cuopt::linear_programming::dual_simplex;
+  user_problem_t<int, double> user_problem(&handle);
+
+  constexpr int m  = 1;
+  constexpr int n  = 4;
+  constexpr int nz = 2;
+
+  user_problem.num_rows  = m;
+  user_problem.num_cols  = n;
+  user_problem.objective = {0.0, 1.0, 0.0, 0.0};
+
+  user_problem.A.m      = m;
+  user_problem.A.n      = n;
+  user_problem.A.nz_max = nz;
+  user_problem.A.reallocate(nz);
+  // Columns: l, t, u, v
+  user_problem.A.col_start = {0, 1, 1, 2, 2};
+  user_problem.A.i[0]      = 0;
+  user_problem.A.x[0]      = 1.0;
+  user_problem.A.i[1]      = 0;
+  user_problem.A.x[1]      = 1.0;
+
+  user_problem.rhs       = {2.0};
+  user_problem.row_sense = {'E'};
+  user_problem.lower.assign(n, 0.0);
+  user_problem.upper.assign(n, inf);
+
+  user_problem.Q_offsets = {0, 1, 1, 1, 1};
+  user_problem.Q_indices = {0};
+  user_problem.Q_values  = {1.0};
+
+  user_problem.num_range_rows         = 0;
+  user_problem.problem_name           = "qp_with_soc_block";
+  user_problem.cone_var_start         = 1;
+  user_problem.second_order_cone_dims = {3};
+  user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
+
+  simplex_solver_settings_t<int, double> settings;
+  settings.barrier = true;
+  settings.dualize = 0;
+
+  lp_solution_t<int, double> solution(m, n);
+  auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
+
+  EXPECT_EQ(status, lp_status_t::OPTIMAL);
+  EXPECT_NEAR(solution.objective, 1.5, 1e-4);
+  EXPECT_NEAR(solution.x[0], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[1], 1.0, 1e-4);
+  EXPECT_NEAR(solution.x[2], 1.0, 1e-4);
+  EXPECT_NEAR(std::abs(solution.x[3]), 0.0, 1e-4);
+}
+
 TEST(barrier, min_x_squared_free_variable_dual_correction)
 {
   // minimize   x^2         (Q = [2.0], so 0.5 * x^T Q x = x^2)
diff --git a/cpp/tests/linear_programming/parser_test.cpp b/cpp/tests/linear_programming/parser_test.cpp
index 4263a594ce..086432e7b3 100644
--- a/cpp/tests/linear_programming/parser_test.cpp
+++ b/cpp/tests/linear_programming/parser_test.cpp
@@ -2155,8 +2155,9 @@ End
   EXPECT_EQ(qc.constraint_row_type, static_cast<char>(LesserThanOrEqual));
   EXPECT_NEAR(qc.rhs_value, 10.0, tolerance);
   EXPECT_TRUE(qc.linear_indices.empty());
-  // Q = diag(1, 1). CSR: offsets=[0, 1, 2], indices=[0, 1], values=[1, 1].
-  EXPECT_EQ(qc.quadratic_offsets, (std::vector<int>{0, 1, 2}));
+  // Q = diag(1, 1) stored as COO triplets (row, col, value).
+  EXPECT_EQ(qc.quadratic_row_indices, (std::vector<int>{0, 1}));
+  EXPECT_EQ(qc.quadratic_col_indices, (std::vector<int>{0, 1}));
   ASSERT_EQ(qc.quadratic_values.size(), 2u);
   EXPECT_NEAR(qc.quadratic_values[0], 1.0, tolerance);
   EXPECT_NEAR(qc.quadratic_values[1], 1.0, tolerance);
@@ -2165,7 +2166,7 @@ End
 TEST(lp_parser, qc_cross_term_splits_symmetrically)
 {
   // `4 x*y` in the LP source means coefficient on x_i * x_j = 4 in the
-  // symmetric x^T Q x. Split into Q[x,y] = Q[y,x] = 2 in the CSR.
+  // symmetric x^T Q x. Split into Q[x,y] = Q[y,x] = 2 in the COO storage.
   auto m = parse_lp_string(R"LP(
 Minimize
   x + y
@@ -2175,8 +2176,9 @@ End
 )LP");
   ASSERT_EQ(m.get_quadratic_constraints().size(), 1u);
   const auto& qc = nth_qc(m, 0);
-  // Q has 4 entries (all of [[1,2],[2,1]]).
-  EXPECT_EQ(qc.quadratic_offsets, (std::vector<int>{0, 2, 4}));
+  // Q has 4 entries (all of [[1,2],[2,1]]) stored as COO triplets.
+  EXPECT_EQ(qc.quadratic_row_indices, (std::vector<int>{0, 0, 1, 1}));
+  EXPECT_EQ(qc.quadratic_col_indices, (std::vector<int>{0, 1, 0, 1}));
   ASSERT_EQ(qc.quadratic_values.size(), 4u);
   EXPECT_NEAR(qc.quadratic_values[0], 1.0, tolerance);  // (0, 0)
   EXPECT_NEAR(qc.quadratic_values[1], 2.0, tolerance);  // (0, 1)
@@ -2730,16 +2732,16 @@ TEST(qps_parser, qcmatrix_append_api)
   model_t::quadratic_constraint_t default_qcm;
   EXPECT_EQ(0, default_qcm.constraint_row_index);
   EXPECT_TRUE(default_qcm.quadratic_values.empty());
-  EXPECT_TRUE(default_qcm.quadratic_indices.empty());
-  EXPECT_TRUE(default_qcm.quadratic_offsets.empty());
+  EXPECT_TRUE(default_qcm.quadratic_row_indices.empty());
+  EXPECT_TRUE(default_qcm.quadratic_col_indices.empty());
   EXPECT_TRUE(default_qcm.linear_values.empty());
   EXPECT_TRUE(default_qcm.linear_indices.empty());
   EXPECT_EQ(0.0, default_qcm.rhs_value);
 
   // QC0: [[10, 2], [2, 2]]
   const std::vector<double> qc0_values        = {10.0, 2.0, 2.0, 2.0};
-  const std::vector<int> qc0_indices          = {0, 1, 0, 1};
-  const std::vector<int> qc0_offsets          = {0, 2, 4};
+  const std::vector<int> qc0_row_indices      = {0, 0, 1, 1};
+  const std::vector<int> qc0_col_indices      = {0, 1, 0, 1};
   const std::vector<double> qc0_linear_values = {1.0, 1.0};
   const std::vector<int> qc0_linear_indices   = {0, 1};
   model.append_quadratic_constraint(0,
@@ -2749,13 +2751,13 @@ TEST(qps_parser, qcmatrix_append_api)
                                     qc0_linear_indices,
                                     5.0,
                                     qc0_values,
-                                    qc0_indices,
-                                    qc0_offsets);
+                                    qc0_row_indices,
+                                    qc0_col_indices);
 
   // QC1: [[4, 1], [1, 6]]
   const std::vector<double> qc1_values        = {4.0, 1.0, 1.0, 6.0};
-  const std::vector<int> qc1_indices          = {0, 1, 0, 1};
-  const std::vector<int> qc1_offsets          = {0, 2, 4};
+  const std::vector<int> qc1_row_indices      = {0, 0, 1, 1};
+  const std::vector<int> qc1_col_indices      = {0, 1, 0, 1};
   const std::vector<double> qc1_linear_values = {3.0, 1.0};
   const std::vector<int> qc1_linear_indices   = {0, 1};
   model.append_quadratic_constraint(1,
@@ -2765,8 +2767,8 @@ TEST(qps_parser, qcmatrix_append_api)
                                     qc1_linear_indices,
                                     10.0,
                                     qc1_values,
-                                    qc1_indices,
-                                    qc1_offsets);
+                                    qc1_row_indices,
+                                    qc1_col_indices);
 
   ASSERT_TRUE(model.has_quadratic_constraints());
   const auto& qcs = model.get_quadratic_constraints();
@@ -2779,8 +2781,8 @@ TEST(qps_parser, qcmatrix_append_api)
   EXPECT_EQ(qc0_linear_indices, qcs[0].linear_indices);
   EXPECT_EQ(5.0, qcs[0].rhs_value);
   EXPECT_EQ(qc0_values, qcs[0].quadratic_values);
-  EXPECT_EQ(qc0_indices, qcs[0].quadratic_indices);
-  EXPECT_EQ(qc0_offsets, qcs[0].quadratic_offsets);
+  EXPECT_EQ(qc0_row_indices, qcs[0].quadratic_row_indices);
+  EXPECT_EQ(qc0_col_indices, qcs[0].quadratic_col_indices);
 
   EXPECT_EQ(1, qcs[1].constraint_row_index);
   EXPECT_EQ("QC1", qcs[1].constraint_row_name);
@@ -2789,8 +2791,8 @@ TEST(qps_parser, qcmatrix_append_api)
   EXPECT_EQ(qc1_linear_indices, qcs[1].linear_indices);
   EXPECT_EQ(10.0, qcs[1].rhs_value);
   EXPECT_EQ(qc1_values, qcs[1].quadratic_values);
-  EXPECT_EQ(qc1_indices, qcs[1].quadratic_indices);
-  EXPECT_EQ(qc1_offsets, qcs[1].quadratic_offsets);
+  EXPECT_EQ(qc1_row_indices, qcs[1].quadratic_row_indices);
+  EXPECT_EQ(qc1_col_indices, qcs[1].quadratic_col_indices);
 }
 
 // QCQP MPS: each quadratic constraint bundles row + linear + rhs + quadratic.
diff --git a/cpp/tests/qp/unit_tests/no_constraints.cu b/cpp/tests/qp/unit_tests/no_constraints.cu
index 75190b37fc..dc99a2ce5c 100644
--- a/cpp/tests/qp/unit_tests/no_constraints.cu
+++ b/cpp/tests/qp/unit_tests/no_constraints.cu
@@ -52,6 +52,8 @@ TEST(no_constraints_test, simple_test)
   op_problem.set_quadratic_objective_matrix(Q_values_host, 2, Q_indices_host, 2, Q_offsets_host, 3);
 
   auto settings = cuopt::linear_programming::pdlp_solver_settings_t<int, double>();
+  // Tighter than default: unconstrained QP needs small x at the origin, not just small objective.
+  settings.barrier_relative_complementarity_tolerance = 1e-12;
   auto solution = cuopt::linear_programming::solve_lp(op_problem, settings);
 
   EXPECT_EQ(solution.get_termination_status(),
diff --git a/cpp/tests/qp/unit_tests/two_variable_test.cu b/cpp/tests/qp/unit_tests/two_variable_test.cu
index aac618994a..f1fb4852ec 100644
--- a/cpp/tests/qp/unit_tests/two_variable_test.cu
+++ b/cpp/tests/qp/unit_tests/two_variable_test.cu
@@ -59,6 +59,7 @@ TEST(two_variable_test, simple_test)
   op_problem.set_quadratic_objective_matrix(Q_values_host, 2, Q_indices_host, 2, Q_offsets_host, 3);
 
   auto settings = cuopt::linear_programming::pdlp_solver_settings_t<int, double>();
+  settings.barrier_relative_complementarity_tolerance = 1e-12;
   auto solution = cuopt::linear_programming::solve_lp(op_problem, settings);
 
   EXPECT_EQ(solution.get_termination_status(),
diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd b/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd
index 4a83f3a058..b25ba76e3a 100644
--- a/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd
+++ b/python/cuopt/cuopt/linear_programming/data_model/data_model.pxd
@@ -7,11 +7,27 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
+from libc.stddef cimport size_t
 from libcpp cimport bool
 from libcpp.string cimport string
 from libcpp.vector cimport vector
 
 
+cdef extern from "cuopt/linear_programming/io/mps_data_model.hpp" namespace "cuopt::linear_programming::io" nogil: # noqa
+
+    cdef cppclass mps_data_model_t[i_t, f_t]:
+        cppclass quadratic_constraint_t:
+            int constraint_row_index
+            string constraint_row_name
+            char constraint_row_type
+            vector[double] linear_values
+            vector[int] linear_indices
+            double rhs_value
+            vector[int] quadratic_row_indices
+            vector[int] quadratic_col_indices
+            vector[double] quadratic_values
+
+
 cdef extern from "cuopt/linear_programming/io/data_model_view.hpp" namespace "cuopt::linear_programming::io" nogil: # noqa
 
     cdef cppclass data_model_view_t[i_t, f_t]:
@@ -54,6 +70,8 @@ cdef extern from "cuopt/linear_programming/io/data_model_view.hpp" namespace "cu
         void set_row_names(const vector[string] row_names) except +
         void set_problem_name(const string problem_name) except +
         void set_objective_name(const string objective_name) except +
+        void set_quadratic_constraints(
+            vector[mps_data_model_t[i_t, f_t].quadratic_constraint_t] constraints) except +
 
 
 cdef extern from "cuopt/linear_programming/io/writer.hpp" namespace "cuopt::linear_programming::io" nogil: # noqa
diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model.py b/python/cuopt/cuopt/linear_programming/data_model/data_model.py
index 648809eac1..5e6fd6744d 100644
--- a/python/cuopt/cuopt/linear_programming/data_model/data_model.py
+++ b/python/cuopt/cuopt/linear_programming/data_model/data_model.py
@@ -4,6 +4,8 @@
 import os
 import time
 
+import numpy as np
+
 from . import data_model_wrapper
 from .utilities import catch_cuopt_exception
 
@@ -288,6 +290,90 @@ def set_quadratic_objective_matrix(self, Q_values, Q_indices, Q_offsets):
         """
         super().set_quadratic_objective_matrix(Q_values, Q_indices, Q_offsets)
 
+    def get_quadratic_constraints(self):
+        """
+        Return quadratic (QCMATRIX) constraints appended to this model.
+
+        Each entry is a dict with keys including ``constraint_row_index``,
+        ``constraint_row_name``, ``constraint_row_type``, COO arrays, and ``rhs_value``.
+        """
+        return self.quadratic_constraints
+
+    @catch_cuopt_exception
+    def clear_quadratic_constraints(self):
+        """
+        Remove all quadratic (QCMATRIX) constraints from the model.
+
+        Quadratic constraints are converted to second-order cone form internally
+        and solved with the barrier method.
+        """
+        super().clear_quadratic_constraints()
+
+    @catch_cuopt_exception
+    def add_quadratic_constraint(
+        self,
+        constraint_row_index,
+        constraint_row_name="",
+        linear_values=None,
+        linear_indices=None,
+        rhs_value=0.0,
+        quadratic_values=None,
+        quadratic_row_indices=None,
+        quadratic_col_indices=None,
+        sense="L",
+    ):
+        """
+        Add one quadratic constraint in MPS QCMATRIX form.
+
+        Each constraint has a linear part (optional) and a quadratic part in COO
+        format. Call multiple times to add several QCMATRIX rows; each call must
+        use a distinct ``constraint_row_index``.
+
+        Parameters
+        ----------
+        constraint_row_index : int
+            ROWS declaration index for this quadratic row (among all rows).
+        constraint_row_name : str, optional
+            Optional row name (for MPS export).
+        linear_values, linear_indices : array-like, optional
+            Sparse linear coefficients on the same variable index space.
+        rhs_value : float, optional
+            Right-hand side of the quadratic row.
+        quadratic_values, quadratic_row_indices, quadratic_col_indices : array-like
+            COO triplets for the quadratic matrix Q in
+            ``linear^T x + x^T Q x {sense} rhs_value``.
+        sense : str, optional
+            MPS row type: ``'L'`` (default, ``<=``) or ``'G'`` (``>=``), same values as
+            :meth:`set_row_types`. Rows are stored with the given sense; ``'G'`` is converted to
+            ``'L'`` when the barrier solver builds second-order cones from QCMATRIX data.
+            Equality (``'E'``) is not supported.
+
+        Notes
+        -----
+        When any quadratic constraint is present, cuOpt selects the barrier
+        solver and converts QCMATRIX rows to second-order cones.
+        """
+        if hasattr(sense, "value"):
+            sense = sense.value
+        if sense == "E":
+            raise ValueError("Equality constraints are not supported.")
+        if sense not in ("L", "G"):
+            raise ValueError(
+                f"Invalid sense {sense!r}; use 'L' or 'G' like set_row_types "
+                "(equality 'E' is not supported)."
+            )
+        super().add_quadratic_constraint(
+            constraint_row_index,
+            constraint_row_name,
+            linear_values,
+            linear_indices,
+            rhs_value,
+            quadratic_values,
+            quadratic_row_indices,
+            quadratic_col_indices,
+            constraint_row_type=sense,
+        )
+
     @catch_cuopt_exception
     def set_variable_lower_bounds(self, variable_lower_bounds):
         """
diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd
index 6c798a0f6a..9f9f5ba5cf 100644
--- a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd
+++ b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd
@@ -12,3 +12,7 @@ from libcpp.memory cimport unique_ptr
 
 cdef class DataModel:
     cdef unique_ptr[data_model_view_t[int, double]] c_data_model_view
+
+    cdef void _set_cpp_quadratic_constraints(
+        self, data_model_view_t[int, double]* c_data_model_view
+    )
diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx
index 7722fb2437..7c764b1413 100644
--- a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx
+++ b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx
@@ -7,7 +7,7 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
-from .data_model cimport data_model_view_t, write_mps
+from .data_model cimport data_model_view_t, mps_data_model_t, write_mps
 
 import warnings
 
@@ -17,10 +17,14 @@ from cuopt.utilities import get_data_ptr
 
 from libc.stdint cimport uintptr_t
 from libcpp.memory cimport unique_ptr
+from libcpp.string cimport string
 from libcpp.utility cimport move
+from libcpp.vector cimport vector
 
 
 def type_cast(np_obj, np_type, name):
+    if not isinstance(np_obj, np.ndarray):
+        np_obj = np.asarray(np_obj)
     obj_type = np_obj.dtype
 
     if ((np.issubdtype(np_type, np.floating) and
@@ -62,6 +66,79 @@ cdef class DataModel:
         self.variable_types = np.array([])
         self.variable_names = np.array([])
         self.row_names = np.array([])
+        self.quadratic_constraints = []
+
+    def clear_quadratic_constraints(self):
+        self.quadratic_constraints = []
+
+    def add_quadratic_constraint(
+        self,
+        constraint_row_index,
+        constraint_row_name="",
+        linear_values=None,
+        linear_indices=None,
+        rhs_value=0.0,
+        quadratic_values=None,
+        quadratic_row_indices=None,
+        quadratic_col_indices=None,
+        constraint_row_type="L",
+    ):
+        linear_values = (
+            np.array([], dtype=np.float64)
+            if linear_values is None
+            else type_cast(linear_values, np.float64, "linear_values")
+        )
+        linear_indices = (
+            np.array([], dtype=np.int32)
+            if linear_indices is None
+            else type_cast(linear_indices, np.int32, "linear_indices")
+        )
+        if linear_values.shape[0] != linear_indices.shape[0]:
+            raise ValueError("linear_values and linear_indices must have the same length")
+        quadratic_values = (
+            np.array([], dtype=np.float64)
+            if quadratic_values is None
+            else type_cast(quadratic_values, np.float64, "quadratic_values")
+        )
+        quadratic_row_indices = (
+            np.array([], dtype=np.int32)
+            if quadratic_row_indices is None
+            else type_cast(quadratic_row_indices, np.int32, "quadratic_row_indices")
+        )
+        quadratic_col_indices = (
+            np.array([], dtype=np.int32)
+            if quadratic_col_indices is None
+            else type_cast(quadratic_col_indices, np.int32, "quadratic_col_indices")
+        )
+        if not (
+            quadratic_values.shape[0]
+            == quadratic_row_indices.shape[0]
+            == quadratic_col_indices.shape[0]
+        ):
+            raise ValueError(
+                "quadratic_values, quadratic_row_indices, and "
+                "quadratic_col_indices must have the same length"
+            )
+        row_type = str(constraint_row_type)
+        if row_type == "E":
+            raise ValueError("Equality constraints are not supported.")
+        if row_type not in ("L", "G"):
+            raise ValueError(
+                f"Invalid constraint_row_type {row_type!r}; use 'L' or 'G' like set_row_types."
+            )
+        self.quadratic_constraints.append(
+            {
+                "constraint_row_index": int(constraint_row_index),
+                "constraint_row_name": str(constraint_row_name),
+                "constraint_row_type": row_type,
+                "linear_values": linear_values,
+                "linear_indices": linear_indices,
+                "rhs_value": float(rhs_value),
+                "quadratic_values": quadratic_values,
+                "quadratic_row_indices": quadratic_row_indices,
+                "quadratic_col_indices": quadratic_col_indices,
+            }
+        )
 
     def set_maximize(self, maximize):
         self.maximize = maximize
@@ -379,10 +456,65 @@ cdef class DataModel:
                 self.get_initial_dual_solution().shape[0]
             )
 
+        if self.quadratic_constraints:
+            self._set_cpp_quadratic_constraints(c_data_model_view)
+
+    cdef void _set_cpp_quadratic_constraints(
+        self, data_model_view_t[int, double]* c_data_model_view
+    ):
+        cdef vector[mps_data_model_t[int, double].quadratic_constraint_t] constraints
+        cdef mps_data_model_t[int, double].quadratic_constraint_t qc
+        cdef dict item
+        cdef size_t i
+        cdef uintptr_t c_linear_values
+        cdef uintptr_t c_linear_indices
+        cdef uintptr_t c_quadratic_values
+        cdef uintptr_t c_quadratic_row_indices
+        cdef uintptr_t c_quadratic_col_indices
+        cdef size_t linear_nnz
+        cdef size_t quadratic_nnz
+
+        for item in self.quadratic_constraints:
+            qc.constraint_row_index = item["constraint_row_index"]
+            qc.constraint_row_name = item["constraint_row_name"].encode("utf-8")
+            qc.constraint_row_type = ord(item.get("constraint_row_type", "L"))
+            qc.rhs_value = item["rhs_value"]
+
+            linear_nnz = item["linear_values"].shape[0]
+            qc.linear_values.resize(linear_nnz)
+            qc.linear_indices.resize(linear_nnz)
+            if linear_nnz > 0:
+                c_linear_values = get_data_ptr(item["linear_values"])
+                c_linear_indices = get_data_ptr(item["linear_indices"])
+                for i in range(linear_nnz):
+                    qc.linear_values[i] = (<double*>c_linear_values)[i]
+                    qc.linear_indices[i] = (<int*>c_linear_indices)[i]
+
+            quadratic_nnz = item["quadratic_values"].shape[0]
+            qc.quadratic_values.resize(quadratic_nnz)
+            qc.quadratic_row_indices.resize(quadratic_nnz)
+            qc.quadratic_col_indices.resize(quadratic_nnz)
+            if quadratic_nnz > 0:
+                c_quadratic_values = get_data_ptr(item["quadratic_values"])
+                c_quadratic_row_indices = get_data_ptr(item["quadratic_row_indices"])
+                c_quadratic_col_indices = get_data_ptr(item["quadratic_col_indices"])
+                for i in range(quadratic_nnz):
+                    qc.quadratic_values[i] = (<double*>c_quadratic_values)[i]
+                    qc.quadratic_row_indices[i] = (<int*>c_quadratic_row_indices)[i]
+                    qc.quadratic_col_indices[i] = (<int*>c_quadratic_col_indices)[i]
+
+            constraints.push_back(qc)
+
+        c_data_model_view.set_quadratic_constraints(constraints)
+
     def writeMPS(self, user_problem_file):
-        self.variable_types = type_cast(
-            self.variable_types, "S1", "variable_types"
-        )
+        n_vars = self.get_variable_lower_bounds().shape[0]
+        if self.variable_types.shape[0] == 0 and n_vars > 0:
+            self.variable_types = np.array(["C"] * n_vars, dtype="S1")
+        else:
+            self.variable_types = type_cast(
+                self.variable_types, "S1", "variable_types"
+            )
         self.set_data_model_view()
         write_mps(self.c_data_model_view.get()[0],
                   user_problem_file.encode('utf-8'))
diff --git a/python/cuopt/cuopt/linear_programming/io/parser.pxd b/python/cuopt/cuopt/linear_programming/io/parser.pxd
index 402873f9ab..b4bea47d3c 100644
--- a/python/cuopt/cuopt/linear_programming/io/parser.pxd
+++ b/python/cuopt/cuopt/linear_programming/io/parser.pxd
@@ -15,6 +15,17 @@ from libcpp.vector cimport vector
 cdef extern from "cuopt/linear_programming/io/mps_data_model.hpp" namespace "cuopt::linear_programming::io": # noqa
 
     cdef cppclass mps_data_model_t[i_t, f_t]:
+        cppclass quadratic_constraint_t:
+            int constraint_row_index
+            string constraint_row_name
+            char constraint_row_type
+            vector[double] linear_values
+            vector[int] linear_indices
+            double rhs_value
+            vector[int] quadratic_row_indices
+            vector[int] quadratic_col_indices
+            vector[double] quadratic_values
+
         bool maximize_
         vector[f_t] A_
         vector[i_t] A_indices_
@@ -36,6 +47,7 @@ cdef extern from "cuopt/linear_programming/io/mps_data_model.hpp" namespace "cuo
         vector[char] row_types_
         string objective_name_
         string problem_name_
+        const vector[quadratic_constraint_t]& get_quadratic_constraints() const
 
 cdef extern from "cuopt/linear_programming/io/utilities/cython_parser.hpp" namespace "cuopt::cython": # noqa
 
diff --git a/python/cuopt/cuopt/linear_programming/io/parser_wrapper.pyx b/python/cuopt/cuopt/linear_programming/io/parser_wrapper.pyx
index 61e10b1864..3cc0e1a110 100644
--- a/python/cuopt/cuopt/linear_programming/io/parser_wrapper.pyx
+++ b/python/cuopt/cuopt/linear_programming/io/parser_wrapper.pyx
@@ -135,6 +135,53 @@ cdef _marshal_data_model(mps_data_model_t[int, double]* dm, data_model):
     data_model.set_objective_name(dm.objective_name_.decode())
     data_model.set_problem_name(dm.problem_name_.decode())
 
+    cdef size_t qi
+    cdef size_t n_qc = dm.get_quadratic_constraints().size()
+    cdef mps_data_model_t[int, double].quadratic_constraint_t qc
+    cdef size_t linear_nnz, quadratic_nnz
+    cdef double[:] linear_values_view
+    cdef int[:] linear_indices_view
+    cdef double[:] quadratic_values_view
+    cdef int[:] quadratic_row_indices_view
+    cdef int[:] quadratic_col_indices_view
+
+    for qi in range(n_qc):
+        qc = dm.get_quadratic_constraints()[qi]
+        linear_nnz = qc.linear_values.size()
+        if linear_nnz > 0:
+            linear_values_view = <double[:linear_nnz]>qc.linear_values.data()
+            linear_values = np.asarray(linear_values_view).copy()
+            linear_indices_view = <int[:linear_nnz]>qc.linear_indices.data()
+            linear_indices = np.asarray(linear_indices_view).copy()
+        else:
+            linear_values = None
+            linear_indices = None
+
+        quadratic_nnz = qc.quadratic_values.size()
+        if quadratic_nnz > 0:
+            quadratic_values_view = <double[:quadratic_nnz]>qc.quadratic_values.data()
+            quadratic_values = np.asarray(quadratic_values_view).copy()
+            quadratic_row_indices_view = <int[:quadratic_nnz]>qc.quadratic_row_indices.data()
+            quadratic_row_indices = np.asarray(quadratic_row_indices_view).copy()
+            quadratic_col_indices_view = <int[:quadratic_nnz]>qc.quadratic_col_indices.data()
+            quadratic_col_indices = np.asarray(quadratic_col_indices_view).copy()
+        else:
+            quadratic_values = None
+            quadratic_row_indices = None
+            quadratic_col_indices = None
+
+        data_model.add_quadratic_constraint(
+            qc.constraint_row_index,
+            qc.constraint_row_name.decode("utf-8"),
+            linear_values=linear_values,
+            linear_indices=linear_indices,
+            rhs_value=qc.rhs_value,
+            quadratic_values=quadratic_values,
+            quadratic_row_indices=quadratic_row_indices,
+            quadratic_col_indices=quadratic_col_indices,
+            sense=chr(qc.constraint_row_type),
+        )
+
     return data_model
 
 
diff --git a/python/cuopt/cuopt/linear_programming/problem.py b/python/cuopt/cuopt/linear_programming/problem.py
index 25d9d634da..84ca43c5b5 100644
--- a/python/cuopt/cuopt/linear_programming/problem.py
+++ b/python/cuopt/cuopt/linear_programming/problem.py
@@ -256,8 +256,10 @@ def __mul__(self, other):
                 qvars1 = [self] * len(other.vars)
                 qvars2 = other.vars
                 qcoeffs = other.coefficients
-                vars = [self]
-                coeffs = [other.constant]
+                vars, coeffs = [], []
+                if other.constant != 0.0:
+                    vars = [self]
+                    coeffs = [other.constant]
                 return QuadraticExpression(
                     qvars1=qvars1,
                     qvars2=qvars2,
@@ -283,6 +285,8 @@ def __le__(self, other):
                 # var1 <= var2   -> var1 - var2 <= 0
                 expr = self - other
                 return Constraint(expr, LE, 0.0)
+            case QuadraticExpression():
+                return Constraint(self - other, LE, 0.0)
             case _:
                 raise ValueError("Unsupported operation")
 
@@ -295,6 +299,8 @@ def __ge__(self, other):
                 # var1 >= var2   ->  var1 - var2 >= 0
                 expr = self - other
                 return Constraint(expr, GE, 0.0)
+            case QuadraticExpression():
+                return Constraint(self - other, GE, 0.0)
             case _:
                 raise ValueError("Unsupported operation")
 
@@ -314,8 +320,9 @@ def __eq__(self, other):
 class QuadraticExpression:
     """
     QuadraticExpressions contain quadratic terms, linear terms, and a constant.
-    QuadraticExpressions can be used to create quadratic objectives in
-    the Problem.
+    Use them for quadratic objectives (``Problem.setObjective``) or quadratic
+    constraints via ``<=`` or ``>=`` comparisons passed to
+    :py:meth:`Problem.addConstraint` (equality is not supported).
     QuadraticExpressions can be added and subtracted with other
     QuadraticExpressions, LinearExpressions, and Variables, and can also
     be multiplied and divided by scalars.
@@ -734,6 +741,9 @@ def __rsub__(self, other):
         # other - self  -> other + self * -1.0
         return other + self * -1.0
 
+    def __neg__(self):
+        return self * -1.0
+
     def __imul__(self, other):
         # Compute expr *= constant
         match other:
@@ -834,13 +844,91 @@ def __truediv__(self, other):
                 )
 
     def __le__(self, other):
-        raise Exception("Quadratic constraints not supported")
+        match other:
+            case int() | float():
+                return Constraint(self, LE, float(other))
+            case Variable() | LinearExpression() | QuadraticExpression():
+                return Constraint(self - other, LE, 0.0)
+            case _:
+                raise ValueError(
+                    "Can't compare QuadraticExpression with type %s"
+                    % type(other).__name__
+                )
 
     def __ge__(self, other):
-        raise Exception("Quadratic constraints not supported")
+        match other:
+            case int() | float():
+                return Constraint(self, GE, float(other))
+            case Variable() | LinearExpression() | QuadraticExpression():
+                return Constraint(self - other, GE, 0.0)
+            case _:
+                raise ValueError(
+                    "Can't compare QuadraticExpression with type %s"
+                    % type(other).__name__
+                )
 
     def __eq__(self, other):
-        raise Exception("Quadratic constraints not supported")
+        raise ValueError("Equality constraints are not supported.")
+
+
+def _quadratic_expression_to_qcmatrix(expr, rhs):
+    """Build QCMATRIX COO data for a quadratic row ``expr`` sense ``rhs``.
+
+    Used for both ``<=`` (``LE``) and ``>=`` (``GE``); row sense is stored on
+    ``Constraint.Sense``, not in this helper. The constant term is moved to
+    ``rhs_value``.
+
+    Duplicate linear variable indices and duplicate Q (row, col) triplets are
+    merged by summing coefficients, matching linear ``Constraint`` behavior.
+    """
+    rhs_value = float(rhs) - expr.constant
+
+    linear_coeff = {}
+    for var, coeff in zip(expr.vars, expr.coefficients):
+        if coeff == 0.0:
+            continue
+        idx = var.index
+        linear_coeff[idx] = linear_coeff.get(idx, 0.0) + coeff
+
+    linear_indices = []
+    linear_values = []
+    for idx in sorted(linear_coeff):
+        coeff = linear_coeff[idx]
+        if coeff != 0.0:
+            linear_indices.append(idx)
+            linear_values.append(coeff)
+
+    quad_coeff = {}
+    for var1, var2, coeff in zip(expr.qvars1, expr.qvars2, expr.qcoefficients):
+        if coeff == 0.0:
+            continue
+        key = (var1.index, var2.index)
+        quad_coeff[key] = quad_coeff.get(key, 0.0) + coeff
+    if expr.qmatrix is not None:
+        q_coo = expr.qmatrix.tocoo()
+        for row, col, value in zip(q_coo.row, q_coo.col, q_coo.data):
+            if value == 0.0:
+                continue
+            key = (expr.qvars[row].index, expr.qvars[col].index)
+            quad_coeff[key] = quad_coeff.get(key, 0.0) + value
+
+    quadratic_row_indices = []
+    quadratic_col_indices = []
+    quadratic_values = []
+    for (row, col), value in sorted(quad_coeff.items()):
+        if value != 0.0:
+            quadratic_row_indices.append(row)
+            quadratic_col_indices.append(col)
+            quadratic_values.append(value)
+
+    return (
+        linear_values,
+        linear_indices,
+        quadratic_values,
+        quadratic_row_indices,
+        quadratic_col_indices,
+        rhs_value,
+    )
 
 
 class LinearExpression:
@@ -1150,6 +1238,8 @@ def __le__(self, other):
                 # expr1 <= expr2   -> expr1 - expr2 <= 0
                 expr = self - other
                 return Constraint(expr, LE, 0.0)
+            case QuadraticExpression():
+                return Constraint(self - other, LE, 0.0)
 
     def __ge__(self, other):
         match other:
@@ -1159,6 +1249,8 @@ def __ge__(self, other):
                 # expr1 >= expr2   ->  expr1 - expr2 >= 0
                 expr = self - other
                 return Constraint(expr, GE, 0.0)
+            case QuadraticExpression():
+                return Constraint(self - other, GE, 0.0)
 
     def __eq__(self, other):
         match other:
@@ -1172,16 +1264,15 @@ def __eq__(self, other):
 
 class Constraint:
     """
-    cuOpt constraint object containing a linear expression,
-    the sense of the constraint, and the right-hand side of
-    the constraint.
-    Constraints are associated with a problem and can be
-    created using :py:meth:`Problem.addConstraint`.
+    cuOpt constraint object containing a linear or quadratic (QCMATRIX)
+    expression, the sense of the constraint, and the right-hand side.
+    Constraints are associated with a problem and can be created using
+    :py:meth:`Problem.addConstraint`.
 
     Parameters
     ----------
-    expr : LinearExpression
-        Linear expression corresponding to a problem.
+    expr : LinearExpression or QuadraticExpression
+        Expression corresponding to the constraint.
     sense : enum
         Sense of the constraint. Either LE for <=,
         GE for >= or EQ for == .
@@ -1197,7 +1288,9 @@ class Constraint:
     Sense : LE, GE or EQ
         Row sense. LE for <=, GE for >= or EQ for == .
     RHS : float
-        Constraint right-hand side value.
+        Constraint right-hand side value (linear rows).
+    is_quadratic : bool
+        True when the row is exported as a QCMATRIX quadratic constraint.
     Slack : float
         Computed LHS - RHS with current solution.
     DualValue : float
@@ -1205,10 +1298,41 @@ class Constraint:
     """
 
     def __init__(self, expr, sense, rhs, name=""):
+        self.index = -1
+        self.Sense = sense
+        self.ConstraintName = name
+        self.DualValue = float("nan")
+        self.Slack = float("nan")
+
+        if isinstance(expr, QuadraticExpression):
+            self.is_quadratic = True
+            (
+                linear_values,
+                linear_indices,
+                quadratic_values,
+                quadratic_row_indices,
+                quadratic_col_indices,
+                rhs_value,
+            ) = _quadratic_expression_to_qcmatrix(expr, rhs)
+            self.linear_values = np.array(linear_values, dtype=np.float64)
+            self.linear_indices = np.array(linear_indices, dtype=np.int32)
+            self.quadratic_values = np.array(quadratic_values, dtype=np.float64)
+            self.quadratic_row_indices = np.array(
+                quadratic_row_indices, dtype=np.int32
+            )
+            self.quadratic_col_indices = np.array(
+                quadratic_col_indices, dtype=np.int32
+            )
+            self.rhs_value = rhs_value
+            self.RHS = rhs_value
+            self.vindex_coeff_dict = {}
+            self.vars = expr.vars
+            return
+
+        self.is_quadratic = False
         self.vindex_coeff_dict = {}
         nz = len(expr)
         self.vars = expr.vars
-        self.index = -1
         for i in range(nz):
             v_idx = expr.vars[i].index
             v_coeff = expr.coefficients[i]
@@ -1217,11 +1341,7 @@ def __init__(self, expr, sense, rhs, name=""):
                 if v_idx in self.vindex_coeff_dict
                 else v_coeff
             )
-        self.Sense = sense
         self.RHS = rhs - expr.getConstant()
-        self.ConstraintName = name
-        self.DualValue = float("nan")
-        self.Slack = float("nan")
 
     def __len__(self):
         return len(self.vindex_coeff_dict)
@@ -1254,9 +1374,12 @@ def getCoefficient(self, var):
 
     def compute_slack(self):
         # Computes the constraint Slack in the current solution.
-        lhs = 0.0
-        for var in self.vars:
-            lhs += var.Value * self.vindex_coeff_dict[var.index]
+        index_to_var = {var.index: var for var in self.vars}
+        lhs = sum(
+            index_to_var[v_idx].Value * coeff
+            for v_idx, coeff in self.vindex_coeff_dict.items()
+        )
+        
         return self.RHS - lhs
 
 
@@ -1401,6 +1524,8 @@ def _to_data_model(self):
                 "values": [],
             }
             for constr in self.constrs:
+                if constr.is_quadratic:
+                    continue
                 csr_dict["column_indices"].extend(
                     list(constr.vindex_coeff_dict.keys())
                 )
@@ -1420,6 +1545,8 @@ def _to_data_model(self):
 
         else:
             for constr in self.constrs:
+                if constr.is_quadratic:
+                    continue
                 self.rhs.append(constr.RHS)
                 self.row_sense.append(constr.Sense)
 
@@ -1463,6 +1590,27 @@ def _to_data_model(self):
         dm.set_row_names(self.row_names)
         dm.set_problem_name(self.Name)
 
+        linear_constr_count = sum(1 for c in self.constrs if not c.is_quadratic)
+        quad_index = 0
+        for constr in self.constrs:
+            if not constr.is_quadratic:
+                continue
+            row_name = constr.ConstraintName
+            if row_name == "":
+                row_name = "Q" + str(constr.index)
+            dm.add_quadratic_constraint(
+                constraint_row_index=linear_constr_count + quad_index,
+                constraint_row_name=row_name,
+                linear_values=constr.linear_values,
+                linear_indices=constr.linear_indices,
+                rhs_value=constr.rhs_value,
+                quadratic_values=constr.quadratic_values,
+                quadratic_row_indices=constr.quadratic_row_indices,
+                quadratic_col_indices=constr.quadratic_col_indices,
+                sense=constr.Sense,
+            )
+            quad_index += 1
+
         self.model = dm
 
     def update(self):
@@ -1530,15 +1678,15 @@ def addVariable(
     def addConstraint(self, constr, name=""):
         """
         Adds a constraint to the problem defined by constraint object
-        and name. A constraint is generated using LinearExpression,
-        Sense and RHS.
+        and name. A constraint is generated using LinearExpression or
+        QuadraticExpression comparisons (``<=``, ``>=``, or ``==``).
 
         Parameters
         ----------
         constr : :py:class:`Constraint`
-            Constructed using LinearExpressions (See Examples)
+            Constructed using expression comparisons (see Examples).
         name : string
-            Name of the variable. Optional.
+            Name of the constraint. Optional.
 
         Examples
         --------
@@ -1548,6 +1696,7 @@ def addConstraint(self, constr, name=""):
         >>> problem.addConstraint(2*x - 3*y <= 10, name="Constr1")
         >>> expr = 3*x + y
         >>> problem.addConstraint(expr + x == 20, name="Constr2")
+        >>> problem.addConstraint(-x*x + y*y <= 0, name="soc")
         """
         if self.solved:
             self.reset_solved_values()  # Reset all solved values
@@ -1587,6 +1736,10 @@ def updateConstraint(self, constr, coeffs=[], rhs=None):
         """
         self.reset_solved_values()
         if isinstance(constr, Constraint):
+            if constr.is_quadratic:
+                raise ValueError(
+                    "updateConstraint applies to linear constraints only"
+                )
             if isinstance(coeffs, dict):
                 coeffs = coeffs.items()
             for var, coeff in coeffs:
@@ -1827,6 +1980,12 @@ def NumConstraints(self):
         # Returns number of contraints in the problem.
         return len(self.constrs)
 
+    def getQuadraticConstraints(self):
+        """
+        Returns all quadratic (QCMATRIX) constraints in the problem.
+        """
+        return [c for c in self.constrs if c.is_quadratic]
+
     @property
     def NumNZs(self):
         # Returns number of non-zeros in the problem.
@@ -1873,6 +2032,8 @@ def getCSR(self):
             return self.dict_to_object(self.constraint_csr_matrix)
         csr_dict = {"row_pointers": [0], "column_indices": [], "values": []}
         for constr in self.constrs:
+            if constr.is_quadratic:
+                continue
             csr_dict["column_indices"].extend(
                 list(constr.vindex_coeff_dict.keys())
             )
@@ -1949,10 +2110,14 @@ def populate_solution(self, solution):
         dual_sol = None
         if not IsMIP:
             dual_sol = solution.get_dual_solution()
-        for i, constr in enumerate(self.constrs):
-            if dual_sol is not None and len(dual_sol) > 0:
-                constr.DualValue = dual_sol[i]
+        linear_row = 0
+        for constr in self.constrs:
+            if constr.is_quadratic:
+                continue
+            if dual_sol is not None and len(dual_sol) > linear_row:
+                constr.DualValue = dual_sol[linear_row]
             constr.Slack = constr.compute_slack()
+            linear_row += 1
         self.solved = True
 
     def solve(self, settings=solver_settings.SolverSettings()):
@@ -1977,3 +2142,4 @@ def solve(self, settings=solver_settings.SolverSettings()):
         solution = solver.Solve(self.model, settings)
         # Post Solve
         self.populate_solution(solution)
+        return solution
diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py
index 66242550aa..749601926d 100644
--- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py
+++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py
@@ -150,6 +150,16 @@ def test_model():
     assert prob.ObjValue == pytest.approx(5 * x.Value + 3 * y.Value + 70)
 
 
+def test_constraint_duplicate_terms_slack():
+    """Merged coeffs in vindex_coeff_dict must not be double-counted in slack."""
+    prob = Problem()
+    x = prob.addVariable()
+    c = prob.addConstraint(5 * x + 7 * x <= 18)
+    assert c.getCoefficient(x) == 12
+    x.Value = 1.0
+    assert c.compute_slack() == pytest.approx(6.0)
+
+
 def test_semi_continuous_variable():
     prob = Problem("Semi-continuous")
     x = prob.addVariable(lb=5.0, ub=10.0, vtype=SEMI_CONTINUOUS, name="x")

From 3e7a526a269b928f57e878f59d8268f37103cf32 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 22 May 2026 15:00:27 -0700
Subject: [PATCH 02/10] Add label back to test

---
 cpp/tests/dual_simplex/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/tests/dual_simplex/CMakeLists.txt b/cpp/tests/dual_simplex/CMakeLists.txt
index f6dff93227..2df648377d 100644
--- a/cpp/tests/dual_simplex/CMakeLists.txt
+++ b/cpp/tests/dual_simplex/CMakeLists.txt
@@ -7,4 +7,4 @@ ConfigureTest(DUAL_SIMPLEX_TEST
     ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solve.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solve_barrier.cu
     ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/second_order_cone_kernels.cu
-)
+    LABELS numopt)

From 911cb044094ea3678d2b76b7dacf545e54782d67 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 22 May 2026 15:25:20 -0700
Subject: [PATCH 03/10] Remove barrier specific tolerances. These should not be
 exposed to the user

---
 .../cuopt/linear_programming/constants.h      |  4 ----
 .../pdlp/solver_settings.hpp                  |  3 ---
 cpp/src/barrier/barrier.cu                    |  8 +++-----
 .../dual_simplex/simplex_solver_settings.hpp  |  2 +-
 cpp/src/dual_simplex/solve.cpp                | 20 ++++++++++---------
 cpp/src/math_optimization/solver_settings.cu  |  3 ---
 cpp/src/pdlp/solve.cu                         |  5 -----
 cpp/tests/qp/unit_tests/no_constraints.cu     |  2 --
 cpp/tests/qp/unit_tests/two_variable_test.cu  |  1 -
 9 files changed, 15 insertions(+), 33 deletions(-)

diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h
index 1dfccef1ea..3342ba375c 100644
--- a/cpp/include/cuopt/linear_programming/constants.h
+++ b/cpp/include/cuopt/linear_programming/constants.h
@@ -47,10 +47,6 @@
 #define CUOPT_DUALIZE                              "dualize"
 #define CUOPT_ORDERING                             "ordering"
 #define CUOPT_BARRIER_DUAL_INITIAL_POINT           "barrier_dual_initial_point"
-#define CUOPT_BARRIER_RELATIVE_FEASIBILITY_TOLERANCE "barrier_relative_feasibility_tolerance"
-#define CUOPT_BARRIER_RELATIVE_OPTIMALITY_TOLERANCE  "barrier_relative_optimality_tolerance"
-#define CUOPT_BARRIER_RELATIVE_COMPLEMENTARITY_TOLERANCE \
-  "barrier_relative_complementarity_tolerance"
 #define CUOPT_BARRIER_ITERATIVE_REFINEMENT         "barrier_iterative_refinement"
 #define CUOPT_BARRIER_STEP_SCALE                   "barrier_step_scale"
 #define CUOPT_ELIMINATE_DENSE_COLUMNS              "eliminate_dense_columns"
diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
index 3597b85016..b30286f9ce 100644
--- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
+++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
@@ -280,9 +280,6 @@ class pdlp_solver_settings_t {
   i_t dualize{-1};
   i_t ordering{-1};
   i_t barrier_dual_initial_point{-1};
-  f_t barrier_relative_feasibility_tolerance{1.0e-8};
-  f_t barrier_relative_optimality_tolerance{1.0e-8};
-  f_t barrier_relative_complementarity_tolerance{1.0e-8};
   bool eliminate_dense_columns{true};
   pdlp_precision_t pdlp_precision{pdlp_precision_t::DefaultPrecision};
   bool barrier_iterative_refinement{true};
diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu
index fd6f9dbc6e..b39d75c6ae 100644
--- a/cpp/src/barrier/barrier.cu
+++ b/cpp/src/barrier/barrier.cu
@@ -4382,11 +4382,9 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
                         relative_complementarity_residual,
                         elapsed_time);
 
-    // TODO: should polish termination condition,
     bool converged = primal_residual_norm < settings.barrier_relative_feasibility_tol &&
-                     dual_residual_norm < settings.barrier_relative_feasibility_tol &&
-                     (duality_gap_abs < settings.barrier_relative_complementarity_tol ||
-                      duality_gap_rel < settings.barrier_relative_complementarity_tol);
+                     dual_residual_norm < settings.barrier_relative_optimality_tol &&
+                     complementarity_residual_norm < settings.barrier_relative_complementarity_tol;
 
     const i_t linear_xz_rhs_size = data.linear_xz_size(data.complementarity_xz_rhs.size());
 
@@ -4607,7 +4605,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
                           elapsed_time);
 
       bool primal_feasible = relative_primal_residual < settings.barrier_relative_feasibility_tol;
-      bool dual_feasible   = relative_dual_residual < settings.barrier_relative_feasibility_tol;
+      bool dual_feasible   = relative_dual_residual < settings.barrier_relative_optimality_tol;
       bool small_gap =
         relative_complementarity_residual < settings.barrier_relative_complementarity_tol;
 
diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp
index 35510c37b9..66fa94bfc1 100644
--- a/cpp/src/dual_simplex/simplex_solver_settings.hpp
+++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp
@@ -79,7 +79,7 @@ struct simplex_solver_settings_t {
       use_left_looking_lu(false),
       eliminate_singletons(true),
       print_presolve_stats(true),
-      barrier_presolve(true),
+      barrier_presolve(false),
       cudss_deterministic(false),
       deterministic(false),
       barrier(false),
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index cf39c99e20..7c23784802 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -385,18 +385,20 @@ lp_status_t solve_linear_program_with_advanced_basis(
 
 template <typename i_t, typename f_t>
 lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& user_problem,
-                                              const simplex_solver_settings_t<i_t, f_t>& settings,
-                                              f_t start_time,
-                                              lp_solution_t<i_t, f_t>& solution)
+                                               const simplex_solver_settings_t<i_t, f_t>& settings,
+                                               f_t start_time,
+                                               lp_solution_t<i_t, f_t>& solution)
 {
   lp_status_t status = lp_status_t::UNSET;
   lp_problem_t<i_t, f_t> original_lp(user_problem.handle_ptr, 1, 1, 1);
 
   // Convert the user problem to a linear program with only equality constraints
   std::vector<i_t> new_slacks;
+  simplex_solver_settings_t<i_t, f_t> barrier_settings = settings;
+  barrier_settings.barrier_presolve                    = true;
   dualize_info_t<i_t, f_t> dualize_info;
-  convert_user_problem(user_problem, settings, original_lp, new_slacks, dualize_info);
-  if (!validate_barrier_cone_layout(original_lp, settings)) {
+  convert_user_problem(user_problem, barrier_settings, original_lp, new_slacks, dualize_info);
+  if (!validate_barrier_cone_layout(original_lp, barrier_settings)) {
     return lp_status_t::NUMERICAL_ISSUES;
   }
 
@@ -405,7 +407,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
   // Presolve the linear program
   presolve_info_t<i_t, f_t> presolve_info;
   lp_problem_t<i_t, f_t> presolved_lp(user_problem.handle_ptr, 1, 1, 1);
-  const i_t ok = presolve(original_lp, settings, presolved_lp, presolve_info);
+  const i_t ok = presolve(original_lp, barrier_settings, presolved_lp, presolve_info);
   if (ok == CONCURRENT_HALT_RETURN) { return lp_status_t::CONCURRENT_LIMIT; }
   if (ok == TIME_LIMIT_RETURN) { return lp_status_t::TIME_LIMIT; }
   if (ok == -1) { return lp_status_t::INFEASIBLE; }
@@ -417,12 +419,12 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
                                     presolved_lp.A.col_start[presolved_lp.num_cols]);
   std::vector<f_t> column_scales;
   std::vector<f_t> row_scales;
-  column_scaling(presolved_lp, settings, barrier_lp, column_scales, row_scales);
+  column_scaling(presolved_lp, barrier_settings, barrier_lp, column_scales, row_scales);
 
   // Solve using barrier
   lp_solution_t<i_t, f_t> barrier_solution(barrier_lp.num_rows, barrier_lp.num_cols);
 
-  barrier_solver_t<i_t, f_t> barrier_solver(barrier_lp, presolve_info, settings);
+  barrier_solver_t<i_t, f_t> barrier_solver(barrier_lp, presolve_info, barrier_settings);
   lp_status_t barrier_status = barrier_solver.solve(start_time, barrier_solution);
   if (barrier_status == lp_status_t::OPTIMAL) {
 #ifdef COMPUTE_SCALED_RESIDUALS
@@ -475,7 +477,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
 
     // Undo presolve
     uncrush_solution(presolve_info,
-                     settings,
+                     barrier_settings,
                      original_lp,
                      unscaled_x,
                      unscaled_y,
diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu
index 505b4e02f7..0b4c360b10 100644
--- a/cpp/src/math_optimization/solver_settings.cu
+++ b/cpp/src/math_optimization/solver_settings.cu
@@ -98,9 +98,6 @@ solver_settings_t<i_t, f_t>::solver_settings_t() : pdlp_settings(), mip_settings
     {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-5)},
     {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits<f_t>::infinity(), std::max(f_t(1e-10), std::numeric_limits<f_t>::epsilon())},
     {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, f_t(0.0), f_t(1e-1), f_t(1e-4)},
-    {CUOPT_BARRIER_RELATIVE_FEASIBILITY_TOLERANCE, &pdlp_settings.barrier_relative_feasibility_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)},
-    {CUOPT_BARRIER_RELATIVE_OPTIMALITY_TOLERANCE, &pdlp_settings.barrier_relative_optimality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)},
-    {CUOPT_BARRIER_RELATIVE_COMPLEMENTARITY_TOLERANCE, &pdlp_settings.barrier_relative_complementarity_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-8)},
     {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits<f_t>::epsilon())},
     {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits<f_t>::epsilon())},
     {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, f_t(-1.0), std::numeric_limits<f_t>::infinity(), f_t(-1.0)},
diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu
index cf741eee0b..4bbd2613d3 100644
--- a/cpp/src/pdlp/solve.cu
+++ b/cpp/src/pdlp/solve.cu
@@ -510,11 +510,6 @@ run_barrier(dual_simplex::user_problem_t<i_t, f_t>& user_problem,
   barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement;
   barrier_settings.barrier_step_scale           = settings.barrier_step_scale;
   barrier_settings.cudss_deterministic             = settings.cudss_deterministic;
-  barrier_settings.barrier_relative_feasibility_tol =
-    settings.barrier_relative_feasibility_tolerance;
-  barrier_settings.barrier_relative_optimality_tol = settings.barrier_relative_optimality_tolerance;
-  barrier_settings.barrier_relative_complementarity_tol =
-    settings.barrier_relative_complementarity_tolerance;
   barrier_settings.barrier_relaxed_feasibility_tol = settings.tolerances.relative_primal_tolerance;
   barrier_settings.barrier_relaxed_optimality_tol  = settings.tolerances.relative_dual_tolerance;
   barrier_settings.barrier_relaxed_complementarity_tol = settings.tolerances.relative_gap_tolerance;
diff --git a/cpp/tests/qp/unit_tests/no_constraints.cu b/cpp/tests/qp/unit_tests/no_constraints.cu
index dc99a2ce5c..75190b37fc 100644
--- a/cpp/tests/qp/unit_tests/no_constraints.cu
+++ b/cpp/tests/qp/unit_tests/no_constraints.cu
@@ -52,8 +52,6 @@ TEST(no_constraints_test, simple_test)
   op_problem.set_quadratic_objective_matrix(Q_values_host, 2, Q_indices_host, 2, Q_offsets_host, 3);
 
   auto settings = cuopt::linear_programming::pdlp_solver_settings_t<int, double>();
-  // Tighter than default: unconstrained QP needs small x at the origin, not just small objective.
-  settings.barrier_relative_complementarity_tolerance = 1e-12;
   auto solution = cuopt::linear_programming::solve_lp(op_problem, settings);
 
   EXPECT_EQ(solution.get_termination_status(),
diff --git a/cpp/tests/qp/unit_tests/two_variable_test.cu b/cpp/tests/qp/unit_tests/two_variable_test.cu
index f1fb4852ec..aac618994a 100644
--- a/cpp/tests/qp/unit_tests/two_variable_test.cu
+++ b/cpp/tests/qp/unit_tests/two_variable_test.cu
@@ -59,7 +59,6 @@ TEST(two_variable_test, simple_test)
   op_problem.set_quadratic_objective_matrix(Q_values_host, 2, Q_indices_host, 2, Q_offsets_host, 3);
 
   auto settings = cuopt::linear_programming::pdlp_solver_settings_t<int, double>();
-  settings.barrier_relative_complementarity_tolerance = 1e-12;
   auto solution = cuopt::linear_programming::solve_lp(op_problem, settings);
 
   EXPECT_EQ(solution.get_termination_status(),

From 6b2c011d1142cafd7ef567fc473dff13110db3bf Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 22 May 2026 16:37:26 -0700
Subject: [PATCH 04/10] Clean up barrier and presolve. Native -> Direct free
 variables.

---
 cpp/src/barrier/barrier.cu                    | 202 ++++++------
 cpp/src/barrier/dense_vector.hpp              |  22 --
 cpp/src/dual_simplex/presolve.cpp             | 295 +++++-------------
 cpp/src/dual_simplex/presolve.hpp             |   7 +-
 cpp/src/pdlp/solve.cu                         |   1 +
 .../dual_simplex/unit_tests/solve_barrier.cu  |  12 +-
 6 files changed, 187 insertions(+), 352 deletions(-)

diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu
index b39d75c6ae..a4c4b5b9bd 100644
--- a/cpp/src/barrier/barrier.cu
+++ b/cpp/src/barrier/barrier.cu
@@ -65,23 +65,23 @@ namespace cuopt::linear_programming::dual_simplex {
     cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream.value());
 }
 
-// out[i] = is_native_free_linear[i] ? 0 : a[i] * b[i]
-[[maybe_unused]] static void pairwise_multiply_skip_native_free_linear(
-  float* a, float* b, int* is_native_free_linear, float* out, int size, rmm::cuda_stream_view stream)
+// out[i] = is_direct_free_linear[i] ? 0 : a[i] * b[i]
+[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear(
+  float* a, float* b, int* is_direct_free_linear, float* out, int size, rmm::cuda_stream_view stream)
 {
   cub::DeviceTransform::Transform(
-    cuda::std::make_tuple(a, b, is_native_free_linear),
+    cuda::std::make_tuple(a, b, is_direct_free_linear),
     out,
     size,
     [] __host__ __device__(float x_j, float d_j, int free_j) { return free_j ? 0.f : x_j * d_j; },
     stream.value());
 }
 
-[[maybe_unused]] static void pairwise_multiply_skip_native_free_linear(
-  double* a, double* b, int* is_native_free_linear, double* out, int size, rmm::cuda_stream_view stream)
+[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear(
+  double* a, double* b, int* is_direct_free_linear, double* out, int size, rmm::cuda_stream_view stream)
 {
   cub::DeviceTransform::Transform(
-    cuda::std::make_tuple(a, b, is_native_free_linear),
+    cuda::std::make_tuple(a, b, is_direct_free_linear),
     out,
     size,
     [] __host__ __device__(double x_j, double d_j, int free_j) { return free_j ? 0.0 : x_j * d_j; },
@@ -120,7 +120,7 @@ class iteration_data_t {
  public:
   iteration_data_t(const lp_problem_t<i_t, f_t>& lp,
                    i_t num_upper_bounds,
-                   const std::vector<i_t>& native_free_linear_indices,
+                   const std::vector<i_t>& direct_free_variables,
                    const csc_matrix_t<i_t, f_t>& Qin,
                    const simplex_solver_settings_t<i_t, f_t>& settings)
     : upper_bounds(num_upper_bounds),
@@ -195,8 +195,8 @@ class iteration_data_t {
       d_cone_Q_values_(0, lp.handle_ptr->get_stream()),
       use_augmented(false),
       has_factorization(false),
-      n_native_free_linear(0),
-      d_is_native_free_linear_(0, lp.handle_ptr->get_stream()),
+      n_direct_free_linear(0),
+      d_is_direct_free_linear_(0, lp.handle_ptr->get_stream()),
       num_factorizations(0),
       has_solve_info(false),
       settings_(settings),
@@ -257,18 +257,18 @@ class iteration_data_t {
     raft::common::nvtx::range fun_scope("Barrier: LP Data Creation");
 
     {
-      raft::common::nvtx::range scope("Barrier: LP Data: native free linear");
-      // Set up native free linear variable tracking (linear columns only, j < cone_start).
-      if (!native_free_linear_indices.empty()) {
-        n_native_free_linear = native_free_linear_indices.size();
-        std::vector<i_t> is_native_free_linear_host(lp.num_cols, 0);
-        for (i_t j : native_free_linear_indices) {
-          is_native_free_linear_host[j] = 1;
+      raft::common::nvtx::range scope("Barrier: LP Data: direct free linear");
+      // Set up direct free linear variable tracking (linear columns only, j < cone_start).
+      if (!direct_free_variables.empty()) {
+        n_direct_free_linear = direct_free_variables.size();
+        std::vector<i_t> is_direct_free_linear_host(lp.num_cols, 0);
+        for (i_t j : direct_free_variables) {
+          is_direct_free_linear_host[j] = 1;
         }
-        d_is_native_free_linear_.resize(lp.num_cols, stream_view_);
+        d_is_direct_free_linear_.resize(lp.num_cols, stream_view_);
         raft::copy(
-          d_is_native_free_linear_.data(), is_native_free_linear_host.data(), lp.num_cols, stream_view_);
-        settings.log.printf("Native free linear (QP): %d\n", n_native_free_linear);
+          d_is_direct_free_linear_.data(), is_direct_free_linear_host.data(), lp.num_cols, stream_view_);
+        settings.log.printf("Direct free linear (QP): %d\n", n_direct_free_linear);
       }
     }
 
@@ -572,8 +572,8 @@ class iteration_data_t {
   {
     const bool has_soc = has_cones();
     f_t degree = static_cast<f_t>(num_primal_variables) + static_cast<f_t>(num_upper_bounds);
-    // Native QP free variables (linear only): no x·z complementarity in the barrier degree.
-    degree -= static_cast<f_t>(n_native_free_linear);
+    // Direct QP free variables (linear only): no x·z complementarity in the barrier degree.
+    degree -= static_cast<f_t>(n_direct_free_linear);
     if (has_soc) {
       degree -= static_cast<f_t>(cone_entry_count());
       degree += static_cast<f_t>(cone_count());
@@ -781,7 +781,7 @@ class iteration_data_t {
       const i_t linear_n = has_soc ? cone_start() : n;
 
       // Primal diagonal: linear block includes dual_perturb; SOC block is filled by scatter below.
-      // Native free variables use barrier D = 0 in augmented_multiply; omit span_diag[j] here so
+      // Direct free variables use barrier D = 0 in augmented_multiply; omit span_diag[j] here so
       // the factorized matrix matches the matvec (only -q_diag - dual_perturb on the diagonal).
       thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()),
                          thrust::make_counting_iterator<i_t>(0),
@@ -790,10 +790,10 @@ class iteration_data_t {
                           span_diag_indices  = cuopt::make_span(d_augmented_diagonal_indices_),
                           span_q_diag        = cuopt::make_span(d_Q_diag_),
                           span_diag          = cuopt::make_span(d_diag_),
-                          span_is_native_free_linear       = cuopt::make_span(d_is_native_free_linear_),
+                          span_is_direct_free_linear       = cuopt::make_span(d_is_direct_free_linear_),
                           dual_perturb_value = dual_perturb] __device__(i_t j) {
                            f_t q_diag     = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0;
-                           const f_t d_j  = (span_is_native_free_linear.size() > 0 && span_is_native_free_linear[j]) ? f_t(0) : span_diag[j];
+                           const f_t d_j  = (span_is_direct_free_linear.size() > 0 && span_is_direct_free_linear[j]) ? f_t(0) : span_diag[j];
                            span_x[span_diag_indices[j]] = -q_diag - d_j - dual_perturb_value;
                          });
       RAFT_CHECK_CUDA(handle_ptr->get_stream());
@@ -1725,11 +1725,11 @@ class iteration_data_t {
     rmm::device_uvector<f_t> d_r1(n, handle_ptr->get_stream());
     thrust::fill_n(rmm::exec_policy(stream_view_), d_r1.begin(), n, f_t(0));
 
-    // r1 <- D * x_1 on linear indices; barrier D is zero on native free variables
+    // r1 <- D * x_1 on linear indices; barrier D is zero on direct free variables
     const i_t linear_n = has_soc ? cone_start() : n;
-    if (n_native_free_linear > 0) {
-      pairwise_multiply_skip_native_free_linear(
-        d_x1.data(), d_diag_.data(), d_is_native_free_linear_.data(), d_r1.data(), linear_n, stream_view_);
+    if (n_direct_free_linear > 0) {
+      pairwise_multiply_skip_direct_free_linear(
+        d_x1.data(), d_diag_.data(), d_is_direct_free_linear_.data(), d_r1.data(), linear_n, stream_view_);
     } else {
       pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), linear_n, stream_view_);
     }
@@ -1852,8 +1852,8 @@ class iteration_data_t {
 
   bool use_augmented;
   i_t symbolic_status;
-  i_t n_native_free_linear{0};
-  rmm::device_uvector<i_t> d_is_native_free_linear_;  // 1 if native free linear (j < cone_start), else 0
+  i_t n_direct_free_linear{0};
+  rmm::device_uvector<i_t> d_is_direct_free_linear_;  // 1 if direct free linear (j < cone_start), else 0
 
   // Adaptive regularization for the augmented system
   f_t dual_perturb{1e-8};
@@ -2066,7 +2066,18 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
   raft::common::nvtx::range fun_scope("Barrier: initial_point");
   const bool use_augmented = data.use_augmented;
   const bool has_soc       = data.has_cones();
-  const bool has_native_free_linear      = data.n_native_free_linear > 0;
+  const bool has_direct_free_linear      = data.n_direct_free_linear > 0;
+
+  // Mask used by the two ADAT/augmented branches below to enforce z > 0 on the linear block
+  // while leaving the SOC cone block (kept feasible by NT scaling) alone.
+  std::vector<i_t> nonnegative_z(lp.num_cols, 1);
+  if (has_soc) {
+    const i_t cone_start = data.cone_start();
+    const i_t cone_end   = data.cone_end();
+    for (i_t i = cone_start; i < cone_end; ++i) {
+      nonnegative_z[i] = 0;
+    }
+  }
 
   // Perform a numerical factorization
   i_t status;
@@ -2215,8 +2226,8 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
       data.x[j] = mu;
       data.z[j] = mu;
     }
-    if (has_native_free_linear) {
-      for (i_t j : presolve_info.native_free_linear_indices) {
+    if (has_direct_free_linear) {
+      for (i_t j : presolve_info.direct_free_variables) {
         if (j < cs) { data.z[j] = 0.0; }
       }
     }
@@ -2285,8 +2296,8 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
       }
     }
     // Free variables have z = 0 (no complementarity condition)
-    if (has_native_free_linear) {
-      for (i_t j : presolve_info.native_free_linear_indices) {
+    if (has_direct_free_linear) {
+      for (i_t j : presolve_info.direct_free_variables) {
         data.z[j] = 0.0;
       }
     }
@@ -2310,7 +2321,7 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
     data.v.multiply_scalar(-1.0);
 
     data.v.ensure_positive(epsilon_adjust);
-    data.z.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count());
+    data.z.ensure_positive(epsilon_adjust, nonnegative_z);
   } else {
     // First compute rhs = A*Dinv*c
     dense_vector_t<i_t, f_t> rhs(lp.num_rows);
@@ -2334,7 +2345,7 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
     data.gather_upper_bounds(data.z, data.v);
     data.v.multiply_scalar(-1.0);
     data.v.ensure_positive(epsilon_adjust);
-    data.z.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count());
+    data.z.ensure_positive(epsilon_adjust, nonnegative_z);
   }
 
   // Verify A'*y + z - E*v  - Q*x = c
@@ -2351,32 +2362,25 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
   settings.log.printf("||A^T y + z - E*v - Q*x - c ||: %e\n",
                       vector_norm2<i_t, f_t>(data.dual_residual));
 #endif
-  // Make sure (w, x, v, z) > 0. Cone blocks are not nonnegative orthant variables — skip them for
-  // box positivity; NT scaling keeps cone iterates feasible separately.
-  std::vector<f_t> free_x_save;
-  if (has_native_free_linear) {
-    free_x_save.resize(data.n_native_free_linear);
-    for (i_t k = 0; k < data.n_native_free_linear; ++k) {
-      free_x_save[k] = data.x[presolve_info.native_free_linear_indices[k]];
-    }
-  }
+  // Make sure (w, x, v, z) > 0. Skip cone variables and free variables being handled directly.
   data.w.ensure_positive(epsilon_adjust);
+  std::vector<i_t> nonnegative_variables(data.x.size(), 1);
   if (has_soc) {
-    data.x.ensure_positive_skip_range(epsilon_adjust, data.cone_start(), data.cone_entry_count());
-  } else if (has_native_free_linear) {
-    std::vector<i_t> nonnegative_variables(data.x.size(), 1);
-    for (i_t j : presolve_info.native_free_linear_indices) {
+    const i_t cone_start = data.cone_start();
+    const i_t cone_end   = data.cone_end();
+    for (i_t i = cone_start; i < cone_end; ++i) {
+      nonnegative_variables[i] = 0;
+    }
+  }
+  if (has_direct_free_linear) {
+    for (i_t j : presolve_info.direct_free_variables) {
       nonnegative_variables[j] = 0;
     }
-    data.x.ensure_positive(epsilon_adjust, nonnegative_variables);
-  } else {
-    data.x.ensure_positive(epsilon_adjust);
   }
-  // Native free variables (QP/SOCP): restore possibly negative x and set z = 0
-  if (has_native_free_linear) {
-    for (i_t k = 0; k < data.n_native_free_linear; ++k) {
-      i_t j     = presolve_info.native_free_linear_indices[k];
-      data.x[j] = free_x_save[k];
+  data.x.ensure_positive(epsilon_adjust, nonnegative_variables);
+  // Direct free variables: reduced cost z = 0 (no complementarity condition).
+  if (has_direct_free_linear) {
+    for (i_t j : presolve_info.direct_free_variables) {
       data.z[j] = 0.0;
     }
   }
@@ -2592,23 +2596,23 @@ f_t barrier_solver_t<i_t, f_t>::gpu_max_step_to_boundary(iteration_data_t<i_t, f
                                                          const rmm::device_uvector<f_t>& dx)
 {
   const bool has_soc  = data.has_cones() && static_cast<i_t>(x.size()) >= data.cone_end();
-  const bool has_native_free_linear = data.n_native_free_linear > 0 && static_cast<i_t>(x.size()) == lp.num_cols;
+  const bool has_direct_free_linear = data.n_direct_free_linear > 0 && static_cast<i_t>(x.size()) == lp.num_cols;
 
   auto reduce_segment = [&](i_t start, i_t len) -> f_t {
     if (len <= 0) { return f_t(1); }
-    if (has_native_free_linear) {
-      auto native_free_linear_ptr = data.d_is_native_free_linear_.data() + start;
+    if (has_direct_free_linear) {
+      auto direct_free_linear_ptr = data.d_is_direct_free_linear_.data() + start;
       // step size computation for nonnegative variables with free variables
-      auto ratio_test_free = [native_free_linear_ptr] HD(const thrust::tuple<f_t, f_t, i_t> t) {
+      auto ratio_test_free = [direct_free_linear_ptr] HD(const thrust::tuple<f_t, f_t, i_t> t) {
         const f_t dx_val  = thrust::get<0>(t);
         const f_t x_val   = thrust::get<1>(t);
-        const i_t is_native_free_linear = thrust::get<2>(t);
-        if (is_native_free_linear) return f_t(1.0);
+        const i_t is_direct_free_linear = thrust::get<2>(t);
+        if (is_direct_free_linear) return f_t(1.0);
         if (dx_val < f_t(0.0)) return -x_val / dx_val;
         return f_t(1.0);
       };
       return data.transform_reduce_helper_.transform_reduce(
-        thrust::make_zip_iterator(dx.data() + start, x.data() + start, native_free_linear_ptr),
+        thrust::make_zip_iterator(dx.data() + start, x.data() + start, direct_free_linear_ptr),
         thrust::minimum<f_t>(),
         ratio_test_free,
         f_t(1.0),
@@ -2661,7 +2665,7 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
   const bool debug         = false;
   const bool use_augmented = data.use_augmented;
   const bool has_soc       = data.has_cones();
-  const bool has_native_free_linear      = data.n_native_free_linear > 0;
+  const bool has_direct_free_linear      = data.n_direct_free_linear > 0;
   const i_t m_c            = data.cone_entry_count();
   const i_t cone_var_start = data.cone_start();
   const i_t linear_size    = data.linear_xz_size(lp.num_cols);
@@ -2673,14 +2677,14 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
                                        raft::device_span<const f_t> x,
                                        raft::device_span<f_t> dz_span) {
     if (dz_span.empty()) return;
-    if (has_native_free_linear) {
+    if (has_direct_free_linear) {
       cub::DeviceTransform::Transform(
         cuda::std::make_tuple(
-          target.data(), z.data(), dx_span.data(), x.data(), data.d_is_native_free_linear_.data()),
+          target.data(), z.data(), dx_span.data(), x.data(), data.d_is_direct_free_linear_.data()),
         dz_span.data(),
         dz_span.size(),
-        [] HD(f_t target_val, f_t z_val, f_t dx_val, f_t x_val, i_t is_native_free_linear) {
-          if (is_native_free_linear) return f_t(0);
+        [] HD(f_t target_val, f_t z_val, f_t dx_val, f_t x_val, i_t is_direct_free_linear) {
+          if (is_direct_free_linear) return f_t(0);
           return target_val - (z_val * dx_val) / x_val;
         },
         stream_view_.value());
@@ -2730,16 +2734,16 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
     raft::common::nvtx::range fun_scope("Barrier: GPU diag, inv diag and sqrt inv diag formation");
 
     // diag = z ./ x on the linear (non-cone) coordinates; cone block is overwritten below.
-    if (has_native_free_linear) {
+    if (has_direct_free_linear) {
       if (use_augmented) {
-        // Augmented KKT: D = 0 on native free linear vars (Q curvature is in the KKT block, not D).
+        // Augmented KKT: D = 0 on direct free linear vars (Q curvature is in the KKT block, not D).
         cub::DeviceTransform::Transform(
           cuda::std::make_tuple(
-            data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data()),
+            data.d_z_.data(), data.d_x_.data(), data.d_is_direct_free_linear_.data()),
           data.d_diag_.data(),
           linear_size,
-          [] HD(f_t z_j, f_t x_j, i_t is_native_free_linear) {
-            return is_native_free_linear ? f_t(0) : (z_j / x_j);
+          [] HD(f_t z_j, f_t x_j, i_t is_direct_free_linear) {
+            return is_direct_free_linear ? f_t(0) : (z_j / x_j);
           },
           stream_view_.value());
       } else {
@@ -2747,21 +2751,21 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
         if (data.Q.n > 0 && data.Q_diagonal) {
           cub::DeviceTransform::Transform(
             cuda::std::make_tuple(
-              data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data(), data.d_Q_diag_.data()),
+              data.d_z_.data(), data.d_x_.data(), data.d_is_direct_free_linear_.data(), data.d_Q_diag_.data()),
             data.d_diag_.data(),
             linear_size,
-            [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_native_free_linear, f_t q_jj) {
-              if (!is_native_free_linear) return z_j / x_j;
+            [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_direct_free_linear, f_t q_jj) {
+              if (!is_direct_free_linear) return z_j / x_j;
               return (q_jj > f_t(0)) ? f_t(0) : free_var_reg;
             },
             stream_view_.value());
         } else {
           cub::DeviceTransform::Transform(
-            cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_native_free_linear_.data()),
+            cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_direct_free_linear_.data()),
             data.d_diag_.data(),
             linear_size,
-            [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_native_free_linear) {
-              return is_native_free_linear ? free_var_reg : (z_j / x_j);
+            [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_direct_free_linear) {
+              return is_direct_free_linear ? free_var_reg : (z_j / x_j);
             },
             stream_view_.value());
         }
@@ -2874,7 +2878,7 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
   }
 
   // Primal RHS: dual_rhs - complementarity_target + E*((wv_rhs - v.*bound_rhs)./w)
-  // (linear: target = xz_rhs/x; native free: no xz term). Used as d_r1_ (augmented) and
+  // (linear: target = xz_rhs/x; direct free: no xz term). Used as d_r1_ (augmented) and
   // unscaled input to ADAT's h = primal_rhs + A*inv_diag*tmp3.
   {
     raft::common::nvtx::range fun_scope("Barrier: GPU assemble primal RHS");
@@ -2894,16 +2898,16 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
         stream_view_.value());
       RAFT_CHECK_CUDA(stream_view_);
     }
-    if (has_native_free_linear) {
+    if (has_direct_free_linear) {
       cub::DeviceTransform::Transform(
         cuda::std::make_tuple(data.d_tmp3_.data(),
                               data.d_complementarity_target_.data(),
                               data.d_dual_rhs_.data(),
-                              data.d_is_native_free_linear_.data()),
+                              data.d_is_direct_free_linear_.data()),
         data.d_tmp3_.data(),
         lp.num_cols,
-        [] HD(f_t tmp3, f_t target, f_t dual_rhs, i_t is_native_free_linear) {
-          const f_t comp_term = is_native_free_linear ? f_t(0) : target;
+        [] HD(f_t tmp3, f_t target, f_t dual_rhs, i_t is_direct_free_linear) {
+          const f_t comp_term = is_direct_free_linear ? f_t(0) : target;
           return tmp3 + dual_rhs - comp_term;
         },
         stream_view_.value());
@@ -3517,14 +3521,14 @@ void fill_linear_complementarity_target(iteration_data_t<i_t, f_t>& data,
                                         rmm::cuda_stream_view stream)
 {
   if (target.empty()) return;
-  const bool has_native_free_linear = data.n_native_free_linear > 0;
-  if (has_native_free_linear) {
+  const bool has_direct_free_linear = data.n_direct_free_linear > 0;
+  if (has_direct_free_linear) {
     cub::DeviceTransform::Transform(
-      cuda::std::make_tuple(xz_rhs.data(), x.data(), data.d_is_native_free_linear_.data()),
+      cuda::std::make_tuple(xz_rhs.data(), x.data(), data.d_is_direct_free_linear_.data()),
       target.data(),
       target.size(),
-      [] HD(f_t complementarity_xz_rhs, f_t x_val, i_t is_native_free_linear) {
-        if (is_native_free_linear) return f_t(0);
+      [] HD(f_t complementarity_xz_rhs, f_t x_val, i_t is_direct_free_linear) {
+        if (is_direct_free_linear) return f_t(0);
         return complementarity_xz_rhs / x_val;
       },
       stream.value());
@@ -3730,21 +3734,21 @@ void barrier_solver_t<i_t, f_t>::compute_cc_rhs(iteration_data_t<i_t, f_t>& data
 {
   raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs");
   const bool has_soc    = data.has_cones();
-  const bool has_native_free_linear   = data.n_native_free_linear > 0;
+  const bool has_direct_free_linear   = data.n_direct_free_linear > 0;
   const i_t linear_size = data.linear_xz_size(lp.num_cols);
 
   auto fill_linear_cc_rhs = [&](raft::device_span<f_t> out,
                                 raft::device_span<const f_t> dx_aff,
                                 raft::device_span<const f_t> dz_aff) {
     if (out.empty()) return;
-    if (has_native_free_linear) {
-      auto native_free_linear_ptr = data.d_is_native_free_linear_.data();
+    if (has_direct_free_linear) {
+      auto direct_free_linear_ptr = data.d_is_direct_free_linear_.data();
       cub::DeviceTransform::Transform(
-        cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), native_free_linear_ptr),
+        cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), direct_free_linear_ptr),
         out.data(),
         out.size(),
-        [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_native_free_linear) {
-          return is_native_free_linear ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu);
+        [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_direct_free_linear) {
+          return is_direct_free_linear ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu);
         },
         stream_view_.value());
     } else {
@@ -4263,7 +4267,7 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, lp_solution_t<i_t,
     if (lp.Q.n > 0) { create_Q(lp, Q); }
 
     iteration_data_t<i_t, f_t> data(
-      lp, num_upper_bounds, presolve_info.native_free_linear_indices, Q, settings);
+      lp, num_upper_bounds, presolve_info.direct_free_variables, Q, settings);
     if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
       settings.log.printf("Barrier solver halted\n");
       return lp_status_t::CONCURRENT_LIMIT;
diff --git a/cpp/src/barrier/dense_vector.hpp b/cpp/src/barrier/dense_vector.hpp
index 2d5d0b335c..5d6f2b12ec 100644
--- a/cpp/src/barrier/dense_vector.hpp
+++ b/cpp/src/barrier/dense_vector.hpp
@@ -184,28 +184,6 @@ class dense_vector_t : public std::vector<f_t, Allocator> {
     }
   }
 
-  void ensure_positive_skip_range(f_t epsilon_adjust, i_t skip_start, i_t skip_count)
-  {
-    if (skip_count == 0) {
-      ensure_positive(epsilon_adjust);
-      return;
-    }
-    const i_t n        = this->size();
-    const i_t skip_end = skip_start + skip_count;
-    f_t min_val        = std::numeric_limits<f_t>::max();
-    for (i_t i = 0; i < n; i++) {
-      if (i >= skip_start && i < skip_end) continue;
-      min_val = std::min(min_val, (*this)[i]);
-    }
-    if (min_val <= 0.0) {
-      const f_t delta = -min_val + epsilon_adjust;
-      for (i_t i = 0; i < n; i++) {
-        if (i >= skip_start && i < skip_end) continue;
-        (*this)[i] += delta;
-      }
-    }
-  }
-
   void ensure_positive(f_t epsilon_adjust, const std::vector<i_t>& mask)
   {
     f_t min_x   = inf;
diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp
index 721465d2a1..6676660c29 100644
--- a/cpp/src/dual_simplex/presolve.cpp
+++ b/cpp/src/dual_simplex/presolve.cpp
@@ -33,10 +33,18 @@ i_t remove_empty_cols(lp_problem_t<i_t, f_t>& problem,
 {
   constexpr bool verbose = false;
   if (verbose) { printf("Removing %d empty columns\n", num_empty_cols); }
+  // We have a variable x_j that does not appear in any rows
+  // The cost function
+  // sum_{k != j} c_k * x_k + c_j * x_j
+  // becomes
+  // sum_{k != j} c_k * x_k + c_j * l_j if c_j > 0
+  // or
+  // sum_{k != j} c_k * x_k + c_j * u_j if c_j < 0
   presolve_info.removed_variables.reserve(num_empty_cols);
   presolve_info.removed_values.reserve(num_empty_cols);
   presolve_info.removed_reduced_costs.reserve(num_empty_cols);
 
+  // Check to see if a variable participates in a quadratic objective
   std::vector<bool> has_quadratic_term(problem.num_cols, false);
   i_t linear_cols = linear_var_count(problem);
 
@@ -45,6 +53,7 @@ i_t remove_empty_cols(lp_problem_t<i_t, f_t>& problem,
       const i_t row_start = problem.Q.row_start[j];
       const i_t row_end   = problem.Q.row_start[j + 1];
       if (row_end - row_start == 0) { continue; }
+      // Q is symmetric, so its sufficient to check only the row size
       has_quadratic_term[j] = true;
     }
   }
@@ -923,8 +932,8 @@ void convert_user_problem(const user_problem_t<i_t, f_t>& user_problem,
     problem.Q.x = user_problem.Q_values;
   }
 
-  // Artificial vars break [linear | cone]; only for barrier_presolve-off LP-style layouts.
-  if (!settings.barrier_presolve && problem.second_order_cone_dims.empty()) {
+  // Add artifical variables
+  if (!settings.barrier_presolve) {
     add_artifical_variables(problem, user_problem.range_rows, equality_rows, new_slacks);
   }
 }
@@ -946,7 +955,6 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; }
   }
 
-  // Barrier presolve phase 1: bound free linear variables from equality rows.
   if (settings.barrier_presolve && free_variables > 0) {
     // Try to remove free variables
     std::vector<i_t> constraints_to_check;
@@ -978,25 +986,25 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     std::vector<f_t> upper_bound_coefficient(problem.num_cols, 0.0);
 
     if (!constraints_to_check.empty()) {
+      // Check if the constraints are feasible
       csr_matrix_t<i_t, f_t> Arow(0, 0, 0);
       problem.A.to_compressed_row(Arow);
 
-      // Keep only rows safe for phase-1 bound inference: no cone columns, exactly one free linear.
+      // Keep only rows safe for bound inference: no cone columns
+      if (has_cones)
       {
         std::vector<i_t> safe_constraints;
         safe_constraints.reserve(constraints_to_check.size());
         for (i_t i : constraints_to_check) {
           bool touches_cone        = false;
-          i_t free_linear_in_row = 0;
           for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) {
             const i_t j = Arow.j[p];
             if (j >= linear_cols) {
               touches_cone = true;
               continue;
             }
-            if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_linear_in_row++; }
           }
-          if (touches_cone || free_linear_in_row != 1) { continue; }
+          if (touches_cone) { continue; }
           safe_constraints.push_back(i);
         }
         constraints_to_check.swap(safe_constraints);
@@ -1127,9 +1135,6 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
           problem.lower[j] = -inf;
         }
       }
-      if (!(problem.lower[j] == -inf && problem.upper[j] == inf)) {
-        presolve_info.phase1_bounded_linear_indices.push_back(j);
-      }
     }
 
     // Record bounded free variables for dual correction in uncrush.
@@ -1160,7 +1165,6 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     }
   }
 
-  // Barrier presolve phase 2: negate one-sided bounds (-inf < x <= u -> -u <= x < inf).
   // The original problem may have a variable without a lower bound
   // but a finite upper bound
   // -inf < x_j <= u_j (linear variables only)
@@ -1211,8 +1215,8 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     }
   }
 
-  // Barrier presolve phase 3: shift nonzero lower bounds to zero (cone/stack columns not shifted).
-  // 0 != l_j <= x_j <= u_j (linear variables only; cone/stack columns are not shifted)
+  // The original problem may have nonzero lower bounds
+  // 0 != l_j <= x_j <= u_j
   i_t nonzero_lower_bounds = 0;
   for (i_t j = 0; j < linear_cols; j++) {
     if (problem.lower[j] != 0.0 && problem.lower[j] > -inf) { nonzero_lower_bounds++; }
@@ -1287,7 +1291,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     }
   }
 
-  // Barrier presolve phase 4: remove empty rows and empty linear columns.
+  // Check for empty rows
   i_t num_empty_rows = 0;
   {
     csr_matrix_t<i_t, f_t> Arow(0, 0, 0);
@@ -1321,26 +1325,24 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
   }
   problem.Q.check_matrix("Before free variable expansion");
 
-  // Barrier presolve phase 5: free linear variables — 5a native (QP/SOCP) or 5b v-w split (LP).
-  // QP/SOCP augmented system: keep free linears as-is (no x = v - w); barrier uses D = 0 on them.
-  const bool register_native_free_linear = free_variables > 0 && (problem.Q.n > 0 || has_cones);
-  if (register_native_free_linear) {
+  // Free linear variables. We handle them directly in QP/SOCP or split them in LP.
+  const bool direct_free_linear = settings.barrier_presolve && free_variables > 0 && (problem.Q.n > 0 || has_cones);
+  if (direct_free_linear) {
     presolve_info.free_variable_pairs.clear();
-    presolve_info.native_free_linear_indices.clear();
-    // Only linear decision variables can be "native" free variables; cone/stack columns
+    presolve_info.direct_free_variables.clear();
+    // Only free linear decision variables need to be handled; cone/stack columns
     // are unbounded by construction and must not be counted here.
-    i_t native_free_count = 0;
+    i_t direct_free_count = 0;
     for (i_t j = 0; j < linear_cols; j++) {
       if (problem.lower[j] == -inf && problem.upper[j] == inf) {
-        presolve_info.native_free_linear_indices.push_back(j);
-        native_free_count++;
+        presolve_info.direct_free_variables.push_back(j);
+        direct_free_count++;
       }
     }
-    settings.log.printf(
-      "Keeping %d native free linear variables for augmented-system barrier (QP/SOCP)\n",
-      native_free_count);
+    settings.log.printf("Handling %d free variables directly in augmented system\n",
+                        direct_free_count);
   } else if (settings.barrier_presolve && !has_cones && free_variables > 0) {
-    // Phase 5b: x_j = v - w with v, w >= 0 (LP without cones; SOCP/QP use phase 5a).
+    // For pure LP problems (Q is empty and there are no cones in this branch)
     // We have a variable x_j: with -inf < x_j < inf
     // we create new variables v and w with 0 <= v, w and x_j = v - w
     // Constraints
@@ -1353,191 +1355,55 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     // becomes
     // sum_{k != j} c_k x_k + c_j v - c_j w
 
-    const i_t old_num_cols = problem.num_cols;
-    const i_t new_cone_start =
-      problem.second_order_cone_dims.empty() ? 0 : linear_cols + free_variables;
-    const i_t num_cols = old_num_cols + free_variables;
-
-    auto old_A         = problem.A;
-    auto old_Q         = problem.Q;
-    auto old_objective = problem.objective;
-    auto old_lower     = problem.lower;
-    auto old_upper     = problem.upper;
-
-    std::vector<i_t> partner_index(old_num_cols, -1);
-    std::vector<i_t> orig_to_new(old_num_cols, -1);
-    std::vector<bool> is_free(old_num_cols, false);
-
-    i_t next_partner = linear_cols;
-    for (i_t j = 0; j < linear_cols; ++j) {
-      orig_to_new[j] = j;
-      if (old_lower[j] == -inf && old_upper[j] == inf) {
-        is_free[j]       = true;
-        partner_index[j] = next_partner++;
-      }
-    }
-    for (i_t j = linear_cols; j < old_num_cols; ++j) {
-      orig_to_new[j] = j + free_variables;
-    }
-    assert(next_partner == new_cone_start || problem.second_order_cone_dims.empty());
-
-    i_t nnz = old_A.col_start[old_num_cols];
-    for (i_t j = 0; j < linear_cols; ++j) {
-      if (is_free[j]) { nnz += old_A.col_start[j + 1] - old_A.col_start[j]; }
-    }
-
-    csc_matrix_t<i_t, f_t> expanded_A(problem.A.m, num_cols, nnz);
-    i_t nz = 0;
-    for (i_t j = 0; j < linear_cols; ++j) {
-      expanded_A.col_start[j] = nz;
-      for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) {
-        expanded_A.i[nz] = old_A.i[p];
-        expanded_A.x[nz] = old_A.x[p];
-        ++nz;
-      }
-    }
-    for (i_t j = 0; j < linear_cols; ++j) {
-      if (partner_index[j] != -1) {
-        expanded_A.col_start[partner_index[j]] = nz;
-        for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) {
-          expanded_A.i[nz] = old_A.i[p];
-          expanded_A.x[nz] = -old_A.x[p];
-          ++nz;
-        }
-      }
-    }
-    for (i_t j = linear_cols; j < old_num_cols; ++j) {
-      i_t new_j                   = orig_to_new[j];
-      expanded_A.col_start[new_j] = nz;
-      for (i_t p = old_A.col_start[j]; p < old_A.col_start[j + 1]; ++p) {
-        expanded_A.i[nz] = old_A.i[p];
-        expanded_A.x[nz] = old_A.x[p];
-        ++nz;
-      }
-    }
-    expanded_A.col_start[num_cols] = nz;
-
-    std::vector<f_t> objective(num_cols);
-    std::vector<f_t> lower(num_cols, -INFINITY);
-    std::vector<f_t> upper(num_cols, INFINITY);
-    presolve_info.free_variable_pairs.clear();
-    presolve_info.free_variable_pairs.reserve(free_variables * 2);
-
-    for (i_t j = 0; j < linear_cols; ++j) {
-      objective[j] = old_objective[j];
-      if (is_free[j]) {
-        lower[j]                    = 0.0;
-        upper[j]                    = inf;
-        objective[partner_index[j]] = -old_objective[j];
-        lower[partner_index[j]]     = 0.0;
-        upper[partner_index[j]]     = inf;
-        presolve_info.free_variable_pairs.push_back(j);
-        presolve_info.free_variable_pairs.push_back(partner_index[j]);
-      } else {
-        lower[j] = old_lower[j];
-        upper[j] = old_upper[j];
+    i_t num_cols = problem.num_cols + free_variables;
+    i_t nnz      = problem.A.col_start[problem.num_cols];
+    for (i_t j = 0; j < problem.num_cols; j++) {
+      if (problem.lower[j] == -inf && problem.upper[j] == inf) {
+        nnz += (problem.A.col_start[j + 1] - problem.A.col_start[j]);
       }
     }
-    for (i_t j = linear_cols; j < old_num_cols; ++j) {
-      i_t new_j        = orig_to_new[j];
-      objective[new_j] = old_objective[j];
-      lower[new_j]     = old_lower[j];
-      upper[new_j]     = old_upper[j];
-    }
 
-    if (old_Q.n > 0) {
-      std::vector<i_t> row_counts(num_cols, 0);
-      i_t nz_count = 0;
-      for (i_t row = 0; row < old_Q.n; ++row) {
-        i_t new_row     = orig_to_new[row];
-        i_t partner_row = partner_index[row];
-        i_t q_start     = old_Q.row_start[row];
-        i_t q_end       = old_Q.row_start[row + 1];
-        for (i_t qj = q_start; qj < q_end; ++qj) {
-          i_t col         = old_Q.j[qj];
-          i_t partner_col = partner_index[col];
-          row_counts[new_row]++;
-          nz_count++;
-          if (partner_col != -1) {
-            row_counts[new_row]++;
-            nz_count++;
-          }
-          if (partner_row != -1) {
-            row_counts[partner_row]++;
-            nz_count++;
-            if (partner_col != -1) {
-              row_counts[partner_row]++;
-              nz_count++;
-            }
-          }
-        }
-      }
+    problem.A.col_start.resize(num_cols + 1);
+    problem.A.i.resize(nnz);
+    problem.A.x.resize(nnz);
+    problem.lower.resize(num_cols);
+    problem.upper.resize(num_cols);
+    problem.objective.resize(num_cols);
 
-      std::vector<i_t> Q_row_start(num_cols + 1);
-      Q_row_start[0] = 0;
-      for (i_t row = 0; row < num_cols; ++row) {
-        Q_row_start[row + 1] = Q_row_start[row] + row_counts[row];
-      }
-      std::vector<i_t> Q_j(nz_count);
-      std::vector<f_t> Q_x(nz_count);
-      auto row_starts = Q_row_start;
-      for (i_t row = 0; row < old_Q.n; ++row) {
-        i_t new_row     = orig_to_new[row];
-        i_t partner_row = partner_index[row];
-        i_t q_start     = old_Q.row_start[row];
-        i_t q_end       = old_Q.row_start[row + 1];
-        for (i_t qj = q_start; qj < q_end; ++qj) {
-          i_t col         = old_Q.j[qj];
-          f_t qij         = old_Q.x[qj];
-          i_t new_col     = orig_to_new[col];
-          i_t partner_col = partner_index[col];
-
-          Q_j[row_starts[new_row]] = new_col;
-          Q_x[row_starts[new_row]] = qij;
-          row_starts[new_row]++;
-
-          if (partner_col != -1) {
-            Q_j[row_starts[new_row]] = partner_col;
-            Q_x[row_starts[new_row]] = -qij;
-            row_starts[new_row]++;
-          }
-          if (partner_row != -1) {
-            Q_j[row_starts[partner_row]] = new_col;
-            Q_x[row_starts[partner_row]] = -qij;
-            row_starts[partner_row]++;
-            if (partner_col != -1) {
-              Q_j[row_starts[partner_row]] = partner_col;
-              Q_x[row_starts[partner_row]] = qij;
-              row_starts[partner_row]++;
-            }
-          }
+    presolve_info.free_variable_pairs.resize(free_variables * 2);
+    i_t pair_count = 0;
+    i_t q          = problem.A.col_start[problem.num_cols];
+    i_t col        = problem.num_cols;
+    for (i_t j = 0; j < problem.num_cols; j++) {
+      if (problem.lower[j] == -inf && problem.upper[j] == inf) {
+        for (i_t p = problem.A.col_start[j]; p < problem.A.col_start[j + 1]; p++) {
+          i_t i          = problem.A.i[p];
+          f_t aij        = problem.A.x[p];
+          problem.A.i[q] = i;
+          problem.A.x[q] = -aij;
+          q++;
         }
+        problem.lower[col]                              = 0.0;
+        problem.upper[col]                              = inf;
+        problem.objective[col]                          = -problem.objective[j];
+        presolve_info.free_variable_pairs[pair_count++] = j;
+        presolve_info.free_variable_pairs[pair_count++] = col;
+        problem.A.col_start[++col]                      = q;
+        problem.lower[j]                                = 0.0;
       }
-
-      problem.Q.m = problem.Q.n = num_cols;
-      problem.Q.nz_max          = Q_row_start[num_cols];
-      problem.Q.row_start       = Q_row_start;
-      problem.Q.j               = Q_j;
-      problem.Q.x               = Q_x;
-      problem.Q.check_matrix("After free variable expansion");
     }
 
-    problem.A         = expanded_A;
-    problem.A.n       = num_cols;
-    problem.objective = objective;
-    problem.lower     = lower;
-    problem.upper     = upper;
-    problem.num_cols  = num_cols;
-    if (!problem.second_order_cone_dims.empty()) { problem.cone_var_start = new_cone_start; }
+    problem.A.n      = num_cols;
+    problem.num_cols = num_cols;
   }
 
   if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0 &&
-      problem.second_order_cone_dims.empty()) {
+      !has_cones) {
     folding(problem, settings, presolve_info);
   }
 
   // Check for dependent rows
-  bool check_dependent_rows = false;  // settings.barrier;
+  bool check_dependent_rows = false;
   if (check_dependent_rows) {
     std::vector<i_t> dependent_rows;
     constexpr i_t kOk = -1;
@@ -1892,25 +1758,13 @@ void uncrush_solution(const presolve_info_t<i_t, f_t>& presolve_info,
   if (num_free_variables > 0) {
     settings.log.printf("Post-solve: Handling free variables %d\n", num_free_variables);
     // We added free variables so we need to map the crushed solution back to the original variables
-    std::vector<bool> remove_partner(input_x.size(), false);
     for (i_t k = 0; k < 2 * num_free_variables; k += 2) {
       const i_t u = free_variable_pairs[k];
       const i_t v = free_variable_pairs[k + 1];
       input_x[u] -= input_x[v];
-      remove_partner[v] = true;
-    }
-    std::vector<f_t> compact_x;
-    std::vector<f_t> compact_z;
-    compact_x.reserve(input_x.size() - num_free_variables);
-    compact_z.reserve(input_z.size() - num_free_variables);
-    for (i_t j = 0; j < static_cast<i_t>(input_x.size()); ++j) {
-      if (!remove_partner[j]) {
-        compact_x.push_back(input_x[j]);
-        compact_z.push_back(input_z[j]);
-      }
     }
-    input_x = compact_x;
-    input_z = compact_z;
+    input_z.resize(input_z.size() - num_free_variables);
+    input_x.resize(input_x.size() - num_free_variables);
   }
 
   if (presolve_info.removed_variables.size() > 0) {
@@ -1981,11 +1835,6 @@ void uncrush_solution(const presolve_info_t<i_t, f_t>& presolve_info,
     }
   }
 
-  if (!presolve_info.phase1_bounded_linear_indices.empty()) {
-    settings.log.printf("Post-solve: %d linear column(s) had phase-1 bound tightening (x unchanged)\n",
-                        static_cast<int>(presolve_info.phase1_bounded_linear_indices.size()));
-  }
-
   // Dual correction for originally free variables that received implied bounds.
   // Barrier produced (y, z) with z_j != 0 satisfying A^T y + z = c + Qx.
   // We need corrected (y_bar, z_bar) with z_bar_j = 0 for all j in F_b where
@@ -2004,8 +1853,6 @@ void uncrush_solution(const presolve_info_t<i_t, f_t>& presolve_info,
     settings.log.printf("Post-solve: Correcting duals for %d bounded free variables\n",
                         static_cast<i_t>(presolve_info.bounded_free_variables.size()));
     const csc_matrix_t<i_t, f_t>& A = original_problem.A;
-    csr_matrix_t<i_t, f_t> A_row(A.m, A.n, A.nnz());
-    A.to_compressed_row(A_row);
     // Traverse in reverse order, to ensure that all z_j = 0 after the correction
     for (auto it = presolve_info.bounded_free_variables.rbegin();
          it != presolve_info.bounded_free_variables.rend();
@@ -2015,9 +1862,15 @@ void uncrush_solution(const presolve_info_t<i_t, f_t>& presolve_info,
       if (w_j == 0.0) { continue; }
       const f_t du = w_j / bfv.coefficient;
       input_y[bfv.constraint] += du;
-      const auto [row_start, row_end] = A_row.get_constraint_range(bfv.constraint);
-      for (i_t p = row_start; p < row_end; ++p) {
-        input_z[A_row.j[p]] -= A_row.x[p] * du;
+      for (i_t j = 0; j < A.n; j++) {
+        const i_t col_start = A.col_start[j];
+        const i_t col_end   = A.col_start[j + 1];
+        for (i_t p = col_start; p < col_end; p++) {
+          if (A.i[p] == bfv.constraint) {
+            input_z[j] -= A.x[p] * du;
+            break;
+          }
+        }
       }
     }
   }
diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp
index bc67627c21..add97eafce 100644
--- a/cpp/src/dual_simplex/presolve.hpp
+++ b/cpp/src/dual_simplex/presolve.hpp
@@ -165,11 +165,10 @@ struct presolve_info_t {
   // Variables that were negated to handle -inf < x_j <= u_j
   std::vector<i_t> negated_variables;
 
-  // Free variable indices for QP/SOCP augmented barrier (not split, handled natively)
-  std::vector<i_t> native_free_linear_indices;
+  // Free variable indices that the barrier solver handles directly in the augmented system
+  // (not split into v - w). Used for QP/SOCP.
+  std::vector<i_t> direct_free_variables;
 
-  // Linear columns where phase 1 tightened a fully free var to a one-sided bound (same x index)
-  std::vector<i_t> phase1_bounded_linear_indices;
   // Originally-free variables that received implied bounds, with the constraint used
   std::vector<bounded_free_var_t<i_t, f_t>> bounded_free_variables;
 };
diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu
index 4bbd2613d3..b747203061 100644
--- a/cpp/src/pdlp/solve.cu
+++ b/cpp/src/pdlp/solve.cu
@@ -505,6 +505,7 @@ run_barrier(dual_simplex::user_problem_t<i_t, f_t>& user_problem,
   barrier_settings.ordering                        = settings.ordering;
   barrier_settings.barrier_dual_initial_point      = settings.barrier_dual_initial_point;
   barrier_settings.barrier                         = true;
+  barrier_settings.barrier_presolve                = true;
   barrier_settings.crossover                       = settings.crossover;
   barrier_settings.eliminate_dense_columns         = settings.eliminate_dense_columns;
   barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement;
diff --git a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
index ae4efb9b86..4058bf3aa9 100644
--- a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
+++ b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
@@ -305,10 +305,10 @@ TEST(barrier, presolve_reindexes_cone_start_after_empty_column_removal)
   EXPECT_EQ(barrier_lp.cone_var_start, 0);
 }
 
-TEST(barrier, presolve_keeps_native_free_variables_before_cones)
+TEST(barrier, presolve_keeps_direct_free_variables_before_cones)
 {
   // Layout: [x0, x1 | cone x2, x3, x4] with x0, x1 free and a 3-dimensional SOC block.
-  // SOCP barrier presolve keeps native free variables (no x = v - w split); cone_var_start
+  // SOCP barrier presolve keeps direct free variables (no x = v - w split); cone_var_start
   // and column count stay unchanged.
   raft::handle_t handle{};
   init_handler(&handle);
@@ -361,9 +361,9 @@ TEST(barrier, presolve_keeps_native_free_variables_before_cones)
   EXPECT_EQ(presolved_lp.cone_var_start, 2);
   EXPECT_EQ(presolved_lp.second_order_cone_dims, std::vector<int>({3}));
   EXPECT_TRUE(presolve_info.free_variable_pairs.empty());
-  ASSERT_EQ(presolve_info.native_free_linear_indices.size(), 2);
-  EXPECT_EQ(presolve_info.native_free_linear_indices[0], 0);
-  EXPECT_EQ(presolve_info.native_free_linear_indices[1], 1);
+  ASSERT_EQ(presolve_info.direct_free_variables.size(), 2);
+  EXPECT_EQ(presolve_info.direct_free_variables[0], 0);
+  EXPECT_EQ(presolve_info.direct_free_variables[1], 1);
   EXPECT_EQ(presolved_lp.lower[0], -inf);
   EXPECT_EQ(presolved_lp.lower[1], -inf);
   EXPECT_EQ(presolved_lp.upper[0], inf);
@@ -874,7 +874,7 @@ TEST(barrier, free_linear_prefix_is_uncrushed_correctly_with_soc_block)
   //            u     = 1
   //            (t, u, v) in Q^3
   //
-  // Native free variable l is kept through presolve; end-to-end solve returns
+  // Direct free variable l is kept through presolve; end-to-end solve returns
   // l* = 1, t* = 1, u* = 1, v* = 0, obj* = 1.
   raft::handle_t handle{};
   init_handler(&handle);

From 03918bdc1bcd918869ad32addb00fc6e008271c3 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 22 May 2026 16:39:44 -0700
Subject: [PATCH 05/10] Style fixes

---
 cpp/src/barrier/barrier.cu                    | 309 ++++++++++--------
 cpp/src/dual_simplex/presolve.cpp             |  22 +-
 cpp/src/dual_simplex/scaling.cpp              |   5 +-
 .../dual_simplex/simplex_solver_settings.hpp  |   4 +-
 cpp/src/dual_simplex/solve.cpp                |  10 +-
 cpp/src/pdlp/solve.cu                         |   4 +-
 .../dual_simplex/unit_tests/solve_barrier.cu  |  54 +--
 .../data_model/data_model.py                  |   1 -
 .../data_model/data_model_wrapper.pxd         |   2 +-
 .../cuopt/cuopt/linear_programming/problem.py |  10 +-
 10 files changed, 221 insertions(+), 200 deletions(-)

diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu
index a4c4b5b9bd..375d1b760e 100644
--- a/cpp/src/barrier/barrier.cu
+++ b/cpp/src/barrier/barrier.cu
@@ -66,8 +66,12 @@ namespace cuopt::linear_programming::dual_simplex {
 }
 
 // out[i] = is_direct_free_linear[i] ? 0 : a[i] * b[i]
-[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear(
-  float* a, float* b, int* is_direct_free_linear, float* out, int size, rmm::cuda_stream_view stream)
+[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear(float* a,
+                                                                       float* b,
+                                                                       int* is_direct_free_linear,
+                                                                       float* out,
+                                                                       int size,
+                                                                       rmm::cuda_stream_view stream)
 {
   cub::DeviceTransform::Transform(
     cuda::std::make_tuple(a, b, is_direct_free_linear),
@@ -77,8 +81,12 @@ namespace cuopt::linear_programming::dual_simplex {
     stream.value());
 }
 
-[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear(
-  double* a, double* b, int* is_direct_free_linear, double* out, int size, rmm::cuda_stream_view stream)
+[[maybe_unused]] static void pairwise_multiply_skip_direct_free_linear(double* a,
+                                                                       double* b,
+                                                                       int* is_direct_free_linear,
+                                                                       double* out,
+                                                                       int size,
+                                                                       rmm::cuda_stream_view stream)
 {
   cub::DeviceTransform::Transform(
     cuda::std::make_tuple(a, b, is_direct_free_linear),
@@ -266,8 +274,10 @@ class iteration_data_t {
           is_direct_free_linear_host[j] = 1;
         }
         d_is_direct_free_linear_.resize(lp.num_cols, stream_view_);
-        raft::copy(
-          d_is_direct_free_linear_.data(), is_direct_free_linear_host.data(), lp.num_cols, stream_view_);
+        raft::copy(d_is_direct_free_linear_.data(),
+                   is_direct_free_linear_host.data(),
+                   lp.num_cols,
+                   stream_view_);
         settings.log.printf("Direct free linear (QP): %d\n", n_direct_free_linear);
       }
     }
@@ -316,9 +326,9 @@ class iteration_data_t {
     {
       raft::common::nvtx::range scope("Barrier: LP Data: SOC setup");
       if (!lp.second_order_cone_dims.empty()) {
-        cone_var_start_ = lp.cone_var_start;
-        i_t total_cone_dim =
-          std::accumulate(lp.second_order_cone_dims.begin(), lp.second_order_cone_dims.end(), i_t(0));
+        cone_var_start_    = lp.cone_var_start;
+        i_t total_cone_dim = std::accumulate(
+          lp.second_order_cone_dims.begin(), lp.second_order_cone_dims.end(), i_t(0));
         cuopt_assert(cone_var_start_ >= 0, "cone_var_start must be nonnegative");
         cuopt_assert(cone_var_start_ + total_cone_dim <= lp.num_cols,
                      "cone variables exceed problem dimension");
@@ -330,7 +340,8 @@ class iteration_data_t {
           raft::device_span<f_t>{},
           stream_view_);
         cuopt_assert(cone_count() > 0, "second-order cone topology must contain at least one cone");
-        cuopt_assert(cone_entry_count() == total_cone_dim, "second-order cone entry count mismatch");
+        cuopt_assert(cone_entry_count() == total_cone_dim,
+                     "second-order cone entry count mismatch");
       }
     }
 
@@ -505,7 +516,7 @@ class iteration_data_t {
     {
       raft::common::nvtx::range scope("Barrier: LP Data: Cholesky init");
       i_t factorization_size = use_augmented ? lp.num_rows + lp.num_cols : lp.num_rows;
-      chol = std::make_unique<sparse_cholesky_cudss_t<i_t, f_t>>(
+      chol                   = std::make_unique<sparse_cholesky_cudss_t<i_t, f_t>>(
         handle_ptr, settings, factorization_size);
       chol->set_positive_definite(false);
     }
@@ -624,91 +635,91 @@ class iteration_data_t {
       {
         raft::common::nvtx::range scope("Barrier: augmented: host CSR build");
         for (i_t i = 0; i < n; i++) {
-        augmented_CSR.row_start[i] = q;
-
-        const bool is_cone_row = is_cone_variable(i);
-
-        if (is_cone_row) {
-          // Determine which cone this variable belongs to and its local row
-          i_t local_idx = i - cone_start();
-          i_t k         = 0;
-          while (k + 1 < cone_count() &&
-                 cone_offsets_host[k + 1] <= static_cast<std::size_t>(local_idx)) {
-            k++;
-          }
-          i_t local_r =
-            static_cast<i_t>(static_cast<std::size_t>(local_idx) - cone_offsets_host[k]);
-          i_t q_k            = static_cast<i_t>(cone_offsets_host[k + 1] - cone_offsets_host[k]);
-          i_t cone_col_start = cone_start() + static_cast<i_t>(cone_offsets_host[k]);
-          i_t block_base     = static_cast<i_t>(cone_block_offsets_host[k]) + local_r * q_k;
-
-          // Merge-join: Q entries (sorted) with dense cone block columns (contiguous)
-          i_t qp    = (nnzQ > 0) ? Q.col_start[i] : 0;
-          i_t q_end = (nnzQ > 0) ? Q.col_start[i + 1] : 0;
-
-          // Q entries before cone block
-          while (qp < q_end && Q.i[qp] < cone_col_start) {
-            augmented_CSR.j[q]   = Q.i[qp];
-            augmented_CSR.x[q++] = -Q.x[qp];
-            off_diag_Qnz++;
-            qp++;
-          }
+          augmented_CSR.row_start[i] = q;
 
-          // Dense cone block, absorbing any Q entries that fall inside
-          for (i_t c = 0; c < q_k; c++) {
-            i_t col         = cone_col_start + c;
-            f_t q_contrib   = f_t(0);
-            f_t initial_val = (c == local_r) ? f_t(-dual_perturb)
-                                             : f_t(0);  // diagonal entry of the cone block column
+          const bool is_cone_row = is_cone_variable(i);
 
-            if (qp < q_end && Q.i[qp] == col) {
-              q_contrib = Q.x[qp];
+          if (is_cone_row) {
+            // Determine which cone this variable belongs to and its local row
+            i_t local_idx = i - cone_start();
+            i_t k         = 0;
+            while (k + 1 < cone_count() &&
+                   cone_offsets_host[k + 1] <= static_cast<std::size_t>(local_idx)) {
+              k++;
+            }
+            i_t local_r =
+              static_cast<i_t>(static_cast<std::size_t>(local_idx) - cone_offsets_host[k]);
+            i_t q_k            = static_cast<i_t>(cone_offsets_host[k + 1] - cone_offsets_host[k]);
+            i_t cone_col_start = cone_start() + static_cast<i_t>(cone_offsets_host[k]);
+            i_t block_base     = static_cast<i_t>(cone_block_offsets_host[k]) + local_r * q_k;
+
+            // Merge-join: Q entries (sorted) with dense cone block columns (contiguous)
+            i_t qp    = (nnzQ > 0) ? Q.col_start[i] : 0;
+            i_t q_end = (nnzQ > 0) ? Q.col_start[i + 1] : 0;
+
+            // Q entries before cone block
+            while (qp < q_end && Q.i[qp] < cone_col_start) {
+              augmented_CSR.j[q]   = Q.i[qp];
+              augmented_CSR.x[q++] = -Q.x[qp];
+              off_diag_Qnz++;
               qp++;
             }
 
-            cone_csr_indices_host[block_base + c] = q;
-            cone_Q_values_host[block_base + c]    = q_contrib;
-            if (col == i) { augmented_diagonal_indices[i] = q; }
-            augmented_CSR.j[q]   = col;
-            augmented_CSR.x[q++] = initial_val - q_contrib;
-          }
+            // Dense cone block, absorbing any Q entries that fall inside
+            for (i_t c = 0; c < q_k; c++) {
+              i_t col         = cone_col_start + c;
+              f_t q_contrib   = f_t(0);
+              f_t initial_val = (c == local_r) ? f_t(-dual_perturb)
+                                               : f_t(0);  // diagonal entry of the cone block column
+
+              if (qp < q_end && Q.i[qp] == col) {
+                q_contrib = Q.x[qp];
+                qp++;
+              }
+
+              cone_csr_indices_host[block_base + c] = q;
+              cone_Q_values_host[block_base + c]    = q_contrib;
+              if (col == i) { augmented_diagonal_indices[i] = q; }
+              augmented_CSR.j[q]   = col;
+              augmented_CSR.x[q++] = initial_val - q_contrib;
+            }
 
-          // Q entries after cone block
-          while (qp < q_end) {
-            augmented_CSR.j[q]   = Q.i[qp];
-            augmented_CSR.x[q++] = -Q.x[qp];
-            off_diag_Qnz++;
-            qp++;
-          }
-        } else if (nnzQ == 0) {
-          augmented_diagonal_indices[i] = q;
-          augmented_CSR.j[q]            = i;
-          augmented_CSR.x[q++]          = -diag[i] - dual_perturb;
-        } else {
-          // Q is symmetric
-          const i_t q_col_beg = Q.col_start[i];
-          const i_t q_col_end = Q.col_start[i + 1];
-          bool has_diagonal   = false;
-          for (i_t p = q_col_beg; p < q_col_end; ++p) {
-            augmented_CSR.j[q] = Q.i[p];
-            if (Q.i[p] == i) {
-              has_diagonal                  = true;
-              augmented_diagonal_indices[i] = q;
-              augmented_CSR.x[q++]          = -Q.x[p] - diag[i] - dual_perturb;
-            } else {
+            // Q entries after cone block
+            while (qp < q_end) {
+              augmented_CSR.j[q]   = Q.i[qp];
+              augmented_CSR.x[q++] = -Q.x[qp];
               off_diag_Qnz++;
-              augmented_CSR.x[q++] = -Q.x[p];
+              qp++;
             }
-          }
-          if (!has_diagonal) {
+          } else if (nnzQ == 0) {
             augmented_diagonal_indices[i] = q;
             augmented_CSR.j[q]            = i;
             augmented_CSR.x[q++]          = -diag[i] - dual_perturb;
+          } else {
+            // Q is symmetric
+            const i_t q_col_beg = Q.col_start[i];
+            const i_t q_col_end = Q.col_start[i + 1];
+            bool has_diagonal   = false;
+            for (i_t p = q_col_beg; p < q_col_end; ++p) {
+              augmented_CSR.j[q] = Q.i[p];
+              if (Q.i[p] == i) {
+                has_diagonal                  = true;
+                augmented_diagonal_indices[i] = q;
+                augmented_CSR.x[q++]          = -Q.x[p] - diag[i] - dual_perturb;
+              } else {
+                off_diag_Qnz++;
+                augmented_CSR.x[q++] = -Q.x[p];
+              }
+            }
+            if (!has_diagonal) {
+              augmented_diagonal_indices[i] = q;
+              augmented_CSR.j[q]            = i;
+              augmented_CSR.x[q++]          = -diag[i] - dual_perturb;
+            }
           }
-        }
-        // AT block, we can use A in csc directly
-        const i_t col_beg = A.col_start[i];
-        const i_t col_end = A.col_start[i + 1];
+          // AT block, we can use A in csc directly
+          const i_t col_beg = A.col_start[i];
+          const i_t col_end = A.col_start[i + 1];
           for (i_t p = col_beg; p < col_end; ++p) {
             augmented_CSR.j[q]   = A.i[p] + n;
             augmented_CSR.x[q++] = A.x[p];
@@ -783,19 +794,22 @@ class iteration_data_t {
       // Primal diagonal: linear block includes dual_perturb; SOC block is filled by scatter below.
       // Direct free variables use barrier D = 0 in augmented_multiply; omit span_diag[j] here so
       // the factorized matrix matches the matvec (only -q_diag - dual_perturb on the diagonal).
-      thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()),
-                         thrust::make_counting_iterator<i_t>(0),
-                         linear_n,
-                         [span_x             = cuopt::make_span(device_augmented.x),
-                          span_diag_indices  = cuopt::make_span(d_augmented_diagonal_indices_),
-                          span_q_diag        = cuopt::make_span(d_Q_diag_),
-                          span_diag          = cuopt::make_span(d_diag_),
-                          span_is_direct_free_linear       = cuopt::make_span(d_is_direct_free_linear_),
-                          dual_perturb_value = dual_perturb] __device__(i_t j) {
-                           f_t q_diag     = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0;
-                           const f_t d_j  = (span_is_direct_free_linear.size() > 0 && span_is_direct_free_linear[j]) ? f_t(0) : span_diag[j];
-                           span_x[span_diag_indices[j]] = -q_diag - d_j - dual_perturb_value;
-                         });
+      thrust::for_each_n(
+        rmm::exec_policy(handle_ptr->get_stream()),
+        thrust::make_counting_iterator<i_t>(0),
+        linear_n,
+        [span_x                     = cuopt::make_span(device_augmented.x),
+         span_diag_indices          = cuopt::make_span(d_augmented_diagonal_indices_),
+         span_q_diag                = cuopt::make_span(d_Q_diag_),
+         span_diag                  = cuopt::make_span(d_diag_),
+         span_is_direct_free_linear = cuopt::make_span(d_is_direct_free_linear_),
+         dual_perturb_value         = dual_perturb] __device__(i_t j) {
+          f_t q_diag    = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0;
+          const f_t d_j = (span_is_direct_free_linear.size() > 0 && span_is_direct_free_linear[j])
+                            ? f_t(0)
+                            : span_diag[j];
+          span_x[span_diag_indices[j]] = -q_diag - d_j - dual_perturb_value;
+        });
       RAFT_CHECK_CUDA(handle_ptr->get_stream());
 
       thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()),
@@ -1728,8 +1742,12 @@ class iteration_data_t {
     // r1 <- D * x_1 on linear indices; barrier D is zero on direct free variables
     const i_t linear_n = has_soc ? cone_start() : n;
     if (n_direct_free_linear > 0) {
-      pairwise_multiply_skip_direct_free_linear(
-        d_x1.data(), d_diag_.data(), d_is_direct_free_linear_.data(), d_r1.data(), linear_n, stream_view_);
+      pairwise_multiply_skip_direct_free_linear(d_x1.data(),
+                                                d_diag_.data(),
+                                                d_is_direct_free_linear_.data(),
+                                                d_r1.data(),
+                                                linear_n,
+                                                stream_view_);
     } else {
       pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), linear_n, stream_view_);
     }
@@ -1853,7 +1871,8 @@ class iteration_data_t {
   bool use_augmented;
   i_t symbolic_status;
   i_t n_direct_free_linear{0};
-  rmm::device_uvector<i_t> d_is_direct_free_linear_;  // 1 if direct free linear (j < cone_start), else 0
+  rmm::device_uvector<i_t>
+    d_is_direct_free_linear_;  // 1 if direct free linear (j < cone_start), else 0
 
   // Adaptive regularization for the augmented system
   f_t dual_perturb{1e-8};
@@ -2064,9 +2083,9 @@ template <typename i_t, typename f_t>
 int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
 {
   raft::common::nvtx::range fun_scope("Barrier: initial_point");
-  const bool use_augmented = data.use_augmented;
-  const bool has_soc       = data.has_cones();
-  const bool has_direct_free_linear      = data.n_direct_free_linear > 0;
+  const bool use_augmented          = data.use_augmented;
+  const bool has_soc                = data.has_cones();
+  const bool has_direct_free_linear = data.n_direct_free_linear > 0;
 
   // Mask used by the two ADAT/augmented branches below to enforce z > 0 on the linear block
   // while leaving the SOC cone block (kept feasible by NT scaling) alone.
@@ -2595,8 +2614,9 @@ f_t barrier_solver_t<i_t, f_t>::gpu_max_step_to_boundary(iteration_data_t<i_t, f
                                                          const rmm::device_uvector<f_t>& x,
                                                          const rmm::device_uvector<f_t>& dx)
 {
-  const bool has_soc  = data.has_cones() && static_cast<i_t>(x.size()) >= data.cone_end();
-  const bool has_direct_free_linear = data.n_direct_free_linear > 0 && static_cast<i_t>(x.size()) == lp.num_cols;
+  const bool has_soc = data.has_cones() && static_cast<i_t>(x.size()) >= data.cone_end();
+  const bool has_direct_free_linear =
+    data.n_direct_free_linear > 0 && static_cast<i_t>(x.size()) == lp.num_cols;
 
   auto reduce_segment = [&](i_t start, i_t len) -> f_t {
     if (len <= 0) { return f_t(1); }
@@ -2604,8 +2624,8 @@ f_t barrier_solver_t<i_t, f_t>::gpu_max_step_to_boundary(iteration_data_t<i_t, f
       auto direct_free_linear_ptr = data.d_is_direct_free_linear_.data() + start;
       // step size computation for nonnegative variables with free variables
       auto ratio_test_free = [direct_free_linear_ptr] HD(const thrust::tuple<f_t, f_t, i_t> t) {
-        const f_t dx_val  = thrust::get<0>(t);
-        const f_t x_val   = thrust::get<1>(t);
+        const f_t dx_val                = thrust::get<0>(t);
+        const f_t x_val                 = thrust::get<1>(t);
         const i_t is_direct_free_linear = thrust::get<2>(t);
         if (is_direct_free_linear) return f_t(1.0);
         if (dx_val < f_t(0.0)) return -x_val / dx_val;
@@ -2662,13 +2682,13 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
 {
   raft::common::nvtx::range fun_scope("Barrier: compute_search_direction");
 
-  const bool debug         = false;
-  const bool use_augmented = data.use_augmented;
-  const bool has_soc       = data.has_cones();
-  const bool has_direct_free_linear      = data.n_direct_free_linear > 0;
-  const i_t m_c            = data.cone_entry_count();
-  const i_t cone_var_start = data.cone_start();
-  const i_t linear_size    = data.linear_xz_size(lp.num_cols);
+  const bool debug                  = false;
+  const bool use_augmented          = data.use_augmented;
+  const bool has_soc                = data.has_cones();
+  const bool has_direct_free_linear = data.n_direct_free_linear > 0;
+  const i_t m_c                     = data.cone_entry_count();
+  const i_t cone_var_start          = data.cone_start();
+  const i_t linear_size             = data.linear_xz_size(lp.num_cols);
 
   // Linear (orthant) block only; SOC uses recover_cone_dz_from_target.
   auto recover_linear_orthant_dz = [&](raft::device_span<const f_t> target,
@@ -2750,8 +2770,10 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
         constexpr f_t free_var_reg = 1e-7;
         if (data.Q.n > 0 && data.Q_diagonal) {
           cub::DeviceTransform::Transform(
-            cuda::std::make_tuple(
-              data.d_z_.data(), data.d_x_.data(), data.d_is_direct_free_linear_.data(), data.d_Q_diag_.data()),
+            cuda::std::make_tuple(data.d_z_.data(),
+                                  data.d_x_.data(),
+                                  data.d_is_direct_free_linear_.data(),
+                                  data.d_Q_diag_.data()),
             data.d_diag_.data(),
             linear_size,
             [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_direct_free_linear, f_t q_jj) {
@@ -2761,7 +2783,8 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
             stream_view_.value());
         } else {
           cub::DeviceTransform::Transform(
-            cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_direct_free_linear_.data()),
+            cuda::std::make_tuple(
+              data.d_z_.data(), data.d_x_.data(), data.d_is_direct_free_linear_.data()),
             data.d_diag_.data(),
             linear_size,
             [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_direct_free_linear) {
@@ -2942,7 +2965,8 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
     // Augmented RHS [dx; dy]: primal block is d_r1_ (assembled above).
     //   linear j: dual_rhs[j] - complementarity_target[j]
     //             + E_j*((complementarity_wv_rhs - v.*bound_rhs)./w)  (target = xz_rhs/x; free: 0)
-    //   cone j:   dual_rhs[j] - complementarity_target[j]  (NT target: -z or combined centering term)
+    //   cone j:   dual_rhs[j] - complementarity_target[j]  (NT target: -z or combined centering
+    //   term)
     // Constraint block: primal_rhs.
 
     // TODO: d_augmented_rhs can be preallocated
@@ -3469,10 +3493,8 @@ void copy_host_iterate_to_device(iteration_data_t<i_t, f_t>& data, rmm::cuda_str
   raft::copy(data.d_y_.data(), data.y.data(), data.y.size(), stream);
 
   data.d_upper_bounds_.resize(data.upper_bounds.size(), stream);
-  raft::copy(data.d_upper_bounds_.data(),
-             data.upper_bounds.data(),
-             data.upper_bounds.size(),
-             stream);
+  raft::copy(
+    data.d_upper_bounds_.data(), data.upper_bounds.data(), data.upper_bounds.size(), stream);
 }
 
 // One-time static problem data (constant across barrier iterations).
@@ -3493,15 +3515,15 @@ void copy_static_problem_data_to_device(const lp_problem_t<i_t, f_t>& lp,
   data.d_dw_residual_.resize(data.n_upper_bounds, stream);
 }
 
-// Per Mehrotra step: affine/corrector RHS only (iterate stays on device from initial sync / next_iterate).
+// Per Mehrotra step: affine/corrector RHS only (iterate stays on device from initial sync /
+// next_iterate).
 template <typename i_t, typename f_t>
 void copy_step_rhs_to_device(iteration_data_t<i_t, f_t>& data, rmm::cuda_stream_view stream)
 {
   raft::common::nvtx::range fun_scope("Barrier: copy_step_rhs_to_device");
 
   data.d_bound_rhs_.resize(data.bound_rhs.size(), stream);
-  raft::copy(
-    data.d_bound_rhs_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream);
+  raft::copy(data.d_bound_rhs_.data(), data.bound_rhs.data(), data.bound_rhs.size(), stream);
 
   raft::copy(data.d_h_.data(), data.primal_rhs.data(), data.primal_rhs.size(), stream);
   raft::copy(data.d_dual_rhs_.data(), data.dual_rhs.data(), data.dual_rhs.size(), stream);
@@ -3556,11 +3578,7 @@ void fill_affine_cone_complementarity_target(iteration_data_t<i_t, f_t>& data,
   auto cone_target =
     raft::device_span<f_t>(data.d_complementarity_target_.data() + cone_var_start, m_c);
   cub::DeviceTransform::Transform(
-    cones.z.data(),
-    cone_target.data(),
-    m_c,
-    [] HD(f_t z_val) { return -z_val; },
-    stream.value());
+    cones.z.data(), cone_target.data(), m_c, [] HD(f_t z_val) { return -z_val; }, stream.value());
   RAFT_CUDA_TRY(cudaPeekAtLastError());
   RAFT_CHECK_CUDA(stream);
 }
@@ -3578,20 +3596,21 @@ void fill_corrector_cone_complementarity_target(iteration_data_t<i_t, f_t>& data
   cones.z     = raft::device_span<f_t>(data.d_z_.data() + cone_var_start, m_c);
   auto cone_target =
     raft::device_span<f_t>(data.d_complementarity_target_.data() + cone_var_start, m_c);
-  compute_combined_cone_rhs_term(raft::device_span<const f_t>(data.d_dx_aff_.data() + cone_var_start, m_c),
-                                 raft::device_span<const f_t>(data.d_dz_aff_.data() + cone_var_start, m_c),
-                                 cones,
-                                 sigma_mu,
-                                 cone_target,
-                                 stream);
+  compute_combined_cone_rhs_term(
+    raft::device_span<const f_t>(data.d_dx_aff_.data() + cone_var_start, m_c),
+    raft::device_span<const f_t>(data.d_dz_aff_.data() + cone_var_start, m_c),
+    cones,
+    sigma_mu,
+    cone_target,
+    stream);
 }
 
 template <typename i_t, typename f_t>
 void barrier_solver_t<i_t, f_t>::compute_affine_rhs(iteration_data_t<i_t, f_t>& data)
 {
   raft::common::nvtx::range fun_scope("Barrier: compute_affine_rhs");
-  const bool has_soc    = data.has_cones();
-  const i_t linear_size = data.linear_xz_size(lp.num_cols);
+  const bool has_soc       = data.has_cones();
+  const i_t linear_size    = data.linear_xz_size(lp.num_cols);
   const i_t cone_var_start = data.cone_start();
   const i_t m_c            = data.cone_entry_count();
 
@@ -3723,19 +3742,19 @@ void barrier_solver_t<i_t, f_t>::compute_target_mu(
     stream_view_);
 
   complementarity_aff_sum = complementarity_xz_aff_sum + complementarity_wv_aff_sum;
-  const f_t mu_denom = data.complementarity_degree(data.x.size(), data.n_upper_bounds);
-  mu_aff             = complementarity_aff_sum / mu_denom;
-  sigma              = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0)));
-  new_mu             = sigma * mu;
+  const f_t mu_denom      = data.complementarity_degree(data.x.size(), data.n_upper_bounds);
+  mu_aff                  = complementarity_aff_sum / mu_denom;
+  sigma                   = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0)));
+  new_mu                  = sigma * mu;
 }
 
 template <typename i_t, typename f_t>
 void barrier_solver_t<i_t, f_t>::compute_cc_rhs(iteration_data_t<i_t, f_t>& data, f_t& new_mu)
 {
   raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs");
-  const bool has_soc    = data.has_cones();
-  const bool has_direct_free_linear   = data.n_direct_free_linear > 0;
-  const i_t linear_size = data.linear_xz_size(lp.num_cols);
+  const bool has_soc                = data.has_cones();
+  const bool has_direct_free_linear = data.n_direct_free_linear > 0;
+  const i_t linear_size             = data.linear_xz_size(lp.num_cols);
 
   auto fill_linear_cc_rhs = [&](raft::device_span<f_t> out,
                                 raft::device_span<const f_t> dx_aff,
diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp
index 6676660c29..de295c0c20 100644
--- a/cpp/src/dual_simplex/presolve.cpp
+++ b/cpp/src/dual_simplex/presolve.cpp
@@ -944,12 +944,13 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
              lp_problem_t<i_t, f_t>& problem,
              presolve_info_t<i_t, f_t>& presolve_info)
 {
-  problem = original;
+  problem               = original;
   const i_t linear_cols = linear_var_count(problem);
   const bool has_cones  = !problem.second_order_cone_dims.empty();
   std::vector<char> row_sense(problem.num_rows, '=');
 
-  // Check for free variables (linear block only; cone columns are handled by the barrier SOC layout)
+  // Check for free variables (linear block only; cone columns are handled by the barrier SOC
+  // layout)
   i_t free_variables = 0;
   for (i_t j = 0; j < linear_cols; j++) {
     if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; }
@@ -991,12 +992,11 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
       problem.A.to_compressed_row(Arow);
 
       // Keep only rows safe for bound inference: no cone columns
-      if (has_cones)
-      {
+      if (has_cones) {
         std::vector<i_t> safe_constraints;
         safe_constraints.reserve(constraints_to_check.size());
         for (i_t i : constraints_to_check) {
-          bool touches_cone        = false;
+          bool touches_cone = false;
           for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; ++p) {
             const i_t j = Arow.j[p];
             if (j >= linear_cols) {
@@ -1022,7 +1022,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
         i_t last_free_i       = -1;
         f_t last_free_coeff_i = 0.0;
         for (i_t p = row_start; p < row_end; p++) {
-          const i_t j       = Arow.j[p];
+          const i_t j = Arow.j[p];
           if (j >= linear_cols) { continue; }
           const f_t aij     = Arow.x[p];
           const f_t lower_j = problem.lower[j];
@@ -1075,8 +1075,8 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
         // And we can derive two bounds from this:
         // x_j >= 1/a_ij * (rhs - lower_activity_i)
         // x_j <= 1/a_ij * (rhs - upper_activity_i)
-        const i_t j         = last_free_i;
-        const f_t a_ij      = last_free_coeff_i;
+        const i_t j    = last_free_i;
+        const f_t a_ij = last_free_coeff_i;
         if (a_ij == 0) { continue; }
         const f_t max_bound = 1e10;
         bool bounded        = false;
@@ -1326,7 +1326,8 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
   problem.Q.check_matrix("Before free variable expansion");
 
   // Free linear variables. We handle them directly in QP/SOCP or split them in LP.
-  const bool direct_free_linear = settings.barrier_presolve && free_variables > 0 && (problem.Q.n > 0 || has_cones);
+  const bool direct_free_linear =
+    settings.barrier_presolve && free_variables > 0 && (problem.Q.n > 0 || has_cones);
   if (direct_free_linear) {
     presolve_info.free_variable_pairs.clear();
     presolve_info.direct_free_variables.clear();
@@ -1397,8 +1398,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     problem.num_cols = num_cols;
   }
 
-  if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0 &&
-      !has_cones) {
+  if (settings.barrier_presolve && settings.folding != 0 && problem.Q.n == 0 && !has_cones) {
     folding(problem, settings, presolve_info);
   }
 
diff --git a/cpp/src/dual_simplex/scaling.cpp b/cpp/src/dual_simplex/scaling.cpp
index 2db6c75b43..852c19e2e1 100644
--- a/cpp/src/dual_simplex/scaling.cpp
+++ b/cpp/src/dual_simplex/scaling.cpp
@@ -197,9 +197,8 @@ i_t column_scaling(const lp_problem_t<i_t, f_t>& unscaled,
     max_col_norm       = std::max(col_norm, max_col_norm);
     min_col_norm       = std::min(col_norm, min_col_norm);
   }
-  settings.log.printf("Scaling matrix. Maximum column norm %e, minimum column norm %e\n",
-                      max_col_norm,
-                      min_col_norm);
+  settings.log.printf(
+    "Scaling matrix. Maximum column norm %e, minimum column norm %e\n", max_col_norm, min_col_norm);
 
   // scaled_A = unscaled_A * C
   for (i_t j = 0; j < n; ++j) {
diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp
index 66fa94bfc1..5328cf3fe1 100644
--- a/cpp/src/dual_simplex/simplex_solver_settings.hpp
+++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp
@@ -164,8 +164,8 @@ struct simplex_solver_settings_t {
   bool eliminate_singletons;  // true to eliminate singletons from the basis
   bool print_presolve_stats;  // true to print presolve stats
   bool barrier_presolve;  // bound shifts, free-var bounding; LP v-w split / folding when no cones
-  bool cudss_deterministic;   // true to use cuDSS deterministic mode, false for non-deterministic
-  bool barrier;               // true to use barrier method, false to use dual simplex method
+  bool cudss_deterministic;  // true to use cuDSS deterministic mode, false for non-deterministic
+  bool barrier;              // true to use barrier method, false to use dual simplex method
   bool deterministic;  // true to use B&B deterministic mode, false to use non-deterministic mode
   bool eliminate_dense_columns;       // true to eliminate dense columns from A*D*A^T
   bool barrier_iterative_refinement;  // true to use iterative refinement for barrier method
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 7c23784802..15d51041f7 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -385,9 +385,9 @@ lp_status_t solve_linear_program_with_advanced_basis(
 
 template <typename i_t, typename f_t>
 lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& user_problem,
-                                               const simplex_solver_settings_t<i_t, f_t>& settings,
-                                               f_t start_time,
-                                               lp_solution_t<i_t, f_t>& solution)
+                                              const simplex_solver_settings_t<i_t, f_t>& settings,
+                                              f_t start_time,
+                                              lp_solution_t<i_t, f_t>& solution)
 {
   lp_status_t status = lp_status_t::UNSET;
   lp_problem_t<i_t, f_t> original_lp(user_problem.handle_ptr, 1, 1, 1);
@@ -698,8 +698,8 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
     // Run crossover
     lp_solution_t<i_t, f_t> crossover_solution(original_lp.num_rows, original_lp.num_cols);
     std::vector<variable_status_t> vstatus(original_lp.num_cols);
-    crossover_status_t crossover_status = crossover(
-      original_lp, settings, lp_solution, start_time, crossover_solution, vstatus);
+    crossover_status_t crossover_status =
+      crossover(original_lp, settings, lp_solution, start_time, crossover_solution, vstatus);
     settings.log.printf("Crossover status: %d\n", crossover_status);
     if (crossover_status == crossover_status_t::OPTIMAL) { barrier_status = lp_status_t::OPTIMAL; }
   }
diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu
index b747203061..5c90e65cbc 100644
--- a/cpp/src/pdlp/solve.cu
+++ b/cpp/src/pdlp/solve.cu
@@ -508,8 +508,8 @@ run_barrier(dual_simplex::user_problem_t<i_t, f_t>& user_problem,
   barrier_settings.barrier_presolve                = true;
   barrier_settings.crossover                       = settings.crossover;
   barrier_settings.eliminate_dense_columns         = settings.eliminate_dense_columns;
-  barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement;
-  barrier_settings.barrier_step_scale           = settings.barrier_step_scale;
+  barrier_settings.barrier_iterative_refinement    = settings.barrier_iterative_refinement;
+  barrier_settings.barrier_step_scale              = settings.barrier_step_scale;
   barrier_settings.cudss_deterministic             = settings.cudss_deterministic;
   barrier_settings.barrier_relaxed_feasibility_tol = settings.tolerances.relative_primal_tolerance;
   barrier_settings.barrier_relaxed_optimality_tol  = settings.tolerances.relative_dual_tolerance;
diff --git a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
index 4058bf3aa9..45fcd297e8 100644
--- a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
+++ b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
@@ -213,9 +213,9 @@ TEST(barrier, cone_metadata_reindexed_when_slack_is_inserted_before_cones)
   user_problem.cone_var_start         = 1;
 
   simplex_solver_settings_t<int, double> settings;
-  settings.barrier          = true;
-  settings.dualize          = 0;
-  settings.scale_columns    = false;
+  settings.barrier       = true;
+  settings.dualize       = 0;
+  settings.scale_columns = false;
 
   std::vector<int> new_slacks;
   dualize_info_t<int, double> dualize_info;
@@ -278,9 +278,9 @@ TEST(barrier, presolve_reindexes_cone_start_after_empty_column_removal)
   user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
 
   simplex_solver_settings_t<int, double> settings;
-  settings.barrier          = true;
-  settings.dualize          = 0;
-  settings.scale_columns    = false;
+  settings.barrier       = true;
+  settings.dualize       = 0;
+  settings.scale_columns = false;
 
   std::vector<int> new_slacks;
   dualize_info_t<int, double> dualize_info;
@@ -336,7 +336,7 @@ TEST(barrier, presolve_keeps_direct_free_variables_before_cones)
 
   user_problem.rhs       = {1.0};
   user_problem.row_sense = {'E'};
-  user_problem.lower = {-inf, -inf, 0.0, 0.0, 0.0};
+  user_problem.lower     = {-inf, -inf, 0.0, 0.0, 0.0};
   user_problem.upper.assign(n, inf);
   user_problem.num_range_rows         = 0;
   user_problem.cone_var_start         = 2;
@@ -344,9 +344,9 @@ TEST(barrier, presolve_keeps_direct_free_variables_before_cones)
   user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
 
   simplex_solver_settings_t<int, double> settings;
-  settings.barrier          = true;
-  settings.dualize          = 0;
-  settings.scale_columns    = false;
+  settings.barrier       = true;
+  settings.dualize       = 0;
+  settings.scale_columns = false;
 
   std::vector<int> new_slacks;
   dualize_info_t<int, double> dualize_info;
@@ -441,8 +441,8 @@ TEST(barrier, rejects_middle_cone_input_before_barrier)
   user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
 
   simplex_solver_settings_t<int, double> settings;
-  settings.barrier          = true;
-  settings.dualize          = 0;
+  settings.barrier = true;
+  settings.dualize = 0;
   lp_solution_t<int, double> solution(m, n);
 
   auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
@@ -495,8 +495,8 @@ TEST(barrier, socp_min_x0_subject_to_norm_constraint)
   user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
 
   simplex_solver_settings_t<int, double> settings;
-  settings.barrier          = true;
-  settings.dualize          = 0;
+  settings.barrier = true;
+  settings.dualize = 0;
 
   lp_solution_t<int, double> solution(m, n);
   auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
@@ -558,8 +558,8 @@ TEST(barrier, mixed_linear_and_soc_block)
   user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
 
   simplex_solver_settings_t<int, double> settings;
-  settings.barrier          = true;
-  settings.dualize          = 0;
+  settings.barrier = true;
+  settings.dualize = 0;
 
   lp_solution_t<int, double> solution(m, n);
   auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
@@ -623,9 +623,9 @@ TEST(barrier, mixed_linear_and_soc_tail_coupling)
   user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
 
   simplex_solver_settings_t<int, double> settings;
-  settings.barrier          = true;
-  settings.dualize          = 0;
-  settings.scale_columns    = true;
+  settings.barrier       = true;
+  settings.dualize       = 0;
+  settings.scale_columns = true;
 
   lp_solution_t<int, double> solution(m, n);
   auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
@@ -689,9 +689,9 @@ TEST(barrier, mixed_linear_and_soc_tail_coupling_with_inequality)
   user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
 
   simplex_solver_settings_t<int, double> settings;
-  settings.barrier          = true;
-  settings.dualize          = 0;
-  settings.scale_columns    = true;
+  settings.barrier       = true;
+  settings.dualize       = 0;
+  settings.scale_columns = true;
 
   lp_solution_t<int, double> solution(m, n);
   auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
@@ -766,8 +766,8 @@ TEST(barrier, mixed_linear_and_two_soc_blocks)
   user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
 
   simplex_solver_settings_t<int, double> settings;
-  settings.barrier          = true;
-  settings.dualize          = 0;
+  settings.barrier = true;
+  settings.dualize = 0;
 
   lp_solution_t<int, double> solution(m, n);
   auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
@@ -846,9 +846,9 @@ TEST(barrier, mixed_linear_and_two_soc_blocks_with_inequality)
   user_problem.var_types.assign(n, variable_type_t::CONTINUOUS);
 
   simplex_solver_settings_t<int, double> settings;
-  settings.barrier          = true;
-  settings.dualize          = 0;
-  settings.scale_columns    = true;
+  settings.barrier       = true;
+  settings.dualize       = 0;
+  settings.scale_columns = true;
 
   lp_solution_t<int, double> solution(m, n);
   auto status = solve_linear_program_with_barrier(user_problem, settings, solution);
diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model.py b/python/cuopt/cuopt/linear_programming/data_model/data_model.py
index 5e6fd6744d..f8b6dd609f 100644
--- a/python/cuopt/cuopt/linear_programming/data_model/data_model.py
+++ b/python/cuopt/cuopt/linear_programming/data_model/data_model.py
@@ -4,7 +4,6 @@
 import os
 import time
 
-import numpy as np
 
 from . import data_model_wrapper
 from .utilities import catch_cuopt_exception
diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd
index 9f9f5ba5cf..6c401b59f5 100644
--- a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd
+++ b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa
 # SPDX-License-Identifier: Apache-2.0
 
 from .data_model cimport *
diff --git a/python/cuopt/cuopt/linear_programming/problem.py b/python/cuopt/cuopt/linear_programming/problem.py
index 84ca43c5b5..84fa1b27d7 100644
--- a/python/cuopt/cuopt/linear_programming/problem.py
+++ b/python/cuopt/cuopt/linear_programming/problem.py
@@ -1316,7 +1316,9 @@ def __init__(self, expr, sense, rhs, name=""):
             ) = _quadratic_expression_to_qcmatrix(expr, rhs)
             self.linear_values = np.array(linear_values, dtype=np.float64)
             self.linear_indices = np.array(linear_indices, dtype=np.int32)
-            self.quadratic_values = np.array(quadratic_values, dtype=np.float64)
+            self.quadratic_values = np.array(
+                quadratic_values, dtype=np.float64
+            )
             self.quadratic_row_indices = np.array(
                 quadratic_row_indices, dtype=np.int32
             )
@@ -1379,7 +1381,7 @@ def compute_slack(self):
             index_to_var[v_idx].Value * coeff
             for v_idx, coeff in self.vindex_coeff_dict.items()
         )
-        
+
         return self.RHS - lhs
 
 
@@ -1590,7 +1592,9 @@ def _to_data_model(self):
         dm.set_row_names(self.row_names)
         dm.set_problem_name(self.Name)
 
-        linear_constr_count = sum(1 for c in self.constrs if not c.is_quadratic)
+        linear_constr_count = sum(
+            1 for c in self.constrs if not c.is_quadratic
+        )
         quad_index = 0
         for constr in self.constrs:
             if not constr.is_quadratic:

From 068ce73ea463923ee74edef6def7c97c17e868f4 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 22 May 2026 16:41:18 -0700
Subject: [PATCH 06/10] Revert unnecessary changes to
 simplex_solver_settings.hpp

---
 cpp/src/dual_simplex/simplex_solver_settings.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp
index 5328cf3fe1..500822e1bd 100644
--- a/cpp/src/dual_simplex/simplex_solver_settings.hpp
+++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp
@@ -163,9 +163,9 @@ struct simplex_solver_settings_t {
     use_left_looking_lu;  // true to use left looking LU factorization, false to use right looking
   bool eliminate_singletons;  // true to eliminate singletons from the basis
   bool print_presolve_stats;  // true to print presolve stats
-  bool barrier_presolve;  // bound shifts, free-var bounding; LP v-w split / folding when no cones
-  bool cudss_deterministic;  // true to use cuDSS deterministic mode, false for non-deterministic
-  bool barrier;              // true to use barrier method, false to use dual simplex method
+  bool barrier_presolve;      // true to use barrier presolve
+  bool cudss_deterministic;   // true to use cuDSS deterministic mode, false for non-deterministic
+  bool barrier;               // true to use barrier method, false to use dual simplex method
   bool deterministic;  // true to use B&B deterministic mode, false to use non-deterministic mode
   bool eliminate_dense_columns;       // true to eliminate dense columns from A*D*A^T
   bool barrier_iterative_refinement;  // true to use iterative refinement for barrier method

From 380396105558154ee5417061b15a203be29d88db Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 22 May 2026 16:51:52 -0700
Subject: [PATCH 07/10] Revert unintentional change to match 26.06

---
 .../pdlp/termination_strategy/infeasibility_information.cu  | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu
index 96de3a788d..9268e17910 100644
--- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu
+++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu
@@ -26,7 +26,13 @@
 #include <raft/linalg/unary_op.cuh>
 #include <raft/util/cuda_utils.cuh>
 
+#include <thrust/device_ptr.h>
+#include <thrust/extrema.h>
+#include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/transform_output_iterator.h>
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/transform_reduce.h>
+#include <thrust/tuple.h>
 
 namespace cuopt::linear_programming::detail {
 template <typename i_t, typename f_t>

From b94ae44bfe4dfd0543bdf21208b36922efc0f73e Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 22 May 2026 17:10:36 -0700
Subject: [PATCH 08/10] apply_soc_qcmatrix_conversion_for_simplex ->
 convert_quadratic_constraints_to_second_order_cones

---
 cpp/src/pdlp/translate.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp
index 2bfaa127ed..774582a555 100644
--- a/cpp/src/pdlp/translate.hpp
+++ b/cpp/src/pdlp/translate.hpp
@@ -191,7 +191,7 @@ static dual_simplex::user_problem_t<i_t, f_t> cuopt_problem_to_user_problem(
   user_problem.Q_values  = model.Q_values;
 
   if (model.original_problem_ptr->has_quadratic_constraints()) {
-    detail::apply_soc_qcmatrix_conversion_for_simplex<i_t, f_t>(
+    detail::convert_quadratic_constraints_to_second_order_cones<i_t, f_t>(
       n, model.original_problem_ptr->get_quadratic_constraints(), csr_A, user_problem);
   }
 
@@ -302,7 +302,7 @@ static dual_simplex::user_problem_t<i_t, f_t> cuopt_optimization_problem_to_user
   user_problem.Q_values  = model.get_quadratic_objective_values();
 
   if (model.has_quadratic_constraints()) {
-    detail::apply_soc_qcmatrix_conversion_for_simplex<i_t, f_t>(
+    detail::convert_quadratic_constraints_to_second_order_cones<i_t, f_t>(
       static_cast<int>(n), model.get_quadratic_constraints(), csr_A, user_problem);
   }
 

From 3f2f3fcbf196f523fb279b4844e704292ff16499 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 22 May 2026 17:18:36 -0700
Subject: [PATCH 09/10] Clean up scaling and prints

---
 cpp/src/dual_simplex/scaling.cpp              | 66 ++++++++++++-------
 cpp/src/dual_simplex/scaling.hpp              | 10 +--
 cpp/src/dual_simplex/solve.cpp                |  4 +-
 cpp/src/pdlp/solve.cu                         |  4 +-
 .../dual_simplex/unit_tests/solve_barrier.cu  |  4 +-
 5 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/cpp/src/dual_simplex/scaling.cpp b/cpp/src/dual_simplex/scaling.cpp
index 852c19e2e1..30aec6c782 100644
--- a/cpp/src/dual_simplex/scaling.cpp
+++ b/cpp/src/dual_simplex/scaling.cpp
@@ -13,11 +13,11 @@
 namespace cuopt::linear_programming::dual_simplex {
 
 template <typename i_t, typename f_t>
-i_t column_scaling(const lp_problem_t<i_t, f_t>& unscaled,
-                   const simplex_solver_settings_t<i_t, f_t>& settings,
-                   lp_problem_t<i_t, f_t>& scaled,
-                   std::vector<f_t>& col_scale,
-                   std::vector<f_t>& row_scale)
+i_t scaling(const lp_problem_t<i_t, f_t>& unscaled,
+            const simplex_solver_settings_t<i_t, f_t>& settings,
+            lp_problem_t<i_t, f_t>& scaled,
+            std::vector<f_t>& col_scale,
+            std::vector<f_t>& row_scale)
 {
   scaled = unscaled;
   i_t m  = scaled.num_rows;
@@ -161,6 +161,16 @@ i_t column_scaling(const lp_problem_t<i_t, f_t>& unscaled,
       if (max_deviation < 0.1) break;
     }
 
+    // Ruiz col_scale accumulates reciprocals (c[j] = 1/sqrt(norm)), so invert
+    // to store column norms: C(j,j) = 1/col_scale[j].
+    for (i_t j = 0; j < n; ++j) {
+      col_scale[j] = f_t(1) / col_scale[j];
+    }
+    // Same for row_scale: invert so R(i,i) = 1/row_scale[i].
+    for (i_t i = 0; i < m; ++i) {
+      row_scale[i] = f_t(1) / row_scale[i];
+    }
+
     f_t a_min = std::numeric_limits<f_t>::max();
     f_t a_max = 0;
     for (i_t p = 0; p < scaled.A.col_start[n]; ++p) {
@@ -192,24 +202,31 @@ i_t column_scaling(const lp_problem_t<i_t, f_t>& unscaled,
       const f_t x = scaled.A.x[p];
       sum += x * x;
     }
-    const f_t col_norm = sum > 0 ? std::sqrt(sum) : 1.0;
-    col_scale[j]       = f_t(1) / col_norm;
-    max_col_norm       = std::max(col_norm, max_col_norm);
-    min_col_norm       = std::min(col_norm, min_col_norm);
+    f_t col_norm_j = col_scale[j] = sum > 0 ? std::sqrt(sum) : 1.0;
+    max_col_norm                   = std::max(col_norm_j, max_col_norm);
+    min_col_norm                   = std::min(col_norm_j, min_col_norm);
   }
   settings.log.printf(
     "Scaling matrix. Maximum column norm %e, minimum column norm %e\n", max_col_norm, min_col_norm);
+  // C(j, j) = 1/col_scale(j)
 
   // scaled_A = unscaled_A * C
   for (i_t j = 0; j < n; ++j) {
     const i_t col_start = scaled.A.col_start[j];
     const i_t col_end   = scaled.A.col_start[j + 1];
     for (i_t p = col_start; p < col_end; ++p) {
-      scaled.A.x[p] *= col_scale[j];
+      scaled.A.x[p] /= col_scale[j];
     }
-    scaled.objective[j] *= col_scale[j];
-    if (scaled.lower[j] > -1e20) scaled.lower[j] /= col_scale[j];
-    if (scaled.upper[j] < 1e20) scaled.upper[j] /= col_scale[j];
+  }
+  // scaled_obj = C*unscaled_obj
+  for (i_t j = 0; j < n; ++j) {
+    scaled.objective[j] /= col_scale[j];
+  }
+  // scaled_lower = C^{-1} * unscaled_lower
+  // scaled_upper = C^{-1} * unscaled_upper
+  for (i_t j = 0; j < n; ++j) {
+    scaled.lower[j] *= col_scale[j];
+    scaled.upper[j] *= col_scale[j];
   }
 
   for (i_t i = 0; i < unscaled.Q.n; ++i) {
@@ -217,7 +234,7 @@ i_t column_scaling(const lp_problem_t<i_t, f_t>& unscaled,
     const i_t row_end   = unscaled.Q.row_start[i + 1];
     for (i_t p = row_start; p < row_end; ++p) {
       const i_t col = unscaled.Q.j[p];
-      scaled.Q.x[p] = unscaled.Q.x[p] * col_scale[i] * col_scale[col];
+      scaled.Q.x[p] = unscaled.Q.x[p] / (col_scale[i] * col_scale[col]);
     }
   }
   return 0;
@@ -236,28 +253,27 @@ void unscale_solution(const std::vector<f_t>& col_scale,
   const i_t n = scaled_x.size();
   unscaled_x.resize(n);
   unscaled_z.resize(n);
+  // C(j,j) = 1/col_scale[j], so x_orig = x_scaled / col_scale, z_orig = z_scaled * col_scale
   for (i_t j = 0; j < n; ++j) {
-    // column_scaling multiplies A(:,j) and c_j by col_scale[j], divides bounds:
-    // x_orig = col_scale .* x_scaled, z_orig = z_scaled / col_scale.
-    unscaled_x[j] = scaled_x[j] * col_scale[j];
-    unscaled_z[j] = scaled_z[j] / col_scale[j];
+    unscaled_x[j] = scaled_x[j] / col_scale[j];
+    unscaled_z[j] = scaled_z[j] * col_scale[j];
   }
 
   const i_t m = scaled_y.size();
   unscaled_y.resize(m);
+  // R(i,i) = 1/row_scale[i], so y_orig = y_scaled / row_scale
   for (i_t i = 0; i < m; ++i) {
-    // y_unscaled = R * y_scaled  (row scaling: constraint was scaled by R)
-    unscaled_y[i] = scaled_y[i] * row_scale[i];
+    unscaled_y[i] = scaled_y[i] / row_scale[i];
   }
 }
 
 #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE
 
-template int column_scaling<int, double>(const lp_problem_t<int, double>& unscaled,
-                                         const simplex_solver_settings_t<int, double>& settings,
-                                         lp_problem_t<int, double>& scaled,
-                                         std::vector<double>& col_scale,
-                                         std::vector<double>& row_scale);
+template int scaling<int, double>(const lp_problem_t<int, double>& unscaled,
+                                  const simplex_solver_settings_t<int, double>& settings,
+                                  lp_problem_t<int, double>& scaled,
+                                  std::vector<double>& col_scale,
+                                  std::vector<double>& row_scale);
 
 template void unscale_solution<int, double>(const std::vector<double>& col_scale,
                                             const std::vector<double>& row_scale,
diff --git a/cpp/src/dual_simplex/scaling.hpp b/cpp/src/dual_simplex/scaling.hpp
index 94aca2055d..df0bf4d845 100644
--- a/cpp/src/dual_simplex/scaling.hpp
+++ b/cpp/src/dual_simplex/scaling.hpp
@@ -17,11 +17,11 @@
 namespace cuopt::linear_programming::dual_simplex {
 
 template <typename i_t, typename f_t>
-i_t column_scaling(const lp_problem_t<i_t, f_t>& unscaled,
-                   const simplex_solver_settings_t<i_t, f_t>& settings,
-                   lp_problem_t<i_t, f_t>& scaled,
-                   std::vector<f_t>& column_scaling,
-                   std::vector<f_t>& row_scaling);
+i_t scaling(const lp_problem_t<i_t, f_t>& unscaled,
+            const simplex_solver_settings_t<i_t, f_t>& settings,
+            lp_problem_t<i_t, f_t>& scaled,
+            std::vector<f_t>& column_scaling,
+            std::vector<f_t>& row_scaling);
 
 template <typename i_t, typename f_t>
 void unscale_solution(const std::vector<f_t>& column_scaling,
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 15d51041f7..26f399ac86 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -209,7 +209,7 @@ lp_status_t solve_linear_program_with_advanced_basis(
   std::vector<f_t> row_scales_simplex;
   {
     raft::common::nvtx::range scope_scaling("DualSimplex::scaling");
-    column_scaling(presolved_lp, settings, lp, column_scales, row_scales_simplex);
+    scaling(presolved_lp, settings, lp, column_scales, row_scales_simplex);
   }
   assert(presolved_lp.num_cols == lp.num_cols);
   lp_problem_t<i_t, f_t> phase1_problem(original_lp.handle_ptr, 1, 1, 1);
@@ -419,7 +419,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
                                     presolved_lp.A.col_start[presolved_lp.num_cols]);
   std::vector<f_t> column_scales;
   std::vector<f_t> row_scales;
-  column_scaling(presolved_lp, barrier_settings, barrier_lp, column_scales, row_scales);
+  scaling(presolved_lp, barrier_settings, barrier_lp, column_scales, row_scales);
 
   // Solve using barrier
   lp_solution_t<i_t, f_t> barrier_solution(barrier_lp.num_rows, barrier_lp.num_cols);
diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu
index 5c90e65cbc..d9be010ebb 100644
--- a/cpp/src/pdlp/solve.cu
+++ b/cpp/src/pdlp/solve.cu
@@ -1751,7 +1751,7 @@ optimization_problem_solution_t<i_t, f_t> solve_qp(optimization_problem_t<i_t, f
 
     raft::common::nvtx::range fun_scope("Running QP solver");
     if (op_problem.has_quadratic_constraints()) {
-      CUOPT_LOG_INFO("Problem has %d quadratic constraints. Using Barrier with SOC conversion.",
+      CUOPT_LOG_INFO("Problem has %d quadratic constraints. Converting to second-order cones and solving with barrier.",
                      static_cast<int>(op_problem.get_quadratic_constraints().size()));
     }
     if (settings.user_problem_file != "") {
@@ -1813,7 +1813,7 @@ optimization_problem_solution_t<i_t, f_t> solve_lp(
         CUOPT_LOG_INFO("Problem has a quadratic objective. Using Barrier.");
       }
       if (op_problem.has_quadratic_constraints()) {
-        CUOPT_LOG_INFO("Problem has %d quadratic constraints. Using Barrier with SOC conversion.",
+        CUOPT_LOG_INFO("Problem has %d quadratic constraints. Converting to second-order cones and solving with barrier.",
                        static_cast<int>(op_problem.get_quadratic_constraints().size()));
       }
       settings.method    = method_t::Barrier;
diff --git a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
index 45fcd297e8..a5b48fe5a5 100644
--- a/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
+++ b/cpp/tests/dual_simplex/unit_tests/solve_barrier.cu
@@ -234,7 +234,7 @@ TEST(barrier, cone_metadata_reindexed_when_slack_is_inserted_before_cones)
                                        original_lp.A.col_start[original_lp.num_cols]);
   std::vector<double> column_scales;
   std::vector<double> row_scales;
-  column_scaling(original_lp, settings, barrier_lp, column_scales, row_scales);
+  scaling(original_lp, settings, barrier_lp, column_scales, row_scales);
 
   EXPECT_EQ(barrier_lp.second_order_cone_dims, user_problem.second_order_cone_dims);
   EXPECT_EQ(barrier_lp.cone_var_start, 2);
@@ -301,7 +301,7 @@ TEST(barrier, presolve_reindexes_cone_start_after_empty_column_removal)
                                        presolved_lp.A.col_start[presolved_lp.num_cols]);
   std::vector<double> column_scales;
   std::vector<double> row_scales;
-  ASSERT_EQ(column_scaling(presolved_lp, settings, barrier_lp, column_scales, row_scales), 0);
+  ASSERT_EQ(scaling(presolved_lp, settings, barrier_lp, column_scales, row_scales), 0);
   EXPECT_EQ(barrier_lp.cone_var_start, 0);
 }
 

From b128480352de884b9b9a8a39ea40c9272de43b3d Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 22 May 2026 17:23:20 -0700
Subject: [PATCH 10/10] More fixes for scaling

---
 cpp/src/dual_simplex/scaling.cpp | 80 ++++++++++++++++----------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/cpp/src/dual_simplex/scaling.cpp b/cpp/src/dual_simplex/scaling.cpp
index 30aec6c782..92b4d3377d 100644
--- a/cpp/src/dual_simplex/scaling.cpp
+++ b/cpp/src/dual_simplex/scaling.cpp
@@ -16,14 +16,14 @@ template <typename i_t, typename f_t>
 i_t scaling(const lp_problem_t<i_t, f_t>& unscaled,
             const simplex_solver_settings_t<i_t, f_t>& settings,
             lp_problem_t<i_t, f_t>& scaled,
-            std::vector<f_t>& col_scale,
-            std::vector<f_t>& row_scale)
+            std::vector<f_t>& column_scaling,
+            std::vector<f_t>& row_scaling)
 {
   scaled = unscaled;
   i_t m  = scaled.num_rows;
   i_t n  = scaled.num_cols;
 
-  row_scale.assign(m, 1.0);
+  row_scaling.assign(m, 1.0);
 
   // =========================================================================
   // Ruiz equilibration for SOCP (and QP) problems
@@ -31,10 +31,10 @@ i_t scaling(const lp_problem_t<i_t, f_t>& unscaled,
   // For SOCP problems, apply Ruiz equilibration: alternating row and column
   // infinity-norm scaling to bring the constraint matrix close to equilibrium.
   // This dramatically improves the conditioning of the augmented KKT system.
-  // Ruiz equilibration for SOCP (and QP) problems, applied only when the
-  // constraint matrix has a large row-norm imbalance.
+  // Applied only when the constraint matrix has a large row-norm imbalance.
   if (!unscaled.second_order_cone_dims.empty() || unscaled.Q.n > 0) {
-    col_scale.assign(n, 1.0);
+    // col_scale and row_scale accumulate reciprocal scale factors during Ruiz iterations.
+    std::vector<f_t> col_scale(n, 1.0);
 
     // Decide whether Ruiz scaling is needed by checking row-norm imbalance.
     // If max_row_norm / min_row_norm is small, the matrix is already well-conditioned
@@ -59,6 +59,7 @@ i_t scaling(const lp_problem_t<i_t, f_t>& unscaled,
     if (row_norm_ratio < 100.0) {
       settings.log.printf("Skipping Ruiz equilibration (row norm ratio %.1f < 100)\n",
                           row_norm_ratio);
+      column_scaling.assign(n, 1.0);
       return 0;
     }
 
@@ -91,7 +92,7 @@ i_t scaling(const lp_problem_t<i_t, f_t>& unscaled,
           Arow.x[p] *= r[i];
         }
         scaled.rhs[i] *= r[i];
-        row_scale[i] *= r[i];
+        row_scaling[i] *= r[i];
       }
 
       // --- Column scaling: scale each column by 1/sqrt(max|a_ij|) ---
@@ -161,14 +162,15 @@ i_t scaling(const lp_problem_t<i_t, f_t>& unscaled,
       if (max_deviation < 0.1) break;
     }
 
-    // Ruiz col_scale accumulates reciprocals (c[j] = 1/sqrt(norm)), so invert
-    // to store column norms: C(j,j) = 1/col_scale[j].
+    // Ruiz col_scale/row_scaling accumulate reciprocals (c[j] = 1/sqrt(norm)).
+    // Invert to match the output convention: C(j,j) = 1/column_scaling[j],
+    // R(i,i) = 1/row_scaling[i].
+    column_scaling.resize(n);
     for (i_t j = 0; j < n; ++j) {
-      col_scale[j] = f_t(1) / col_scale[j];
+      column_scaling[j] = f_t(1) / col_scale[j];
     }
-    // Same for row_scale: invert so R(i,i) = 1/row_scale[i].
     for (i_t i = 0; i < m; ++i) {
-      row_scale[i] = f_t(1) / row_scale[i];
+      row_scaling[i] = f_t(1) / row_scaling[i];
     }
 
     f_t a_min = std::numeric_limits<f_t>::max();
@@ -186,14 +188,13 @@ i_t scaling(const lp_problem_t<i_t, f_t>& unscaled,
 
   if (!settings.scale_columns) {
     settings.log.printf("Skipping column scaling\n");
-    col_scale.resize(n, 1.0);
+    column_scaling.resize(n, 1.0);
     return 0;
   }
 
-  col_scale.resize(n);
-
-  f_t max_col_norm = 0;
-  f_t min_col_norm = std::numeric_limits<f_t>::max();
+  column_scaling.resize(n);
+  f_t max = 0;
+  f_t min = std::numeric_limits<f_t>::max();
   for (i_t j = 0; j < n; ++j) {
     const i_t col_start = scaled.A.col_start[j];
     const i_t col_end   = scaled.A.col_start[j + 1];
@@ -202,47 +203,47 @@ i_t scaling(const lp_problem_t<i_t, f_t>& unscaled,
       const f_t x = scaled.A.x[p];
       sum += x * x;
     }
-    f_t col_norm_j = col_scale[j] = sum > 0 ? std::sqrt(sum) : 1.0;
-    max_col_norm                   = std::max(col_norm_j, max_col_norm);
-    min_col_norm                   = std::min(col_norm_j, min_col_norm);
+    f_t col_norm_j = column_scaling[j] = sum > 0 ? std::sqrt(sum) : 1.0;
+    max                                = std::max(col_norm_j, max);
+    min                                = std::min(col_norm_j, min);
   }
-  settings.log.printf(
-    "Scaling matrix. Maximum column norm %e, minimum column norm %e\n", max_col_norm, min_col_norm);
-  // C(j, j) = 1/col_scale(j)
+  settings.log.printf("Scaling matrix. Maximum column norm %e, minimum column norm %e\n", max, min);
+  // C(j, j) = 1/column_scaling(j)
 
   // scaled_A = unscaled_A * C
   for (i_t j = 0; j < n; ++j) {
     const i_t col_start = scaled.A.col_start[j];
     const i_t col_end   = scaled.A.col_start[j + 1];
     for (i_t p = col_start; p < col_end; ++p) {
-      scaled.A.x[p] /= col_scale[j];
+      scaled.A.x[p] /= column_scaling[j];
     }
   }
   // scaled_obj = C*unscaled_obj
   for (i_t j = 0; j < n; ++j) {
-    scaled.objective[j] /= col_scale[j];
+    scaled.objective[j] /= column_scaling[j];
   }
   // scaled_lower = C^{-1} * unscaled_lower
   // scaled_upper = C^{-1} * unscaled_upper
   for (i_t j = 0; j < n; ++j) {
-    scaled.lower[j] *= col_scale[j];
-    scaled.upper[j] *= col_scale[j];
+    scaled.lower[j] *= column_scaling[j];
+    scaled.upper[j] *= column_scaling[j];
   }
 
   for (i_t i = 0; i < unscaled.Q.n; ++i) {
     const i_t row_start = unscaled.Q.row_start[i];
     const i_t row_end   = unscaled.Q.row_start[i + 1];
+    i_t row             = i;
     for (i_t p = row_start; p < row_end; ++p) {
-      const i_t col = unscaled.Q.j[p];
-      scaled.Q.x[p] = unscaled.Q.x[p] / (col_scale[i] * col_scale[col]);
+      i_t col       = unscaled.Q.j[p];
+      scaled.Q.x[p] = unscaled.Q.x[p] / (column_scaling[row] * column_scaling[col]);
     }
   }
   return 0;
 }
 
 template <typename i_t, typename f_t>
-void unscale_solution(const std::vector<f_t>& col_scale,
-                      const std::vector<f_t>& row_scale,
+void unscale_solution(const std::vector<f_t>& column_scaling,
+                      const std::vector<f_t>& row_scaling,
                       const std::vector<f_t>& scaled_x,
                       const std::vector<f_t>& scaled_y,
                       const std::vector<f_t>& scaled_z,
@@ -253,17 +254,16 @@ void unscale_solution(const std::vector<f_t>& col_scale,
   const i_t n = scaled_x.size();
   unscaled_x.resize(n);
   unscaled_z.resize(n);
-  // C(j,j) = 1/col_scale[j], so x_orig = x_scaled / col_scale, z_orig = z_scaled * col_scale
   for (i_t j = 0; j < n; ++j) {
-    unscaled_x[j] = scaled_x[j] / col_scale[j];
-    unscaled_z[j] = scaled_z[j] * col_scale[j];
+    unscaled_x[j] = scaled_x[j] / column_scaling[j];
+    unscaled_z[j] = scaled_z[j] * column_scaling[j];
   }
 
   const i_t m = scaled_y.size();
   unscaled_y.resize(m);
-  // R(i,i) = 1/row_scale[i], so y_orig = y_scaled / row_scale
+  // R(i,i) = 1/row_scaling[i], so y_orig = y_scaled / row_scaling
   for (i_t i = 0; i < m; ++i) {
-    unscaled_y[i] = scaled_y[i] / row_scale[i];
+    unscaled_y[i] = scaled_y[i] / row_scaling[i];
   }
 }
 
@@ -272,11 +272,11 @@ void unscale_solution(const std::vector<f_t>& col_scale,
 template int scaling<int, double>(const lp_problem_t<int, double>& unscaled,
                                   const simplex_solver_settings_t<int, double>& settings,
                                   lp_problem_t<int, double>& scaled,
-                                  std::vector<double>& col_scale,
-                                  std::vector<double>& row_scale);
+                                  std::vector<double>& column_scaling,
+                                  std::vector<double>& row_scaling);
 
-template void unscale_solution<int, double>(const std::vector<double>& col_scale,
-                                            const std::vector<double>& row_scale,
+template void unscale_solution<int, double>(const std::vector<double>& column_scaling,
+                                            const std::vector<double>& row_scaling,
                                             const std::vector<double>& scaled_x,
                                             const std::vector<double>& scaled_y,
                                             const std::vector<double>& scaled_z,