From 4a95ad0e6745abe40ae45f3df105cd89778bee1e Mon Sep 17 00:00:00 2001 From: dance858 Date: Sat, 23 May 2026 05:16:53 -0700 Subject: [PATCH 1/2] Tighten loose CSR allocation bounds across 7 sites --- src/atoms/affine/hstack.c | 8 +++++--- src/atoms/affine/sum.c | 6 ++++-- src/atoms/affine/trace.c | 2 +- src/atoms/bivariate_full_dom/matmul.c | 1 + src/problem.c | 8 +++++--- src/utils/CSR_sum.c | 2 +- src/utils/sparse_matrix.c | 3 ++- 7 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/atoms/affine/hstack.c b/src/atoms/affine/hstack.c index 5bbe8fe..6b554d9 100644 --- a/src/atoms/affine/hstack.c +++ b/src/atoms/affine/hstack.c @@ -20,6 +20,7 @@ #include "utils/CSR_sum.h" #include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" +#include "utils/utils.h" #include #include #include @@ -114,9 +115,10 @@ static void wsum_hess_init_impl(expr *node) } /* worst-case scenario the nnz of node->wsum_hess is the sum of children's - nnz */ - CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, nnz); - hnode->CSR_work = new_CSR_matrix(node->n_vars, node->n_vars, nnz); + nnz, capped by the output cell count */ + int nnz_ub = MIN(nnz, node->n_vars * node->n_vars); + CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, nnz_ub); + hnode->CSR_work = new_CSR_matrix(node->n_vars, node->n_vars, nnz_ub); /* fill sparsity pattern */ H->nnz = 0; diff --git a/src/atoms/affine/sum.c b/src/atoms/affine/sum.c index f042d11..58fe57a 100644 --- a/src/atoms/affine/sum.c +++ b/src/atoms/affine/sum.c @@ -91,8 +91,10 @@ static void jacobian_init_impl(expr *node) jacobian_init(x); CSR_matrix *Jx = x->jacobian->to_csr(x->jacobian); - /* we never have to store more than the child's nnz */ - CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, Jx->nnz); + /* we never have to store more than the child's nnz, nor more than the + output's cell count */ + int max_nnz = MIN(Jx->nnz, node->size * node->n_vars); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, max_nnz); node->work->iwork = sp_malloc(MAX(jac->n, Jx->nnz) * sizeof(int)); snode->idx_map = sp_malloc(Jx->nnz * sizeof(int)); diff --git a/src/atoms/affine/trace.c b/src/atoms/affine/trace.c index 2dca9a5..cf25461 100644 --- a/src/atoms/affine/trace.c +++ b/src/atoms/affine/trace.c @@ -66,7 +66,7 @@ static void jacobian_init_impl(expr *node) total_nnz += A->p[row + 1] - A->p[row]; } - CSR_matrix *jac = new_CSR_matrix(1, node->n_vars, total_nnz); + CSR_matrix *jac = new_CSR_matrix(1, node->n_vars, MIN(total_nnz, node->n_vars)); // --------------------------------------------------------------- // fill sparsity pattern and idx_map diff --git a/src/atoms/bivariate_full_dom/matmul.c b/src/atoms/bivariate_full_dom/matmul.c index f75d1e1..dfc7aaa 100644 --- a/src/atoms/bivariate_full_dom/matmul.c +++ b/src/atoms/bivariate_full_dom/matmul.c @@ -241,6 +241,7 @@ static void jacobian_init_chain_rule(expr *node) mnode->term1_CSR = YT_kron_I_alloc(m, k, n, f->work->jacobian_csc); mnode->term2_CSR = I_kron_X_alloc(m, k, n, g->work->jacobian_csc); int max_nnz = mnode->term1_CSR->nnz + mnode->term2_CSR->nnz; + max_nnz = MIN(max_nnz, node->size * node->n_vars); CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, max_nnz); sum_csr_alloc(mnode->term1_CSR, mnode->term2_CSR, jac); node->jacobian = new_sparse_matrix(jac); diff --git a/src/problem.c b/src/problem.c index c2ec84a..cac7224 100644 --- a/src/problem.c +++ b/src/problem.c @@ -251,12 +251,14 @@ void problem_init_hessian(problem *prob) nnz += prob->constraints[i]->wsum_hess->nnz; } - prob->lagrange_hessian = new_CSR_matrix(prob->n_vars, prob->n_vars, nnz); - memset(prob->lagrange_hessian->x, 0, nnz * sizeof(double)); /* affine shortcut */ - prob->stats.nnz_hessian = nnz; + int hess_nnz_ub = MIN(nnz, prob->n_vars * prob->n_vars); + prob->lagrange_hessian = new_CSR_matrix(prob->n_vars, prob->n_vars, hess_nnz_ub); + memset(prob->lagrange_hessian->x, 0, + hess_nnz_ub * sizeof(double)); /* affine shortcut */ prob->hess_idx_map = (int *) sp_malloc(nnz * sizeof(int)); int *iwork = (int *) sp_malloc(MAX(nnz, prob->n_vars) * sizeof(int)); problem_lagrange_hess_fill_sparsity(prob, iwork); + prob->stats.nnz_hessian = prob->lagrange_hessian->nnz; free(iwork); clock_gettime(CLOCK_MONOTONIC, &timer.end); diff --git a/src/utils/CSR_sum.c b/src/utils/CSR_sum.c index b30ef8d..7249acb 100644 --- a/src/utils/CSR_sum.c +++ b/src/utils/CSR_sum.c @@ -367,7 +367,7 @@ CSR_matrix *sum_4_csr_alloc(const CSR_matrix *A, const CSR_matrix *B, const CSR_matrix *inputs[4] = {A, B, C, D}; int m = A->m; int n = A->n; - int nnz_ub = A->nnz + B->nnz + C->nnz + D->nnz; + int nnz_ub = MIN(A->nnz + B->nnz + C->nnz + D->nnz, m * n); /* allocate output and index maps */ CSR_matrix *out = new_CSR_matrix(m, n, nnz_ub); diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c index 91b2467..0e648f0 100644 --- a/src/utils/sparse_matrix.c +++ b/src/utils/sparse_matrix.c @@ -22,6 +22,7 @@ #include "utils/matrix.h" #include "utils/mini_numpy.h" #include "utils/tracked_alloc.h" +#include "utils/utils.h" #include #include @@ -122,7 +123,7 @@ static void sparse_transpose_fill_values(const matrix *self, matrix *out) static matrix *sparse_index_alloc(matrix *self, const int *indices, int n_idxs) { CSR_matrix *Jx = ((sparse_matrix *) self)->csr; - CSR_matrix *J = new_CSR_matrix(n_idxs, self->n, Jx->nnz); + CSR_matrix *J = new_CSR_matrix(n_idxs, self->n, MIN(Jx->nnz, n_idxs * self->n)); J->p[0] = 0; for (int i = 0; i < n_idxs; i++) From de151bdc52cc76920aab11dba7ab80e0ab6f132d Mon Sep 17 00:00:00 2001 From: dance858 Date: Sat, 23 May 2026 05:34:18 -0700 Subject: [PATCH 2/2] comment --- src/problem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/problem.c b/src/problem.c index cac7224..b0748af 100644 --- a/src/problem.c +++ b/src/problem.c @@ -253,8 +253,10 @@ void problem_init_hessian(problem *prob) int hess_nnz_ub = MIN(nnz, prob->n_vars * prob->n_vars); prob->lagrange_hessian = new_CSR_matrix(prob->n_vars, prob->n_vars, hess_nnz_ub); - memset(prob->lagrange_hessian->x, 0, - hess_nnz_ub * sizeof(double)); /* affine shortcut */ + + /* affine shortcut */ + memset(prob->lagrange_hessian->x, 0, hess_nnz_ub * sizeof(double)); + prob->hess_idx_map = (int *) sp_malloc(nnz * sizeof(int)); int *iwork = (int *) sp_malloc(MAX(nnz, prob->n_vars) * sizeof(int)); problem_lagrange_hess_fill_sparsity(prob, iwork);