From be1731361cc18575f479b07f6e396d0859e37f83 Mon Sep 17 00:00:00 2001
From: Tim Davis <DrTimothyAldenDavis@gmail.com>
Date: Mon, 1 Sep 2025 08:47:27 -0500
Subject: [PATCH 1/6] performance bug fix for HPEC25 paper on maxflow

---
 ChangeLog                             |  4 ++
 experimental/algorithm/LAGr_MaxFlow.c | 95 ++++++++++++++++++++++-----
 experimental/benchmark/do_maxflow     | 14 ++++
 experimental/benchmark/maxflow_demo.c | 36 ++++++++--
 experimental/test/test_MaxFlow.c      | 19 +++++-
 5 files changed, 144 insertions(+), 24 deletions(-)
 create mode 100755 experimental/benchmark/do_maxflow

diff --git a/ChangeLog b/ChangeLog
index 17aefd0853..0becc1fe85 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Sept 1, 2025: version 1.2.1
+
+    * MaxFlow: performance bug fix for results in HPEC'25 paper.
+
 July 25, 2025: version 1.2.0
 
     * v2.1 C API: using the new GrB_get/set methods in the C API;
diff --git a/experimental/algorithm/LAGr_MaxFlow.c b/experimental/algorithm/LAGr_MaxFlow.c
index 5b0abfe635..7d0dbc474d 100644
--- a/experimental/algorithm/LAGr_MaxFlow.c
+++ b/experimental/algorithm/LAGr_MaxFlow.c
@@ -15,6 +15,8 @@
 
 //------------------------------------------------------------------------------
 
+// TODO: work in progress (flow_matrix and global relabel)
+
 // LAGr_MaxFlow is a GraphBLAS implementation of the push-relabel algorithm
 // of Baumstark et al. [1]
 //
@@ -27,6 +29,7 @@
 #include "LG_internal.h"
 #include <LAGraph.h>
 
+//#define DBG
 #if LG_SUITESPARSE_GRAPHBLAS_V10
 
 //------------------------------------------------------------------------------
@@ -90,6 +93,8 @@ static GrB_Info LG_global_relabel
     GrB_Index sink,             // sink node
     GrB_Vector src_and_sink,    // mask vector, with just [src sink]
     GrB_UnaryOp GetResidual,    // unary op to compute resid=capacity-flow
+    GrB_BinaryOp global_relabel_accum, // accum for the disconnected assign
+    GrB_Index relabel_value, // value to relabel the disconnected nodes
     // input/output:
     GrB_Vector d,       // d(i) = height/label of node i
     // outputs:
@@ -116,10 +121,10 @@ static GrB_Info LG_global_relabel
     LG_TRY(LAGraph_Cached_OutDegree(G2, msg));
     // compute lvl using bfs on G2, starting at sink node
     LG_TRY(LAGr_BreadthFirstSearch(lvl, NULL, G2, sink, msg));
-    // d<!struct([src,sink])> = lvl
-    GRB_TRY(GrB_assign(d, src_and_sink, NULL, *lvl, GrB_ALL, n, GrB_DESC_SC));
-    // d<!struct(lvl)> = n
-    GRB_TRY(GrB_assign(d, *lvl, NULL, n, GrB_ALL, n, GrB_DESC_SC));
+    // d<!struct([src,sink])> = max(d(i), lvl)
+    GRB_TRY(GrB_assign(d, src_and_sink, global_relabel_accum, *lvl, GrB_ALL, n, GrB_DESC_SC));
+    // d<!struct(lvl)> = max(d(i), relabel_value)
+    GRB_TRY(GrB_assign(d, *lvl, global_relabel_accum, relabel_value, GrB_ALL, n, GrB_DESC_SC));
     LG_FREE_WORK ;
     return (GrB_SUCCESS) ;
 }
@@ -459,7 +464,7 @@ JIT_STR(void LG_MF_MxeMult32(LG_MF_resultTuple32 * z,
   bool j_active = ((*y) > 0) ;
   if ((x->di <  x->dj-1) /* case a */
   ||  (x->di == x->dj-1 && !j_active) /* case b */
-  ||  (x->di == x->dj   && (!j_active || (j_active && (i < j)))) /* case c */
+  ||  (x->di == x->dj && (!j_active || (j_active && (i < j)))) /* case c */
   ||  (x->di == x->dj+1))   /* case d */
   {
       z->residual = x->residual;
@@ -564,6 +569,28 @@ JIT_STR(void LG_MF_ExtractMatrixFlow(double* flow, const LG_MF_flowEdge* edge){*
 
 #endif
 
+#ifdef DBG
+void print_compareVec(const GrB_Vector vec) {
+  GxB_Iterator iter;
+  GxB_Iterator_new(&iter);
+  GrB_Info info = GxB_Vector_Iterator_attach(iter, vec, NULL);
+  if(info < 0){
+    printf("error with matrix passed in");
+  }
+  info = GxB_Vector_Iterator_seek(iter, 0);
+  while(info != GxB_EXHAUSTED){
+    GrB_Index i;
+    i = GxB_Vector_Iterator_getIndex(iter);
+    LG_MF_compareTuple32 e;
+    GxB_Iterator_get_UDT(iter, &e);
+    printf("(%ld, 0)         (di: %d, dj: %d, J: %d, residual: %lf) \n", i, e.di, e.dj, e.j, e.residual);
+    info = GxB_Vector_Iterator_next(iter);
+  }
+  GrB_free(&iter);
+}
+#endif
+
+
 //------------------------------------------------------------------------------
 // LAGraph_MaxFlow
 //------------------------------------------------------------------------------
@@ -766,6 +793,9 @@ int LAGr_MaxFlow
 
   GrB_Type Integer_Type = NULL ;
 
+  //accum operator for the global relabel
+  GrB_BinaryOp global_relabel_accum = NULL ;
+
   #ifdef COVERAGE
   // Just for test coverage, use 64-bit ints for n > 100.  Do not use this
   // rule in production!
@@ -782,6 +812,9 @@ int LAGr_MaxFlow
 
     Integer_Type = GrB_INT64 ;
 
+    // use the 64 bit max operator 
+    global_relabel_accum = GrB_MAX_INT64 ;
+
     // create types for computation
     GRB_TRY(GxB_Type_new(&ResultTuple, sizeof(LG_MF_resultTuple64),
         "LG_MF_resultTuple64", RESULTTUPLE_STR64));
@@ -852,6 +885,9 @@ int LAGr_MaxFlow
 
     Integer_Type = GrB_INT32 ;
 
+    // use 32 bit max op
+    global_relabel_accum = GrB_MAX_INT32 ;
+
     // create types for computation
     GRB_TRY(GxB_Type_new(&ResultTuple, sizeof(LG_MF_resultTuple32),
         "LG_MF_resultTuple32", RESULTTUPLE_STR32));
@@ -943,8 +979,14 @@ int LAGr_MaxFlow
   GRB_TRY(GrB_apply(R, A, NULL, ResidualBackward, AT, GrB_DESC_SC));
 
   // initial global relabeling
-  LG_TRY (LG_global_relabel (R, sink, src_and_sink, GetResidual, d, &lvl, msg)) ;
-
+  // relabel to 2*n to prevent any flow from going to the
+  // disconnected nodes.
+  GrB_Index relabel_value = 2*n ;
+  LG_TRY (LG_global_relabel (R, sink, src_and_sink, GetResidual, global_relabel_accum, relabel_value, d, &lvl, msg)) ;
+
+  // reset to n
+  relabel_value = n ;
+  
   // create excess vector e and initial flows from the src to its neighbors
   // e<struct(lvl)> = A (src,:)
   GRB_TRY(GrB_Vector_new(&e, GrB_FP64, n));
@@ -968,20 +1010,27 @@ int LAGr_MaxFlow
 
   for (int64_t iter = 0 ; n_active > 0 ; iter++)
   {
-
+    #ifdef DBG
+      printf ("iter: %ld, n_active %ld\n", iter, n_active) ;
+    #endif
     //--------------------------------------------------------------------------
     // Part 1: global relabeling
     //--------------------------------------------------------------------------
 
-    if ((iter > 0) && (flow_mtx != NULL) && (iter % 12 == 0))
+    if ((iter > 0)  && (iter % 12 == 0))
     {
-      LG_TRY (LG_global_relabel (R, sink, src_and_sink, GetResidual, d, &lvl, msg)) ;
-      // delete nodes in e that cannot be reached from the sink
-      // e<!struct(lvl)> = empty scalar
-      GrB_assign (e, lvl, NULL, empty, GrB_ALL, n, GrB_DESC_SC) ;
+      #ifdef DBG
+        printf ("relabel at : %ld\n", iter) ;
+      #endif
+      LG_TRY (LG_global_relabel (R, sink, src_and_sink, GetResidual, global_relabel_accum, relabel_value, d, &lvl, msg)) ;
+      if(flow_mtx == NULL){
+        // delete nodes in e that cannot be reached from the sink
+	//  e<!struct(lvl)> = empty scalar
+	GrB_assign (e, lvl, NULL, empty, GrB_ALL, n, GrB_DESC_SC) ;
+	GRB_TRY(GrB_Vector_nvals(&n_active, e));
+	if(n_active == 0) break;
+      }
       GrB_free(&lvl);
-      GRB_TRY(GrB_Vector_nvals(&n_active, e));
-      if(n_active == 0) break;
     }
 
     //--------------------------------------------------------------------------
@@ -1001,6 +1050,14 @@ int LAGr_MaxFlow
     // create Map matrix from pattern and values of yd
     // yd = CreateCompareVec (y,d) using eWiseMult
     GRB_TRY(GrB_eWiseMult(yd, NULL, NULL, CreateCompareVec, y,  d, NULL));
+
+    #ifdef DBG
+      print_compareVec(yd);
+      GxB_print(d, 3);
+      GxB_print(e, 3);
+    #endif
+    
+    
     // Jvec = ExtractJ (yd), where Jvec(i) = yd(i)->j
     GRB_TRY(GrB_apply(Jvec, NULL, NULL, ExtractJ, yd, NULL));
     GRB_TRY(GrB_Matrix_clear(Map));
@@ -1043,7 +1100,12 @@ int LAGr_MaxFlow
     // create the Delta matrix from delta_vec and y
     // note that delta_vec has the same structure as y
     // Jvec = ExtractYJ (y), where Jvec(i) = y(i)->j
+    // if Jvec has no values, then there is no possible
+    // candidates to push to, so the algorithm terminates
     GRB_TRY(GrB_apply(Jvec, NULL, NULL, ExtractYJ, y, NULL));
+    GrB_Index J_n;
+    GRB_TRY(GrB_Vector_nvals(&J_n, Jvec));
+    if(J_n == 0) break;
     GRB_TRY(GrB_Matrix_clear(Delta));
     GRB_TRY(GrB_Matrix_build(Delta, delta_vec, Jvec, delta_vec, GxB_IGNORE_DUP, desc));
 
@@ -1057,7 +1119,7 @@ int LAGr_MaxFlow
 
     // reduce Delta to delta_vec
     // delta_vec = sum (Delta), summing up each row of Delta
-    GRB_TRY(GrB_reduce(delta_vec, NULL, NULL, GrB_PLUS_FP64, Delta, GrB_DESC_T0));
+    GRB_TRY(GrB_reduce(delta_vec, NULL, NULL, GrB_PLUS_MONOID_FP64, Delta, GrB_DESC_T0));
 
     // add delta_vec to e
     // e<struct(delta_vec)> += delta_vec
@@ -1065,6 +1127,7 @@ int LAGr_MaxFlow
 
     // augment maxflow for all active nodes
     LG_TRY (LG_augment_maxflow (f, e, sink, src_and_sink, &n_active, msg)) ;
+
   }
 
   //----------------------------------------------------------------------------
diff --git a/experimental/benchmark/do_maxflow b/experimental/benchmark/do_maxflow
new file mode 100755
index 0000000000..7a40625343
--- /dev/null
+++ b/experimental/benchmark/do_maxflow
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+
+MTX="/home/davis/matrices"
+GAP="/home/davis/GAP"
+
+../../build/experimental/benchmark/maxflow_demo $MTX/com-Youtube.mtx 4 275
+../../build/experimental/benchmark/maxflow_demo $MTX/com-LiveJournal.mtx 2 88
+../../build/experimental/benchmark/maxflow_demo $MTX/com-Orkut.mtx 43 75
+
+../../build/experimental/benchmark/maxflow_demo $GAP/GAP-web/GAP-web.grb 64712 500
+../../build/experimental/benchmark/maxflow_demo $GAP/GAP-road/GAP-road.grb 4 1075
+../../build/experimental/benchmark/maxflow_demo $GAP/GAP-twitter/GAP-twitter.grb 13 17
+
diff --git a/experimental/benchmark/maxflow_demo.c b/experimental/benchmark/maxflow_demo.c
index 86f7a1985c..b09a04baf2 100644
--- a/experimental/benchmark/maxflow_demo.c
+++ b/experimental/benchmark/maxflow_demo.c
@@ -7,6 +7,7 @@
 
 #define LAGRAPH_CATCH(info)                                                    \
   {                                                                            \
+    printf ("result: %d %s\n", info, msg) ;                                    \
     LAGraph_Delete(&G, msg);                                                   \
     return (info);                                                             \
   }
@@ -19,37 +20,58 @@ int main (int argc, char ** argv){
 
   char msg[LAGRAPH_MSG_LEN];
   LAGraph_Graph G = NULL;
-  
+
   double flow = 0;
-  GrB_Index T=0, S=0;
+  GrB_Index T=0, S=0, nflow ;
+  GrB_Matrix flow_matrix = NULL ;
 
   LAGRAPH_TRY(LAGraph_Init(msg));
-  
+
   //read in graph
   LAGRAPH_TRY(readproblem(&G, NULL, false, true, false, NULL, true, argc, argv));
+  printf ("read in the problem\n") ;
+  double t1 = LAGraph_WallClockTime();
   LAGRAPH_TRY(LAGraph_Cached_AT(G, msg));
+  t1 = LAGraph_WallClockTime() - t1 ;
+  printf ("cached AT, time %g\n", t1) ;
+  t1 = LAGraph_WallClockTime();
   LAGRAPH_TRY(LAGraph_Cached_EMin(G, msg));
+  t1 = LAGraph_WallClockTime() - t1 ;
+  printf ("cached EMin, time %g\n", t1) ;
 
   char* end1, *end2;
   S = strtoul(argv[2], &end1, 10);
   T = strtoul(argv[3], &end2, 10);
   if(argc > 4){
     int num_threads = atoi(argv[4]);
-    LAGRAPH_TRY(LAGraph_SetNumThreads(1, num_threads, msg)); 
+    LAGRAPH_TRY(LAGraph_SetNumThreads(1, num_threads, msg));
   }
 
+  int nthreads_outer, nthreads_inner ;
+  LAGRAPH_TRY(LAGraph_GetNumThreads(&nthreads_outer, &nthreads_inner, msg));
+  printf ("nthreads: %d %d\n", nthreads_outer, nthreads_inner) ;
+
   if(end1 == 0 || end2 == 0){
     printf("values for source and sink are incorrect.\n");
   }
-  printf("Starting max flow from %ld to %ld", S, T);
+  printf("Starting max flow from %ld to %ld\n", S, T);
 
-  //LG_SET_BURBLE(1);
+  // LG_SET_BURBLE(1);
   double time = LAGraph_WallClockTime();
   LAGRAPH_TRY(LAGr_MaxFlow(&flow, NULL, G, S, T, msg));
   time = LAGraph_WallClockTime() - time;
   printf("Time for LAGraph_MaxFlow: %g sec\n", time);
   printf("Max Flow is: %lf\n", flow);
- 
+
+  /* printf("Starting max flow from %ld to %ld, with flow_matrix returned\n", S, T); */
+  /* time = LAGraph_WallClockTime(); */
+  /* LAGRAPH_TRY(LAGr_MaxFlow(&flow, &flow_matrix, G, S, T, msg)); */
+  /* time = LAGraph_WallClockTime() - time; */
+  /* printf("Time for LAGraph_MaxFlow with flow matrix: %g sec\n", time); */
+  /* printf("Max Flow is: %lf\n", flow); */
+  /* GRB_TRY (GrB_Matrix_nvals (&nflow, flow_matrix)) ; */
+  /* printf("# of entries in flow matrix: %lu\n", nflow); */
+
   LAGraph_Delete(&G, msg);
   LAGRAPH_TRY(LAGraph_Finalize(msg));
 
diff --git a/experimental/test/test_MaxFlow.c b/experimental/test/test_MaxFlow.c
index ea04c5b0eb..c685e3fb15 100644
--- a/experimental/test/test_MaxFlow.c
+++ b/experimental/test/test_MaxFlow.c
@@ -53,7 +53,7 @@ test_info tests[] = {
 void test_MaxFlow(void) {
 #if LG_SUITESPARSE_GRAPHBLAS_V10
   LAGraph_Init(msg);
-//OK(LG_SET_BURBLE(1));
+  //OK(LG_SET_BURBLE(1));
   OK(LG_SET_BURBLE(0));
   for(uint8_t test = 0; test < NTESTS; test++){
     GrB_Matrix A=NULL;
@@ -87,6 +87,23 @@ void test_MaxFlow(void) {
     TEST_CHECK(flow == tests[test].F);
     OK(GxB_Global_Option_set(GxB_JIT_C_CONTROL, GxB_JIT_ON));
 
+    // test all source/destination pairs for small problems
+    GrB_Index n ;
+    OK (GrB_Matrix_nrows (&n, G->A)) ;
+    printf ("n: %ld\n", (int64_t) n) ;
+    if (n < 100)
+    {
+        for (GrB_Index src = 0 ; src < n ; src++)
+        {
+            for (GrB_Index dest = 0 ; dest < n ; dest++)
+            {
+              printf("src: %ld, dest: %ld\n", src, dest);
+                if (src == dest) continue ;
+                OK(LAGr_MaxFlow(&flow, NULL, G, src, dest, msg));
+            }
+        }
+    }
+
     //free work
     OK(LAGraph_Delete(&G, msg));
   }

From 03988faa8fe70805e890bb24c8532e2114b360e9 Mon Sep 17 00:00:00 2001
From: Tim Davis <DrTimothyAldenDavis@gmail.com>
Date: Mon, 1 Sep 2025 08:48:14 -0500
Subject: [PATCH 2/6] strange build error on macos, causing former passing
 builds to fail

---
 .github/workflows/build.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index ad76dff740..ff5b7ff06e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -78,6 +78,7 @@ jobs:
 
     - name: Install dependencies
       run: |
+        brew uninstall cmake
         brew tap-new libomp/cask
         brew extract --version=14.0.6 libomp libomp/cask
         brew install libomp@14.0.6

From c23df3ac0156228883bfa328640aac13c8826175 Mon Sep 17 00:00:00 2001
From: Tim Davis <DrTimothyAldenDavis@gmail.com>
Date: Tue, 2 Sep 2025 11:12:45 -0500
Subject: [PATCH 3/6] rename variables to match HPEC25 paper; benchmark with
 and w/o flow matrix

---
 experimental/algorithm/LAGr_MaxFlow.c | 198 ++++++++++++++------------
 experimental/benchmark/do_maxflow     |   5 +-
 experimental/benchmark/maxflow_demo.c |  17 +--
 3 files changed, 116 insertions(+), 104 deletions(-)

diff --git a/experimental/algorithm/LAGr_MaxFlow.c b/experimental/algorithm/LAGr_MaxFlow.c
index 7d0dbc474d..b77f4c732c 100644
--- a/experimental/algorithm/LAGr_MaxFlow.c
+++ b/experimental/algorithm/LAGr_MaxFlow.c
@@ -15,8 +15,6 @@
 
 //------------------------------------------------------------------------------
 
-// TODO: work in progress (flow_matrix and global relabel)
-
 // LAGr_MaxFlow is a GraphBLAS implementation of the push-relabel algorithm
 // of Baumstark et al. [1]
 //
@@ -78,8 +76,8 @@ static GrB_Info LG_augment_maxflow
 #undef  LG_FREE_WORK
 #define LG_FREE_WORK                        \
 {                                           \
-    GrB_free(&C);                           \
-    GrB_free(&T);                           \
+    GrB_free(&R_hat);                       \
+    GrB_free(&R_hat_transpose);             \
     LAGraph_Delete(&G2, msg);               \
 }
 
@@ -102,22 +100,22 @@ static GrB_Info LG_global_relabel
     char *msg
 )
 {
-    GrB_Matrix C = NULL, T = NULL ;
+    GrB_Matrix R_hat = NULL, R_hat_transpose = NULL ;
     LAGraph_Graph G2 = NULL ;
     GrB_Index n ;
     GRB_TRY(GrB_Matrix_nrows(&n, R)) ;
-    GRB_TRY(GrB_Matrix_new(&T, GrB_FP64, n, n));
-    GRB_TRY(GrB_Matrix_new(&C, GrB_FP64, n, n));
-    // C = GetResidual (R), computing the residual of each edge
-    GRB_TRY(GrB_apply(C, NULL, NULL, GetResidual, R, NULL)) ;
-    // prune zeros and negative entries from C
-    GRB_TRY(GrB_select(C, NULL, NULL, GrB_VALUEGT_FP64, C, 0, NULL)) ;
-    // T = C'
-    GRB_TRY(GrB_transpose(T, NULL, NULL, C, NULL));
+    GRB_TRY(GrB_Matrix_new(&R_hat_transpose, GrB_FP64, n, n));
+    GRB_TRY(GrB_Matrix_new(&R_hat, GrB_FP64, n, n));
+    // R_hat = GetResidual (R), computing the residual of each edge
+    GRB_TRY(GrB_apply(R_hat, NULL, NULL, GetResidual, R, NULL)) ;
+    // prune zeros and negative entries from R_hat
+    GRB_TRY(GrB_select(R_hat, NULL, NULL, GrB_VALUEGT_FP64, R_hat, 0, NULL)) ;
+    // R_hat_transpose = R_hat'
+    GRB_TRY(GrB_transpose(R_hat_transpose, NULL, NULL, R_hat, NULL));
     // construct G2 and its cached transpose and outdegree
-    LG_TRY(LAGraph_New(&G2, &T, LAGraph_ADJACENCY_DIRECTED, msg));
-    G2->AT = C ;
-    C = NULL ;
+    LG_TRY(LAGraph_New(&G2, &R_hat_transpose, LAGraph_ADJACENCY_DIRECTED, msg));
+    G2->AT = R_hat ;
+    R_hat = NULL ;
     LG_TRY(LAGraph_Cached_OutDegree(G2, msg));
     // compute lvl using bfs on G2, starting at sink node
     LG_TRY(LAGr_BreadthFirstSearch(lvl, NULL, G2, sink, msg));
@@ -131,8 +129,7 @@ static GrB_Info LG_global_relabel
 
 //------------------------------------------------------------------------------
 
-#undef  LG_FREE_WORK
-#define LG_FREE_WORK                        \
+#define LG_FREE_WORK_EXCEPT_R               \
 {                                           \
     GrB_free(&FlowEdge);                    \
     GrB_free(&CompareTuple);                \
@@ -140,12 +137,11 @@ static GrB_Info LG_global_relabel
     GrB_free(&e);                           \
     GrB_free(&d);                           \
     GrB_free(&theta);                       \
-    GrB_free(&R);                           \
     GrB_free(&Delta);                       \
     GrB_free(&delta_vec);                   \
-    GrB_free(&Map);                         \
-    GrB_free(&y);                           \
-    GrB_free(&yd);                          \
+    GrB_free(&C);                           \
+    GrB_free(&push_vector);                 \
+    GrB_free(&pd);                          \
     GrB_free(&src_and_sink);                \
     GrB_free(&Jvec);                        \
     GrB_free(&Prune);                       \
@@ -179,7 +175,14 @@ static GrB_Info LG_global_relabel
     GrB_free(&MakeFlow);                    \
     GrB_free(&GetResidual);                 \
     GrB_free(&lvl) ;                        \
+}
+
+#undef  LG_FREE_WORK
+#define LG_FREE_WORK                        \
+{                                           \
+    LG_FREE_WORK_EXCEPT_R                   \
     GrB_free(&ExtractMatrixFlow);           \
+    GrB_free(&R);                           \
 }
 
 #undef  LG_FREE_ALL
@@ -211,7 +214,7 @@ JIT_STR(typedef struct{
   double flow;          /* current flow along this edge (i,j); can be negative */
   } LG_MF_flowEdge;, FLOWEDGE_STR)
 
-// type of the y vector for y = R*d: LG_MF_resultTuple64/32 (ResultTuple)
+// type of the push_vector vector for push_vector = R*d: LG_MF_resultTuple64/32 (ResultTuple)
 JIT_STR(typedef struct{
   double residual;      /* residual = capacity - flow for the edge (i,j) */
   int64_t d;            /* d(j) of the target node j */
@@ -223,7 +226,7 @@ JIT_STR(typedef struct{
   int32_t j;            /* node id of the target node j */
   } LG_MF_resultTuple32;, RESULTTUPLE_STR32)
 
-// type of the Map matrix and yd vector: LG_MF_compareTuple64/32 (CompareTuple)
+// type of the C matrix and pd vector: LG_MF_compareTuple64/32 (CompareTuple)
 JIT_STR(typedef struct{
   double residual;      /* residual = capacity - flow for the edge (i,j) */
   int64_t di;           /* d(i) for node i */
@@ -350,7 +353,7 @@ JIT_STR(void LG_MF_RxdAdd32(LG_MF_resultTuple32 * z,
   }, RXDADD_STR32)
 
 //------------------------------------------------------------------------------
-// unary ops for delta_vec = ResidualFlow (y)
+// unary ops for delta_vec = ResidualFlow (push_vector)
 //------------------------------------------------------------------------------
 
 JIT_STR(void LG_MF_ResidualFlow64(double *z, const LG_MF_resultTuple64 *x)
@@ -370,7 +373,7 @@ JIT_STR(void LG_MF_UpdateFlow(LG_MF_flowEdge *z,
   }, UPDATEFLOW_STR)
 
 //------------------------------------------------------------------------------
-// binary op for d<struct(y)> = Relabel (d, y) using eWiseMult
+// binary op for d<struct(push_vector)> = Relabel (d, push_vector)
 //------------------------------------------------------------------------------
 
 JIT_STR(void LG_MF_Relabel64(int64_t *z,
@@ -394,7 +397,7 @@ JIT_STR(void LG_MF_Relabel32(int32_t *z,
   }, RELABEL_STR32)
 
 //------------------------------------------------------------------------------
-// unary op for Jvec = ExtractJ (yd), where Jvec(i) = yd(i)->j
+// unary op for Jvec = ExtractJ (pd), where Jvec(i) = pd(i)->j
 //------------------------------------------------------------------------------
 
 JIT_STR(void LG_MF_ExtractJ64(int64_t *z, const LG_MF_compareTuple64 *x) { (*z) = x->j; }, EXTRACTJ_STR64)
@@ -402,7 +405,7 @@ JIT_STR(void LG_MF_ExtractJ64(int64_t *z, const LG_MF_compareTuple64 *x) { (*z)
 JIT_STR(void LG_MF_ExtractJ32(int32_t *z, const LG_MF_compareTuple32 *x) { (*z) = x->j; }, EXTRACTJ_STR32)
 
 //------------------------------------------------------------------------------
-// unary op for Jvec = ExtractYJ (y), where Jvec(i) = y(i)->j
+// unary op for Jvec = ExtractYJ(push_vector), where Jvec(i) = push_vector(i)->j
 //------------------------------------------------------------------------------
 
 JIT_STR(void LG_MF_ExtractYJ64(int64_t *z, const LG_MF_resultTuple64 *x) { (*z) = x->j; }, EXTRACTYJ_STR64)
@@ -430,10 +433,10 @@ JIT_STR(void LG_MF_InitBack(LG_MF_flowEdge * z,
   }, INITBACK_STR)
 
 //------------------------------------------------------------------------------
-// y = Map*e semiring
+// push_vector = C*e semiring
 //------------------------------------------------------------------------------
 
-// multiplicative operator, z = Map(i,j)*e(j), 64-bit case
+// multiplicative operator, z = C(i,j)*e(j), 64-bit case
 JIT_STR(void LG_MF_MxeMult64(LG_MF_resultTuple64 * z,
     const LG_MF_compareTuple64 * x, GrB_Index i, GrB_Index j,
     const double * y, GrB_Index iy, GrB_Index jy,
@@ -456,7 +459,7 @@ JIT_STR(void LG_MF_MxeMult64(LG_MF_resultTuple64 * z,
   }
 }, MXEMULT_STR64)
 
-// multiplicative operator, z = Map(i,j)*e(j), 32-bit case
+// multiplicative operator, z = C(i,j)*e(j), 32-bit case
 JIT_STR(void LG_MF_MxeMult32(LG_MF_resultTuple32 * z,
     const LG_MF_compareTuple32 * x, GrB_Index i, GrB_Index j,
     const double * y, GrB_Index iy, GrB_Index jy,
@@ -494,7 +497,7 @@ JIT_STR(void LG_MF_MxeAdd32(LG_MF_resultTuple32 * z,
   }, MXEADD_STR32)
 
 //------------------------------------------------------------------------------
-// binary op for yd = CreateCompareVec (y,d) using eWiseMult
+// binary op for pd = CreateCompareVec (push_vector,d) using eWiseMult
 //------------------------------------------------------------------------------
 
 JIT_STR(void LG_MF_CreateCompareVec64(LG_MF_compareTuple64 *comp,
@@ -515,7 +518,7 @@ JIT_STR(void LG_MF_CreateCompareVec32(LG_MF_compareTuple32 *comp,
   }, CREATECOMPAREVEC_STR32)
 
 //------------------------------------------------------------------------------
-// index unary op to remove empty tuples from y (for which y->j is -1)
+// index unary op to remove empty tuples from push_vector (for which y->j is -1)
 //------------------------------------------------------------------------------
 
 JIT_STR(void LG_MF_Prune64(bool * z, const LG_MF_resultTuple64 * x,
@@ -554,7 +557,7 @@ JIT_STR(void LG_MF_CheckInvariant32(bool *z, const int32_t *height,
 #endif
 
 //------------------------------------------------------------------------------
-// binary op for C = GetResidual (R), computing the residual of each edge
+// binary op for R_hat = GetResidual (R), computing the residual of each edge
 //------------------------------------------------------------------------------
 
 JIT_STR(void LG_MF_GetResidual(double * res, const LG_MF_flowEdge * flow_edge){
@@ -565,7 +568,9 @@ JIT_STR(void LG_MF_GetResidual(double * res, const LG_MF_flowEdge * flow_edge){
 // unary op for flow_mtx = ExtractMatrixFlow (R)
 //------------------------------------------------------------------------------
 
-JIT_STR(void LG_MF_ExtractMatrixFlow(double* flow, const LG_MF_flowEdge* edge){*flow = edge->flow;}, EMFLOW_STR)
+JIT_STR(void LG_MF_ExtractMatrixFlow(double* flow, const LG_MF_flowEdge* edge){
+    *flow = edge->flow;
+}, EMFLOW_STR)
 
 #endif
 
@@ -638,8 +643,8 @@ int LAGr_MaxFlow
   GrB_Vector src_and_sink = NULL ;
   GrB_Index n_active = INT64_MAX ;
 
-  // semiring and vectors for y<struct(e)> = R x d
-  GrB_Vector y = NULL ;
+  // semiring and vectors for push_vector<struct(e)> = R*d
+  GrB_Vector push_vector = NULL ;
   GrB_IndexUnaryOp Prune = NULL ;
   GxB_IndexBinaryOp RxdIndexMult = NULL ;
   GrB_BinaryOp RxdAdd = NULL, RxdMult = NULL ;
@@ -647,16 +652,16 @@ int LAGr_MaxFlow
   GrB_Semiring RxdSemiring = NULL ;
   GrB_Scalar theta = NULL ;
 
-  // binary op and yd
-  GrB_Vector yd = NULL ;
+  // binary op and pd
+  GrB_Vector pd = NULL ;
   GrB_BinaryOp CreateCompareVec = NULL ;
 
   // utility vectors, Matrix, and ops for mapping
-  GrB_Matrix Map = NULL ;
+  GrB_Matrix C = NULL ;         // matrix of candidate pushes
   GrB_Vector Jvec = NULL ;
   GrB_UnaryOp ExtractJ = NULL, ExtractYJ = NULL ;
 
-  // Map*e semiring
+  // C*e semiring
   GrB_Semiring MxeSemiring = NULL ;
   GrB_Monoid MxeAddMonoid = NULL ;
   GrB_BinaryOp MxeAdd = NULL, MxeMult = NULL ;
@@ -664,6 +669,8 @@ int LAGr_MaxFlow
 
   // to extract the residual flow
   GrB_UnaryOp ResidualFlow = NULL ;
+
+  // to extract the final flows for the flow matrix
   GrB_UnaryOp ExtractMatrixFlow = NULL ;
 
   // Delta structures
@@ -779,14 +786,6 @@ int LAGr_MaxFlow
   GRB_TRY(GrB_Scalar_new(&theta, GrB_BOOL));        // unused placeholder
   GRB_TRY(GrB_Scalar_setElement_BOOL(theta, false));
 
-  // create op for optional output flow_mtx
-  if (flow_mtx != NULL)
-  {
-    GRB_TRY(GxB_UnaryOp_new(&ExtractMatrixFlow,
-        F_UNARY(LG_MF_ExtractMatrixFlow), GrB_FP64, FlowEdge,
-        "LG_MF_ExtractMatrixFlow", EMFLOW_STR));
-  }
-
   //----------------------------------------------------------------------------
   // determine the integer type to use for the problem
   //----------------------------------------------------------------------------
@@ -844,7 +843,7 @@ int LAGr_MaxFlow
     LG_MF_resultTuple64 id = {.d = INT64_MAX, .j = -1, .residual = 0};
     GRB_TRY(GrB_Monoid_new_UDT(&RxdAddMonoid, RxdAdd, &id));
 
-    // create binary op for yd
+    // create binary op for pd
     GRB_TRY(GxB_BinaryOp_new(&CreateCompareVec,
         F_BINARY(LG_MF_CreateCompareVec64), CompareTuple, ResultTuple, GrB_INT64,
         "LG_MF_CreateCompareVec64", CREATECOMPAREVEC_STR64));
@@ -862,7 +861,7 @@ int LAGr_MaxFlow
         F_UNARY(LG_MF_ExtractYJ64), GrB_INT64, ResultTuple,
         "LG_MF_ExtractYJ64", EXTRACTYJ_STR64));
 
-    // create ops for Map*e semiring
+    // create ops for C*e semiring
     GRB_TRY(GxB_IndexBinaryOp_new(&MxeIndexMult,
         F_INDEX_BINARY(LG_MF_MxeMult64), ResultTuple, CompareTuple, GrB_FP64, GrB_BOOL,
         "LG_MF_MxeMult64", MXEMULT_STR64));
@@ -916,7 +915,7 @@ int LAGr_MaxFlow
     LG_MF_resultTuple32 id = {.d = INT32_MAX, .j = -1, .residual = 0};
     GRB_TRY(GrB_Monoid_new_UDT(&RxdAddMonoid, RxdAdd, &id));
 
-    // create binary op for yd
+    // create binary op for pd
     GRB_TRY(GxB_BinaryOp_new(&CreateCompareVec,
         F_BINARY(LG_MF_CreateCompareVec32), CompareTuple, ResultTuple, GrB_INT32,
         "LG_MF_CreateCompareVec32", CREATECOMPAREVEC_STR32));
@@ -934,7 +933,7 @@ int LAGr_MaxFlow
         F_UNARY(LG_MF_ExtractYJ32), GrB_INT32, ResultTuple,
         "LG_MF_ExtractYJ32", EXTRACTYJ_STR32));
 
-    // create ops for Map*e semiring
+    // create ops for C*e semiring
     GRB_TRY(GxB_IndexBinaryOp_new(&MxeIndexMult,
         F_INDEX_BINARY(LG_MF_MxeMult32), ResultTuple, CompareTuple, GrB_FP64, GrB_BOOL,
         "LG_MF_MxeMult32", MXEMULT_STR32));
@@ -954,15 +953,15 @@ int LAGr_MaxFlow
   // create remaining vectors, matrices, descriptor, and semirings
   //----------------------------------------------------------------------------
 
-  GRB_TRY(GrB_Matrix_new(&Map, CompareTuple, n,n));
+  GRB_TRY(GrB_Matrix_new(&C, CompareTuple, n,n));
   GRB_TRY(GrB_Vector_new(&Jvec, Integer_Type, n));
-  GRB_TRY(GrB_Vector_new(&yd, CompareTuple, n));
-  GRB_TRY(GrB_Vector_new(&y, ResultTuple, n));
+  GRB_TRY(GrB_Vector_new(&pd, CompareTuple, n));
+  GRB_TRY(GrB_Vector_new(&push_vector, ResultTuple, n));
 
   GRB_TRY(GrB_Semiring_new(&RxdSemiring, RxdAddMonoid, RxdMult));
   GRB_TRY(GrB_Semiring_new(&MxeSemiring, MxeAddMonoid, MxeMult));
 
-  // create descriptor for building the Map and Delta matrices
+  // create descriptor for building the C and Delta matrices
   GRB_TRY(GrB_Descriptor_new(&desc));
   GRB_TRY(GrB_set(desc, GxB_USE_INDICES, GxB_ROWINDEX_LIST));
 
@@ -1037,50 +1036,52 @@ int LAGr_MaxFlow
     // Part 2: deciding where to push
     //--------------------------------------------------------------------------
 
-    // y<struct(e),replace> = R*d using the RxdSemiring
-    GRB_TRY(GrB_mxv(y, e, NULL, RxdSemiring, R, d, GrB_DESC_RS));
+    // push_vector<struct(e),replace> = R*d using the RxdSemiring
+    GRB_TRY(GrB_mxv(push_vector, e, NULL, RxdSemiring, R, d, GrB_DESC_RS));
 
-    // remove empty tuples (0,inf,-1) from y
-    GRB_TRY(GrB_select(y, NULL, NULL, Prune, y, 0, NULL));
+    // remove empty tuples (0,inf,-1) from push_vector
+    GRB_TRY(GrB_select(push_vector, NULL, NULL, Prune, push_vector, 0, NULL));
 
     //--------------------------------------------------------------------------
     // Part 3: verifying the pushes
     //--------------------------------------------------------------------------
 
-    // create Map matrix from pattern and values of yd
-    // yd = CreateCompareVec (y,d) using eWiseMult
-    GRB_TRY(GrB_eWiseMult(yd, NULL, NULL, CreateCompareVec, y,  d, NULL));
+    // create C matrix (Candidate pushes) from pattern and values of pd
+    // pd = CreateCompareVec (push_vector,d) using eWiseMult
+    GRB_TRY(GrB_eWiseMult(pd, NULL, NULL, CreateCompareVec, push_vector, d, NULL));
 
     #ifdef DBG
-      print_compareVec(yd);
+      print_compareVec(pd);
       GxB_print(d, 3);
       GxB_print(e, 3);
     #endif
     
-    
-    // Jvec = ExtractJ (yd), where Jvec(i) = yd(i)->j
-    GRB_TRY(GrB_apply(Jvec, NULL, NULL, ExtractJ, yd, NULL));
-    GRB_TRY(GrB_Matrix_clear(Map));
-    GRB_TRY(GrB_Matrix_build(Map, yd, Jvec, yd, GxB_IGNORE_DUP, desc));
-
-    // make e dense for Map computation
+    // Jvec = ExtractJ (pd), where Jvec(i) = pd(i)->j
+    GRB_TRY(GrB_apply(Jvec, NULL, NULL, ExtractJ, pd, NULL));
+    GRB_TRY(GrB_Matrix_clear(C));
+    GRB_TRY(GrB_Matrix_build(C, pd, Jvec, pd, GxB_IGNORE_DUP, desc));
+    GRB_TRY(GrB_Vector_clear(pd));
+    GRB_TRY(GrB_Vector_clear(Jvec));
+
+    // make e dense for C computation
     // TODO: consider keeping e in bitmap/full format only,
     // or always full with e(i)=0 denoting a non-active node.
     GRB_TRY(GrB_assign(e, e, NULL, 0, GrB_ALL, n, GrB_DESC_SC));
 
-    // y = Map*e using the MxeSemiring
-    GRB_TRY(GrB_mxv(y, NULL, NULL, MxeSemiring, Map, e, NULL));
+    // push_vector = C*e using the MxeSemiring
+    GRB_TRY(GrB_mxv(push_vector, NULL, NULL, MxeSemiring, C, e, NULL));
+    GRB_TRY(GrB_Matrix_clear(C));
 
-    // remove empty tuples (0,inf,-1) from y
-    GRB_TRY(GrB_select(y, NULL, NULL, Prune, y, -1, NULL));
+    // remove empty tuples (0,inf,-1) from push_vector
+    GRB_TRY(GrB_select(push_vector, NULL, NULL, Prune, push_vector, -1, NULL));
 
     // relabel, updating the height/label vector d
-    // d<struct(y)> = Relabel (d, y) using eWiseMult
-    GRB_TRY(GrB_eWiseMult(d, y, NULL, Relabel, d, y, GrB_DESC_S));
+    // d<struct(push_vector)> = Relabel (d, push_vector) using eWiseMult
+    GRB_TRY(GrB_eWiseMult(d, push_vector, NULL, Relabel, d, push_vector, GrB_DESC_S));
 
     #ifdef DBG
         // assert invariant for all labels
-        GRB_TRY(GrB_eWiseMult(invariant, y, NULL, CheckInvariant, d, y, GrB_DESC_RS));
+        GRB_TRY(GrB_eWiseMult(invariant, push_vector, NULL, CheckInvariant, d, push_vector, GrB_DESC_RS));
         GRB_TRY(GrB_reduce(check, NULL, GrB_LAND_MONOID_BOOL, invariant, NULL));
         GRB_TRY(GrB_Scalar_extractElement(&check_raw, check));
         ASSERT(check_raw == true);
@@ -1090,24 +1091,25 @@ int LAGr_MaxFlow
     // Part 4: executing the pushes
     //--------------------------------------------------------------------------
 
-    // extract residual flows from y
-    // delta_vec = ResidualFlow (y), obtaining just the residual flows
-    GRB_TRY(GrB_apply(delta_vec, NULL, NULL, ResidualFlow, y, NULL));
+    // extract residual flows from push_vector
+    // delta_vec = ResidualFlow (push_vector), obtaining just the residual flows
+    GRB_TRY(GrB_apply(delta_vec, NULL, NULL, ResidualFlow, push_vector, NULL));
 
     // delta_vec = min (delta_vec, e), where e is dense
     GRB_TRY(GrB_eWiseMult(delta_vec, NULL, NULL, GrB_MIN_FP64, delta_vec, e, NULL));
 
-    // create the Delta matrix from delta_vec and y
-    // note that delta_vec has the same structure as y
-    // Jvec = ExtractYJ (y), where Jvec(i) = y(i)->j
+    // create the Delta matrix from delta_vec and push_vector
+    // note that delta_vec has the same structure as push_vector
+    // Jvec = ExtractYJ (push_vector), where Jvec(i) = push_vector(i)->j
     // if Jvec has no values, then there is no possible
     // candidates to push to, so the algorithm terminates
-    GRB_TRY(GrB_apply(Jvec, NULL, NULL, ExtractYJ, y, NULL));
+    GRB_TRY(GrB_apply(Jvec, NULL, NULL, ExtractYJ, push_vector, NULL));
     GrB_Index J_n;
     GRB_TRY(GrB_Vector_nvals(&J_n, Jvec));
     if(J_n == 0) break;
     GRB_TRY(GrB_Matrix_clear(Delta));
     GRB_TRY(GrB_Matrix_build(Delta, delta_vec, Jvec, delta_vec, GxB_IGNORE_DUP, desc));
+    GRB_TRY(GrB_Vector_clear(Jvec));
 
     // make Delta anti-symmetric
     // Delta = (Delta - Delta')
@@ -1120,6 +1122,7 @@ int LAGr_MaxFlow
     // reduce Delta to delta_vec
     // delta_vec = sum (Delta), summing up each row of Delta
     GRB_TRY(GrB_reduce(delta_vec, NULL, NULL, GrB_PLUS_MONOID_FP64, Delta, GrB_DESC_T0));
+    GRB_TRY(GrB_Matrix_clear(Delta));
 
     // add delta_vec to e
     // e<struct(delta_vec)> += delta_vec
@@ -1130,12 +1133,19 @@ int LAGr_MaxFlow
 
   }
 
+
   //----------------------------------------------------------------------------
   // optionally construct the output flow matrix, if requested
   //----------------------------------------------------------------------------
 
   if (flow_mtx != NULL)
   {
+    // free all workspace except R
+    LG_FREE_WORK_EXCEPT_R ;
+    // create the ExtractMatrixFlow op to compute the flow matrix
+    GRB_TRY(GxB_UnaryOp_new(&ExtractMatrixFlow,
+        F_UNARY(LG_MF_ExtractMatrixFlow), GrB_FP64, FlowEdge,
+        "LG_MF_ExtractMatrixFlow", EMFLOW_STR));
     // flow_mtx = ExtractMatrixFlow (R)
     GRB_TRY(GrB_Matrix_new(flow_mtx, GrB_FP64, n, n));
     GRB_TRY(GrB_apply(*flow_mtx, NULL, NULL, ExtractMatrixFlow, R, NULL));
@@ -1150,26 +1160,26 @@ int LAGr_MaxFlow
   #ifdef COVERAGE
   // The MxeAdd operator is not tested via the call to GrB_mxv with the
   // MxeSemiring above, so test it via the MxeAddMonoid.
-  GrB_free(&y);
-  GRB_TRY(GrB_Vector_new(&y, ResultTuple, 3));
+  GrB_free(&push_vector);
+  GRB_TRY(GrB_Vector_new(&push_vector, ResultTuple, 3));
   if (n > NBIG)
   {
     LG_MF_resultTuple64 a = {.d = 1, .j = 2, .residual = 3};
     LG_MF_resultTuple64 b = {.d = 4, .j = 5, .residual = 6};
-    GRB_TRY (GrB_Vector_setElement_UDT (y, (void *) &a, 0)) ;
-    GRB_TRY (GrB_Vector_setElement_UDT (y, (void *) &b, 0)) ;
+    GRB_TRY (GrB_Vector_setElement_UDT (push_vector, (void *) &a, 0)) ;
+    GRB_TRY (GrB_Vector_setElement_UDT (push_vector, (void *) &b, 0)) ;
     LG_MF_resultTuple64 c = {.d = 0, .j = 0, .residual = 0};
-    GRB_TRY (GrB_Vector_reduce_UDT ((void *) &c, NULL, MxeAddMonoid, y, NULL)) ;
+    GRB_TRY (GrB_Vector_reduce_UDT ((void *) &c, NULL, MxeAddMonoid, push_vector, NULL)) ;
     LG_ASSERT ((c.residual == 6 && c.j == 5 && c.d == 4), GrB_PANIC) ;
   }
   else
   {
     LG_MF_resultTuple32 a = {.d = 1, .j = 2, .residual = 3};
     LG_MF_resultTuple32 b = {.d = 4, .j = 5, .residual = 6};
-    GRB_TRY (GrB_Vector_setElement_UDT (y, (void *) &a, 0)) ;
-    GRB_TRY (GrB_Vector_setElement_UDT (y, (void *) &b, 0)) ;
+    GRB_TRY (GrB_Vector_setElement_UDT (push_vector, (void *) &a, 0)) ;
+    GRB_TRY (GrB_Vector_setElement_UDT (push_vector, (void *) &b, 0)) ;
     LG_MF_resultTuple32 c = {.d = 0, .j = 0, .residual = 0};
-    GRB_TRY (GrB_Vector_reduce_UDT ((void *) &c, NULL, MxeAddMonoid, y, NULL)) ;
+    GRB_TRY (GrB_Vector_reduce_UDT ((void *) &c, NULL, MxeAddMonoid, push_vector, NULL)) ;
     LG_ASSERT ((c.residual == 6 && c.j == 5 && c.d == 4), GrB_PANIC) ;
   }
   #endif
diff --git a/experimental/benchmark/do_maxflow b/experimental/benchmark/do_maxflow
index 7a40625343..8291c8cf90 100755
--- a/experimental/benchmark/do_maxflow
+++ b/experimental/benchmark/do_maxflow
@@ -4,11 +4,12 @@
 MTX="/home/davis/matrices"
 GAP="/home/davis/GAP"
 
+# OK with 128GB of RAM:
 ../../build/experimental/benchmark/maxflow_demo $MTX/com-Youtube.mtx 4 275
 ../../build/experimental/benchmark/maxflow_demo $MTX/com-LiveJournal.mtx 2 88
 ../../build/experimental/benchmark/maxflow_demo $MTX/com-Orkut.mtx 43 75
-
-../../build/experimental/benchmark/maxflow_demo $GAP/GAP-web/GAP-web.grb 64712 500
 ../../build/experimental/benchmark/maxflow_demo $GAP/GAP-road/GAP-road.grb 4 1075
 ../../build/experimental/benchmark/maxflow_demo $GAP/GAP-twitter/GAP-twitter.grb 13 17
 
+# too large for hyper (needs > 128GB of RAM)
+# ../../build/experimental/benchmark/maxflow_demo $GAP/GAP-web/GAP-web.grb 64712 500
diff --git a/experimental/benchmark/maxflow_demo.c b/experimental/benchmark/maxflow_demo.c
index b09a04baf2..c4ba6da901 100644
--- a/experimental/benchmark/maxflow_demo.c
+++ b/experimental/benchmark/maxflow_demo.c
@@ -63,16 +63,17 @@ int main (int argc, char ** argv){
   printf("Time for LAGraph_MaxFlow: %g sec\n", time);
   printf("Max Flow is: %lf\n", flow);
 
-  /* printf("Starting max flow from %ld to %ld, with flow_matrix returned\n", S, T); */
-  /* time = LAGraph_WallClockTime(); */
-  /* LAGRAPH_TRY(LAGr_MaxFlow(&flow, &flow_matrix, G, S, T, msg)); */
-  /* time = LAGraph_WallClockTime() - time; */
-  /* printf("Time for LAGraph_MaxFlow with flow matrix: %g sec\n", time); */
-  /* printf("Max Flow is: %lf\n", flow); */
-  /* GRB_TRY (GrB_Matrix_nvals (&nflow, flow_matrix)) ; */
-  /* printf("# of entries in flow matrix: %lu\n", nflow); */
+  printf("Starting max flow from %ld to %ld, with flow_matrix returned\n", S, T);
+  time = LAGraph_WallClockTime();
+  LAGRAPH_TRY(LAGr_MaxFlow(&flow, &flow_matrix, G, S, T, msg));
+  time = LAGraph_WallClockTime() - time;
+  printf("Time for LAGraph_MaxFlow with flow matrix: %g sec\n", time);
+  printf("Max Flow is: %lf\n", flow);
+  GRB_TRY (GrB_Matrix_nvals (&nflow, flow_matrix)) ;
+  printf("# of entries in flow matrix: %lu\n", nflow);
 
   LAGraph_Delete(&G, msg);
+  GrB_free (&flow_matrix) ;
   LAGRAPH_TRY(LAGraph_Finalize(msg));
 
   return GrB_SUCCESS;

From 170c10a7ebfe7e729f1f8ef62057a5d86dbdc321 Mon Sep 17 00:00:00 2001
From: Tim Davis <DrTimothyAldenDavis@gmail.com>
Date: Tue, 2 Sep 2025 15:20:44 -0500
Subject: [PATCH 4/6] maxflow: freed FlowEdge type too early when flow matrix
 is returned

---
 experimental/algorithm/LAGr_MaxFlow.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experimental/algorithm/LAGr_MaxFlow.c b/experimental/algorithm/LAGr_MaxFlow.c
index b77f4c732c..7f9c5dde55 100644
--- a/experimental/algorithm/LAGr_MaxFlow.c
+++ b/experimental/algorithm/LAGr_MaxFlow.c
@@ -131,7 +131,6 @@ static GrB_Info LG_global_relabel
 
 #define LG_FREE_WORK_EXCEPT_R               \
 {                                           \
-    GrB_free(&FlowEdge);                    \
     GrB_free(&CompareTuple);                \
     GrB_free(&ResultTuple);                 \
     GrB_free(&e);                           \
@@ -181,6 +180,7 @@ static GrB_Info LG_global_relabel
 #define LG_FREE_WORK                        \
 {                                           \
     LG_FREE_WORK_EXCEPT_R                   \
+    GrB_free(&FlowEdge);                    \
     GrB_free(&ExtractMatrixFlow);           \
     GrB_free(&R);                           \
 }

From d33eda4c00ca56847967a3db4f03aedeca307a86 Mon Sep 17 00:00:00 2001
From: Tim Davis <DrTimothyAldenDavis@gmail.com>
Date: Tue, 2 Sep 2025 15:31:49 -0500
Subject: [PATCH 5/6] maxflow: do not free objects required for test coverage

---
 experimental/algorithm/LAGr_MaxFlow.c | 138 +++++++++++++-------------
 1 file changed, 69 insertions(+), 69 deletions(-)

diff --git a/experimental/algorithm/LAGr_MaxFlow.c b/experimental/algorithm/LAGr_MaxFlow.c
index 7f9c5dde55..3aab809c30 100644
--- a/experimental/algorithm/LAGr_MaxFlow.c
+++ b/experimental/algorithm/LAGr_MaxFlow.c
@@ -132,7 +132,6 @@ static GrB_Info LG_global_relabel
 #define LG_FREE_WORK_EXCEPT_R               \
 {                                           \
     GrB_free(&CompareTuple);                \
-    GrB_free(&ResultTuple);                 \
     GrB_free(&e);                           \
     GrB_free(&d);                           \
     GrB_free(&theta);                       \
@@ -147,18 +146,16 @@ static GrB_Info LG_global_relabel
     GrB_free(&UpdateFlow);                  \
     GrB_free(&Relabel);                     \
     GrB_free(&ResidualFlow);                \
-    GrB_free(&MxeIndexMult);                \
-    GrB_free(&MxeMult);                     \
-    GrB_free(&MxeAdd);                      \
-    GrB_free(&MxeAddMonoid);                \
-    GrB_free(&MxeSemiring);                 \
+    GrB_free(&Cxe_IndexMult);               \
+    GrB_free(&Cxe_Mult);                    \
+    GrB_free(&Cxe_Semiring);                \
     GrB_free(&ExtractJ);                    \
     GrB_free(&CreateCompareVec);            \
-    GrB_free(&RxdSemiring);                 \
-    GrB_free(&RxdAdd);                      \
-    GrB_free(&RxdAddMonoid);                \
-    GrB_free(&RxdIndexMult);                \
-    GrB_free(&RxdMult);                     \
+    GrB_free(&Rxd_Semiring);                \
+    GrB_free(&Rxd_Add);                     \
+    GrB_free(&Rxd_AddMonoid);               \
+    GrB_free(&Rxd_IndexMult);               \
+    GrB_free(&Rxd_Mult);                    \
     GrB_free(&InitForw);                    \
     GrB_free(&InitBack);                    \
     GrB_free(&ResidualForward);             \
@@ -180,6 +177,9 @@ static GrB_Info LG_global_relabel
 #define LG_FREE_WORK                        \
 {                                           \
     LG_FREE_WORK_EXCEPT_R                   \
+    GrB_free(&Cxe_Add);                     \
+    GrB_free(&Cxe_AddMonoid);               \
+    GrB_free(&ResultTuple);                 \
     GrB_free(&FlowEdge);                    \
     GrB_free(&ExtractMatrixFlow);           \
     GrB_free(&R);                           \
@@ -262,7 +262,7 @@ JIT_STR(void LG_MF_ResidualBackward(LG_MF_flowEdge *z, const double *y) {
 //------------------------------------------------------------------------------
 
 // multiplicative operator, z = R(i,j) * d(j), 64-bit case
-JIT_STR(void LG_MF_RxdMult64(LG_MF_resultTuple64 *z,
+JIT_STR(void LG_MF_Rxd_Mult64(LG_MF_resultTuple64 *z,
     const LG_MF_flowEdge *x, GrB_Index i, GrB_Index j,
     const int64_t *y, GrB_Index iy, GrB_Index jy,
     const bool* theta) {
@@ -280,7 +280,7 @@ JIT_STR(void LG_MF_RxdMult64(LG_MF_resultTuple64 *z,
 }, RXDMULT_STR64)
 
 // multiplicative operator, z = R(i,j) * d(j), 32-bit case
-JIT_STR(void LG_MF_RxdMult32(LG_MF_resultTuple32 *z,
+JIT_STR(void LG_MF_Rxd_Mult32(LG_MF_resultTuple32 *z,
     const LG_MF_flowEdge *x, GrB_Index i, GrB_Index j,
     const int32_t *y, GrB_Index iy, GrB_Index jy,
     const bool* theta) {
@@ -298,7 +298,7 @@ JIT_STR(void LG_MF_RxdMult32(LG_MF_resultTuple32 *z,
 }, RXDMULT_STR32)
 
 // additive monoid: z = the best tuple, x or y, 64-bit case
-JIT_STR(void LG_MF_RxdAdd64(LG_MF_resultTuple64 * z,
+JIT_STR(void LG_MF_Rxd_Add64(LG_MF_resultTuple64 * z,
     const LG_MF_resultTuple64 * x,
     const LG_MF_resultTuple64 * y) {
   if(x->d < y->d){
@@ -326,7 +326,7 @@ JIT_STR(void LG_MF_RxdAdd64(LG_MF_resultTuple64 * z,
   }, RXDADD_STR64)
 
 // additive monoid: z = the best tuple, x or y, 32-bit case
-JIT_STR(void LG_MF_RxdAdd32(LG_MF_resultTuple32 * z,
+JIT_STR(void LG_MF_Rxd_Add32(LG_MF_resultTuple32 * z,
     const LG_MF_resultTuple32 * x, const LG_MF_resultTuple32 * y) {
   if(x->d < y->d){
     (*z) = (*x) ;
@@ -437,7 +437,7 @@ JIT_STR(void LG_MF_InitBack(LG_MF_flowEdge * z,
 //------------------------------------------------------------------------------
 
 // multiplicative operator, z = C(i,j)*e(j), 64-bit case
-JIT_STR(void LG_MF_MxeMult64(LG_MF_resultTuple64 * z,
+JIT_STR(void LG_MF_Cxe_Mult64(LG_MF_resultTuple64 * z,
     const LG_MF_compareTuple64 * x, GrB_Index i, GrB_Index j,
     const double * y, GrB_Index iy, GrB_Index jy,
     const bool* theta){
@@ -460,7 +460,7 @@ JIT_STR(void LG_MF_MxeMult64(LG_MF_resultTuple64 * z,
 }, MXEMULT_STR64)
 
 // multiplicative operator, z = C(i,j)*e(j), 32-bit case
-JIT_STR(void LG_MF_MxeMult32(LG_MF_resultTuple32 * z,
+JIT_STR(void LG_MF_Cxe_Mult32(LG_MF_resultTuple32 * z,
     const LG_MF_compareTuple32 * x, GrB_Index i, GrB_Index j,
     const double * y, GrB_Index iy, GrB_Index jy,
     const bool* theta){
@@ -486,12 +486,12 @@ JIT_STR(void LG_MF_MxeMult32(LG_MF_resultTuple32 * z,
 // because any given node only pushes to one neighbor at a time.  As a result,
 // no reduction is needed in GrB_mxv.  The semiring still needs a monoid,
 // however.
-JIT_STR(void LG_MF_MxeAdd64(LG_MF_resultTuple64 * z,
+JIT_STR(void LG_MF_Cxe_Add64(LG_MF_resultTuple64 * z,
     const LG_MF_resultTuple64 * x, const LG_MF_resultTuple64 * y){
     (*z) = (*y) ;
   }, MXEADD_STR64)
 
-JIT_STR(void LG_MF_MxeAdd32(LG_MF_resultTuple32 * z,
+JIT_STR(void LG_MF_Cxe_Add32(LG_MF_resultTuple32 * z,
     const LG_MF_resultTuple32 * x, const LG_MF_resultTuple32 * y){
     (*z) = (*y) ;
   }, MXEADD_STR32)
@@ -646,10 +646,10 @@ int LAGr_MaxFlow
   // semiring and vectors for push_vector<struct(e)> = R*d
   GrB_Vector push_vector = NULL ;
   GrB_IndexUnaryOp Prune = NULL ;
-  GxB_IndexBinaryOp RxdIndexMult = NULL ;
-  GrB_BinaryOp RxdAdd = NULL, RxdMult = NULL ;
-  GrB_Monoid RxdAddMonoid = NULL ;
-  GrB_Semiring RxdSemiring = NULL ;
+  GxB_IndexBinaryOp Rxd_IndexMult = NULL ;
+  GrB_BinaryOp Rxd_Add = NULL, Rxd_Mult = NULL ;
+  GrB_Monoid Rxd_AddMonoid = NULL ;
+  GrB_Semiring Rxd_Semiring = NULL ;
   GrB_Scalar theta = NULL ;
 
   // binary op and pd
@@ -662,10 +662,10 @@ int LAGr_MaxFlow
   GrB_UnaryOp ExtractJ = NULL, ExtractYJ = NULL ;
 
   // C*e semiring
-  GrB_Semiring MxeSemiring = NULL ;
-  GrB_Monoid MxeAddMonoid = NULL ;
-  GrB_BinaryOp MxeAdd = NULL, MxeMult = NULL ;
-  GxB_IndexBinaryOp MxeIndexMult = NULL ;
+  GrB_Semiring Cxe_Semiring = NULL ;
+  GrB_Monoid Cxe_AddMonoid = NULL ;
+  GrB_BinaryOp Cxe_Add = NULL, Cxe_Mult = NULL ;
+  GxB_IndexBinaryOp Cxe_IndexMult = NULL ;
 
   // to extract the residual flow
   GrB_UnaryOp ResidualFlow = NULL ;
@@ -833,15 +833,15 @@ int LAGr_MaxFlow
 
     // create ops for R*d semiring
 
-    GRB_TRY(GxB_IndexBinaryOp_new(&RxdIndexMult,
-        F_INDEX_BINARY(LG_MF_RxdMult64), ResultTuple, FlowEdge, GrB_INT64, GrB_BOOL,
-        "LG_MF_RxdMult64", RXDMULT_STR64));
-    GRB_TRY(GxB_BinaryOp_new_IndexOp(&RxdMult, RxdIndexMult, theta));
-    GRB_TRY(GxB_BinaryOp_new(&RxdAdd,
-        F_BINARY(LG_MF_RxdAdd64), ResultTuple, ResultTuple, ResultTuple,
-        "LG_MF_RxdAdd64", RXDADD_STR64));
+    GRB_TRY(GxB_IndexBinaryOp_new(&Rxd_IndexMult,
+        F_INDEX_BINARY(LG_MF_Rxd_Mult64), ResultTuple, FlowEdge, GrB_INT64, GrB_BOOL,
+        "LG_MF_Rxd_Mult64", RXDMULT_STR64));
+    GRB_TRY(GxB_BinaryOp_new_IndexOp(&Rxd_Mult, Rxd_IndexMult, theta));
+    GRB_TRY(GxB_BinaryOp_new(&Rxd_Add,
+        F_BINARY(LG_MF_Rxd_Add64), ResultTuple, ResultTuple, ResultTuple,
+        "LG_MF_Rxd_Add64", RXDADD_STR64));
     LG_MF_resultTuple64 id = {.d = INT64_MAX, .j = -1, .residual = 0};
-    GRB_TRY(GrB_Monoid_new_UDT(&RxdAddMonoid, RxdAdd, &id));
+    GRB_TRY(GrB_Monoid_new_UDT(&Rxd_AddMonoid, Rxd_Add, &id));
 
     // create binary op for pd
     GRB_TRY(GxB_BinaryOp_new(&CreateCompareVec,
@@ -862,14 +862,14 @@ int LAGr_MaxFlow
         "LG_MF_ExtractYJ64", EXTRACTYJ_STR64));
 
     // create ops for C*e semiring
-    GRB_TRY(GxB_IndexBinaryOp_new(&MxeIndexMult,
-        F_INDEX_BINARY(LG_MF_MxeMult64), ResultTuple, CompareTuple, GrB_FP64, GrB_BOOL,
-        "LG_MF_MxeMult64", MXEMULT_STR64));
-    GRB_TRY(GxB_BinaryOp_new_IndexOp(&MxeMult, MxeIndexMult, theta));
-    GRB_TRY(GxB_BinaryOp_new(&MxeAdd,
-        F_BINARY(LG_MF_MxeAdd64), ResultTuple, ResultTuple, ResultTuple,
-        "LG_MF_MxeAdd64", MXEADD_STR64));
-    GRB_TRY(GrB_Monoid_new_UDT(&MxeAddMonoid, MxeAdd, &id));
+    GRB_TRY(GxB_IndexBinaryOp_new(&Cxe_IndexMult,
+        F_INDEX_BINARY(LG_MF_Cxe_Mult64), ResultTuple, CompareTuple, GrB_FP64, GrB_BOOL,
+        "LG_MF_Cxe_Mult64", MXEMULT_STR64));
+    GRB_TRY(GxB_BinaryOp_new_IndexOp(&Cxe_Mult, Cxe_IndexMult, theta));
+    GRB_TRY(GxB_BinaryOp_new(&Cxe_Add,
+        F_BINARY(LG_MF_Cxe_Add64), ResultTuple, ResultTuple, ResultTuple,
+        "LG_MF_Cxe_Add64", MXEADD_STR64));
+    GRB_TRY(GrB_Monoid_new_UDT(&Cxe_AddMonoid, Cxe_Add, &id));
 
     // update height binary op
     GRB_TRY(GxB_BinaryOp_new(&Relabel,
@@ -905,15 +905,15 @@ int LAGr_MaxFlow
         "LG_MF_ResidualFlow32", RESIDUALFLOW_STR32));
 
     // create ops for R*d semiring
-    GRB_TRY(GxB_IndexBinaryOp_new(&RxdIndexMult,
-        F_INDEX_BINARY(LG_MF_RxdMult32), ResultTuple, FlowEdge, GrB_INT32, GrB_BOOL,
-        "LG_MF_RxdMult32", RXDMULT_STR32));
-    GRB_TRY(GxB_BinaryOp_new_IndexOp(&RxdMult, RxdIndexMult, theta));
-    GRB_TRY(GxB_BinaryOp_new(&RxdAdd,
-        F_BINARY(LG_MF_RxdAdd32), ResultTuple, ResultTuple, ResultTuple,
-        "LG_MF_RxdAdd32", RXDADD_STR32));
+    GRB_TRY(GxB_IndexBinaryOp_new(&Rxd_IndexMult,
+        F_INDEX_BINARY(LG_MF_Rxd_Mult32), ResultTuple, FlowEdge, GrB_INT32, GrB_BOOL,
+        "LG_MF_Rxd_Mult32", RXDMULT_STR32));
+    GRB_TRY(GxB_BinaryOp_new_IndexOp(&Rxd_Mult, Rxd_IndexMult, theta));
+    GRB_TRY(GxB_BinaryOp_new(&Rxd_Add,
+        F_BINARY(LG_MF_Rxd_Add32), ResultTuple, ResultTuple, ResultTuple,
+        "LG_MF_Rxd_Add32", RXDADD_STR32));
     LG_MF_resultTuple32 id = {.d = INT32_MAX, .j = -1, .residual = 0};
-    GRB_TRY(GrB_Monoid_new_UDT(&RxdAddMonoid, RxdAdd, &id));
+    GRB_TRY(GrB_Monoid_new_UDT(&Rxd_AddMonoid, Rxd_Add, &id));
 
     // create binary op for pd
     GRB_TRY(GxB_BinaryOp_new(&CreateCompareVec,
@@ -934,14 +934,14 @@ int LAGr_MaxFlow
         "LG_MF_ExtractYJ32", EXTRACTYJ_STR32));
 
     // create ops for C*e semiring
-    GRB_TRY(GxB_IndexBinaryOp_new(&MxeIndexMult,
-        F_INDEX_BINARY(LG_MF_MxeMult32), ResultTuple, CompareTuple, GrB_FP64, GrB_BOOL,
-        "LG_MF_MxeMult32", MXEMULT_STR32));
-    GRB_TRY(GxB_BinaryOp_new_IndexOp(&MxeMult, MxeIndexMult, theta));
-    GRB_TRY(GxB_BinaryOp_new(&MxeAdd,
-        F_BINARY(LG_MF_MxeAdd32), ResultTuple, ResultTuple, ResultTuple,
-        "LG_MF_MxeAdd32", MXEADD_STR32));
-    GRB_TRY(GrB_Monoid_new_UDT(&MxeAddMonoid, MxeAdd, &id));
+    GRB_TRY(GxB_IndexBinaryOp_new(&Cxe_IndexMult,
+        F_INDEX_BINARY(LG_MF_Cxe_Mult32), ResultTuple, CompareTuple, GrB_FP64, GrB_BOOL,
+        "LG_MF_Cxe_Mult32", MXEMULT_STR32));
+    GRB_TRY(GxB_BinaryOp_new_IndexOp(&Cxe_Mult, Cxe_IndexMult, theta));
+    GRB_TRY(GxB_BinaryOp_new(&Cxe_Add,
+        F_BINARY(LG_MF_Cxe_Add32), ResultTuple, ResultTuple, ResultTuple,
+        "LG_MF_Cxe_Add32", MXEADD_STR32));
+    GRB_TRY(GrB_Monoid_new_UDT(&Cxe_AddMonoid, Cxe_Add, &id));
 
     // update height binary op
     GRB_TRY(GxB_BinaryOp_new(&Relabel,
@@ -958,8 +958,8 @@ int LAGr_MaxFlow
   GRB_TRY(GrB_Vector_new(&pd, CompareTuple, n));
   GRB_TRY(GrB_Vector_new(&push_vector, ResultTuple, n));
 
-  GRB_TRY(GrB_Semiring_new(&RxdSemiring, RxdAddMonoid, RxdMult));
-  GRB_TRY(GrB_Semiring_new(&MxeSemiring, MxeAddMonoid, MxeMult));
+  GRB_TRY(GrB_Semiring_new(&Rxd_Semiring, Rxd_AddMonoid, Rxd_Mult));
+  GRB_TRY(GrB_Semiring_new(&Cxe_Semiring, Cxe_AddMonoid, Cxe_Mult));
 
   // create descriptor for building the C and Delta matrices
   GRB_TRY(GrB_Descriptor_new(&desc));
@@ -1036,8 +1036,8 @@ int LAGr_MaxFlow
     // Part 2: deciding where to push
     //--------------------------------------------------------------------------
 
-    // push_vector<struct(e),replace> = R*d using the RxdSemiring
-    GRB_TRY(GrB_mxv(push_vector, e, NULL, RxdSemiring, R, d, GrB_DESC_RS));
+    // push_vector<struct(e),replace> = R*d using the Rxd_Semiring
+    GRB_TRY(GrB_mxv(push_vector, e, NULL, Rxd_Semiring, R, d, GrB_DESC_RS));
 
     // remove empty tuples (0,inf,-1) from push_vector
     GRB_TRY(GrB_select(push_vector, NULL, NULL, Prune, push_vector, 0, NULL));
@@ -1068,8 +1068,8 @@ int LAGr_MaxFlow
     // or always full with e(i)=0 denoting a non-active node.
     GRB_TRY(GrB_assign(e, e, NULL, 0, GrB_ALL, n, GrB_DESC_SC));
 
-    // push_vector = C*e using the MxeSemiring
-    GRB_TRY(GrB_mxv(push_vector, NULL, NULL, MxeSemiring, C, e, NULL));
+    // push_vector = C*e using the Cxe_Semiring
+    GRB_TRY(GrB_mxv(push_vector, NULL, NULL, Cxe_Semiring, C, e, NULL));
     GRB_TRY(GrB_Matrix_clear(C));
 
     // remove empty tuples (0,inf,-1) from push_vector
@@ -1158,8 +1158,8 @@ int LAGr_MaxFlow
   //----------------------------------------------------------------------------
 
   #ifdef COVERAGE
-  // The MxeAdd operator is not tested via the call to GrB_mxv with the
-  // MxeSemiring above, so test it via the MxeAddMonoid.
+  // The Cxe_Add operator is not tested via the call to GrB_mxv with the
+  // Cxe_Semiring above, so test it via the Cxe_AddMonoid.
   GrB_free(&push_vector);
   GRB_TRY(GrB_Vector_new(&push_vector, ResultTuple, 3));
   if (n > NBIG)
@@ -1169,7 +1169,7 @@ int LAGr_MaxFlow
     GRB_TRY (GrB_Vector_setElement_UDT (push_vector, (void *) &a, 0)) ;
     GRB_TRY (GrB_Vector_setElement_UDT (push_vector, (void *) &b, 0)) ;
     LG_MF_resultTuple64 c = {.d = 0, .j = 0, .residual = 0};
-    GRB_TRY (GrB_Vector_reduce_UDT ((void *) &c, NULL, MxeAddMonoid, push_vector, NULL)) ;
+    GRB_TRY (GrB_Vector_reduce_UDT ((void *) &c, NULL, Cxe_AddMonoid, push_vector, NULL)) ;
     LG_ASSERT ((c.residual == 6 && c.j == 5 && c.d == 4), GrB_PANIC) ;
   }
   else
@@ -1179,7 +1179,7 @@ int LAGr_MaxFlow
     GRB_TRY (GrB_Vector_setElement_UDT (push_vector, (void *) &a, 0)) ;
     GRB_TRY (GrB_Vector_setElement_UDT (push_vector, (void *) &b, 0)) ;
     LG_MF_resultTuple32 c = {.d = 0, .j = 0, .residual = 0};
-    GRB_TRY (GrB_Vector_reduce_UDT ((void *) &c, NULL, MxeAddMonoid, push_vector, NULL)) ;
+    GRB_TRY (GrB_Vector_reduce_UDT ((void *) &c, NULL, Cxe_AddMonoid, push_vector, NULL)) ;
     LG_ASSERT ((c.residual == 6 && c.j == 5 && c.d == 4), GrB_PANIC) ;
   }
   #endif

From c3d297a4e5e5b67b3814d232b138d089273a0dee Mon Sep 17 00:00:00 2001
From: Tim Davis <DrTimothyAldenDavis@gmail.com>
Date: Sat, 6 Sep 2025 10:19:22 -0500
Subject: [PATCH 6/6] maxflow: add cite to HPEC25 paper

---
 experimental/algorithm/LAGr_MaxFlow.c | 7 +++++++
 experimental/benchmark/do_maxflow     | 4 +++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/experimental/algorithm/LAGr_MaxFlow.c b/experimental/algorithm/LAGr_MaxFlow.c
index 3aab809c30..87cc1533b9 100644
--- a/experimental/algorithm/LAGr_MaxFlow.c
+++ b/experimental/algorithm/LAGr_MaxFlow.c
@@ -23,6 +23,13 @@
 // (eds) Algorithms - ESA 2015. Lecture Notes in Computer Science(), vol 9294.
 // Springer, Berlin, Heidelberg.  https://doi.org/10.1007/978-3-662-48350-3 10.
 
+// [2] D. Peries and T. Davis, "A parallel push-relabel maximum flow algorithm
+// in LAGraph and GraphBLAS", IEEE HPEC'25, Sept 2025.
+
+// TODO: return the (optional) flow matrix can be costly in terms of run time.
+// The HPEC'25 results only benchmark the computation of the max flow, f.
+// Future work:  we plan on revising how the flow matrix is constructed.
+
 #include <LAGraphX.h>
 #include "LG_internal.h"
 #include <LAGraph.h>
diff --git a/experimental/benchmark/do_maxflow b/experimental/benchmark/do_maxflow
index 8291c8cf90..231ff9bc98 100755
--- a/experimental/benchmark/do_maxflow
+++ b/experimental/benchmark/do_maxflow
@@ -8,8 +8,10 @@ GAP="/home/davis/GAP"
 ../../build/experimental/benchmark/maxflow_demo $MTX/com-Youtube.mtx 4 275
 ../../build/experimental/benchmark/maxflow_demo $MTX/com-LiveJournal.mtx 2 88
 ../../build/experimental/benchmark/maxflow_demo $MTX/com-Orkut.mtx 43 75
-../../build/experimental/benchmark/maxflow_demo $GAP/GAP-road/GAP-road.grb 4 1075
 ../../build/experimental/benchmark/maxflow_demo $GAP/GAP-twitter/GAP-twitter.grb 13 17
 
+# takes a *very* long time when computing the flow matrix:
+../../build/experimental/benchmark/maxflow_demo $GAP/GAP-road/GAP-road.grb 4 1075
+
 # too large for hyper (needs > 128GB of RAM)
 # ../../build/experimental/benchmark/maxflow_demo $GAP/GAP-web/GAP-web.grb 64712 500