From 7699ee317432da66c79dc93bebf3673ec7b2f714 Mon Sep 17 00:00:00 2001
From: Hyeoksu Lee <hyeoksu@M4Pro.local>
Date: Mon, 6 Oct 2025 13:45:05 -0700
Subject: [PATCH 01/10] refactor time_stepper routines

---
 src/simulation/m_start_up.fpp      |  10 +-
 src/simulation/m_time_steppers.fpp | 780 ++++-------------------------
 2 files changed, 105 insertions(+), 685 deletions(-)

diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp
index 123db558ea..ffd0bf865e 100644
--- a/src/simulation/m_start_up.fpp
+++ b/src/simulation/m_start_up.fpp
@@ -1149,14 +1149,8 @@ contains
         mytime = mytime + dt
 
         ! Total-variation-diminishing (TVD) Runge-Kutta (RK) time-steppers
-        if (time_stepper == 1) then
-            call s_1st_order_tvd_rk(t_step, time_avg)
-        elseif (time_stepper == 2) then
-            call s_2nd_order_tvd_rk(t_step, time_avg)
-        elseif (time_stepper == 3 .and. (.not. adap_dt)) then
-            call s_3rd_order_tvd_rk(t_step, time_avg)
-        elseif (time_stepper == 3 .and. adap_dt) then
-            call s_strang_splitting(t_step, time_avg)
+        if (any(time_stepper == (/1, 2, 3/))) then
+            call s_tvd_rk(t_step, time_avg, time_stepper)
         end if
 
         if (relax) call s_infinite_relaxation_k(q_cons_ts(1)%vf)
diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index e7d4ba6017..0e6ca3e330 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -75,7 +75,10 @@ module m_time_steppers
     integer, private :: num_ts !<
     !! Number of time stages in the time-stepping scheme
 
-    $:GPU_DECLARE(create='[q_cons_ts,q_prim_vf,q_T_sf,rhs_vf,q_prim_ts,rhs_mv,rhs_pb,max_dt]')
+    integer :: stor !< storage index
+    real(wp), allocatable, dimension(:, :) :: rk_coef
+
+    $:GPU_DECLARE(create='[q_cons_ts,q_prim_vf,q_T_sf,rhs_vf,q_prim_ts,rhs_mv,rhs_pb,max_dt,rk_coef]')
 
 #if defined(__NVCOMPILER_GPU_UNIFIED_MEM)
     real(wp), allocatable, dimension(:, :, :, :), pinned, target :: q_cons_ts_pool_host
@@ -438,729 +441,152 @@ contains
             end do
         end do
 
-    end subroutine s_initialize_time_steppers_module
-
-    !> 1st order TVD RK time-stepping algorithm
-        !! @param t_step Current time step
-    impure subroutine s_1st_order_tvd_rk(t_step, time_avg)
-#ifdef _CRAYFTN
-        !DIR$ OPTIMIZE (-haggress)
-#endif
-        integer, intent(in) :: t_step
-        real(wp), intent(inout) :: time_avg
-
-        integer :: i, j, k, l, q !< Generic loop iterator
-
-        real(wp) :: start, finish
-
-        ! Stage 1 of 1
-        call cpu_time(start)
-        call nvtxStartRange("TIMESTEP")
-
-        call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, 1)
-
-#ifdef DEBUG
-        print *, 'got rhs'
-#endif
-
-        if (run_time_info) then
-            if (igr) then
-                call s_write_run_time_information(q_cons_ts(1)%vf, t_step)
-            else
-                call s_write_run_time_information(q_prim_vf, t_step)
-            end if
-        end if
-
-#ifdef DEBUG
-        print *, 'wrote runtime info'
-#endif
-
-        if (probe_wrt) then
-            call s_time_step_cycling(t_step)
-        end if
-
-        if (cfl_dt) then
-            if (mytime >= t_stop) return
-        else
-            if (t_step == t_step_stop) return
-        end if
-
-        if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1)
-
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(1)%vf(i)%sf(j, k, l) = &
-                            q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                            + dt*rhs_vf(i)%sf(j, k, l)
-                    end do
-                end do
-            end do
-        end do
-
-        !Evolve pb and mv for non-polytropic qbmm
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                pb_ts(1)%sf(j, k, l, q, i) = &
-                                    pb_ts(1)%sf(j, k, l, q, i) &
-                                    + dt*rhs_pb(j, k, l, q, i)
-                            end do
-                        end do
-                    end do
-                end do
-            end do
+        if (any(time_stepper == (/1, 2, 3/))) then
+          ! Time stepper index
+          if (time_stepper == 1) then
+            stor = 1
+          else
+            stor = 2
+          end if
+
+          @:ALLOCATE (rk_coef(time_stepper, 3))
+          if (time_stepper == 1) then
+            rk_coef(1, 1) = 1._wp
+            rk_coef(1, 2) = 0._wp
+            rk_coef(1, 3) = 1._wp
+          else if (time_stepper == 2) then
+            rk_coef(1, 1) = 1._wp
+            rk_coef(1, 2) = 0._wp
+            rk_coef(1, 3) = 1._wp
+            rk_coef(2, 1) = 0.5_wp
+            rk_coef(2, 2) = 0.5_wp
+            rk_coef(2, 3) = 0.5_wp
+          else if (time_stepper == 3) then
+            rk_coef(1, 1) = 1._wp
+            rk_coef(1, 2) = 0._wp
+            rk_coef(1, 3) = 1._wp
+            rk_coef(2, 1) = 1._wp/4._wp
+            rk_coef(2, 2) = 3._wp/4._wp
+            rk_coef(2, 3) = 1._wp/4._wp
+            rk_coef(3, 1) = 2._wp/3._wp
+            rk_coef(3, 2) = 1._wp/3._wp
+            rk_coef(3, 3) = 2._wp/3._wp
+          end if
         end if
 
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                mv_ts(1)%sf(j, k, l, q, i) = &
-                                    mv_ts(1)%sf(j, k, l, q, i) &
-                                    + dt*rhs_mv(j, k, l, q, i)
-                            end do
-                        end do
-                    end do
-                end do
-            end do
-        end if
-
-        if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, dt)
-
-        if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf)
-
-        if (model_eqns == 3) call s_pressure_relaxation_procedure(q_cons_ts(1)%vf)
-
-        if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf)
-
-        if (ib) then
-            if (qbmm .and. .not. polytropic) then
-                call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf)
-            else
-                call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf)
-            end if
-        end if
-
-        call nvtxEndRange
-
-        call cpu_time(finish)
-
-        wall_time = abs(finish - start)
-
-        if (t_step >= 2) then
-            wall_time_avg = (wall_time + (t_step - 2)*wall_time_avg)/(t_step - 1)
-        else
-            wall_time_avg = 0._wp
-        end if
-
-    end subroutine s_1st_order_tvd_rk
+    end subroutine s_initialize_time_steppers_module
 
-    !> 2nd order TVD RK time-stepping algorithm
-        !! @param t_step Current time-step
-    impure subroutine s_2nd_order_tvd_rk(t_step, time_avg)
+    impure subroutine s_tvd_rk(t_step, time_avg, nstage)
 #ifdef _CRAYFTN
         !DIR$ OPTIMIZE (-haggress)
 #endif
         integer, intent(in) :: t_step
         real(wp), intent(inout) :: time_avg
+        integer, intent(in) :: nstage
 
-        integer :: i, j, k, l, q!< Generic loop iterator
+        integer :: i, j, k, l, q, s !< Generic loop iterator
         real(wp) :: start, finish
         integer :: dest
 
-        ! Stage 1 of 2
-
         call cpu_time(start)
-
         call nvtxStartRange("TIMESTEP")
 
-        call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, 1)
-
-        if (run_time_info) then
-            if (igr) then
-                call s_write_run_time_information(q_cons_ts(1)%vf, t_step)
-            else
-                call s_write_run_time_information(q_prim_vf, t_step)
-            end if
-        end if
-
-        if (probe_wrt) then
-            call s_time_step_cycling(t_step)
-        end if
-
-        if (cfl_dt) then
-            if (mytime >= t_stop) return
-        else
-            if (t_step == t_step_stop) return
-        end if
-
-        if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1)
-
-#if defined(__NVCOMPILER_GPU_UNIFIED_MEM) || defined(FRONTIER_UNIFIED)
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(2)%vf(i)%sf(j, k, l) = &
-                            q_cons_ts(1)%vf(i)%sf(j, k, l)
-                        q_cons_ts(1)%vf(i)%sf(j, k, l) = &
-                            q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                            + dt*rhs_vf(i)%sf(j, k, l)
-                    end do
-                end do
-            end do
-        end do
-
-        dest = 1 ! Result in q_cons_ts(1)%vf
-#else
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(2)%vf(i)%sf(j, k, l) = &
-                            q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                            + dt*rhs_vf(i)%sf(j, k, l)
-                    end do
-                end do
-            end do
-        end do
-
-        dest = 2 ! Result in q_cons_ts(2)%vf
-#endif
-
-        !Evolve pb and mv for non-polytropic qbmm
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                pb_ts(2)%sf(j, k, l, q, i) = &
-                                    pb_ts(1)%sf(j, k, l, q, i) &
-                                    + dt*rhs_pb(j, k, l, q, i)
-                            end do
-                        end do
-                    end do
-                end do
-            end do
-        end if
-
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                mv_ts(2)%sf(j, k, l, q, i) = &
-                                    mv_ts(1)%sf(j, k, l, q, i) &
-                                    + dt*rhs_mv(j, k, l, q, i)
-                            end do
-                        end do
-                    end do
-                end do
-            end do
-        end if
-
-        if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, dt)
+        ! Adaptive dt: initial stage 
+        if (adap_dt) call s_adaptive_dt_bubble(1)
 
-        if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf)
-
-        if (model_eqns == 3 .and. (.not. relax)) then
-            call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf)
-        end if
+        do s = 1, nstage
+            call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, s)
 
-        if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf)
-
-        if (ib) then
-            if (qbmm .and. .not. polytropic) then
-                call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf)
-            else
-                call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf)
-            end if
-        end if
-
-        ! Stage 2 of 2
-        call s_compute_rhs(q_cons_ts(dest)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2)
-
-        if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2)
-
-#if defined(__NVCOMPILER_GPU_UNIFIED_MEM) || defined(FRONTIER_UNIFIED)
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(1)%vf(i)%sf(j, k, l) = &
-                            (q_cons_ts(2)%vf(i)%sf(j, k, l) &
-                             + q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                             + dt*rhs_vf(i)%sf(j, k, l))/2._wp
-                    end do
-                end do
-            end do
-        end do
-
-        dest = 1 ! Result in q_cons_ts(1)%vf
-#else
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(1)%vf(i)%sf(j, k, l) = &
-                            (q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                             + q_cons_ts(2)%vf(i)%sf(j, k, l) &
-                             + dt*rhs_vf(i)%sf(j, k, l))/2._wp
-                    end do
-                end do
-            end do
-        end do
-
-        dest = 1 ! Result in q_cons_ts(1)%vf
-#endif
-
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                pb_ts(1)%sf(j, k, l, q, i) = &
-                                    (pb_ts(1)%sf(j, k, l, q, i) &
-                                     + pb_ts(2)%sf(j, k, l, q, i) &
-                                     + dt*rhs_pb(j, k, l, q, i))/2._wp
-                            end do
-                        end do
-                    end do
-                end do
-            end do
-        end if
-
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                mv_ts(1)%sf(j, k, l, q, i) = &
-                                    (mv_ts(1)%sf(j, k, l, q, i) &
-                                     + mv_ts(2)%sf(j, k, l, q, i) &
-                                     + dt*rhs_mv(j, k, l, q, i))/2._wp
-                            end do
-                        end do
-                    end do
-                end do
-            end do
-        end if
-
-        if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp)
-
-        if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf)
-
-        if (model_eqns == 3 .and. (.not. relax)) then
-            call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf)
-        end if
-
-        if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf)
-
-        if (ib) then
-            if (qbmm .and. .not. polytropic) then
-                call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf)
-            else
-                call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf)
+            if (s == 1 .and. run_time_info) then
+                if (igr) then
+                    call s_write_run_time_information(q_cons_ts(1)%vf, t_step)
+                else
+                    call s_write_run_time_information(q_prim_vf, t_step)
+                end if
             end if
-        end if
-
-        call nvtxEndRange
-
-        call cpu_time(finish)
-
-        wall_time = abs(finish - start)
-
-        if (t_step >= 2) then
-            wall_time_avg = (wall_time + (t_step - 2)*wall_time_avg)/(t_step - 1)
-        else
-            wall_time_avg = 0._wp
-        end if
-
-    end subroutine s_2nd_order_tvd_rk
-
-    !> 3rd order TVD RK time-stepping algorithm
-        !! @param t_step Current time-step
-    impure subroutine s_3rd_order_tvd_rk(t_step, time_avg)
-#ifdef _CRAYFTN
-        !DIR$ OPTIMIZE (-haggress)
-#endif
-        integer, intent(IN) :: t_step
-        real(wp), intent(INOUT) :: time_avg
-
-        integer :: i, j, k, l, q !< Generic loop iterator
-        real(wp) :: start, finish
-        integer :: dest
-
-        ! Stage 1 of 3
-
-        if (.not. adap_dt) then
-            call cpu_time(start)
-            call nvtxStartRange("TIMESTEP")
-        end if
-
-        call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, 1)
 
-        if (run_time_info) then
-            if (igr) then
-                call s_write_run_time_information(q_cons_ts(1)%vf, t_step)
-            else
-                call s_write_run_time_information(q_prim_vf, t_step)
+            if (probe_wrt) then
+                call s_time_step_cycling(t_step)
             end if
-        end if
-
-        if (probe_wrt) then
-            call s_time_step_cycling(t_step)
-        end if
-
-        if (cfl_dt) then
-            if (mytime >= t_stop) return
-        else
-            if (t_step == t_step_stop) return
-        end if
-
-        if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1)
-
-#if defined(__NVCOMPILER_GPU_UNIFIED_MEM) || defined(FRONTIER_UNIFIED)
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(2)%vf(i)%sf(j, k, l) = &
-                            q_cons_ts(1)%vf(i)%sf(j, k, l)
-                        q_cons_ts(1)%vf(i)%sf(j, k, l) = &
-                            q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                            + dt*rhs_vf(i)%sf(j, k, l)
-                    end do
-                end do
-            end do
-        end do
 
-        dest = 1 ! result in q_cons_ts(1)%vf
-#else
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(2)%vf(i)%sf(j, k, l) = &
-                            q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                            + dt*rhs_vf(i)%sf(j, k, l)
-                    end do
-                end do
-            end do
-        end do
-
-        dest = 2 ! result in q_cons_ts(2)%vf
-#endif
-
-        !Evolve pb and mv for non-polytropic qbmm
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                pb_ts(2)%sf(j, k, l, q, i) = &
-                                    pb_ts(1)%sf(j, k, l, q, i) &
-                                    + dt*rhs_pb(j, k, l, q, i)
-                            end do
-                        end do
-                    end do
-                end do
-            end do
-        end if
-
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                mv_ts(2)%sf(j, k, l, q, i) = &
-                                    mv_ts(1)%sf(j, k, l, q, i) &
-                                    + dt*rhs_mv(j, k, l, q, i)
-                            end do
-                        end do
-                    end do
-                end do
-            end do
-        end if
-
-        if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, dt)
-
-        if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf)
-
-        if (model_eqns == 3 .and. (.not. relax)) then
-            call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf)
-        end if
-
-        if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf)
-
-        if (ib) then
-            if (qbmm .and. .not. polytropic) then
-                call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf)
+            if (cfl_dt) then
+                if (mytime >= t_stop) return
             else
-                call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf)
+                if (t_step == t_step_stop) return
             end if
-        end if
-
-        ! Stage 2 of 3
-        call s_compute_rhs(q_cons_ts(dest)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2)
-
-        if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2)
-
-#if  defined(__NVCOMPILER_GPU_UNIFIED_MEM) || defined(FRONTIER_UNIFIED)
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(1)%vf(i)%sf(j, k, l) = &
-                            (3._wp*q_cons_ts(2)%vf(i)%sf(j, k, l) &
-                             + q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                             + dt*rhs_vf(i)%sf(j, k, l))/4._wp
-                    end do
-                end do
-            end do
-        end do
-
-        dest = 1 ! Result in q_cons_ts(1)%vf
-#else
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(2)%vf(i)%sf(j, k, l) = &
-                            (3._wp*q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                             + q_cons_ts(2)%vf(i)%sf(j, k, l) &
-                             + dt*rhs_vf(i)%sf(j, k, l))/4._wp
-                    end do
-                end do
-            end do
-        end do
 
-        dest = 2 ! Result in q_cons_ts(2)%vf
-#endif
+            if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s)
 
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
+            $:GPU_PARALLEL_LOOP(collapse=4)
+            do i = 1, sys_size
                 do l = 0, p
                     do k = 0, n
                         do j = 0, m
-                            do q = 1, nnode
-                                pb_ts(2)%sf(j, k, l, q, i) = &
-                                    (3._wp*pb_ts(1)%sf(j, k, l, q, i) &
-                                     + pb_ts(2)%sf(j, k, l, q, i) &
-                                     + dt*rhs_pb(j, k, l, q, i))/4._wp
-                            end do
+                            if (s == 1 .and. time_stepper /= 1) then
+                              q_cons_ts(stor)%vf(i)%sf(j, k, l) = &
+                                  q_cons_ts(1)%vf(i)%sf(j, k, l)
+                            end if
+                            q_cons_ts(1)%vf(i)%sf(j, k, l) = &
+                                rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) &
+                              + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) &
+                              + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l)
                         end do
                     end do
                 end do
             end do
-        end if
 
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                mv_ts(2)%sf(j, k, l, q, i) = &
-                                    (3._wp*mv_ts(1)%sf(j, k, l, q, i) &
-                                     + mv_ts(2)%sf(j, k, l, q, i) &
-                                     + dt*rhs_mv(j, k, l, q, i))/4._wp
+            !Evolve pb and mv for non-polytropic qbmm
+            if (qbmm .and. (.not. polytropic)) then
+                $:GPU_PARALLEL_LOOP(collapse=5)
+                do i = 1, nb
+                    do l = 0, p
+                        do k = 0, n
+                            do j = 0, m
+                                do q = 1, nnode
+                                    if (s == 1 .and. time_stepper /= 1) then
+                                      pb_ts(stor)%sf(j, k, l, q, i) = &
+                                        pb_ts(1)%sf(j, k, l, q, i)
+                                      mv_ts(stor)%sf(j, k, l, q, i) = &
+                                        mv_ts(1)%sf(j, k, l, q, i)
+                                    end if
+                                    pb_ts(1)%sf(j, k, l, q, i) = &
+                                          rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) &
+                                        + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) &
+                                        + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i)
+                                    mv_ts(1)%sf(j, k, l, q, i) = &
+                                          rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) &
+                                        + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) &
+                                        + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i)
+                                end do
                             end do
                         end do
                     end do
                 end do
-            end do
-        end if
-
-        if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, dt/4._wp)
-
-        if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf)
-
-        if (model_eqns == 3 .and. (.not. relax)) then
-            call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf)
-        end if
-
-        if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf)
-
-        if (ib) then
-            if (qbmm .and. .not. polytropic) then
-                call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf)
-            else
-                call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf)
             end if
-        end if
-
-        ! Stage 3 of 3
-        call s_compute_rhs(q_cons_ts(dest)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 3)
-
-        if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3)
-
-#if defined(__NVCOMPILER_GPU_UNIFIED_MEM) || defined(FRONTIER_UNIFIED)
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(1)%vf(i)%sf(j, k, l) = &
-                            (q_cons_ts(2)%vf(i)%sf(j, k, l) &
-                             + 2._wp*q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                             + 2._wp*dt*rhs_vf(i)%sf(j, k, l))/3._wp
-                    end do
-                end do
-            end do
-        end do
 
-        dest = 1 ! Result in q_cons_ts(1)%vf
-#else
-        $:GPU_PARALLEL_LOOP(collapse=4)
-        do i = 1, sys_size
-            do l = 0, p
-                do k = 0, n
-                    do j = 0, m
-                        q_cons_ts(1)%vf(i)%sf(j, k, l) = &
-                            (q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                             + 2._wp*q_cons_ts(2)%vf(i)%sf(j, k, l) &
-                             + 2._wp*dt*rhs_vf(i)%sf(j, k, l))/3._wp
-                    end do
-                end do
-            end do
-        end do
+            if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, rk_coef(s, 3)*dt)
 
-        dest = 1 ! Result in q_cons_ts(2)%vf
-#endif
+            if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf)
 
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                pb_ts(1)%sf(j, k, l, q, i) = &
-                                    (pb_ts(1)%sf(j, k, l, q, i) &
-                                     + 2._wp*pb_ts(2)%sf(j, k, l, q, i) &
-                                     + 2._wp*dt*rhs_pb(j, k, l, q, i))/3._wp
-                            end do
-                        end do
-                    end do
-                end do
-            end do
-        end if
-
-        if (qbmm .and. (.not. polytropic)) then
-            $:GPU_PARALLEL_LOOP(collapse=5)
-            do i = 1, nb
-                do l = 0, p
-                    do k = 0, n
-                        do j = 0, m
-                            do q = 1, nnode
-                                mv_ts(1)%sf(j, k, l, q, i) = &
-                                    (mv_ts(1)%sf(j, k, l, q, i) &
-                                     + 2._wp*mv_ts(2)%sf(j, k, l, q, i) &
-                                     + 2._wp*dt*rhs_mv(j, k, l, q, i))/3._wp
-                            end do
-                        end do
-                    end do
-                end do
-            end do
-        end if
-
-        if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp)
-
-        if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf)
-
-        if (model_eqns == 3 .and. (.not. relax)) then
-            call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf)
-        end if
-
-        call nvtxStartRange("RHS-ELASTIC")
-        if (hyperelasticity) call s_hyperelastic_rmt_stress_update(q_cons_ts(dest)%vf, q_prim_vf)
-        call nvtxEndRange
-
-        if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf)
-
-        if (ib) then
-            if (qbmm .and. .not. polytropic) then
-                call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf)
-            else
-                call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf)
+            if (model_eqns == 3 .and. (.not. relax)) then
+                call s_pressure_relaxation_procedure(q_cons_ts(1)%vf)
             end if
-        end if
-
-        if (.not. adap_dt) then
-            call nvtxEndRange
-            call cpu_time(finish)
 
-            wall_time = abs(finish - start)
+            if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf)
 
-            if (t_step >= 2) then
-                wall_time_avg = (wall_time + (t_step - 2)*wall_time_avg)/(t_step - 1)
-            else
-                wall_time_avg = 0._wp
+            if (ib) then
+                if (qbmm .and. .not. polytropic) then
+                    call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf)
+                else
+                    call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf)
+                end if
             end if
+        end do
 
-        end if
-
-    end subroutine s_3rd_order_tvd_rk
-
-    !> Strang splitting scheme with 3rd order TVD RK time-stepping algorithm for
-        !!      the flux term and adaptive time stepping algorithm for
-        !!      the source term
-        !! @param t_step Current time-step
-    subroutine s_strang_splitting(t_step, time_avg)
-
-        integer, intent(in) :: t_step
-        real(wp), intent(inout) :: time_avg
-
-        real(wp) :: start, finish
-
-        call cpu_time(start)
-
-        call nvtxStartRange("TIMESTEP")
-
-        ! Stage 1 of 3
-        call s_adaptive_dt_bubble(1)
-
-        ! Stage 2 of 3
-        call s_3rd_order_tvd_rk(t_step, time_avg)
-
-        ! Stage 3 of 3
-        call s_adaptive_dt_bubble(3)
+        ! Adaptive dt: final stage 
+        if (adap_dt) call s_adaptive_dt_bubble(3)
 
         call nvtxEndRange
-
         call cpu_time(finish)
 
         wall_time = abs(finish - start)
@@ -1171,7 +597,7 @@ contains
             wall_time_avg = 0._wp
         end if
 
-    end subroutine s_strang_splitting
+    end subroutine s_tvd_rk
 
     !> Bubble source part in Strang operator splitting scheme
         !! @param t_step Current time-step

From 9bcad63157be27d0955cc8e1399c08c67a212948 Mon Sep 17 00:00:00 2001
From: Hyeoksu Lee <hyeoksu@M4Pro.local>
Date: Mon, 6 Oct 2025 13:47:05 -0700
Subject: [PATCH 02/10] format

---
 src/simulation/m_time_steppers.fpp | 84 +++++++++++++++---------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index 0e6ca3e330..6a916f627f 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -442,36 +442,36 @@ contains
         end do
 
         if (any(time_stepper == (/1, 2, 3/))) then
-          ! Time stepper index
-          if (time_stepper == 1) then
-            stor = 1
-          else
-            stor = 2
-          end if
-
-          @:ALLOCATE (rk_coef(time_stepper, 3))
-          if (time_stepper == 1) then
-            rk_coef(1, 1) = 1._wp
-            rk_coef(1, 2) = 0._wp
-            rk_coef(1, 3) = 1._wp
-          else if (time_stepper == 2) then
-            rk_coef(1, 1) = 1._wp
-            rk_coef(1, 2) = 0._wp
-            rk_coef(1, 3) = 1._wp
-            rk_coef(2, 1) = 0.5_wp
-            rk_coef(2, 2) = 0.5_wp
-            rk_coef(2, 3) = 0.5_wp
-          else if (time_stepper == 3) then
-            rk_coef(1, 1) = 1._wp
-            rk_coef(1, 2) = 0._wp
-            rk_coef(1, 3) = 1._wp
-            rk_coef(2, 1) = 1._wp/4._wp
-            rk_coef(2, 2) = 3._wp/4._wp
-            rk_coef(2, 3) = 1._wp/4._wp
-            rk_coef(3, 1) = 2._wp/3._wp
-            rk_coef(3, 2) = 1._wp/3._wp
-            rk_coef(3, 3) = 2._wp/3._wp
-          end if
+            ! Time stepper index
+            if (time_stepper == 1) then
+                stor = 1
+            else
+                stor = 2
+            end if
+
+            @:ALLOCATE (rk_coef(time_stepper, 3))
+            if (time_stepper == 1) then
+                rk_coef(1, 1) = 1._wp
+                rk_coef(1, 2) = 0._wp
+                rk_coef(1, 3) = 1._wp
+            else if (time_stepper == 2) then
+                rk_coef(1, 1) = 1._wp
+                rk_coef(1, 2) = 0._wp
+                rk_coef(1, 3) = 1._wp
+                rk_coef(2, 1) = 0.5_wp
+                rk_coef(2, 2) = 0.5_wp
+                rk_coef(2, 3) = 0.5_wp
+            else if (time_stepper == 3) then
+                rk_coef(1, 1) = 1._wp
+                rk_coef(1, 2) = 0._wp
+                rk_coef(1, 3) = 1._wp
+                rk_coef(2, 1) = 1._wp/4._wp
+                rk_coef(2, 2) = 3._wp/4._wp
+                rk_coef(2, 3) = 1._wp/4._wp
+                rk_coef(3, 1) = 2._wp/3._wp
+                rk_coef(3, 2) = 1._wp/3._wp
+                rk_coef(3, 3) = 2._wp/3._wp
+            end if
         end if
 
     end subroutine s_initialize_time_steppers_module
@@ -491,7 +491,7 @@ contains
         call cpu_time(start)
         call nvtxStartRange("TIMESTEP")
 
-        ! Adaptive dt: initial stage 
+        ! Adaptive dt: initial stage
         if (adap_dt) call s_adaptive_dt_bubble(1)
 
         do s = 1, nstage
@@ -523,13 +523,13 @@ contains
                     do k = 0, n
                         do j = 0, m
                             if (s == 1 .and. time_stepper /= 1) then
-                              q_cons_ts(stor)%vf(i)%sf(j, k, l) = &
-                                  q_cons_ts(1)%vf(i)%sf(j, k, l)
+                                q_cons_ts(stor)%vf(i)%sf(j, k, l) = &
+                                    q_cons_ts(1)%vf(i)%sf(j, k, l)
                             end if
                             q_cons_ts(1)%vf(i)%sf(j, k, l) = &
                                 rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                              + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) &
-                              + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l)
+                                + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) &
+                                + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l)
                         end do
                     end do
                 end do
@@ -544,17 +544,17 @@ contains
                             do j = 0, m
                                 do q = 1, nnode
                                     if (s == 1 .and. time_stepper /= 1) then
-                                      pb_ts(stor)%sf(j, k, l, q, i) = &
-                                        pb_ts(1)%sf(j, k, l, q, i)
-                                      mv_ts(stor)%sf(j, k, l, q, i) = &
-                                        mv_ts(1)%sf(j, k, l, q, i)
+                                        pb_ts(stor)%sf(j, k, l, q, i) = &
+                                            pb_ts(1)%sf(j, k, l, q, i)
+                                        mv_ts(stor)%sf(j, k, l, q, i) = &
+                                            mv_ts(1)%sf(j, k, l, q, i)
                                     end if
                                     pb_ts(1)%sf(j, k, l, q, i) = &
-                                          rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) &
+                                        rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) &
                                         + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) &
                                         + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i)
                                     mv_ts(1)%sf(j, k, l, q, i) = &
-                                          rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) &
+                                        rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) &
                                         + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) &
                                         + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i)
                                 end do
@@ -583,7 +583,7 @@ contains
             end if
         end do
 
-        ! Adaptive dt: final stage 
+        ! Adaptive dt: final stage
         if (adap_dt) call s_adaptive_dt_bubble(3)
 
         call nvtxEndRange

From d349c914eb7545d1bb33036f46f4b66fe518bd4e Mon Sep 17 00:00:00 2001
From: Hyeoksu Lee <121892187+hyeoksu-lee@users.noreply.github.com>
Date: Mon, 6 Oct 2025 14:50:01 -0700
Subject: [PATCH 03/10] Update src/simulation/m_time_steppers.fpp

Co-authored-by: qodo-merge-pro[bot] <151058649+qodo-merge-pro[bot]@users.noreply.github.com>
---
 src/simulation/m_time_steppers.fpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index 6a916f627f..fc6686cc82 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -522,7 +522,7 @@ contains
                 do l = 0, p
                     do k = 0, n
                         do j = 0, m
-                            if (s == 1 .and. time_stepper /= 1) then
+                            if (s == 1 .and. nstage > 1) then
                                 q_cons_ts(stor)%vf(i)%sf(j, k, l) = &
                                     q_cons_ts(1)%vf(i)%sf(j, k, l)
                             end if

From bdd0382518c364d17df1dc2d227ecc17082f6477 Mon Sep 17 00:00:00 2001
From: Hyeoksu Lee <121892187+hyeoksu-lee@users.noreply.github.com>
Date: Mon, 6 Oct 2025 14:50:15 -0700
Subject: [PATCH 04/10] Update src/simulation/m_time_steppers.fpp

Co-authored-by: qodo-merge-pro[bot] <151058649+qodo-merge-pro[bot]@users.noreply.github.com>
---
 src/simulation/m_time_steppers.fpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index fc6686cc82..2b1c5ce267 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -543,7 +543,7 @@ contains
                         do k = 0, n
                             do j = 0, m
                                 do q = 1, nnode
-                                    if (s == 1 .and. time_stepper /= 1) then
+                                    if (s == 1 .and. nstage > 1) then
                                         pb_ts(stor)%sf(j, k, l, q, i) = &
                                             pb_ts(1)%sf(j, k, l, q, i)
                                         mv_ts(stor)%sf(j, k, l, q, i) = &

From 1cf0390660bc27caefc55e0d5245514f1d7cc410 Mon Sep 17 00:00:00 2001
From: Hyeoksu Lee <hyeoksu@M4Pro.local>
Date: Thu, 9 Oct 2025 22:50:56 -0700
Subject: [PATCH 05/10] fix precision-related bug

---
 src/simulation/m_time_steppers.fpp | 63 +++++++++++++-----------------
 1 file changed, 27 insertions(+), 36 deletions(-)

diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index 2b1c5ce267..43a0f67ea8 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -442,35 +442,24 @@ contains
         end do
 
         if (any(time_stepper == (/1, 2, 3/))) then
-            ! Time stepper index
+            ! temporary array index for TVD RK
             if (time_stepper == 1) then
                 stor = 1
             else
                 stor = 2
             end if
 
-            @:ALLOCATE (rk_coef(time_stepper, 3))
+            ! TVD RK coefficients
+            @:ALLOCATE (rk_coef(time_stepper, 4))
             if (time_stepper == 1) then
-                rk_coef(1, 1) = 1._wp
-                rk_coef(1, 2) = 0._wp
-                rk_coef(1, 3) = 1._wp
+                rk_coef(1, :) = (/1._wp, 0._wp, 1._wp, 1._wp/)
             else if (time_stepper == 2) then
-                rk_coef(1, 1) = 1._wp
-                rk_coef(1, 2) = 0._wp
-                rk_coef(1, 3) = 1._wp
-                rk_coef(2, 1) = 0.5_wp
-                rk_coef(2, 2) = 0.5_wp
-                rk_coef(2, 3) = 0.5_wp
+                rk_coef(1, :) = (/1._wp, 0._wp, 1._wp, 1._wp/)
+                rk_coef(2, :) = (/1._wp, 1._wp, 1._wp, 2._wp/)
             else if (time_stepper == 3) then
-                rk_coef(1, 1) = 1._wp
-                rk_coef(1, 2) = 0._wp
-                rk_coef(1, 3) = 1._wp
-                rk_coef(2, 1) = 1._wp/4._wp
-                rk_coef(2, 2) = 3._wp/4._wp
-                rk_coef(2, 3) = 1._wp/4._wp
-                rk_coef(3, 1) = 2._wp/3._wp
-                rk_coef(3, 2) = 1._wp/3._wp
-                rk_coef(3, 3) = 2._wp/3._wp
+                rk_coef(1, :) = (/1._wp, 0._wp, 1._wp, 1._wp/)
+                rk_coef(2, :) = (/1._wp, 3._wp, 1._wp, 4._wp/)
+                rk_coef(3, :) = (/2._wp, 1._wp, 2._wp, 3._wp/)
             end if
         end if
 
@@ -497,22 +486,24 @@ contains
         do s = 1, nstage
             call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, s)
 
-            if (s == 1 .and. run_time_info) then
+            if (s == 1) then
+              if (run_time_info) then
                 if (igr) then
                     call s_write_run_time_information(q_cons_ts(1)%vf, t_step)
                 else
                     call s_write_run_time_information(q_prim_vf, t_step)
                 end if
-            end if
+              end if
 
-            if (probe_wrt) then
-                call s_time_step_cycling(t_step)
-            end if
+              if (probe_wrt) then
+                  call s_time_step_cycling(t_step)
+              end if
 
-            if (cfl_dt) then
-                if (mytime >= t_stop) return
-            else
-                if (t_step == t_step_stop) return
+              if (cfl_dt) then
+                  if (mytime >= t_stop) return
+              else
+                  if (t_step == t_step_stop) return
+              end if
             end if
 
             if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s)
@@ -527,9 +518,9 @@ contains
                                     q_cons_ts(1)%vf(i)%sf(j, k, l)
                             end if
                             q_cons_ts(1)%vf(i)%sf(j, k, l) = &
-                                rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) &
+                                (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) &
                                 + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) &
-                                + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l)
+                                + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4)
                         end do
                     end do
                 end do
@@ -550,13 +541,13 @@ contains
                                             mv_ts(1)%sf(j, k, l, q, i)
                                     end if
                                     pb_ts(1)%sf(j, k, l, q, i) = &
-                                        rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) &
+                                        (rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) &
                                         + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) &
-                                        + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i)
+                                        + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4)
                                     mv_ts(1)%sf(j, k, l, q, i) = &
-                                        rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) &
+                                        (rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) &
                                         + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) &
-                                        + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i)
+                                        + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4)
                                 end do
                             end do
                         end do
@@ -564,7 +555,7 @@ contains
                 end do
             end if
 
-            if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, rk_coef(s, 3)*dt)
+            if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, rk_coef(s, 3)*dt/rk_coef(s, 4))
 
             if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf)
 

From 5ab91939b0b25164b578fbbeda0f623d1f29c95c Mon Sep 17 00:00:00 2001
From: Hyeoksu Lee <hyeoksu@M4Pro.local>
Date: Thu, 9 Oct 2025 22:53:15 -0700
Subject: [PATCH 06/10] format

---
 src/simulation/m_time_steppers.fpp | 40 +++++++++++++++---------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index 43a0f67ea8..3158ac80a5 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -487,23 +487,23 @@ contains
             call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, s)
 
             if (s == 1) then
-              if (run_time_info) then
-                if (igr) then
-                    call s_write_run_time_information(q_cons_ts(1)%vf, t_step)
-                else
-                    call s_write_run_time_information(q_prim_vf, t_step)
+                if (run_time_info) then
+                    if (igr) then
+                        call s_write_run_time_information(q_cons_ts(1)%vf, t_step)
+                    else
+                        call s_write_run_time_information(q_prim_vf, t_step)
+                    end if
                 end if
-              end if
 
-              if (probe_wrt) then
-                  call s_time_step_cycling(t_step)
-              end if
+                if (probe_wrt) then
+                    call s_time_step_cycling(t_step)
+                end if
 
-              if (cfl_dt) then
-                  if (mytime >= t_stop) return
-              else
-                  if (t_step == t_step_stop) return
-              end if
+                if (cfl_dt) then
+                    if (mytime >= t_stop) return
+                else
+                    if (t_step == t_step_stop) return
+                end if
             end if
 
             if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s)
@@ -519,8 +519,8 @@ contains
                             end if
                             q_cons_ts(1)%vf(i)%sf(j, k, l) = &
                                 (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) &
-                                + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) &
-                                + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4)
+                                 + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) &
+                                 + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4)
                         end do
                     end do
                 end do
@@ -542,12 +542,12 @@ contains
                                     end if
                                     pb_ts(1)%sf(j, k, l, q, i) = &
                                         (rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) &
-                                        + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) &
-                                        + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4)
+                                         + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) &
+                                         + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4)
                                     mv_ts(1)%sf(j, k, l, q, i) = &
                                         (rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) &
-                                        + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) &
-                                        + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4)
+                                         + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) &
+                                         + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4)
                                 end do
                             end do
                         end do

From 34f2307493d258f270649c0f9acd5137dff4380d Mon Sep 17 00:00:00 2001
From: Hyeoksu Lee <hyeoksu@M4Pro.local>
Date: Thu, 9 Oct 2025 23:08:28 -0700
Subject: [PATCH 07/10] remove unused var

---
 src/simulation/m_time_steppers.fpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index 3158ac80a5..d76a8bcd4b 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -475,7 +475,6 @@ contains
 
         integer :: i, j, k, l, q, s !< Generic loop iterator
         real(wp) :: start, finish
-        integer :: dest
 
         call cpu_time(start)
         call nvtxStartRange("TIMESTEP")

From 37460c76352a203092508a38e60765cd07b33776 Mon Sep 17 00:00:00 2001
From: Hyeoksu Lee <hyeoksu@M4Pro.local>
Date: Thu, 9 Oct 2025 23:39:20 -0700
Subject: [PATCH 08/10] fix cantera version compatibility for MacOS runner

---
 .github/workflows/test.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 3031858eb1..503f691ec6 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -48,6 +48,11 @@ jobs:
       - name: Clone
         uses: actions/checkout@v4
 
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
       - name: Setup MacOS
         if:   matrix.os == 'macos'
         run:  |

From 1d236ceeaf02d1c7daf60c3b459709b0ef1aae6d Mon Sep 17 00:00:00 2001
From: Hyeoksu Lee <hlee3@dt-login02.delta.ncsa.illinois.edu>
Date: Fri, 10 Oct 2025 04:15:23 -0500
Subject: [PATCH 09/10] add gpu_update for rk_coef

---
 src/simulation/m_time_steppers.fpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index d76a8bcd4b..66514e34ed 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -461,6 +461,7 @@ contains
                 rk_coef(2, :) = (/1._wp, 3._wp, 1._wp, 4._wp/)
                 rk_coef(3, :) = (/2._wp, 1._wp, 2._wp, 3._wp/)
             end if
+            $:GPU_UPDATE(device='[rk_coef]') 
         end if
 
     end subroutine s_initialize_time_steppers_module

From 1787d0c46697b68ece9bc9bcee359f06b8ff2b3d Mon Sep 17 00:00:00 2001
From: Hyeoksu Lee <hyeoksu@M4Pro.local>
Date: Fri, 10 Oct 2025 02:19:45 -0700
Subject: [PATCH 10/10] format

---
 src/simulation/m_time_steppers.fpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index 66514e34ed..c12ecd6319 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -461,7 +461,7 @@ contains
                 rk_coef(2, :) = (/1._wp, 3._wp, 1._wp, 4._wp/)
                 rk_coef(3, :) = (/2._wp, 1._wp, 2._wp, 3._wp/)
             end if
-            $:GPU_UPDATE(device='[rk_coef]') 
+            $:GPU_UPDATE(device='[rk_coef]')
         end if
 
     end subroutine s_initialize_time_steppers_module