From 7699ee317432da66c79dc93bebf3673ec7b2f714 Mon Sep 17 00:00:00 2001 From: Hyeoksu Lee Date: Mon, 6 Oct 2025 13:45:05 -0700 Subject: [PATCH 01/10] refactor time_stepper routines --- src/simulation/m_start_up.fpp | 10 +- src/simulation/m_time_steppers.fpp | 780 ++++------------------------- 2 files changed, 105 insertions(+), 685 deletions(-) diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 123db558ea..ffd0bf865e 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1149,14 +1149,8 @@ contains mytime = mytime + dt ! Total-variation-diminishing (TVD) Runge-Kutta (RK) time-steppers - if (time_stepper == 1) then - call s_1st_order_tvd_rk(t_step, time_avg) - elseif (time_stepper == 2) then - call s_2nd_order_tvd_rk(t_step, time_avg) - elseif (time_stepper == 3 .and. (.not. adap_dt)) then - call s_3rd_order_tvd_rk(t_step, time_avg) - elseif (time_stepper == 3 .and. adap_dt) then - call s_strang_splitting(t_step, time_avg) + if (any(time_stepper == (/1, 2, 3/))) then + call s_tvd_rk(t_step, time_avg, time_stepper) end if if (relax) call s_infinite_relaxation_k(q_cons_ts(1)%vf) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index e7d4ba6017..0e6ca3e330 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -75,7 +75,10 @@ module m_time_steppers integer, private :: num_ts !< !! Number of time stages in the time-stepping scheme - $:GPU_DECLARE(create='[q_cons_ts,q_prim_vf,q_T_sf,rhs_vf,q_prim_ts,rhs_mv,rhs_pb,max_dt]') + integer :: stor !< storage index + real(wp), allocatable, dimension(:, :) :: rk_coef + + $:GPU_DECLARE(create='[q_cons_ts,q_prim_vf,q_T_sf,rhs_vf,q_prim_ts,rhs_mv,rhs_pb,max_dt,rk_coef]') #if defined(__NVCOMPILER_GPU_UNIFIED_MEM) real(wp), allocatable, dimension(:, :, :, :), pinned, target :: q_cons_ts_pool_host @@ -438,729 +441,152 @@ contains end do end do - end subroutine s_initialize_time_steppers_module - - !> 1st order TVD RK time-stepping algorithm - !! @param t_step Current time step - impure subroutine s_1st_order_tvd_rk(t_step, time_avg) -#ifdef _CRAYFTN - !DIR$ OPTIMIZE (-haggress) -#endif - integer, intent(in) :: t_step - real(wp), intent(inout) :: time_avg - - integer :: i, j, k, l, q !< Generic loop iterator - - real(wp) :: start, finish - - ! Stage 1 of 1 - call cpu_time(start) - call nvtxStartRange("TIMESTEP") - - call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, 1) - -#ifdef DEBUG - print *, 'got rhs' -#endif - - if (run_time_info) then - if (igr) then - call s_write_run_time_information(q_cons_ts(1)%vf, t_step) - else - call s_write_run_time_information(q_prim_vf, t_step) - end if - end if - -#ifdef DEBUG - print *, 'wrote runtime info' -#endif - - if (probe_wrt) then - call s_time_step_cycling(t_step) - end if - - if (cfl_dt) then - if (mytime >= t_stop) return - else - if (t_step == t_step_stop) return - end if - - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - !Evolve pb and mv for non-polytropic qbmm - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) - end do - end do - end do - end do - end do + if (any(time_stepper == (/1, 2, 3/))) then + ! Time stepper index + if (time_stepper == 1) then + stor = 1 + else + stor = 2 + end if + + @:ALLOCATE (rk_coef(time_stepper, 3)) + if (time_stepper == 1) then + rk_coef(1, 1) = 1._wp + rk_coef(1, 2) = 0._wp + rk_coef(1, 3) = 1._wp + else if (time_stepper == 2) then + rk_coef(1, 1) = 1._wp + rk_coef(1, 2) = 0._wp + rk_coef(1, 3) = 1._wp + rk_coef(2, 1) = 0.5_wp + rk_coef(2, 2) = 0.5_wp + rk_coef(2, 3) = 0.5_wp + else if (time_stepper == 3) then + rk_coef(1, 1) = 1._wp + rk_coef(1, 2) = 0._wp + rk_coef(1, 3) = 1._wp + rk_coef(2, 1) = 1._wp/4._wp + rk_coef(2, 2) = 3._wp/4._wp + rk_coef(2, 3) = 1._wp/4._wp + rk_coef(3, 1) = 2._wp/3._wp + rk_coef(3, 2) = 1._wp/3._wp + rk_coef(3, 3) = 2._wp/3._wp + end if end if - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) - end do - end do - end do - end do - end do - end if - - if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, dt) - - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) - - if (model_eqns == 3) call s_pressure_relaxation_procedure(q_cons_ts(1)%vf) - - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf) - - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - else - call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf) - end if - end if - - call nvtxEndRange - - call cpu_time(finish) - - wall_time = abs(finish - start) - - if (t_step >= 2) then - wall_time_avg = (wall_time + (t_step - 2)*wall_time_avg)/(t_step - 1) - else - wall_time_avg = 0._wp - end if - - end subroutine s_1st_order_tvd_rk + end subroutine s_initialize_time_steppers_module - !> 2nd order TVD RK time-stepping algorithm - !! @param t_step Current time-step - impure subroutine s_2nd_order_tvd_rk(t_step, time_avg) + impure subroutine s_tvd_rk(t_step, time_avg, nstage) #ifdef _CRAYFTN !DIR$ OPTIMIZE (-haggress) #endif integer, intent(in) :: t_step real(wp), intent(inout) :: time_avg + integer, intent(in) :: nstage - integer :: i, j, k, l, q!< Generic loop iterator + integer :: i, j, k, l, q, s !< Generic loop iterator real(wp) :: start, finish integer :: dest - ! Stage 1 of 2 - call cpu_time(start) - call nvtxStartRange("TIMESTEP") - call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, 1) - - if (run_time_info) then - if (igr) then - call s_write_run_time_information(q_cons_ts(1)%vf, t_step) - else - call s_write_run_time_information(q_prim_vf, t_step) - end if - end if - - if (probe_wrt) then - call s_time_step_cycling(t_step) - end if - - if (cfl_dt) then - if (mytime >= t_stop) return - else - if (t_step == t_step_stop) return - end if - - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - -#if defined(__NVCOMPILER_GPU_UNIFIED_MEM) || defined(FRONTIER_UNIFIED) - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - dest = 1 ! Result in q_cons_ts(1)%vf -#else - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - dest = 2 ! Result in q_cons_ts(2)%vf -#endif - - !Evolve pb and mv for non-polytropic qbmm - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) - end do - end do - end do - end do - end do - end if - - if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, dt) + ! Adaptive dt: initial stage + if (adap_dt) call s_adaptive_dt_bubble(1) - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf) - - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf) - end if + do s = 1, nstage + call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, s) - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf) - - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) - else - call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf) - end if - end if - - ! Stage 2 of 2 - call s_compute_rhs(q_cons_ts(dest)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - -#if defined(__NVCOMPILER_GPU_UNIFIED_MEM) || defined(FRONTIER_UNIFIED) - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (q_cons_ts(2)%vf(i)%sf(j, k, l) & - + q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l))/2._wp - end do - end do - end do - end do - - dest = 1 ! Result in q_cons_ts(1)%vf -#else - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (q_cons_ts(1)%vf(i)%sf(j, k, l) & - + q_cons_ts(2)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l))/2._wp - end do - end do - end do - end do - - dest = 1 ! Result in q_cons_ts(1)%vf -#endif - - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - (pb_ts(1)%sf(j, k, l, q, i) & - + pb_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i))/2._wp - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - (mv_ts(1)%sf(j, k, l, q, i) & - + mv_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i))/2._wp - end do - end do - end do - end do - end do - end if - - if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) - - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf) - - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf) - end if - - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf) - - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - else - call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf) + if (s == 1 .and. run_time_info) then + if (igr) then + call s_write_run_time_information(q_cons_ts(1)%vf, t_step) + else + call s_write_run_time_information(q_prim_vf, t_step) + end if end if - end if - - call nvtxEndRange - - call cpu_time(finish) - - wall_time = abs(finish - start) - - if (t_step >= 2) then - wall_time_avg = (wall_time + (t_step - 2)*wall_time_avg)/(t_step - 1) - else - wall_time_avg = 0._wp - end if - - end subroutine s_2nd_order_tvd_rk - - !> 3rd order TVD RK time-stepping algorithm - !! @param t_step Current time-step - impure subroutine s_3rd_order_tvd_rk(t_step, time_avg) -#ifdef _CRAYFTN - !DIR$ OPTIMIZE (-haggress) -#endif - integer, intent(IN) :: t_step - real(wp), intent(INOUT) :: time_avg - - integer :: i, j, k, l, q !< Generic loop iterator - real(wp) :: start, finish - integer :: dest - - ! Stage 1 of 3 - - if (.not. adap_dt) then - call cpu_time(start) - call nvtxStartRange("TIMESTEP") - end if - - call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, 1) - if (run_time_info) then - if (igr) then - call s_write_run_time_information(q_cons_ts(1)%vf, t_step) - else - call s_write_run_time_information(q_prim_vf, t_step) + if (probe_wrt) then + call s_time_step_cycling(t_step) end if - end if - - if (probe_wrt) then - call s_time_step_cycling(t_step) - end if - - if (cfl_dt) then - if (mytime >= t_stop) return - else - if (t_step == t_step_stop) return - end if - - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - -#if defined(__NVCOMPILER_GPU_UNIFIED_MEM) || defined(FRONTIER_UNIFIED) - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do - dest = 1 ! result in q_cons_ts(1)%vf -#else - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - dest = 2 ! result in q_cons_ts(2)%vf -#endif - - !Evolve pb and mv for non-polytropic qbmm - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) - end do - end do - end do - end do - end do - end if - - if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, dt) - - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf) - - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf) - end if - - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf) - - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) + if (cfl_dt) then + if (mytime >= t_stop) return else - call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf) + if (t_step == t_step_stop) return end if - end if - - ! Stage 2 of 3 - call s_compute_rhs(q_cons_ts(dest)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - -#if defined(__NVCOMPILER_GPU_UNIFIED_MEM) || defined(FRONTIER_UNIFIED) - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (3._wp*q_cons_ts(2)%vf(i)%sf(j, k, l) & - + q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l))/4._wp - end do - end do - end do - end do - - dest = 1 ! Result in q_cons_ts(1)%vf -#else - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - (3._wp*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + q_cons_ts(2)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l))/4._wp - end do - end do - end do - end do - dest = 2 ! Result in q_cons_ts(2)%vf -#endif + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s) - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb + $:GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size do l = 0, p do k = 0, n do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - (3._wp*pb_ts(1)%sf(j, k, l, q, i) & - + pb_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i))/4._wp - end do + if (s == 1 .and. time_stepper /= 1) then + q_cons_ts(stor)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) + end if + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & + + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & + + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l) end do end do end do end do - end if - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - (3._wp*mv_ts(1)%sf(j, k, l, q, i) & - + mv_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i))/4._wp + !Evolve pb and mv for non-polytropic qbmm + if (qbmm .and. (.not. polytropic)) then + $:GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + if (s == 1 .and. time_stepper /= 1) then + pb_ts(stor)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) + mv_ts(stor)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) + end if + pb_ts(1)%sf(j, k, l, q, i) = & + rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & + + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & + + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i) + mv_ts(1)%sf(j, k, l, q, i) = & + rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & + + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & + + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i) + end do end do end do end do end do - end do - end if - - if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, dt/4._wp) - - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf) - - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf) - end if - - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf) - - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) - else - call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf) end if - end if - - ! Stage 3 of 3 - call s_compute_rhs(q_cons_ts(dest)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 3) - - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) - -#if defined(__NVCOMPILER_GPU_UNIFIED_MEM) || defined(FRONTIER_UNIFIED) - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (q_cons_ts(2)%vf(i)%sf(j, k, l) & - + 2._wp*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + 2._wp*dt*rhs_vf(i)%sf(j, k, l))/3._wp - end do - end do - end do - end do - dest = 1 ! Result in q_cons_ts(1)%vf -#else - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (q_cons_ts(1)%vf(i)%sf(j, k, l) & - + 2._wp*q_cons_ts(2)%vf(i)%sf(j, k, l) & - + 2._wp*dt*rhs_vf(i)%sf(j, k, l))/3._wp - end do - end do - end do - end do + if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, rk_coef(s, 3)*dt) - dest = 1 ! Result in q_cons_ts(2)%vf -#endif + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - (pb_ts(1)%sf(j, k, l, q, i) & - + 2._wp*pb_ts(2)%sf(j, k, l, q, i) & - + 2._wp*dt*rhs_pb(j, k, l, q, i))/3._wp - end do - end do - end do - end do - end do - end if - - if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - (mv_ts(1)%sf(j, k, l, q, i) & - + 2._wp*mv_ts(2)%sf(j, k, l, q, i) & - + 2._wp*dt*rhs_mv(j, k, l, q, i))/3._wp - end do - end do - end do - end do - end do - end if - - if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) - - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf) - - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf) - end if - - call nvtxStartRange("RHS-ELASTIC") - if (hyperelasticity) call s_hyperelastic_rmt_stress_update(q_cons_ts(dest)%vf, q_prim_vf) - call nvtxEndRange - - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf) - - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - else - call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf) + if (model_eqns == 3 .and. (.not. relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(1)%vf) end if - end if - - if (.not. adap_dt) then - call nvtxEndRange - call cpu_time(finish) - wall_time = abs(finish - start) + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf) - if (t_step >= 2) then - wall_time_avg = (wall_time + (t_step - 2)*wall_time_avg)/(t_step - 1) - else - wall_time_avg = 0._wp + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) + else + call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf) + end if end if + end do - end if - - end subroutine s_3rd_order_tvd_rk - - !> Strang splitting scheme with 3rd order TVD RK time-stepping algorithm for - !! the flux term and adaptive time stepping algorithm for - !! the source term - !! @param t_step Current time-step - subroutine s_strang_splitting(t_step, time_avg) - - integer, intent(in) :: t_step - real(wp), intent(inout) :: time_avg - - real(wp) :: start, finish - - call cpu_time(start) - - call nvtxStartRange("TIMESTEP") - - ! Stage 1 of 3 - call s_adaptive_dt_bubble(1) - - ! Stage 2 of 3 - call s_3rd_order_tvd_rk(t_step, time_avg) - - ! Stage 3 of 3 - call s_adaptive_dt_bubble(3) + ! Adaptive dt: final stage + if (adap_dt) call s_adaptive_dt_bubble(3) call nvtxEndRange - call cpu_time(finish) wall_time = abs(finish - start) @@ -1171,7 +597,7 @@ contains wall_time_avg = 0._wp end if - end subroutine s_strang_splitting + end subroutine s_tvd_rk !> Bubble source part in Strang operator splitting scheme !! @param t_step Current time-step From 9bcad63157be27d0955cc8e1399c08c67a212948 Mon Sep 17 00:00:00 2001 From: Hyeoksu Lee Date: Mon, 6 Oct 2025 13:47:05 -0700 Subject: [PATCH 02/10] format --- src/simulation/m_time_steppers.fpp | 84 +++++++++++++++--------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 0e6ca3e330..6a916f627f 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -442,36 +442,36 @@ contains end do if (any(time_stepper == (/1, 2, 3/))) then - ! Time stepper index - if (time_stepper == 1) then - stor = 1 - else - stor = 2 - end if - - @:ALLOCATE (rk_coef(time_stepper, 3)) - if (time_stepper == 1) then - rk_coef(1, 1) = 1._wp - rk_coef(1, 2) = 0._wp - rk_coef(1, 3) = 1._wp - else if (time_stepper == 2) then - rk_coef(1, 1) = 1._wp - rk_coef(1, 2) = 0._wp - rk_coef(1, 3) = 1._wp - rk_coef(2, 1) = 0.5_wp - rk_coef(2, 2) = 0.5_wp - rk_coef(2, 3) = 0.5_wp - else if (time_stepper == 3) then - rk_coef(1, 1) = 1._wp - rk_coef(1, 2) = 0._wp - rk_coef(1, 3) = 1._wp - rk_coef(2, 1) = 1._wp/4._wp - rk_coef(2, 2) = 3._wp/4._wp - rk_coef(2, 3) = 1._wp/4._wp - rk_coef(3, 1) = 2._wp/3._wp - rk_coef(3, 2) = 1._wp/3._wp - rk_coef(3, 3) = 2._wp/3._wp - end if + ! Time stepper index + if (time_stepper == 1) then + stor = 1 + else + stor = 2 + end if + + @:ALLOCATE (rk_coef(time_stepper, 3)) + if (time_stepper == 1) then + rk_coef(1, 1) = 1._wp + rk_coef(1, 2) = 0._wp + rk_coef(1, 3) = 1._wp + else if (time_stepper == 2) then + rk_coef(1, 1) = 1._wp + rk_coef(1, 2) = 0._wp + rk_coef(1, 3) = 1._wp + rk_coef(2, 1) = 0.5_wp + rk_coef(2, 2) = 0.5_wp + rk_coef(2, 3) = 0.5_wp + else if (time_stepper == 3) then + rk_coef(1, 1) = 1._wp + rk_coef(1, 2) = 0._wp + rk_coef(1, 3) = 1._wp + rk_coef(2, 1) = 1._wp/4._wp + rk_coef(2, 2) = 3._wp/4._wp + rk_coef(2, 3) = 1._wp/4._wp + rk_coef(3, 1) = 2._wp/3._wp + rk_coef(3, 2) = 1._wp/3._wp + rk_coef(3, 3) = 2._wp/3._wp + end if end if end subroutine s_initialize_time_steppers_module @@ -491,7 +491,7 @@ contains call cpu_time(start) call nvtxStartRange("TIMESTEP") - ! Adaptive dt: initial stage + ! Adaptive dt: initial stage if (adap_dt) call s_adaptive_dt_bubble(1) do s = 1, nstage @@ -523,13 +523,13 @@ contains do k = 0, n do j = 0, m if (s == 1 .and. time_stepper /= 1) then - q_cons_ts(stor)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) + q_cons_ts(stor)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) end if q_cons_ts(1)%vf(i)%sf(j, k, l) = & rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & - + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l) + + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & + + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l) end do end do end do @@ -544,17 +544,17 @@ contains do j = 0, m do q = 1, nnode if (s == 1 .and. time_stepper /= 1) then - pb_ts(stor)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) - mv_ts(stor)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) + pb_ts(stor)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) + mv_ts(stor)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) end if pb_ts(1)%sf(j, k, l, q, i) = & - rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & + rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i) mv_ts(1)%sf(j, k, l, q, i) = & - rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & + rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i) end do @@ -583,7 +583,7 @@ contains end if end do - ! Adaptive dt: final stage + ! Adaptive dt: final stage if (adap_dt) call s_adaptive_dt_bubble(3) call nvtxEndRange From d349c914eb7545d1bb33036f46f4b66fe518bd4e Mon Sep 17 00:00:00 2001 From: Hyeoksu Lee <121892187+hyeoksu-lee@users.noreply.github.com> Date: Mon, 6 Oct 2025 14:50:01 -0700 Subject: [PATCH 03/10] Update src/simulation/m_time_steppers.fpp Co-authored-by: qodo-merge-pro[bot] <151058649+qodo-merge-pro[bot]@users.noreply.github.com> --- src/simulation/m_time_steppers.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 6a916f627f..fc6686cc82 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -522,7 +522,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - if (s == 1 .and. time_stepper /= 1) then + if (s == 1 .and. nstage > 1) then q_cons_ts(stor)%vf(i)%sf(j, k, l) = & q_cons_ts(1)%vf(i)%sf(j, k, l) end if From bdd0382518c364d17df1dc2d227ecc17082f6477 Mon Sep 17 00:00:00 2001 From: Hyeoksu Lee <121892187+hyeoksu-lee@users.noreply.github.com> Date: Mon, 6 Oct 2025 14:50:15 -0700 Subject: [PATCH 04/10] Update src/simulation/m_time_steppers.fpp Co-authored-by: qodo-merge-pro[bot] <151058649+qodo-merge-pro[bot]@users.noreply.github.com> --- src/simulation/m_time_steppers.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index fc6686cc82..2b1c5ce267 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -543,7 +543,7 @@ contains do k = 0, n do j = 0, m do q = 1, nnode - if (s == 1 .and. time_stepper /= 1) then + if (s == 1 .and. nstage > 1) then pb_ts(stor)%sf(j, k, l, q, i) = & pb_ts(1)%sf(j, k, l, q, i) mv_ts(stor)%sf(j, k, l, q, i) = & From 1cf0390660bc27caefc55e0d5245514f1d7cc410 Mon Sep 17 00:00:00 2001 From: Hyeoksu Lee Date: Thu, 9 Oct 2025 22:50:56 -0700 Subject: [PATCH 05/10] fix precision-related bug --- src/simulation/m_time_steppers.fpp | 63 +++++++++++++----------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 2b1c5ce267..43a0f67ea8 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -442,35 +442,24 @@ contains end do if (any(time_stepper == (/1, 2, 3/))) then - ! Time stepper index + ! temporary array index for TVD RK if (time_stepper == 1) then stor = 1 else stor = 2 end if - @:ALLOCATE (rk_coef(time_stepper, 3)) + ! TVD RK coefficients + @:ALLOCATE (rk_coef(time_stepper, 4)) if (time_stepper == 1) then - rk_coef(1, 1) = 1._wp - rk_coef(1, 2) = 0._wp - rk_coef(1, 3) = 1._wp + rk_coef(1, :) = (/1._wp, 0._wp, 1._wp, 1._wp/) else if (time_stepper == 2) then - rk_coef(1, 1) = 1._wp - rk_coef(1, 2) = 0._wp - rk_coef(1, 3) = 1._wp - rk_coef(2, 1) = 0.5_wp - rk_coef(2, 2) = 0.5_wp - rk_coef(2, 3) = 0.5_wp + rk_coef(1, :) = (/1._wp, 0._wp, 1._wp, 1._wp/) + rk_coef(2, :) = (/1._wp, 1._wp, 1._wp, 2._wp/) else if (time_stepper == 3) then - rk_coef(1, 1) = 1._wp - rk_coef(1, 2) = 0._wp - rk_coef(1, 3) = 1._wp - rk_coef(2, 1) = 1._wp/4._wp - rk_coef(2, 2) = 3._wp/4._wp - rk_coef(2, 3) = 1._wp/4._wp - rk_coef(3, 1) = 2._wp/3._wp - rk_coef(3, 2) = 1._wp/3._wp - rk_coef(3, 3) = 2._wp/3._wp + rk_coef(1, :) = (/1._wp, 0._wp, 1._wp, 1._wp/) + rk_coef(2, :) = (/1._wp, 3._wp, 1._wp, 4._wp/) + rk_coef(3, :) = (/2._wp, 1._wp, 2._wp, 3._wp/) end if end if @@ -497,22 +486,24 @@ contains do s = 1, nstage call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, s) - if (s == 1 .and. run_time_info) then + if (s == 1) then + if (run_time_info) then if (igr) then call s_write_run_time_information(q_cons_ts(1)%vf, t_step) else call s_write_run_time_information(q_prim_vf, t_step) end if - end if + end if - if (probe_wrt) then - call s_time_step_cycling(t_step) - end if + if (probe_wrt) then + call s_time_step_cycling(t_step) + end if - if (cfl_dt) then - if (mytime >= t_stop) return - else - if (t_step == t_step_stop) return + if (cfl_dt) then + if (mytime >= t_stop) return + else + if (t_step == t_step_stop) return + end if end if if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s) @@ -527,9 +518,9 @@ contains q_cons_ts(1)%vf(i)%sf(j, k, l) end if q_cons_ts(1)%vf(i)%sf(j, k, l) = & - rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & + (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & - + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l) + + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) end do end do end do @@ -550,13 +541,13 @@ contains mv_ts(1)%sf(j, k, l, q, i) end if pb_ts(1)%sf(j, k, l, q, i) = & - rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & + (rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & - + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i) + + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4) mv_ts(1)%sf(j, k, l, q, i) = & - rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & + (rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & - + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i) + + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4) end do end do end do @@ -564,7 +555,7 @@ contains end do end if - if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, rk_coef(s, 3)*dt) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, rk_coef(s, 3)*dt/rk_coef(s, 4)) if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) From 5ab91939b0b25164b578fbbeda0f623d1f29c95c Mon Sep 17 00:00:00 2001 From: Hyeoksu Lee Date: Thu, 9 Oct 2025 22:53:15 -0700 Subject: [PATCH 06/10] format --- src/simulation/m_time_steppers.fpp | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 43a0f67ea8..3158ac80a5 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -487,23 +487,23 @@ contains call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, s) if (s == 1) then - if (run_time_info) then - if (igr) then - call s_write_run_time_information(q_cons_ts(1)%vf, t_step) - else - call s_write_run_time_information(q_prim_vf, t_step) + if (run_time_info) then + if (igr) then + call s_write_run_time_information(q_cons_ts(1)%vf, t_step) + else + call s_write_run_time_information(q_prim_vf, t_step) + end if end if - end if - if (probe_wrt) then - call s_time_step_cycling(t_step) - end if + if (probe_wrt) then + call s_time_step_cycling(t_step) + end if - if (cfl_dt) then - if (mytime >= t_stop) return - else - if (t_step == t_step_stop) return - end if + if (cfl_dt) then + if (mytime >= t_stop) return + else + if (t_step == t_step_stop) return + end if end if if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s) @@ -519,8 +519,8 @@ contains end if q_cons_ts(1)%vf(i)%sf(j, k, l) = & (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & - + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) + + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & + + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) end do end do end do @@ -542,12 +542,12 @@ contains end if pb_ts(1)%sf(j, k, l, q, i) = & (rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & - + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & - + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4) + + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & + + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4) mv_ts(1)%sf(j, k, l, q, i) = & (rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & - + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & - + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4) + + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & + + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4) end do end do end do From 34f2307493d258f270649c0f9acd5137dff4380d Mon Sep 17 00:00:00 2001 From: Hyeoksu Lee Date: Thu, 9 Oct 2025 23:08:28 -0700 Subject: [PATCH 07/10] remove unused var --- src/simulation/m_time_steppers.fpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 3158ac80a5..d76a8bcd4b 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -475,7 +475,6 @@ contains integer :: i, j, k, l, q, s !< Generic loop iterator real(wp) :: start, finish - integer :: dest call cpu_time(start) call nvtxStartRange("TIMESTEP") From 37460c76352a203092508a38e60765cd07b33776 Mon Sep 17 00:00:00 2001 From: Hyeoksu Lee Date: Thu, 9 Oct 2025 23:39:20 -0700 Subject: [PATCH 08/10] fix cantera version compatibility for MacOS runner --- .github/workflows/test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3031858eb1..503f691ec6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,6 +48,11 @@ jobs: - name: Clone uses: actions/checkout@v4 + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: '3.13' + - name: Setup MacOS if: matrix.os == 'macos' run: | From 1d236ceeaf02d1c7daf60c3b459709b0ef1aae6d Mon Sep 17 00:00:00 2001 From: Hyeoksu Lee Date: Fri, 10 Oct 2025 04:15:23 -0500 Subject: [PATCH 09/10] add gpu_update for rk_coef --- src/simulation/m_time_steppers.fpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index d76a8bcd4b..66514e34ed 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -461,6 +461,7 @@ contains rk_coef(2, :) = (/1._wp, 3._wp, 1._wp, 4._wp/) rk_coef(3, :) = (/2._wp, 1._wp, 2._wp, 3._wp/) end if + $:GPU_UPDATE(device='[rk_coef]') end if end subroutine s_initialize_time_steppers_module From 1787d0c46697b68ece9bc9bcee359f06b8ff2b3d Mon Sep 17 00:00:00 2001 From: Hyeoksu Lee Date: Fri, 10 Oct 2025 02:19:45 -0700 Subject: [PATCH 10/10] format --- src/simulation/m_time_steppers.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 66514e34ed..c12ecd6319 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -461,7 +461,7 @@ contains rk_coef(2, :) = (/1._wp, 3._wp, 1._wp, 4._wp/) rk_coef(3, :) = (/2._wp, 1._wp, 2._wp, 3._wp/) end if - $:GPU_UPDATE(device='[rk_coef]') + $:GPU_UPDATE(device='[rk_coef]') end if end subroutine s_initialize_time_steppers_module