From 8584e8909ff2eed746a6c5f4077688e1c4070c70 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Fri, 18 Jul 2025 15:41:06 -0400 Subject: [PATCH 01/60] Update mfc python and cmake to support OpenMP --- CMakeLists.txt | 40 ++++++++++++++++++++++++++++------------ toolchain/mfc/args.py | 9 +++++++-- toolchain/mfc/build.py | 6 +++++- toolchain/mfc/state.py | 26 ++++++++++++++++++++++---- 4 files changed, 62 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8269c1cb48..4c7a35187f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ project(MFC LANGUAGES C CXX Fortran) option(MFC_MPI "Build with MPI" ON) option(MFC_OpenACC "Build with OpenACC" OFF) +option(MFC_OpenMP "Build with OpenMP" OFF) option(MFC_GCov "Build with GCov" OFF) option(MFC_Unified "Build with unified CPU & GPU memory (GH-200 only)" OFF) option(MFC_PRE_PROCESS "Build pre_process" OFF) @@ -393,7 +394,7 @@ HANDLE_SOURCES(syscheck OFF) # being used. function(MFC_SETUP_TARGET) - cmake_parse_arguments(ARGS "OpenACC;MPI;SILO;HDF5;FFTW" "TARGET" "SOURCES" ${ARGN}) + cmake_parse_arguments(ARGS "OpenACC;MPI;SILO;HDF5;FFTW;OpenMP" "TARGET" "SOURCES" ${ARGN}) add_executable(${ARGS_TARGET} ${ARGS_SOURCES}) set(IPO_TARGETS ${ARGS_TARGET}) @@ -447,7 +448,7 @@ function(MFC_SETUP_TARGET) endif() if (ARGS_FFTW) - if (MFC_OpenACC AND ARGS_OpenACC) + if ((MFC_OpenACC AND ARGS_OpenACC) OR (MFC_OpenMP AND ARGS_OpenMP)) if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") find_package(CUDAToolkit REQUIRED) target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft) @@ -461,16 +462,31 @@ function(MFC_SETUP_TARGET) endif() endif() - if (MFC_OpenACC AND ARGS_OpenACC) - find_package(OpenACC) + if ((MFC_OpenACC AND ARGS_OpenACC) OR (MFC_OpenMP AND ARGS_OpenMP)) + if ((MFC_OpenACC AND ARGS_OpenACC)) + find_package(OpenACC) - # This should be equivalent to if (NOT OpenACC_FC_FOUND) - if (NOT TARGET OpenACC::OpenACC_Fortran) - 
message(FATAL_ERROR "OpenACC + Fortran is unsupported.") - endif() + # This should be equivalent to if (NOT OpenACC_FC_FOUND) + if (NOT TARGET OpenACC::OpenACC_Fortran) + message(FATAL_ERROR "OpenACC + Fortran is unsupported.") + endif() - target_link_libraries(${a_target} PRIVATE OpenACC::OpenACC_Fortran) - target_compile_definitions(${a_target} PRIVATE MFC_OpenACC) + target_link_libraries(${a_target} PRIVATE OpenACC::OpenACC_Fortran) + target_compile_definitions(${a_target} PRIVATE MFC_OpenACC MFC_GPU) + elseif((MFC_OpenMP AND ARGS_OpenMP)) + find_package(OpenMP) + + # This should be equivalent to if (NOT OpenMP_FC_FOUND) + if (NOT TARGET OpenMP::OpenMP_Fortran) + message(FATAL_ERROR "OpenMP + Fortran is unsupported.") + endif() + + target_link_libraries(${a_target} PRIVATE OpenMP::OpenMP_Fortran) + target_compile_definitions(${a_target} PRIVATE MFC_OpenMP MFC_GPU) + if(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + target_compile_options(${a_target} PRIVATE "-mp=gpu") + endif() + endif() if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") # FIXME: This should work with other cards than gfx90a ones. 
@@ -535,7 +551,7 @@ endif() if (MFC_SIMULATION) MFC_SETUP_TARGET(TARGET simulation SOURCES "${simulation_SRCs}" - MPI OpenACC FFTW) + MPI FFTW OpenACC OpenMP) endif() if (MFC_POST_PROCESS) @@ -550,7 +566,7 @@ endif() if (MFC_SYSCHECK) MFC_SETUP_TARGET(TARGET syscheck SOURCES "${syscheck_SRCs}" - MPI OpenACC) + MPI OpenACC OpenMP) endif() if (MFC_DOCUMENTATION) diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py index d3de6769c3..b73bf93763 100644 --- a/toolchain/mfc/args.py +++ b/toolchain/mfc/args.py @@ -4,9 +4,10 @@ from .build import TARGETS, DEFAULT_TARGETS from .common import MFCException, format_list_to_string from .test.cases import list_cases +from .state import gpuConfigOptions, MFCConfig # pylint: disable=too-many-locals, too-many-branches, too-many-statements -def parse(config): +def parse(config: MFCConfig): parser = argparse.ArgumentParser( prog="./mfc.sh", description="""\ @@ -46,7 +47,7 @@ def parse(config): compare.add_argument("-rel", "--reltol", metavar="RELTOL", type=float, default=1e-12, help="Relative tolerance.") compare.add_argument("-abs", "--abstol", metavar="ABSTOL", type=float, default=1e-12, help="Absolute tolerance.") - def add_common_arguments(p, mask = None): + def add_common_arguments(p: argparse.ArgumentParser, mask = None): if mask is None: mask = "" @@ -57,6 +58,10 @@ def add_common_arguments(p, mask = None): if "m" not in mask: for f in dataclasses.fields(config): + if f.name == 'gpu': + p.add_argument(f"--{f.name}", action="store", nargs='?', default=gpuConfigOptions.ACC.value, choices=[e.value for e in gpuConfigOptions]) + p.add_argument(f"--no-{f.name}", action="store_const", const = gpuConfigOptions.NONE.value, dest=f.name, help=f"Turn the {f.name} option OFF.") + continue p.add_argument( f"--{f.name}", action="store_true", help=f"Turn the {f.name} option ON.") p.add_argument(f"--no-{f.name}", action="store_false", dest=f.name, help=f"Turn the {f.name} option OFF.") diff --git a/toolchain/mfc/build.py 
b/toolchain/mfc/build.py index 2de738986d..422f400154 100644 --- a/toolchain/mfc/build.py +++ b/toolchain/mfc/build.py @@ -6,6 +6,7 @@ format_list_to_string from .state import ARG, CFG from .run import input +from .state import gpuConfigOptions @dataclasses.dataclass class MFCTarget: @@ -141,7 +142,10 @@ def configure(self, case: Case): if not self.isDependency: flags.append(f"-DMFC_MPI={ 'ON' if ARG('mpi') else 'OFF'}") - flags.append(f"-DMFC_OpenACC={'ON' if ARG('gpu') else 'OFF'}") + # flags.append(f"-DMFC_OpenACC={'ON' if ARG('acc') else 'OFF'}") + # flags.append(f"-DMFC_OpenMP={'ON' if ARG('mp') else 'OFF'}") + flags.append(f"-DMFC_OpenACC={'ON' if (ARG('gpu') == gpuConfigOptions.ACC.value) else 'OFF'}") + flags.append(f"-DMFC_OpenMP={'ON' if (ARG('gpu') == gpuConfigOptions.MP.value) else 'OFF'}") flags.append(f"-DMFC_GCov={ 'ON' if ARG('gcov') else 'OFF'}") flags.append(f"-DMFC_Unified={'ON' if ARG('unified') else 'OFF'}") diff --git a/toolchain/mfc/state.py b/toolchain/mfc/state.py index fa7d438e77..383a458ec9 100644 --- a/toolchain/mfc/state.py +++ b/toolchain/mfc/state.py @@ -1,10 +1,18 @@ import typing, dataclasses +from enum import Enum, unique +@unique +class gpuConfigOptions(Enum): + NONE = 'no' + ACC = 'acc' + MP = 'mp' @dataclasses.dataclass class MFCConfig: mpi: bool = True - gpu: bool = False + gpu: str = gpuConfigOptions.NONE.value + # mp: bool = False + # acc: bool = False debug: bool = False gcov: bool = False unified: bool = False @@ -21,7 +29,7 @@ def from_dict(d: dict): return r - def items(self) -> typing.List[typing.Tuple[str, bool]]: + def items(self) -> typing.Iterable[typing.Tuple[str, typing.Any]]: return dataclasses.asdict(self).items() def make_options(self) -> typing.List[str]: @@ -44,8 +52,18 @@ def __eq__(self, other) -> bool: def __str__(self) -> str: """ Returns a string like "mpi=No & gpu=No & debug=No & gcov=No & unified=No" """ - - return ' & '.join([ f"{k}={'Yes' if v else 'No'}" for k, v in self.items() ]) + strings = [] + 
for k,v in self.items(): + if isinstance(v, bool): + strings.append(f"{k}={'Yes' if v else 'No'}") + elif isinstance(v, str): + strings.append(f"{k}={v.capitalize()}") + elif isinstance(v, int): + strings.append(f"{k}={v}") + else: + strings.append(f"{k}={v.__str__()}") + + return ' & '.join(strings) gCFG: MFCConfig = MFCConfig() From 7521731f889e7acd8ca4819986cbc88ac3bf14a3 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Fri, 18 Jul 2025 18:26:21 -0400 Subject: [PATCH 02/60] Fixed issue with not compiling on CPU builds --- toolchain/mfc/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py index 422f400154..a5ce9d70db 100644 --- a/toolchain/mfc/build.py +++ b/toolchain/mfc/build.py @@ -18,7 +18,7 @@ class Dependencies: def compute(self) -> typing.Set: r = self.all[:] - r += self.gpu[:] if ARG("gpu") else self.cpu[:] + r += self.gpu[:] if (ARG("gpu") != gpuConfigOptions.NONE.value) else self.cpu[:] return r From 90c67381972c855265ab9a284ca1900df3e998cb Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Fri, 18 Jul 2025 18:50:10 -0400 Subject: [PATCH 03/60] Temporary commit --- src/common/include/parallel_macros.fpp | 64 +++++++++++++++++++++++++- src/common/m_nvtx.f90 | 6 +-- src/simulation/m_fftw.fpp | 19 ++++---- src/simulation/m_start_up.fpp | 22 ++++++--- toolchain/cmake/configuration.cmake.in | 1 + 5 files changed, 92 insertions(+), 20 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 8d0a5a673b..bb1bc7caf1 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -367,14 +367,64 @@ $:end_acc_directive #:enddef +#:def GEN_MP_PARENTHESES_CLAUSE(clause_name, clause_modifier, clause_str) + #:set clause_regex = re.compile(',(?![^(]*\\))') + #:assert isinstance(clause_name, str) + #:if clause_str is not None + #:set count = 0 + #:assert isinstance(clause_str, str) + #:assert clause_str[0] == 
'[' and clause_str[-1] == ']' + #:for c in clause_str + #:if c == '(' + #:set count = count + 1 + #:elif c == ')' + #:set count = count - 1 + #:endif + #:if c == ',' and count > 1 + #:stop 'Nested parentheses with comma inside is not supported. Incorrect clause: {}'.format(clause_str) + #:elif count < 0 + #:stop 'Missing parentheses. Incorrect clause: {}'.format(clause_str) + #:endif + #:endfor + #:set clause_str = re.sub(clause_regex, ';', clause_str) + #:set clause_list = [x.strip() for x in clause_str.strip('[]').split(';')] + $:ASSERT_LIST(clause_list, str) + #:set clause_str = clause_name + '(' + clause_modifier + ':' + ', '.join(clause_list) + ') ' + #:else + #:set clause_str = '' + #:endif + $:clause_str +#:enddef + +#:def GEN_TO_STR(to) + #:set to_str = GEN_MP_PARENTHESES_CLAUSE('map', 'to', to) + $:to_str +#:enddef + + +#:def GEN_ALLOC_STR(alloc) + #:set alloc_str = GEN_MP_PARENTHESES_CLAUSE('map', 'alloc', alloc) + $:alloc_str +#:enddef + #:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') #:set create_val = GEN_CREATE_STR(create) #:set attach_val = GEN_ATTACH_STR(attach) + #:set to_val = GEN_TO_STR(copyin) + #:set alloc_val = GEN_ALLOC_STR(create) + #:set alloc_val2 = GEN_ALLOC_STR(attach) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') - #:set acc_directive = '!$acc enter data ' + clause_val + extraAccArgs_val.strip('\n') + #:set extraMpArgs_val = '' + #:set acc_clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') + #:set mp_clause_val = to_val.strip('\n') + alloc_val.strip('\n') + alloc_val2.strip('\n') + #:set acc_directive = '!$acc enter data ' + acc_clause_val + extraAccArgs_val.strip('\n') + #:set mp_directive = '!$omp target enter data ' + mp_clause_val + 
extraMpArgs_val.strip('\n') +#if MFC_OpenACC $:acc_directive +#elif MFC_OpenMP + $:mp_directive +#endif #:enddef #:def GPU_EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None) @@ -421,5 +471,15 @@ $:acc_directive #:enddef +#:def USE_GPU_MODULE() + +#if defined(MFC_OpenACC) + use openacc +#elif defined(MFC_OpenMP) + use omp_lib +#endif + +#:enddef + #:endmute ! New line at end of file is required for FYPP diff --git a/src/common/m_nvtx.f90 b/src/common/m_nvtx.f90 index ce3273751a..524bf77781 100644 --- a/src/common/m_nvtx.f90 +++ b/src/common/m_nvtx.f90 @@ -25,7 +25,7 @@ module m_nvtx type(c_ptr) :: message ! ascii char end type nvtxEventAttributes -#if defined(MFC_OpenACC) && defined(__PGI) +#if defined(MFC_GPU) && defined(__PGI) interface nvtxRangePush ! push range with custom label and standard color @@ -58,7 +58,7 @@ subroutine nvtxStartRange(name, id) integer, intent(IN), optional :: id type(nvtxEventAttributes) :: event -#if defined(MFC_OpenACC) && defined(__PGI) +#if defined(MFC_GPU) && defined(__PGI) tempName = trim(name)//c_null_char @@ -74,7 +74,7 @@ subroutine nvtxStartRange(name, id) end subroutine nvtxStartRange subroutine nvtxEndRange -#if defined(MFC_OpenACC) && defined(__PGI) +#if defined(MFC_GPU) && defined(__PGI) call nvtxRangePop #endif end subroutine nvtxEndRange diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 852fb90290..9563b99a6d 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -15,9 +15,9 @@ module m_fftw use m_mpi_proxy !< Message passing interface (MPI) module proxy -#if defined(MFC_OpenACC) && defined(__PGI) +#if defined(MFC_GPU) && defined(__PGI) use cufft -#elif defined(MFC_OpenACC) +#elif defined(MFC_GPU) use hipfort use hipfort_check use hipfort_hipfft @@ -29,7 +29,7 @@ module m_fftw s_apply_fourier_filter, & s_finalize_fftw_module -#if !defined(MFC_OpenACC) +#if !defined(MFC_GPU) include 'fftw3.f03' #endif @@ -45,13 +45,14 @@ module m_fftw complex(c_double_complex), 
pointer :: data_fltr_cmplx(:) !< !! Filtered complex data in Fourier space -#if defined(MFC_OpenACC) +#if defined(MFC_GPU) $:GPU_DECLARE(create='[real_size,cmplx_size,x_size,batch_size,Nfq]') real(dp), allocatable, target :: data_real_gpu(:) complex(dp), allocatable, target :: data_cmplx_gpu(:) complex(dp), allocatable, target :: data_fltr_cmplx_gpu(:) $:GPU_DECLARE(create='[data_real_gpu,data_cmplx_gpu,data_fltr_cmplx_gpu]') + !$omp declare target (data_real_gpu,data_cmplx_gpu,data_fltr_cmplx_gpu) #if defined(__PGI) integer :: fwd_plan_gpu, bwd_plan_gpu @@ -81,8 +82,7 @@ contains x_size = m + 1 batch_size = x_size*sys_size -#if defined(MFC_OpenACC) - +#if defined(MFC_GPU) rank = 1; istride = 1; ostride = 1 allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank)) @@ -107,7 +107,7 @@ contains bwd_plan = fftw_plan_dft_c2r_1d(real_size, data_fltr_cmplx, data_real, FFTW_ESTIMATE) #endif -#if defined(MFC_OpenACC) +#if defined(MFC_GPU) @:ALLOCATE(data_real_gpu(1:real_size*x_size*sys_size)) @:ALLOCATE(data_cmplx_gpu(1:cmplx_size*x_size*sys_size)) @:ALLOCATE(data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size)) @@ -139,7 +139,8 @@ contains ! Restrict filter to processors that have cells adjacent to axis if (bc_y%beg >= 0) return -#if defined(MFC_OpenACC) +#if defined(MFC_GPU) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m @@ -302,7 +303,7 @@ contains !! applying the Fourier filter in the azimuthal direction. 
impure subroutine s_finalize_fftw_module -#if defined(MFC_OpenACC) +#if defined(MFC_GPU) integer :: ierr !< Generic flag used to identify and report GPU errors @:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu) #if defined(__PGI) diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 782418b416..4d83080b19 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -68,9 +68,12 @@ module m_start_up use m_helper_basic !< Functions to compare floating point numbers -#ifdef MFC_OpenACC - use openacc -#endif + $:USE_GPU_MODULE() +! #if defined(MFC_OpenACC) +! use openacc +! #elif defined(MFC_OpenMP) +! use omp_lib +! #endif use m_nvtx @@ -1329,14 +1332,16 @@ contains impure subroutine s_initialize_mpi_domain integer :: ierr -#ifdef MFC_OpenACC +#ifdef MFC_GPU real(wp) :: starttime, endtime integer :: num_devices, local_size, num_nodes, ppn, my_device_num integer :: dev, devNum, local_rank #ifdef MFC_MPI integer :: local_comm #endif +#if defined(MFC_OpenACC) integer(acc_device_kind) :: devtype +#endif #endif ! Initializing MPI execution environment @@ -1344,7 +1349,7 @@ contains call s_mpi_initialize() ! Bind GPUs if OpenACC is enabled -#ifdef MFC_OpenACC +#ifdef MFC_GPU #ifndef MFC_MPI local_size = 1 local_rank = 0 @@ -1354,12 +1359,17 @@ contains call MPI_Comm_size(local_comm, local_size, ierr) call MPI_Comm_rank(local_comm, local_rank, ierr) #endif - +#if defined(MFC_OpenACC) devtype = acc_get_device_type() devNum = acc_get_num_devices(devtype) dev = mod(local_rank, devNum) call acc_set_device_num(dev, devtype) +#elif defined(MFC_OpenMP) + devNum = omp_get_num_devices() + dev = mod(local_rank, devNum) + call omp_set_default_device(dev) +#endif #endif ! 
The rank 0 processor assigns default values to the user inputs prior to diff --git a/toolchain/cmake/configuration.cmake.in b/toolchain/cmake/configuration.cmake.in index a9b9b5399c..4cdd90fcaa 100644 --- a/toolchain/cmake/configuration.cmake.in +++ b/toolchain/cmake/configuration.cmake.in @@ -14,6 +14,7 @@ CMake Configuration: MPI : ${MFC_MPI} OpenACC : ${MFC_OpenACC} + OpenMP : ${MFC_OpenMP} Fypp : ${FYPP_EXE} Doxygen : ${DOXYGEN_EXECUTABLE} From 06a783e22562184f2a28f17434544b121f633a7d Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Sun, 20 Jul 2025 16:21:21 -0400 Subject: [PATCH 04/60] OMP parallel and parallel loop --- src/common/include/acc_macros.fpp | 59 +++++++ src/common/include/omp_macros.fpp | 157 ++++++++++++++++++ src/common/include/parallel_macros.fpp | 113 +++---------- src/common/include/shared_parallel_macros.fpp | 91 ++++++++++ 4 files changed, 333 insertions(+), 87 deletions(-) create mode 100644 src/common/include/acc_macros.fpp create mode 100644 src/common/include/omp_macros.fpp create mode 100644 src/common/include/shared_parallel_macros.fpp diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp new file mode 100644 index 0000000000..9375799bde --- /dev/null +++ b/src/common/include/acc_macros.fpp @@ -0,0 +1,59 @@ +#:include 'shared_parallel_macros.fpp' + +#:def ACC_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) + #:set default_val = GEN_DEFAULT_STR(default) + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set copy_val = GEN_COPY_STR(copy) + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + #:set 
copyout_val = GEN_COPYOUT_STR(copyout) + #:set create_val = GEN_CREATE_STR(create) + #:set no_create_val = GEN_NOCREATE_STR(no_create) + #:set present_val = GEN_PRESENT_STR(present) + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + #:set attach_val = GEN_ATTACH_STR(attach) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set acc_clause_val = default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + #:set acc_directive = '!$acc parallel ' + & + & acc_clause_val + extraAccArgs_val.strip('\n') + #:set end_acc_directive = '!$acc end parallel' + $:acc_directive + $:code + $:end_acc_directive +#:enddef + +#:def ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + #:set default_val = GEN_DEFAULT_STR(default) + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set copy_val = GEN_COPY_STR(copy) + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + #:set copyout_val = GEN_COPYOUT_STR(copyout) + #:set create_val = GEN_CREATE_STR(create) + #:set no_create_val = GEN_NOCREATE_STR(no_create) + #:set present_val = GEN_PRESENT_STR(present) + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + #:set attach_val = GEN_ATTACH_STR(attach) + #:set 
extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + #:set acc_directive = '!$acc parallel loop ' + & + & clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef +! New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp new file mode 100644 index 0000000000..4a27e8f093 --- /dev/null +++ b/src/common/include/omp_macros.fpp @@ -0,0 +1,157 @@ +#:include 'shared_parallel_macros.fpp' + +#:def OMP_MAP_STR(map_type, var_list) + #:assert map_type is not None + #:assert isinstance(map_type, str) + #:if var_list is not None + #:set map_clause = 'map(' + map_type + ':' + #:set map_val = GEN_CLAUSE(map_clause, var_list) + #:else + #:set map_val = '' + #:endif + $:map_val +#:enddef + +#:def OMP_DEFAULT_STR(default) + #:if default is not None + #:assert isinstance(default, str) + #:assert (default == 'present' or default == 'none') + #:if default == 'present' + #:set default_val = 'defaultmap(present:all) ' + #:elif default == 'none' + #:stop 'Not Supported Yet' + #:endif + #:else + #:set default_val = '' + #:endif + $:default_val +#:enddef + +#:def OMP_DEFAULT_STR(default) + #:if default is not None + #:assert isinstance(default, str) + #:assert (default == 'present' or default == 'none') + #:if default == 'present' + #:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) ' + #:elif default == 'none' + #:stop 'Not Supported Yet' + #:endif + #:else + #:set default_val = '' + #:endif + $:default_val +#:enddef + +#:def OMP_COPY_STR(copy) + #:set copy_val 
= OMP_MAP_STR('tofrom', copy) + $:copy_val +#:enddef + +#:def OMP_COPYIN_STR(copyin) + #:set copyin_val = OMP_MAP_STR('to', copyin) + $:copyin_val +#:enddef + +#:def OMP_COPYOUT_STR(copyout) + #:set copyout_val = OMP_MAP_STR('from', copyout) + $:copyout_val +#:enddef + +#:def OMP_CREATE_STR(create) + #:set create_val = OMP_MAP_STR('alloc', create) + $:create_val +#:enddef + +#:def OMP_NOCREATE_STR(no_create) + #:if no_create is not None + #:stop 'no_create is not supported yet' + #:endif + #:set no_create_val = '' + $:no_create_val +#:enddef + +#:def OMP_PRESENT_STR(present) + #:set present_val = OMP_MAP_STR('present,alloc', present) + $:present_val +#:enddef + +#:def OMP_DEVICEPTR_STR(deviceptr) + #:set deviceptr_val = GEN_PARENTHESES_CLAUSE('use_device_ptr', deviceptr) + $:deviceptr_val +#:enddef + +#:def OMP_ATTACH_STR(attach) + #:if attach is not None + #:stop 'attach is not supported yet' + #:endif + #:set attach_val = '' + $:attach_val +#:enddef + +#:def OMP_PARALLELISM_STR(parallelism) + #:set temp = '' + $:temp +#:enddef + +#:def OMP_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None) + #:set default_val = OMP_DEFAULT_STR(default) + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set copy_val = OMP_COPY_STR(copy) + #:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n') + #:set copyout_val = OMP_COPYOUT_STR(copyout) + #:set create_val = OMP_CREATE_STR(create) + #:set no_create_val = OMP_NOCREATE_STR(no_create) + #:set present_val = OMP_PRESENT_STR(present) + #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) + #:set attach_val = OMP_ATTACH_STR(attach) + #:set 
extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set omp_clause_val = default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + + #:set omp_clause_val = 'defaultmap(firstprivate:scalar) ' + omp_clause_val.strip('\n') + #:set omp_directive = '!$omp target teams ' + omp_clause_val + extraOmpArgs_val.strip('\n') + + #:set end_omp_directive = '!$omp end target teams' + $:omp_directive + $:code + $:end_omp_directive +#:enddef + +#:def OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None) + + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + #:set parallelism_val = OMP_PARALLELISM_STR(parallelism) + #! 
#:set default_val = OMP_DEFAULT_STR(default) + #:set default_val = '' + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set copy_val = OMP_COPY_STR(copy) + #:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n') + #:set copyout_val = OMP_COPYOUT_STR(copyout) + #:set create_val = OMP_CREATE_STR(create) + #:set no_create_val = OMP_NOCREATE_STR(no_create) + #:set present_val = OMP_PRESENT_STR(present) + #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) + #:set attach_val = OMP_ATTACH_STR(attach) + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + #! Hardcoding the parallelism for now + #:set omp_directive = '!$omp target teams distribute parallel do simd ' + & + & clause_val + extraOmpArgs_val.strip('\n') + $:omp_directive +#:enddef +! 
New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index bb1bc7caf1..00dc95493e 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -1,4 +1,7 @@ #:mute +#:include 'shared_parallel_macros.fpp' +#:include 'omp_macros.fpp' +#:include 'acc_macros.fpp' #:def ASSERT_LIST(data, datatype) #:assert data is not None @@ -106,15 +109,7 @@ $:link_val #:enddef -#:def GEN_EXTRA_ARGS_STR(extraArgs) - #:if extraArgs is not None - #:assert isinstance(extraArgs, str) - #:set extraArgs_val = extraArgs - #:else - #:set extraArgs_val = '' - #:endif - $:extraArgs_val -#:enddef + #:def GEN_PARALLELISM_STR(parallelism) #:if parallelism is not None @@ -154,36 +149,6 @@ $:default_val #:enddef -#:def GEN_REDUCTION_STR(reduction, reductionOp) - #:if reduction is not None and reductionOp is not None - #:assert isinstance(reduction, str) - #:assert isinstance(reductionOp, str) - #:assert reduction[0] == '[' and reduction[-1] == ']' - #:assert reductionOp[0] == '[' and reductionOp[-1] == ']' - #:set reduction = reduction.replace(' ', '') - #:set reduction = reduction[1:-1] - #:set reduction_list = reduction.split('],') - #:set reduction_list = [str + ']' for str in reduction_list] - #:assert all(str[0] == '[' and str[-1] == ']' for str in reduction_list) - - #:set reductionOp_list = [x.strip() for x in reductionOp.strip('[]').split(',')] - $:ASSERT_LIST(reduction_list, str) - $:ASSERT_LIST(reductionOp_list, str) - #:assert len(reduction_list) == len(reductionOp_list) - #:set reduction_val = '' - #:for i in range(len(reduction_list)) - #:set temp_clause = GEN_PARENTHESES_CLAUSE('reduction', reduction_list[i]).strip('\n') - #:set ind = temp_clause.find('reduction(') + len('reduction(') - #:set reduction_val = reduction_val.strip('\n') + temp_clause[:ind] + reductionOp_list[i] + ':' + temp_clause[ind:] - #:endfor - #:elif reduction is not 
None or reductionOp is not None - #:stop 'Cannot set the reduction list or reduction operation without setting the other' - #:else - #:set reduction_val = '' - #:endif - $:reduction_val -#:enddef - #:def GEN_HOST_STR(host) #:set host_val = GEN_PARENTHESES_CLAUSE('host', host) $:host_val @@ -201,60 +166,34 @@ #:def GPU_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & - & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) - #:set default_val = GEN_DEFAULT_STR(default) - #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - #:set copy_val = GEN_COPY_STR(copy) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set create_val = GEN_CREATE_STR(create) - #:set no_create_val = GEN_NOCREATE_STR(no_create) - #:set present_val = GEN_PRESENT_STR(present) - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - #:set attach_val = GEN_ATTACH_STR(attach) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & - & copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & no_create_val.strip('\n') + present_val.strip('\n') + & - & deviceptr_val.strip('\n') + attach_val.strip('\n') - #:set acc_directive = '!$acc parallel ' + & - & clause_val + extraAccArgs_val.strip('\n') - #:set end_acc_directive = '!$acc end parallel' - $:acc_directive + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) + + #:set acc_code = ACC_PARALLEL(code, private, default, firstprivate, reduction, reductionOp, copy, 
copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) + #:set omp_code = OMP_PARALLEL(code, private, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#else $:code - $:end_acc_directive -#:enddef +#endif +#:enddef #:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & - & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - #:set default_val = GEN_DEFAULT_STR(default) - #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - #:set copy_val = GEN_COPY_STR(copy) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set create_val = GEN_CREATE_STR(create) - #:set no_create_val = GEN_NOCREATE_STR(no_create) - #:set present_val = GEN_PRESENT_STR(present) - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - #:set attach_val = GEN_ATTACH_STR(attach) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & - & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & - & copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & no_create_val.strip('\n') + present_val.strip('\n') + & - & deviceptr_val.strip('\n') + attach_val.strip('\n') - #:set 
acc_directive = '!$acc parallel loop ' + & - & clause_val + extraAccArgs_val.strip('\n') - $:acc_directive + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) + + #:set acc_code = ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) + #:set omp_code = OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#endif #:enddef #:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None) diff --git a/src/common/include/shared_parallel_macros.fpp b/src/common/include/shared_parallel_macros.fpp new file mode 100644 index 0000000000..eadb2211f4 --- /dev/null +++ b/src/common/include/shared_parallel_macros.fpp @@ -0,0 +1,91 @@ +#:def GEN_CLAUSE(clause_name, clause_str) + #:set clause_regex = re.compile(',(?![^(]*\\))') + #:assert isinstance(clause_name, str) + #:if clause_str is not None + #:set count = 0 + #:assert isinstance(clause_str, str) + #:assert clause_str[0] == '[' and clause_str[-1] == ']' + #:for c in clause_str + #:if c == '(' + #:set count = count + 1 + #:elif c == ')' + #:set count = count - 1 + #:endif + #:if c == ',' and count > 1 + #:stop 'Nested parentheses with comma inside is not supported. Incorrect clause: {}'.format(clause_str) + #:elif count < 0 + #:stop 'Missing parentheses. 
Incorrect clause: {}'.format(clause_str) + #:endif + #:endfor + #:set clause_str = re.sub(clause_regex, ';', clause_str) + #:set clause_list = [x.strip() for x in clause_str.strip('[]').split(';')] + $:ASSERT_LIST(clause_list, str) + #:set clause_str = clause_name + ', '.join(clause_list) + ') ' + #:else + #:set clause_str = '' + #:endif + $:clause_str +#:enddef + +#:def GEN_PARENTHESES_CLAUSE(clause_name, clause_str) + #:assert isinstance(clause_name, str) + #:if clause_str is not None + #:assert isinstance(clause_str, str) + #:set clause = clause_name + '(' + #:set clause_str = GEN_CLAUSE(clause, clause_str) + #:else + #:set clause_str = '' + #:endif + $:clause_str +#:enddef + +#:def GEN_PRIVATE_STR(private, initialized_values) + #:assert isinstance(initialized_values, bool) + #:if initialized_values == True + #:set private_val = GEN_PARENTHESES_CLAUSE('firstprivate', private) + #:else + #:set private_val = GEN_PARENTHESES_CLAUSE('private', private) + #:endif + $:private_val +#:enddef + +#:def GEN_REDUCTION_STR(reduction, reductionOp) + #:if reduction is not None and reductionOp is not None + #:assert isinstance(reduction, str) + #:assert isinstance(reductionOp, str) + #:assert reduction[0] == '[' and reduction[-1] == ']' + #:assert reductionOp[0] == '[' and reductionOp[-1] == ']' + #:set reduction = reduction.replace(' ', '') + #:set reduction = reduction[1:-1] + #:set reduction_list = reduction.split('],') + #:set reduction_list = [str + ']' for str in reduction_list] + #:assert all(str[0] == '[' and str[-1] == ']' for str in reduction_list) + + #:set reductionOp_list = [x.strip() for x in reductionOp.strip('[]').split(',')] + $:ASSERT_LIST(reduction_list, str) + $:ASSERT_LIST(reductionOp_list, str) + #:assert len(reduction_list) == len(reductionOp_list) + #:set reduction_val = '' + #:for i in range(len(reduction_list)) + #:set temp_clause = GEN_PARENTHESES_CLAUSE('reduction', reduction_list[i]).strip('\n') + #:set ind = temp_clause.find('reduction(') + 
len('reduction(') + #:set reduction_val = reduction_val.strip('\n') + temp_clause[:ind] + reductionOp_list[i] + ':' + temp_clause[ind:] + #:endfor + #:elif reduction is not None or reductionOp is not None + #:stop 'Cannot set the reduction list or reduction operation without setting the other' + #:else + #:set reduction_val = '' + #:endif + $:reduction_val +#:enddef + +#:def GEN_EXTRA_ARGS_STR(extraArgs) + #:if extraArgs is not None + #:assert isinstance(extraArgs, str) + #:set extraArgs_val = extraArgs + #:else + #:set extraArgs_val = '' + #:endif + $:extraArgs_val +#:enddef +! New line at end of file is required for FYPP \ No newline at end of file From 2abfad5f5fd5a3bb5135602e57ac8750c6f823f7 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Sun, 20 Jul 2025 16:25:07 -0400 Subject: [PATCH 05/60] Removed pure markings --- src/common/m_boundary_common.fpp | 22 +++++++-------- src/common/m_eigen_solver.f90 | 16 +++++------ src/common/m_finite_differences.fpp | 4 +-- src/common/m_helper.fpp | 36 ++++++++++++------------ src/common/m_helper_basic.fpp | 12 ++++---- src/common/m_phase_change.fpp | 16 +++++------ src/common/m_variables_conversion.fpp | 12 ++++---- src/simulation/m_acoustic_src.fpp | 14 ++++----- src/simulation/m_bubbles.fpp | 34 +++++++++++----------- src/simulation/m_bubbles_EE.fpp | 4 +-- src/simulation/m_bubbles_EL.fpp | 12 ++++---- src/simulation/m_bubbles_EL_kernels.fpp | 18 ++++++------ src/simulation/m_cbc.fpp | 2 +- src/simulation/m_compute_cbc.fpp | 26 ++++++++--------- src/simulation/m_derived_variables.fpp | 2 +- src/simulation/m_hyperelastic.fpp | 4 +-- src/simulation/m_hypoelastic.fpp | 2 +- src/simulation/m_ibm.fpp | 10 +++---- src/simulation/m_mhd.fpp | 2 +- src/simulation/m_pressure_relaxation.fpp | 12 ++++---- src/simulation/m_qbmm.fpp | 14 ++++----- src/simulation/m_riemann_solvers.fpp | 12 ++++---- src/simulation/m_sim_helpers.fpp | 10 +++---- src/simulation/m_surface_tension.fpp | 2 +- src/simulation/m_weno.fpp | 2 +- 25 files 
changed, 150 insertions(+), 150 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 3dc4da2c2e..3335a79b53 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -276,7 +276,7 @@ contains end subroutine s_populate_variables_buffers - pure subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) + subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_ghost_cell_extrapolation', & & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf @@ -337,7 +337,7 @@ contains end subroutine s_ghost_cell_extrapolation - pure subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in) + subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb_in, mv_in @@ -597,7 +597,7 @@ contains end subroutine s_symmetry - pure subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in) + subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb_in, mv_in @@ -736,7 +736,7 @@ contains end subroutine s_periodic - pure subroutine s_axis(q_prim_vf, pb_in, mv_in, k, l) + subroutine s_axis(q_prim_vf, pb_in, mv_in, k, l) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb_in, mv_in @@ -795,7 +795,7 @@ contains end subroutine s_axis - pure subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) + 
subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_slip_wall',parallelism='[seq]', & & cray_inline=True) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf @@ -886,7 +886,7 @@ contains end subroutine s_slip_wall - pure subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) + subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_no_slip_wall',parallelism='[seq]', & & cray_inline=True) @@ -1014,7 +1014,7 @@ contains end subroutine s_no_slip_wall - pure subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l) + subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_dirichlet',parallelism='[seq]', & & cray_inline=True) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf @@ -1079,7 +1079,7 @@ contains end subroutine s_dirichlet - pure subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb_in, mv_in) + subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb_in, mv_in) $:GPU_ROUTINE(parallelism='[seq]') real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb_in, mv_in integer, intent(in) :: bc_dir, bc_loc @@ -1274,7 +1274,7 @@ contains end if end subroutine s_populate_capillary_buffers - pure subroutine s_color_function_periodic(c_divs, bc_dir, bc_loc, k, l) + subroutine s_color_function_periodic(c_divs, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_color_function_periodic', & & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs @@ -1329,7 +1329,7 @@ contains end subroutine s_color_function_periodic - pure subroutine s_color_function_reflective(c_divs, bc_dir, bc_loc, k, l) + subroutine s_color_function_reflective(c_divs, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_color_function_reflective', & & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs @@ 
-1408,7 +1408,7 @@ contains end subroutine s_color_function_reflective - pure subroutine s_color_function_ghost_cell_extrapolation(c_divs, bc_dir, bc_loc, k, l) + subroutine s_color_function_ghost_cell_extrapolation(c_divs, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_color_function_ghost_cell_extrapolation', & & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs diff --git a/src/common/m_eigen_solver.f90 b/src/common/m_eigen_solver.f90 index 2bf38d04bd..5003075c88 100644 --- a/src/common/m_eigen_solver.f90 +++ b/src/common/m_eigen_solver.f90 @@ -35,7 +35,7 @@ module m_eigen_solver !! @param fv2 temporary storage array !! @param fv3 temporary storage array !! @param ierr an error completion code - pure subroutine cg(nm, nl, ar, ai, wr, wi, zr, zi, fv1, fv2, fv3, ierr) + subroutine cg(nm, nl, ar, ai, wr, wi, zr, zi, fv1, fv2, fv3, ierr) integer, intent(in) :: nm, nl real(wp), dimension(nm, nl), intent(inout) :: ar, ai real(wp), dimension(nl), intent(out) :: wr, wi @@ -78,7 +78,7 @@ end subroutine cg !! (2) j=1, ,low-1 or i=igh+1, ,nl. !! @param scale the information determining the permutations and scaling !! factors used. - pure subroutine cbal(nm, nl, ar, ai, low, igh, scale) + subroutine cbal(nm, nl, ar, ai, low, igh, scale) integer, intent(in) :: nm, nl real(wp), dimension(nm, nl), intent(inout) :: ar, ai integer, intent(out) :: low, igh @@ -221,7 +221,7 @@ end subroutine cbal !! if cbal has not been used, set igh=nl. !! @param ortr further information about the transformations !! @param orti further information about the transformations - pure subroutine corth(nm, nl, low, igh, ar, ai, ortr, orti) + subroutine corth(nm, nl, low, igh, ar, ai, ortr, orti) integer, intent(in) :: nm, nl, low, igh real(wp), dimension(nm, nl), intent(inout) :: ar, ai real(wp), dimension(igh), intent(out) :: ortr, orti @@ -345,7 +345,7 @@ end subroutine corth !! @param zr the real part of the eigenvectors !! 
@param zi the imaginary part of the eigenvectors !! @param ierr an error completion code - pure subroutine comqr2(nm, nl, low, igh, ortr, orti, hr, hi, wr, wi, zr, zi, ierr) + subroutine comqr2(nm, nl, low, igh, ortr, orti, hr, hi, wr, wi, zr, zi, ierr) integer, intent(in) :: nm, nl, low, igh real(wp), dimension(nm, nl), intent(inout) :: hr, hi real(wp), dimension(nl), intent(out) :: wr, wi @@ -708,7 +708,7 @@ end subroutine comqr2 !! their first ml columns !! @param zi the imaginary part of the eigenvectors to be back !! transformed in their first ml columns - pure subroutine cbabk2(nm, nl, low, igh, scale, ml, zr, zi) + subroutine cbabk2(nm, nl, low, igh, scale, ml, zr, zi) integer, intent(in) :: nm, nl, low, igh real(wp), intent(in) :: scale(nl) integer, intent(in) :: ml @@ -754,7 +754,7 @@ pure subroutine cbabk2(nm, nl, low, igh, scale, ml, zr, zi) 200 return end subroutine cbabk2 - pure elemental subroutine csroot(xr, xi, yr, yi) + elemental subroutine csroot(xr, xi, yr, yi) real(wp), intent(in) :: xr, xi real(wp), intent(out) :: yr, yi @@ -774,7 +774,7 @@ pure elemental subroutine csroot(xr, xi, yr, yi) return end subroutine csroot - pure elemental subroutine cdiv(ar, ai, br, bi, cr, ci) + elemental subroutine cdiv(ar, ai, br, bi, cr, ci) real(wp), intent(in) :: ar, ai, br, bi real(wp), intent(out) :: cr, ci real(wp) :: s, ars, ais, brs, bis @@ -790,7 +790,7 @@ pure elemental subroutine cdiv(ar, ai, br, bi, cr, ci) return end subroutine cdiv - pure elemental subroutine pythag(a, b, c) + elemental subroutine pythag(a, b, c) real(wp), intent(in) :: a, b real(wp), intent(out) :: c diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index 2430374f4f..4b7e399ad5 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -8,7 +8,7 @@ module m_finite_differences contains - pure subroutine s_compute_fd_divergence(div, fields, ix_s, iy_s, iz_s) + subroutine s_compute_fd_divergence(div, fields, ix_s, 
iy_s, iz_s) type(scalar_field), intent(INOUT) :: div type(scalar_field), intent(IN) :: fields(1:3) @@ -69,7 +69,7 @@ contains !! @param q Number of cells in the s-coordinate direction !! @param s_cc Locations of the cell-centers in the s-coordinate direction !! @param fd_coeff_s Finite-diff. coefficients in the s-coordinate direction - pure subroutine s_compute_finite_difference_coefficients(q, s_cc, fd_coeff_s, local_buff_size, & + subroutine s_compute_finite_difference_coefficients(q, s_cc, fd_coeff_s, local_buff_size, & fd_number_in, fd_order_in, offset_s) integer :: lB, lE !< loop bounds diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index f432ca12fa..c8a17a1875 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -45,7 +45,7 @@ contains !! @param vftmp is the void fraction !! @param Rtmp is the bubble radii !! @param ntmp is the output number bubble density - pure subroutine s_comp_n_from_prim(vftmp, Rtmp, ntmp, weights) + subroutine s_comp_n_from_prim(vftmp, Rtmp, ntmp, weights) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: vftmp real(wp), dimension(nb), intent(in) :: Rtmp @@ -59,7 +59,7 @@ contains end subroutine s_comp_n_from_prim - pure subroutine s_comp_n_from_cons(vftmp, nRtmp, ntmp, weights) + subroutine s_comp_n_from_cons(vftmp, nRtmp, ntmp, weights) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: vftmp real(wp), dimension(nb), intent(in) :: nRtmp @@ -222,7 +222,7 @@ contains !! @param peclet Peclet number !! @param Re_trans Real part of the transport coefficients !! 
@param Im_trans Imaginary part of the transport coefficients - pure elemental subroutine s_transcoeff(omega, peclet, Re_trans, Im_trans) + elemental subroutine s_transcoeff(omega, peclet, Re_trans, Im_trans) real(wp), intent(in) :: omega, peclet real(wp), intent(out) :: Re_trans, Im_trans @@ -241,7 +241,7 @@ contains end subroutine s_transcoeff - pure elemental subroutine s_int_to_str(i, res) + elemental subroutine s_int_to_str(i, res) integer, intent(in) :: i character(len=*), intent(inout) :: res @@ -292,7 +292,7 @@ contains !! @param a First vector. !! @param b Second vector. !! @return The cross product of the two vectors. - pure function f_cross(a, b) result(c) + function f_cross(a, b) result(c) real(wp), dimension(3), intent(in) :: a, b real(wp), dimension(3) :: c @@ -305,7 +305,7 @@ contains !> This procedure swaps two real numbers. !! @param lhs Left-hand side. !! @param rhs Right-hand side. - pure elemental subroutine s_swap(lhs, rhs) + elemental subroutine s_swap(lhs, rhs) real(wp), intent(inout) :: lhs, rhs real(wp) :: ltemp @@ -318,7 +318,7 @@ contains !> This procedure creates a transformation matrix. !! @param p Parameters for the transformation. !! @return Transformation matrix. - pure function f_create_transform_matrix(param, center) result(out_matrix) + function f_create_transform_matrix(param, center) result(out_matrix) type(ic_model_parameters), intent(in) :: param real(wp), dimension(1:3), optional, intent(in) :: center @@ -379,7 +379,7 @@ contains !> This procedure transforms a vector by a matrix. !! @param vec Vector to transform. !! @param matrix Transformation matrix. - pure subroutine s_transform_vec(vec, matrix) + subroutine s_transform_vec(vec, matrix) real(wp), dimension(1:3), intent(inout) :: vec real(wp), dimension(1:4, 1:4), intent(in) :: matrix @@ -394,7 +394,7 @@ contains !> This procedure transforms a triangle by a matrix, one vertex at a time. !! @param triangle Triangle to transform. !! @param matrix Transformation matrix. 
- pure subroutine s_transform_triangle(triangle, matrix, matrix_n) + subroutine s_transform_triangle(triangle, matrix, matrix_n) type(t_triangle), intent(inout) :: triangle real(wp), dimension(1:4, 1:4), intent(in) :: matrix, matrix_n @@ -412,7 +412,7 @@ contains !> This procedure transforms a model by a matrix, one triangle at a time. !! @param model Model to transform. !! @param matrix Transformation matrix. - pure subroutine s_transform_model(model, matrix, matrix_n) + subroutine s_transform_model(model, matrix, matrix_n) type(t_model), intent(inout) :: model real(wp), dimension(1:4, 1:4), intent(in) :: matrix, matrix_n @@ -428,7 +428,7 @@ contains !> This procedure creates a bounding box for a model. !! @param model Model to create bounding box for. !! @return Bounding box. - pure function f_create_bbox(model) result(bbox) + function f_create_bbox(model) result(bbox) type(t_model), intent(in) :: model type(t_bbox) :: bbox @@ -457,7 +457,7 @@ contains !! @param lhs logical input. !! @param rhs other logical input. !! @return xored result. - pure elemental function f_xor(lhs, rhs) result(res) + elemental function f_xor(lhs, rhs) result(res) logical, intent(in) :: lhs, rhs logical :: res @@ -468,7 +468,7 @@ contains !> This procedure converts logical to 1 or 0. !! @param perdicate A Logical argument. !! @return 1 if .true., 0 if .false.. - pure elemental function f_logical_to_int(predicate) result(int) + elemental function f_logical_to_int(predicate) result(int) logical, intent(in) :: predicate integer :: int @@ -484,7 +484,7 @@ contains !! @param x is the input value !! @param l is the degree !! @return P is the unassociated legendre polynomial evaluated at x - pure recursive function unassociated_legendre(x, l) result(result_P) + recursive function unassociated_legendre(x, l) result(result_P) integer, intent(in) :: l real(wp), intent(in) :: x @@ -506,7 +506,7 @@ contains !! @param l is the degree !! @param m_order is the order !! 
@return Y is the spherical harmonic function evaluated at x and phi - pure recursive function spherical_harmonic_func(x, phi, l, m_order) result(Y) + recursive function spherical_harmonic_func(x, phi, l, m_order) result(Y) integer, intent(in) :: l, m_order real(wp), intent(in) :: x, phi @@ -528,7 +528,7 @@ contains !! @param l is the degree !! @param m_order is the order !! @return P is the associated legendre polynomial evaluated at x - pure recursive function associated_legendre(x, l, m_order) result(result_P) + recursive function associated_legendre(x, l, m_order) result(result_P) integer, intent(in) :: l, m_order real(wp), intent(in) :: x @@ -553,7 +553,7 @@ contains !> This function calculates the double factorial value of an integer !! @param n_in is the input integer !! @return R is the double factorial value of n - pure elemental function double_factorial(n_in) result(R_result) + elemental function double_factorial(n_in) result(R_result) integer, intent(in) :: n_in integer, parameter :: int64_kind = selected_int_kind(18) ! 18 bytes for 64-bit integer @@ -567,7 +567,7 @@ contains !> The following function calculates the factorial value of an integer !! @param n_in is the input integer !! @return R is the factorial value of n - pure elemental function factorial(n_in) result(R_result) + elemental function factorial(n_in) result(R_result) integer, intent(in) :: n_in integer, parameter :: int64_kind = selected_int_kind(18) ! 18 bytes for 64-bit integer diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index 359055a44d..0ee3102938 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -26,7 +26,7 @@ contains !! @param b Second number. !! @param tol_input Relative error (default = 1.e-10_wp). !! @return Result of the comparison. 
- logical pure elemental function f_approx_equal(a, b, tol_input) result(res) + logical elemental function f_approx_equal(a, b, tol_input) result(res) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: a, b real(wp), optional, intent(in) :: tol_input @@ -52,7 +52,7 @@ contains !! @param b Array that contains several point numbers. !! @param tol_input Relative error (default = 1e-10_wp). !! @return Result of the comparison. - logical pure function f_approx_in_array(a, b, tol_input) result(res) + logical function f_approx_in_array(a, b, tol_input) result(res) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: a real(wp), intent(in) :: b(:) @@ -78,7 +78,7 @@ contains !> Checks if a real(wp) variable is of default value. !! @param var Variable to check. - logical pure elemental function f_is_default(var) result(res) + logical elemental function f_is_default(var) result(res) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: var @@ -87,7 +87,7 @@ contains !> Checks if ALL elements of a real(wp) array are of default value. !! @param var_array Array to check. - logical pure function f_all_default(var_array) result(res) + logical function f_all_default(var_array) result(res) real(wp), intent(in) :: var_array(:) ! logical :: res_array(size(var_array)) ! integer :: i @@ -103,14 +103,14 @@ contains !> Checks if a real(wp) variable is an integer. !! @param var Variable to check. 
- logical pure elemental function f_is_integer(var) result(res) + logical elemental function f_is_integer(var) result(res) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: var res = f_approx_equal(var, real(nint(var), wp)) end function f_is_integer - pure subroutine s_configure_coordinate_bounds(weno_polyn, buff_size, idwint, idwbuff, & + subroutine s_configure_coordinate_bounds(weno_polyn, buff_size, idwint, idwbuff, & viscous, bubbles_lagrange, m, n, p, num_dims, igr) integer, intent(in) :: weno_polyn, m, n, p, num_dims diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index e04242a787..8f4cdf51c5 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -80,7 +80,7 @@ contains !! model, also considering mass depletion, depending on the incoming !! state conditions. !! @param q_cons_vf Cell-average conservative variables - pure subroutine s_infinite_relaxation_k(q_cons_vf) + subroutine s_infinite_relaxation_k(q_cons_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf real(wp) :: pS, pSOV, pSSL !< equilibrium pressure for mixture, overheated vapor, and subcooled liquid @@ -279,14 +279,14 @@ contains !! @param j generic loop iterator for x direction !! @param k generic loop iterator for y direction !! @param l generic loop iterator for z direction - !! @param MFL flag that tells whether the fluid is pure gas (0), pure liquid (1), or a mixture (2) + !! @param MFL flag that tells whether the fluid is pure gas (0), pure liquid (1), or a mixture (2) !! @param pS equilibrium pressure at the interface !! @param p_infpT stiffness for the participating fluids under pT-equilibrium !! @param rM sum of the reacting masses !! @param q_cons_vf Cell-average conservative variables !! @param rhoe mixture energy !! 
@param TS equilibrium temperature at the interface - pure subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, q_cons_vf, rhoe, TS) + subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, q_cons_vf, rhoe, TS) $:GPU_ROUTINE(function_name='s_infinite_pt_relaxation_k', & & parallelism='[seq]', cray_inline=True) @@ -386,7 +386,7 @@ contains !! @param rhoe mixture energy !! @param q_cons_vf Cell-average conservative variables !! @param TS equilibrium temperature at the interface - pure subroutine s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS) + subroutine s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS) $:GPU_ROUTINE(function_name='s_infinite_ptg_relaxation_k', & & parallelism='[seq]', cray_inline=True) @@ -507,7 +507,7 @@ contains !! @param j generic loop iterator for x direction !! @param k generic loop iterator for y direction !! @param l generic loop iterator for z direction - pure subroutine s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l) + subroutine s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l) $:GPU_ROUTINE(function_name='s_correct_partial_densities', & & parallelism='[seq]', cray_inline=True) @@ -566,7 +566,7 @@ contains !! @param pS equilibrium pressure at the interface !! @param q_cons_vf Cell-average conservative variables !! @param TJac Transpose of the Jacobian Matrix - pure subroutine s_compute_jacobian_matrix(InvJac, j, Jac, k, l, mCPD, mCVGP, mCVGP2, pS, q_cons_vf, TJac) + subroutine s_compute_jacobian_matrix(InvJac, j, Jac, k, l, mCPD, mCVGP, mCVGP2, pS, q_cons_vf, TJac) $:GPU_ROUTINE(function_name='s_compute_jacobian_matrix', & & parallelism='[seq]', cray_inline=True) @@ -669,7 +669,7 @@ contains !! @param pS equilibrium pressure at the interface !! @param rhoe mixture energy !! 
@param R2D (2D) residue array - pure subroutine s_compute_pTg_residue(j, k, l, mCPD, mCVGP, mQD, q_cons_vf, pS, rhoe, R2D) + subroutine s_compute_pTg_residue(j, k, l, mCPD, mCVGP, mQD, q_cons_vf, pS, rhoe, R2D) $:GPU_ROUTINE(function_name='s_compute_pTg_residue', & & parallelism='[seq]', cray_inline=True) @@ -716,7 +716,7 @@ contains !! @param pSat Saturation Pressure !! @param TSat Saturation Temperature !! @param TSIn equilibrium Temperature - pure elemental subroutine s_TSat(pSat, TSat, TSIn) + elemental subroutine s_TSat(pSat, TSat, TSIn) $:GPU_ROUTINE(function_name='s_TSat',parallelism='[seq]', & & cray_inline=True) diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index b9ef279f9e..32821fb679 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -464,7 +464,7 @@ contains end subroutine s_convert_species_to_mixture_variables - pure subroutine s_convert_species_to_mixture_variables_acc(rho_K, & + subroutine s_convert_species_to_mixture_variables_acc(rho_K, & gamma_K, pi_inf_K, qv_K, & alpha_K, alpha_rho_K, Re_K, & G_K, G) @@ -543,7 +543,7 @@ contains end subroutine s_convert_species_to_mixture_variables_acc - pure subroutine s_convert_species_to_mixture_variables_bubbles_acc(rho_K, & + subroutine s_convert_species_to_mixture_variables_bubbles_acc(rho_K, & gamma_K, pi_inf_K, qv_K, & alpha_K, alpha_rho_K, Re_K) $:GPU_ROUTINE(function_name='s_convert_species_to_mixture_variables_bubbles_acc', & @@ -737,7 +737,7 @@ contains end subroutine s_initialize_variables_conversion_module !Initialize mv at the quadrature nodes based on the initialized moments and sigma - pure subroutine s_initialize_mv(qK_cons_vf, mv) + subroutine s_initialize_mv(qK_cons_vf, mv) type(scalar_field), dimension(sys_size), intent(in) :: qK_cons_vf @@ -770,7 +770,7 @@ contains end subroutine s_initialize_mv !Initialize pb at the quadrature nodes using isothermal relations (Preston model) - pure subroutine 
s_initialize_pb(qK_cons_vf, mv, pb) + subroutine s_initialize_pb(qK_cons_vf, mv, pb) type(scalar_field), dimension(sys_size), intent(in) :: qK_cons_vf real(wp), dimension(idwint(1)%beg:, idwint(2)%beg:, idwint(3)%beg:, 1:, 1:), intent(in) :: mv @@ -1622,7 +1622,7 @@ contains end subroutine s_finalize_variables_conversion_module #ifndef MFC_PRE_PROCESS - pure subroutine s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv, vel_sum, c_c, c) + subroutine s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv, vel_sum, c_c, c) $:GPU_ROUTINE(function_name='s_compute_speed_of_sound', & & parallelism='[seq]', cray_inline=True) @@ -1689,7 +1689,7 @@ contains #endif #ifndef MFC_PRE_PROCESS - pure subroutine s_compute_fast_magnetosonic_speed(rho, c, B, norm, c_fast, h) + subroutine s_compute_fast_magnetosonic_speed(rho, c, B, norm, c_fast, h) $:GPU_ROUTINE(function_name='s_compute_fast_magnetosonic_speed', & & parallelism='[seq]', cray_inline=True) diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index b14528b9d5..28009427bb 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -345,7 +345,7 @@ contains !! @param frequency_local Frequency at the spatial location for sine and square waves !! @param gauss_sigma_time_local sigma in time for Gaussian pulse !! @param source Source term amplitude - pure elemental subroutine s_source_temporal(sim_time, c, ai, term_index, frequency_local, gauss_sigma_time_local, source, sum_BB) + elemental subroutine s_source_temporal(sim_time, c, ai, term_index, frequency_local, gauss_sigma_time_local, source, sum_BB) $:GPU_ROUTINE(parallelism='[seq]') integer, intent(in) :: ai, term_index real(wp), intent(in) :: sim_time, c, sum_BB @@ -504,7 +504,7 @@ contains !! @param source Source term amplitude !! @param angle Angle of the source term with respect to the x-axis (for 2D or 2D axisymmetric) !! 
@param xyz_to_r_ratios Ratios of the [xyz]-component of the source term to the magnitude (for 3D) - pure subroutine s_source_spatial(j, k, l, loc, ai, source, angle, xyz_to_r_ratios) + subroutine s_source_spatial(j, k, l, loc, ai, source, angle, xyz_to_r_ratios) integer, intent(in) :: j, k, l, ai real(wp), dimension(3), intent(in) :: loc real(wp), intent(out) :: source, angle, xyz_to_r_ratios(3) @@ -540,7 +540,7 @@ contains !! @param sig Sigma value for the Gaussian distribution !! @param r Displacement from source to current point !! @param source Source term amplitude - pure subroutine s_source_spatial_planar(ai, sig, r, source) + subroutine s_source_spatial_planar(ai, sig, r, source) integer, intent(in) :: ai real(wp), intent(in) :: sig, r(3) real(wp), intent(out) :: source @@ -570,7 +570,7 @@ contains !! @param source Source term amplitude !! @param angle Angle of the source term with respect to the x-axis (for 2D or 2D axisymmetric) !! @param xyz_to_r_ratios Ratios of the [xyz]-component of the source term to the magnitude (for 3D) - pure subroutine s_source_spatial_transducer(ai, sig, r, source, angle, xyz_to_r_ratios) + subroutine s_source_spatial_transducer(ai, sig, r, source, angle, xyz_to_r_ratios) integer, intent(in) :: ai real(wp), intent(in) :: sig, r(3) real(wp), intent(out) :: source, angle, xyz_to_r_ratios(3) @@ -615,7 +615,7 @@ contains !! @param source Source term amplitude !! @param angle Angle of the source term with respect to the x-axis (for 2D or 2D axisymmetric) !! @param xyz_to_r_ratios Ratios of the [xyz]-component of the source term to the magnitude (for 3D) - pure subroutine s_source_spatial_transducer_array(ai, sig, r, source, angle, xyz_to_r_ratios) + subroutine s_source_spatial_transducer_array(ai, sig, r, source, angle, xyz_to_r_ratios) integer, intent(in) :: ai real(wp), intent(in) :: sig, r(3) real(wp), intent(out) :: source, angle, xyz_to_r_ratios(3) @@ -697,7 +697,7 @@ contains !! @param ai Acoustic source index !! 
@param c Speed of sound !! @return frequency_local Converted frequency - pure elemental function f_frequency_local(freq_conv_flag, ai, c) + elemental function f_frequency_local(freq_conv_flag, ai, c) $:GPU_ROUTINE(parallelism='[seq]') logical, intent(in) :: freq_conv_flag integer, intent(in) :: ai @@ -716,7 +716,7 @@ contains !! @param c Speed of sound !! @param ai Acoustic source index !! @return gauss_sigma_time_local Converted Gaussian sigma time - pure elemental function f_gauss_sigma_time_local(gauss_conv_flag, ai, c) + elemental function f_gauss_sigma_time_local(gauss_conv_flag, ai, c) $:GPU_ROUTINE(parallelism='[seq]') logical, intent(in) :: gauss_conv_flag integer, intent(in) :: ai diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 0ec758dc22..549c38873c 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -40,7 +40,7 @@ contains !! @param f_bub_adv_src Source for bubble volume fraction !! @param f_divu Divergence of velocity !! @param fCson Speed of sound from fP (EL) - pure elemental function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson) + elemental function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu @@ -81,7 +81,7 @@ contains !! @param fR Current bubble radius !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure - pure elemental function f_cpbw(fR0, fR, fV, fpb) + elemental function f_cpbw(fR0, fR, fV, fpb) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR0, fR, fV, fpb @@ -100,7 +100,7 @@ contains !! @param fCpinf Driving bubble pressure !! @param fntait Tait EOS parameter !! 
@param fBtait Tait EOS parameter - pure elemental function f_H(fCpbw, fCpinf, fntait, fBtait) + elemental function f_H(fCpbw, fCpinf, fntait, fBtait) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fCpinf, fntait, fBtait @@ -120,7 +120,7 @@ contains !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter !! @param fH Bubble enthalpy - pure elemental function f_cgas(fCpinf, fntait, fBtait, fH) + elemental function f_cgas(fCpinf, fntait, fBtait, fH) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpinf, fntait, fBtait, fH @@ -143,7 +143,7 @@ contains !! @param fBtait Tait EOS parameter !! @param advsrc Advection equation source term !! @param divu Divergence of velocity - pure elemental function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu) + elemental function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fRho, fP, falf, fntait, fBtait, advsrc, divu @@ -173,7 +173,7 @@ contains !! @param fV Current bubble velocity !! @param fR0 Equilibrium bubble radius !! @param fpbdot Time derivative of the internal bubble pressure - pure elemental function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot) + elemental function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fCpinf, fCpinf_dot, fntait, fBtait real(wp), intent(in) :: fR, fV, fR0, fpbdot @@ -209,7 +209,7 @@ contains !! @param fV Current bubble velocity !! @param fR0 Equilibrium bubble radius !! @param fCpbw Boundary wall pressure - pure elemental function f_rddot_RP(fCp, fRho, fR, fV, fCpbw) + elemental function f_rddot_RP(fCp, fRho, fR, fV, fCpbw) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCp, fRho, fR, fV, fCpbw @@ -232,7 +232,7 @@ contains !! @param fcgas Current gas sound speed !! @param fntait Tait EOS parameter !! 
@param fBtait Tait EOS parameter - pure elemental function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait) + elemental function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fR, fV, fH, fHdot real(wp), intent(in) :: fcgas, fntait, fBtait @@ -255,7 +255,7 @@ contains !! @param fR Current bubble radius !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure - pure elemental function f_cpbw_KM(fR0, fR, fV, fpb) + elemental function f_cpbw_KM(fR0, fR, fV, fpb) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR0, fR, fV, fpb real(wp) :: f_cpbw_KM @@ -282,7 +282,7 @@ contains !! @param fV Current bubble velocity !! @param fR0 Equilibrium bubble radius !! @param fC Current sound speed - pure elemental function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC) + elemental function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fpbdot, fCp, fCpbw real(wp), intent(in) :: fRho, fR, fV, fR0, fC @@ -316,7 +316,7 @@ contains !> Subroutine that computes bubble wall properties for vapor bubbles !! @param pb Internal bubble pressure !! @param iR0 Current bubble size index - pure elemental subroutine s_bwproperty(pb_in, iR0, chi_vw_out, k_mw_out, rho_mw_out) + elemental subroutine s_bwproperty(pb_in, iR0, chi_vw_out, k_mw_out, rho_mw_out) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pb_in integer, intent(in) :: iR0 @@ -346,7 +346,7 @@ contains !! @param fbeta_c Mass transfer coefficient (EL) !! @param fR_m Mixture gas constant (EL) !! 
@param fgamma_m Mixture gamma (EL) - pure elemental subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_n, fbeta_c, fR_m, fgamma_m) + elemental subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_n, fbeta_c, fR_m, fgamma_m) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR real(wp), intent(in) :: fV @@ -404,7 +404,7 @@ contains !! @param fbeta_t Mass transfer coefficient (EL) !! @param fR_m Mixture gas constant (EL) !! @param fgamma_m Mixture gamma (EL) - pure elemental function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m) + elemental function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fvflux real(wp), intent(in) :: fR @@ -461,7 +461,7 @@ contains !! @param fbeta_t Heat transfer coefficient (EL) !! @param fCson Speed of sound (EL) !! @param adap_dt_stop Fail-safe exit if max iteration count reached - pure subroutine s_advance_step(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & + subroutine s_advance_step(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & fntait, fBtait, f_bub_adv_src, f_divu, & bub_id, fmass_v, fmass_n, fbeta_c, & fbeta_t, fCson, adap_dt_stop) @@ -594,7 +594,7 @@ contains !! @param f_divu Divergence of velocity !! @param fCson Speed of sound (EL) !! @param h Time step size - pure subroutine s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & + subroutine s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & fntait, fBtait, f_bub_adv_src, f_divu, & fCson, h) $:GPU_ROUTINE(function_name='s_initial_substep_h',parallelism='[seq]', & @@ -676,7 +676,7 @@ contains !! @param myV_tmp Bubble radial velocity at each stage !! @param myPb_tmp Internal bubble pressure at each stage (EL) !! 
@param myMv_tmp Mass of vapor in the bubble at each stage (EL) - pure subroutine s_advance_substep(err, fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & + subroutine s_advance_substep(err, fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & fntait, fBtait, f_bub_adv_src, f_divu, & bub_id, fmass_v, fmass_n, fbeta_c, & fbeta_t, fCson, h, & @@ -778,7 +778,7 @@ contains !! @param fMv_tmp Mass of vapor in the bubble !! @param fdPbdt_tmp Rate of change of the internal bubble pressure !! @param fdMvdt_tmp Rate of change of the mass of vapor in the bubble - pure elemental subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, & + elemental subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, & fmass_n, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(IN) :: fR_tmp, fV_tmp, fPb_tmp, fMv_tmp diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index b43a89e2e5..d2aa192750 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -71,7 +71,7 @@ contains ! Compute the bubble volume fraction alpha from the bubble number density n !! @param q_cons_vf is the conservative variable - pure subroutine s_comp_alpha_from_n(q_cons_vf) + subroutine s_comp_alpha_from_n(q_cons_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf real(wp) :: nR3bar integer(wp) :: i, j, k, l @@ -92,7 +92,7 @@ contains end subroutine s_comp_alpha_from_n - pure subroutine s_compute_bubbles_EE_rhs(idir, q_prim_vf, divu_in) + subroutine s_compute_bubbles_EE_rhs(idir, q_prim_vf, divu_in) integer, intent(in) :: idir type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index bacd19497d..b1e06f2f85 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -739,7 +739,7 @@ contains !! @param gamma Liquid specific heat ratio !! @param pi_inf Liquid stiffness !! 
@param cson Calculated speed of sound - pure subroutine s_compute_cson_from_pinf(q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson) + subroutine s_compute_cson_from_pinf(q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson) $:GPU_ROUTINE(function_name='s_compute_cson_from_pinf', & & parallelism='[seq]', cray_inline=True) @@ -807,7 +807,7 @@ contains !! @param f_pinfl Driving pressure !! @param cell Bubble cell !! @param Romega Control volume radius - pure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) + subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) $:GPU_ROUTINE(function_name='s_get_pinf',parallelism='[seq]', & & cray_inline=True) @@ -1136,7 +1136,7 @@ contains !! @param pos Input coordinates !! @param cell Computational coordinate of the cell !! @param scoord Calculated particle coordinates - pure subroutine s_locate_cell(pos, cell, scoord) + subroutine s_locate_cell(pos, cell, scoord) real(wp), dimension(3), intent(in) :: pos real(wp), dimension(3), intent(out) :: scoord @@ -1209,7 +1209,7 @@ contains !> The purpose of this procedure is to determine if the global coordinates of the bubbles !! are present in the current MPI processor (including ghost cells). !! @param pos_part Spatial coordinates of the bubble - pure function particle_in_domain(pos_part) + function particle_in_domain(pos_part) logical :: particle_in_domain real(wp), dimension(3), intent(in) :: pos_part @@ -1262,7 +1262,7 @@ contains !> The purpose of this procedure is to determine if the lagrangian bubble is located in the !! physical domain. The ghost cells are not part of the physical domain. !! @param pos_part Spatial coordinates of the bubble - pure function particle_in_domain_physical(pos_part) + function particle_in_domain_physical(pos_part) logical :: particle_in_domain_physical real(wp), dimension(3), intent(in) :: pos_part @@ -1281,7 +1281,7 @@ contains !! @param q Input scalar field !! 
@param dq Output gradient of q !! @param dir Gradient spatial direction - pure subroutine s_gradient_dir(q, dq, dir) + subroutine s_gradient_dir(q, dq, dir) type(scalar_field), intent(inout) :: q type(scalar_field), intent(inout) :: dq diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 48ea3bad9a..6f9c71a4c0 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -22,7 +22,7 @@ contains !! @param lbk_s Computational coordinates of the bubbles !! @param lbk_pos Spatial coordinates of the bubbles !! @param updatedvar Eulerian variable to be updated - pure subroutine s_smoothfunction(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar) + subroutine s_smoothfunction(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar) integer, intent(in) :: nBubs real(wp), dimension(1:lag_params%nBubs_glb, 1:3, 1:2), intent(in) :: lbk_s, lbk_pos @@ -40,7 +40,7 @@ contains !> The purpose of this procedure contains the algorithm to use the delta kernel function to map the effect of the bubbles. !! The effect of the bubbles only affects the cell where the bubble is located. - pure subroutine s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar) + subroutine s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar) integer, intent(in) :: nBubs real(wp), dimension(1:lag_params%nBubs_glb, 1:3, 1:2), intent(in) :: lbk_s @@ -95,7 +95,7 @@ contains !> The purpose of this procedure contains the algorithm to use the gaussian kernel function to map the effect of the bubbles. !! The effect of the bubbles affects the 3X3x3 cells that surround the bubble. 
- pure subroutine s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar) + subroutine s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar) integer, intent(in) :: nBubs real(wp), dimension(1:lag_params%nBubs_glb, 1:3, 1:2), intent(in) :: lbk_s, lbk_pos @@ -199,7 +199,7 @@ contains end subroutine s_gaussian !> The purpose of this subroutine is to apply the gaussian kernel function for each bubble (Maeda and Colonius, 2018)). - pure subroutine s_applygaussian(center, cellaux, nodecoord, stddsv, strength_idx, func) + subroutine s_applygaussian(center, cellaux, nodecoord, stddsv, strength_idx, func) $:GPU_ROUTINE(function_name='s_applygaussian',parallelism='[seq]', & & cray_inline=True) @@ -267,7 +267,7 @@ contains !> The purpose of this subroutine is to check if the current cell is outside the computational domain or not (including ghost cells). !! @param cellaux Tested cell to smear the bubble effect in. !! @param celloutside If true, then cellaux is outside the computational domain. - pure subroutine s_check_celloutside(cellaux, celloutside) + subroutine s_check_celloutside(cellaux, celloutside) $:GPU_ROUTINE(function_name='s_check_celloutside',parallelism='[seq]', & & cray_inline=True) @@ -301,7 +301,7 @@ contains !> This subroutine relocates the current cell, if it intersects a symmetric boundary. !! @param cell Cell of the current bubble !! @param cellaux Cell to map the bubble effect in. - pure subroutine s_shift_cell_symmetric_bc(cellaux, cell) + subroutine s_shift_cell_symmetric_bc(cellaux, cell) $:GPU_ROUTINE(function_name='s_shift_cell_symmetric_bc', & & parallelism='[seq]', cray_inline=True) @@ -340,7 +340,7 @@ contains !! @param cell Cell where the bubble is located !! @param volpart Volume of the bubble !! 
@param stddsv Standard deviaton - pure subroutine s_compute_stddsv(cell, volpart, stddsv) + subroutine s_compute_stddsv(cell, volpart, stddsv) $:GPU_ROUTINE(function_name='s_compute_stddsv',parallelism='[seq]', & & cray_inline=True) @@ -379,7 +379,7 @@ contains !> The purpose of this procedure is to calculate the characteristic cell volume !! @param cell Computational coordinates (x, y, z) !! @param Charvol Characteristic volume - pure elemental subroutine s_get_char_vol(cellx, celly, cellz, Charvol) + elemental subroutine s_get_char_vol(cellx, celly, cellz, Charvol) $:GPU_ROUTINE(function_name='s_get_char_vol',parallelism='[seq]', & & cray_inline=True) @@ -402,7 +402,7 @@ contains !! real type into integer. !! @param s Computational coordinates of the bubble, real type !! @param get_cell Computational coordinates of the bubble, integer type - pure subroutine s_get_cell(s_cell, get_cell) + subroutine s_get_cell(s_cell, get_cell) $:GPU_ROUTINE(function_name='s_get_cell',parallelism='[seq]', & & cray_inline=True) diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index da6cba12ac..1cb84e55c7 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -1571,7 +1571,7 @@ contains end subroutine s_finalize_cbc ! 
Detext if the problem has any characteristic boundary conditions - pure elemental subroutine s_any_cbc_boundaries(toggle) + elemental subroutine s_any_cbc_boundaries(toggle) logical, intent(inout) :: toggle diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index 694f6735b2..a6e19c0ed4 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -19,7 +19,7 @@ module m_compute_cbc contains !> Base L1 calculation - pure function f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) result(L1) + function f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) result(L1) $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(3), intent(in) :: lambda real(wp), intent(in) :: rho, c, dpres_ds @@ -29,7 +29,7 @@ contains end function f_base_L1 !> Fill density L variables - pure subroutine s_fill_density_L(L, lambda_factor, lambda2, c, mf, dalpha_rho_ds, dpres_ds) + subroutine s_fill_density_L(L, lambda_factor, lambda2, c, mf, dalpha_rho_ds, dpres_ds) $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: lambda_factor, lambda2, c @@ -43,7 +43,7 @@ contains end subroutine s_fill_density_L !> Fill velocity L variables - pure subroutine s_fill_velocity_L(L, lambda_factor, lambda2, dvel_ds) + subroutine s_fill_velocity_L(L, lambda_factor, lambda2, dvel_ds) $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: lambda_factor, lambda2 @@ -56,7 +56,7 @@ contains end subroutine s_fill_velocity_L !> Fill advection L variables - pure subroutine s_fill_advection_L(L, lambda_factor, lambda2, dadv_ds) + subroutine s_fill_advection_L(L, lambda_factor, lambda2, dadv_ds) $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: lambda_factor, lambda2 @@ -69,7 +69,7 @@ contains end subroutine s_fill_advection_L !> Fill chemistry L variables - pure subroutine s_fill_chemistry_L(L, lambda_factor, 
lambda2, dYs_ds) + subroutine s_fill_chemistry_L(L, lambda_factor, lambda2, dYs_ds) $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: lambda_factor, lambda2 @@ -84,7 +84,7 @@ contains end subroutine s_fill_chemistry_L !> Slip wall CBC (Thompson 1990, pg. 451) - pure subroutine s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) + subroutine s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) $:GPU_ROUTINE(function_name='s_compute_slip_wall_L',parallelism='[seq]', & & cray_inline=True) @@ -100,7 +100,7 @@ contains end subroutine s_compute_slip_wall_L !> Nonreflecting subsonic buffer CBC (Thompson 1987, pg. 13) - pure subroutine s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + subroutine s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) $:GPU_ROUTINE(function_name='s_compute_nonreflecting_subsonic_buffer_L', & & parallelism='[seq]', cray_inline=True) @@ -128,7 +128,7 @@ contains end subroutine s_compute_nonreflecting_subsonic_buffer_L !> Nonreflecting subsonic inflow CBC (Thompson 1990, pg. 455) - pure subroutine s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) + subroutine s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) $:GPU_ROUTINE(function_name='s_compute_nonreflecting_subsonic_inflow_L', & & parallelism='[seq]', cray_inline=True) @@ -143,7 +143,7 @@ contains end subroutine s_compute_nonreflecting_subsonic_inflow_L !> Nonreflecting subsonic outflow CBC (Thompson 1990, pg. 
454) - pure subroutine s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + subroutine s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) $:GPU_ROUTINE(function_name='s_compute_nonreflecting_subsonic_outflow_L', & & parallelism='[seq]', cray_inline=True) @@ -165,7 +165,7 @@ contains end subroutine s_compute_nonreflecting_subsonic_outflow_L !> Force-free subsonic outflow CBC (Thompson 1990, pg. 454) - pure subroutine s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + subroutine s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) $:GPU_ROUTINE(function_name='s_compute_force_free_subsonic_outflow_L', & & parallelism='[seq]', cray_inline=True) @@ -185,7 +185,7 @@ contains end subroutine s_compute_force_free_subsonic_outflow_L !> Constant pressure subsonic outflow CBC (Thompson 1990, pg. 455) - pure subroutine s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + subroutine s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) $:GPU_ROUTINE(function_name='s_compute_constant_pressure_subsonic_outflow_L', & & parallelism='[seq]', cray_inline=True) @@ -205,7 +205,7 @@ contains end subroutine s_compute_constant_pressure_subsonic_outflow_L !> Supersonic inflow CBC (Thompson 1990, pg. 453) - pure subroutine s_compute_supersonic_inflow_L(L) + subroutine s_compute_supersonic_inflow_L(L) $:GPU_ROUTINE(function_name='s_compute_supersonic_inflow_L', & & parallelism='[seq]', cray_inline=True) @@ -215,7 +215,7 @@ contains end subroutine s_compute_supersonic_inflow_L !> Supersonic outflow CBC (Thompson 1990, pg. 
453) - pure subroutine s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + subroutine s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) $:GPU_ROUTINE(function_name='s_compute_supersonic_outflow_L', & & parallelism='[seq]', cray_inline=True) diff --git a/src/simulation/m_derived_variables.fpp b/src/simulation/m_derived_variables.fpp index 8da88a3a91..467737937e 100644 --- a/src/simulation/m_derived_variables.fpp +++ b/src/simulation/m_derived_variables.fpp @@ -187,7 +187,7 @@ contains !! @param q_prim_vf2 Primitive variables !! @param q_prim_vf3 Primitive variables !! @param q_sf Acceleration component - pure subroutine s_derive_acceleration_component(i, q_prim_vf0, q_prim_vf1, & + subroutine s_derive_acceleration_component(i, q_prim_vf0, q_prim_vf1, & q_prim_vf2, q_prim_vf3, q_sf) integer, intent(in) :: i diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index f4a24fba7a..ba9cc97850 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -218,7 +218,7 @@ contains !! calculate the inverse of grad_xi to obtain F, F is a nxn tensor !! calculate the FFtranspose to obtain the btensor, btensor is nxn tensor !! btensor is symmetric, save the data space - pure subroutine s_neoHookean_cauchy_solver(btensor_in, q_prim_vf, G_param, j, k, l) + subroutine s_neoHookean_cauchy_solver(btensor_in, q_prim_vf, G_param, j, k, l) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(scalar_field), dimension(b_size), intent(inout) :: btensor_in @@ -257,7 +257,7 @@ contains !! calculate the inverse of grad_xi to obtain F, F is a nxn tensor !! calculate the FFtranspose to obtain the btensor, btensor is nxn tensor !! 
btensor is symmetric, save the data space - pure subroutine s_Mooney_Rivlin_cauchy_solver(btensor_in, q_prim_vf, G_param, j, k, l) + subroutine s_Mooney_Rivlin_cauchy_solver(btensor_in, q_prim_vf, G_param, j, k, l) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(scalar_field), dimension(b_size), intent(inout) :: btensor_in diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 3f736b0b0b..f763928a64 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -381,7 +381,7 @@ contains end subroutine s_finalize_hypoelastic_module - pure subroutine s_compute_damage_state(q_cons_vf, rhs_vf) + subroutine s_compute_damage_state(q_cons_vf, rhs_vf) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 9b18f5b5fc..09263c1ab5 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -127,7 +127,7 @@ contains !! @param q_prim_vf Primitive variables !! @param pb Internal bubble pressure !! @param mv Mass of vapor in bubble - pure subroutine s_ibm_correct_state(q_cons_vf, q_prim_vf, pb_in, mv_in) + subroutine s_ibm_correct_state(q_cons_vf, q_prim_vf, pb_in, mv_in) type(scalar_field), & dimension(sys_size), & @@ -420,7 +420,7 @@ contains !> Function that finds the number of ghost points, used for allocating !! memory. 
- pure subroutine s_find_num_ghost_points(num_gps_out, num_inner_gps_out) + subroutine s_find_num_ghost_points(num_gps_out, num_inner_gps_out) integer, intent(out) :: num_gps_out integer, intent(out) :: num_inner_gps_out @@ -468,7 +468,7 @@ contains end subroutine s_find_num_ghost_points !> Function that finds the ghost points - pure subroutine s_find_ghost_points(ghost_points_in, inner_points_in) + subroutine s_find_ghost_points(ghost_points_in, inner_points_in) type(ghost_point), dimension(num_gps), intent(INOUT) :: ghost_points_in type(ghost_point), dimension(num_inner_gps), intent(INOUT) :: inner_points_in @@ -583,7 +583,7 @@ contains end subroutine s_find_ghost_points !> Function that computes the interpolation coefficients of image points - pure subroutine s_compute_interpolation_coeffs(ghost_points_in) + subroutine s_compute_interpolation_coeffs(ghost_points_in) type(ghost_point), dimension(num_gps), intent(INOUT) :: ghost_points_in @@ -737,7 +737,7 @@ contains !> Function that uses the interpolation coefficients and the current state !! at the cell centers in order to estimate the state at the image point - pure subroutine s_interpolate_image_point(q_prim_vf, gp, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) + subroutine s_interpolate_image_point(q_prim_vf, gp, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), & dimension(sys_size), & diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 8112b3af7e..9306dcb760 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -67,7 +67,7 @@ contains !! S = - (divB) [ 0, Bx, By, Bz, vdotB, vx, vy, vz ]^T !! @param q_prim_vf Primitive variables !! 
@param rhs_vf rhs variables - pure subroutine s_compute_mhd_powell_rhs(q_prim_vf, rhs_vf) + subroutine s_compute_mhd_powell_rhs(q_prim_vf, rhs_vf) type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index 5affd3342f..63fc17b328 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -65,7 +65,7 @@ contains !> The main pressure relaxation procedure !! @param q_cons_vf Cell-average conservative variables - pure subroutine s_pressure_relaxation_procedure(q_cons_vf) + subroutine s_pressure_relaxation_procedure(q_cons_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer :: j, k, l @@ -82,7 +82,7 @@ contains end subroutine s_pressure_relaxation_procedure !> Process pressure relaxation for a single cell - pure subroutine s_relax_cell_pressure(q_cons_vf, j, k, l) + subroutine s_relax_cell_pressure(q_cons_vf, j, k, l) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -102,7 +102,7 @@ contains end subroutine s_relax_cell_pressure !> Check if pressure relaxation is needed for this cell - pure logical function s_needs_pressure_relaxation(q_cons_vf, j, k, l) + logical function s_needs_pressure_relaxation(q_cons_vf, j, k, l) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf @@ -120,7 +120,7 @@ contains end function s_needs_pressure_relaxation !> Correct volume fractions to physical bounds - pure subroutine s_correct_volume_fractions(q_cons_vf, j, k, l) + subroutine s_correct_volume_fractions(q_cons_vf, j, k, l) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -150,7 +150,7 @@ contains end subroutine s_correct_volume_fractions !> Main pressure equilibration using 
Newton-Raphson - pure subroutine s_equilibrate_pressure(q_cons_vf, j, k, l) + subroutine s_equilibrate_pressure(q_cons_vf, j, k, l) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -219,7 +219,7 @@ contains end subroutine s_equilibrate_pressure !> Correct internal energies using equilibrated pressure - pure subroutine s_correct_internal_energies(q_cons_vf, j, k, l) + subroutine s_correct_internal_energies(q_cons_vf, j, k, l) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 027c47a567..c7a75f2c96 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -411,7 +411,7 @@ contains end subroutine s_initialize_qbmm_module - pure subroutine s_compute_qbmm_rhs(idir, q_cons_vf, q_prim_vf, rhs_vf, flux_n_vf, pb, rhs_pb) + subroutine s_compute_qbmm_rhs(idir, q_cons_vf, q_prim_vf, rhs_vf, flux_n_vf, pb, rhs_pb) integer, intent(in) :: idir type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf, q_prim_vf @@ -562,7 +562,7 @@ contains end subroutine s_compute_qbmm_rhs !Coefficient array for non-polytropic model (pb and mv values are accounted in wght_pb and wght_mv) - pure subroutine s_coeff_nonpoly(pres, rho, c, coeffs) + subroutine s_coeff_nonpoly(pres, rho, c, coeffs) $:GPU_ROUTINE(function_name='s_coeff_nonpoly',parallelism='[seq]', & & cray_inline=True) @@ -633,7 +633,7 @@ contains end subroutine s_coeff_nonpoly !Coefficient array for polytropic model (pb for each R0 bin accounted for in wght_pb) - pure subroutine s_coeff(pres, rho, c, coeffs) + subroutine s_coeff(pres, rho, c, coeffs) $:GPU_ROUTINE(function_name='s_coeff',parallelism='[seq]', & & cray_inline=True) @@ -868,7 +868,7 @@ contains end if end subroutine s_coeff_selector - pure subroutine s_chyqmom(momin, wght, abscX, abscY) + subroutine s_chyqmom(momin, wght, abscX, abscY) 
$:GPU_ROUTINE(function_name='s_chyqmom',parallelism='[seq]', & & cray_inline=True) @@ -926,7 +926,7 @@ contains end subroutine s_chyqmom - pure subroutine s_hyqmom(frho, fup, fmom) + subroutine s_hyqmom(frho, fup, fmom) $:GPU_ROUTINE(function_name='s_hyqmom',parallelism='[seq]', & & cray_inline=True) @@ -946,7 +946,7 @@ contains end subroutine s_hyqmom - pure function f_quad(abscX, abscY, wght_in, q, r, s) + function f_quad(abscX, abscY, wght_in, q, r, s) $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in real(wp), intent(in) :: q, r, s @@ -962,7 +962,7 @@ contains end function f_quad - pure function f_quad2D(abscX, abscY, wght_in, pow) + function f_quad2D(abscX, abscY, wght_in, pow) $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in real(wp), dimension(3), intent(in) :: pow diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 8196403564..bae30f3fd6 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -203,7 +203,7 @@ contains !! For more information please refer to: !! 1) s_compute_cartesian_viscous_source_flux !! 2) s_compute_cylindrical_viscous_source_flux - pure subroutine s_compute_viscous_source_flux(velL_vf, & + subroutine s_compute_viscous_source_flux(velL_vf, & dvelL_dx_vf, & dvelL_dy_vf, & dvelL_dz_vf, & @@ -3795,7 +3795,7 @@ contains !! @param[in] ix Global X-direction loop bounds (int_bounds_info). !! @param[in] iy Global Y-direction loop bounds (int_bounds_info). !! @param[in] iz Global Z-direction loop bounds (int_bounds_info). - pure subroutine s_compute_cylindrical_viscous_source_flux(velL_vf, & + subroutine s_compute_cylindrical_viscous_source_flux(velL_vf, & dvelL_dx_vf, dvelL_dy_vf, dvelL_dz_vf, & velR_vf, & dvelR_dx_vf, dvelR_dy_vf, dvelR_dz_vf, & @@ -3956,7 +3956,7 @@ contains !! @param[in] ix X-direction loop bounds (int_bounds_info). !! 
@param[in] iy Y-direction loop bounds (int_bounds_info). !! @param[in] iz Z-direction loop bounds (int_bounds_info). - pure subroutine s_compute_cartesian_viscous_source_flux(dvelL_dx_vf, & + subroutine s_compute_cartesian_viscous_source_flux(dvelL_dx_vf, & dvelL_dy_vf, & dvelL_dz_vf, & dvelR_dx_vf, & @@ -4082,7 +4082,7 @@ contains !! @param[in] Re_shear Shear Reynolds number. !! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz). !! @param[out] tau_shear_out Calculated shear stress tensor (stress on i-face, j-direction). - pure subroutine s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, tau_shear_out) + subroutine s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, tau_shear_out) $:GPU_ROUTINE(parallelism='[seq]') implicit none @@ -4116,7 +4116,7 @@ contains !! @param[in] Re_bulk Bulk Reynolds number. !! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz). !! @param[out] tau_bulk_out Calculated bulk stress tensor (stress on i-face, i-direction). - pure subroutine s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, tau_bulk_out) + subroutine s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, tau_bulk_out) $:GPU_ROUTINE(parallelism='[seq]') implicit none @@ -4143,7 +4143,7 @@ contains !! @param flux_src_vf Intercell source fluxes !! @param flux_gsrc_vf Intercell geometric source fluxes !! @param norm_dir Dimensional splitting coordinate direction - pure subroutine s_finalize_riemann_solver(flux_vf, flux_src_vf, & + subroutine s_finalize_riemann_solver(flux_vf, flux_src_vf, & flux_gsrc_vf, & norm_dir) diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 565524e80b..2ff6d889dd 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -20,7 +20,7 @@ contains !! @param k y coordinate index !! @param l z coordinate index !! 
@return fltr_dtheta Modified dtheta value for cylindrical coordinates - pure function f_compute_filtered_dtheta(k, l) result(fltr_dtheta) + function f_compute_filtered_dtheta(k, l) result(fltr_dtheta) $:GPU_ROUTINE(parallelism='[seq]') integer, intent(in) :: k, l real(wp) :: fltr_dtheta @@ -47,7 +47,7 @@ contains !! @param k y coordinate index !! @param l z coordinate index !! @return cfl_terms computed CFL terms for 2D/3D cases - pure function f_compute_multidim_cfl_terms(vel, c, j, k, l) result(cfl_terms) + function f_compute_multidim_cfl_terms(vel, c, j, k, l) result(cfl_terms) $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(num_vels), intent(in) :: vel real(wp), intent(in) :: c @@ -89,7 +89,7 @@ contains !! @param j x index !! @param k y index !! @param l z index - pure subroutine s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + subroutine s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) $:GPU_ROUTINE(function_name='s_compute_enthalpy',parallelism='[seq]', & & cray_inline=True) @@ -181,7 +181,7 @@ contains !! @param icfl_sf cell-centered inviscid cfl number !! @param vcfl_sf (optional) cell-centered viscous CFL number !! @param Rc_sf (optional) cell centered Rc - pure subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in), dimension(num_vels) :: vel real(wp), intent(in) :: c, rho @@ -244,7 +244,7 @@ contains !! @param j x coordinate !! @param k y coordinate !! 
@param l z coordinate - pure subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) + subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(num_vels), intent(in) :: vel real(wp), intent(in) :: c, rho diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 5d23d8e4c3..8c61a422fb 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -65,7 +65,7 @@ contains end if end subroutine s_initialize_surface_tension_module - pure subroutine s_compute_capillary_source_flux( & + subroutine s_compute_capillary_source_flux( & vSrc_rsx_vf, vSrc_rsy_vf, vSrc_rsz_vf, & flux_src_vf, & id, isx, isy, isz) diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 6bc9d004d9..5c525c4664 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -1183,7 +1183,7 @@ contains !! @param j First-coordinate cell index !! @param k Secone-coordinate cell index !! 
@param l Thire-coordinate cell index - pure subroutine s_preserve_monotonicity(v_rs_ws, vL_rs_vf, vR_rs_vf) + subroutine s_preserve_monotonicity(v_rs_ws, vL_rs_vf, vR_rs_vf) real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(IN) :: v_rs_ws real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(INOUT) :: vL_rs_vf, vR_rs_vf From db1b8c518d0c3b0e44f45c72c20e8820058b8911 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Sun, 20 Jul 2025 23:06:07 -0400 Subject: [PATCH 06/60] Added routine and declare and partial data, non compiling --- src/common/include/acc_macros.fpp | 81 ++++++++++++++ src/common/include/omp_macros.fpp | 112 +++++++++++++++---- src/common/include/parallel_macros.fpp | 143 +++++++------------------ src/common/m_phase_change.fpp | 6 +- src/simulation/m_global_parameters.fpp | 5 +- src/simulation/m_ibm.fpp | 5 +- 6 files changed, 224 insertions(+), 128 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index 9375799bde..a2dc19b525 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -56,4 +56,85 @@ & clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef + +#:def ACC_ROUTINE(function_name=None, parallelism=None, nohost=False, extraAccArgs=None) + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + #:assert isinstance(nohost, bool) + #:if nohost == True + #:set nohost_val = 'nohost' + #:else + #:set nohost_val = '' + #:endif + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = parallelism_val.strip('\n') + nohost_val.strip('\n') + #:set acc_directive = '!$acc routine ' + & + & clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ACC_DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) + #:set copy_val = GEN_COPY_STR(copy) + #:set copyin_val = 
GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + #:set copyout_val = GEN_COPYOUT_STR(copyout) + #:set create_val = GEN_CREATE_STR(create) + #:set present_val = GEN_PRESENT_STR(present) + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + #:set link_val = GEN_LINK_STR(link) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & present_val.strip('\n') + deviceptr_val.strip('\n') + & + & link_val.strip('\n') + #:set acc_directive = '!$acc declare ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ACC_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + #:if data_dependency is not None + #:assert isinstance(data_dependency, str) + #:assert (data_dependency == 'auto' or data_dependency == 'independent') + #:set data_dependency_val = data_dependency + #:else + #:set data_dependency_val = '' + #:endif + #:set private_val = GEN_PRIVATE_STR(private, False) + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & data_dependency_val.strip('\n') + private_val.strip('\n') + & + & reduction_val.strip('\n') + #:set acc_directive = '!$acc loop ' + & + & clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ACC_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None) + #:assert code is not None + #:assert isinstance(code, str) + #:if code == '' or code.isspace() + #:stop 'GPU_DATA macro has no 
effect on the code as it is not surrounding any code' + #:endif + #:set copy_val = GEN_COPY_STR(copy) + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + #:set copyout_val = GEN_COPYOUT_STR(copyout) + #:set create_val = GEN_CREATE_STR(create) + #:set no_create_val = GEN_NOCREATE_STR(no_create) + #:set present_val = GEN_PRESENT_STR(present) + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + #:set attach_val = GEN_ATTACH_STR(attach) + #:set default_val = GEN_DEFAULT_STR(default) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + & + & default_val.strip('\n') + #:set acc_directive = '!$acc data ' + clause_val + extraAccArgs_val.strip('\n') + #:set end_acc_directive = '!$acc end data' + $:acc_directive + $:code + $:end_acc_directive +#:enddef ! New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 4a27e8f093..23e17fbb21 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -17,22 +17,8 @@ #:assert isinstance(default, str) #:assert (default == 'present' or default == 'none') #:if default == 'present' - #:set default_val = 'defaultmap(present:all) ' - #:elif default == 'none' - #:stop 'Not Supported Yet' - #:endif - #:else - #:set default_val = '' - #:endif - $:default_val -#:enddef - -#:def OMP_DEFAULT_STR(default) - #:if default is not None - #:assert isinstance(default, str) - #:assert (default == 'present' or default == 'none') - #:if default == 'present' - #:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) ' + #! 
#:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) ' + #:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(present:allocatable) defaultmap(present:pointer)' #:elif default == 'none' #:stop 'Not Supported Yet' #:endif @@ -81,13 +67,19 @@ #:enddef #:def OMP_ATTACH_STR(attach) - #:if attach is not None - #:stop 'attach is not supported yet' - #:endif + #! #:if attach is not None + #! #:stop 'attach is not supported yet' + #! #:endif #:set attach_val = '' $:attach_val #:enddef +#:def OMP_TO_STR(to) + #! Not yet implemented + #:set to_val = '' + $:to_val +#:enddef + #:def OMP_PARALLELISM_STR(parallelism) #:set temp = '' $:temp @@ -130,7 +122,7 @@ #:set collapse_val = GEN_COLLAPSE_STR(collapse) #:set parallelism_val = OMP_PARALLELISM_STR(parallelism) - #! #:set default_val = OMP_DEFAULT_STR(default) + #:set default_val = OMP_DEFAULT_STR(default) #:set default_val = '' #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) @@ -154,4 +146,84 @@ & clause_val + extraOmpArgs_val.strip('\n') $:omp_directive #:enddef + +#:def OMP_ROUTINE(function_name, nohost, extraOmpArgs) + #:assert isinstance(nohost, bool) + #:if nohost == True + #:set nohost_val = 'device_type(nohost) ' + #:else + #:set nohost_val = 'device_type(any) ' + #:endif + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:if function_name is not None + #:set function_name_val = '(' + function_name + ') ' + #:else + #:set function_name_val = '' + #:endif + #:set clause_val = nohost_val.strip('\n') + #:set omp_directive = '!$omp declare target ' + & + & clause_val + extraOmpArgs_val.strip('\n') + $:omp_directive +#:enddef + +#:def OMP_DECLARE(copyin=None, copyinReadOnly=None, create=None, link=None, extraOmpArgs=None) + #:set copyin_val = OMP_TO_STR(copyin).strip('\n') + OMP_TO_STR(copyinReadOnly).strip('\n') + #:set create_val = GEN_CLAUSE('(', 
create) + #:set link_val = GEN_LINK_STR(link) + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set clause_val = copyin_val.strip('\n') + & + & create_val.strip('\n') + link_val.strip('\n') + #:set omp_directive = '!$omp declare target ' + clause_val + extraOmpArgs_val.strip('\n') + $:omp_directive +#:enddef + +#! Not implemented yet +#:def OMP_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + #:if data_dependency is not None + #:assert isinstance(data_dependency, str) + #:assert (data_dependency == 'auto' or data_dependency == 'independent') + #:set data_dependency_val = data_dependency + #:else + #:set data_dependency_val = '' + #:endif + #:set private_val = GEN_PRIVATE_STR(private, False) + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & data_dependency_val.strip('\n') + private_val.strip('\n') + & + & reduction_val.strip('\n') + #:set acc_directive = '!$acc loop ' + & + & clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def OMP_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraOmpArgs=None) + #:assert code is not None + #:assert isinstance(code, str) + #:if code == '' or code.isspace() + #:stop 'GPU_DATA macro has no effect on the code as it is not surrounding any code' + #:endif + #:set copy_val = OMP_COPY_STR(copy) + #:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n') + #:set copyout_val = OMP_COPYOUT_STR(copyout) + #:set create_val = OMP_CREATE_STR(create) + #:set no_create_val = OMP_NOCREATE_STR(no_create) + #:set 
present_val = OMP_PRESENT_STR(present) + #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) + #:set attach_val = OMP_ATTACH_STR(attach) + #:set default_val = OMP_DEFAULT_STR(default) + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + & + & default_val.strip('\n') + #:set omp_directive = '!$omp target data ' + clause_val + extraOmpArgs_val.strip('\n') + #:set end_omp_directive = '!$omp end target data' + $:omp_directive + $:code + $:end_omp_directive +#:enddef ! New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 00dc95493e..6594de7098 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -196,19 +196,11 @@ #endif #:enddef -#:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None) +#:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None, extraOmpArgs=None) #:assert isinstance(cray_inline, bool) - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - #:assert isinstance(nohost, bool) - #:if nohost == True - #:set nohost_val = 'nohost' - #:else - #:set nohost_val = '' - #:endif - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = parallelism_val.strip('\n') + nohost_val.strip('\n') - #:set acc_directive = '!$acc routine ' + & - & clause_val + extraAccArgs_val.strip('\n') + #:set acc_directive = ACC_ROUTINE(function_name=function_name, parallelism=parallelism, nohost=nohost, extraAccArgs=extraAccArgs) + #:set omp_directive = OMP_ROUTINE(function_name=function_name, nohost=nohost, extraOmpArgs=extraOmpArgs) + #:if cray_inline 
== True #:if not isinstance(function_name, str) #:stop "When inlining for Cray Compiler, function name must be given and given as a string" @@ -216,29 +208,33 @@ #:set cray_directive = ('!DIR$ INLINEALWAYS ' + function_name).strip('\n') #ifdef _CRAYFTN $:cray_directive -#else +#elif MFC_OpenACC $:acc_directive +#elif MFC_OpenMP + $:omp_directive #endif #:else +#if MFC_OpenACC $:acc_directive +#elif MFC_OpenMP + $:omp_directive +#endif #:endif #:enddef -#:def GPU_DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) - #:set copy_val = GEN_COPY_STR(copy) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set create_val = GEN_CREATE_STR(create) - #:set present_val = GEN_PRESENT_STR(present) - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - #:set link_val = GEN_LINK_STR(link) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & present_val.strip('\n') + deviceptr_val.strip('\n') + & - & link_val.strip('\n') - #:set acc_directive = '!$acc declare ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive +#:def GPU_DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None, extraOmpArgs=None) + #:set acc_code = ACC_DECLARE(copy=copy, copyin=copyin, copyinReadOnly=copyinReadOnly, copyout=copyout, create=create, present=present, deviceptr=deviceptr, link=link, extraAccArgs=None) + #:assert copyout is None + #:assert present is None + #:assert deviceptr is None + #:assert copy is None + #:set omp_code = OMP_DECLARE(copyin=copyin, copyinReadOnly=copyinReadOnly, create=create, link=link, extraOmpArgs=extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code 
+#elif defined(MFC_OpenMP) + $:omp_code +#endif #:enddef #:def GPU_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) @@ -262,32 +258,17 @@ $:acc_directive #:enddef -#:def GPU_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None) - #:assert code is not None - #:assert isinstance(code, str) - #:if code == '' or code.isspace() - #:stop 'GPU_DATA macro has no effect on the code as it is not surrounding any code' - #:endif - #:set copy_val = GEN_COPY_STR(copy) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set create_val = GEN_CREATE_STR(create) - #:set no_create_val = GEN_NOCREATE_STR(no_create) - #:set present_val = GEN_PRESENT_STR(present) - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - #:set attach_val = GEN_ATTACH_STR(attach) - #:set default_val = GEN_DEFAULT_STR(default) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & no_create_val.strip('\n') + present_val.strip('\n') + & - & deviceptr_val.strip('\n') + attach_val.strip('\n') + & - & default_val.strip('\n') - #:set acc_directive = '!$acc data ' + clause_val + extraAccArgs_val.strip('\n') - #:set end_acc_directive = '!$acc end data' - $:acc_directive +#:def GPU_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None, extraOmpArgs=None) + #:set acc_code = ACC_DATA(code=code, copy=copy, copyin=copyin, copyinReadOnly=copyinReadOnly, copyout=copyout, create=create, no_create=no_create, present=present, deviceptr=deviceptr, attach=attach, 
default=default, extraAccArgs=extraAccArgs) + #:set omp_code = OMP_DATA(code=code, copy=copy, copyin=copyin, copyinReadOnly=copyinReadOnly, copyout=copyout, create=create, no_create=no_create, present=present, deviceptr=deviceptr, attach=attach, default=default, extraOmpArgs=extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#else $:code - $:end_acc_directive +#endif #:enddef #:def GPU_HOST_DATA(code, use_device=None, extraAccArgs=None) @@ -306,64 +287,20 @@ $:end_acc_directive #:enddef -#:def GEN_MP_PARENTHESES_CLAUSE(clause_name, clause_modifier, clause_str) - #:set clause_regex = re.compile(',(?![^(]*\\))') - #:assert isinstance(clause_name, str) - #:if clause_str is not None - #:set count = 0 - #:assert isinstance(clause_str, str) - #:assert clause_str[0] == '[' and clause_str[-1] == ']' - #:for c in clause_str - #:if c == '(' - #:set count = count + 1 - #:elif c == ')' - #:set count = count - 1 - #:endif - #:if c == ',' and count > 1 - #:stop 'Nested parentheses with comma inside is not supported. Incorrect clause: {}'.format(clause_str) - #:elif count < 0 - #:stop 'Missing parentheses. 
Incorrect clause: {}'.format(clause_str) - #:endif - #:endfor - #:set clause_str = re.sub(clause_regex, ';', clause_str) - #:set clause_list = [x.strip() for x in clause_str.strip('[]').split(';')] - $:ASSERT_LIST(clause_list, str) - #:set clause_str = clause_name + '(' + clause_modifier + ':' + ', '.join(clause_list) + ') ' - #:else - #:set clause_str = '' - #:endif - $:clause_str -#:enddef - -#:def GEN_TO_STR(to) - #:set to_str = GEN_MP_PARENTHESES_CLAUSE('map', 'to', to) - $:to_str -#:enddef - - -#:def GEN_ALLOC_STR(alloc) - #:set alloc_str = GEN_MP_PARENTHESES_CLAUSE('map', 'alloc', alloc) - $:alloc_str -#:enddef - #:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') #:set create_val = GEN_CREATE_STR(create) #:set attach_val = GEN_ATTACH_STR(attach) - #:set to_val = GEN_TO_STR(copyin) - #:set alloc_val = GEN_ALLOC_STR(create) - #:set alloc_val2 = GEN_ALLOC_STR(attach) + #! #:set to_val = GEN_TO_STR(copyin) + #! #:set alloc_val = GEN_ALLOC_STR(create) + #! #:set alloc_val2 = GEN_ALLOC_STR(attach) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set extraMpArgs_val = '' + #! #:set extraMpArgs_val = '' #:set acc_clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') - #:set mp_clause_val = to_val.strip('\n') + alloc_val.strip('\n') + alloc_val2.strip('\n') + #! #:set mp_clause_val = to_val.strip('\n') + alloc_val.strip('\n') + alloc_val2.strip('\n') #:set acc_directive = '!$acc enter data ' + acc_clause_val + extraAccArgs_val.strip('\n') - #:set mp_directive = '!$omp target enter data ' + mp_clause_val + extraMpArgs_val.strip('\n') -#if MFC_OpenACC + #! 
#:set mp_directive = '!$omp target enter data ' + mp_clause_val + extraMpArgs_val.strip('\n') $:acc_directive -#elif MFC_OpenMP - $:mp_directive -#endif #:enddef #:def GPU_EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 8f4cdf51c5..5a64cb6d29 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -89,11 +89,11 @@ contains real(wp) :: rho, rM, m1, m2, MCT !< total density, total reacting mass, individual reacting masses real(wp) :: TvF !< total volume fraction - $:GPU_DECLARE(create='[pS,pSOV,pSSL,TS,TSOV,TSSL,TSatOV,TSatSL]') - $:GPU_DECLARE(create='[rhoe,dynE,rhos,rho,rM,m1,m2,MCT,TvF]') + ! $:GPU_DECLARE(create='[pS,pSOV,pSSL,TS,TSOV,TSSL,TSatOV,TSatSL]') + ! $:GPU_DECLARE(create='[rhoe,dynE,rhos,rho,rM,m1,m2,MCT,TvF]') real(wp), dimension(num_fluids) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok - $:GPU_DECLARE(create='[p_infOV,p_infpT,p_infSL,sk,hk,gk,ek,rhok]') + ! 
$:GPU_DECLARE(create='[p_infOV,p_infpT,p_infSL,sk,hk,gk,ek,rhok]') !< Generic loop iterators integer :: i, j, k, l diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index fa6185c207..a9271506e1 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -212,10 +212,13 @@ module m_global_parameters !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z !> @} +#if defined(MFC_OpenACC) $:GPU_DECLARE(create='[bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3]') $:GPU_DECLARE(create='[bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3]') $:GPU_DECLARE(create='[bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3]') - +#elif defined(MFC_OpenMP) + $:GPU_DECLARE(create='[bc_x, bc_y, bc_z]') +#endif type(bounds_info) :: x_domain, y_domain, z_domain real(wp) :: x_a, y_a, z_a real(wp) :: x_b, y_b, z_b diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 09263c1ab5..df6ea1145b 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -45,8 +45,11 @@ module m_ibm integer :: num_gps !< Number of ghost points integer :: num_inner_gps !< Number of ghost points +#if defined(MFC_OpenACC) $:GPU_DECLARE(create='[gp_layers,num_gps,num_inner_gps]') - +#elif defined(MFC_OpenMP) + $:GPU_DECLARE(create='[num_gps,num_inner_gps]') +#endif contains !> Allocates memory for the variables in the IBM module From ed29d13bb1977df5a31d63d32eadf355e8636372 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 21 Jul 2025 14:00:21 -0400 Subject: [PATCH 07/60] Some manual changes to codebase, and implemented attach --- src/common/include/omp_macros.fpp | 12 ++++++------ src/simulation/m_acoustic_src.fpp | 4 +++- src/simulation/m_fftw.fpp | 2 +- src/simulation/m_global_parameters.fpp | 1 + 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 23e17fbb21..2a3ff7dfac 100644 --- 
a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -17,8 +17,8 @@ #:assert isinstance(default, str) #:assert (default == 'present' or default == 'none') #:if default == 'present' - #! #:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) ' - #:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(present:allocatable) defaultmap(present:pointer)' + #! #:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) defaultmap(present:pointer)' + #:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)' #:elif default == 'none' #:stop 'Not Supported Yet' #:endif @@ -98,7 +98,7 @@ #:set no_create_val = OMP_NOCREATE_STR(no_create) #:set present_val = OMP_PRESENT_STR(present) #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) - #:set attach_val = OMP_ATTACH_STR(attach) + #:set attach_val = OMP_MAP_STR('tofrom', attach) #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set omp_clause_val = default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & & copy_val.strip('\n') + copyin_val.strip('\n') + & @@ -106,7 +106,7 @@ & no_create_val.strip('\n') + present_val.strip('\n') + & & deviceptr_val.strip('\n') + attach_val.strip('\n')) - #:set omp_clause_val = 'defaultmap(firstprivate:scalar) ' + omp_clause_val.strip('\n') + #:set omp_clause_val = omp_clause_val.strip('\n') #:set omp_directive = '!$omp target teams ' + omp_clause_val + extraOmpArgs_val.strip('\n') #:set end_omp_directive = '!$omp end target teams' @@ -133,7 +133,7 @@ #:set no_create_val = OMP_NOCREATE_STR(no_create) #:set present_val = OMP_PRESENT_STR(present) #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) - #:set attach_val = OMP_ATTACH_STR(attach) + #:set attach_val = OMP_MAP_STR('tofrom', attach) #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & & 
default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & @@ -212,7 +212,7 @@ #:set no_create_val = OMP_NOCREATE_STR(no_create) #:set present_val = OMP_PRESENT_STR(present) #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) - #:set attach_val = OMP_ATTACH_STR(attach) + #:set attach_val = OMP_MAP_STR('tofrom', attach) #:set default_val = OMP_DEFAULT_STR(default) #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 28009427bb..1b328e3d1f 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -178,6 +178,7 @@ contains end do end do end do + ! !$omp end target teams distribute parallel do simd ! Keep outer loop sequel because different sources can have very different number of points do ai = 1, num_source @@ -200,7 +201,8 @@ contains allocate (phi_rn(1:bb_num_freq(ai))) if (pulse(ai) == 4) then - call random_number(phi_rn(1:bb_num_freq(ai))) + ! call random_number(phi_rn(1:bb_num_freq(ai))) + phi_rn(1:bb_num_freq(ai)) = 1 ! 
Ensure all the ranks have the same random phase shift call s_mpi_send_random_number(phi_rn, bb_num_freq(ai)) end if diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 9563b99a6d..fabd15255e 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -264,6 +264,7 @@ contains end do end do + #:endcall GPU_DATA #else Nfq = 3 @@ -295,7 +296,6 @@ contains end do end do #endif - #:endcall GPU_DATA end subroutine s_apply_fourier_filter !> The purpose of this subroutine is to destroy the fftw plan diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index a9271506e1..9ae61c5112 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -184,6 +184,7 @@ module m_global_parameters #:endfor real(wp), dimension(3) :: accel_bf $:GPU_DECLARE(create='[accel_bf]') + $:GPU_DECLARE(create='[k_x,w_x,p_x,g_x,k_y,w_y,p_y,g_y,k_z,w_z,p_z,g_z]') integer :: cpu_start, cpu_end, cpu_rate From c1b41a68f32afe7cdab9a292d844d0b535b32c84 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 21 Jul 2025 17:08:30 -0400 Subject: [PATCH 08/60] Changed parallel loop to also include the end parallel --- src/common/include/acc_macros.fpp | 5 +- src/common/include/omp_macros.fpp | 28 +--- src/common/include/parallel_macros.fpp | 34 ++--- src/common/m_boundary_common.fpp | 36 +++-- src/common/m_chemistry.fpp | 3 +- src/common/m_finite_differences.fpp | 3 +- src/common/m_helper_basic.fpp | 2 +- src/common/m_mpi_common.fpp | 54 ++++--- src/common/m_phase_change.fpp | 3 +- src/common/m_variables_conversion.fpp | 9 +- src/simulation/m_acoustic_src.fpp | 12 +- src/simulation/m_body_forces.fpp | 15 +- src/simulation/m_bubbles_EE.fpp | 21 ++- src/simulation/m_bubbles_EL.fpp | 68 ++++++--- src/simulation/m_bubbles_EL_kernels.fpp | 6 +- src/simulation/m_cbc.fpp | 108 ++++++++----- src/simulation/m_data_output.fpp | 3 +- src/simulation/m_fftw.fpp | 24 ++- src/simulation/m_hyperelastic.fpp | 4 +- 
src/simulation/m_hypoelastic.fpp | 42 +++-- src/simulation/m_ibm.fpp | 12 +- src/simulation/m_mhd.fpp | 3 +- src/simulation/m_mpi_proxy.fpp | 18 ++- src/simulation/m_pressure_relaxation.fpp | 3 +- src/simulation/m_qbmm.fpp | 12 +- src/simulation/m_rhs.fpp | 151 +++++++++--------- src/simulation/m_riemann_solvers.fpp | 187 ++++++++++++----------- src/simulation/m_surface_tension.fpp | 36 +++-- src/simulation/m_time_steppers.fpp | 61 +++++--- src/simulation/m_viscous.fpp | 137 +++++++++++------ src/simulation/m_weno.fpp | 30 ++-- 31 files changed, 668 insertions(+), 462 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index a2dc19b525..4da5ab0231 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -28,7 +28,7 @@ $:end_acc_directive #:enddef -#:def ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & +#:def ACC_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) @@ -54,7 +54,10 @@ & deviceptr_val.strip('\n') + attach_val.strip('\n') #:set acc_directive = '!$acc parallel loop ' + & & clause_val + extraAccArgs_val.strip('\n') + #:set acc_end_directive = '!$acc end parallel loop' $:acc_directive + $:code + $:acc_end_directive #:enddef #:def ACC_ROUTINE(function_name=None, parallelism=None, nohost=False, extraAccArgs=None) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 2a3ff7dfac..04b1c25465 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -115,7 +115,7 @@ $:omp_end_directive #:enddef -#:def OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & +#:def OMP_PARALLEL_LOOP(code, collapse=None, 
private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None) @@ -144,7 +144,10 @@ #! Hardcoding the parallelism for now #:set omp_directive = '!$omp target teams distribute parallel do simd ' + & & clause_val + extraOmpArgs_val.strip('\n') + #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' $:omp_directive + $:code + $:omp_end_directive #:enddef #:def OMP_ROUTINE(function_name, nohost, extraOmpArgs) @@ -178,25 +181,10 @@ #:enddef #! Not implemented yet -#:def OMP_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - #:if data_dependency is not None - #:assert isinstance(data_dependency, str) - #:assert (data_dependency == 'auto' or data_dependency == 'independent') - #:set data_dependency_val = data_dependency - #:else - #:set data_dependency_val = '' - #:endif - #:set private_val = GEN_PRIVATE_STR(private, False) - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & - & data_dependency_val.strip('\n') + private_val.strip('\n') + & - & reduction_val.strip('\n') - #:set acc_directive = '!$acc loop ' + & - & clause_val + extraAccArgs_val.strip('\n') - $:acc_directive +#:def OMP_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraOmpArgs=None) + #! 
loop is going to be ignored since all loops right now are seq + #:set temp = '' + $:temp #:enddef #:def OMP_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraOmpArgs=None) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 6594de7098..ee0577613a 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -181,13 +181,13 @@ #:enddef -#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & +#:def GPU_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) - #:set acc_code = ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) - #:set omp_code = OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) + #:set acc_code = ACC_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) + #:set omp_code = OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) #if defined(MFC_OpenACC) $:acc_code @@ -237,25 +237,15 @@ #endif #:enddef -#:def GPU_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, 
reductionOp=None, private=None, extraAccArgs=None) - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - #:if data_dependency is not None - #:assert isinstance(data_dependency, str) - #:assert (data_dependency == 'auto' or data_dependency == 'independent') - #:set data_dependency_val = data_dependency - #:else - #:set data_dependency_val = '' - #:endif - #:set private_val = GEN_PRIVATE_STR(private, False) - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & - & data_dependency_val.strip('\n') + private_val.strip('\n') + & - & reduction_val.strip('\n') - #:set acc_directive = '!$acc loop ' + & - & clause_val + extraAccArgs_val.strip('\n') - $:acc_directive +#:def GPU_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None, extraOmpArgs=None) + #:set acc_code = ACC_LOOP(collapse=collapse, parallelism=parallelism, data_dependency=data_dependency, reduction=reduction, reductionOp=reductionOp, private=private, extraAccArgs=extraAccArgs) + #:set omp_code = OMP_LOOP(collapse=collapse, parallelism=parallelism, data_dependency=data_dependency, reduction=reduction, reductionOp=reductionOp, private=private, extraOmpArgs=extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#endif #:enddef #:def GPU_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None, extraOmpArgs=None) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 3335a79b53..fb61b78d96 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -91,7 +91,7 @@ contains if (bc_x%beg >= 0) then call 
s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb_in, mv_in) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (int(bc_type(1, -1)%sf(0, k, l))) @@ -115,12 +115,13 @@ contains end if end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb_in, mv_in) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (int(bc_type(1, 1)%sf(0, k, l))) @@ -144,6 +145,7 @@ contains end if end do end do + #:endcall GPU_PARALLEL_LOOP end if ! Population of Buffers in y-direction @@ -153,7 +155,7 @@ contains if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb_in, mv_in) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, -1)%sf(k, 0, l))) @@ -180,12 +182,13 @@ contains end if end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb_in, mv_in) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, 1)%sf(k, 0, l))) @@ -209,6 +212,7 @@ contains end if end do end do + #:endcall GPU_PARALLEL_LOOP end if ! 
Population of Buffers in z-direction @@ -218,7 +222,7 @@ contains if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb_in, mv_in) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, -1)%sf(k, l, 0))) @@ -242,12 +246,13 @@ contains end if end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb_in, mv_in) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, 1)%sf(k, l, 0))) @@ -271,6 +276,7 @@ contains end if end do end do + #:endcall GPU_PARALLEL_LOOP end if ! END: Population of Buffers in z-direction @@ -1162,7 +1168,7 @@ contains if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, -1)%sf(0, k, l)) @@ -1175,12 +1181,13 @@ contains end select end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, 1)%sf(0, k, l)) @@ -1193,6 +1200,7 @@ contains end select end do end do + #:endcall GPU_PARALLEL_LOOP end if if (n == 0) return @@ -1201,7 +1209,7 @@ contains if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, -1)%sf(k, 0, l)) @@ -1214,12 +1222,13 @@ contains end select end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bc_y%end >= 0) then call 
s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, 1)%sf(k, 0, l)) @@ -1232,6 +1241,7 @@ contains end select end do end do + #:endcall GPU_PARALLEL_LOOP end if if (p == 0) return @@ -1240,7 +1250,7 @@ contains if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, -1)%sf(k, l, 0)) @@ -1253,12 +1263,13 @@ contains end select end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, 1)%sf(k, l, 0)) @@ -1271,6 +1282,7 @@ contains end select end do end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_populate_capillary_buffers diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 71aa890e87..a2ed576ff9 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -99,7 +99,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - $:GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end @@ -126,6 +126,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_chemistry_reaction_flux diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index 4b7e399ad5..07dee3c024 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -18,7 
+18,7 @@ contains real(wp) :: divergence - $:GPU_PARALLEL_LOOP(collapse=3, private='[divergence]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[divergence]') do x = ix_s%beg, ix_s%end do y = iy_s%beg, iy_s%end do z = iz_s%beg, iz_s%end @@ -56,6 +56,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_fd_divergence diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index 0ee3102938..91eebe1992 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -111,7 +111,7 @@ contains end function f_is_integer subroutine s_configure_coordinate_bounds(weno_polyn, buff_size, idwint, idwbuff, & - viscous, bubbles_lagrange, m, n, p, num_dims, igr) + viscous, bubbles_lagrange, m, n, p, num_dims, igr) integer, intent(in) :: weno_polyn, m, n, p, num_dims integer, intent(inout) :: buff_size diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index da485fa3ac..2deb0c36ef 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -687,7 +687,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -698,9 +698,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -714,8 +715,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -729,9 +731,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 
1, nVar do l = 0, p do k = 0, buff_size - 1 @@ -744,9 +747,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 @@ -761,8 +765,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 @@ -777,9 +782,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if #:else - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -792,9 +798,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -809,8 +816,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -825,6 +833,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if #:endif end if @@ -879,7 +888,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -897,9 +906,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -913,8 +923,9 @@ 
contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -928,9 +939,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = 0, p do k = -buff_size, -1 @@ -949,9 +961,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 @@ -966,8 +979,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 @@ -982,10 +996,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if #:else ! 
Unpacking buffer from bc_z%beg - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1005,9 +1020,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1023,8 +1039,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1040,6 +1057,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if #:endif end if diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 5a64cb6d29..76e76e9d64 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -99,7 +99,7 @@ contains integer :: i, j, k, l ! 
starting equilibrium solver - $:GPU_PARALLEL_LOOP(collapse=3, private='[p_infOV, p_infpT, p_infSL, & + #:call GPU_PARALLEL_LOOP(collapse=3, private='[p_infOV, p_infpT, p_infSL, & & sk, hk, gk, ek, rhok,pS, pSOV, pSSL, & & TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, & & dynE, rhos, rho, rM, m1, m2, MCT, TvF]') @@ -272,6 +272,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_infinite_relaxation_k diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 32821fb679..33094bfb4f 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -873,9 +873,7 @@ contains end if #:endif - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, Re_K, & - & nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, & - & dyn_pres_K, rhoYks, B]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, dyn_pres_K, rhoYks, B]') do l = ibounds(3)%beg, ibounds(3)%end do k = ibounds(2)%beg, ibounds(2)%end do j = ibounds(1)%beg, ibounds(1)%end @@ -1165,6 +1163,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_convert_conservative_to_primitive_variables @@ -1489,8 +1488,7 @@ contains ! Computing the flux variables from the primitive variables, without ! 
accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_K, vel_K, & - & alpha_K, Re_K, Y_K]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_K, vel_K, alpha_K, Re_K, Y_K]') do l = is3b, is3e do k = is2b, is2e do j = is1b, is1e @@ -1596,6 +1594,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #endif end subroutine s_convert_primitive_to_flux_variables diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 1b328e3d1f..fe03b073d7 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -166,7 +166,7 @@ contains sim_time = t_step*dt - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -178,7 +178,7 @@ contains end do end do end do - ! !$omp end target teams distribute parallel do simd + #:endcall GPU_PARALLEL_LOOP ! Keep outer loop sequel because different sources can have very different number of points do ai = 1, num_source @@ -221,7 +221,7 @@ contains deallocate (phi_rn) - $:GPU_PARALLEL_LOOP(private='[myalpha,myalpha_rho]') + #:call GPU_PARALLEL_LOOP(private='[myalpha,myalpha_rho]') do i = 1, num_points j = source_spatials(ai)%coord(1, i) k = source_spatials(ai)%coord(2, i) @@ -318,10 +318,11 @@ contains end if end do + #:endcall GPU_PARALLEL_LOOP end do ! Update the rhs variables - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -337,6 +338,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_acoustic_src_calculations !> This subroutine gives the temporally varying amplitude of the pulse @@ -718,7 +720,7 @@ contains !! @param c Speed of sound !! @param ai Acoustic source index !! 
@return gauss_sigma_time_local Converted Gaussian sigma time - elemental function f_gauss_sigma_time_local(gauss_conv_flag, ai, c) + function f_gauss_sigma_time_local(gauss_conv_flag, ai, c) $:GPU_ROUTINE(parallelism='[seq]') logical, intent(in) :: gauss_conv_flag integer, intent(in) :: ai diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 1ea2c3be3b..9bf2f5a022 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -79,7 +79,7 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer :: i, j, k, l !< standard iterators - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -91,6 +91,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_mixture_density @@ -109,7 +110,7 @@ contains call s_compute_acceleration(mytime) call s_compute_mixture_density(q_cons_vf) - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = 0, p do k = 0, n @@ -119,10 +120,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (bf_x) then ! x-direction body forces - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -133,11 +135,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bf_y) then ! y-direction body forces - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -148,11 +151,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bf_z) then ! 
z-direction body forces - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -163,6 +167,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index d2aa192750..3e180d9585 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -76,7 +76,7 @@ contains real(wp) :: nR3bar integer(wp) :: i, j, k, l - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -89,6 +89,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_comp_alpha_from_n @@ -103,7 +104,7 @@ contains if (idir == 1) then if (.not. qbmm) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -115,11 +116,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if elseif (idir == 2) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -130,10 +132,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (idir == 3) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -144,6 +147,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if @@ -173,7 +177,7 @@ contains integer :: dmBub_id !< Dummy variables for unified subgrid bubble subroutines real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -189,9 +193,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(collapse=3, private='[Rtmp, Vtmp, myalpha_rho, myalpha]', & + #:call GPU_PARALLEL_LOOP(collapse=3, private='[Rtmp, Vtmp, myalpha_rho, myalpha]', & & reduction='[[adap_dt_stop_max]]', 
reductionOp='[MAX]', & & copy='[adap_dt_stop_max]') do l = 0, p @@ -321,11 +326,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. adap_dt) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do q = 0, n do i = 0, m @@ -344,6 +350,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_compute_bubble_EE_source diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index b1e06f2f85..3f4135dec3 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -529,7 +529,7 @@ contains ! Subgrid p_inf model based on Maeda and Colonius (2018). if (lag_params%pressure_corrector) then ! Calculate velocity potentials (valid for one bubble per cell) - $:GPU_PARALLEL_LOOP(private='[k,cell]') + #:call GPU_PARALLEL_LOOP(private='[k,cell]') do k = 1, nBubs call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) myR0 = bub_R0(k) @@ -546,11 +546,12 @@ contains bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) end if end do + #:endcall GPU_PARALLEL_LOOP end if ! Radial motion model adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & + #:call GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & & copy='[adap_dt_stop_max]',copyin='[stage]') do k = 1, nBubs @@ -616,17 +617,19 @@ contains adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) end do + #:endcall GPU_PARALLEL_LOOP if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") ! 
Bubbles remain in a fixed position - $:GPU_PARALLEL_LOOP(collapse=2, private='[k]', copyin='[stage]') + #:call GPU_PARALLEL_LOOP(collapse=2, private='[k]', copyin='[stage]') do k = 1, nBubs do l = 1, 3 mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp end do end do + #:endcall GPU_PARALLEL_LOOP call nvtxEndRange @@ -650,7 +653,7 @@ contains if (lag_params%solver_approach == 2) then if (p == 0) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do k = 0, p do j = 0, n do i = 0, m @@ -665,8 +668,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do k = 0, p do j = 0, n do i = 0, m @@ -680,13 +684,14 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if do l = 1, num_dims call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -699,9 +704,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP !source in energy - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(2)%beg, idwbuff(2)%end do i = idwbuff(1)%beg, idwbuff(1)%end @@ -709,10 +715,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -724,6 +731,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end do end if @@ -769,7 +777,7 @@ contains call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, q_beta_idx do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -779,12 +787,13 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & 
mtn_s, mtn_pos, q_beta) !Store 1-beta - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -795,6 +804,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP call nvtxEndRange @@ -1025,7 +1035,7 @@ contains integer :: k if (time_stepper == 1) then ! 1st order TVD RK - $:GPU_PARALLEL_LOOP(private='[k]') + #:call GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1035,6 +1045,7 @@ contains gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do + #:endcall GPU_PARALLEL_LOOP call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1047,7 +1058,7 @@ contains elseif (time_stepper == 2) then ! 2nd order TVD RK if (stage == 1) then - $:GPU_PARALLEL_LOOP(private='[k]') + #:call GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1057,9 +1068,10 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do + #:endcall GPU_PARALLEL_LOOP elseif (stage == 2) then - $:GPU_PARALLEL_LOOP(private='[k]') + #:call GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp @@ -1069,6 +1081,7 @@ contains gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp end do + #:endcall GPU_PARALLEL_LOOP call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1083,7 +1096,7 @@ contains elseif (time_stepper == 3) then ! 
3rd order TVD RK if (stage == 1) then - $:GPU_PARALLEL_LOOP(private='[k]') + #:call GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1093,9 +1106,10 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do + #:endcall GPU_PARALLEL_LOOP elseif (stage == 2) then - $:GPU_PARALLEL_LOOP(private='[k]') + #:call GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp @@ -1105,8 +1119,9 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do + #:endcall GPU_PARALLEL_LOOP elseif (stage == 3) then - $:GPU_PARALLEL_LOOP(private='[k]') + #:call GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) @@ -1116,6 +1131,7 @@ contains gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) end do + #:endcall GPU_PARALLEL_LOOP call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1192,7 +1208,7 @@ contains integer :: k - $:GPU_PARALLEL_LOOP(private='[k]') + #:call GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs gas_p(k, 2) = gas_p(k, 1) gas_mv(k, 2) = gas_mv(k, 1) @@ -1203,6 +1219,7 @@ contains mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_transfer_data_to_tmp @@ -1291,7 +1308,7 @@ contains if (dir == 1) then ! Gradient in x dir. 
- $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1303,10 +1320,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else if (dir == 2) then ! Gradient in y dir. - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1318,9 +1336,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else ! Gradient in z dir. - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1332,6 +1351,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -1414,9 +1434,7 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - $:GPU_PARALLEL_LOOP(collapse=3, reduction='[[lag_vol, lag_void_avg], & - & [lag_void_max]]', reductionOp='[+, MAX]', & - & copy='[lag_vol, lag_void_avg, lag_void_max]') + #:call GPU_PARALLEL_LOOP(collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') do k = 0, p do j = 0, n do i = 0, m @@ -1429,6 +1447,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #ifdef MFC_MPI if (num_procs > 1) then @@ -1600,7 +1619,7 @@ contains integer :: k - $:GPU_PARALLEL_LOOP(reduction='[[Rmax_glb], [Rmin_glb]]', & + #:call GPU_PARALLEL_LOOP(reduction='[[Rmax_glb], [Rmin_glb]]', & & reductionOp='[MAX, MIN]', copy='[Rmax_glb,Rmin_glb]') do k = 1, nBubs Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) @@ -1608,6 +1627,7 @@ contains Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_calculate_lag_bubble_stats diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 6f9c71a4c0..529b08bfd9 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp 
@@ -55,7 +55,7 @@ contains real(wp), dimension(3) :: s_coord integer :: l - $:GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') + #:call GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') do l = 1, nBubs volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp @@ -90,6 +90,7 @@ contains updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3 end if end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_deltafunc @@ -120,7 +121,7 @@ contains smearGridz = smearGrid if (p == 0) smearGridz = 1 - $:GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') + #:call GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') do l = 1, nBubs nodecoord(1:3) = 0 center(1:3) = 0._wp @@ -195,6 +196,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_gaussian diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 1cb84e55c7..e4682c575d 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -695,7 +695,7 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -706,8 +706,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -718,6 +719,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP ! 
PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2 else @@ -726,7 +728,7 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do j = 0, 1 do r = is3%beg, is3%end @@ -745,8 +747,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do j = 0, 1 do r = is3%beg, is3%end @@ -765,14 +768,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! FD2 or FD4 of RHS at j = 0 - $:GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, & - & mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, & - & dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, & - & dYs_ds, h_k, Cp_i, Gamma_i, Xs]') + #:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1103,6 +1104,7 @@ contains end do end do + #:endcall GPU_PARALLEL_LOOP end if #:endfor @@ -1163,7 +1165,7 @@ contains ! 
Reshaping Inputted Data in x-direction if (cbc_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1174,8 +1176,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1185,8 +1188,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1198,8 +1202,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1208,9 +1213,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1221,8 +1227,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1232,6 +1239,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! END: Reshaping Inputted Data in x-direction @@ -1239,7 +1247,7 @@ contains ! 
Reshaping Inputted Data in y-direction elseif (cbc_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1250,8 +1258,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1261,8 +1270,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1274,8 +1284,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1284,9 +1295,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1297,8 +1309,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1308,6 +1321,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! END: Reshaping Inputted Data in y-direction @@ -1315,7 +1329,7 @@ contains ! 
Reshaping Inputted Data in z-direction else - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1326,8 +1340,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1337,8 +1352,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1350,8 +1366,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1360,9 +1377,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1373,8 +1391,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1384,6 +1403,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -1413,7 +1433,7 @@ contains ! 
Reshaping Outputted Data in x-direction if (cbc_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1425,7 +1445,8 @@ contains end do end do end do - $:GPU_PARALLEL_LOOP(collapse=3) + #:endcall GPU_PARALLEL_LOOP + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1434,9 +1455,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1447,8 +1469,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1458,13 +1481,14 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! END: Reshaping Outputted Data in x-direction ! 
Reshaping Outputted Data in y-direction elseif (cbc_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1476,8 +1500,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1486,9 +1511,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1499,8 +1525,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1510,6 +1537,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! END: Reshaping Outputted Data in y-direction @@ -1517,7 +1545,7 @@ contains ! 
Reshaping Outputted Data in z-direction else - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1529,8 +1557,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1539,9 +1568,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1552,8 +1582,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1563,6 +1594,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 117429487b..9f80df014e 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -279,7 +279,7 @@ contains integer :: j, k, l ! Computing Stability Criteria at Current Time-step - $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') do l = 0, p do k = 0, n do j = 0, m @@ -296,6 +296,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP ! 
end: Computing Stability Criteria at Current Time-step diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index fabd15255e..d6eb430dcc 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -141,7 +141,7 @@ contains if (bc_y%beg >= 0) return #if defined(MFC_GPU) - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, cmplx_size @@ -149,8 +149,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p @@ -158,6 +159,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP p_real => data_real_gpu p_cmplx => data_cmplx_gpu @@ -175,7 +177,7 @@ contains Nfq = 3 $:GPU_UPDATE(device='[Nfq]') - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, Nfq @@ -183,6 +185,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') #if defined(__PGI) @@ -193,7 +196,7 @@ contains #endif #:endcall GPU_HOST_DATA - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p @@ -202,10 +205,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP do i = 1, fourier_rings - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, cmplx_size @@ -213,8 +217,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') do k = 1, sys_size do j = 0, m do l = 0, p @@ -222,6 +227,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx]') #if defined(__PGI) @@ -235,7 +241,7 @@ contains Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) $:GPU_UPDATE(device='[Nfq]') - 
$:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, Nfq @@ -243,6 +249,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') #if defined(__PGI) @@ -253,7 +260,7 @@ contains #endif #:endcall GPU_HOST_DATA - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') do k = 1, sys_size do j = 0, m do l = 0, p @@ -262,6 +269,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end do #:endcall GPU_DATA diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index ba9cc97850..9cb7bd4c43 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -106,8 +106,7 @@ contains real(wp) :: G_local integer :: j, k, l, i, r - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, rho, & - & gamma, pi_inf, qv, G_local, Re, tensora, tensorb]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, rho, gamma, pi_inf, qv, G_local, Re, tensora, tensorb]') do l = 0, p do k = 0, n do j = 0, m @@ -208,6 +207,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_hyperelastic_rmt_stress_update !> The following subroutine handles the calculation of the btensor. diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index f763928a64..84be088fae 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -104,7 +104,7 @@ contains ! calculate velocity gradients + rho_K and G_K ! TODO: re-organize these loops one by one for GPU efficiency if possible? 
- $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -112,8 +112,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -126,9 +127,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (ndirs > 1) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -136,8 +138,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -153,11 +156,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP ! 3D if (ndirs == 3) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -166,8 +170,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -187,10 +192,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -212,9 +218,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP ! 
apply rhs source term to elastic stress equation - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -226,9 +233,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (idir == 2) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -261,9 +269,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (idir == 3) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -327,11 +336,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (cyl_coord .and. idir == 2) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -359,6 +369,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if @@ -393,13 +404,14 @@ contains if (n == 0) then l = 0; q = 0 - $:GPU_PARALLEL_LOOP() + #:call GPU_PARALLEL_LOOP() do k = 0, m rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s end do + #:endcall GPU_PARALLEL_LOOP elseif (p == 0) then q = 0 - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, n do k = 0, m ! 
Maximum principal stress @@ -412,8 +424,9 @@ contains rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s end do end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -450,6 +463,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_compute_damage_state diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index df6ea1145b..97d31f9bc8 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -169,11 +169,7 @@ contains type(ghost_point) :: gp type(ghost_point) :: innerp - $:GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, & - & alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, & - & v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, & - & gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, & - & j,k,l,q]') + #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, j,k,l,q]') do i = 1, num_gps gp = ghost_points(i) @@ -315,11 +311,10 @@ contains end do end if end do + #:endcall GPU_PARALLEL_LOOP !Correct the state of the inner points in IBs - $:GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, & - & alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp, & - & j,k,l,q]') + #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]') do i = 1, num_inner_gps innerp = inner_points(i) @@ -332,6 +327,7 @@ contains q_cons_vf(q)%sf(j, k, l) = 0._wp end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_ibm_correct_state diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 9306dcb760..43d8aeef18 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -76,7 +76,7 @@ contains real(wp), dimension(3) :: v, B real(wp) :: divB, 
vdotB - $:GPU_PARALLEL_LOOP(collapse=3, private='[v, B]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[v, B]') do q = 0, p do l = 0, n do k = 0, m @@ -129,6 +129,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_mhd_powell_rhs diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index fc08409e36..915146fdfe 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -299,7 +299,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, n do j = 0, gp_layers - 1 @@ -308,8 +308,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -319,8 +320,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #:else - $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -330,6 +332,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #:endif end if #:endfor @@ -347,7 +350,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, n do j = -gp_layers, -1 @@ -356,8 +359,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = -gp_layers, -1 do j = -gp_layers, m + gp_layers @@ -367,9 +371,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #:else ! 
Unpacking buffer from bc_z%beg - $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') + #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = -gp_layers, -1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -380,6 +385,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #:endif end if #:endfor diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index 63fc17b328..5449e84058 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -70,7 +70,7 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer :: j, k, l - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -78,6 +78,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_pressure_relaxation_procedure diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index c7a75f2c96..c5fbd486b4 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -433,7 +433,7 @@ contains end select if (.not. polytropic) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[nb_q,nR,nR2,R,R2,nb_dot,nR_dot,nR2_dot,var,AX]') + #:call GPU_PARALLEL_LOOP(collapse=5,private='[nb_q,nR,nR2,R,R2,nb_dot,nR_dot,nR2_dot,var,AX]') do i = 1, nb do q = 1, nnode do l = 0, p @@ -534,11 +534,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! 
The following block is not repeated and is left as is if (idir == 1) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do q = 0, n do i = 0, m @@ -557,6 +558,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_compute_qbmm_rhs @@ -712,10 +714,7 @@ contains is1_qbmm = ix; is2_qbmm = iy; is3_qbmm = iz $:GPU_UPDATE(device='[is1_qbmm,is2_qbmm,is3_qbmm]') - $:GPU_PARALLEL_LOOP(collapse=3, private='[moms, msum, wght, abscX, & - & abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, & - & n_tait, B_tait, pres, rho, nbub, c, alf, momsum, & - & drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T]') do id3 = is3_qbmm%beg, is3_qbmm%end do id2 = is2_qbmm%beg, is2_qbmm%end do id1 = is1_qbmm%beg, is1_qbmm%end @@ -852,6 +851,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP contains ! Helper to select the correct coefficient routine diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index ae66f00011..526751323f 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -636,7 +636,7 @@ contains if (.not. igr) then ! Association/Population of Working Variables - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -646,11 +646,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP ! Converting Conservative to Primitive Variables if (mpp_lim .and. 
bubbles_euler) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -667,6 +668,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -908,7 +910,7 @@ contains ! END: Dimensional Splitting Loop if (ib) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -920,6 +922,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! Additional Physics and Source Temrs @@ -974,7 +977,7 @@ contains if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then if (.not. igr) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -984,6 +987,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -1015,7 +1019,7 @@ contains real(wp) :: advected_qty_val, pressure_val, velocity_val if (alt_soundspeed) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do q_loop = 0, p do l_loop = 0, n do k_loop = 0, m @@ -1038,6 +1042,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if select case (idir) @@ -1049,7 +1054,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') do j = 1, sys_size do q_loop = 0, p do l_loop = 0, n @@ -1062,10 +1067,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (model_eqns == 3) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, & - & pressure_val,flux_face1,flux_face2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') 
do q_loop = 0, p do l_loop = 0, n do k_loop = 0, m @@ -1082,6 +1087,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1094,7 +1100,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') do j = 1, sys_size do l = 0, p do k = 0, n @@ -1107,10 +1113,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (model_eqns == 3) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, & - & pressure_val,flux_face1,flux_face2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') do l = 0, p do k = 0, n do q = 0, m @@ -1132,10 +1138,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (cyl_coord) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') do j = 1, sys_size do l = 0, p do k = 0, n @@ -1148,6 +1155,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1161,8 +1169,7 @@ contains end if if (grid_geometry == 3) then ! 
Cylindrical Coordinates - $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,velocity_val, & - & flux_face1,flux_face2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,velocity_val,flux_face1,flux_face2]') do j = 1, sys_size do k = 0, p do q = 0, n @@ -1177,7 +1184,8 @@ contains end do end do end do - $:GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') + #:endcall GPU_PARALLEL_LOOP + #:call GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') do j = 1, sys_size do k = 0, p do q = 0, n @@ -1190,8 +1198,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else ! Cartesian Coordinates - $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') do j = 1, sys_size do k = 0, p do q = 0, n @@ -1204,11 +1213,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (model_eqns == 3) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, & - & pressure_val,flux_face1,flux_face2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') do k = 0, p do q = 0, n do l = 0, m @@ -1225,6 +1234,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1252,8 +1262,7 @@ contains case (1) ! x-direction use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & - & local_term_coeff,local_flux1,local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do q_idx = 0, p ! z_extent do l_idx = 0, n ! y_extent @@ -1268,12 +1277,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else ! 
Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, & - & local_q_cons_val, local_k_term_val, & - & local_term_coeff, local_flux1, local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) @@ -1284,11 +1292,9 @@ contains rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,& - & local_q_cons_val, local_k_term_val, & - & local_term_coeff, local_flux1, & - & local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) @@ -1299,10 +1305,10 @@ contains rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do + #:endcall GPU_PARALLEL_LOOP end if else ! 
NOT alt_soundspeed - $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & - & local_term_coeff,local_flux1,local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) @@ -1312,6 +1318,7 @@ contains rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do + #:endcall GPU_PARALLEL_LOOP end do end if end if @@ -1319,8 +1326,7 @@ contains case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & - & local_term_coeff,local_flux1,local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do l_idx = 0, p ! z_extent do k_idx = 0, n ! y_extent @@ -1335,13 +1341,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) 
then - $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, & - & local_q_cons_val, local_k_term_val, & - & local_term_coeff, local_flux1, & - & local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) @@ -1356,11 +1360,9 @@ contains (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) end if end do; end do; end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, & - & local_q_cons_val, local_k_term_val, & - & local_term_coeff, local_flux1, & - & local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) @@ -1375,10 +1377,10 @@ contains (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) end if end do; end do; end do + #:endcall GPU_PARALLEL_LOOP end if else ! 
NOT alt_soundspeed - $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & - & local_term_coeff,local_flux1,local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) @@ -1388,6 +1390,7 @@ contains rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do + #:endcall GPU_PARALLEL_LOOP end do end if end if @@ -1400,8 +1403,7 @@ contains end if if (use_standard_riemann) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & - & local_term_coeff,local_flux1,local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do k_idx = 0, p ! z_extent do q_idx = 0, n ! y_extent @@ -1416,13 +1418,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) 
then - $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, & - & local_q_cons_val, local_k_term_val, & - & local_term_coeff, local_flux1, & - & local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) @@ -1433,11 +1433,9 @@ contains rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, & - & local_q_cons_val, local_k_term_val, & - & local_term_coeff, local_flux1, & - & local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) @@ -1448,10 +1446,10 @@ contains rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do + #:endcall GPU_PARALLEL_LOOP end if else ! 
NOT alt_soundspeed - $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & - & local_term_coeff,local_flux1,local_flux2]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) @@ -1461,6 +1459,7 @@ contains rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do + #:endcall GPU_PARALLEL_LOOP end do end if end if @@ -1483,7 +1482,7 @@ contains if (idir == 1) then ! x-direction if (surface_tension) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1495,9 +1494,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1511,11 +1511,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (idir == 2) then ! y-direction if (surface_tension) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1527,6 +1528,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (cyl_coord .and. ((bc_y%beg == -2) .or. 
(bc_y%beg == -14))) then @@ -1547,7 +1549,7 @@ contains idwbuff(1), idwbuff(2), idwbuff(3)) end if - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do j = 0, m $:GPU_LOOP(parallelism='[seq]') @@ -1559,10 +1561,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 1, n do j = 0, m @@ -1576,9 +1579,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1592,6 +1596,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! Applying the geometrical viscous Riemann source fluxes calculated as average @@ -1599,7 +1604,7 @@ contains if (cyl_coord) then if ((bc_y%beg == -2) .or. (bc_y%beg == -14)) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 1, n do j = 0, m @@ -1613,9 +1618,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (viscous) then - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do j = 0, m $:GPU_LOOP(parallelism='[seq]') @@ -1626,10 +1632,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if else - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1643,14 +1650,14 @@ contains end do end do end do - + #:endcall GPU_PARALLEL_LOOP end if end if elseif (idir == 3) then ! 
z-direction if (surface_tension) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1662,9 +1669,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1678,9 +1686,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (grid_geometry == 3) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1696,6 +1705,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -1801,7 +1811,7 @@ contains $:GPU_UPDATE(device='[is1,is2,is3,iv]') if (recon_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -1812,8 +1822,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else if (recon_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -1824,8 +1835,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else if (recon_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -1836,6 +1848,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_reconstruct_cell_boundary_values_first_order diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index bae30f3fd6..9214cc4893 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -357,14 +357,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, & - & vel_L, vel_R, alpha_L, 
alpha_R, tau_e_L, tau_e_R, & - & G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, & - & s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, & - & Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, & - & Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, & - & pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, & - & zcoef, vel_L_tmp, vel_R_tmp]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1002,6 +995,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if #:endfor @@ -1188,14 +1182,7 @@ contains ! 6-EQUATION MODEL WITH HLLC if (model_eqns == 3) then !ME3 - $:GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, & - & vel_K_Star, Re_L, Re_R, rho_avg, h_avg, & - & gamma_avg, s_L, s_R, s_S, vel_avg_rms, & - & alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, & - & Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, & - & Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, & - & tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, & - & xi_field_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, vel_K_Star, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1618,13 +1605,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (model_eqns == 4) then !ME4 - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, & - & alpha_rho_R, 
vel_L, vel_R, alpha_L, alpha_R, & - & rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, & - & vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1871,14 +1856,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (model_eqns == 2 .and. bubbles_euler) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[R0_L, R0_R, V0_L, & - & V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, & - & vel_R, rho_avg, alpha_L, alpha_R, h_avg, & - & gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, & - & ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, & - & pcorr, zcoef, vel_L_tmp, vel_R_tmp]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -2341,17 +2322,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else ! 
5-EQUATION MODEL WITH HLLC - $:GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, & - & Re_L, Re_R, rho_avg, h_avg, gamma_avg, & - & alpha_L, alpha_R, s_L, s_R, s_S, & - & vel_avg_rms, pcorr, zcoef, vel_L_tmp, & - & vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, & - & Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, & - & tau_e_R, xi_field_L, xi_field_R, Yi_avg, & - & Phi_avg, h_iL, h_iR, h_avg_2]', & - & copyin='[is1, is2, is3]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -2855,6 +2829,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if #:endfor @@ -2967,12 +2942,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, & - & alpha_rho_R, vel, alpha_L, alpha_R, rho, pres, & - & E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, & - & c, c_fast, pres_mag, U_L, U_R, U_starL, & - & U_starR, U_doubleL, U_doubleR, F_L, F_R, & - & F_starL, F_starR, F_hlld]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -3144,6 +3114,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if #:endfor @@ -3346,7 +3317,7 @@ contains if (norm_dir == 1) then if (bc_x%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at beginning - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3355,9 +3326,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (viscous) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3367,9 +3339,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (n > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3379,9 +3352,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (p > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3391,6 +3365,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -3401,7 +3376,7 @@ contains if (bc_x%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at end - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3410,10 +3385,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (viscous) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3423,9 +3399,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (n > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3435,9 +3412,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (p > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3447,6 +3425,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -3460,7 +3439,7 @@ contains elseif (norm_dir == 2) then if (bc_y%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at beginning - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3469,10 +3448,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (viscous) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3481,8 +3461,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3491,9 +3472,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (p > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3502,6 +3484,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -3510,7 +3493,7 @@ contains if (bc_y%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at end - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3519,10 +3502,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (viscous) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3531,8 +3515,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3541,9 +3526,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (p > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3552,6 +3538,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -3563,7 +3550,7 @@ contains else if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at beginning - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3572,9 +3559,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (viscous) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3583,7 +3571,8 @@ contains end do end do end do - $:GPU_PARALLEL_LOOP(collapse=3) + #:endcall GPU_PARALLEL_LOOP + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3592,7 +3581,8 @@ contains end do end do end do - $:GPU_PARALLEL_LOOP(collapse=3) + #:endcall GPU_PARALLEL_LOOP + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3601,13 +3591,14 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if if (bc_z%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3616,9 +3607,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (viscous) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3627,8 +3619,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3637,8 +3630,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3647,6 +3641,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -3690,7 +3685,7 @@ contains if (viscous .or. 
(surface_tension)) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3700,11 +3695,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (qbmm) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3714,13 +3710,14 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! Reshaping Inputted Data in y-direction elseif (norm_dir == 2) then if (viscous .or. (surface_tension)) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -3730,10 +3727,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (qbmm) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3743,13 +3741,14 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! Reshaping Inputted Data in z-direction else if (viscous .or. (surface_tension)) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -3759,10 +3758,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (qbmm) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3772,6 +3772,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -3828,10 +3829,7 @@ contains integer :: i_vel !!< Loop iterator for velocity components. integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging. 
- $:GPU_PARALLEL_LOOP(collapse=3, private='[idx_rp, avg_v_int, & - & avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, & - & vel_src_int, r_eff, divergence_cyl, stress_vector_shear, & - & stress_normal_bulk, div_v_term_const]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[idx_rp, avg_v_int, avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, vel_src_int, r_eff, divergence_cyl, stress_vector_shear, stress_normal_bulk, div_v_term_const]') do l = iz%beg, iz%end do k = iy%beg, iy%end do j = ix%beg, ix%end @@ -3937,6 +3935,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_cylindrical_viscous_source_flux @@ -3990,9 +3989,7 @@ contains real(wp) :: divergence_v !< Velocity divergence at interface. - $:GPU_PARALLEL_LOOP(collapse=3, private='[idx_right_phys, vel_grad_avg, & - & current_tau_shear, current_tau_bulk, vel_src_at_interface, & - & Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[idx_right_phys, vel_grad_avg, current_tau_shear, current_tau_bulk, vel_src_at_interface, Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]') do l_loop = isz%beg, isz%end do k_loop = isy%beg, isy%end do j_loop = isx%beg, isx%end @@ -4073,6 +4070,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_cartesian_viscous_source_flux @@ -4157,7 +4155,7 @@ contains ! 
Reshaping Outputted Data in y-direction if (norm_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4168,9 +4166,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (cyl_coord) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4181,9 +4180,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3%beg, is3%end do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4192,9 +4192,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. riemann_solver == 4) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = advxb + 1, advxe do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4205,11 +4206,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! Reshaping Outputted Data in z-direction elseif (norm_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4221,8 +4223,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (grid_geometry == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4234,9 +4237,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do j = is1%beg, is1%end do k = is2%beg, is2%end do l = is3%beg, is3%end @@ -4245,9 +4249,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. 
riemann_solver == 4) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = advxb + 1, advxe do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4258,10 +4263,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if elseif (norm_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4272,8 +4278,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -4282,9 +4289,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. riemann_solver == 4) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = advxb + 1, advxe do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4295,6 +4303,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 8c61a422fb..c817f7cce5 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -85,9 +85,7 @@ contains integer :: j, k, l, i if (id == 1) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, & - & w1R, w2R, w3R, w1, w2, w3, normWL, & - & normWR, normW]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -129,12 +127,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (id == 2) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, & - & w1R, w2R, w3R, w1, w2, w3, normWL, normWR, & - & normW]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') do 
l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -176,12 +173,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (id == 3) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, & - & w1R, w2R, w3R, w1, w2, w3, normWL, normWR, & - & normW]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -223,6 +219,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if @@ -243,7 +240,7 @@ contains isx%end = m; isy%end = n; isz%end = p ! compute gradient components - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -252,8 +249,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -262,9 +260,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (p > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -273,9 +272,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -291,6 +291,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP call s_populate_capillary_buffers(c_divs, bc_type) @@ -338,7 +339,7 @@ contains $:GPU_UPDATE(device='[is1,is2,is3,iv]') if (recon_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -349,8 +350,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else if (recon_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, 
is2%end @@ -361,8 +363,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else if (recon_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -373,6 +376,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_reconstruct_cell_boundary_values_capillary diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index d040650bfa..84aa0b4107 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -390,7 +390,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -402,10 +402,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP + !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -419,10 +421,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -436,6 +439,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, dt) @@ -496,7 +500,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -508,10 +512,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. 
polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -526,9 +531,10 @@ contains end do end do end if + #:endcall GPU_PARALLEL_LOOP if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -542,6 +548,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) @@ -568,7 +575,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -581,9 +588,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -598,10 +606,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -616,6 +625,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) @@ -682,7 +692,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -694,10 +704,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. 
polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -711,10 +722,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -728,6 +740,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) @@ -754,7 +767,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -767,9 +780,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -784,10 +798,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -802,6 +817,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp) @@ -827,7 +843,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -840,9 +856,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (qbmm .and. (.not. polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -857,10 +874,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (qbmm .and. (.not. 
polytropic)) then - $:GPU_PARALLEL_LOOP(collapse=5) + #:call GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -875,6 +893,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) @@ -1002,7 +1021,7 @@ contains idwint) end if - $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') do l = 0, p do k = 0, n do j = 0, m @@ -1019,6 +1038,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL(copyout='[dt_local]', copyin='[max_dt]') dt_local = minval(max_dt) @@ -1049,7 +1069,7 @@ contains call nvtxStartRange("RHS-BODYFORCES") call s_compute_body_forces_rhs(q_prim_vf_in, q_cons_vf, rhs_vf_in) - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = 0, p do k = 0, n @@ -1060,6 +1080,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP call nvtxEndRange diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 2aeec0a532..d22fa9c5dd 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -77,7 +77,7 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -88,9 +88,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP + if (shear_stress) then ! 
Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, & - & alpha_rho_visc, Re_visc, tau_Re]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -194,11 +195,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, & - & alpha_rho_visc, Re_visc, tau_Re]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -297,13 +298,13 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (p == 0) return if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, & - & alpha_rho_visc, Re_visc, tau_Re]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -408,11 +409,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bulk_stress) then ! 
Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, & - & alpha_rho_visc, Re_visc, tau_Re]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -509,6 +510,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_compute_viscous_stress_tensor @@ -593,7 +595,7 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = iy%beg, iy%end do j = is1_viscous%beg + 1, is1_viscous%end @@ -607,8 +609,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 @@ -622,10 +625,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (n > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -639,8 +643,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -654,8 +659,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -673,8 +679,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 
is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -693,8 +700,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg + 1, is1_viscous%end @@ -713,8 +721,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end - 1 @@ -733,10 +742,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (p > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -751,8 +761,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -767,8 +778,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end @@ -788,8 +800,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 @@ -809,8 +822,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 
is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -830,8 +844,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -851,8 +866,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -872,8 +888,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -893,7 +910,8 @@ contains end do end do end do - $:GPU_PARALLEL_LOOP(collapse=3) + #:endcall GPU_PARALLEL_LOOP + #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -913,7 +931,8 @@ contains end do end do end do - $:GPU_PARALLEL_LOOP(collapse=3) + #:endcall GPU_PARALLEL_LOOP + #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -932,6 +951,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP do i = iv%beg, iv%end call s_compute_fd_gradient(q_prim_qp%vf(i), & @@ -1023,7 +1043,7 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1034,8 +1054,9 @@ contains 
end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (norm_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1046,8 +1067,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (norm_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1058,6 +1080,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if end if @@ -1122,7 +1145,7 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1133,8 +1156,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (norm_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1145,8 +1169,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (norm_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1157,6 +1182,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end if end if @@ -1210,7 +1236,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. 
- $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -1226,6 +1252,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP ! END: First-Order Spatial Derivatives in x-direction @@ -1238,7 +1265,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end @@ -1254,6 +1281,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP ! END: First-Order Spatial Derivatives in y-direction @@ -1266,7 +1294,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1282,6 +1310,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! 
END: First-Order Spatial Derivatives in z-direction @@ -1321,7 +1350,7 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1331,9 +1360,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (n > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1343,10 +1373,11 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if if (p > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1356,9 +1387,10 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(idwbuff(1)%beg, k, l) = & @@ -1369,8 +1401,9 @@ contains (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) end do end do + #:endcall GPU_PARALLEL_LOOP if (n > 0) then - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, idwbuff(2)%beg, l) = & @@ -1381,8 +1414,9 @@ contains (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) end do end do + #:endcall GPU_PARALLEL_LOOP if (p > 0) then - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, idwbuff(3)%beg) = & @@ -1393,49 +1427,54 @@ contains (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) end do end do + #:endcall GPU_PARALLEL_LOOP end if end if if 
(bc_x%beg <= BC_GHOST_EXTRAP) then - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & (x_cc(2) - x_cc(0)) end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bc_x%end <= BC_GHOST_EXTRAP) then - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & (x_cc(m) - x_cc(m - 2)) end do end do + #:endcall GPU_PARALLEL_LOOP end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_NULL) then - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & (y_cc(2) - y_cc(0)) end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bc_y%end <= BC_GHOST_EXTRAP) then - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & (y_cc(n) - y_cc(n - 2)) end do end do + #:endcall GPU_PARALLEL_LOOP end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAP) then - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, 0) = & @@ -1443,9 +1482,10 @@ contains (z_cc(2) - z_cc(0)) end do end do + #:endcall GPU_PARALLEL_LOOP end if if (bc_z%end <= BC_GHOST_EXTRAP) then - $:GPU_PARALLEL_LOOP(collapse=2) + #:call GPU_PARALLEL_LOOP(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, p) = & @@ 
-1453,6 +1493,7 @@ contains (z_cc(p) - z_cc(p - 2)) end do end do + #:endcall GPU_PARALLEL_LOOP end if end if end if diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 5c525c4664..9dd9dcfbf7 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -668,7 +668,7 @@ contains if (weno_order == 1) then if (weno_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -679,8 +679,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else if (weno_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -691,8 +692,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else if (weno_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -703,11 +705,12 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[beta,dvd,poly,omega,alpha,tau]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[beta,dvd,poly,omega,alpha,tau]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -780,12 +783,13 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if #:endfor elseif (weno_order == 5) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta]') + #:call GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, 
is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -894,6 +898,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP if (mp_weno) then call s_preserve_monotonicity(v_rs_ws_${XYZ}$, vL_rs_vf_${XYZ}$, & @@ -904,7 +909,7 @@ contains elseif (weno_order == 7) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v]') + #:call GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -1089,6 +1094,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if #:endfor @@ -1126,7 +1132,7 @@ contains $:GPU_UPDATE(device='[v_size]') if (weno_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1136,13 +1142,14 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! Reshaping/Projecting onto Characteristic Fields in y-direction if (n == 0) return if (weno_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1152,13 +1159,14 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if ! 
Reshaping/Projecting onto Characteristic Fields in z-direction if (p == 0) return if (weno_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1168,6 +1176,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_initialize_weno @@ -1217,7 +1226,7 @@ contains real(wp), parameter :: alpha_mp = 2._wp real(wp), parameter :: beta_mp = 4._wp/3._wp - $:GPU_PARALLEL_LOOP(collapse=4,private='[d]') + #:call GPU_PARALLEL_LOOP(collapse=4,private='[d]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -1342,6 +1351,7 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_preserve_monotonicity From 29e9404cae87bfcab60445df9dc6a28832f502d0 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 21 Jul 2025 17:16:46 -0400 Subject: [PATCH 09/60] Ran formatter --- src/common/include/parallel_macros.fpp | 9 +- src/common/m_boundary_common.fpp | 402 +- src/common/m_chemistry.fpp | 32 +- src/common/m_finite_differences.fpp | 60 +- src/common/m_mpi_common.fpp | 364 +- src/common/m_phase_change.fpp | 237 +- src/common/m_variables_conversion.fpp | 640 +-- src/simulation/m_acoustic_src.fpp | 194 +- src/simulation/m_body_forces.fpp | 76 +- src/simulation/m_bubbles.fpp | 20 +- src/simulation/m_bubbles_EE.fpp | 314 +- src/simulation/m_bubbles_EL.fpp | 482 +- src/simulation/m_bubbles_EL_kernels.fpp | 194 +- src/simulation/m_cbc.fpp | 1044 ++--- src/simulation/m_data_output.fpp | 22 +- src/simulation/m_derived_variables.fpp | 2 +- src/simulation/m_fftw.fpp | 122 +- src/simulation/m_hyperelastic.fpp | 182 +- src/simulation/m_hypoelastic.fpp | 476 +- src/simulation/m_ibm.fpp | 260 +- src/simulation/m_mhd.fpp | 92 +- src/simulation/m_mpi_proxy.fpp | 82 +- src/simulation/m_pressure_relaxation.fpp | 10 +- src/simulation/m_qbmm.fpp | 428 +- src/simulation/m_rhs.fpp 
| 880 ++-- src/simulation/m_riemann_solvers.fpp | 5134 +++++++++++----------- src/simulation/m_surface_tension.fpp | 240 +- src/simulation/m_time_steppers.fpp | 932 ++-- src/simulation/m_viscous.fpp | 1390 +++--- src/simulation/m_weno.fpp | 924 ++-- 30 files changed, 7620 insertions(+), 7624 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index ee0577613a..a2199f9cfd 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -1,4 +1,3 @@ -#:mute #:include 'shared_parallel_macros.fpp' #:include 'omp_macros.fpp' #:include 'acc_macros.fpp' @@ -109,8 +108,6 @@ $:link_val #:enddef - - #:def GEN_PARALLELISM_STR(parallelism) #:if parallelism is not None #:assert isinstance(parallelism, str) @@ -167,7 +164,7 @@ #:def GPU_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) - + #:set acc_code = ACC_PARALLEL(code, private, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) #:set omp_code = OMP_PARALLEL(code, private, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) @@ -185,7 +182,7 @@ & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) - + #:set acc_code = ACC_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) #:set omp_code = 
OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) @@ -346,6 +343,4 @@ #endif #:enddef - -#:endmute ! New line at end of file is required for FYPP diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index fb61b78d96..bb92ec70c1 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -92,29 +92,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb_in, mv_in) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (int(bc_type(1, -1)%sf(0, k, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 1, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 1, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 1, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 1, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 1, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(1, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(1, -1, k, l, pb_in, mv_in) - end if + do l = 0, p + do k = 0, n + select case (int(bc_type(1, -1)%sf(0, k, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 1, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 1, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 1, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 1, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 1, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(1, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, -1, k, l, pb_in, mv_in) + end if + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -122,29 +122,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb_in, mv_in) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (int(bc_type(1, 1)%sf(0, k, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 1, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 1, 1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 1, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 1, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 1, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(1, 1, k, l, pb_in, mv_in) - end if + do l = 0, p + do k = 0, n + select case (int(bc_type(1, 1)%sf(0, k, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end + call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 1, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 1, 1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 1, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 1, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 1, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, 1, k, l, pb_in, mv_in) + end if + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -156,32 +156,32 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb_in, mv_in) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (int(bc_type(2, -1)%sf(k, 0, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) - case (BC_AXIS) - call s_axis(q_prim_vf, pb_in, mv_in, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 2, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 2, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 2, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, -1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & - (bc_type(2, -1)%sf(k, 0, l) /= BC_AXIS)) then - call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in) - end if + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, -1)%sf(k, 0, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) + case (BC_AXIS) + call s_axis(q_prim_vf, pb_in, mv_in, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 2, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 2, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 2, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, -1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. 
& + (bc_type(2, -1)%sf(k, 0, l) /= BC_AXIS)) then + call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in) + end if + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -189,29 +189,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb_in, mv_in) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (int(bc_type(2, 1)%sf(k, 0, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 2, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 2, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 2, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in) - end if + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, 1)%sf(k, 0, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 2, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 2, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 2, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in) + end if + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -223,29 +223,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb_in, mv_in) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (int(bc_type(3, -1)%sf(k, l, 0))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 3, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 3, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 3, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 3, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 3, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(3, -1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(3, -1, k, l, pb_in, mv_in) - end if + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (int(bc_type(3, -1)%sf(k, l, 0))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 3, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 3, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 3, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 3, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 3, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(3, -1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, -1, k, l, pb_in, mv_in) + end if + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -253,29 +253,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb_in, mv_in) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (int(bc_type(3, 1)%sf(k, l, 0))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 3, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 3, 1, k, l, pb_in, mv_in) - case (BC_SlIP_WALL) - call s_slip_wall(q_prim_vf, 3, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 3, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 3, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(3, 1, k, l, pb_in, mv_in) - end if + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (int(bc_type(3, 1)%sf(k, l, 0))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 3, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 3, 1, k, l, pb_in, mv_in) + case (BC_SlIP_WALL) + call s_slip_wall(q_prim_vf, 3, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 3, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 3, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, 1, k, l, pb_in, mv_in) + end if + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if ! 
END: Population of Buffers in z-direction @@ -1169,18 +1169,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, -1)%sf(0, k, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 1, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 1, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 1, -1, k, l) - end select + do l = 0, p + do k = 0, n + select case (bc_type(1, -1)%sf(0, k, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 1, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 1, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 1, -1, k, l) + end select + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1188,18 +1188,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, 1)%sf(0, k, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 1, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 1, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 1, 1, k, l) - end select + do l = 0, p + do k = 0, n + select case (bc_type(1, 1)%sf(0, k, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 1, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 1, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 1, 1, k, l) + end select + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1210,18 +1210,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (bc_type(2, -1)%sf(k, 0, l)) - case (BC_PERIODIC) - call 
s_color_function_periodic(c_divs, 2, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 2, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l) - end select + do l = 0, p + do k = -buff_size, m + buff_size + select case (bc_type(2, -1)%sf(k, 0, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 2, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 2, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l) + end select + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1229,18 +1229,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (bc_type(2, 1)%sf(k, 0, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 2, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 2, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l) - end select + do l = 0, p + do k = -buff_size, m + buff_size + select case (bc_type(2, 1)%sf(k, 0, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 2, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 2, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l) + end select + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1251,18 +1251,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (bc_type(3, -1)%sf(k, l, 0)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 3, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 3, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 3, -1, k, 
l) - end select + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (bc_type(3, -1)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 3, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 3, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 3, -1, k, l) + end select + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1270,18 +1270,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else #:call GPU_PARALLEL_LOOP(collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (bc_type(3, 1)%sf(k, l, 0)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 3, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 3, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 3, 1, k, l) - end select + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (bc_type(3, 1)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 3, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 3, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 3, 1, k, l) + end select + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end subroutine s_populate_capillary_buffers diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index a2ed576ff9..e9e5bc5ee8 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -100,32 +100,32 @@ contains real(wp), dimension(num_species) :: omega #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') - do z = bounds(3)%beg, bounds(3)%end - do y = bounds(2)%beg, bounds(2)%end - do x = bounds(1)%beg, bounds(1)%end + do z = bounds(3)%beg, bounds(3)%end + do y = bounds(2)%beg, bounds(2)%end + do x = bounds(1)%beg, bounds(1)%end - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe 
- Ys(eqn - chemxb + 1) = q_prim_qp(eqn)%sf(x, y, z) - end do + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Ys(eqn - chemxb + 1) = q_prim_qp(eqn)%sf(x, y, z) + end do - rho = q_cons_qp(contxe)%sf(x, y, z) - T = q_T_sf%sf(x, y, z) + rho = q_cons_qp(contxe)%sf(x, y, z) + T = q_T_sf%sf(x, y, z) - call get_net_production_rates(rho, T, Ys, omega) + call get_net_production_rates(rho, T, Ys, omega) - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe - omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) + omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) - rhs_vf(eqn)%sf(x, y, z) = rhs_vf(eqn)%sf(x, y, z) + omega_m + rhs_vf(eqn)%sf(x, y, z) = rhs_vf(eqn)%sf(x, y, z) + omega_m - end do + end do + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_chemistry_reaction_flux diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index 07dee3c024..c01953e216 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -19,43 +19,43 @@ contains real(wp) :: divergence #:call GPU_PARALLEL_LOOP(collapse=3, private='[divergence]') - do x = ix_s%beg, ix_s%end - do y = iy_s%beg, iy_s%end - do z = iz_s%beg, iz_s%end - - if (x == ix_s%beg) then - divergence = (-3._wp*fields(1)%sf(x, y, z) + 4._wp*fields(1)%sf(x + 1, y, z) - fields(1)%sf(x + 2, y, z))/(x_cc(x + 2) - x_cc(x)) - else if (x == ix_s%end) then - divergence = (+3._wp*fields(1)%sf(x, y, z) - 4._wp*fields(1)%sf(x - 1, y, z) + fields(1)%sf(x - 2, y, z))/(x_cc(x) - x_cc(x - 2)) - else - divergence = (fields(1)%sf(x + 1, y, z) - fields(1)%sf(x - 1, y, z))/(x_cc(x + 1) - x_cc(x - 1)) - end if - - if (n > 0) then - if (y == iy_s%beg) then - divergence = divergence + (-3._wp*fields(2)%sf(x, y, z) + 4._wp*fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y + 2, z))/(y_cc(y + 2) - y_cc(y)) - else if (y == iy_s%end) then - 
divergence = divergence + (+3._wp*fields(2)%sf(x, y, z) - 4._wp*fields(2)%sf(x, y - 1, z) + fields(2)%sf(x, y - 2, z))/(y_cc(y) - y_cc(y - 2)) + do x = ix_s%beg, ix_s%end + do y = iy_s%beg, iy_s%end + do z = iz_s%beg, iz_s%end + + if (x == ix_s%beg) then + divergence = (-3._wp*fields(1)%sf(x, y, z) + 4._wp*fields(1)%sf(x + 1, y, z) - fields(1)%sf(x + 2, y, z))/(x_cc(x + 2) - x_cc(x)) + else if (x == ix_s%end) then + divergence = (+3._wp*fields(1)%sf(x, y, z) - 4._wp*fields(1)%sf(x - 1, y, z) + fields(1)%sf(x - 2, y, z))/(x_cc(x) - x_cc(x - 2)) else - divergence = divergence + (fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y - 1, z))/(y_cc(y + 1) - y_cc(y - 1)) + divergence = (fields(1)%sf(x + 1, y, z) - fields(1)%sf(x - 1, y, z))/(x_cc(x + 1) - x_cc(x - 1)) end if - end if - if (p > 0) then - if (z == iz_s%beg) then - divergence = divergence + (-3._wp*fields(3)%sf(x, y, z) + 4._wp*fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z + 2))/(z_cc(z + 2) - z_cc(z)) - else if (z == iz_s%end) then - divergence = divergence + (+3._wp*fields(3)%sf(x, y, z) - 4._wp*fields(3)%sf(x, y, z - 1) + fields(2)%sf(x, y, z - 2))/(z_cc(z) - z_cc(z - 2)) - else - divergence = divergence + (fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z - 1))/(z_cc(z + 1) - z_cc(z - 1)) + if (n > 0) then + if (y == iy_s%beg) then + divergence = divergence + (-3._wp*fields(2)%sf(x, y, z) + 4._wp*fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y + 2, z))/(y_cc(y + 2) - y_cc(y)) + else if (y == iy_s%end) then + divergence = divergence + (+3._wp*fields(2)%sf(x, y, z) - 4._wp*fields(2)%sf(x, y - 1, z) + fields(2)%sf(x, y - 2, z))/(y_cc(y) - y_cc(y - 2)) + else + divergence = divergence + (fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y - 1, z))/(y_cc(y + 1) - y_cc(y - 1)) + end if + end if + + if (p > 0) then + if (z == iz_s%beg) then + divergence = divergence + (-3._wp*fields(3)%sf(x, y, z) + 4._wp*fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z + 2))/(z_cc(z + 2) - z_cc(z)) + else if (z == iz_s%end) then + 
divergence = divergence + (+3._wp*fields(3)%sf(x, y, z) - 4._wp*fields(3)%sf(x, y, z - 1) + fields(2)%sf(x, y, z - 2))/(z_cc(z) - z_cc(z - 2)) + else + divergence = divergence + (fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z - 1))/(z_cc(z + 1) - z_cc(z - 1)) + end if end if - end if - div%sf(x, y, z) = div%sf(x, y, z) + divergence + div%sf(x, y, z) = div%sf(x, y, z) + divergence + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_fd_divergence @@ -71,7 +71,7 @@ contains !! @param s_cc Locations of the cell-centers in the s-coordinate direction !! @param fd_coeff_s Finite-diff. coefficients in the s-coordinate direction subroutine s_compute_finite_difference_coefficients(q, s_cc, fd_coeff_s, local_buff_size, & - fd_number_in, fd_order_in, offset_s) + fd_number_in, fd_order_in, offset_s) integer :: lB, lE !< loop bounds integer, intent(IN) :: q diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 2deb0c36ef..098dbe82d6 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -688,151 +688,151 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVar - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVar + r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = pb_in(j + pack_offset, k, l, i - nVar, q) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, 
nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = pb_in(j + pack_offset, k, l, i - nVar, q) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = mv_in(j + pack_offset, k, l, i - nVar, q) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = mv_in(j + pack_offset, k, l, i - nVar, q) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:elif mpi_dir == 2 #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) + do i = 1, nVar + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = pb_in(j, k + pack_offset, l, i - nVar, q) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & 
+ (k + buff_size*l)) + buff_send(r) = pb_in(j, k + pack_offset, l, i - nVar, q) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = mv_in(j, k + pack_offset, l, i - nVar, q) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = mv_in(j, k + pack_offset, l, i - nVar, q) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:else #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = pb_in(j, k, l + pack_offset, i - nVar, q) + do i = nVar + 1, nVar + 4 + do l = 0, 
buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = pb_in(j, k, l + pack_offset, i - nVar, q) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = mv_in(j, k, l + pack_offset, i - nVar, q) + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = mv_in(j, k, l + pack_offset, i - nVar, q) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:endif @@ -889,174 +889,174 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVar - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVar + r = (i - 1) + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif + end do end do end do end do - end do 
#:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + pb_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + mv_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:elif mpi_dir == 2 #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) + do i = 1, nVar + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if 
(ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + pb_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + mv_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:else ! 
Unpacking buffer from bc_z%beg #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) + do i = 1, nVar + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (qbmm_comm) then #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + pb_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + 
buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + mv_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:endif diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 76e76e9d64..c36cff840d 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -99,179 +99,176 @@ contains integer :: i, j, k, l ! starting equilibrium solver - #:call GPU_PARALLEL_LOOP(collapse=3, private='[p_infOV, p_infpT, p_infSL, & - & sk, hk, gk, ek, rhok,pS, pSOV, pSSL, & - & TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, & - & dynE, rhos, rho, rM, m1, m2, MCT, TvF]') - do j = 0, m - do k = 0, n - do l = 0, p + #:call GPU_PARALLEL_LOOP(collapse=3, private='[p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF]') + do j = 0, m + do k = 0, n + do l = 0, p - rho = 0.0_wp; TvF = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + rho = 0.0_wp; TvF = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - ! Mixture density - rho = rho + q_cons_vf(i + contxb - 1)%sf(j, k, l) + ! Mixture density + rho = rho + q_cons_vf(i + contxb - 1)%sf(j, k, l) - ! Total Volume Fraction - TvF = TvF + q_cons_vf(i + advxb - 1)%sf(j, k, l) + ! Total Volume Fraction + TvF = TvF + q_cons_vf(i + advxb - 1)%sf(j, k, l) - end do + end do - ! 
calculating the total reacting mass for the phase change process. By hypothesis, this should not change - ! throughout the phase-change process. - rM = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + q_cons_vf(vp + contxb - 1)%sf(j, k, l) + ! calculating the total reacting mass for the phase change process. By hypothesis, this should not change + ! throughout the phase-change process. + rM = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + q_cons_vf(vp + contxb - 1)%sf(j, k, l) - ! correcting negative (reacting) mass fraction values in case they happen - call s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l) + ! correcting negative (reacting) mass fraction values in case they happen + call s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l) - ! fixing m1 and m2 AFTER correcting the partial densities. Note that these values must be stored for the phase - ! change process that will happen a posteriori - m1 = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + ! fixing m1 and m2 AFTER correcting the partial densities. Note that these values must be stored for the phase + ! change process that will happen a posteriori + m1 = q_cons_vf(lp + contxb - 1)%sf(j, k, l) - m2 = q_cons_vf(vp + contxb - 1)%sf(j, k, l) + m2 = q_cons_vf(vp + contxb - 1)%sf(j, k, l) - ! kinetic energy as an auxiliary variable to the calculation of the total internal energy - dynE = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe + ! kinetic energy as an auxiliary variable to the calculation of the total internal energy + dynE = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe - dynE = dynE + 5.0e-1_wp*q_cons_vf(i)%sf(j, k, l)**2/rho + dynE = dynE + 5.0e-1_wp*q_cons_vf(i)%sf(j, k, l)**2/rho - end do + end do - ! calculating the total energy that MUST be preserved throughout the pT- and pTg-relaxation procedures - ! at each of the cells. The internal energy is calculated as the total energy minus the kinetic - ! 
energy to preserved its value at sharp interfaces - rhoe = q_cons_vf(E_idx)%sf(j, k, l) - dynE + ! calculating the total energy that MUST be preserved throughout the pT- and pTg-relaxation procedures + ! at each of the cells. The internal energy is calculated as the total energy minus the kinetic + ! energy to preserved its value at sharp interfaces + rhoe = q_cons_vf(E_idx)%sf(j, k, l) - dynE - ! Calling pT-equilibrium for either finishing phase-change module, or as an IC for the pTg-equilibrium - ! for this case, MFL cannot be either 0 or 1, so I chose it to be 2 - call s_infinite_pt_relaxation_k(j, k, l, 2, pS, p_infpT, q_cons_vf, rhoe, TS) + ! Calling pT-equilibrium for either finishing phase-change module, or as an IC for the pTg-equilibrium + ! for this case, MFL cannot be either 0 or 1, so I chose it to be 2 + call s_infinite_pt_relaxation_k(j, k, l, 2, pS, p_infpT, q_cons_vf, rhoe, TS) - ! check if pTg-equilibrium is required - ! NOTE that NOTHING else needs to be updated OTHER than the individual partial densities - ! given the outputs from the pT- and pTg-equilibrium solvers are just p and one of the partial masses - ! (pTg- case) - if ((relax_model == 6) .and. ((q_cons_vf(lp + contxb - 1)%sf(j, k, l) > mixM*rM) & - .and. (q_cons_vf(vp + contxb - 1)%sf(j, k, l) > mixM*rM)) & - .and. (pS < pCr) .and. (TS < TCr)) then + ! check if pTg-equilibrium is required + ! NOTE that NOTHING else needs to be updated OTHER than the individual partial densities + ! given the outputs from the pT- and pTg-equilibrium solvers are just p and one of the partial masses + ! (pTg- case) + if ((relax_model == 6) .and. ((q_cons_vf(lp + contxb - 1)%sf(j, k, l) > mixM*rM) & + .and. (q_cons_vf(vp + contxb - 1)%sf(j, k, l) > mixM*rM)) & + .and. (pS < pCr) .and. (TS < TCr)) then - ! Checking if phase change is needed, by checking whether the final solution is either subcoooled - ! liquid or overheated vapor. + ! 
Checking if phase change is needed, by checking whether the final solution is either subcoooled + ! liquid or overheated vapor. - ! overheated vapor case - ! depleting the mass of liquid - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM + ! overheated vapor case + ! depleting the mass of liquid + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM - ! tranferring the total mass to vapor - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! tranferring the total mass to vapor + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - ! calling pT-equilibrium for overheated vapor, which is MFL = 0 - call s_infinite_pt_relaxation_k(j, k, l, 0, pSOV, p_infOV, q_cons_vf, rhoe, TSOV) + ! calling pT-equilibrium for overheated vapor, which is MFL = 0 + call s_infinite_pt_relaxation_k(j, k, l, 0, pSOV, p_infOV, q_cons_vf, rhoe, TSOV) - ! calculating Saturation temperature - call s_TSat(pSOV, TSatOV, TSOV) + ! calculating Saturation temperature + call s_TSat(pSOV, TSatOV, TSOV) - ! subcooled liquid case - ! tranferring the total mass to liquid - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! subcooled liquid case + ! tranferring the total mass to liquid + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - ! depleting the mass of vapor - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM + ! depleting the mass of vapor + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM - ! calling pT-equilibrium for subcooled liquid, which is MFL = 1 - call s_infinite_pt_relaxation_k(j, k, l, 1, pSSL, p_infSL, q_cons_vf, rhoe, TSSL) + ! calling pT-equilibrium for subcooled liquid, which is MFL = 1 + call s_infinite_pt_relaxation_k(j, k, l, 1, pSSL, p_infSL, q_cons_vf, rhoe, TSSL) - ! calculating Saturation temperature - call s_TSat(pSSL, TSatSL, TSSL) + ! calculating Saturation temperature + call s_TSat(pSSL, TSatSL, TSSL) - ! checking the conditions for overheated vapor and subcooled liquide - if (TSOV > TSatOV) then + ! 
checking the conditions for overheated vapor and subcooled liquide + if (TSOV > TSatOV) then - ! Assigning pressure - pS = pSOV + ! Assigning pressure + pS = pSOV - ! Assigning Temperature - TS = TSOV + ! Assigning Temperature + TS = TSOV - ! correcting the liquid partial density - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM + ! correcting the liquid partial density + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM - ! correcting the vapor partial density - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! correcting the vapor partial density + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - elseif (TSSL < TSatSL) then + elseif (TSSL < TSatSL) then - ! Assigning pressure - pS = pSSL + ! Assigning pressure + pS = pSSL - ! Assigning Temperature - TS = TSSL + ! Assigning Temperature + TS = TSSL - ! correcting the liquid partial density - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! correcting the liquid partial density + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - ! correcting the vapor partial density - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM + ! correcting the vapor partial density + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM - else + else - ! returning partial pressures to what they were from the homogeneous solver - ! liquid - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = m1 + ! returning partial pressures to what they were from the homogeneous solver + ! liquid + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = m1 - ! vapor - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = m2 + ! vapor + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = m2 - ! calling the pTg-equilibrium solver - call s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS) + ! calling the pTg-equilibrium solver + call s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS) - end if + end if - end if + end if - ! Calculations AFTER equilibrium + ! Calculations AFTER equilibrium - ! 
entropy - sk(1:num_fluids) = cvs(1:num_fluids)*log((TS**gs_min(1:num_fluids)) & - /((pS + ps_inf(1:num_fluids))**(gs_min(1:num_fluids) - 1.0_wp))) + qvps(1:num_fluids) + ! entropy + sk(1:num_fluids) = cvs(1:num_fluids)*log((TS**gs_min(1:num_fluids)) & + /((pS + ps_inf(1:num_fluids))**(gs_min(1:num_fluids) - 1.0_wp))) + qvps(1:num_fluids) - ! enthalpy - hk(1:num_fluids) = gs_min(1:num_fluids)*cvs(1:num_fluids)*TS & - + qvs(1:num_fluids) + ! enthalpy + hk(1:num_fluids) = gs_min(1:num_fluids)*cvs(1:num_fluids)*TS & + + qvs(1:num_fluids) - ! Gibbs-free energy - gk(1:num_fluids) = hk(1:num_fluids) - TS*sk(1:num_fluids) + ! Gibbs-free energy + gk(1:num_fluids) = hk(1:num_fluids) - TS*sk(1:num_fluids) - ! densities - rhok(1:num_fluids) = (pS + ps_inf(1:num_fluids)) & - /((gs_min(1:num_fluids) - 1)*cvs(1:num_fluids)*TS) + ! densities + rhok(1:num_fluids) = (pS + ps_inf(1:num_fluids)) & + /((gs_min(1:num_fluids) - 1)*cvs(1:num_fluids)*TS) - ! internal energy - ek(1:num_fluids) = (pS + gs_min(1:num_fluids) & - *ps_inf(1:num_fluids))/(pS + ps_inf(1:num_fluids)) & - *cvs(1:num_fluids)*TS + qvs(1:num_fluids) + ! internal energy + ek(1:num_fluids) = (pS + gs_min(1:num_fluids) & + *ps_inf(1:num_fluids))/(pS + ps_inf(1:num_fluids)) & + *cvs(1:num_fluids)*TS + qvs(1:num_fluids) - ! calculating volume fractions, internal energies, and total entropy - rhos = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + ! calculating volume fractions, internal energies, and total entropy + rhos = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - ! volume fractions - q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/rhok(i) + ! volume fractions + q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/rhok(i) - ! alpha*rho*e - q_cons_vf(i + intxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)*ek(i) + ! alpha*rho*e + q_cons_vf(i + intxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)*ek(i) - ! 
Total entropy - rhos = rhos + q_cons_vf(i + contxb - 1)%sf(j, k, l)*sk(i) + ! Total entropy + rhos = rhos + q_cons_vf(i + contxb - 1)%sf(j, k, l)*sk(i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_infinite_relaxation_k diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 33094bfb4f..1a49e7e5b6 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -465,9 +465,9 @@ contains end subroutine s_convert_species_to_mixture_variables subroutine s_convert_species_to_mixture_variables_acc(rho_K, & - gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, & - G_K, G) + gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K, & + G_K, G) $:GPU_ROUTINE(function_name='s_convert_species_to_mixture_variables_acc', & & parallelism='[seq]', cray_inline=True) @@ -544,8 +544,8 @@ contains end subroutine s_convert_species_to_mixture_variables_acc subroutine s_convert_species_to_mixture_variables_bubbles_acc(rho_K, & - gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) + gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) $:GPU_ROUTINE(function_name='s_convert_species_to_mixture_variables_bubbles_acc', & & parallelism='[seq]', cray_inline=True) @@ -874,295 +874,295 @@ contains #:endif #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, dyn_pres_K, rhoYks, B]') - do l = ibounds(3)%beg, ibounds(3)%end - do k = ibounds(2)%beg, ibounds(2)%end - do j = ibounds(1)%beg, ibounds(1)%end - dyn_pres_K = 0._wp - - if (igr) then - if (num_fluids == 1) then - alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) - alpha_K(1) = qK_cons_vf(advxb)%sf(j, k, l) - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) - alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) - end do - - alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) - 
alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) - end if - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) - alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) - end do - end if + do l = ibounds(3)%beg, ibounds(3)%end + do k = ibounds(2)%beg, ibounds(2)%end + do j = ibounds(1)%beg, ibounds(1)%end + dyn_pres_K = 0._wp + + if (igr) then + if (num_fluids == 1) then + alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) + alpha_K(1) = qK_cons_vf(advxb)%sf(j, k, l) + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) + alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + + alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) + alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) + end if + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) + alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + end if - if (model_eqns /= 4) then + if (model_eqns /= 4) then #ifdef MFC_SIMULATION - ! If in simulation, use acc mixture subroutines - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & - alpha_rho_K, Re_K, G_K, Gs) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - end if + ! 
If in simulation, use acc mixture subroutines + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & + alpha_rho_K, Re_K, G_K, Gs) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + end if #else - ! If pre-processing, use non acc mixture subroutines - if (elasticity) then - call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & - rho_K, gamma_K, pi_inf_K, qv_K, Re_K, G_K, fluid_pp(:)%G) - else - call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & - rho_K, gamma_K, pi_inf_K, qv_K) - end if + ! If pre-processing, use non acc mixture subroutines + if (elasticity) then + call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & + rho_K, gamma_K, pi_inf_K, qv_K, Re_K, G_K, fluid_pp(:)%G) + else + call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & + rho_K, gamma_K, pi_inf_K, qv_K) + end if #endif - end if - - if (relativity) then - if (n == 0) then - B(1) = Bx0 - B(2) = qK_cons_vf(B_idx%beg)%sf(j, k, l) - B(3) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) - else - B(1) = qK_cons_vf(B_idx%beg)%sf(j, k, l) - B(2) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) - B(3) = qK_cons_vf(B_idx%beg + 2)%sf(j, k, l) end if - B2 = B(1)**2 + B(2)**2 + B(3)**2 - - m2 = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 - end do - S = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) - end do + if (relativity) then + if (n == 0) then + B(1) = Bx0 + B(2) = qK_cons_vf(B_idx%beg)%sf(j, k, l) + B(3) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) + else + B(1) = qK_cons_vf(B_idx%beg)%sf(j, k, l) + B(2) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) + B(3) = qK_cons_vf(B_idx%beg + 2)%sf(j, k, l) + end if + B2 = 
B(1)**2 + B(2)**2 + B(3)**2 - E = qK_cons_vf(E_idx)%sf(j, k, l) + m2 = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 + end do - D = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - D = D + qK_cons_vf(i)%sf(j, k, l) - end do + S = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) + end do - ! Newton-Raphson - W = E + D - $:GPU_LOOP(parallelism='[seq]') - do iter = 1, relativity_cons_to_prim_max_iter - Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) - pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS - f = W - pres + (1 - 1/(2*Ga**2))*B2 - S**2/(2*W**2) - E - D - - ! The first equation below corrects a typo in (Mignone & Bodo, 2006) - ! m2*W**2 → 2*m2*W**2, which would cancel with the 2* in other terms - ! This corrected version is not used as the second equation empirically converges faster. - ! First equation is kept for further investigation. - ! dGa_dW = -Ga**3 * ( S**2*(3*W**2+3*W*B2+B2**2) + m2*W**2 ) / (W**3 * (W+B2)**3) ! first (corrected) - dGa_dW = -Ga**3*(2*S**2*(3*W**2 + 3*W*B2 + B2**2) + m2*W**2)/(2*W**3*(W + B2)**3) ! second (in paper) - - dp_dW = (Ga*(1 + D*dGa_dW) - 2*W*dGa_dW)/((gamma_K + 1)*Ga**3) - df_dW = 1 - dp_dW + (B2/Ga**3)*dGa_dW + S**2/W**3 - - dW = -f/df_dW - W = W + dW - if (abs(dW) < 1.e-12_wp*W) exit - end do + E = qK_cons_vf(E_idx)%sf(j, k, l) - ! Recalculate pressure using converged W - Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) - qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) + D = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + D = D + qK_cons_vf(i)%sf(j, k, l) + end do - ! Recover the other primitive variables - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) - end do - qK_prim_vf(1)%sf(j, k, l) = D/Ga ! 
Hard-coded for single-component for now + ! Newton-Raphson + W = E + D + $:GPU_LOOP(parallelism='[seq]') + do iter = 1, relativity_cons_to_prim_max_iter + Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) + pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS + f = W - pres + (1 - 1/(2*Ga**2))*B2 - S**2/(2*W**2) - E - D + + ! The first equation below corrects a typo in (Mignone & Bodo, 2006) + ! m2*W**2 → 2*m2*W**2, which would cancel with the 2* in other terms + ! This corrected version is not used as the second equation empirically converges faster. + ! First equation is kept for further investigation. + ! dGa_dW = -Ga**3 * ( S**2*(3*W**2+3*W*B2+B2**2) + m2*W**2 ) / (W**3 * (W+B2)**3) ! first (corrected) + dGa_dW = -Ga**3*(2*S**2*(3*W**2 + 3*W*B2 + B2**2) + m2*W**2)/(2*W**3*(W + B2)**3) ! second (in paper) + + dp_dW = (Ga*(1 + D*dGa_dW) - 2*W*dGa_dW)/((gamma_K + 1)*Ga**3) + df_dW = 1 - dp_dW + (B2/Ga**3)*dGa_dW + S**2/W**3 + + dW = -f/df_dW + W = W + dW + if (abs(dW) < 1.e-12_wp*W) exit + end do - $:GPU_LOOP(parallelism='[seq]') - do i = B_idx%beg, B_idx%end - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do + ! Recalculate pressure using converged W + Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) + qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) - cycle ! skip all the non-relativistic conversions below - end if + ! Recover the other primitive variables + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) + end do + qK_prim_vf(1)%sf(j, k, l) = D/Ga ! 
Hard-coded for single-component for now - if (chemistry) then - rho_K = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = B_idx%beg, B_idx%end + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - qK_prim_vf(i)%sf(j, k, l) = rho_K - end do + cycle ! skip all the non-relativistic conversions below + end if - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if + if (chemistry) then + rho_K = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) + end do -#ifdef MFC_SIMULATION - rho_K = max(rho_K, sgm_eps) -#endif + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + qK_prim_vf(i)%sf(j, k, l) = rho_K + end do - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - if (model_eqns /= 4) then - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & - /rho_K - dyn_pres_K = dyn_pres_K + 5.e-1_wp*qK_cons_vf(i)%sf(j, k, l) & - *qK_prim_vf(i)%sf(j, k, l) + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) + end do else - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & - /qK_cons_vf(1)%sf(j, k, l) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do end if - end do - if (chemistry) then +#ifdef MFC_SIMULATION + rho_K = max(rho_K, sgm_eps) +#endif + $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) + do i = momxb, momxe + if (model_eqns /= 4) then + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, 
l) & + /rho_K + dyn_pres_K = dyn_pres_K + 5.e-1_wp*qK_cons_vf(i)%sf(j, k, l) & + *qK_prim_vf(i)%sf(j, k, l) + else + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & + /qK_cons_vf(1)%sf(j, k, l) + end if end do - T = q_T_sf%sf(j, k, l) - end if + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_species + rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) + end do - if (mhd) then - if (n == 0) then - pres_mag = 0.5_wp*(Bx0**2 + qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2) + T = q_T_sf%sf(j, k, l) + end if + + if (mhd) then + if (n == 0) then + pres_mag = 0.5_wp*(Bx0**2 + qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2) + else + pres_mag = 0.5_wp*(qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 2)%sf(j, k, l)**2) + end if else - pres_mag = 0.5_wp*(qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 2)%sf(j, k, l)**2) + pres_mag = 0._wp end if - else - pres_mag = 0._wp - end if - call s_compute_pressure(qK_cons_vf(E_idx)%sf(j, k, l), & - qK_cons_vf(alf_idx)%sf(j, k, l), & - dyn_pres_K, pi_inf_K, gamma_K, rho_K, & - qv_K, rhoYks, pres, T, pres_mag=pres_mag) + call s_compute_pressure(qK_cons_vf(E_idx)%sf(j, k, l), & + qK_cons_vf(alf_idx)%sf(j, k, l), & + dyn_pres_K, pi_inf_K, gamma_K, rho_K, & + qv_K, rhoYks, pres, T, pres_mag=pres_mag) - qK_prim_vf(E_idx)%sf(j, k, l) = pres + qK_prim_vf(E_idx)%sf(j, k, l) = pres - if (chemistry) then - q_T_sf%sf(j, k, l) = T - end if + if (chemistry) then + q_T_sf%sf(j, k, l) = T + end if - if (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nRtmp(i) = qK_cons_vf(bubrs(i))%sf(j, k, l) - end do + if (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nRtmp(i) = qK_cons_vf(bubrs(i))%sf(j, k, l) + end do - vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) + vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) - if 
(qbmm) then - !Get nb (constant across all R0 bins) - nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) + if (qbmm) then + !Get nb (constant across all R0 bins) + nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - !Convert cons to prim - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc - end do - !Need to keep track of nb in the primitive variable list (converted back to true value before output) + !Convert cons to prim + $:GPU_LOOP(parallelism='[seq]') + do i = bubxb, bubxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc + end do + !Need to keep track of nb in the primitive variable list (converted back to true value before output) #ifdef MFC_SIMULATION - qK_prim_vf(bubxb)%sf(j, k, l) = qK_cons_vf(bubxb)%sf(j, k, l) + qK_prim_vf(bubxb)%sf(j, k, l) = qK_cons_vf(bubxb)%sf(j, k, l) #endif - else - if (adv_n) then - qK_prim_vf(n_idx)%sf(j, k, l) = qK_cons_vf(n_idx)%sf(j, k, l) - nbub_sc = qK_prim_vf(n_idx)%sf(j, k, l) else - call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) + if (adv_n) then + qK_prim_vf(n_idx)%sf(j, k, l) = qK_cons_vf(n_idx)%sf(j, k, l) + nbub_sc = qK_prim_vf(n_idx)%sf(j, k, l) + else + call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = bubxb, bubxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc + end do end if + end if + if (mhd) then $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc + do i = B_idx%beg, B_idx%end + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do end if - end if - - if (mhd) then - $:GPU_LOOP(parallelism='[seq]') - do i = B_idx%beg, B_idx%end - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if - if (elasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K - end do - end if + if (elasticity) then + 
$:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + end do + end if - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - ! subtracting elastic contribution for pressure calculation - if (G_K > verysmall) then - if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp) - qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & - ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K - ! Double for shear stresses - if (any(i == shear_indices)) then + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + ! subtracting elastic contribution for pressure calculation + if (G_K > verysmall) then + if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp) qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K + ! Double for shear stresses + if (any(i == shear_indices)) then + qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & + ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K + end if end if - end if - end do - end if + end do + end if + + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = xibeg, xiend + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + end do + end if - if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = xibeg, xiend - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + do i = advxb, advxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - if (surface_tension) then - qK_prim_vf(c_idx)%sf(j, k, l) = qK_cons_vf(c_idx)%sf(j, k, l) - end if + if (surface_tension) then + qK_prim_vf(c_idx)%sf(j, k, l) = qK_cons_vf(c_idx)%sf(j, k, l) + end if - if (cont_damage) qK_prim_vf(damage_idx)%sf(j, 
k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) + if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) #ifdef MFC_POST_PROCESS - if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) + if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) #endif + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_convert_conservative_to_primitive_variables @@ -1489,111 +1489,111 @@ contains ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_K, vel_K, alpha_K, Re_K, Y_K]') - do l = is3b, is3e - do k = is2b, is2e - do j = is1b, is1e - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_K(i) = qK_prim_vf(j, k, l, i) - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_K(i) = qK_prim_vf(j, k, l, contxe + i) - end do + do l = is3b, is3e + do k = is2b, is2e + do j = is1b, is1e - vel_K_sum = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_K_sum = vel_K_sum + vel_K(i)**2._wp - end do - - pres_K = qK_prim_vf(j, k, l, E_idx) - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, & - G_K, Gs) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & - pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_K(i) = qK_prim_vf(j, k, l, i) + end do - ! 
Computing the energy from the pressure + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_K(i) = qK_prim_vf(j, k, l, contxe + i) + end do - if (chemistry) then + vel_K_sum = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) + do i = 1, num_vels + vel_K_sum = vel_K_sum + vel_K(i)**2._wp end do - !Computing the energy from the internal energy of the mixture - call get_mixture_molecular_weight(Y_k, mix_mol_weight) - R_gas = gas_constant/mix_mol_weight - T_K = pres_K/rho_K/R_gas - call get_mixture_energy_mass(T_K, Y_K, E_K) - E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum - else - ! Computing the energy from the pressure - E_K = gamma_K*pres_K + pi_inf_K & - + 5.e-1_wp*rho_K*vel_K_sum + qv_K - end if - ! mass flux, this should be \alpha_i \rho_i u_i - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) - end do + pres_K = qK_prim_vf(j, k, l, E_idx) + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K, & + G_K, Gs) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & + pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - FK_vf(j, k, l, contxe + dir_idx(i)) = & - rho_K*vel_K(dir_idx(1)) & - *vel_K(dir_idx(i)) & - + pres_K*dir_flg(dir_idx(i)) - end do + ! Computing the energy from the pressure - ! 
energy flux, u(E+p) - FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K) + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) + end do + !Computing the energy from the internal energy of the mixture + call get_mixture_molecular_weight(Y_k, mix_mol_weight) + R_gas = gas_constant/mix_mol_weight + T_K = pres_K/rho_K/R_gas + call get_mixture_energy_mass(T_K, Y_K, E_K) + E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum + else + ! Computing the energy from the pressure + E_K = gamma_K*pres_K + pi_inf_K & + + 5.e-1_wp*rho_K*vel_K_sum + qv_K + end if - ! Species advection Flux, \rho*u*Y - if (chemistry) then + ! mass flux, this should be \alpha_i \rho_i u_i $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) + do i = 1, contxe + FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) end do - end if - if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_vf(j, k, l, i) = 0._wp - FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) + do i = 1, num_vels + FK_vf(j, k, l, contxe + dir_idx(i)) = & + rho_K*vel_K(dir_idx(1)) & + *vel_K(dir_idx(i)) & + + pres_K*dir_flg(dir_idx(i)) end do - else - ! Could be bubbles_euler! - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) - end do + ! energy flux, u(E+p) + FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K) - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) - end do + ! Species advection Flux, \rho*u*Y + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_species + FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) + end do + end if - end if + if (riemann_solver == 1 .or. 
riemann_solver == 4) then + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_vf(j, k, l, i) = 0._wp + FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) + end do + + else + ! Could be bubbles_euler! + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) + end do + + end if + + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #endif end subroutine s_convert_primitive_to_flux_variables diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index fe03b073d7..8b2efa6bcf 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -167,17 +167,17 @@ contains sim_time = t_step*dt #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - mass_src(j, k, l) = 0._wp - mom_src(1, j, k, l) = 0._wp - e_src(j, k, l) = 0._wp - if (n > 0) mom_src(2, j, k, l) = 0._wp - if (p > 0) mom_src(3, j, k, l) = 0._wp + do l = 0, p + do k = 0, n + do j = 0, m + mass_src(j, k, l) = 0._wp + mom_src(1, j, k, l) = 0._wp + e_src(j, k, l) = 0._wp + if (n > 0) mom_src(2, j, k, l) = 0._wp + if (p > 0) mom_src(3, j, k, l) = 0._wp + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP ! Keep outer loop sequel because different sources can have very different number of points @@ -222,122 +222,122 @@ contains deallocate (phi_rn) #:call GPU_PARALLEL_LOOP(private='[myalpha,myalpha_rho]') - do i = 1, num_points - j = source_spatials(ai)%coord(1, i) - k = source_spatials(ai)%coord(2, i) - l = source_spatials(ai)%coord(3, i) - - ! 
Compute speed of sound - myRho = 0._wp - B_tait = 0._wp - small_gamma = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l) - myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l) - end do + do i = 1, num_points + j = source_spatials(ai)%coord(1, i) + k = source_spatials(ai)%coord(2, i) + l = source_spatials(ai)%coord(3, i) + + ! Compute speed of sound + myRho = 0._wp + B_tait = 0._wp + small_gamma = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l) + myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l) + end do - if (bubbles_euler) then - if (num_fluids > 2) then + if (bubbles_euler) then + if (num_fluids > 2) then + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids - 1 + myRho = myRho + myalpha_rho(q) + B_tait = B_tait + myalpha(q)*pi_infs(q) + small_gamma = small_gamma + myalpha(q)*gammas(q) + end do + else + myRho = myalpha_rho(1) + B_tait = pi_infs(1) + small_gamma = gammas(1) + end if + end if + + if ((.not. bubbles_euler) .or. (mpp_lim .and. (num_fluids > 2))) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - 1 + do q = 1, num_fluids myRho = myRho + myalpha_rho(q) B_tait = B_tait + myalpha(q)*pi_infs(q) small_gamma = small_gamma + myalpha(q)*gammas(q) end do - else - myRho = myalpha_rho(1) - B_tait = pi_infs(1) - small_gamma = gammas(1) end if - end if - if ((.not. bubbles_euler) .or. (mpp_lim .and. (num_fluids > 2))) then - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - myRho = myRho + myalpha_rho(q) - B_tait = B_tait + myalpha(q)*pi_infs(q) - small_gamma = small_gamma + myalpha(q)*gammas(q) - end do - end if + small_gamma = 1._wp/small_gamma + 1._wp + c = sqrt(small_gamma*(q_prim_vf(E_idx)%sf(j, k, l) + ((small_gamma - 1._wp)/small_gamma)*B_tait)/myRho) - small_gamma = 1._wp/small_gamma + 1._wp - c = sqrt(small_gamma*(q_prim_vf(E_idx)%sf(j, k, l) + ((small_gamma - 1._wp)/small_gamma)*B_tait)/myRho) + ! 
Wavelength to frequency conversion + if (pulse(ai) == 1 .or. pulse(ai) == 3) frequency_local = f_frequency_local(freq_conv_flag, ai, c) + if (pulse(ai) == 2) gauss_sigma_time_local = f_gauss_sigma_time_local(gauss_conv_flag, ai, c) - ! Wavelength to frequency conversion - if (pulse(ai) == 1 .or. pulse(ai) == 3) frequency_local = f_frequency_local(freq_conv_flag, ai, c) - if (pulse(ai) == 2) gauss_sigma_time_local = f_gauss_sigma_time_local(gauss_conv_flag, ai, c) + ! Update momentum source term + call s_source_temporal(sim_time, c, ai, mom_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) + mom_src_diff = source_temporal*source_spatials(ai)%val(i) - ! Update momentum source term - call s_source_temporal(sim_time, c, ai, mom_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) - mom_src_diff = source_temporal*source_spatials(ai)%val(i) + if (dipole(ai)) then ! Double amplitude & No momentum source term (only works for Planar) + mass_src(j, k, l) = mass_src(j, k, l) + 2._wp*mom_src_diff/c + if (model_eqns /= 4) E_src(j, k, l) = E_src(j, k, l) + 2._wp*mom_src_diff*c/(small_gamma - 1._wp) + cycle + end if - if (dipole(ai)) then ! Double amplitude & No momentum source term (only works for Planar) - mass_src(j, k, l) = mass_src(j, k, l) + 2._wp*mom_src_diff/c - if (model_eqns /= 4) E_src(j, k, l) = E_src(j, k, l) + 2._wp*mom_src_diff*c/(small_gamma - 1._wp) - cycle - end if + if (n == 0) then ! 1D + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*sign(1._wp, dir(ai)) ! Left or right-going wave - if (n == 0) then ! 1D - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*sign(1._wp, dir(ai)) ! Left or right-going wave + elseif (p == 0) then ! 2D + if (support(ai) < 5) then ! 
Planar + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) + else + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(source_spatials(ai)%angle(i)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(source_spatials(ai)%angle(i)) + end if - elseif (p == 0) then ! 2D - if (support(ai) < 5) then ! Planar - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) - else - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(source_spatials(ai)%angle(i)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(source_spatials(ai)%angle(i)) + else ! 3D + if (support(ai) < 5) then ! Planar + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) + else + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(1, i) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(2, i) + mom_src(3, j, k, l) = mom_src(3, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(3, i) + end if end if - else ! 3D + ! Update mass source term if (support(ai) < 5) then ! Planar - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) - else - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(1, i) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(2, i) - mom_src(3, j, k, l) = mom_src(3, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(3, i) + mass_src_diff = mom_src_diff/c + else ! Spherical or cylindrical support + ! 
Mass source term must be calculated differently using a correction term for spherical and cylindrical support + call s_source_temporal(sim_time, c, ai, mass_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) + mass_src_diff = source_temporal*source_spatials(ai)%val(i) end if - end if - - ! Update mass source term - if (support(ai) < 5) then ! Planar - mass_src_diff = mom_src_diff/c - else ! Spherical or cylindrical support - ! Mass source term must be calculated differently using a correction term for spherical and cylindrical support - call s_source_temporal(sim_time, c, ai, mass_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) - mass_src_diff = source_temporal*source_spatials(ai)%val(i) - end if - mass_src(j, k, l) = mass_src(j, k, l) + mass_src_diff + mass_src(j, k, l) = mass_src(j, k, l) + mass_src_diff - ! Update energy source term - if (model_eqns /= 4) then - E_src(j, k, l) = E_src(j, k, l) + mass_src_diff*c**2._wp/(small_gamma - 1._wp) - end if + ! Update energy source term + if (model_eqns /= 4) then + E_src(j, k, l) = E_src(j, k, l) + mass_src_diff*c**2._wp/(small_gamma - 1._wp) + end if - end do + end do #:endcall GPU_PARALLEL_LOOP end do ! 
Update the rhs variables #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do q = contxb, contxe - rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) - end do - $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do q = contxb, contxe + rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) + end do + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) + end do + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + e_src(j, k, l) end do - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + e_src(j, k, l) end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_acoustic_src_calculations diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 9bf2f5a022..213d9b7fc1 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -80,17 +80,17 @@ contains integer :: i, j, k, l !< standard iterators #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhoM(j, k, l) = 0._wp - do i = 1, num_fluids - rhoM(j, k, l) = rhoM(j, k, l) + & - q_cons_vf(contxb + i - 1)%sf(j, k, l) + do l = 0, p + do k = 0, n + do j = 0, m + rhoM(j, k, l) = 0._wp + do i = 1, num_fluids + rhoM(j, k, l) = rhoM(j, k, l) + & + q_cons_vf(contxb + i - 1)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_mixture_density @@ -111,62 +111,62 @@ contains call s_compute_mixture_density(q_cons_vf) #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = 0._wp + do i = momxb, E_idx + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = 0._wp + end do end 
do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (bf_x) then ! x-direction body forces #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - rhoM(j, k, l)*accel_bf(1) - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - q_cons_vf(momxb)%sf(j, k, l)*accel_bf(1) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + rhoM(j, k, l)*accel_bf(1) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + q_cons_vf(momxb)%sf(j, k, l)*accel_bf(1) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (bf_y) then ! y-direction body forces #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - rhoM(j, k, l)*accel_bf(2) - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - q_cons_vf(momxb + 1)%sf(j, k, l)*accel_bf(2) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + rhoM(j, k, l)*accel_bf(2) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + q_cons_vf(momxb + 1)%sf(j, k, l)*accel_bf(2) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (bf_z) then ! 
z-direction body forces #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxe)%sf(j, k, l) = rhs_vf(momxe)%sf(j, k, l) + & - (rhoM(j, k, l))*accel_bf(3) - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - q_cons_vf(momxe)%sf(j, k, l)*accel_bf(3) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxe)%sf(j, k, l) = rhs_vf(momxe)%sf(j, k, l) + & + (rhoM(j, k, l))*accel_bf(3) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + q_cons_vf(momxe)%sf(j, k, l)*accel_bf(3) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 549c38873c..7c6b84a37f 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -462,9 +462,9 @@ contains !! @param fCson Speed of sound (EL) !! @param adap_dt_stop Fail-safe exit if max iteration count reached subroutine s_advance_step(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & - fntait, fBtait, f_bub_adv_src, f_divu, & - bub_id, fmass_v, fmass_n, fbeta_c, & - fbeta_t, fCson, adap_dt_stop) + fntait, fBtait, f_bub_adv_src, f_divu, & + bub_id, fmass_v, fmass_n, fbeta_c, & + fbeta_t, fCson, adap_dt_stop) $:GPU_ROUTINE(function_name='s_advance_step',parallelism='[seq]', & & cray_inline=True) @@ -595,8 +595,8 @@ contains !! @param fCson Speed of sound (EL) !! @param h Time step size subroutine s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & - fntait, fBtait, f_bub_adv_src, f_divu, & - fCson, h) + fntait, fBtait, f_bub_adv_src, f_divu, & + fCson, h) $:GPU_ROUTINE(function_name='s_initial_substep_h',parallelism='[seq]', & & cray_inline=True) @@ -677,10 +677,10 @@ contains !! @param myPb_tmp Internal bubble pressure at each stage (EL) !! 
@param myMv_tmp Mass of vapor in the bubble at each stage (EL) subroutine s_advance_substep(err, fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & - fntait, fBtait, f_bub_adv_src, f_divu, & - bub_id, fmass_v, fmass_n, fbeta_c, & - fbeta_t, fCson, h, & - myR_tmp, myV_tmp, myPb_tmp, myMv_tmp) + fntait, fBtait, f_bub_adv_src, f_divu, & + bub_id, fmass_v, fmass_n, fbeta_c, & + fbeta_t, fCson, h, & + myR_tmp, myV_tmp, myPb_tmp, myMv_tmp) $:GPU_ROUTINE(function_name='s_advance_substep',parallelism='[seq]', & & cray_inline=True) @@ -779,7 +779,7 @@ contains !! @param fdPbdt_tmp Rate of change of the internal bubble pressure !! @param fdMvdt_tmp Rate of change of the mass of vapor in the bubble elemental subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, & - fmass_n, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) + fmass_n, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(IN) :: fR_tmp, fV_tmp, fPb_tmp, fMv_tmp real(wp), intent(IN) :: fmass_n, fbeta_c, fbeta_t diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 3e180d9585..f198d2e78c 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -77,18 +77,18 @@ contains integer(wp) :: i, j, k, l #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - nR3bar = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nR3bar = nR3bar + weight(i)*(q_cons_vf(rs(i))%sf(j, k, l))**3._wp + do l = 0, p + do k = 0, n + do j = 0, m + nR3bar = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nR3bar = nR3bar + weight(i)*(q_cons_vf(rs(i))%sf(j, k, l))**3._wp + end do + q_cons_vf(alf_idx)%sf(j, k, l) = (4._wp*pi*nR3bar)/(3._wp*q_cons_vf(n_idx)%sf(j, k, l)**2._wp) end do - q_cons_vf(alf_idx)%sf(j, k, l) = (4._wp*pi*nR3bar)/(3._wp*q_cons_vf(n_idx)%sf(j, k, l)**2._wp) end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_comp_alpha_from_n @@ -105,48 +105,48 @@ contains if (.not. 
qbmm) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - divu_in%sf(j, k, l) = 0._wp - divu_in%sf(j, k, l) = & - 5.e-1_wp/dx(j)*(q_prim_vf(contxe + idir)%sf(j + 1, k, l) - & - q_prim_vf(contxe + idir)%sf(j - 1, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + divu_in%sf(j, k, l) = 0._wp + divu_in%sf(j, k, l) = & + 5.e-1_wp/dx(j)*(q_prim_vf(contxe + idir)%sf(j + 1, k, l) - & + q_prim_vf(contxe + idir)%sf(j - 1, k, l)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if elseif (idir == 2) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & - 5.e-1_wp/dy(k)*(q_prim_vf(contxe + idir)%sf(j, k + 1, l) - & - q_prim_vf(contxe + idir)%sf(j, k - 1, l)) + do l = 0, p + do k = 0, n + do j = 0, m + divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & + 5.e-1_wp/dy(k)*(q_prim_vf(contxe + idir)%sf(j, k + 1, l) - & + q_prim_vf(contxe + idir)%sf(j, k - 1, l)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (idir == 3) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & - 5.e-1_wp/dz(l)*(q_prim_vf(contxe + idir)%sf(j, k, l + 1) - & - q_prim_vf(contxe + idir)%sf(j, k, l - 1)) + do l = 0, p + do k = 0, n + do j = 0, m + divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & + 5.e-1_wp/dz(l)*(q_prim_vf(contxe + idir)%sf(j, k, l + 1) - & + q_prim_vf(contxe + idir)%sf(j, k, l - 1)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -178,178 +178,178 @@ contains real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - bub_adv_src(j, k, l) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - bub_r_src(j, k, l, q) = 0._wp - bub_v_src(j, k, l, q) = 0._wp - bub_p_src(j, k, l, q) = 0._wp - bub_m_src(j, k, l, q) = 0._wp + do l = 0, p + do k = 0, n + do j = 0, m + 
bub_adv_src(j, k, l) = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + bub_r_src(j, k, l, q) = 0._wp + bub_v_src(j, k, l, q) = 0._wp + bub_p_src(j, k, l, q) = 0._wp + bub_m_src(j, k, l, q) = 0._wp + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP adap_dt_stop_max = 0 #:call GPU_PARALLEL_LOOP(collapse=3, private='[Rtmp, Vtmp, myalpha_rho, myalpha]', & & reduction='[[adap_dt_stop_max]]', reductionOp='[MAX]', & & copy='[adap_dt_stop_max]') - do l = 0, p - do k = 0, n - do j = 0, m - - if (adv_n) then - nbub = q_prim_vf(n_idx)%sf(j, k, l) - else - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - Rtmp(q) = q_prim_vf(rs(q))%sf(j, k, l) - Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l) - end do + do l = 0, p + do k = 0, n + do j = 0, m - R3 = 0._wp + if (adv_n) then + nbub = q_prim_vf(n_idx)%sf(j, k, l) + else + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + Rtmp(q) = q_prim_vf(rs(q))%sf(j, k, l) + Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l) + end do - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - R3 = R3 + weight(q)*Rtmp(q)**3._wp - end do + R3 = 0._wp - nbub = (3._wp/(4._wp*pi))*q_prim_vf(alf_idx)%sf(j, k, l)/R3 - end if + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + R3 = R3 + weight(q)*Rtmp(q)**3._wp + end do - if (.not. adap_dt) then - R2Vav = 0._wp + nbub = (3._wp/(4._wp*pi))*q_prim_vf(alf_idx)%sf(j, k, l)/R3 + end if - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - R2Vav = R2Vav + weight(q)*Rtmp(q)**2._wp*Vtmp(q) - end do + if (.not. 
adap_dt) then + R2Vav = 0._wp - bub_adv_src(j, k, l) = 4._wp*pi*nbub*R2Vav - end if + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + R2Vav = R2Vav + weight(q)*Rtmp(q)**2._wp*Vtmp(q) + end do - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb + bub_adv_src(j, k, l) = 4._wp*pi*nbub*R2Vav + end if $:GPU_LOOP(parallelism='[seq]') - do ii = 1, num_fluids - myalpha_rho(ii) = q_cons_vf(ii)%sf(j, k, l) - myalpha(ii) = q_cons_vf(advxb + ii - 1)%sf(j, k, l) - end do - - myRho = 0._wp - n_tait = 0._wp - B_tait = 0._wp + do q = 1, nb - if (mpp_lim .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do ii = 1, num_fluids - myRho = myRho + myalpha_rho(ii) - n_tait = n_tait + myalpha(ii)*gammas(ii) - B_tait = B_tait + myalpha(ii)*pi_infs(ii) - end do - else if (num_fluids > 2) then - $:GPU_LOOP(parallelism='[seq]') - do ii = 1, num_fluids - 1 - myRho = myRho + myalpha_rho(ii) - n_tait = n_tait + myalpha(ii)*gammas(ii) - B_tait = B_tait + myalpha(ii)*pi_infs(ii) + myalpha_rho(ii) = q_cons_vf(ii)%sf(j, k, l) + myalpha(ii) = q_cons_vf(advxb + ii - 1)%sf(j, k, l) end do - else - myRho = myalpha_rho(1) - n_tait = gammas(1) - B_tait = pi_infs(1)/pi_fac - end if - n_tait = 1._wp/n_tait + 1._wp !make this the usual little 'gamma' - B_tait = B_tait*(n_tait - 1)/n_tait ! make this the usual pi_inf + myRho = 0._wp + n_tait = 0._wp + B_tait = 0._wp + + if (mpp_lim .and. 
(num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do ii = 1, num_fluids + myRho = myRho + myalpha_rho(ii) + n_tait = n_tait + myalpha(ii)*gammas(ii) + B_tait = B_tait + myalpha(ii)*pi_infs(ii) + end do + else if (num_fluids > 2) then + $:GPU_LOOP(parallelism='[seq]') + do ii = 1, num_fluids - 1 + myRho = myRho + myalpha_rho(ii) + n_tait = n_tait + myalpha(ii)*gammas(ii) + B_tait = B_tait + myalpha(ii)*pi_infs(ii) + end do + else + myRho = myalpha_rho(1) + n_tait = gammas(1) + B_tait = pi_infs(1)/pi_fac + end if - myRho = q_prim_vf(1)%sf(j, k, l) - myP = q_prim_vf(E_idx)%sf(j, k, l) - alf = q_prim_vf(alf_idx)%sf(j, k, l) - myR = q_prim_vf(rs(q))%sf(j, k, l) - myV = q_prim_vf(vs(q))%sf(j, k, l) + n_tait = 1._wp/n_tait + 1._wp !make this the usual little 'gamma' + B_tait = B_tait*(n_tait - 1)/n_tait ! make this the usual pi_inf - if (.not. polytropic) then - pb_local = q_prim_vf(ps(q))%sf(j, k, l) - mv_local = q_prim_vf(ms(q))%sf(j, k, l) - call s_bwproperty(pb_local, q, chi_vw, k_mw, rho_mw) - call s_vflux(myR, myV, pb_local, mv_local, q, vflux) - pbdot = f_bpres_dot(vflux, myR, myV, pb_local, mv_local, q) + myRho = q_prim_vf(1)%sf(j, k, l) + myP = q_prim_vf(E_idx)%sf(j, k, l) + alf = q_prim_vf(alf_idx)%sf(j, k, l) + myR = q_prim_vf(rs(q))%sf(j, k, l) + myV = q_prim_vf(vs(q))%sf(j, k, l) - bub_p_src(j, k, l, q) = nbub*pbdot - bub_m_src(j, k, l, q) = nbub*vflux*4._wp*pi*(myR**2._wp) - else - pb_local = 0._wp; mv_local = 0._wp; vflux = 0._wp; pbdot = 0._wp - end if + if (.not. polytropic) then + pb_local = q_prim_vf(ps(q))%sf(j, k, l) + mv_local = q_prim_vf(ms(q))%sf(j, k, l) + call s_bwproperty(pb_local, q, chi_vw, k_mw, rho_mw) + call s_vflux(myR, myV, pb_local, mv_local, q, vflux) + pbdot = f_bpres_dot(vflux, myR, myV, pb_local, mv_local, q) + + bub_p_src(j, k, l, q) = nbub*pbdot + bub_m_src(j, k, l, q) = nbub*vflux*4._wp*pi*(myR**2._wp) + else + pb_local = 0._wp; mv_local = 0._wp; vflux = 0._wp; pbdot = 0._wp + end if - ! 
Adaptive time stepping - adap_dt_stop = 0 + ! Adaptive time stepping + adap_dt_stop = 0 - if (adap_dt) then + if (adap_dt) then - call s_advance_step(myRho, myP, myR, myV, R0(q), & - pb_local, pbdot, alf, n_tait, B_tait, & - bub_adv_src(j, k, l), divu_in%sf(j, k, l), & - dmBub_id, dmMass_v, dmMass_n, dmBeta_c, & - dmBeta_t, dmCson, adap_dt_stop) + call s_advance_step(myRho, myP, myR, myV, R0(q), & + pb_local, pbdot, alf, n_tait, B_tait, & + bub_adv_src(j, k, l), divu_in%sf(j, k, l), & + dmBub_id, dmMass_v, dmMass_n, dmBeta_c, & + dmBeta_t, dmCson, adap_dt_stop) - q_cons_vf(rs(q))%sf(j, k, l) = nbub*myR - q_cons_vf(vs(q))%sf(j, k, l) = nbub*myV + q_cons_vf(rs(q))%sf(j, k, l) = nbub*myR + q_cons_vf(vs(q))%sf(j, k, l) = nbub*myV - else - rddot = f_rddot(myRho, myP, myR, myV, R0(q), & - pb_local, pbdot, alf, n_tait, B_tait, & - bub_adv_src(j, k, l), divu_in%sf(j, k, l), & - dmCson) - bub_v_src(j, k, l, q) = nbub*rddot - bub_r_src(j, k, l, q) = q_cons_vf(vs(q))%sf(j, k, l) - end if + else + rddot = f_rddot(myRho, myP, myR, myV, R0(q), & + pb_local, pbdot, alf, n_tait, B_tait, & + bub_adv_src(j, k, l), divu_in%sf(j, k, l), & + dmCson) + bub_v_src(j, k, l, q) = nbub*rddot + bub_r_src(j, k, l, q) = q_cons_vf(vs(q))%sf(j, k, l) + end if - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) - if (alf < 1.e-11_wp) then - bub_adv_src(j, k, l) = 0._wp - bub_r_src(j, k, l, q) = 0._wp - bub_v_src(j, k, l, q) = 0._wp - if (.not. polytropic) then - bub_p_src(j, k, l, q) = 0._wp - bub_m_src(j, k, l, q) = 0._wp + if (alf < 1.e-11_wp) then + bub_adv_src(j, k, l) = 0._wp + bub_r_src(j, k, l, q) = 0._wp + bub_v_src(j, k, l, q) = 0._wp + if (.not. polytropic) then + bub_p_src(j, k, l, q) = 0._wp + bub_m_src(j, k, l, q) = 0._wp + end if end if - end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. 
adap_dt) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do q = 0, n - do i = 0, m - rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + bub_adv_src(i, q, l) - if (num_fluids > 1) rhs_vf(advxb)%sf(i, q, l) = & - rhs_vf(advxb)%sf(i, q, l) - bub_adv_src(i, q, l) - $:GPU_LOOP(parallelism='[seq]') - do k = 1, nb - rhs_vf(rs(k))%sf(i, q, l) = rhs_vf(rs(k))%sf(i, q, l) + bub_r_src(i, q, l, k) - rhs_vf(vs(k))%sf(i, q, l) = rhs_vf(vs(k))%sf(i, q, l) + bub_v_src(i, q, l, k) - if (polytropic .neqv. .true.) then - rhs_vf(ps(k))%sf(i, q, l) = rhs_vf(ps(k))%sf(i, q, l) + bub_p_src(i, q, l, k) - rhs_vf(ms(k))%sf(i, q, l) = rhs_vf(ms(k))%sf(i, q, l) + bub_m_src(i, q, l, k) - end if + do l = 0, p + do q = 0, n + do i = 0, m + rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + bub_adv_src(i, q, l) + if (num_fluids > 1) rhs_vf(advxb)%sf(i, q, l) = & + rhs_vf(advxb)%sf(i, q, l) - bub_adv_src(i, q, l) + $:GPU_LOOP(parallelism='[seq]') + do k = 1, nb + rhs_vf(rs(k))%sf(i, q, l) = rhs_vf(rs(k))%sf(i, q, l) + bub_r_src(i, q, l, k) + rhs_vf(vs(k))%sf(i, q, l) = rhs_vf(vs(k))%sf(i, q, l) + bub_v_src(i, q, l, k) + if (polytropic .neqv. .true.) then + rhs_vf(ps(k))%sf(i, q, l) = rhs_vf(ps(k))%sf(i, q, l) + bub_p_src(i, q, l, k) + rhs_vf(ms(k))%sf(i, q, l) = rhs_vf(ms(k))%sf(i, q, l) + bub_m_src(i, q, l, k) + end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end subroutine s_compute_bubble_EE_source diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 3f4135dec3..9b3beb01a0 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -530,22 +530,22 @@ contains if (lag_params%pressure_corrector) then ! 
Calculate velocity potentials (valid for one bubble per cell) #:call GPU_PARALLEL_LOOP(private='[k,cell]') - do k = 1, nBubs - call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) - myR0 = bub_R0(k) - myR = intfc_rad(k, 2) - myV = intfc_vel(k, 2) - myPb = gas_p(k, 2) - pint = f_cpbw_KM(myR0, myR, myV, myPb) - pint = pint + 0.5_wp*myV**2._wp - if (lag_params%cluster_type == 2) then - bub_dphidt(k) = (paux - pint) + term2 - ! Accounting for the potential induced by the bubble averaged over the control volume - ! Note that this is based on the incompressible flow assumption near the bubble. - term1_fac = 3._wp/2._wp*(myR*(Romega**2._wp - myR**2._wp))/(Romega**3._wp - myR**3._wp) - bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) - end if - end do + do k = 1, nBubs + call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) + myR0 = bub_R0(k) + myR = intfc_rad(k, 2) + myV = intfc_vel(k, 2) + myPb = gas_p(k, 2) + pint = f_cpbw_KM(myR0, myR, myV, myPb) + pint = pint + 0.5_wp*myV**2._wp + if (lag_params%cluster_type == 2) then + bub_dphidt(k) = (paux - pint) + term2 + ! Accounting for the potential induced by the bubble averaged over the control volume + ! Note that this is based on the incompressible flow assumption near the bubble. + term1_fac = 3._wp/2._wp*(myR*(Romega**2._wp - myR**2._wp))/(Romega**3._wp - myR**3._wp) + bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) + end if + end do #:endcall GPU_PARALLEL_LOOP end if @@ -554,81 +554,81 @@ contains #:call GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & & copy='[adap_dt_stop_max]',copyin='[stage]') - do k = 1, nBubs - ! Keller-Miksis model - - ! Current bubble state - myPb = gas_p(k, 2) - myMass_n = gas_mg(k) - myMass_v = gas_mv(k, 2) - myR = intfc_rad(k, 2) - myV = intfc_vel(k, 2) - myBeta_c = gas_betaC(k) - myBeta_t = gas_betaT(k) - myR0 = bub_R0(k) - - ! 
Vapor and heat fluxes - call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) - myPbdot = f_bpres_dot(myVapFlux, myR, myV, myPb, myMass_v, k, myBeta_t, myR_m, mygamma_m) - myMvdot = 4._wp*pi*myR**2._wp*myVapFlux - - ! Obtaining driving pressure - call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) - - ! Obtain liquid density and computing speed of sound from pinf - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) - myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) - end do - call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & - myalpha_rho, Re) - call s_compute_cson_from_pinf(q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) + do k = 1, nBubs + ! Keller-Miksis model + + ! Current bubble state + myPb = gas_p(k, 2) + myMass_n = gas_mg(k) + myMass_v = gas_mv(k, 2) + myR = intfc_rad(k, 2) + myV = intfc_vel(k, 2) + myBeta_c = gas_betaC(k) + myBeta_t = gas_betaT(k) + myR0 = bub_R0(k) - ! Adaptive time stepping - adap_dt_stop = 0 + ! Vapor and heat fluxes + call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) + myPbdot = f_bpres_dot(myVapFlux, myR, myV, myPb, myMass_v, k, myBeta_t, myR_m, mygamma_m) + myMvdot = 4._wp*pi*myR**2._wp*myVapFlux - if (adap_dt) then + ! Obtaining driving pressure + call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) - call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & - dmntait, dmBtait, dm_bub_adv_src, dm_divu, & - k, myMass_v, myMass_n, myBeta_c, & - myBeta_t, myCson, adap_dt_stop) + ! 
Obtain liquid density and computing speed of sound from pinf + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) + myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) + end do + call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & + myalpha_rho, Re) + call s_compute_cson_from_pinf(q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) - ! Update bubble state - intfc_rad(k, 1) = myR - intfc_vel(k, 1) = myV - gas_p(k, 1) = myPb - gas_mv(k, 1) = myMass_v + ! Adaptive time stepping + adap_dt_stop = 0 - else + if (adap_dt) then - ! Radial acceleration from bubble models - intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & - myPb, myPbdot, dmalf, dmntait, dmBtait, & - dm_bub_adv_src, dm_divu, & - myCson) - intfc_draddt(k, stage) = myV - gas_dmvdt(k, stage) = myMvdot - gas_dpdt(k, stage) = myPbdot + call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & + dmntait, dmBtait, dm_bub_adv_src, dm_divu, & + k, myMass_v, myMass_n, myBeta_c, & + myBeta_t, myCson, adap_dt_stop) - end if + ! Update bubble state + intfc_rad(k, 1) = myR + intfc_vel(k, 1) = myV + gas_p(k, 1) = myPb + gas_mv(k, 1) = myMass_v + + else - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + ! Radial acceleration from bubble models + intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & + myPb, myPbdot, dmalf, dmntait, dmBtait, & + dm_bub_adv_src, dm_divu, & + myCson) + intfc_draddt(k, stage) = myV + gas_dmvdt(k, stage) = myMvdot + gas_dpdt(k, stage) = myPbdot - end do + end if + + adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + + end do #:endcall GPU_PARALLEL_LOOP if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") ! 
Bubbles remain in a fixed position #:call GPU_PARALLEL_LOOP(collapse=2, private='[k]', copyin='[stage]') - do k = 1, nBubs - do l = 1, 3 - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp + do k = 1, nBubs + do l = 1, 3 + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end do end do - end do #:endcall GPU_PARALLEL_LOOP call nvtxEndRange @@ -654,36 +654,36 @@ contains if (p == 0) then #:call GPU_PARALLEL_LOOP(collapse=4) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)*(q_beta%vf(2)%sf(i, j, k) + & - q_beta%vf(5)%sf(i, j, k)) - - end if + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)*(q_beta%vf(2)%sf(i, j, k) + & + q_beta%vf(5)%sf(i, j, k)) + + end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else #:call GPU_PARALLEL_LOOP(collapse=4) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)/q_beta%vf(1)%sf(i, j, k)* & - q_beta%vf(2)%sf(i, j, k) - end if + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)/q_beta%vf(1)%sf(i, j, k)* & + q_beta%vf(2)%sf(i, j, k) + end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -692,45 +692,45 @@ contains call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - 
rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & - (1._wp - q_beta%vf(1)%sf(i, j, k))/ & - q_beta%vf(1)%sf(i, j, k)* & - q_beta%vf(3)%sf(i, j, k) - end if + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & + (1._wp - q_beta%vf(1)%sf(i, j, k))/ & + q_beta%vf(1)%sf(i, j, k)* & + q_beta%vf(3)%sf(i, j, k) + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP !source in energy #:call GPU_PARALLEL_LOOP(collapse=3) - do k = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(2)%beg, idwbuff(2)%end - do i = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) + do k = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(2)%beg, idwbuff(2)%end + do i = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & - q_beta%vf(4)%sf(i, j, k)*(1._wp - q_beta%vf(1)%sf(i, j, k))/ & - q_beta%vf(1)%sf(i, j, k) - end if + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & + q_beta%vf(4)%sf(i, j, k)*(1._wp - q_beta%vf(1)%sf(i, j, k))/ & + q_beta%vf(1)%sf(i, j, k) + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end do @@ -778,15 +778,15 @@ contains call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, q_beta_idx - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = 
idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(i)%sf(j, k, l) = 0._wp + do i = 1, q_beta_idx + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(i)%sf(j, k, l) = 0._wp + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & @@ -794,16 +794,16 @@ contains !Store 1-beta #:call GPU_PARALLEL_LOOP(collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(1)%sf(j, k, l) = 1._wp - q_beta%vf(1)%sf(j, k, l) - ! Limiting void fraction given max value - q_beta%vf(1)%sf(j, k, l) = max(q_beta%vf(1)%sf(j, k, l), & - 1._wp - lag_params%valmaxvoid) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(1)%sf(j, k, l) = 1._wp - q_beta%vf(1)%sf(j, k, l) + ! Limiting void fraction given max value + q_beta%vf(1)%sf(j, k, l) = max(q_beta%vf(1)%sf(j, k, l), & + 1._wp - lag_params%valmaxvoid) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP call nvtxEndRange @@ -1036,15 +1036,15 @@ contains if (time_stepper == 1) then ! 
1st order TVD RK #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do #:endcall GPU_PARALLEL_LOOP call s_transfer_data_to_tmp() @@ -1059,28 +1059,28 @@ contains elseif (time_stepper == 2) then ! 
2nd order TVD RK if (stage == 1) then #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do #:endcall GPU_PARALLEL_LOOP elseif (stage == 2) then #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) - intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp - intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp - gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp - gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp - end do + do k = 1, nBubs + !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) + intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp + intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + 
dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp + gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp + gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp + end do #:endcall GPU_PARALLEL_LOOP call s_transfer_data_to_tmp() @@ -1097,40 +1097,40 @@ contains elseif (time_stepper == 3) then ! 3rd order TVD RK if (stage == 1) then #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do #:endcall GPU_PARALLEL_LOOP elseif (stage == 2) then #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp - gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp - gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp - end do + do k = 1, nBubs + !u{2} = u{n} + (1/4) * dt * [RHS{n} 
+ RHS{1}] + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp + gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp + gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp + end do #:endcall GPU_PARALLEL_LOOP elseif (stage == 3) then #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] - intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) - intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) - gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) - gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) - end do + do k = 1, nBubs + !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] + intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) + intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + 
mtn_dposdt(k, 1:3, 3)) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) + gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) + gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) + end do #:endcall GPU_PARALLEL_LOOP call s_transfer_data_to_tmp() @@ -1209,16 +1209,16 @@ contains integer :: k #:call GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - gas_p(k, 2) = gas_p(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) - intfc_rad(k, 2) = intfc_rad(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) - mtn_posPrev(k, 1:3, 2) = mtn_posPrev(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) - mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) - end do + do k = 1, nBubs + gas_p(k, 2) = gas_p(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + intfc_rad(k, 2) = intfc_rad(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + mtn_posPrev(k, 1:3, 2) = mtn_posPrev(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) + end do #:endcall GPU_PARALLEL_LOOP end subroutine s_transfer_data_to_tmp @@ -1309,48 +1309,48 @@ contains if (dir == 1) then ! Gradient in x dir. 
#:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dx(i + 1) - dx(i - 1)) & - + q%sf(i + 1, j, k)*(dx(i) + dx(i - 1)) & - - q%sf(i - 1, j, k)*(dx(i) + dx(i + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dx(i + 1) - dx(i - 1)) & + + q%sf(i + 1, j, k)*(dx(i) + dx(i - 1)) & + - q%sf(i - 1, j, k)*(dx(i) + dx(i + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else if (dir == 2) then ! Gradient in y dir. #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & - + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & - - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & + + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & + - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else ! Gradient in z dir. 
#:call GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & - + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & - - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & + + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & + - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end if @@ -1435,18 +1435,18 @@ contains lag_void_avg = 0._wp lag_vol = 0._wp #:call GPU_PARALLEL_LOOP(collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') - do k = 0, p - do j = 0, n - do i = 0, m - lag_void_max = max(lag_void_max, 1._wp - q_beta%vf(1)%sf(i, j, k)) - call s_get_char_vol(i, j, k, volcell) - if ((1._wp - q_beta%vf(1)%sf(i, j, k)) > 5.0d-11) then - lag_void_avg = lag_void_avg + (1._wp - q_beta%vf(1)%sf(i, j, k))*volcell - lag_vol = lag_vol + volcell - end if + do k = 0, p + do j = 0, n + do i = 0, m + lag_void_max = max(lag_void_max, 1._wp - q_beta%vf(1)%sf(i, j, k)) + call s_get_char_vol(i, j, k, volcell) + if ((1._wp - q_beta%vf(1)%sf(i, j, k)) > 5.0d-11) then + lag_void_avg = lag_void_avg + (1._wp - q_beta%vf(1)%sf(i, j, k))*volcell + lag_vol = lag_vol + volcell + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #ifdef MFC_MPI @@ -1621,12 +1621,12 @@ contains #:call GPU_PARALLEL_LOOP(reduction='[[Rmax_glb], [Rmin_glb]]', & & reductionOp='[MAX, MIN]', copy='[Rmax_glb,Rmin_glb]') - do k = 1, nBubs - Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) - Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) - Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) - Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 
1)/bub_R0(k)) - end do + do k = 1, nBubs + Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) + Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) + Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) + Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) + end do #:endcall GPU_PARALLEL_LOOP end subroutine s_calculate_lag_bubble_stats diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 529b08bfd9..218eaa6ea6 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -56,40 +56,40 @@ contains integer :: l #:call GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') - do l = 1, nBubs + do l = 1, nBubs - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - call s_get_cell(s_coord, cell) + volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp + s_coord(1:3) = lbk_s(l, 1:3, 2) + call s_get_cell(s_coord, cell) - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - if (num_dims == 2) then - Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth - if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi - else - Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - end if + if (num_dims == 2) then + Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + end if - !Update void fraction field - addFun1 = strength_vol/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) + addFun1 - - !Update time derivative of void fraction - addFun2 = strength_vel/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) + addFun2 + !Update void fraction field + addFun1 = 
strength_vol/Vol + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) + addFun1 - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = (strength_vol*strength_vel)/Vol + !Update time derivative of void fraction + addFun2 = strength_vel/Vol $:GPU_ATOMIC(atomic='update') - updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3 - end if - end do + updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) + addFun2 + + !Product of two smeared functions + !Update void fraction * time derivative of void fraction + if (lag_params%cluster_type >= 4) then + addFun3 = (strength_vol*strength_vel)/Vol + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3 + end if + end do #:endcall GPU_PARALLEL_LOOP end subroutine s_deltafunc @@ -122,80 +122,80 @@ contains if (p == 0) smearGridz = 1 #:call GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') - do l = 1, nBubs - nodecoord(1:3) = 0 - center(1:3) = 0._wp - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - center(1:2) = lbk_pos(l, 1:2, 2) - if (p > 0) center(3) = lbk_pos(l, 3, 2) - call s_get_cell(s_coord, cell) - call s_compute_stddsv(cell, volpart, stddsv) - - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - - $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') - do i = 1, smearGrid - do j = 1, smearGrid - do k = 1, smearGridz - cellaux(1) = cell(1) + i - (mapCells + 1) - cellaux(2) = cell(2) + j - (mapCells + 1) - cellaux(3) = cell(3) + k - (mapCells + 1) - if (p == 0) cellaux(3) = 0 - - !Check if the cells intended to smear the bubbles in are in the computational domain - !and 
redefine the cells for symmetric boundary - call s_check_celloutside(cellaux, celloutside) - - if (.not. celloutside) then - - nodecoord(1) = x_cc(cellaux(1)) - nodecoord(2) = y_cc(cellaux(2)) - if (p > 0) nodecoord(3) = z_cc(cellaux(3)) - call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) - if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) - - ! Relocate cells for bubbles intersecting symmetric boundaries - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then - call s_shift_cell_symmetric_bc(cellaux, cell) - end if - else - func = 0._wp - func2 = 0._wp - cellaux(1) = cell(1) - cellaux(2) = cell(2) - cellaux(3) = cell(3) + do l = 1, nBubs + nodecoord(1:3) = 0 + center(1:3) = 0._wp + volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp + s_coord(1:3) = lbk_s(l, 1:3, 2) + center(1:2) = lbk_pos(l, 1:2, 2) + if (p > 0) center(3) = lbk_pos(l, 3, 2) + call s_get_cell(s_coord, cell) + call s_compute_stddsv(cell, volpart, stddsv) + + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + + $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') + do i = 1, smearGrid + do j = 1, smearGrid + do k = 1, smearGridz + cellaux(1) = cell(1) + i - (mapCells + 1) + cellaux(2) = cell(2) + j - (mapCells + 1) + cellaux(3) = cell(3) + k - (mapCells + 1) if (p == 0) cellaux(3) = 0 - end if - - !Update void fraction field - addFun1 = func*strength_vol - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + addFun1 - - !Update time derivative of void fraction - addFun2 = func*strength_vel - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + addFun2 - - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if 
(lag_params%cluster_type >= 4) then - addFun3 = func2*strength_vol*strength_vel + + !Check if the cells intended to smear the bubbles in are in the computational domain + !and redefine the cells for symmetric boundary + call s_check_celloutside(cellaux, celloutside) + + if (.not. celloutside) then + + nodecoord(1) = x_cc(cellaux(1)) + nodecoord(2) = y_cc(cellaux(2)) + if (p > 0) nodecoord(3) = z_cc(cellaux(3)) + call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) + if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) + + ! Relocate cells for bubbles intersecting symmetric boundaries + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then + call s_shift_cell_symmetric_bc(cellaux, cell) + end if + else + func = 0._wp + func2 = 0._wp + cellaux(1) = cell(1) + cellaux(2) = cell(2) + cellaux(3) = cell(3) + if (p == 0) cellaux(3) = 0 + end if + + !Update void fraction field + addFun1 = func*strength_vol $:GPU_ATOMIC(atomic='update') - updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + addFun3 - end if + updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + addFun1 + + !Update time derivative of void fraction + addFun2 = func*strength_vel + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + addFun2 + + !Product of two smeared functions + !Update void fraction * time derivative of void fraction + if (lag_params%cluster_type >= 4) then + addFun3 = func2*strength_vol*strength_vel + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + addFun3 + end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end 
subroutine s_gaussian diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index e4682c575d..14d81fdf31 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -696,29 +696,29 @@ contains is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_rs${XYZ}$_vf_l(0, k, r, i) = F_rs${XYZ}$_vf(0, k, r, i) & - + pi_coef_${XYZ}$ (0, 0, cbc_loc)* & - (F_rs${XYZ}$_vf(1, k, r, i) - & - F_rs${XYZ}$_vf(0, k, r, i)) + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_rs${XYZ}$_vf_l(0, k, r, i) = F_rs${XYZ}$_vf(0, k, r, i) & + + pi_coef_${XYZ}$ (0, 0, cbc_loc)* & + (F_rs${XYZ}$_vf(1, k, r, i) - & + F_rs${XYZ}$_vf(0, k, r, i)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_src_rs${XYZ}$_vf_l(0, k, r, i) = F_src_rs${XYZ}$_vf(0, k, r, i) + & - (F_src_rs${XYZ}$_vf(1, k, r, i) - & - F_src_rs${XYZ}$_vf(0, k, r, i)) & - *pi_coef_${XYZ}$ (0, 0, cbc_loc) + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_src_rs${XYZ}$_vf_l(0, k, r, i) = F_src_rs${XYZ}$_vf(0, k, r, i) + & + (F_src_rs${XYZ}$_vf(1, k, r, i) - & + F_src_rs${XYZ}$_vf(0, k, r, i)) & + *pi_coef_${XYZ}$ (0, 0, cbc_loc) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP ! 
PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2 @@ -729,381 +729,381 @@ contains is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do j = 0, 1 - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_rs${XYZ}$_vf_l(j, k, r, i) = F_rs${XYZ}$_vf(j, k, r, i) & - + pi_coef_${XYZ}$ (j, 0, cbc_loc)* & - (F_rs${XYZ}$_vf(3, k, r, i) - & - F_rs${XYZ}$_vf(2, k, r, i)) & - + pi_coef_${XYZ}$ (j, 1, cbc_loc)* & - (F_rs${XYZ}$_vf(2, k, r, i) - & - F_rs${XYZ}$_vf(1, k, r, i)) & - + pi_coef_${XYZ}$ (j, 2, cbc_loc)* & - (F_rs${XYZ}$_vf(1, k, r, i) - & - F_rs${XYZ}$_vf(0, k, r, i)) + do i = 1, flux_cbc_index + do j = 0, 1 + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_rs${XYZ}$_vf_l(j, k, r, i) = F_rs${XYZ}$_vf(j, k, r, i) & + + pi_coef_${XYZ}$ (j, 0, cbc_loc)* & + (F_rs${XYZ}$_vf(3, k, r, i) - & + F_rs${XYZ}$_vf(2, k, r, i)) & + + pi_coef_${XYZ}$ (j, 1, cbc_loc)* & + (F_rs${XYZ}$_vf(2, k, r, i) - & + F_rs${XYZ}$_vf(1, k, r, i)) & + + pi_coef_${XYZ}$ (j, 2, cbc_loc)* & + (F_rs${XYZ}$_vf(1, k, r, i) - & + F_rs${XYZ}$_vf(0, k, r, i)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do j = 0, 1 - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_src_rs${XYZ}$_vf_l(j, k, r, i) = F_src_rs${XYZ}$_vf(j, k, r, i) + & - (F_src_rs${XYZ}$_vf(3, k, r, i) - & - F_src_rs${XYZ}$_vf(2, k, r, i)) & - *pi_coef_${XYZ}$ (j, 0, cbc_loc) + & - (F_src_rs${XYZ}$_vf(2, k, r, i) - & - F_src_rs${XYZ}$_vf(1, k, r, i)) & - *pi_coef_${XYZ}$ (j, 1, cbc_loc) + & - (F_src_rs${XYZ}$_vf(1, k, r, i) - & - F_src_rs${XYZ}$_vf(0, k, r, i)) & - *pi_coef_${XYZ}$ (j, 2, cbc_loc) + do i = advxb, advxe + do j = 0, 1 + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_src_rs${XYZ}$_vf_l(j, k, r, i) = F_src_rs${XYZ}$_vf(j, k, r, i) + & + (F_src_rs${XYZ}$_vf(3, k, r, i) - & + F_src_rs${XYZ}$_vf(2, k, r, i)) & + *pi_coef_${XYZ}$ (j, 0, cbc_loc) + & + 
(F_src_rs${XYZ}$_vf(2, k, r, i) - & + F_src_rs${XYZ}$_vf(1, k, r, i)) & + *pi_coef_${XYZ}$ (j, 1, cbc_loc) + & + (F_src_rs${XYZ}$_vf(1, k, r, i) - & + F_src_rs${XYZ}$_vf(0, k, r, i)) & + *pi_coef_${XYZ}$ (j, 2, cbc_loc) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if ! FD2 or FD4 of RHS at j = 0 #:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') - do r = is3%beg, is3%end - do k = is2%beg, is2%end - - ! Transferring the Primitive Variables - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) - end do - - vel_K_sum = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_K_sum = vel_K_sum + vel(i)**2._wp - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end - pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) + ! 
Transferring the Primitive Variables + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - adv_local(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) + end do - if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) - else - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) - end if + vel_K_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_K_sum = vel_K_sum + vel(i)**2._wp + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - mf(i) = alpha_rho(i)/rho - end do + pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) - if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) + do i = 1, advxe - E_idx + adv_local(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) end do - call get_mixture_molecular_weight(Ys, Mw) - R_gas = gas_constant/Mw - T = pres/rho/R_gas - call get_mixture_specific_heat_cp_mass(T, Ys, Cp) - call get_mixture_energy_mass(T, Ys, e_mix) - E = rho*e_mix + 5.e-1_wp*rho*vel_K_sum - if (chem_params%gamma_method == 1) then - !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - call get_mole_fractions(Mw, Ys, Xs) - call get_species_specific_heats_r(T, Cp_i) - Gamma_i = Cp_i/(Cp_i - 1.0_wp) - gamma = sum(Xs(:)/(Gamma_i(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. 
- call get_mixture_specific_heat_cv_mass(T, Ys, Cv) - gamma = 1.0_wp/(Cp/Cv - 1.0_wp) + if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) + else + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) end if - else - E = gamma*pres + pi_inf + 5.e-1_wp*rho*vel_K_sum - end if - H = (E + pres)/rho - - ! Compute mixture sound speed - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv_local, vel_K_sum, 0._wp, c) - - ! First-Order Spatial Derivatives of Primitive Variables + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + mf(i) = alpha_rho(i)/rho + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - dalpha_rho_ds(i) = 0._wp - end do + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel_ds(i) = 0._wp - end do + call get_mixture_molecular_weight(Ys, Mw) + R_gas = gas_constant/Mw + T = pres/rho/R_gas + call get_mixture_specific_heat_cp_mass(T, Ys, Cp) + call get_mixture_energy_mass(T, Ys, e_mix) + E = rho*e_mix + 5.e-1_wp*rho*vel_K_sum + if (chem_params%gamma_method == 1) then + !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + call get_mole_fractions(Mw, Ys, Xs) + call get_species_specific_heats_r(T, Cp_i) + Gamma_i = Cp_i/(Cp_i - 1.0_wp) + gamma = sum(Xs(:)/(Gamma_i(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. 
+ call get_mixture_specific_heat_cv_mass(T, Ys, Cv) + gamma = 1.0_wp/(Cp/Cv - 1.0_wp) + end if + else + E = gamma*pres + pi_inf + 5.e-1_wp*rho*vel_K_sum + end if - dpres_ds = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_ds(i) = 0._wp - end do + H = (E + pres)/rho - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - dYs_ds(i) = 0._wp - end do - end if + ! Compute mixture sound speed + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv_local, vel_K_sum, 0._wp, c) - $:GPU_LOOP(parallelism='[seq]') - do j = 0, buff_size + ! First-Order Spatial Derivatives of Primitive Variables $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe - dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dalpha_rho_ds(i) + dalpha_rho_ds(i) = 0._wp end do + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dvel_ds(i) + dvel_ds(i) = 0._wp end do - dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dpres_ds + dpres_ds = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, advxe - E_idx - dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dadv_ds(i) + dadv_ds(i) = 0._wp end do if (chemistry) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species - dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dYs_ds(i) + dYs_ds(i) = 0._wp end do end if - end do - ! First-Order Temporal Derivatives of Primitive Variables - lambda(1) = vel(dir_idx(1)) - c - lambda(2) = vel(dir_idx(1)) - lambda(3) = vel(dir_idx(1)) + c - - Ma = vel(dir_idx(1))/c - - if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SLIP_WALL) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SLIP_WALL)) then - call s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) - else if ((cbc_loc == -1 .and. 
bc${XYZ}$b == BC_CHAR_NR_SUB_BUFFER) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_BUFFER)) then - call s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_INFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_INFLOW)) then - call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) - ! Add GRCBC for Subsonic Inflow - if (bc_${XYZ}$%grcbc_in) then + $:GPU_LOOP(parallelism='[seq]') + do j = 0, buff_size + $:GPU_LOOP(parallelism='[seq]') - do i = 2, momxb - L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) + do i = 1, contxe + dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dalpha_rho_ds(i) end do - if (n > 0) then - L(momxb + 1) = c*Ma*(vel(dir_idx(2)) - vel_in(${CBC_DIR}$, dir_idx(2)))/Del_in(${CBC_DIR}$) - if (p > 0) then - L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) - end if - end if $:GPU_LOOP(parallelism='[seq]') - do i = E_idx, advxe - 1 - L(i) = c*Ma*(adv_local(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) + do i = 1, num_dims + dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dvel_ds(i) end do - L(advxe) = rho*c**2._wp*(1._wp + Ma)*(vel(dir_idx(1)) + vel_in(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_in(${CBC_DIR}$) + c*(1._wp + Ma)*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) - end if - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_OUTFLOW)) then - call s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) - ! 
Add GRCBC for Subsonic Outflow (Pressure) - if (bc_${XYZ}$%grcbc_out) then - L(advxe) = c*(1._wp - Ma)*(pres - pres_out(${CBC_DIR}$))/Del_out(${CBC_DIR}$) - - ! Add GRCBC for Subsonic Outflow (Normal Velocity) - if (bc_${XYZ}$%grcbc_vel_out) then - L(advxe) = L(advxe) + rho*c**2._wp*(1._wp - Ma)*(vel(dir_idx(1)) + vel_out(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_out(${CBC_DIR}$) + + dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dpres_ds + $:GPU_LOOP(parallelism='[seq]') + do i = 1, advxe - E_idx + dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dadv_ds(i) + end do + + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_species + dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dYs_ds(i) + end do end if - end if - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_FF_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_FF_SUB_OUTFLOW)) then - call s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_CP_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_CP_SUB_OUTFLOW)) then - call s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_INFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_INFLOW)) then - call s_compute_supersonic_inflow_L(L) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_OUTFLOW)) then - call s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) - end if - - ! Be careful about the cylindrical coordinate! - if (cyl_coord .and. cbc_dir == 2 .and. 
cbc_loc == 1) then - dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + rho*c*c*vel(dir_idx(1)) & - /y_cc(n) - else - dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - dalpha_rho_dt(i) = & - -(L(i + 1) - mf(i)*dpres_dt)/(c*c) - end do + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & - (L(1) - L(advxe))/(2._wp*rho*c) + & - (dir_flg(dir_idx(i)) - 1._wp)* & - L(momxb + i - 1) - end do + ! First-Order Temporal Derivatives of Primitive Variables + lambda(1) = vel(dir_idx(1)) - c + lambda(2) = vel(dir_idx(1)) + lambda(3) = vel(dir_idx(1)) + c + + Ma = vel(dir_idx(1))/c + + if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SLIP_WALL) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SLIP_WALL)) then + call s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_BUFFER) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_BUFFER)) then + call s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_INFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_INFLOW)) then + call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) + ! 
Add GRCBC for Subsonic Inflow + if (bc_${XYZ}$%grcbc_in) then + $:GPU_LOOP(parallelism='[seq]') + do i = 2, momxb + L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) + end do + if (n > 0) then + L(momxb + 1) = c*Ma*(vel(dir_idx(2)) - vel_in(${CBC_DIR}$, dir_idx(2)))/Del_in(${CBC_DIR}$) + if (p > 0) then + L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) + end if + end if + $:GPU_LOOP(parallelism='[seq]') + do i = E_idx, advxe - 1 + L(i) = c*Ma*(adv_local(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) + end do + L(advxe) = rho*c**2._wp*(1._wp + Ma)*(vel(dir_idx(1)) + vel_in(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_in(${CBC_DIR}$) + c*(1._wp + Ma)*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) + end if + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_OUTFLOW)) then + call s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + ! Add GRCBC for Subsonic Outflow (Pressure) + if (bc_${XYZ}$%grcbc_out) then + L(advxe) = c*(1._wp - Ma)*(pres - pres_out(${CBC_DIR}$))/Del_out(${CBC_DIR}$) + + ! Add GRCBC for Subsonic Outflow (Normal Velocity) + if (bc_${XYZ}$%grcbc_vel_out) then + L(advxe) = L(advxe) + rho*c**2._wp*(1._wp - Ma)*(vel(dir_idx(1)) + vel_out(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_out(${CBC_DIR}$) + end if + end if + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_FF_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_FF_SUB_OUTFLOW)) then + call s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_CP_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. 
bc${XYZ}$e == BC_CHAR_CP_SUB_OUTFLOW)) then + call s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_INFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_INFLOW)) then + call s_compute_supersonic_inflow_L(L) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_OUTFLOW)) then + call s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + end if - vel_dv_dt_sum = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) - end do + ! Be careful about the cylindrical coordinate! + if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then + dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + rho*c*c*vel(dir_idx(1)) & + /y_cc(n) + else + dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + end if - if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - dYs_dt(i) = -1._wp*L(chemxb + i - 1) + do i = 1, contxe + dalpha_rho_dt(i) = & + -(L(i + 1) - mf(i)*dpres_dt)/(c*c) end do - end if - ! The treatment of void fraction source is unclear - if (cyl_coord .and. cbc_dir == 2 .and. 
cbc_loc == 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_dt(i) = -L(momxe + i) !+ adv_local(i) * vel(dir_idx(1))/y_cc(n) + do i = 1, num_dims + dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & + (L(1) - L(advxe))/(2._wp*rho*c) + & + (dir_flg(dir_idx(i)) - 1._wp)* & + L(momxb + i - 1) end do - else + + vel_dv_dt_sum = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_dt(i) = -L(momxe + i) + do i = 1, num_dims + vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) end do - end if - drho_dt = 0._wp; dgamma_dt = 0._wp; dpi_inf_dt = 0._wp; dqv_dt = 0._wp + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_species + dYs_dt(i) = -1._wp*L(chemxb + i - 1) + end do + end if - if (model_eqns == 1) then - drho_dt = dalpha_rho_dt(1) - dgamma_dt = dadv_dt(1) - dpi_inf_dt = dadv_dt(2) - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - drho_dt = drho_dt + dalpha_rho_dt(i) - dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) - dpi_inf_dt = dpi_inf_dt + dadv_dt(i)*pi_infs(i) - dqv_dt = dqv_dt + dalpha_rho_dt(i)*qvs(i) - end do - end if + ! The treatment of void fraction source is unclear + if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, advxe - E_idx + dadv_dt(i) = -L(momxe + i) !+ adv_local(i) * vel(dir_idx(1))/y_cc(n) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, advxe - E_idx + dadv_dt(i) = -L(momxe + i) + end do + end if - ! 
flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*dalpha_rho_dt(i) - end do + drho_dt = 0._wp; dgamma_dt = 0._wp; dpi_inf_dt = 0._wp; dqv_dt = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*(vel(i - contxe)*drho_dt & - + rho*dvel_dt(i - contxe)) - end do + if (model_eqns == 1) then + drho_dt = dalpha_rho_dt(1) + dgamma_dt = dadv_dt(1) + dpi_inf_dt = dadv_dt(2) + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + drho_dt = drho_dt + dalpha_rho_dt(i) + dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) + dpi_inf_dt = dpi_inf_dt + dadv_dt(i)*pi_infs(i) + dqv_dt = dqv_dt + dalpha_rho_dt(i)*qvs(i) + end do + end if - if (chemistry) then - ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 - call get_species_enthalpies_rt(T, h_k) - sum_Enthalpies = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T - sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) - end do - flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & - + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) + ! 
flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & - + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) - end do - else - flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & - + ds(0)*(pres*dgamma_dt & - + gamma*dpres_dt & - + dpi_inf_dt & - + dqv_dt & - + rho*vel_dv_dt_sum & - + 5.e-1_wp*drho_dt*vel_K_sum) - end if - - if (riemann_solver == 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp + do i = 1, contxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*dalpha_rho_dt(i) end do $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & - 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & - *sign(1._wp, vel(dir_idx(1))) & - *(flux_rs${XYZ}$_vf_l(0, k, r, i) & - + vel(dir_idx(1)) & - *flux_src_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*dadv_dt(i - E_idx)) + do i = momxb, momxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*(vel(i - contxe)*drho_dt & + + rho*dvel_dt(i - contxe)) end do - else + if (chemistry) then + ! 
Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 + call get_species_enthalpies_rt(T, h_k) + sum_Enthalpies = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_species + h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T + sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) + end do + flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_species + flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & + + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) + end do + else + flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + + ds(0)*(pres*dgamma_dt & + + gamma*dpres_dt & + + dpi_inf_dt & + + dqv_dt & + + rho*vel_dv_dt_sum & + + 5.e-1_wp*drho_dt*vel_K_sum) + end if - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & - ds(0)*dadv_dt(i - E_idx) - end do + if (riemann_solver == 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp + end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & + 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & + *sign(1._wp, vel(dir_idx(1))) & + *(flux_rs${XYZ}$_vf_l(0, k, r, i) & + + vel(dir_idx(1)) & + *flux_src_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*dadv_dt(i - E_idx)) + end do - end if - ! 
END: flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + else + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & + ds(0)*dadv_dt(i - E_idx) + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) + end do + + end if + ! END: flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:endfor @@ -1166,79 +1166,79 @@ contains if (cbc_dir == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsx_vf(j, k, r, i) = & - q_prim_vf(i)%sf(dj*(m - 2*j) + j, k, r) + do i = 1, sys_size + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsx_vf(j, k, r, i) = & + q_prim_vf(i)%sf(dj*(m - 2*j) + j, k, r) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsx_vf(j, k, r, momxb) = & - q_prim_vf(momxb)%sf(dj*(m - 2*j) + j, k, r)* & - sign(1._wp, -1._wp*cbc_loc) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsx_vf(j, k, r, momxb) = & + q_prim_vf(momxb)%sf(dj*(m - 2*j) + j, k, r)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsx_vf_l(j, k, r, i) = & - flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r)* & - sign(1._wp, -1._wp*cbc_loc) + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsx_vf_l(j, k, r, i) = & + flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do end do - end do #:endcall 
GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsx_vf_l(j, k, r, momxb) = & - flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsx_vf_l(j, k, r, momxb) = & + flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsx_vf_l(j, k, r, i) = & - flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsx_vf_l(j, k, r, i) = & + flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsx_vf_l(j, k, r, advxb) = & - flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r)* & - sign(1._wp, -1._wp*cbc_loc) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsx_vf_l(j, k, r, advxb) = & + flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1248,79 +1248,79 @@ contains elseif (cbc_dir == 2) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsy_vf(j, k, r, i) = & - q_prim_vf(i)%sf(k, dj*(n - 2*j) + j, r) + do i = 1, sys_size + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsy_vf(j, k, r, i) = & + q_prim_vf(i)%sf(k, dj*(n - 2*j) + j, r) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do r = 
is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsy_vf(j, k, r, momxb + 1) = & - q_prim_vf(momxb + 1)%sf(k, dj*(n - 2*j) + j, r)* & - sign(1._wp, -1._wp*cbc_loc) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsy_vf(j, k, r, momxb + 1) = & + q_prim_vf(momxb + 1)%sf(k, dj*(n - 2*j) + j, r)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsy_vf_l(j, k, r, i) = & - flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r)* & - sign(1._wp, -1._wp*cbc_loc) + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsy_vf_l(j, k, r, i) = & + flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsy_vf_l(j, k, r, momxb + 1) = & - flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsy_vf_l(j, k, r, momxb + 1) = & + flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsy_vf_l(j, k, r, i) = & - flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsy_vf_l(j, k, r, i) = & + flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, 
is2%end - do j = -1, buff_size - flux_src_rsy_vf_l(j, k, r, advxb) = & - flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r)* & - sign(1._wp, -1._wp*cbc_loc) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsy_vf_l(j, k, r, advxb) = & + flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1330,79 +1330,79 @@ contains else #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsz_vf(j, k, r, i) = & - q_prim_vf(i)%sf(r, k, dj*(p - 2*j) + j) + do i = 1, sys_size + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsz_vf(j, k, r, i) = & + q_prim_vf(i)%sf(r, k, dj*(p - 2*j) + j) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsz_vf(j, k, r, momxe) = & - q_prim_vf(momxe)%sf(r, k, dj*(p - 2*j) + j)* & - sign(1._wp, -1._wp*cbc_loc) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsz_vf(j, k, r, momxe) = & + q_prim_vf(momxe)%sf(r, k, dj*(p - 2*j) + j)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsz_vf_l(j, k, r, i) = & - flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j)* & - sign(1._wp, -1._wp*cbc_loc) + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsz_vf_l(j, k, r, i) = & + flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, 
is2%end - do j = -1, buff_size - flux_rsz_vf_l(j, k, r, momxe) = & - flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsz_vf_l(j, k, r, momxe) = & + flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsz_vf_l(j, k, r, i) = & - flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsz_vf_l(j, k, r, i) = & + flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsz_vf_l(j, k, r, advxb) = & - flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j)* & - sign(1._wp, -1._wp*cbc_loc) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsz_vf_l(j, k, r, advxb) = & + flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1434,53 +1434,53 @@ contains if (cbc_dir == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_rsx_vf_l(j, k, r, i)* & - sign(1._wp, -1._wp*cbc_loc) + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_rsx_vf_l(j, k, r, i)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end 
- do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_rsx_vf_l(j, k, r, momxb) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_rsx_vf_l(j, k, r, momxb) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_src_rsx_vf_l(j, k, r, i) + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_src_rsx_vf_l(j, k, r, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_src_rsx_vf_l(j, k, r, advxb)* & - sign(1._wp, -1._wp*cbc_loc) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_src_rsx_vf_l(j, k, r, advxb)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if ! 
END: Reshaping Outputted Data in x-direction @@ -1489,54 +1489,54 @@ contains elseif (cbc_dir == 2) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_rsy_vf_l(j, k, r, i)* & - sign(1._wp, -1._wp*cbc_loc) + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_rsy_vf_l(j, k, r, i)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_rsy_vf_l(j, k, r, momxb + 1) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_rsy_vf_l(j, k, r, momxb + 1) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_src_rsy_vf_l(j, k, r, i) + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_src_rsy_vf_l(j, k, r, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_src_rsy_vf_l(j, k, r, advxb)* & - sign(1._wp, -1._wp*cbc_loc) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_src_rsy_vf_l(j, k, r, advxb)* & + sign(1._wp, 
-1._wp*cbc_loc) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1546,54 +1546,54 @@ contains else #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_rsz_vf_l(j, k, r, i)* & - sign(1._wp, -1._wp*cbc_loc) + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_rsz_vf_l(j, k, r, i)* & + sign(1._wp, -1._wp*cbc_loc) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_rsz_vf_l(j, k, r, momxe) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_rsz_vf_l(j, k, r, momxe) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_src_rsz_vf_l(j, k, r, i) + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_src_rsz_vf_l(j, k, r, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else #:call GPU_PARALLEL_LOOP(collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_src_rsz_vf_l(j, k, r, advxb)* & - sign(1._wp, -1._wp*cbc_loc) + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_src_rsz_vf_l(j, k, r, advxb)* & + sign(1._wp, 
-1._wp*cbc_loc) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 9f80df014e..ecae911da3 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -280,22 +280,22 @@ contains ! Computing Stability Criteria at Current Time-step #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') - do l = 0, p - do k = 0, n - do j = 0, m - call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + do l = 0, p + do k = 0, n + do j = 0, m + call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - if (viscous) then - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) - else - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) - end if + if (viscous) then + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + else + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP ! end: Computing Stability Criteria at Current Time-step diff --git a/src/simulation/m_derived_variables.fpp b/src/simulation/m_derived_variables.fpp index 467737937e..18c88f85c3 100644 --- a/src/simulation/m_derived_variables.fpp +++ b/src/simulation/m_derived_variables.fpp @@ -188,7 +188,7 @@ contains !! @param q_prim_vf3 Primitive variables !! 
@param q_sf Acceleration component subroutine s_derive_acceleration_component(i, q_prim_vf0, q_prim_vf1, & - q_prim_vf2, q_prim_vf3, q_sf) + q_prim_vf2, q_prim_vf3, q_sf) integer, intent(in) :: i diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index d6eb430dcc..be081f44d9 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -142,23 +142,23 @@ contains #if defined(MFC_GPU) #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP p_real => data_real_gpu @@ -178,13 +178,13 @@ contains $:GPU_UPDATE(device='[Nfq]') #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') @@ -197,36 +197,36 @@ contains #:endcall GPU_HOST_DATA #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + 
j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP do i = 1, fourier_rings #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx]') @@ -242,13 +242,13 @@ contains $:GPU_UPDATE(device='[Nfq]') #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') @@ -261,48 +261,48 @@ contains #:endcall GPU_HOST_DATA #:call 
GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end do #:endcall GPU_DATA #else - Nfq = 3 + Nfq = 3 + do j = 0, m + do k = 1, sys_size + data_fltr_cmplx(:) = (0_dp, 0_dp) + data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) + call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) + data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) + call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) + data_real(:) = data_real(:)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) + end do + end do + + ! Apply Fourier filter to additional rings + do i = 1, fourier_rings + Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) do j = 0, m do k = 1, sys_size data_fltr_cmplx(:) = (0_dp, 0_dp) - data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) + data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) data_real(:) = data_real(:)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) - end do - end do - - ! 
Apply Fourier filter to additional rings - do i = 1, fourier_rings - Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) - do j = 0, m - do k = 1, sys_size - data_fltr_cmplx(:) = (0_dp, 0_dp) - data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) - call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) - data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) - call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) - data_real(:) = data_real(:)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) - end do + q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) end do end do + end do #endif end subroutine s_apply_fourier_filter diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 9cb7bd4c43..bd79e0f7fb 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -107,106 +107,106 @@ contains integer :: j, k, l, i, r #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, rho, gamma, pi_inf, qv, G_local, Re, tensora, tensorb]') - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) - alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) - end do - ! If in simulation, use acc mixture subroutines - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha_k, & - alpha_rho_k, Re, G_local, Gs) - rho = max(rho, sgm_eps) - G_local = max(G_local, sgm_eps) - !if ( G_local <= verysmall ) G_K = 0._wp - - if (G_local > verysmall) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, tensor_size - tensora(i) = 0._wp - end do - ! STEP 1: computing the grad_xi tensor using finite differences - ! grad_xi definition / organization - ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx - ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy - ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz + do l = 0, p + do k = 0, n + do j = 0, m $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - ! 
derivatives in the x-direction - tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x(r, j) - tensora(2) = tensora(2) + q_prim_vf(xibeg + 1)%sf(j + r, k, l)*fd_coeff_x(r, j) - tensora(3) = tensora(3) + q_prim_vf(xiend)%sf(j + r, k, l)*fd_coeff_x(r, j) - ! derivatives in the y-direction - tensora(4) = tensora(4) + q_prim_vf(xibeg)%sf(j, k + r, l)*fd_coeff_y(r, k) - tensora(5) = tensora(5) + q_prim_vf(xibeg + 1)%sf(j, k + r, l)*fd_coeff_y(r, k) - tensora(6) = tensora(6) + q_prim_vf(xiend)%sf(j, k + r, l)*fd_coeff_y(r, k) - ! derivatives in the z-direction - tensora(7) = tensora(7) + q_prim_vf(xibeg)%sf(j, k, l + r)*fd_coeff_z(r, l) - tensora(8) = tensora(8) + q_prim_vf(xibeg + 1)%sf(j, k, l + r)*fd_coeff_z(r, l) - tensora(9) = tensora(9) + q_prim_vf(xiend)%sf(j, k, l + r)*fd_coeff_z(r, l) + do i = 1, num_fluids + alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) + alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) end do - ! STEP 2a: computing the adjoint of the grad_xi tensor for the inverse - tensorb(1) = tensora(5)*tensora(9) - tensora(6)*tensora(8) - tensorb(2) = -(tensora(2)*tensora(9) - tensora(3)*tensora(8)) - tensorb(3) = tensora(2)*tensora(6) - tensora(3)*tensora(5) - tensorb(4) = -(tensora(4)*tensora(9) - tensora(6)*tensora(7)) - tensorb(5) = tensora(1)*tensora(9) - tensora(3)*tensora(7) - tensorb(6) = -(tensora(1)*tensora(6) - tensora(4)*tensora(3)) - tensorb(7) = tensora(4)*tensora(8) - tensora(5)*tensora(7) - tensorb(8) = -(tensora(1)*tensora(8) - tensora(2)*tensora(7)) - tensorb(9) = tensora(1)*tensora(5) - tensora(2)*tensora(4) - - ! STEP 2b: computing the determinant of the grad_xi tensor - tensorb(tensor_size) = tensora(1)*(tensora(5)*tensora(9) - tensora(6)*tensora(8)) & - - tensora(2)*(tensora(4)*tensora(9) - tensora(6)*tensora(7)) & - + tensora(3)*(tensora(4)*tensora(8) - tensora(5)*tensora(7)) - - if (tensorb(tensor_size) > verysmall) then - ! STEP 2c: computing the inverse of grad_xi tensor = F - ! 
tensorb is the adjoint, tensora becomes F + ! If in simulation, use acc mixture subroutines + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha_k, & + alpha_rho_k, Re, G_local, Gs) + rho = max(rho, sgm_eps) + G_local = max(G_local, sgm_eps) + !if ( G_local <= verysmall ) G_K = 0._wp + + if (G_local > verysmall) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, tensor_size - 1 - tensora(i) = tensorb(i)/tensorb(tensor_size) + do i = 1, tensor_size + tensora(i) = 0._wp end do - - ! STEP 2d: computing the J = det(F) = 1/det(\grad{\xi}) - tensorb(tensor_size) = 1._wp/tensorb(tensor_size) - - ! STEP 3: computing F transpose F - tensorb(1) = tensora(1)**2 + tensora(2)**2 + tensora(3)**2 - tensorb(5) = tensora(4)**2 + tensora(5)**2 + tensora(6)**2 - tensorb(9) = tensora(7)**2 + tensora(8)**2 + tensora(9)**2 - tensorb(2) = tensora(1)*tensora(4) + tensora(2)*tensora(5) + tensora(3)*tensora(6) - tensorb(3) = tensora(1)*tensora(7) + tensora(2)*tensora(8) + tensora(3)*tensora(9) - tensorb(6) = tensora(4)*tensora(7) + tensora(5)*tensora(8) + tensora(6)*tensora(9) - ! STEP 4: update the btensor, this is consistent with Riemann solvers - #:for BIJ, TXY in [(1,1),(2,2),(3,5),(4,3),(5,6),(6,9)] - btensor%vf(${BIJ}$)%sf(j, k, l) = tensorb(${TXY}$) - #:endfor - ! store the determinant at the last entry of the btensor - btensor%vf(b_size)%sf(j, k, l) = tensorb(tensor_size) - ! STEP 5a: updating the Cauchy stress primitive scalar field - if (hyper_model == 1) then - call s_neoHookean_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) - elseif (hyper_model == 2) then - call s_Mooney_Rivlin_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) - end if - ! STEP 5b: updating the pressure field - q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & - G_local*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma - ! STEP 5c: updating the Cauchy stress conservative scalar field + ! STEP 1: computing the grad_xi tensor using finite differences + ! 
grad_xi definition / organization + ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx + ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy + ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - q_cons_vf(strxb + i - 1)%sf(j, k, l) = & - rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) + do r = -fd_number, fd_number + ! derivatives in the x-direction + tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x(r, j) + tensora(2) = tensora(2) + q_prim_vf(xibeg + 1)%sf(j + r, k, l)*fd_coeff_x(r, j) + tensora(3) = tensora(3) + q_prim_vf(xiend)%sf(j + r, k, l)*fd_coeff_x(r, j) + ! derivatives in the y-direction + tensora(4) = tensora(4) + q_prim_vf(xibeg)%sf(j, k + r, l)*fd_coeff_y(r, k) + tensora(5) = tensora(5) + q_prim_vf(xibeg + 1)%sf(j, k + r, l)*fd_coeff_y(r, k) + tensora(6) = tensora(6) + q_prim_vf(xiend)%sf(j, k + r, l)*fd_coeff_y(r, k) + ! derivatives in the z-direction + tensora(7) = tensora(7) + q_prim_vf(xibeg)%sf(j, k, l + r)*fd_coeff_z(r, l) + tensora(8) = tensora(8) + q_prim_vf(xibeg + 1)%sf(j, k, l + r)*fd_coeff_z(r, l) + tensora(9) = tensora(9) + q_prim_vf(xiend)%sf(j, k, l + r)*fd_coeff_z(r, l) end do + ! STEP 2a: computing the adjoint of the grad_xi tensor for the inverse + tensorb(1) = tensora(5)*tensora(9) - tensora(6)*tensora(8) + tensorb(2) = -(tensora(2)*tensora(9) - tensora(3)*tensora(8)) + tensorb(3) = tensora(2)*tensora(6) - tensora(3)*tensora(5) + tensorb(4) = -(tensora(4)*tensora(9) - tensora(6)*tensora(7)) + tensorb(5) = tensora(1)*tensora(9) - tensora(3)*tensora(7) + tensorb(6) = -(tensora(1)*tensora(6) - tensora(4)*tensora(3)) + tensorb(7) = tensora(4)*tensora(8) - tensora(5)*tensora(7) + tensorb(8) = -(tensora(1)*tensora(8) - tensora(2)*tensora(7)) + tensorb(9) = tensora(1)*tensora(5) - tensora(2)*tensora(4) + + ! 
STEP 2b: computing the determinant of the grad_xi tensor + tensorb(tensor_size) = tensora(1)*(tensora(5)*tensora(9) - tensora(6)*tensora(8)) & + - tensora(2)*(tensora(4)*tensora(9) - tensora(6)*tensora(7)) & + + tensora(3)*(tensora(4)*tensora(8) - tensora(5)*tensora(7)) + + if (tensorb(tensor_size) > verysmall) then + ! STEP 2c: computing the inverse of grad_xi tensor = F + ! tensorb is the adjoint, tensora becomes F + $:GPU_LOOP(parallelism='[seq]') + do i = 1, tensor_size - 1 + tensora(i) = tensorb(i)/tensorb(tensor_size) + end do + + ! STEP 2d: computing the J = det(F) = 1/det(\grad{\xi}) + tensorb(tensor_size) = 1._wp/tensorb(tensor_size) + + ! STEP 3: computing F transpose F + tensorb(1) = tensora(1)**2 + tensora(2)**2 + tensora(3)**2 + tensorb(5) = tensora(4)**2 + tensora(5)**2 + tensora(6)**2 + tensorb(9) = tensora(7)**2 + tensora(8)**2 + tensora(9)**2 + tensorb(2) = tensora(1)*tensora(4) + tensora(2)*tensora(5) + tensora(3)*tensora(6) + tensorb(3) = tensora(1)*tensora(7) + tensora(2)*tensora(8) + tensora(3)*tensora(9) + tensorb(6) = tensora(4)*tensora(7) + tensora(5)*tensora(8) + tensora(6)*tensora(9) + ! STEP 4: update the btensor, this is consistent with Riemann solvers + #:for BIJ, TXY in [(1,1),(2,2),(3,5),(4,3),(5,6),(6,9)] + btensor%vf(${BIJ}$)%sf(j, k, l) = tensorb(${TXY}$) + #:endfor + ! store the determinant at the last entry of the btensor + btensor%vf(b_size)%sf(j, k, l) = tensorb(tensor_size) + ! STEP 5a: updating the Cauchy stress primitive scalar field + if (hyper_model == 1) then + call s_neoHookean_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) + elseif (hyper_model == 2) then + call s_Mooney_Rivlin_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) + end if + ! STEP 5b: updating the pressure field + q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & + G_local*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma + ! 
STEP 5c: updating the Cauchy stress conservative scalar field + $:GPU_LOOP(parallelism='[seq]') + do i = 1, b_size - 1 + q_cons_vf(strxb + i - 1)%sf(j, k, l) = & + rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) + end do + end if end if - end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_hyperelastic_rmt_stress_update diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 84be088fae..e4c781ebe9 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -105,270 +105,270 @@ contains ! TODO: re-organize these loops one by one for GPU efficiency if possible? #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - du_dx(k, l, q) = 0._wp + do q = 0, p + do l = 0, n + do k = 0, m + du_dx(k, l, q) = 0._wp + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - du_dx(k, l, q) = du_dx(k, l, q) & - + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_h(r, k) - end do + do q = 0, p + do l = 0, n + do k = 0, m + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + du_dx(k, l, q) = du_dx(k, l, q) & + + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_h(r, k) + end do + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (ndirs > 1) then #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - du_dy(k, l, q) = 0._wp; dv_dx(k, l, q) = 0._wp; dv_dy(k, l, q) = 0._wp + do q = 0, p + do l = 0, n + do k = 0, m + du_dy(k, l, q) = 0._wp; dv_dx(k, l, q) = 0._wp; dv_dy(k, l, q) = 0._wp + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - du_dy(k, l, q) = du_dy(k, l, q) & - + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_h(r, l) - dv_dx(k, l, q) = 
dv_dx(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k + r, l, q)*fd_coeff_x_h(r, k) - dv_dy(k, l, q) = dv_dy(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + do q = 0, p + do l = 0, n + do k = 0, m + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + du_dy(k, l, q) = du_dy(k, l, q) & + + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + dv_dx(k, l, q) = dv_dx(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k + r, l, q)*fd_coeff_x_h(r, k) + dv_dy(k, l, q) = dv_dy(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP ! 3D if (ndirs == 3) then #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - du_dz(k, l, q) = 0._wp; dv_dz(k, l, q) = 0._wp; dw_dx(k, l, q) = 0._wp; - dw_dy(k, l, q) = 0._wp; dw_dz(k, l, q) = 0._wp; + do q = 0, p + do l = 0, n + do k = 0, m + du_dz(k, l, q) = 0._wp; dv_dz(k, l, q) = 0._wp; dw_dx(k, l, q) = 0._wp; + dw_dy(k, l, q) = 0._wp; dw_dz(k, l, q) = 0._wp; + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - du_dz(k, l, q) = du_dz(k, l, q) & - + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_h(r, q) - dv_dz(k, l, q) = dv_dz(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k, l, q + r)*fd_coeff_z_h(r, q) - dw_dx(k, l, q) = dw_dx(k, l, q) & - + q_prim_vf(momxe)%sf(k + r, l, q)*fd_coeff_x_h(r, k) - dw_dy(k, l, q) = dw_dy(k, l, q) & - + q_prim_vf(momxe)%sf(k, l + r, q)*fd_coeff_y_h(r, l) - dw_dz(k, l, q) = dw_dz(k, l, q) & - + q_prim_vf(momxe)%sf(k, l, q + r)*fd_coeff_z_h(r, q) + do q = 0, p + do l = 0, n + do k = 0, m + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + du_dz(k, l, q) = du_dz(k, l, q) & + + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_h(r, q) + dv_dz(k, l, q) = dv_dz(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k, l, q + r)*fd_coeff_z_h(r, q) + dw_dx(k, 
l, q) = dw_dx(k, l, q) & + + q_prim_vf(momxe)%sf(k + r, l, q)*fd_coeff_x_h(r, k) + dw_dy(k, l, q) = dw_dy(k, l, q) & + + q_prim_vf(momxe)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + dw_dz(k, l, q) = dw_dz(k, l, q) & + + q_prim_vf(momxe)%sf(k, l, q + r)*fd_coeff_z_h(r, q) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end if #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rho_K = 0._wp; G_K = 0._wp - do i = 1, num_fluids - rho_K = rho_K + q_prim_vf(i)%sf(k, l, q) !alpha_rho_K(1) - G_K = G_K + q_prim_vf(advxb - 1 + i)%sf(k, l, q)*Gs(i) !alpha_K(1) * Gs(1) - end do + do q = 0, p + do l = 0, n + do k = 0, m + rho_K = 0._wp; G_K = 0._wp + do i = 1, num_fluids + rho_K = rho_K + q_prim_vf(i)%sf(k, l, q) !alpha_rho_K(1) + G_K = G_K + q_prim_vf(advxb - 1 + i)%sf(k, l, q)*Gs(i) !alpha_K(1) * Gs(1) + end do - if (cont_damage) G_K = G_K*max((1._wp - q_prim_vf(damage_idx)%sf(k, l, q)), 0._wp) + if (cont_damage) G_K = G_K*max((1._wp - q_prim_vf(damage_idx)%sf(k, l, q)), 0._wp) - rho_K_field(k, l, q) = rho_K - G_K_field(k, l, q) = G_K + rho_K_field(k, l, q) = rho_K + G_K_field(k, l, q) = G_K - !TODO: take this out if not needed - if (G_K < verysmall) then - G_K_field(k, l, q) = 0 - end if + !TODO: take this out if not needed + if (G_K < verysmall) then + G_K_field(k, l, q) = 0 + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP ! 
apply rhs source term to elastic stress equation #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rhs_vf(strxb)%sf(k, l, q) = & - rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - ((4._wp*G_K_field(k, l, q)/3._wp) + & - q_prim_vf(strxb)%sf(k, l, q))* & - du_dx(k, l, q) + do q = 0, p + do l = 0, n + do k = 0, m + rhs_vf(strxb)%sf(k, l, q) = & + rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & + ((4._wp*G_K_field(k, l, q)/3._wp) + & + q_prim_vf(strxb)%sf(k, l, q))* & + du_dx(k, l, q) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (idir == 2) then #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy(k, l, q) - & - q_prim_vf(strxb)%sf(k, l, q)*dv_dy(k, l, q) - & - 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dv_dy(k, l, q)) - - rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb)%sf(k, l, q)*dv_dx(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*du_dy(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dy(k, l, q) + & - dv_dx(k, l, q))) - - rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(dv_dy(k, l, q) - (1._wp/3._wp)* & - (du_dx(k, l, q) + & - dv_dy(k, l, 
q)))) + do q = 0, p + do l = 0, n + do k = 0, m + rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy(k, l, q) - & + q_prim_vf(strxb)%sf(k, l, q)*dv_dy(k, l, q) - & + 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dv_dy(k, l, q)) + + rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx(k, l, q) + & + q_prim_vf(strxb)%sf(k, l, q)*dv_dx(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*du_dy(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dy(k, l, q) + & + dv_dx(k, l, q))) + + rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*du_dx(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(dv_dy(k, l, q) - (1._wp/3._wp)* & + (du_dx(k, l, q) + & + dv_dy(k, l, q)))) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (idir == 3) then #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz(k, l, q) - & - q_prim_vf(strxb)%sf(k, l, q)*dw_dz(k, l, q) - & - 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz(k, l, q)) - - rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 4)%sf(k, l, q)*du_dz(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, 
q)*dv_dz(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dz(k, l, q)) - - rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dz(k, l, q) - & + do q = 0, p + do l = 0, n + do k = 0, m + rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz(k, l, q) - & + q_prim_vf(strxb)%sf(k, l, q)*dw_dz(k, l, q) - & 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz(k, l, q)) - rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb)%sf(k, l, q)*dw_dx(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*du_dy(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dy(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dy(k, l, q) + & - q_prim_vf(strxb + 5)%sf(k, l, q)*du_dz(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dz(k, l, q) + & - dw_dx(k, l, q))) - - rhs_vf(strxb + 4)%sf(k, l, q) = rhs_vf(strxb + 4)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dx(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dx(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dy(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy(k, l, q) + & - q_prim_vf(strxb + 5)%sf(k, l, q)*dv_dz(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(dv_dz(k, l, q) + & - dw_dy(k, l, q))) - - 
rhs_vf(strxe)%sf(k, l, q) = rhs_vf(strxe)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx(k, l, q) + & - q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy(k, l, q) + & - q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*dv_dy(k, l, q) + & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz(k, l, q) + & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(dw_dz(k, l, q) - (1._wp/3._wp)* & - (du_dx(k, l, q) + & - dv_dy(k, l, q) + & - dw_dz(k, l, q)))) + rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 4)%sf(k, l, q)*du_dz(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dz(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dz(k, l, q)) + + rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dz(k, l, q) - & + 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz(k, l, q)) + + rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx(k, l, q) + & + q_prim_vf(strxb)%sf(k, l, q)*dw_dx(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*du_dy(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dy(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dy(k, l, q) + & + q_prim_vf(strxb + 5)%sf(k, l, q)*du_dz(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dz(k, l, q) + & + dw_dx(k, l, q))) + + rhs_vf(strxb + 4)%sf(k, l, q) = rhs_vf(strxb + 4)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb 
+ 3)%sf(k, l, q)*dv_dx(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dx(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*du_dx(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dy(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy(k, l, q) + & + q_prim_vf(strxb + 5)%sf(k, l, q)*dv_dz(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(dv_dz(k, l, q) + & + dw_dy(k, l, q))) + + rhs_vf(strxe)%sf(k, l, q) = rhs_vf(strxe)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx(k, l, q) + & + q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*du_dx(k, l, q) + & + q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy(k, l, q) + & + q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*dv_dy(k, l, q) + & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz(k, l, q) + & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(dw_dz(k, l, q) - (1._wp/3._wp)* & + (du_dx(k, l, q) + & + dv_dy(k, l, q) + & + dw_dz(k, l, q)))) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (cyl_coord .and. idir == 2) then #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - ! S_xx -= rho * v/r * (tau_xx + 2/3*G) - rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) - & - rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & - (q_prim_vf(strxb)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_xx + 2/3*G - - ! S_xr -= rho * v/r * tau_xr - rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) - & - rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & - q_prim_vf(strxb + 1)%sf(k, l, q) ! tau_xx - - ! 
S_rr -= rho * v/r * (tau_rr + 2/3*G) - rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) - & + do q = 0, p + do l = 0, n + do k = 0, m + ! S_xx -= rho * v/r * (tau_xx + 2/3*G) + rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) - & rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & - (q_prim_vf(strxb + 2)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_rr + 2/3*G - - ! S_thetatheta += rho * ( -(tau_thetatheta + 2/3*G)*(du/dx + dv/dr + v/r) + 2*(tau_thetatheta + G)*v/r ) - rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + & - rho_K_field(k, l, q)*( & - -(q_prim_vf(strxb + 3)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q))* & - (du_dx(k, l, q) + dv_dy(k, l, q) + q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) & - + 2._wp*(q_prim_vf(strxb + 3)%sf(k, l, q) + G_K_field(k, l, q))*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) + (q_prim_vf(strxb)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_xx + 2/3*G + + ! S_xr -= rho * v/r * tau_xr + rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) - & + rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & + q_prim_vf(strxb + 1)%sf(k, l, q) ! tau_xx + + ! S_rr -= rho * v/r * (tau_rr + 2/3*G) + rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) - & + rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & + (q_prim_vf(strxb + 2)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_rr + 2/3*G + + ! 
S_thetatheta += rho * ( -(tau_thetatheta + 2/3*G)*(du/dx + dv/dr + v/r) + 2*(tau_thetatheta + G)*v/r ) + rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + & + rho_K_field(k, l, q)*( & + -(q_prim_vf(strxb + 3)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q))* & + (du_dx(k, l, q) + dv_dy(k, l, q) + q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) & + + 2._wp*(q_prim_vf(strxb + 3)%sf(k, l, q) + G_K_field(k, l, q))*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -405,64 +405,64 @@ contains if (n == 0) then l = 0; q = 0 #:call GPU_PARALLEL_LOOP() - do k = 0, m - rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s - end do + do k = 0, m + rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s + end do #:endcall GPU_PARALLEL_LOOP elseif (p == 0) then q = 0 #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, n - do k = 0, m - ! Maximum principal stress - tau_p = 0.5_wp*(q_cons_vf(stress_idx%beg)%sf(k, l, q) + & - q_cons_vf(stress_idx%beg + 2)%sf(k, l, q)) + & - sqrt((q_cons_vf(stress_idx%beg)%sf(k, l, q) - & - q_cons_vf(stress_idx%beg + 2)%sf(k, l, q))**2.0_wp + & - 4._wp*q_cons_vf(stress_idx%beg + 1)%sf(k, l, q)**2.0_wp)/2._wp - - rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - #:call GPU_PARALLEL_LOOP(collapse=3) - do q = 0, p do l = 0, n do k = 0, m - tau_xx = q_cons_vf(stress_idx%beg)%sf(k, l, q) - tau_xy = q_cons_vf(stress_idx%beg + 1)%sf(k, l, q) - tau_yy = q_cons_vf(stress_idx%beg + 2)%sf(k, l, q) - tau_xz = q_cons_vf(stress_idx%beg + 3)%sf(k, l, q) - tau_yz = q_cons_vf(stress_idx%beg + 4)%sf(k, l, q) - tau_zz = q_cons_vf(stress_idx%beg + 5)%sf(k, l, q) - - ! 
Invariants of the stress tensor - I1 = tau_xx + tau_yy + tau_zz - I2 = tau_xx*tau_yy + tau_xx*tau_zz + tau_yy*tau_zz - & - (tau_xy**2.0_wp + tau_xz**2.0_wp + tau_yz**2.0_wp) - I3 = tau_xx*tau_yy*tau_zz + 2.0_wp*tau_xy*tau_xz*tau_yz - & - tau_xx*tau_yz**2.0_wp - tau_yy*tau_xz**2.0_wp - tau_zz*tau_xy**2.0_wp - ! Maximum principal stress - temp = I1**2.0_wp - 3.0_wp*I2 - sqrt_term_1 = sqrt(max(temp, 0.0_wp)) - if (sqrt_term_1 > verysmall) then ! Avoid 0/0 - argument = (2.0_wp*I1*I1*I1 - 9.0_wp*I1*I2 + 27.0_wp*I3)/ & - (2.0_wp*sqrt_term_1*sqrt_term_1*sqrt_term_1) - if (argument > 1.0_wp) argument = 1.0_wp - if (argument < -1.0_wp) argument = -1.0_wp - phi = acos(argument) - sqrt_term_2 = sqrt(max(I1**2.0_wp - 3.0_wp*I2, 0.0_wp)) - tau_p = I1/3.0_wp + 2.0_wp/sqrt(3.0_wp)*sqrt_term_2*cos(phi/3.0_wp) - else - tau_p = I1/3.0_wp - end if + tau_p = 0.5_wp*(q_cons_vf(stress_idx%beg)%sf(k, l, q) + & + q_cons_vf(stress_idx%beg + 2)%sf(k, l, q)) + & + sqrt((q_cons_vf(stress_idx%beg)%sf(k, l, q) - & + q_cons_vf(stress_idx%beg + 2)%sf(k, l, q))**2.0_wp + & + 4._wp*q_cons_vf(stress_idx%beg + 1)%sf(k, l, q)**2.0_wp)/2._wp rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s end do end do - end do + #:endcall GPU_PARALLEL_LOOP + else + #:call GPU_PARALLEL_LOOP(collapse=3) + do q = 0, p + do l = 0, n + do k = 0, m + tau_xx = q_cons_vf(stress_idx%beg)%sf(k, l, q) + tau_xy = q_cons_vf(stress_idx%beg + 1)%sf(k, l, q) + tau_yy = q_cons_vf(stress_idx%beg + 2)%sf(k, l, q) + tau_xz = q_cons_vf(stress_idx%beg + 3)%sf(k, l, q) + tau_yz = q_cons_vf(stress_idx%beg + 4)%sf(k, l, q) + tau_zz = q_cons_vf(stress_idx%beg + 5)%sf(k, l, q) + + ! Invariants of the stress tensor + I1 = tau_xx + tau_yy + tau_zz + I2 = tau_xx*tau_yy + tau_xx*tau_zz + tau_yy*tau_zz - & + (tau_xy**2.0_wp + tau_xz**2.0_wp + tau_yz**2.0_wp) + I3 = tau_xx*tau_yy*tau_zz + 2.0_wp*tau_xy*tau_xz*tau_yz - & + tau_xx*tau_yz**2.0_wp - tau_yy*tau_xz**2.0_wp - tau_zz*tau_xy**2.0_wp + + ! 
Maximum principal stress + temp = I1**2.0_wp - 3.0_wp*I2 + sqrt_term_1 = sqrt(max(temp, 0.0_wp)) + if (sqrt_term_1 > verysmall) then ! Avoid 0/0 + argument = (2.0_wp*I1*I1*I1 - 9.0_wp*I1*I2 + 27.0_wp*I3)/ & + (2.0_wp*sqrt_term_1*sqrt_term_1*sqrt_term_1) + if (argument > 1.0_wp) argument = 1.0_wp + if (argument < -1.0_wp) argument = -1.0_wp + phi = acos(argument) + sqrt_term_2 = sqrt(max(I1**2.0_wp - 3.0_wp*I2, 0.0_wp)) + tau_p = I1/3.0_wp + 2.0_wp/sqrt(3.0_wp)*sqrt_term_2*cos(phi/3.0_wp) + else + tau_p = I1/3.0_wp + end if + + rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s + end do + end do + end do #:endcall GPU_PARALLEL_LOOP end if diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 97d31f9bc8..668a15d41e 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -170,163 +170,163 @@ contains type(ghost_point) :: innerp #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, j,k,l,q]') - do i = 1, num_gps - - gp = ghost_points(i) - j = gp%loc(1) - k = gp%loc(2) - l = gp%loc(3) - patch_id = ghost_points(i)%ib_patch_id + do i = 1, num_gps - ! Calculate physical location of GP - if (p > 0) then - physical_loc = [x_cc(j), y_cc(k), z_cc(l)] - else - physical_loc = [x_cc(j), y_cc(k), 0._wp] - end if + gp = ghost_points(i) + j = gp%loc(1) + k = gp%loc(2) + l = gp%loc(3) + patch_id = ghost_points(i)%ib_patch_id - !Interpolate primitive variables at image point associated w/ GP - if (bubbles_euler .and. .not. qbmm) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP) - else if (qbmm .and. polytropic) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP, nmom_IP) - else if (qbmm .and. 
.not. polytropic) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) - else - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) - end if + ! Calculate physical location of GP + if (p > 0) then + physical_loc = [x_cc(j), y_cc(k), z_cc(l)] + else + physical_loc = [x_cc(j), y_cc(k), 0._wp] + end if - dyn_pres = 0._wp + !Interpolate primitive variables at image point associated w/ GP + if (bubbles_euler .and. .not. qbmm) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP) + else if (qbmm .and. polytropic) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP, nmom_IP) + else if (qbmm .and. .not. polytropic) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) + else + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) + end if - ! Set q_prim_vf params at GP so that mixture vars calculated properly - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) - q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) - end do + dyn_pres = 0._wp - if (surface_tension) then - q_prim_vf(c_idx)%sf(j, k, l) = c_IP - end if + ! Set q_prim_vf params at GP so that mixture vars calculated properly + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) + q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) + end do - if (model_eqns /= 4) then - ! 
If in simulation, use acc mixture subroutines - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K, G_K, Gs) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K) + if (surface_tension) then + q_prim_vf(c_idx)%sf(j, k, l) = c_IP end if - end if - ! Calculate velocity of ghost cell - if (gp%slip) then - norm(1:3) = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, 1:3) - buf = sqrt(sum(norm**2)) - norm = norm/buf - vel_norm_IP = sum(vel_IP*norm)*norm - vel_g = vel_IP - vel_norm_IP - else - vel_g = 0._wp - end if + if (model_eqns /= 4) then + ! If in simulation, use acc mixture subroutines + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K, G_K, Gs) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K) + end if + end if - ! Set momentum - $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) - dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & - vel_g(q - momxb + 1)/2._wp - end do + ! Calculate velocity of ghost cell + if (gp%slip) then + norm(1:3) = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, 1:3) + buf = sqrt(sum(norm**2)) + norm = norm/buf + vel_norm_IP = sum(vel_IP*norm)*norm + vel_g = vel_IP - vel_norm_IP + else + vel_g = 0._wp + end if - ! 
Set continuity and adv vars - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) - q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) - end do + ! Set momentum + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) + dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & + vel_g(q - momxb + 1)/2._wp + end do - ! Set color function - if (surface_tension) then - q_cons_vf(c_idx)%sf(j, k, l) = c_IP - end if + ! Set continuity and adv vars + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) + q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) + end do - ! Set Energy - if (bubbles_euler) then - q_cons_vf(E_idx)%sf(j, k, l) = (1 - alpha_IP(1))*(gamma*pres_IP + pi_inf + dyn_pres) - else - q_cons_vf(E_idx)%sf(j, k, l) = gamma*pres_IP + pi_inf + dyn_pres - end if + ! Set color function + if (surface_tension) then + q_cons_vf(c_idx)%sf(j, k, l) = c_IP + end if - ! Set bubble vars - if (bubbles_euler .and. .not. qbmm) then - call s_comp_n_from_prim(alpha_IP(1), r_IP, nbub, weight) - do q = 1, nb - q_cons_vf(bubxb + (q - 1)*2)%sf(j, k, l) = nbub*r_IP(q) - q_cons_vf(bubxb + (q - 1)*2 + 1)%sf(j, k, l) = nbub*v_IP(q) - if (.not. polytropic) then - q_cons_vf(bubxb + (q - 1)*4)%sf(j, k, l) = nbub*r_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 1)%sf(j, k, l) = nbub*v_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 2)%sf(j, k, l) = nbub*pb_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 3)%sf(j, k, l) = nbub*mv_IP(q) - end if - end do - end if + ! Set Energy + if (bubbles_euler) then + q_cons_vf(E_idx)%sf(j, k, l) = (1 - alpha_IP(1))*(gamma*pres_IP + pi_inf + dyn_pres) + else + q_cons_vf(E_idx)%sf(j, k, l) = gamma*pres_IP + pi_inf + dyn_pres + end if - if (qbmm) then + ! Set bubble vars + if (bubbles_euler .and. .not. 
qbmm) then + call s_comp_n_from_prim(alpha_IP(1), r_IP, nbub, weight) + do q = 1, nb + q_cons_vf(bubxb + (q - 1)*2)%sf(j, k, l) = nbub*r_IP(q) + q_cons_vf(bubxb + (q - 1)*2 + 1)%sf(j, k, l) = nbub*v_IP(q) + if (.not. polytropic) then + q_cons_vf(bubxb + (q - 1)*4)%sf(j, k, l) = nbub*r_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 1)%sf(j, k, l) = nbub*v_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 2)%sf(j, k, l) = nbub*pb_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 3)%sf(j, k, l) = nbub*mv_IP(q) + end if + end do + end if - nbub = nmom_IP(1) - do q = 1, nb*nmom - q_cons_vf(bubxb + q - 1)%sf(j, k, l) = nbub*nmom_IP(q) - end do - do q = 1, nb - q_cons_vf(bubxb + (q - 1)*nmom)%sf(j, k, l) = nbub - end do + if (qbmm) then - if (.not. polytropic) then + nbub = nmom_IP(1) + do q = 1, nb*nmom + q_cons_vf(bubxb + q - 1)%sf(j, k, l) = nbub*nmom_IP(q) + end do do q = 1, nb - do r = 1, nnode - pb_in(j, k, l, r, q) = presb_IP((q - 1)*nnode + r) - mv_in(j, k, l, r, q) = massv_IP((q - 1)*nnode + r) - end do + q_cons_vf(bubxb + (q - 1)*nmom)%sf(j, k, l) = nbub end do + + if (.not. 
polytropic) then + do q = 1, nb + do r = 1, nnode + pb_in(j, k, l, r, q) = presb_IP((q - 1)*nnode + r) + mv_in(j, k, l, r, q) = massv_IP((q - 1)*nnode + r) + end do + end do + end if end if - end if - if (model_eqns == 3) then - $:GPU_LOOP(parallelism='[seq]') - do q = intxb, intxe - q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & - + pi_infs(q - intxb + 1)) - end do - end if - end do + if (model_eqns == 3) then + $:GPU_LOOP(parallelism='[seq]') + do q = intxb, intxe + q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & + + pi_infs(q - intxb + 1)) + end do + end if + end do #:endcall GPU_PARALLEL_LOOP !Correct the state of the inner points in IBs #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]') - do i = 1, num_inner_gps + do i = 1, num_inner_gps - innerp = inner_points(i) - j = innerp%loc(1) - k = innerp%loc(2) - l = innerp%loc(3) + innerp = inner_points(i) + j = innerp%loc(1) + k = innerp%loc(2) + l = innerp%loc(3) - $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - q_cons_vf(q)%sf(j, k, l) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + q_cons_vf(q)%sf(j, k, l) = 0._wp + end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_ibm_correct_state diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 43d8aeef18..1680efdb6f 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -77,58 +77,58 @@ contains real(wp) :: divB, vdotB #:call GPU_PARALLEL_LOOP(collapse=3, private='[v, B]') - do q = 0, p - do l = 0, n - do k = 0, m - - divB = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) - end do - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) - end do - if (p > 0) then + do q = 
0, p + do l = 0, n + do k = 0, m + + divB = 0._wp $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) + divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) end do - end if - - v(1) = q_prim_vf(momxb)%sf(k, l, q) - v(2) = q_prim_vf(momxb + 1)%sf(k, l, q) - v(3) = q_prim_vf(momxb + 2)%sf(k, l, q) - - B(1) = q_prim_vf(B_idx%beg)%sf(k, l, q) - B(2) = q_prim_vf(B_idx%beg + 1)%sf(k, l, q) - B(3) = q_prim_vf(B_idx%beg + 2)%sf(k, l, q) - - vdotB = sum(v*B) - - ! 1: rho -> unchanged - ! 2: vx -> - (divB) * Bx - ! 3: vy -> - (divB) * By - ! 4: vz -> - (divB) * Bz - ! 5: E -> - (divB) * (vdotB) - ! 6: Bx -> - (divB) * vx - ! 7: By -> - (divB) * vy - ! 8: Bz -> - (divB) * vz - - rhs_vf(momxb)%sf(k, l, q) = rhs_vf(momxb)%sf(k, l, q) - divB*B(1) - rhs_vf(momxb + 1)%sf(k, l, q) = rhs_vf(momxb + 1)%sf(k, l, q) - divB*B(2) - rhs_vf(momxb + 2)%sf(k, l, q) = rhs_vf(momxb + 2)%sf(k, l, q) - divB*B(3) - - rhs_vf(E_idx)%sf(k, l, q) = rhs_vf(E_idx)%sf(k, l, q) - divB*vdotB - - rhs_vf(B_idx%beg)%sf(k, l, q) = rhs_vf(B_idx%beg)%sf(k, l, q) - divB*v(1) - rhs_vf(B_idx%beg + 1)%sf(k, l, q) = rhs_vf(B_idx%beg + 1)%sf(k, l, q) - divB*v(2) - rhs_vf(B_idx%beg + 2)%sf(k, l, q) = rhs_vf(B_idx%beg + 2)%sf(k, l, q) - divB*v(3) + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + end do + if (p > 0) then + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) + end do + end if + + v(1) = q_prim_vf(momxb)%sf(k, l, q) + v(2) = q_prim_vf(momxb + 1)%sf(k, l, q) + v(3) = q_prim_vf(momxb + 2)%sf(k, l, q) + + B(1) = q_prim_vf(B_idx%beg)%sf(k, l, q) + B(2) = q_prim_vf(B_idx%beg + 1)%sf(k, l, q) + B(3) = q_prim_vf(B_idx%beg + 2)%sf(k, l, q) + + vdotB = sum(v*B) + + ! 1: rho -> unchanged + ! 2: vx -> - (divB) * Bx + ! 
3: vy -> - (divB) * By + ! 4: vz -> - (divB) * Bz + ! 5: E -> - (divB) * (vdotB) + ! 6: Bx -> - (divB) * vx + ! 7: By -> - (divB) * vy + ! 8: Bz -> - (divB) * vz + + rhs_vf(momxb)%sf(k, l, q) = rhs_vf(momxb)%sf(k, l, q) - divB*B(1) + rhs_vf(momxb + 1)%sf(k, l, q) = rhs_vf(momxb + 1)%sf(k, l, q) - divB*B(2) + rhs_vf(momxb + 2)%sf(k, l, q) = rhs_vf(momxb + 2)%sf(k, l, q) - divB*B(3) + + rhs_vf(E_idx)%sf(k, l, q) = rhs_vf(E_idx)%sf(k, l, q) - divB*vdotB + + rhs_vf(B_idx%beg)%sf(k, l, q) = rhs_vf(B_idx%beg)%sf(k, l, q) - divB*v(1) + rhs_vf(B_idx%beg + 1)%sf(k, l, q) = rhs_vf(B_idx%beg + 1)%sf(k, l, q) - divB*v(2) + rhs_vf(B_idx%beg + 2)%sf(k, l, q) = rhs_vf(B_idx%beg + 2)%sf(k, l, q) - divB*v(3) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_mhd_powell_rhs diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 915146fdfe..2a012cdc75 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -300,38 +300,38 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) + do l = 0, p + do k = 0, n + do j = 0, gp_layers - 1 + r = (j + gp_layers*(k + (n + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:elif mpi_dir == 2 #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) + do l = 0, p + do k = 0, gp_layers - 1 + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + (k + gp_layers*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) + end do end do end do - end do #:endcall 
GPU_PARALLEL_LOOP #:else #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) + do l = 0, gp_layers - 1 + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:endif end if @@ -351,40 +351,40 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = 0, p - do k = 0, n - do j = -gp_layers, -1 - r = (j + gp_layers*((k + 1) + (n + 1)*l)) - ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) + do l = 0, p + do k = 0, n + do j = -gp_layers, -1 + r = (j + gp_layers*((k + 1) + (n + 1)*l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:elif mpi_dir == 2 #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = 0, p - do k = -gp_layers, -1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + gp_layers*l)) - ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) + do l = 0, p + do k = -gp_layers, -1 + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + gp_layers*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:else ! 
Unpacking buffer from bc_z%beg #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') - do l = -gp_layers, -1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l + gp_layers))) - ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) + do l = -gp_layers, -1 + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)* & + (l + gp_layers))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:endif end if diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index 5449e84058..52340ccbb2 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -71,13 +71,13 @@ contains integer :: j, k, l #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - call s_relax_cell_pressure(q_cons_vf, j, k, l) + do l = 0, p + do k = 0, n + do j = 0, m + call s_relax_cell_pressure(q_cons_vf, j, k, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_pressure_relaxation_procedure diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index c5fbd486b4..6ce6005e87 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -434,130 +434,130 @@ contains if (.not. 
polytropic) then #:call GPU_PARALLEL_LOOP(collapse=5,private='[nb_q,nR,nR2,R,R2,nb_dot,nR_dot,nR2_dot,var,AX]') - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 0, m - nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - var = max(R2 - R**2._wp, verysmall) - if (q <= 2) then - AX = R - sqrt(var) - else - AX = R + sqrt(var) - end if - select case (idir) - case (1) - nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) - case (2) - nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) - case (3) - if (is_axisym) then - nb_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l)) - nR_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)) - nR2_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i 
- 1)*nmom)%sf(j, k, l)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + do i = 1, nb + do q = 1, nnode + do l = 0, p + do k = 0, n + do j = 0, m + nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + var = max(R2 - R**2._wp, verysmall) + if (q <= 2) then + AX = R - sqrt(var) else - nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + AX = R + sqrt(var) end if - end select - if (q <= 2) then select case (idir) case (1) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) case (2) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 
3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) case (3) if (is_axisym) then - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + nb_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l)) + nR_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)) + nR2_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + else + nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + end if + end select + if (q <= 2) then + select case (idir) + case (1) + 
rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - else - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & + case (2) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - end if - end select - else - select case (idir) - case (1) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - case (2) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - case (3) - if (is_axisym) then - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + case (3) + if (is_axisym) then + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, 
k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + else + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + end if + end select + else + select case (idir) + case (1) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - else - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & + case (2) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - end if - end select - end if + case (3) + if (is_axisym) then + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + else + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, 
i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + end if + end select + end if + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if ! The following block is not repeated and is left as is if (idir == 1) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do q = 0, n - do i = 0, m - rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) - j = bubxb - $:GPU_LOOP(parallelism='[seq]') - do k = 1, nb - rhs_vf(j)%sf(i, q, l) = rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) - rhs_vf(j + 1)%sf(i, q, l) = rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l) - rhs_vf(j + 2)%sf(i, q, l) = rhs_vf(j + 2)%sf(i, q, l) + mom_3d(0, 1, k)%sf(i, q, l) - rhs_vf(j + 3)%sf(i, q, l) = rhs_vf(j + 3)%sf(i, q, l) + mom_3d(2, 0, k)%sf(i, q, l) - rhs_vf(j + 4)%sf(i, q, l) = rhs_vf(j + 4)%sf(i, q, l) + mom_3d(1, 1, k)%sf(i, q, l) - rhs_vf(j + 5)%sf(i, q, l) = rhs_vf(j + 5)%sf(i, q, l) + mom_3d(0, 2, k)%sf(i, q, l) - j = j + 6 + do l = 0, p + do q = 0, n + do i = 0, m + rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) + j = bubxb + $:GPU_LOOP(parallelism='[seq]') + do k = 1, nb + rhs_vf(j)%sf(i, q, l) = rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) + rhs_vf(j + 1)%sf(i, q, l) = rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l) + rhs_vf(j + 2)%sf(i, q, l) = rhs_vf(j + 2)%sf(i, q, l) + mom_3d(0, 1, k)%sf(i, q, l) + rhs_vf(j + 3)%sf(i, q, l) = rhs_vf(j + 3)%sf(i, q, l) + mom_3d(2, 0, k)%sf(i, q, l) + rhs_vf(j + 4)%sf(i, q, l) = rhs_vf(j + 4)%sf(i, q, l) + mom_3d(1, 1, k)%sf(i, q, l) + rhs_vf(j + 5)%sf(i, q, l) = rhs_vf(j + 5)%sf(i, q, l) + mom_3d(0, 2, k)%sf(i, q, l) + j = j + 6 + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -715,142 +715,142 @@ contains $:GPU_UPDATE(device='[is1_qbmm,is2_qbmm,is3_qbmm]') #:call 
GPU_PARALLEL_LOOP(collapse=3, private='[moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T]') - do id3 = is3_qbmm%beg, is3_qbmm%end - do id2 = is2_qbmm%beg, is2_qbmm%end - do id1 = is1_qbmm%beg, is1_qbmm%end - - alf = q_prim_vf(alf_idx)%sf(id1, id2, id3) - pres = q_prim_vf(E_idx)%sf(id1, id2, id3) - rho = q_prim_vf(contxb)%sf(id1, id2, id3) - - if (bubble_model == 2) then - n_tait = 1._wp/gammas(1) + 1._wp - B_tait = pi_infs(1)*(n_tait - 1)/n_tait - c = n_tait*(pres + B_tait)*(1._wp - alf)/(rho) - c = merge(sqrt(c), sgm_eps, c > 0._wp) - end if + do id3 = is3_qbmm%beg, is3_qbmm%end + do id2 = is2_qbmm%beg, is2_qbmm%end + do id1 = is1_qbmm%beg, is1_qbmm%end + + alf = q_prim_vf(alf_idx)%sf(id1, id2, id3) + pres = q_prim_vf(E_idx)%sf(id1, id2, id3) + rho = q_prim_vf(contxb)%sf(id1, id2, id3) + + if (bubble_model == 2) then + n_tait = 1._wp/gammas(1) + 1._wp + B_tait = pi_infs(1)*(n_tait - 1)/n_tait + c = n_tait*(pres + B_tait)*(1._wp - alf)/(rho) + c = merge(sqrt(c), sgm_eps, c > 0._wp) + end if - call s_coeff_selector(pres, rho, c, coeff, polytropic) + call s_coeff_selector(pres, rho, c, coeff, polytropic) - if (alf > small_alf) then - nbub = q_cons_vf(bubxb)%sf(id1, id2, id3) - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - ! Gather moments for this bubble bin + if (alf > small_alf) then + nbub = q_cons_vf(bubxb)%sf(id1, id2, id3) $:GPU_LOOP(parallelism='[seq]') - do r = 2, nmom - moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3) - end do - moms(1) = 1._wp - call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q)) - - if (polytropic) then - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nnode - wght_pb(j, q) = wght(j, q)*(pb0(q) - pv) - end do - else + do q = 1, nb + ! 
Gather moments for this bubble bin $:GPU_LOOP(parallelism='[seq]') - do j = 1, nnode - chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp)) - x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw) - k_mw = x_vw*k_v(q)/(x_vw + (1._wp - x_vw)*phi_vn) + (1._wp - x_vw)*k_n(q)/(x_vw*phi_nv + 1._wp - x_vw) - rho_mw = pv/(chi_vw*R_v*Tw) - rhs_mv(id1, id2, id3, j, q) = -Re_trans_c(q)*((mv(id1, id2, id3, j, q)/(mv(id1, id2, id3, j, q) + mass_n0(q))) - chi_vw) - rhs_mv(id1, id2, id3, j, q) = rho_mw*rhs_mv(id1, id2, id3, j, q)/Pe_c/(1._wp - chi_vw)/abscX(j, q) - T_bar = Tw*(pb(id1, id2, id3, j, q)/pb0(q))*(abscX(j, q)/R0(q))**3*(mass_n0(q) + mass_v0(q))/(mass_n0(q) + mv(id1, id2, id3, j, q)) - grad_T = -Re_trans_T(q)*(T_bar - Tw) - ht(j, q) = pb0(q)*k_mw*grad_T/Pe_T(q)/abscX(j, q) - wght_pb(j, q) = wght(j, q)*(pb(id1, id2, id3, j, q)) - wght_mv(j, q) = wght(j, q)*(rhs_mv(id1, id2, id3, j, q)) - wght_ht(j, q) = wght(j, q)*ht(j, q) + do r = 2, nmom + moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3) end do - end if + moms(1) = 1._wp + call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q)) + + if (polytropic) then + $:GPU_LOOP(parallelism='[seq]') + do j = 1, nnode + wght_pb(j, q) = wght(j, q)*(pb0(q) - pv) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do j = 1, nnode + chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp)) + x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw) + k_mw = x_vw*k_v(q)/(x_vw + (1._wp - x_vw)*phi_vn) + (1._wp - x_vw)*k_n(q)/(x_vw*phi_nv + 1._wp - x_vw) + rho_mw = pv/(chi_vw*R_v*Tw) + rhs_mv(id1, id2, id3, j, q) = -Re_trans_c(q)*((mv(id1, id2, id3, j, q)/(mv(id1, id2, id3, j, q) + mass_n0(q))) - chi_vw) + rhs_mv(id1, id2, id3, j, q) = rho_mw*rhs_mv(id1, id2, id3, j, q)/Pe_c/(1._wp - chi_vw)/abscX(j, q) + T_bar = Tw*(pb(id1, id2, id3, j, q)/pb0(q))*(abscX(j, q)/R0(q))**3*(mass_n0(q) + mass_v0(q))/(mass_n0(q) + mv(id1, id2, id3, j, q)) + grad_T = -Re_trans_T(q)*(T_bar - Tw) + ht(j, q) = pb0(q)*k_mw*grad_T/Pe_T(q)/abscX(j, q) + 
wght_pb(j, q) = wght(j, q)*(pb(id1, id2, id3, j, q)) + wght_mv(j, q) = wght(j, q)*(rhs_mv(id1, id2, id3, j, q)) + wght_ht(j, q) = wght(j, q)*ht(j, q) + end do + end if - ! Compute change in moments due to bubble dynamics - r = 1 - $:GPU_LOOP(parallelism='[seq]') - do i2 = 0, 2 + ! Compute change in moments due to bubble dynamics + r = 1 $:GPU_LOOP(parallelism='[seq]') - do i1 = 0, 2 - if ((i1 + i2) <= 2) then - momsum = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nterms - select case (bubble_model) - case (3) - if (j == 3) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) - else - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) - end if - case (2) - if ((j >= 7 .and. j <= 9) .or. (j >= 22 .and. j <= 23) .or. (j >= 10 .and. j <= 11) .or. (j == 26)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) - else if ((j >= 27 .and. j <= 29) .and. (.not. polytropic)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_mv(:, q), momrhs(:, i1, i2, j, q)) - else if ((j >= 30 .and. j <= 32) .and. (.not. 
polytropic)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_ht(:, q), momrhs(:, i1, i2, j, q)) - else - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) - end if - end select - end do - moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub*momsum - msum(r) = momsum - r = r + 1 - end if + do i2 = 0, 2 + $:GPU_LOOP(parallelism='[seq]') + do i1 = 0, 2 + if ((i1 + i2) <= 2) then + momsum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do j = 1, nterms + select case (bubble_model) + case (3) + if (j == 3) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) + else + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) + end if + case (2) + if ((j >= 7 .and. j <= 9) .or. (j >= 22 .and. j <= 23) .or. (j >= 10 .and. j <= 11) .or. (j == 26)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) + else if ((j >= 27 .and. j <= 29) .and. (.not. polytropic)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_mv(:, q), momrhs(:, i1, i2, j, q)) + else if ((j >= 30 .and. j <= 32) .and. (.not. polytropic)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_ht(:, q), momrhs(:, i1, i2, j, q)) + else + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) + end if + end select + end do + moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub*momsum + msum(r) = momsum + r = r + 1 + end if + end do end do - end do - ! 
Compute change in pb and mv for non-polytropic model - if (.not. polytropic) then - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nnode - drdt = msum(2) - drdt2 = merge(-1._wp, 1._wp, j == 1 .or. j == 2)/(2._wp*sqrt(merge(moms(4) - moms(2)**2._wp, verysmall, moms(4) - moms(2)**2._wp > 0._wp))) - drdt2 = drdt2*(msum(3) - 2._wp*moms(2)*msum(2)) - drdt = drdt + drdt2 - rhs_pb(id1, id2, id3, j, q) = (-3._wp*gam*drdt/abscX(j, q))*(pb(id1, id2, id3, j, q)) - rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*rhs_mv(id1, id2, id3, j, q)*R_v*Tw - rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*ht(j, q) - rhs_mv(id1, id2, id3, j, q) = rhs_mv(id1, id2, id3, j, q)*(4._wp*pi*abscX(j, q)**2._wp) - end do - end if - end do + ! Compute change in pb and mv for non-polytropic model + if (.not. polytropic) then + $:GPU_LOOP(parallelism='[seq]') + do j = 1, nnode + drdt = msum(2) + drdt2 = merge(-1._wp, 1._wp, j == 1 .or. j == 2)/(2._wp*sqrt(merge(moms(4) - moms(2)**2._wp, verysmall, moms(4) - moms(2)**2._wp > 0._wp))) + drdt2 = drdt2*(msum(3) - 2._wp*moms(2)*msum(2)) + drdt = drdt + drdt2 + rhs_pb(id1, id2, id3, j, q) = (-3._wp*gam*drdt/abscX(j, q))*(pb(id1, id2, id3, j, q)) + rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*rhs_mv(id1, id2, id3, j, q)*R_v*Tw + rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*ht(j, q) + rhs_mv(id1, id2, id3, j, q) = rhs_mv(id1, id2, id3, j, q)*(4._wp*pi*abscX(j, q)**2._wp) + end do + end if + end do - ! 
Compute special high-order moments - momsp(1)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) - momsp(2)%sf(id1, id2, id3) = 4._wp*pi*nbub*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - momsp(3)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 2._wp, 0._wp) - if (abs(gam - 1._wp) <= 1.e-4_wp) then - momsp(4)%sf(id1, id2, id3) = 1._wp - else - if (polytropic) then - momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp*(1._wp - gam), 0._wp, 3._wp*gam) + pv*f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) + ! Compute special high-order moments + momsp(1)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) + momsp(2)%sf(id1, id2, id3) = 4._wp*pi*nbub*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) + momsp(3)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 2._wp, 0._wp) + if (abs(gam - 1._wp) <= 1.e-4_wp) then + momsp(4)%sf(id1, id2, id3) = 1._wp else - momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) + if (polytropic) then + momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp*(1._wp - gam), 0._wp, 3._wp*gam) + pv*f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) + else + momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) + end if end if - end if - else - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb + else $:GPU_LOOP(parallelism='[seq]') - do i1 = 0, 2 + do q = 1, nb $:GPU_LOOP(parallelism='[seq]') - do i2 = 0, 2 - moms3d(i1, i2, q)%sf(id1, id2, 
id3) = 0._wp + do i1 = 0, 2 + $:GPU_LOOP(parallelism='[seq]') + do i2 = 0, 2 + moms3d(i1, i2, q)%sf(id1, id2, id3) = 0._wp + end do end do end do - end do - momsp(1)%sf(id1, id2, id3) = 0._wp - momsp(2)%sf(id1, id2, id3) = 0._wp - momsp(3)%sf(id1, id2, id3) = 0._wp - momsp(4)%sf(id1, id2, id3) = 0._wp - end if + momsp(1)%sf(id1, id2, id3) = 0._wp + momsp(2)%sf(id1, id2, id3) = 0._wp + momsp(3)%sf(id1, id2, id3) = 0._wp + momsp(4)%sf(id1, id2, id3) = 0._wp + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP contains diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 526751323f..9761f469ef 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -911,17 +911,17 @@ contains if (ib) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (ib_markers%sf(j, k, l) /= 0) then - do i = 1, sys_size - rhs_vf(i)%sf(j, k, l) = 0._wp - end do - end if + do l = 0, p + do k = 0, n + do j = 0, m + if (ib_markers%sf(j, k, l) /= 0) then + do i = 1, sys_size + rhs_vf(i)%sf(j, k, l) = 0._wp + end do + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1020,28 +1020,28 @@ contains if (alt_soundspeed) then #:call GPU_PARALLEL_LOOP(collapse=3) - do q_loop = 0, p - do l_loop = 0, n - do k_loop = 0, m - blkmod1(k_loop, l_loop, q_loop) = ((gammas(1) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & - pi_infs(1))/gammas(1) - blkmod2(k_loop, l_loop, q_loop) = ((gammas(2) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & - pi_infs(2))/gammas(2) - alpha1(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) - - if (bubbles_euler) then - alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(alf_idx - 1)%sf(k_loop, l_loop, q_loop) - else - alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxe)%sf(k_loop, l_loop, q_loop) - end if - - Kterm(k_loop, l_loop, q_loop) = alpha1(k_loop, l_loop, q_loop)*alpha2(k_loop, l_loop, q_loop)* & - (blkmod2(k_loop, l_loop, 
q_loop) - blkmod1(k_loop, l_loop, q_loop))/ & - (alpha1(k_loop, l_loop, q_loop)*blkmod2(k_loop, l_loop, q_loop) + & - alpha2(k_loop, l_loop, q_loop)*blkmod1(k_loop, l_loop, q_loop)) + do q_loop = 0, p + do l_loop = 0, n + do k_loop = 0, m + blkmod1(k_loop, l_loop, q_loop) = ((gammas(1) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & + pi_infs(1))/gammas(1) + blkmod2(k_loop, l_loop, q_loop) = ((gammas(2) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & + pi_infs(2))/gammas(2) + alpha1(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) + + if (bubbles_euler) then + alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(alf_idx - 1)%sf(k_loop, l_loop, q_loop) + else + alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxe)%sf(k_loop, l_loop, q_loop) + end if + + Kterm(k_loop, l_loop, q_loop) = alpha1(k_loop, l_loop, q_loop)*alpha2(k_loop, l_loop, q_loop)* & + (blkmod2(k_loop, l_loop, q_loop) - blkmod1(k_loop, l_loop, q_loop))/ & + (alpha1(k_loop, l_loop, q_loop)*blkmod2(k_loop, l_loop, q_loop) + & + alpha2(k_loop, l_loop, q_loop)*blkmod1(k_loop, l_loop, q_loop)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1055,38 +1055,38 @@ contains end if #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') - do j = 1, sys_size - do q_loop = 0, p - do l_loop = 0, n - do k_loop = 0, m - inv_ds = 1._wp/dx(k_loop) - flux_face1 = flux_n(1)%vf(j)%sf(k_loop - 1, l_loop, q_loop) - flux_face2 = flux_n(1)%vf(j)%sf(k_loop, l_loop, q_loop) - rhs_vf(j)%sf(k_loop, l_loop, q_loop) = inv_ds*(flux_face1 - flux_face2) + do j = 1, sys_size + do q_loop = 0, p + do l_loop = 0, n + do k_loop = 0, m + inv_ds = 1._wp/dx(k_loop) + flux_face1 = flux_n(1)%vf(j)%sf(k_loop - 1, l_loop, q_loop) + flux_face2 = flux_n(1)%vf(j)%sf(k_loop, l_loop, q_loop) + rhs_vf(j)%sf(k_loop, l_loop, q_loop) = inv_ds*(flux_face1 - flux_face2) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (model_eqns == 3) then #:call 
GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') - do q_loop = 0, p - do l_loop = 0, n - do k_loop = 0, m - do i_fluid_loop = 1, num_fluids - inv_ds = 1._wp/dx(k_loop) - advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(k_loop, l_loop, q_loop) - pressure_val = q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) - flux_face1 = flux_src_n_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) - flux_face2 = flux_src_n_vf%vf(advxb)%sf(k_loop - 1, l_loop, q_loop) - rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) = & - rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) - & - inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) + do q_loop = 0, p + do l_loop = 0, n + do k_loop = 0, m + do i_fluid_loop = 1, num_fluids + inv_ds = 1._wp/dx(k_loop) + advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(k_loop, l_loop, q_loop) + pressure_val = q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + flux_face1 = flux_src_n_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) + flux_face2 = flux_src_n_vf%vf(advxb)%sf(k_loop - 1, l_loop, q_loop) + rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) = & + rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) - & + inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1101,60 +1101,60 @@ contains end if #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') - do j = 1, sys_size - do l = 0, p - do k = 0, n - do q = 0, m - inv_ds = 1._wp/dy(k) - flux_face1 = flux_n(2)%vf(j)%sf(q, k - 1, l) - flux_face2 = flux_n(2)%vf(j)%sf(q, k, l) - rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) + inv_ds*(flux_face1 - flux_face2) + do j = 1, sys_size + do l = 0, p + do k = 0, n + do q = 0, m + inv_ds = 1._wp/dy(k) + flux_face1 = flux_n(2)%vf(j)%sf(q, k - 1, l) + flux_face2 = flux_n(2)%vf(j)%sf(q, k, l) + rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) + 
inv_ds*(flux_face1 - flux_face2) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (model_eqns == 3) then #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') - do l = 0, p - do k = 0, n - do q = 0, m - do i_fluid_loop = 1, num_fluids - inv_ds = 1._wp/dy(k) - advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(q, k, l) - pressure_val = q_prim_vf%vf(E_idx)%sf(q, k, l) - flux_face1 = flux_src_n_vf%vf(advxb)%sf(q, k, l) - flux_face2 = flux_src_n_vf%vf(advxb)%sf(q, k - 1, l) - rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) = & - rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) - & - inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) - if (cyl_coord) then + do l = 0, p + do k = 0, n + do q = 0, m + do i_fluid_loop = 1, num_fluids + inv_ds = 1._wp/dy(k) + advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(q, k, l) + pressure_val = q_prim_vf%vf(E_idx)%sf(q, k, l) + flux_face1 = flux_src_n_vf%vf(advxb)%sf(q, k, l) + flux_face2 = flux_src_n_vf%vf(advxb)%sf(q, k - 1, l) rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) = & rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) - & - 5.e-1_wp/y_cc(k)*advected_qty_val*pressure_val*(flux_face1 + flux_face2) - end if + inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) + if (cyl_coord) then + rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) = & + rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) - & + 5.e-1_wp/y_cc(k)*advected_qty_val*pressure_val*(flux_face1 + flux_face2) + end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (cyl_coord) then #:call GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') - do j = 1, sys_size - do l = 0, p - do k = 0, n - do q = 0, m - flux_face1 = flux_gsrc_n(2)%vf(j)%sf(q, k - 1, l) - flux_face2 = flux_gsrc_n(2)%vf(j)%sf(q, k, l) - rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) - & - 5.e-1_wp/y_cc(k)*(flux_face1 + flux_face2) + do j = 1, sys_size + do l = 0, p + do k = 
0, n + do q = 0, m + flux_face1 = flux_gsrc_n(2)%vf(j)%sf(q, k - 1, l) + flux_face2 = flux_gsrc_n(2)%vf(j)%sf(q, k, l) + rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) - & + 5.e-1_wp/y_cc(k)*(flux_face1 + flux_face2) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1170,70 +1170,70 @@ contains if (grid_geometry == 3) then ! Cylindrical Coordinates #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,velocity_val,flux_face1,flux_face2]') - do j = 1, sys_size - do k = 0, p - do q = 0, n - do l = 0, m - inv_ds = 1._wp/(dz(k)*y_cc(q)) - velocity_val = q_prim_vf%vf(contxe + idir)%sf(l, q, k) - flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) - flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) - rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + & - inv_ds*velocity_val*(flux_face1 - flux_face2) + do j = 1, sys_size + do k = 0, p + do q = 0, n + do l = 0, m + inv_ds = 1._wp/(dz(k)*y_cc(q)) + velocity_val = q_prim_vf%vf(contxe + idir)%sf(l, q, k) + flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) + flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) + rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + & + inv_ds*velocity_val*(flux_face1 - flux_face2) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') - do j = 1, sys_size - do k = 0, p - do q = 0, n - do l = 0, m - flux_face1 = flux_gsrc_n(3)%vf(j)%sf(l, q, k - 1) - flux_face2 = flux_gsrc_n(3)%vf(j)%sf(l, q, k) - rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) - & - 5.e-1_wp/y_cc(q)*(flux_face1 + flux_face2) + do j = 1, sys_size + do k = 0, p + do q = 0, n + do l = 0, m + flux_face1 = flux_gsrc_n(3)%vf(j)%sf(l, q, k - 1) + flux_face2 = flux_gsrc_n(3)%vf(j)%sf(l, q, k) + rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) - & + 5.e-1_wp/y_cc(q)*(flux_face1 + flux_face2) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else ! 
Cartesian Coordinates #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') - do j = 1, sys_size - do k = 0, p - do q = 0, n - do l = 0, m - inv_ds = 1._wp/dz(k) - flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) - flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) - rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + inv_ds*(flux_face1 - flux_face2) + do j = 1, sys_size + do k = 0, p + do q = 0, n + do l = 0, m + inv_ds = 1._wp/dz(k) + flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) + flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) + rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + inv_ds*(flux_face1 - flux_face2) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (model_eqns == 3) then #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') - do k = 0, p - do q = 0, n - do l = 0, m - do i_fluid_loop = 1, num_fluids - inv_ds = 1._wp/dz(k) - advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(l, q, k) - pressure_val = q_prim_vf%vf(E_idx)%sf(l, q, k) - flux_face1 = flux_src_n_vf%vf(advxb)%sf(l, q, k) - flux_face2 = flux_src_n_vf%vf(advxb)%sf(l, q, k - 1) - rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) = & - rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) - & - inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) + do k = 0, p + do q = 0, n + do l = 0, m + do i_fluid_loop = 1, num_fluids + inv_ds = 1._wp/dz(k) + advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(l, q, k) + pressure_val = q_prim_vf%vf(E_idx)%sf(l, q, k) + flux_face1 = flux_src_n_vf%vf(advxb)%sf(l, q, k) + flux_face2 = flux_src_n_vf%vf(advxb)%sf(l, q, k - 1) + rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) = & + rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) - & + inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1263,210 +1263,210 @@ contains use_standard_riemann = (riemann_solver == 1 .or. 
riemann_solver == 4) if (use_standard_riemann) then #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do q_idx = 0, p ! z_extent - do l_idx = 0, n ! y_extent - do k_idx = 0, m ! x_extent - local_inv_ds = 1._wp/dx(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(k_idx, l_idx, q_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) - rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else ! Other Riemann solvers - if (alt_soundspeed) then - if (bubbles_euler .neqv. .true.) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m - local_inv_ds = 1._wp/dx(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) - local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! 
Access is safe due to outer alt_soundspeed check - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx - 1, l_idx, q_idx) - rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') - do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m - local_inv_ds = 1._wp/dx(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) - local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx - 1, l_idx, q_idx) - rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - end if - else ! NOT alt_soundspeed - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe - do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m + do q_idx = 0, p ! z_extent + do l_idx = 0, n ! y_extent + do k_idx = 0, m ! 
x_extent local_inv_ds = 1._wp/dx(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(k_idx, l_idx, q_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - end do - end if - end if - - case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) - use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) - if (use_standard_riemann) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do l_idx = 0, p ! z_extent - do k_idx = 0, n ! y_extent - do q_idx = 0, m ! x_extent - local_inv_ds = 1._wp/dy(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(q_idx, k_idx, l_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) 
then #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) - local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx - 1, l_idx) - rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - if (cyl_coord) then - rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) - & - (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) - end if - end do; end do; end do + do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m + local_inv_ds = 1._wp/dx(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) + local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe due to outer alt_soundspeed check + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx - 1, l_idx, q_idx) + rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do #:endcall GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) - local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! 
Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx - 1, l_idx) - rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - if (cyl_coord) then - rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & - (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) - end if - end do; end do; end do + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') + do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m + local_inv_ds = 1._wp/dx(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) + local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx - 1, l_idx, q_idx) + rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do #:endcall GPU_PARALLEL_LOOP end if else ! 
NOT alt_soundspeed #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) - rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - end do - end if - end if - - case (3) ! z-direction: loops l_idx (x), q_idx (y), k_idx (z); sf(l_idx, q_idx, k_idx); dz(k_idx); Kterm(l_idx,q_idx,k_idx) - if (grid_geometry == 3) then - use_standard_riemann = (riemann_solver == 1) - else - use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) - end if - - if (use_standard_riemann) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do k_idx = 0, p ! z_extent - do q_idx = 0, n ! y_extent - do l_idx = 0, m ! x_extent - local_inv_ds = 1._wp/dz(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(l_idx, q_idx, k_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else ! Other Riemann solvers - if (alt_soundspeed) then - if (bubbles_euler .neqv. .true.) 
then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) - local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do + do j_adv = advxb, advxe + do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m + local_inv_ds = 1._wp/dx(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) + rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do #:endcall GPU_PARALLEL_LOOP + end do + end if + end if - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) - local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! 
Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do + case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) + use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) + if (use_standard_riemann) then + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do l_idx = 0, p ! z_extent + do k_idx = 0, n ! y_extent + do q_idx = 0, m ! x_extent + local_inv_ds = 1._wp/dy(k_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(q_idx, k_idx, l_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do + end do + end do + end do #:endcall GPU_PARALLEL_LOOP - end if - else ! 
NOT alt_soundspeed - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - end do - end if - end if - end select - end subroutine s_add_directional_advection_source_terms + else ! Other Riemann solvers + if (alt_soundspeed) then + if (bubbles_euler .neqv. .true.) then + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) + local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! 
Access is safe + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + if (cyl_coord) then + rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) - & + (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) + end if + end do; end do; end do + #:endcall GPU_PARALLEL_LOOP + + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) + local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + if (cyl_coord) then + rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & + (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) + end if + end do; end do; end do + #:endcall GPU_PARALLEL_LOOP + end if + else ! 
NOT alt_soundspeed + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + #:endcall GPU_PARALLEL_LOOP + end do + end if + end if - end subroutine s_compute_advection_source_term + case (3) ! z-direction: loops l_idx (x), q_idx (y), k_idx (z); sf(l_idx, q_idx, k_idx); dz(k_idx); Kterm(l_idx,q_idx,k_idx) + if (grid_geometry == 3) then + use_standard_riemann = (riemann_solver == 1) + else + use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) + end if + + if (use_standard_riemann) then + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do k_idx = 0, p ! z_extent + do q_idx = 0, n ! y_extent + do l_idx = 0, m ! x_extent + local_inv_ds = 1._wp/dz(k_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(l_idx, q_idx, k_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + else ! Other Riemann solvers + if (alt_soundspeed) then + if (bubbles_euler .neqv. .true.) 
then + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) + local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + #:endcall GPU_PARALLEL_LOOP + + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) + local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + #:endcall GPU_PARALLEL_LOOP + end if + else ! 
NOT alt_soundspeed + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + #:endcall GPU_PARALLEL_LOOP + end do + end if + end if + end select + end subroutine s_add_directional_advection_source_terms + + end subroutine s_compute_advection_source_term subroutine s_compute_additional_physics_rhs(idir, q_prim_vf, rhs_vf, flux_src_n_in, & dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf) @@ -1477,9 +1477,9 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: flux_src_n_in type(scalar_field), dimension(sys_size), intent(in) :: dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf - integer :: i, j, k, l + integer :: i, j, k, l - if (idir == 1) then ! x-direction + if (idir == 1) then ! x-direction if (surface_tension) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -1513,7 +1513,7 @@ contains end do #:endcall GPU_PARALLEL_LOOP - elseif (idir == 2) then ! y-direction + elseif (idir == 2) then ! 
y-direction if (surface_tension) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -1549,21 +1549,21 @@ contains idwbuff(1), idwbuff(2), idwbuff(3)) end if - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, 0, l) = & - rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & - (tau_Re_vf(i)%sf(j, -1, l) & - - tau_Re_vf(i)%sf(j, 1, l)) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + #:call GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, 0, l) = & + rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & + (tau_Re_vf(i)%sf(j, -1, l) & + - tau_Re_vf(i)%sf(j, 1, l)) + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP - end if + end if #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p @@ -1620,21 +1620,21 @@ contains end do #:endcall GPU_PARALLEL_LOOP - if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, 0, l) = & - rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & - tau_Re_vf(i)%sf(j, 0, l) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if - else + if (viscous) then + #:call GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, 0, l) = & + rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & + tau_Re_vf(i)%sf(j, 0, l) + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if + else #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p @@ -1654,7 +1654,7 @@ contains end if end if - elseif (idir == 3) then ! z-direction + elseif (idir == 3) then ! 
z-direction if (surface_tension) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -1709,16 +1709,16 @@ contains end if end if - end subroutine s_compute_additional_physics_rhs + end subroutine s_compute_additional_physics_rhs - !> The purpose of this procedure is to infinitely relax + !> The purpose of this procedure is to infinitely relax !! the pressures from the internal-energy equations to a !! unique pressure, from which the corresponding volume !! fraction of each phase are recomputed. For conservation !! purpose, this pressure is finally corrected using the !! mixture-total-energy equation. - !> The purpose of this subroutine is to WENO-reconstruct the + !> The purpose of this subroutine is to WENO-reconstruct the !! left and the right cell-boundary values, including values !! at the Gaussian quadrature points, from the cell-averaged !! variables. @@ -1728,34 +1728,34 @@ contains !! @param vR_qp Right WENO-reconstructed, cell-boundary values including !! the values at the quadrature points, of the cell-average variables !! 
@param norm_dir Splitting coordinate direction - subroutine s_reconstruct_cell_boundary_values(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, & - norm_dir) + subroutine s_reconstruct_cell_boundary_values(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, & + norm_dir) - type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_x, vL_y, vL_z - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_x, vR_y, vR_z - integer, intent(in) :: norm_dir + type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf + real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_x, vL_y, vL_z + real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_x, vR_y, vR_z + integer, intent(in) :: norm_dir - integer :: weno_dir !< Coordinate direction of the WENO reconstruction + integer :: weno_dir !< Coordinate direction of the WENO reconstruction - ! Reconstruction in s1-direction + ! 
Reconstruction in s1-direction - if (norm_dir == 1) then - is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) - weno_dir = 1; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn + if (norm_dir == 1) then + is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) + weno_dir = 1; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn - elseif (norm_dir == 2) then - is1 = idwbuff(2); is2 = idwbuff(1); is3 = idwbuff(3) - weno_dir = 2; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn + elseif (norm_dir == 2) then + is1 = idwbuff(2); is2 = idwbuff(1); is3 = idwbuff(3) + weno_dir = 2; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn - else - is1 = idwbuff(3); is2 = idwbuff(2); is3 = idwbuff(1) - weno_dir = 3; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn + else + is1 = idwbuff(3); is2 = idwbuff(2); is3 = idwbuff(1) + weno_dir = 3; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn - end if + end if if (n > 0) then if (p > 0) then @@ -1776,89 +1776,89 @@ contains is1, is2, is3) end if - end subroutine s_reconstruct_cell_boundary_values - - subroutine s_reconstruct_cell_boundary_values_first_order(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, & - norm_dir) + end subroutine s_reconstruct_cell_boundary_values - type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_x, vL_y, vL_z - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_x, vR_y, vR_z - integer, intent(in) :: norm_dir + subroutine s_reconstruct_cell_boundary_values_first_order(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, & + norm_dir) - integer :: recon_dir !< Coordinate direction of the WENO reconstruction + type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf + real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_x, vL_y, vL_z + real(wp), 
dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_x, vR_y, vR_z + integer, intent(in) :: norm_dir - integer :: i, j, k, l - ! Reconstruction in s1-direction + integer :: recon_dir !< Coordinate direction of the WENO reconstruction - if (norm_dir == 1) then - is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) - recon_dir = 1; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn + integer :: i, j, k, l + ! Reconstruction in s1-direction - elseif (norm_dir == 2) then - is1 = idwbuff(2); is2 = idwbuff(1); is3 = idwbuff(3) - recon_dir = 2; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn + if (norm_dir == 1) then + is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) + recon_dir = 1; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn - else - is1 = idwbuff(3); is2 = idwbuff(2); is3 = idwbuff(1) - recon_dir = 3; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn + elseif (norm_dir == 2) then + is1 = idwbuff(2); is2 = idwbuff(1); is3 = idwbuff(3) + recon_dir = 2; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn - end if + else + is1 = idwbuff(3); is2 = idwbuff(2); is3 = idwbuff(1) + recon_dir = 3; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn - $:GPU_UPDATE(device='[is1,is2,is3,iv]') + end if - if (recon_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else if (recon_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else if (recon_dir == 3) then - 
#:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if + $:GPU_UPDATE(device='[is1,is2,is3,iv]') + + if (recon_dir == 1) then + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + else if (recon_dir == 2) then + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + else if (recon_dir == 3) then + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - end subroutine s_reconstruct_cell_boundary_values_first_order + end subroutine s_reconstruct_cell_boundary_values_first_order - !> Module deallocation and/or disassociation procedures - impure subroutine s_finalize_rhs_module + !> Module deallocation and/or disassociation procedures + impure subroutine s_finalize_rhs_module - integer :: i, j, l + integer :: i, j, l - call s_finalize_pressure_relaxation_module + call s_finalize_pressure_relaxation_module if (.not. 
igr) then do j = cont_idx%beg, cont_idx%end @@ -1975,8 +1975,8 @@ contains deallocate (alf_sum%sf) end if - @:DEALLOCATE(dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n) - @:DEALLOCATE(dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n) + @:DEALLOCATE(dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n) + @:DEALLOCATE(dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n) if (.not. igr) then do i = num_dims, 1, -1 @@ -2017,7 +2017,7 @@ contains @:DEALLOCATE(flux_n, flux_src_n, flux_gsrc_n) end if - end subroutine s_finalize_rhs_module + end subroutine s_finalize_rhs_module -end module m_rhs + end module m_rhs diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 9214cc4893..4a82254c3d 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -204,16 +204,16 @@ contains !! 1) s_compute_cartesian_viscous_source_flux !! 2) s_compute_cylindrical_viscous_source_flux subroutine s_compute_viscous_source_flux(velL_vf, & - dvelL_dx_vf, & - dvelL_dy_vf, & - dvelL_dz_vf, & - velR_vf, & - dvelR_dx_vf, & - dvelR_dy_vf, & - dvelR_dz_vf, & - flux_src_vf, & - norm_dir, & - ix, iy, iz) + dvelL_dx_vf, & + dvelL_dy_vf, & + dvelL_dz_vf, & + velR_vf, & + dvelR_dx_vf, & + dvelR_dy_vf, & + dvelR_dz_vf, & + flux_src_vf, & + norm_dir, & + ix, iy, iz) type(scalar_field), & dimension(num_vels), & @@ -358,643 +358,643 @@ contains if (norm_dir == ${NORM_DIR}$) then #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = 
qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - vel_L_rms = 0._wp; vel_R_rms = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do + + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables - B%L(1) = Bx0 - B%R(1) = Bx0 - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - else ! 
2D/3D: Bx, By, Bz as variables - B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables + B%L(1) = Bx0 + B%R(1) = Bx0 + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + else ! 2D/3D: Bx, By, Bz as variables + B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) + end if end if - end if - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp + + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp + + alpha_L_sum = 0._wp + alpha_R_sum = 0._wp - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp + pres_mag%L = 0._wp + pres_mag%R = 0._wp - alpha_L_sum = 0._wp - alpha_R_sum = 0._wp + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) + alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) + alpha_L_sum = alpha_L_sum + alpha_L(i) + alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) + alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) + alpha_R_sum = alpha_R_sum + alpha_R(i) + end do - pres_mag%L = 0._wp - pres_mag%R = 0._wp + alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) + alpha_R = alpha_R/max(alpha_R_sum, 
sgm_eps) + end if - if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) - alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) - alpha_L_sum = alpha_L_sum + alpha_L(i) - alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) - alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) - alpha_R_sum = alpha_R_sum + alpha_R(i) + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) end do - alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) - alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) - end if + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res(i, q) & + + Re_R(i) + end do - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res(i, q) & - + Re_L(i) - Re_R(i) = 
alpha_R(Re_idx(i, q))/Res(i, q) & - + Re_R(i) + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if - - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R - - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) - - if (chem_params%gamma_method == 1) then - ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) - - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. 
- call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) - - Gamm_L = Cp_L/Cv_L - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) - Gamm_R = Cp_R/Cv_R - gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) - - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - elseif (mhd .and. relativity) then - Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) - Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) - vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) - vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) - - b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L - b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R - B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp - B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp - - pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) - pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) - - ! Hard-coded EOS - H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L - H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R - - cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) - cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) - - E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L - E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R - elseif (mhd .and. .not. 
relativity) then - pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) - pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) - E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L - E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy - H_L = (E_L + pres_L - pres_mag%L)/rho_L - H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - end if + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R - ! elastic energy update - if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs(i) - G_R = G_R + alpha_R(i)*Gs(i) - end do + if (chem_params%gamma_method == 1) then + ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) - if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - end if + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. 
+ call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! Elastic contribution to energy if G large enough - !TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(strxb - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if + Gamm_L = Cp_L/Cv_L + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) + Gamm_R = Cp_R/Cv_R + gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) end if - end do - end if - - ! elastic energy update - !if ( hyperelasticity ) then - ! G_L = 0._wp - ! G_R = 0._wp - ! - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, num_fluids - ! G_L = G_L + alpha_L(i)*Gs(i) - ! G_R = G_R + alpha_R(i)*Gs(i) - ! end do - ! ! Elastic contribution to energy if G large enough - ! if ((G_L > 1.e-3_wp) .and. (G_R > 1.e-3_wp)) then - ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, b_size-1 - ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! end do - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, b_size-1 - ! tau_e_L(i) = 0._wp - ! tau_e_R(i) = 0._wp - ! end do - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, num_dims - ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - ! end do - ! 
end if - !end if - - @:compute_average_state() - - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) - - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) - - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, c_sum_Yi_Phi, c_avg) - - if (mhd) then - call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) - call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) - end if - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) - if (wave_speeds == 1) then - if (mhd) then - s_L = min(vel_L(dir_idx(1)) - c_fast%L, vel_R(dir_idx(1)) - c_fast%R) - s_R = max(vel_R(dir_idx(1)) + c_fast%R, vel_L(dir_idx(1)) + c_fast%L) - elseif (hypoelasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + & - tau_e_L(dir_idx_tau(1)))/rho_L) & - , vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + & - tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + & - tau_e_R(dir_idx_tau(1)))/rho_R) & - , vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + & - tau_e_L(dir_idx_tau(1)))/rho_L)) - else if (hyperelasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L) & - , vel_R(dir_idx(1)) - sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R) & - , vel_L(dir_idx(1)) + sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L)) + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R 
= rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + elseif (mhd .and. relativity) then + Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) + Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) + vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) + vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) + + b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L + b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R + B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp + B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp + + pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) + pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) + + ! Hard-coded EOS + H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L + H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R + + cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) + cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) + + E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L + E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R + elseif (mhd .and. .not. relativity) then + pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) + pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) + E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L + E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy + H_L = (E_L + pres_L - pres_mag%L)/rho_L + H_R = (E_R + pres_R - pres_mag%R)/rho_R ! 
stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R end if - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + ! elastic energy update + if (hypoelasticity) then + G_L = 0._wp; G_R = 0._wp - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs(i) + G_R = G_R + alpha_R(i)*Gs(i) + end do - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_L)) & - + (5.e-1_wp - sign(5.e-1_wp, s_L)) & - *(5.e-1_wp + sign(5.e-1_wp, s_R)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_R)) & - + (5.e-1_wp - sign(5.e-1_wp, s_L)) & - *(5.e-1_wp + sign(5.e-1_wp, s_R)) + if (cont_damage) then + G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) + G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) + end if - ! 
Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! Elastic contribution to energy if G large enough + !TODO take out if statement if stable without + if ((G_L > 1000) .and. (G_R > 1000)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Double for shear stresses + if (any(strxb - 1 + i == shear_indices)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + end if + end if + end do + end if - ! Mass - if (.not. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(alpha_rho_L(i) & - - alpha_rho_R(i))) & - /(s_M - s_P) - end do - elseif (relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(Ga%L*alpha_rho_L(i) & - - Ga%R*alpha_rho_R(i))) & - /(s_M - s_P) - end do - end if + ! elastic energy update + !if ( hyperelasticity ) then + ! G_L = 0._wp + ! G_R = 0._wp + ! + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, num_fluids + ! G_L = G_L + alpha_L(i)*Gs(i) + ! G_R = G_R + alpha_R(i)*Gs(i) + ! end do + ! ! Elastic contribution to energy if G large enough + ! if ((G_L > 1.e-3_wp) .and. (G_R > 1.e-3_wp)) then + ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, b_size-1 + ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! end do + ! 
$:GPU_LOOP(parallelism='[seq]') + ! do i = 1, b_size-1 + ! tau_e_L(i) = 0._wp + ! tau_e_R(i) = 0._wp + ! end do + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, num_dims + ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + ! end do + ! end if + !end if - ! Momentum - if (mhd .and. (.not. relativity)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of rho*v_i in the ${XYZ}$ direction - ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & - - B%R(i)*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & - - B%L(i)*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & - /(s_M - s_P) - end do - elseif (mhd .and. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of m_i in the ${XYZ}$ direction - ! 
= m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(cm%R(i)*vel_R(norm_dir) & - - b4%R(i)/Ga%R*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(cm%L(i)*vel_L(norm_dir) & - - b4%L(i)/Ga%L*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(cm%L(i) - cm%R(i))) & - /(s_M - s_P) - end do - elseif (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - else if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R & - - tau_e_R(dir_idx_tau(i))) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L & - - tau_e_L(dir_idx_tau(i))) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - end if + @:compute_average_state() - ! Energy - if (mhd .and. (.not. relativity)) then - ! 
energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) - elseif (mhd .and. relativity) then - ! energy flux = m_${XYZ}$ - mass flux - ! Hard-coded for single-component for now - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & - - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) - else if (bubbles_euler) then - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - else if (hypoelasticity) then - flux_tau_L = 0._wp; flux_tau_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) - flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & - + s_M*s_P*(E_L - E_R))/(s_M - s_P) - else - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - end if + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - ! 
Elastic Stresses - if (hypoelasticity) then - do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *tau_e_R(i)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *tau_e_L(i)) & - + s_M*s_P*(rho_L*tau_e_L(i) & - - rho_R*tau_e_R(i))) & - /(s_M - s_P) - end do - end if + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - ! Advection - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (qL_prim_rs${XYZ}$_vf(j, k, l, i) & - - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & - *s_M*s_P/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & - /(s_M - s_P) - end do + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. - ! Xi field - !if ( hyperelasticity ) then - ! do i = 1, num_dims - ! flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & - ! (s_M*rho_R*vel_R(dir_idx(1))*xi_field_R(i) & - ! - s_P*rho_L*vel_L(dir_idx(1))*xi_field_L(i) & - ! + s_M*s_P*(rho_L*xi_field_L(i) & - ! - rho_R*xi_field_R(i))) & - ! /(s_M - s_P) - ! end do - !end if - - ! Div(U)? - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & - (xi_M*(rho_L*vel_L(dir_idx(i))* & - (s_L - vel_L(dir_idx(1))) - & - pres_L*dir_flg(dir_idx(i))) - & - xi_P*(rho_R*vel_R(dir_idx(i))* & - (s_R - vel_R(dir_idx(1))) - & - pres_R*dir_flg(dir_idx(i)))) & - /(xi_M*rho_L*(s_L - vel_L(dir_idx(1))) - & - xi_P*rho_R*(s_R - vel_R(dir_idx(1)))) - end do + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, c_sum_Yi_Phi, c_avg) - if (bubbles_euler) then - ! 
From HLLC: Kills mass transport @ bubble gas density - if (num_fluids > 1) then - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp + if (mhd) then + call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) + call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) end if - end if - - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - - flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & - - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & - + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & - /(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - if (mhd) then - if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. - ! B_y flux = v_x * B_y - v_y * Bx0 - ! B_z flux = v_x * B_z - v_z * Bx0 - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 1 - flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & - - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & - + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) - end do - else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction - ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) - ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) - ! 
B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) + if (viscous) then $:GPU_LOOP(parallelism='[seq]') - do i = 0, 2 - flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & - s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & - s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & - s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp - end if - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & - - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) - ! 
Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if + if (wave_speeds == 1) then + if (mhd) then + s_L = min(vel_L(dir_idx(1)) - c_fast%L, vel_R(dir_idx(1)) - c_fast%R) + s_R = max(vel_R(dir_idx(1)) + c_fast%R, vel_L(dir_idx(1)) + c_fast%L) + elseif (hypoelasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + & + tau_e_L(dir_idx_tau(1)))/rho_L) & + , vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + & + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + & + tau_e_R(dir_idx_tau(1)))/rho_R) & + , vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + & + tau_e_L(dir_idx_tau(1)))/rho_L)) + else if (hyperelasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L) & + , vel_R(dir_idx(1)) - sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R) & + , vel_L(dir_idx(1)) + sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L)) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + end if - if (cyl_coord .and. hypoelasticity) then - ! 
+= tau_sigmasigma using HLL - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & - (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & - /(s_M - s_P) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if + + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_L)) & + + (5.e-1_wp - sign(5.e-1_wp, s_L)) & + *(5.e-1_wp + sign(5.e-1_wp, s_R)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_R)) & + + (5.e-1_wp - sign(5.e-1_wp, s_L)) & + *(5.e-1_wp + sign(5.e-1_wp, s_R)) + + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if + + ! Mass + if (.not. 
relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(alpha_rho_L(i) & + - alpha_rho_R(i))) & + /(s_M - s_P) + end do + elseif (relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(Ga%L*alpha_rho_L(i) & + - Ga%R*alpha_rho_R(i))) & + /(s_M - s_P) + end do + end if + + ! Momentum + if (mhd .and. (.not. relativity)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of rho*v_i in the ${XYZ}$ direction + ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & + - B%R(i)*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & + - B%L(i)*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & + /(s_M - s_P) + end do + elseif (mhd .and. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of m_i in the ${XYZ}$ direction + ! 
= m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(cm%R(i)*vel_R(norm_dir) & + - b4%R(i)/Ga%R*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(cm%L(i)*vel_L(norm_dir) & + - b4%L(i)/Ga%L*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(cm%L(i) - cm%R(i))) & + /(s_M - s_P) + end do + elseif (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + else if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R & + - tau_e_R(dir_idx_tau(i))) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L & + - tau_e_L(dir_idx_tau(i))) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do end if - #:endif + ! Energy + if (mhd .and. (.not. relativity)) then + ! 
energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & + - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + elseif (mhd .and. relativity) then + ! energy flux = m_${XYZ}$ - mass flux + ! Hard-coded for single-component for now + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & + - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + else if (bubbles_euler) then + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + else if (hypoelasticity) then + flux_tau_L = 0._wp; flux_tau_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) + flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) + end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & + - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & + + s_M*s_P*(E_L - E_R))/(s_M - s_P) + else + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + end if + + ! 
Elastic Stresses + if (hypoelasticity) then + do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *tau_e_R(i)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *tau_e_L(i)) & + + s_M*s_P*(rho_L*tau_e_L(i) & + - rho_R*tau_e_R(i))) & + /(s_M - s_P) + end do + end if + + ! Advection + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (qL_prim_rs${XYZ}$_vf(j, k, l, i) & + - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & + *s_M*s_P/(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + /(s_M - s_P) + end do + + ! Xi field + !if ( hyperelasticity ) then + ! do i = 1, num_dims + ! flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & + ! (s_M*rho_R*vel_R(dir_idx(1))*xi_field_R(i) & + ! - s_P*rho_L*vel_L(dir_idx(1))*xi_field_L(i) & + ! + s_M*s_P*(rho_L*xi_field_L(i) & + ! - rho_R*xi_field_R(i))) & + ! /(s_M - s_P) + ! end do + !end if + + ! Div(U)? + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & + (xi_M*(rho_L*vel_L(dir_idx(i))* & + (s_L - vel_L(dir_idx(1))) - & + pres_L*dir_flg(dir_idx(i))) - & + xi_P*(rho_R*vel_R(dir_idx(i))* & + (s_R - vel_R(dir_idx(1))) - & + pres_R*dir_flg(dir_idx(i)))) & + /(xi_M*rho_L*(s_L - vel_L(dir_idx(1))) - & + xi_P*rho_R*(s_R - vel_R(dir_idx(1)))) + end do + + if (bubbles_euler) then + ! 
From HLLC: Kills mass transport @ bubble gas density + if (num_fluids > 1) then + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp + end if + end if + + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + + flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & + - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & + + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & + /(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if + + if (mhd) then + if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. + ! B_y flux = v_x * B_y - v_y * Bx0 + ! B_z flux = v_x * B_z - v_z * Bx0 + $:GPU_LOOP(parallelism='[seq]') + do i = 0, 1 + flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & + - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & + + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) + end do + else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction + ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) + ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) + ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) + $:GPU_LOOP(parallelism='[seq]') + do i = 0, 2 + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & + s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & + s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & + s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) + end do + end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + end if + + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! 
Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & + - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + end if + + if (cyl_coord .and. hypoelasticity) then + ! += tau_sigmasigma using HLL + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & + (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & + /(s_M - s_P) + + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + end if + #:endif + + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1183,1652 +1183,1652 @@ contains if (model_eqns == 3) then !ME3 #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, vel_K_Star, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - - idx1 = dir_idx(1) - - vel_L_rms = 0._wp; vel_R_rms = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do - - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp - - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp - - alpha_L_sum = 0._wp - alpha_R_sum = 0._wp + do l = is3%beg, is3%end + do k = 
is2%beg, is2%end + do j = is1%beg, is1%end - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - end do + idx1 = dir_idx(1) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) - end do + vel_L_rms = 0._wp; vel_R_rms = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) - end do - end if + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx 
+ i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, advxb + i - 1) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, advxb + i - 1) - end do + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp - if (Re_size(i) > 0) Re_L(i) = 0._wp + alpha_L_sum = 0._wp + alpha_R_sum = 0._wp + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & - + Re_L(i) + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_R(i) = dflt_real - - if (Re_size(i) > 0) Re_R(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & - + Re_R(i) + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) end do - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if - - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R - - ! 
ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - G_L = 0._wp; G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs(i) - G_R = G_R + alpha_R(i)*Gs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do - end if - - ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - end do - G_L = 0._wp; G_R = 0._wp; - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - ! Mixture left and right shear modulus - G_L = G_L + alpha_L(i)*Gs(i) - G_R = G_R + alpha_R(i)*Gs(i) - end do - ! Elastic contribution to energy if G large enough - if (G_L > verysmall .and. 
G_R > verysmall) then - E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - end if - - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - - @:compute_average_state() - - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) - - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) - - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) - - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if - - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if - - ! 
COMPUTING THE DIRECT WAVE SPEEDS - if (wave_speeds == 1) then - if (elasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) - s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & - tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & - rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & - rho_R*(s_R - vel_R(idx1))) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) - - end if - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if - - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - - ! goes with q_star_L/R = xi_L/R * (variable) - ! 
xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(idx1))/(s_L - s_S) - xi_R = (s_R - vel_R(idx1))/(s_R - s_S) - - ! goes with numerical star velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(0.5_wp, s_S)) - xi_P = (5.e-1_wp - sign(0.5_wp, s_S)) - - ! goes with the numerical velocity in x/y/z directions - ! xi_P/M (pressure) = min/max(0. sgn(1,sL/sR)) - xi_MP = -min(0._wp, sign(1._wp, s_L)) - xi_PP = max(0._wp, sign(1._wp, s_R)) - - E_star = xi_M*(E_L + xi_MP*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & - (rho_L*s_S + pres_L/(s_L - vel_L(dir_idx(1))))) - E_L)) + & - xi_P*(E_R + xi_PP*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & - (rho_R*s_S + pres_R/(s_R - vel_R(dir_idx(1))))) - E_R)) - p_Star = xi_M*(pres_L + xi_MP*(rho_L*(s_L - vel_L(dir_idx(1)))*(s_S - vel_L(dir_idx(1))))) + & - xi_P*(pres_R + xi_PP*(rho_R*(s_R - vel_R(dir_idx(1)))*(s_S - vel_R(dir_idx(1))))) - - rho_Star = xi_M*(rho_L*(xi_MP*xi_L + 1._wp - xi_MP)) + & - xi_P*(rho_R*(xi_PP*xi_R + 1._wp - xi_PP)) - - vel_K_Star = vel_L(idx1)*(1._wp - xi_MP) + xi_MP*vel_R(idx1) + & - xi_MP*xi_PP*(s_S - vel_R(idx1)) - - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if - - ! COMPUTING FLUXES - ! MASS FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end do - - ! MOMENTUM FLUX. - ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & - (dir_flg(idxi)*vel_K_Star + (1._wp - dir_flg(idxi))*(xi_M*vel_L(idxi) + xi_P*vel_R(idxi))) + dir_flg(idxi)*p_Star & - + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr - end do - - ! ENERGY FLUX. - ! 
f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = (E_star + p_Star)*vel_K_Star & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - - ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux - if (elasticity) then - flux_ene_e = 0._wp; - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - ! MOMENTUM ELASTIC FLUX. - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & - - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) - ! ENERGY ELASTIC FLUX. - flux_ene_e = flux_ene_e - & - xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & - s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & - xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & - s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e - end if - - ! VOLUME FRACTION FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*s_S - end do - - ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & - xi_M*(vel_L(idxi) + dir_flg(idxi)*(s_S*(xi_MP*(xi_L - 1) + 1) - vel_L(idxi))) + & - xi_P*(vel_R(idxi) + dir_flg(idxi)*(s_S*(xi_PP*(xi_R - 1) + 1) - vel_R(idxi))) - end do - - ! INTERNAL ENERGIES ADVECTION FLUX. - ! 
K-th pressure and velocity in preparation for the internal energy flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1._wp + gammas(i)))* & - xi_L**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_L) + pres_L) + & - xi_P*(xi_PP*((pres_R + pi_infs(i)/(1._wp + gammas(i)))* & - xi_R**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_R) + pres_R) - - flux_rs${XYZ}$_vf(j, k, l, i + intxb - 1) = & - ((xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1))* & - (gammas(i)*p_K_Star + pi_infs(i)) + & - (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + contxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + contxb - 1))* & - qvs(i))*vel_K_Star & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S*(xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1)) - end do - - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) - - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) - end do - end if - - ! REFERENCE MAP FLUX. - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & - - rho_L*vel_L(idx1)*xi_field_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & - - rho_R*vel_R(idx1)*xi_field_R(i)) - end do - end if - - ! COLOR FUNCTION FLUX - if (surface_tension) then - flux_rs${XYZ}$_vf(j, k, l, c_idx) = & - (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx))*s_S - end if - - ! 
Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = intxb, intxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do i = 1, num_fluids + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) + alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do i = 1, num_fluids + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) end if - #:endif - - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - elseif (model_eqns == 4) then - !ME4 - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - 
do j = is1%beg, is1%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - end do + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, advxb + i - 1) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, advxb + i - 1) + end do - vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + if (Re_size(i) > 0) Re_L(i) = 0._wp - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + + Re_L(i) + end do - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) 
- gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - end do + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + end do - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_R(i) = dflt_real - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R + if (Re_size(i) > 0) Re_R(i) = 0._wp - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + + Re_R(i) + end do - @:compute_average_state() + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. + ! 
ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + G_L = 0._wp; G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs(i) + G_R = G_R + alpha_R(i)*Gs(i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + ! Elastic contribution to energy if G large enough + if ((G_L > verysmall) .and. (G_R > verysmall)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Additional terms in 2D and 3D + if ((i == 2) .or. (i == 4) .or. (i == 5)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + end if + end if + end do + end if - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + end do + G_L = 0._wp; G_R = 0._wp; + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + ! Mixture left and right shear modulus + G_L = G_L + alpha_L(i)*Gs(i) + G_R = G_R + alpha_R(i)*Gs(i) + end do + ! Elastic contribution to energy if G large enough + if (G_L > verysmall .and. 
G_R > verysmall) then + E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) + end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, b_size - 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + end if - if (wave_speeds == 1) then - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) + @:compute_average_state() - pres_SR = pres_SL + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. 
+ call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) - xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) + ! COMPUTING THE DIRECT WAVE SPEEDS + if (wave_speeds == 1) then + if (elasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) + s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & + tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & + rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & + rho_R*(s_R - vel_R(idx1))) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) - ! goes with numerical velocity in x/y/z directions - ! 
xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + end if + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) + + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*alpha_rho_L(i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*alpha_rho_R(i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(idx1))/(s_L - s_S) + xi_R = (s_R - vel_R(idx1))/(s_R - s_S) + + ! goes with numerical star velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(0.5_wp, s_S)) + xi_P = (5.e-1_wp - sign(0.5_wp, s_S)) + + ! goes with the numerical velocity in x/y/z directions + ! xi_P/M (pressure) = min/max(0. 
sgn(1,sL/sR)) + xi_MP = -min(0._wp, sign(1._wp, s_L)) + xi_PP = max(0._wp, sign(1._wp, s_R)) + + E_star = xi_M*(E_L + xi_MP*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & + (rho_L*s_S + pres_L/(s_L - vel_L(dir_idx(1))))) - E_L)) + & + xi_P*(E_R + xi_PP*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & + (rho_R*s_S + pres_R/(s_R - vel_R(dir_idx(1))))) - E_R)) + p_Star = xi_M*(pres_L + xi_MP*(rho_L*(s_L - vel_L(dir_idx(1)))*(s_S - vel_L(dir_idx(1))))) + & + xi_P*(pres_R + xi_PP*(rho_R*(s_R - vel_R(dir_idx(1)))*(s_S - vel_R(dir_idx(1))))) + + rho_Star = xi_M*(rho_L*(xi_MP*xi_L + 1._wp - xi_MP)) + & + xi_P*(rho_R*(xi_PP*xi_R + 1._wp - xi_PP)) + + vel_K_Star = vel_L(idx1)*(1._wp - xi_MP) + xi_MP*vel_R(idx1) + & + xi_MP*xi_PP*(s_S - vel_R(idx1)) + + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - ! Momentum flux. - ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(i)) + & - s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & - dir_flg(dir_idx(i))*pres_L) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(i)) + & - s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & - dir_flg(dir_idx(i))*pres_R) - end do + ! COMPUTING FLUXES + ! MASS FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do - if (bubbles_euler) then - ! Put p_tilde in + ! MOMENTUM FLUX. + ! 
f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & - xi_M*(dir_flg(dir_idx(i))*(-1._wp*ptilde_L)) & - + xi_P*(dir_flg(dir_idx(i))*(-1._wp*ptilde_R)) + idxi = dir_idx(i) + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & + (dir_flg(idxi)*vel_K_Star + (1._wp - dir_flg(idxi))*(xi_M*vel_L(idxi) + xi_P*vel_R(idxi))) + dir_flg(idxi)*p_Star & + + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr end do - end if - - flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = alf_idx, alf_idx !only advect the void fraction - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + ! ENERGY FLUX. + ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = (E_star + p_Star)*vel_K_Star & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - ! Source for volume fraction advection equation - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims + ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux + if (elasticity) then + flux_ene_e = 0._wp; + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + ! MOMENTUM ELASTIC FLUX. + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & + - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) + ! ENERGY ELASTIC FLUX. 
+ flux_ene_e = flux_ene_e - & + xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & + s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & + xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & + s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) + end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e + end if - vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp - !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(dir_idx(i))%sf(j,k,l) = 0._wp - end do + ! VOLUME FRACTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*s_S + end do - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & + xi_M*(vel_L(idxi) + dir_flg(idxi)*(s_S*(xi_MP*(xi_L - 1) + 1) - vel_L(idxi))) + & + xi_P*(vel_R(idxi) + dir_flg(idxi)*(s_S*(xi_PP*(xi_R - 1) + 1) - vel_R(idxi))) + end do - ! Add advection flux for bubble variables - if (bubbles_euler) then + ! INTERNAL ENERGIES ADVECTION FLUX. + ! 
K-th pressure and velocity in preparation for the internal energy flux $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + do i = 1, num_fluids + p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1._wp + gammas(i)))* & + xi_L**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_L) + pres_L) + & + xi_P*(xi_PP*((pres_R + pi_infs(i)/(1._wp + gammas(i)))* & + xi_R**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_R) + pres_R) + + flux_rs${XYZ}$_vf(j, k, l, i + intxb - 1) = & + ((xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1))* & + (gammas(i)*p_K_Star + pi_infs(i)) + & + (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + contxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + contxb - 1))* & + qvs(i))*vel_K_Star & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S*(xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1)) end do - end if - ! Geometrical source flux for cylindrical coordinates + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) - #:if (NORM_DIR == 2) - if (cyl_coord) then - ! Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! 
Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - ! Geometrical source of the void fraction(s) is zero + ! HYPOELASTIC STRESS EVOLUTION FLUX. + if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do i = 1, strxe - strxb + 1 + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) end do end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then + + ! REFERENCE MAP FLUX. 
+ if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & + - rho_L*vel_L(idx1)*xi_field_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & + - rho_R*vel_R(idx1)*xi_field_R(i)) end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - - xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) end if - #:endif - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - elseif (model_eqns == 2 .and. bubbles_euler) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + ! COLOR FUNCTION FLUX + if (surface_tension) then + flux_rs${XYZ}$_vf(j, k, l, c_idx) = & + (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx))*s_S + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + ! 
Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = intxb, intxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - vel_L_rms = 0._wp; vel_R_rms = 0._wp + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + end if + #:endif - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do + end do + end do + #:endcall GPU_PARALLEL_LOOP - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + elseif (model_eqns == 4) then + !ME4 + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - rho_L = 0._wp - gamma_L = 
0._wp - pi_inf_L = 0._wp - qv_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - ! Retain this in the refactor - if (mpp_lim .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) end do - else if (num_fluids > 2) then + + vel_L_rms = 0._wp; vel_R_rms = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + do i = 1, num_dims + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - else - rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) - gamma_L = gammas(1) - pi_inf_L = pi_infs(1) - qv_L = qvs(1) - end if - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - if (mpp_lim .and. 
(num_fluids > 2)) then + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) end do - else if (num_fluids > 2) then + + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + do i = 1, num_fluids + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) end do - else - rho_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) - gamma_R = gammas(1) - pi_inf_R = pi_infs(1) - qv_R = qvs(1) - end if - if (viscous) then - if (num_fluids == 1) then ! 
Need to consider case with num_fluids >= 2 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - if (Re_size(i) > 0) Re_L(i) = 0._wp + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & - + Re_L(i) - end do + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + @:compute_average_state() - end do + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_R(i) = dflt_real + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - if (Re_size(i) > 0) Re_R(i) = 0._wp + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. 
- $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & - + Re_R(i) - end do + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + if (wave_speeds == 1) then + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) + + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) end if - end if - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) + xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + ! goes with numerical velocity in x/y/z directions + ! 
xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) - if (avg_state == 2) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) - R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) - - V0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, vs(i)) - V0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, vs(i)) - if (.not. polytropic .and. .not. qbmm) then - P0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, ps(i)) - P0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, ps(i)) - end if + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*alpha_rho_L(i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*alpha_rho_R(i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) end do - if (.not. qbmm) then - if (adv_n) then - nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, n_idx) - nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, n_idx) - else - nbub_L_denom = 0._wp - nbub_R_denom = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) - nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) - end do - nbub_L = (3._wp/(4._wp*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)/nbub_L_denom - nbub_R = (3._wp/(4._wp*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom - end if - else - !nb stored in 0th moment of first R0 bin in variable conversion module - nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, bubxb) - nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) - end if - + ! Momentum flux. + ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - if (.not. 
qbmm) then - if (polytropic) then - pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), 0._wp) - pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), 0._wp) - else - pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), P0_L(i)) - pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), P0_R(i)) - end if - end if + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(i)) + & + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & + dir_flg(dir_idx(i))*pres_L) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(i)) + & + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & + dir_flg(dir_idx(i))*pres_R) end do - if (qbmm) then - PbwR3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 4) - PbwR3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 4) + if (bubbles_euler) then + ! Put p_tilde in + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & + xi_M*(dir_flg(dir_idx(i))*(-1._wp*ptilde_L)) & + + xi_P*(dir_flg(dir_idx(i))*(-1._wp*ptilde_R)) + end do + end if - R3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 1) - R3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 1) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp - R3V2Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 3) - R3V2Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 3) - else + $:GPU_LOOP(parallelism='[seq]') + do i = alf_idx, alf_idx !only advect the void fraction + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do - PbwR3Lbar = 0._wp - PbwR3Rbar = 0._wp + ! 
Source for volume fraction advection equation + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims - R3Lbar = 0._wp - R3Rbar = 0._wp + vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp + !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(dir_idx(i))%sf(j,k,l) = 0._wp + end do - R3V2Lbar = 0._wp - R3V2Rbar = 0._wp + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + ! Add advection flux for bubble variables + if (bubbles_euler) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) - PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) - - R3Lbar = R3Lbar + (R0_L(i)**3._wp)*weight(i) - R3Rbar = R3Rbar + (R0_R(i)**3._wp)*weight(i) - - R3V2Lbar = R3V2Lbar + (R0_L(i)**3._wp)*(V0_L(i)**2._wp)*weight(i) - R3V2Rbar = R3V2Rbar + (R0_R(i)**3._wp)*(V0_R(i)**2._wp)*weight(i) + do i = bubxb, bubxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) end do end if - if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then - ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*pres_L - else - ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - & - rho_L*R3V2Lbar/R3Lbar) - end if + ! Geometrical source flux for cylindrical coordinates - if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then - ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*pres_R - else - ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - & - rho_R*R3V2Rbar/R3Rbar) - end if + #:if (NORM_DIR == 2) + if (cyl_coord) then + ! 
Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + - xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + end if + #:endif + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP - if ((.not. f_approx_equal(ptilde_L, ptilde_L)) .or. (.not. f_approx_equal(ptilde_R, ptilde_R))) then - end if + elseif (model_eqns == 2 .and. 
bubbles_euler) then + #:call GPU_PARALLEL_LOOP(collapse=3, private='[R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - rho_avg = 5.e-1_wp*(rho_L + rho_R) - H_avg = 5.e-1_wp*(H_L + H_R) - gamma_avg = 5.e-1_wp*(gamma_L + gamma_R) - vel_avg_rms = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do + + vel_L_rms = 0._wp; vel_R_rms = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_avg_rms = vel_avg_rms + (5.e-1_wp*(vel_L(i) + vel_R(i)))**2._wp + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - end if + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + ! Retain this in the refactor + if (mpp_lim .and. 
(num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + end do + else if (num_fluids > 2) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + end do + else + rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) + gamma_L = gammas(1) + pi_inf_L = pi_infs(1) + qv_L = qvs(1) + end if - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + if (mpp_lim .and. 
(num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + end do + else if (num_fluids > 2) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + end do + else + rho_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) + gamma_R = gammas(1) + pi_inf_R = pi_infs(1) + qv_R = qvs(1) + end if - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + if (viscous) then + if (num_fluids == 1) then ! 
Need to consider case with num_fluids >= 2 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real - if (wave_speeds == 1) then - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + if (Re_size(i) > 0) Re_L(i) = 0._wp - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + + Re_L(i) + end do - pres_SR = pres_SL + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + end do - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_R(i) = dflt_real - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + if (Re_size(i) > 0) Re_R(i) = 0._wp - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + + Re_R(i) + end do - ! goes with q_star_L/R = xi_L/R * (variable) - ! 
xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) - xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if + end if - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - if (bubbles_euler .and. (num_fluids > 1)) then - ! Kill mass transport @ gas density - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp - end if + if (avg_state == 2) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) + R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) + + V0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, vs(i)) + V0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, vs(i)) + if (.not. polytropic .and. .not. qbmm) then + P0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, ps(i)) + P0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, ps(i)) + end if + end do - ! Momentum flux. - ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + if (.not. 
qbmm) then + if (adv_n) then + nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, n_idx) + nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, n_idx) + else + nbub_L_denom = 0._wp + nbub_R_denom = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) + nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) + end do + nbub_L = (3._wp/(4._wp*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)/nbub_L_denom + nbub_R = (3._wp/(4._wp*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom + end if + else + !nb stored in 0th moment of first R0 bin in variable conversion module + nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, bubxb) + nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) + end if - ! Include p_tilde + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + if (.not. qbmm) then + if (polytropic) then + pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), 0._wp) + pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), 0._wp) + else + pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), P0_L(i)) + pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), P0_R(i)) + end if + end if + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(i)) + & - s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & - dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(i)) + & - s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & - dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i))*pcorr - end do + if (qbmm) then + PbwR3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 4) + PbwR3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 4) - ! Energy flux. - ! 
f = u*(E+p), q = E, q_star = \xi*E+(s-u)(\rho s_star + p/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - xi_M*(vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) + & - s_M*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & - (rho_L*s_S + (pres_L - ptilde_L)/ & - (s_L - vel_L(dir_idx(1))))) - E_L)) & - + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) + & - s_P*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & - (rho_R*s_S + (pres_R - ptilde_R)/ & - (s_R - vel_R(dir_idx(1))))) - E_R)) & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - - ! Volume fraction flux - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + R3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 1) + R3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 1) - ! Source for volume fraction advection equation - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & - xi_M*(vel_L(dir_idx(i)) + & - dir_flg(dir_idx(i))* & - s_M*(xi_L - 1._wp)) & - + xi_P*(vel_R(dir_idx(i)) + & - dir_flg(dir_idx(i))* & - s_P*(xi_R - 1._wp)) - - !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(idxi)%sf(j,k,l) = 0._wp - end do + R3V2Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 3) + R3V2Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 3) + else - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + PbwR3Lbar = 0._wp + PbwR3Rbar = 0._wp - ! 
Add advection flux for bubble variables - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + R3Lbar = 0._wp + R3Rbar = 0._wp - if (qbmm) then - flux_rs${XYZ}$_vf(j, k, l, bubxb) = & - xi_M*nbub_L & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end if + R3V2Lbar = 0._wp + R3V2Rbar = 0._wp - if (adv_n) then - flux_rs${XYZ}$_vf(j, k, l, n_idx) = & - xi_M*nbub_L & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) + PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) + + R3Lbar = R3Lbar + (R0_L(i)**3._wp)*weight(i) + R3Rbar = R3Rbar + (R0_R(i)**3._wp)*weight(i) + + R3V2Lbar = R3V2Lbar + (R0_L(i)**3._wp)*(V0_L(i)**2._wp)*weight(i) + R3V2Rbar = R3V2Rbar + (R0_R(i)**3._wp)*(V0_R(i)**2._wp)*weight(i) + end do + end if + + if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then + ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*pres_L + else + ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - & + rho_L*R3V2Lbar/R3Lbar) + end if + + if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then + ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*pres_R + else + ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - & + rho_R*R3V2Rbar/R3Rbar) + end if + + if ((.not. f_approx_equal(ptilde_L, ptilde_L)) .or. (.not. 
f_approx_equal(ptilde_R, ptilde_R))) then + end if + + rho_avg = 5.e-1_wp*(rho_L + rho_R) + H_avg = 5.e-1_wp*(H_L + H_R) + gamma_avg = 5.e-1_wp*(gamma_L + gamma_R) + vel_avg_rms = 0._wp - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - ! Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do i = 1, num_dims + vel_avg_rms = vel_avg_rms + (5.e-1_wp*(vel_L(i) + vel_R(i)))**2._wp end do + end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - - xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + 
vel_L_rms, 0._wp, c_L) + + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) + + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do end if - #:endif - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else - ! 5-EQUATION MODEL WITH HLLC - #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 + ! 
Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + if (wave_speeds == 1) then + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) + + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp + ! goes with q_star_L/R = xi_L/R * (variable) + ! 
xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) + xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) - alpha_L_sum = 0._wp - alpha_R_sum = 0._wp + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - ! Change this by splitting it into the cases - ! present in the bubbles_euler - if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) end do + if (bubbles_euler .and. (num_fluids > 1)) then + ! Kill mass transport @ gas density + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp + end if + + ! Momentum flux. + ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + + ! 
Include p_tilde + $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(i)) + & + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(i)) + & + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i))*pcorr end do + ! Energy flux. + ! f = u*(E+p), q = E, q_star = \xi*E+(s-u)(\rho s_star + p/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + xi_M*(vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) + & + s_M*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & + (rho_L*s_S + (pres_L - ptilde_L)/ & + (s_L - vel_L(dir_idx(1))))) - E_L)) & + + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) + & + s_P*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & + (rho_R*s_S + (pres_R - ptilde_R)/ & + (s_R - vel_R(dir_idx(1))))) - E_R)) & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + + ! Volume fraction flux $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) end do + ! 
Source for volume fraction advection equation $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) + do i = 1, num_dims + vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & + xi_M*(vel_L(dir_idx(i)) + & + dir_flg(dir_idx(i))* & + s_M*(xi_L - 1._wp)) & + + xi_P*(vel_R(dir_idx(i)) + & + dir_flg(dir_idx(i))* & + s_P*(xi_R - 1._wp)) + + !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(idxi)%sf(j,k,l) = 0._wp end do - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - end do + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) - if (viscous) then + ! 
Add advection flux for bubble variables $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real + do i = bubxb, bubxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do - if (Re_size(i) > 0) Re_L(i) = 0._wp + if (qbmm) then + flux_rs${XYZ}$_vf(j, k, l, bubxb) = & + xi_M*nbub_L & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end if - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & - + Re_L(i) - end do + if (adv_n) then + flux_rs${XYZ}$_vf(j, k, l, n_idx) = & + xi_M*nbub_L & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end if - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + ! Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + ! Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + ! 
Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + - xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + + end if + #:endif + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + else + ! 5-EQUATION MODEL WITH HLLC + #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + + !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do + vel_L_rms = 0._wp; vel_R_rms = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_R(i) = dflt_real + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - if (Re_size(i) > 0) 
Re_R(i) = 0._wp + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp + + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp + + alpha_L_sum = 0._wp + alpha_R_sum = 0._wp + ! Change this by splitting it into the cases + ! present in the bubbles_euler + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & - + Re_R(i) + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) end do - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) + alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) + end do + end if - if (chemistry) then - c_sum_Yi_Phi = 0.0_wp $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + 
pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + if (Re_size(i) > 0) Re_L(i) = 0._wp - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + + Re_L(i) + end do - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + end do - if (chem_params%gamma_method == 1) then - !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_R(i) = dflt_real - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. 
- call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + if (Re_size(i) > 0) Re_R(i) = 0._wp - Gamm_L = Cp_L/Cv_L - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) - Gamm_R = Cp_R/Cv_R - gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + + Re_R(i) + end do + + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do end if - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + if (chemistry) then + c_sum_Yi_Phi = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - end if + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R - ! 
ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - G_L = 0._wp - G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs(i) - G_R = G_R + alpha_R(i)*Gs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R + + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) + + if (chem_params%gamma_method == 1) then + !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + + Gamm_L = Cp_L/Cv_L + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) + Gamm_R = Cp_R/Cv_R + gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) end if - end do - end if - ! 
ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - end do - G_L = 0._wp - G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - ! Mixture left and right shear modulus - G_L = G_L + alpha_L(i)*Gs(i) - G_R = G_R + alpha_R(i)*Gs(i) - end do - ! Elastic contribution to energy if G large enough - if (G_L > verysmall .and. G_R > verysmall) then - E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - end if + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - @:compute_average_state() + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + end if - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + ! 
ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + G_L = 0._wp + G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs(i) + G_R = G_R + alpha_R(i)*Gs(i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + ! Elastic contribution to energy if G large enough + if ((G_L > verysmall) .and. (G_R > verysmall)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Additional terms in 2D and 3D + if ((i == 2) .or. (i == 4) .or. (i == 5)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + end if + end if + end do + end if - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, c_sum_Yi_Phi, c_avg) + ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + end do + G_L = 0._wp + G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + ! Mixture left and right shear modulus + G_L = G_L + alpha_L(i)*Gs(i) + G_R = G_R + alpha_R(i)*Gs(i) + end do + ! Elastic contribution to energy if G large enough + if (G_L > verysmall .and. 
G_R > verysmall) then + E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) + end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, b_size - 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + end if - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + @:compute_average_state() - if (wave_speeds == 1) then - if (elasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) - s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & - tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & - rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & - rho_R*(s_R - vel_R(idx1))) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - end if - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(idx1) - & - vel_R(idx1))) + call s_compute_speed_of_sound(pres_R, rho_R, 
gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - pres_SR = pres_SL + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, c_sum_Yi_Phi, c_avg) - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if - s_L = vel_L(idx1) - c_L*Ms_L - s_R = vel_R(idx1) + c_R*Ms_R + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - s_S = 5.e-1_wp*((vel_L(idx1) + vel_R(idx1)) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + if (wave_speeds == 1) then + if (elasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) + s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & + tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & + rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & + rho_R*(s_R - vel_R(idx1))) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - 
rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + end if + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(idx1) - & + vel_R(idx1))) + + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(idx1) - c_L*Ms_L + s_R = vel_R(idx1) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(idx1) + vel_R(idx1)) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(idx1))/(s_L - s_S) - xi_R = (s_R - vel_R(idx1))/(s_R - s_S) + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(idx1))/(s_L - s_S) + xi_R = (s_R - vel_R(idx1))/(s_R - s_S) - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) - ! COMPUTING THE HLLC FLUXES - ! MASS FLUX. 
- $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end do + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - ! MOMENTUM FLUX. - ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idxi) + & - s_M*(xi_L*(dir_flg(idxi)*s_S + & - (1._wp - dir_flg(idxi))* & - vel_L(idxi)) - vel_L(idxi))) + & - dir_flg(idxi)*(pres_L)) & - + xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idxi) + & - s_P*(xi_R*(dir_flg(idxi)*s_S + & - (1._wp - dir_flg(idxi))* & - vel_R(idxi)) - vel_R(idxi))) + & - dir_flg(idxi)*(pres_R)) & - + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr - end do + ! COMPUTING THE HLLC FLUXES + ! MASS FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do - ! ENERGY FLUX. - ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - xi_M*(vel_L(idx1)*(E_L + pres_L) + & - s_M*(xi_L*(E_L + (s_S - vel_L(idx1))* & - (rho_L*s_S + pres_L/ & - (s_L - vel_L(idx1)))) - E_L)) & - + xi_P*(vel_R(idx1)*(E_R + pres_R) + & - s_P*(xi_R*(E_R + (s_S - vel_R(idx1))* & - (rho_R*s_S + pres_R/ & - (s_R - vel_R(idx1)))) - E_R)) & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - - ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux - if (elasticity) then - flux_ene_e = 0._wp + ! MOMENTUM FLUX. + ! 
f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) - ! MOMENTUM ELASTIC FLUX. flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & - - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) - ! ENERGY ELASTIC FLUX. - flux_ene_e = flux_ene_e - & - xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & - s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & - xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & - s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e - end if - - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) + xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idxi) + & + s_M*(xi_L*(dir_flg(idxi)*s_S + & + (1._wp - dir_flg(idxi))* & + vel_L(idxi)) - vel_L(idxi))) + & + dir_flg(idxi)*(pres_L)) & + + xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idxi) + & + s_P*(xi_R*(dir_flg(idxi)*s_S + & + (1._wp - dir_flg(idxi))* & + vel_R(idxi)) - vel_R(idxi))) + & + dir_flg(idxi)*(pres_R)) & + + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr end do - end if - - ! VOLUME FRACTION FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end do - ! VOLUME FRACTION SOURCE FLUX. 
- $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & - xi_M*(vel_L(idxi) + & - dir_flg(idxi)* & - s_M*(xi_L - 1._wp)) & - + xi_P*(vel_R(idxi) + & - dir_flg(idxi)* & - s_P*(xi_R - 1._wp)) - end do + ! ENERGY FLUX. + ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + xi_M*(vel_L(idx1)*(E_L + pres_L) + & + s_M*(xi_L*(E_L + (s_S - vel_L(idx1))* & + (rho_L*s_S + pres_L/ & + (s_L - vel_L(idx1)))) - E_L)) & + + xi_P*(vel_R(idx1)*(E_R + pres_R) + & + s_P*(xi_R*(E_R + (s_S - vel_R(idx1))* & + (rho_R*s_S + pres_R/ & + (s_R - vel_R(idx1)))) - E_R)) & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + + ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux + if (elasticity) then + flux_ene_e = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + ! MOMENTUM ELASTIC FLUX. + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & + - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) + ! ENERGY ELASTIC FLUX. + flux_ene_e = flux_ene_e - & + xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & + s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & + xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & + s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) + end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e + end if - ! COLOR FUNCTION FLUX - if (surface_tension) then - flux_rs${XYZ}$_vf(j, k, l, c_idx) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end if + ! HYPOELASTIC STRESS EVOLUTION FLUX. 
+ if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) + end do + end if - ! REFERENCE MAP FLUX. - if (hyperelasticity) then + ! VOLUME FRACTION FLUX. $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & - - rho_L*vel_L(idx1)*xi_field_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & - - rho_R*vel_R(idx1)*xi_field_R(i)) + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) end do - end if - - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) - if (chemistry) then + ! VOLUME FRACTION SOURCE FLUX. $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*rho_L*Y_L*(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*rho_R*Y_R*(vel_R(idx1) + s_P*(xi_R - 1._wp)) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0.0_wp + do i = 1, num_dims + idxi = dir_idx(i) + vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & + xi_M*(vel_L(idxi) + & + dir_flg(idxi)* & + s_M*(xi_L - 1._wp)) & + + xi_P*(vel_R(idxi) + & + dir_flg(idxi)* & + s_P*(xi_R - 1._wp)) end do - end if - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! 
Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + idx1) = & - xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idx1) + & - s_M*(xi_L*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_L(idx1)) - vel_L(idx1)))) & - + xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idx1) + & - s_P*(xi_R*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_R(idx1)) - vel_R(idx1)))) - ! Geometrical source of the void fraction(s) is zero + ! COLOR FUNCTION FLUX + if (surface_tension) then + flux_rs${XYZ}$_vf(j, k, l, c_idx) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end if + + ! REFERENCE MAP FLUX. + if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & + - rho_L*vel_L(idx1)*xi_field_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & + - rho_R*vel_R(idx1)*xi_field_R(i)) end do end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then + + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) + + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + + flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*rho_L*Y_L*(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*rho_R*Y_R*(vel_R(idx1) + s_P*(xi_R - 1._wp)) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0.0_wp end do + end if + + ! 
Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + idx1) = & + xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idx1) + & + s_M*(xi_L*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_L(idx1)) - vel_L(idx1)))) & + + xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idx1) + & + s_P*(xi_R*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_R(idx1)) - vel_R(idx1)))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idx1) + & - s_M*(xi_L*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_L(idx1)) - vel_L(idx1)))) & - - xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idx1) + & - s_P*(xi_R*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_R(idx1)) - vel_R(idx1)))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idx1) + & + s_M*(xi_L*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_L(idx1)) - vel_L(idx1)))) & + - xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idx1) + & + s_P*(xi_R*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_R(idx1)) - vel_R(idx1)))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif + end if + #:endif + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP 
end if end if @@ -2943,177 +2943,177 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - ! (1) Extract the left/right primitive states - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + ! (1) Extract the left/right primitive states + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - ! NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic - do i = 1, num_vels - vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) - vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + dir_idx(i)) - end do + ! 
NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic + do i = 1, num_vels + vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + dir_idx(i)) + end do - vel_rms%L = sum(vel%L**2._wp) - vel_rms%R = sum(vel%R**2._wp) + vel_rms%L = sum(vel%L**2._wp) + vel_rms%R = sum(vel%R**2._wp) - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do + + pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables; only in x so not permutated - B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg), qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1)] - B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1)] - else ! 2D/3D: Bx, By, Bz as variables - B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(1) - 1), & - qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1), & - qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1)] - B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(1) - 1), & - qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(2) - 1), & - qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(3) - 1)] + ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic + if (mhd) then + if (n == 0) then ! 
1D: constant Bx; By, Bz as variables; only in x so not permutated + B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg), qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1)] + B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1)] + else ! 2D/3D: Bx, By, Bz as variables + B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(1) - 1), & + qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1), & + qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1)] + B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(1) - 1), & + qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(2) - 1), & + qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(3) - 1)] + end if end if - end if - ! Sum properties of all fluid components - rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp - rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho%L = rho%L + alpha_rho_L(i) - gamma%L = gamma%L + alpha_L(i)*gammas(i) - pi_inf%L = pi_inf%L + alpha_L(i)*pi_infs(i) - qv%L = qv%L + alpha_rho_L(i)*qvs(i) - - rho%R = rho%R + alpha_rho_R(i) - gamma%R = gamma%R + alpha_R(i)*gammas(i) - pi_inf%R = pi_inf%R + alpha_R(i)*pi_infs(i) - qv%R = qv%R + alpha_rho_R(i)*qvs(i) - end do + ! 
Sum properties of all fluid components + rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp + rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho%L = rho%L + alpha_rho_L(i) + gamma%L = gamma%L + alpha_L(i)*gammas(i) + pi_inf%L = pi_inf%L + alpha_L(i)*pi_infs(i) + qv%L = qv%L + alpha_rho_L(i)*qvs(i) + + rho%R = rho%R + alpha_rho_R(i) + gamma%R = gamma%R + alpha_R(i)*gammas(i) + pi_inf%R = pi_inf%R + alpha_R(i)*pi_infs(i) + qv%R = qv%R + alpha_rho_R(i)*qvs(i) + end do - pres_mag%L = 0.5_wp*sum(B%L**2._wp) - pres_mag%R = 0.5_wp*sum(B%R**2._wp) - E%L = gamma%L*pres%L + pi_inf%L + 0.5_wp*rho%L*vel_rms%L + qv%L + pres_mag%L - E%R = gamma%R*pres%R + pi_inf%R + 0.5_wp*rho%R*vel_rms%R + qv%R + pres_mag%R ! includes magnetic energy - H_no_mag%L = (E%L + pres%L - pres_mag%L)/rho%L - H_no_mag%R = (E%R + pres%R - pres_mag%R)/rho%R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - - ! (2) Compute fast wave speeds - call s_compute_speed_of_sound(pres%L, rho%L, gamma%L, pi_inf%L, H_no_mag%L, alpha_L, vel_rms%L, 0._wp, c%L) - call s_compute_speed_of_sound(pres%R, rho%R, gamma%R, pi_inf%R, H_no_mag%R, alpha_R, vel_rms%R, 0._wp, c%R) - call s_compute_fast_magnetosonic_speed(rho%L, c%L, B%L, norm_dir, c_fast%L, H_no_mag%L) - call s_compute_fast_magnetosonic_speed(rho%R, c%R, B%R, norm_dir, c_fast%R, H_no_mag%R) - - ! (3) Compute contact speed s_M [Miyoshi Equ. (38)] - s_L = min(vel%L(1) - c_fast%L, vel%R(1) - c_fast%R) - s_R = max(vel%R(1) + c_fast%R, vel%L(1) + c_fast%L) - - pTot_L = pres%L + pres_mag%L - pTot_R = pres%R + pres_mag%R - - s_M = (((s_R - vel%R(1))*rho%R*vel%R(1) - & - (s_L - vel%L(1))*rho%L*vel%L(1) - pTot_R + pTot_L)/ & - ((s_R - vel%R(1))*rho%R - (s_L - vel%L(1))*rho%L)) - - ! 
(4) Compute star state variables - rhoL_star = rho%L*(s_L - vel%L(1))/(s_L - s_M) - rhoR_star = rho%R*(s_R - vel%R(1))/(s_R - s_M) - p_star = pTot_L + rho%L*(s_L - vel%L(1))*(s_M - vel%L(1))/(s_L - s_M) - E_starL = ((s_L - vel%L(1))*E%L - pTot_L*vel%L(1) + p_star*s_M)/(s_L - s_M) - E_starR = ((s_R - vel%R(1))*E%R - pTot_R*vel%R(1) + p_star*s_M)/(s_R - s_M) - - ! (5) Compute left/right state vectors and fluxes - U_L = [rho%L, rho%L*vel%L(1:3), B%L(2:3), E%L] - U_starL = [rhoL_star, rhoL_star*s_M, rhoL_star*vel%L(2:3), B%L(2:3), E_starL] - U_R = [rho%R, rho%R*vel%R(1:3), B%R(2:3), E%R] - U_starR = [rhoR_star, rhoR_star*s_M, rhoR_star*vel%R(2:3), B%R(2:3), E_starR] - - ! Compute the left/right fluxes - F_L(1) = U_L(2) - F_L(2) = U_L(2)*vel%L(1) - B%L(1)*B%L(1) + pTot_L - F_L(3:4) = U_L(2)*vel%L(2:3) - B%L(1)*B%L(2:3) - F_L(5:6) = vel%L(1)*B%L(2:3) - vel%L(2:3)*B%L(1) - F_L(7) = (E%L + pTot_L)*vel%L(1) - B%L(1)*(vel%L(1)*B%L(1) + vel%L(2)*B%L(2) + vel%L(3)*B%L(3)) - - F_R(1) = U_R(2) - F_R(2) = U_R(2)*vel%R(1) - B%R(1)*B%R(1) + pTot_R - F_R(3:4) = U_R(2)*vel%R(2:3) - B%R(1)*B%R(2:3) - F_R(5:6) = vel%R(1)*B%R(2:3) - vel%R(2:3)*B%R(1) - F_R(7) = (E%R + pTot_R)*vel%R(1) - B%R(1)*(vel%R(1)*B%R(1) + vel%R(2)*B%R(2) + vel%R(3)*B%R(3)) - ! Compute the star flux using HLL relation - F_starL = F_L + s_L*(U_starL - U_L) - F_starR = F_R + s_R*(U_starR - U_R) - ! Compute the rotational (Alfvén) speeds - s_starL = s_M - abs(B%L(1))/sqrt(rhoL_star) - s_starR = s_M + abs(B%L(1))/sqrt(rhoR_star) - ! Compute the double–star states [Miyoshi Eqns. (59)-(62)] - sqrt_rhoL_star = sqrt(rhoL_star); sqrt_rhoR_star = sqrt(rhoR_star) - vL_star = vel%L(2); wL_star = vel%L(3) - vR_star = vel%R(2); wR_star = vel%R(3) - - ! (6) Compute the double–star states [Miyoshi Eqns. 
(59)-(62)] - denom_ds = sqrt_rhoL_star + sqrt_rhoR_star - sign_Bx = sign(1._wp, B%L(1)) - v_double = (sqrt_rhoL_star*vL_star + sqrt_rhoR_star*vR_star + (B%R(2) - B%L(2))*sign_Bx)/denom_ds - w_double = (sqrt_rhoL_star*wL_star + sqrt_rhoR_star*wR_star + (B%R(3) - B%L(3))*sign_Bx)/denom_ds - By_double = (sqrt_rhoL_star*B%R(2) + sqrt_rhoR_star*B%L(2) + sqrt_rhoL_star*sqrt_rhoR_star*(vR_star - vL_star)*sign_Bx)/denom_ds - Bz_double = (sqrt_rhoL_star*B%R(3) + sqrt_rhoR_star*B%L(3) + sqrt_rhoL_star*sqrt_rhoR_star*(wR_star - wL_star)*sign_Bx)/denom_ds - - E_doubleL = E_starL - sqrt_rhoL_star*((vL_star*B%L(2) + wL_star*B%L(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx - E_doubleR = E_starR + sqrt_rhoR_star*((vR_star*B%R(2) + wR_star*B%R(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx - E_double = 0.5_wp*(E_doubleL + E_doubleR) - - U_doubleL = [rhoL_star, rhoL_star*s_M, rhoL_star*v_double, rhoL_star*w_double, By_double, Bz_double, E_double] - U_doubleR = [rhoR_star, rhoR_star*s_M, rhoR_star*v_double, rhoR_star*w_double, By_double, Bz_double, E_double] - - ! (11) Choose HLLD flux based on wave-speed regions - if (0.0_wp <= s_L) then - F_hlld = F_L - else if (0.0_wp <= s_starL) then - F_hlld = F_L + s_L*(U_starL - U_L) - else if (0.0_wp <= s_M) then - F_hlld = F_starL + s_starL*(U_doubleL - U_starL) - else if (0.0_wp <= s_starR) then - F_hlld = F_starR + s_starR*(U_doubleR - U_starR) - else if (0.0_wp <= s_R) then - F_hlld = F_R + s_R*(U_starR - U_R) - else - F_hlld = F_R - end if + pres_mag%L = 0.5_wp*sum(B%L**2._wp) + pres_mag%R = 0.5_wp*sum(B%R**2._wp) + E%L = gamma%L*pres%L + pi_inf%L + 0.5_wp*rho%L*vel_rms%L + qv%L + pres_mag%L + E%R = gamma%R*pres%R + pi_inf%R + 0.5_wp*rho%R*vel_rms%R + qv%R + pres_mag%R ! includes magnetic energy + H_no_mag%L = (E%L + pres%L - pres_mag%L)/rho%L + H_no_mag%R = (E%R + pres%R - pres_mag%R)/rho%R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + + ! 
(2) Compute fast wave speeds + call s_compute_speed_of_sound(pres%L, rho%L, gamma%L, pi_inf%L, H_no_mag%L, alpha_L, vel_rms%L, 0._wp, c%L) + call s_compute_speed_of_sound(pres%R, rho%R, gamma%R, pi_inf%R, H_no_mag%R, alpha_R, vel_rms%R, 0._wp, c%R) + call s_compute_fast_magnetosonic_speed(rho%L, c%L, B%L, norm_dir, c_fast%L, H_no_mag%L) + call s_compute_fast_magnetosonic_speed(rho%R, c%R, B%R, norm_dir, c_fast%R, H_no_mag%R) + + ! (3) Compute contact speed s_M [Miyoshi Equ. (38)] + s_L = min(vel%L(1) - c_fast%L, vel%R(1) - c_fast%R) + s_R = max(vel%R(1) + c_fast%R, vel%L(1) + c_fast%L) + + pTot_L = pres%L + pres_mag%L + pTot_R = pres%R + pres_mag%R + + s_M = (((s_R - vel%R(1))*rho%R*vel%R(1) - & + (s_L - vel%L(1))*rho%L*vel%L(1) - pTot_R + pTot_L)/ & + ((s_R - vel%R(1))*rho%R - (s_L - vel%L(1))*rho%L)) + + ! (4) Compute star state variables + rhoL_star = rho%L*(s_L - vel%L(1))/(s_L - s_M) + rhoR_star = rho%R*(s_R - vel%R(1))/(s_R - s_M) + p_star = pTot_L + rho%L*(s_L - vel%L(1))*(s_M - vel%L(1))/(s_L - s_M) + E_starL = ((s_L - vel%L(1))*E%L - pTot_L*vel%L(1) + p_star*s_M)/(s_L - s_M) + E_starR = ((s_R - vel%R(1))*E%R - pTot_R*vel%R(1) + p_star*s_M)/(s_R - s_M) + + ! (5) Compute left/right state vectors and fluxes + U_L = [rho%L, rho%L*vel%L(1:3), B%L(2:3), E%L] + U_starL = [rhoL_star, rhoL_star*s_M, rhoL_star*vel%L(2:3), B%L(2:3), E_starL] + U_R = [rho%R, rho%R*vel%R(1:3), B%R(2:3), E%R] + U_starR = [rhoR_star, rhoR_star*s_M, rhoR_star*vel%R(2:3), B%R(2:3), E_starR] + + ! 
Compute the left/right fluxes + F_L(1) = U_L(2) + F_L(2) = U_L(2)*vel%L(1) - B%L(1)*B%L(1) + pTot_L + F_L(3:4) = U_L(2)*vel%L(2:3) - B%L(1)*B%L(2:3) + F_L(5:6) = vel%L(1)*B%L(2:3) - vel%L(2:3)*B%L(1) + F_L(7) = (E%L + pTot_L)*vel%L(1) - B%L(1)*(vel%L(1)*B%L(1) + vel%L(2)*B%L(2) + vel%L(3)*B%L(3)) + + F_R(1) = U_R(2) + F_R(2) = U_R(2)*vel%R(1) - B%R(1)*B%R(1) + pTot_R + F_R(3:4) = U_R(2)*vel%R(2:3) - B%R(1)*B%R(2:3) + F_R(5:6) = vel%R(1)*B%R(2:3) - vel%R(2:3)*B%R(1) + F_R(7) = (E%R + pTot_R)*vel%R(1) - B%R(1)*(vel%R(1)*B%R(1) + vel%R(2)*B%R(2) + vel%R(3)*B%R(3)) + ! Compute the star flux using HLL relation + F_starL = F_L + s_L*(U_starL - U_L) + F_starR = F_R + s_R*(U_starR - U_R) + ! Compute the rotational (Alfvén) speeds + s_starL = s_M - abs(B%L(1))/sqrt(rhoL_star) + s_starR = s_M + abs(B%L(1))/sqrt(rhoR_star) + ! Compute the double–star states [Miyoshi Eqns. (59)-(62)] + sqrt_rhoL_star = sqrt(rhoL_star); sqrt_rhoR_star = sqrt(rhoR_star) + vL_star = vel%L(2); wL_star = vel%L(3) + vR_star = vel%R(2); wR_star = vel%R(3) + + ! (6) Compute the double–star states [Miyoshi Eqns. 
(59)-(62)] + denom_ds = sqrt_rhoL_star + sqrt_rhoR_star + sign_Bx = sign(1._wp, B%L(1)) + v_double = (sqrt_rhoL_star*vL_star + sqrt_rhoR_star*vR_star + (B%R(2) - B%L(2))*sign_Bx)/denom_ds + w_double = (sqrt_rhoL_star*wL_star + sqrt_rhoR_star*wR_star + (B%R(3) - B%L(3))*sign_Bx)/denom_ds + By_double = (sqrt_rhoL_star*B%R(2) + sqrt_rhoR_star*B%L(2) + sqrt_rhoL_star*sqrt_rhoR_star*(vR_star - vL_star)*sign_Bx)/denom_ds + Bz_double = (sqrt_rhoL_star*B%R(3) + sqrt_rhoR_star*B%L(3) + sqrt_rhoL_star*sqrt_rhoR_star*(wR_star - wL_star)*sign_Bx)/denom_ds + + E_doubleL = E_starL - sqrt_rhoL_star*((vL_star*B%L(2) + wL_star*B%L(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx + E_doubleR = E_starR + sqrt_rhoR_star*((vR_star*B%R(2) + wR_star*B%R(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx + E_double = 0.5_wp*(E_doubleL + E_doubleR) + + U_doubleL = [rhoL_star, rhoL_star*s_M, rhoL_star*v_double, rhoL_star*w_double, By_double, Bz_double, E_double] + U_doubleR = [rhoR_star, rhoR_star*s_M, rhoR_star*v_double, rhoR_star*w_double, By_double, Bz_double, E_double] + + ! (11) Choose HLLD flux based on wave-speed regions + if (0.0_wp <= s_L) then + F_hlld = F_L + else if (0.0_wp <= s_starL) then + F_hlld = F_L + s_L*(U_starL - U_L) + else if (0.0_wp <= s_M) then + F_hlld = F_starL + s_starL*(U_doubleL - U_starL) + else if (0.0_wp <= s_starR) then + F_hlld = F_starR + s_starR*(U_doubleR - U_starR) + else if (0.0_wp <= s_R) then + F_hlld = F_R + s_R*(U_starR - U_R) + else + F_hlld = F_R + end if - ! (12) Reorder and write temporary variables to the flux array - ! Mass - flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component - ! Momentum - flux_rs${XYZ}$_vf(j, k, l, [contxe + dir_idx(1), contxe + dir_idx(2), contxe + dir_idx(3)]) = F_hlld([2, 3, 4]) - ! 
Magnetic field - if (n == 0) then - flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg, B_idx%beg + 1]) = F_hlld([5, 6]) - else - flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg + dir_idx(2) - 1, B_idx%beg + dir_idx(3) - 1]) = F_hlld([5, 6]) - end if - ! Energy - flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) - ! Partial fraction - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) - end do + ! (12) Reorder and write temporary variables to the flux array + ! Mass + flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component + ! Momentum + flux_rs${XYZ}$_vf(j, k, l, [contxe + dir_idx(1), contxe + dir_idx(2), contxe + dir_idx(3)]) = F_hlld([2, 3, 4]) + ! Magnetic field + if (n == 0) then + flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg, B_idx%beg + 1]) = F_hlld([5, 6]) + else + flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg + dir_idx(2) - 1, B_idx%beg + dir_idx(3) - 1]) = F_hlld([5, 6]) + end if + ! Energy + flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) + ! Partial fraction + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) + end do - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:endfor @@ -3318,53 +3318,53 @@ contains if (bc_x%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at beginning #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsx_vf(-1, k, l, i) = & - qR_prim_rsx_vf(0, k, l, i) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsx_vf(-1, k, l, i) = & + qR_prim_rsx_vf(0, k, l, i) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (viscous) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqL_prim_dx_vf(i)%sf(-1, k, l) = & - dqR_prim_dx_vf(i)%sf(0, k, l) + dqL_prim_dx_vf(i)%sf(-1, k, l) = & + dqR_prim_dx_vf(i)%sf(0, k, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (n > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqL_prim_dy_vf(i)%sf(-1, k, l) = & - dqR_prim_dy_vf(i)%sf(0, k, l) + dqL_prim_dy_vf(i)%sf(-1, k, l) = & + dqR_prim_dy_vf(i)%sf(0, k, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqL_prim_dz_vf(i)%sf(-1, k, l) = & - dqR_prim_dz_vf(i)%sf(0, k, l) + dqL_prim_dz_vf(i)%sf(-1, k, l) = & + dqR_prim_dz_vf(i)%sf(0, k, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3377,54 +3377,54 @@ contains if (bc_x%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at end #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsx_vf(m + 1, k, l, i) = & - qL_prim_rsx_vf(m, k, l, i) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsx_vf(m + 1, k, l, i) = & + qL_prim_rsx_vf(m, k, l, i) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (viscous) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqR_prim_dx_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dx_vf(i)%sf(m, k, l) + dqR_prim_dx_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dx_vf(i)%sf(m, k, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (n > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqR_prim_dy_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dy_vf(i)%sf(m, k, l) + dqR_prim_dy_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dy_vf(i)%sf(m, k, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqR_prim_dz_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dz_vf(i)%sf(m, k, l) + dqR_prim_dz_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dz_vf(i)%sf(m, k, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3440,50 +3440,50 @@ contains if (bc_y%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at beginning #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsy_vf(-1, k, l, i) = & - qR_prim_rsy_vf(0, k, l, i) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsy_vf(-1, k, l, i) = & + qR_prim_rsy_vf(0, k, l, i) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (viscous) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dx_vf(i)%sf(j, -1, l) = & - dqR_prim_dx_vf(i)%sf(j, 0, l) + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dx_vf(i)%sf(j, -1, l) = & + dqR_prim_dx_vf(i)%sf(j, 0, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dy_vf(i)%sf(j, -1, l) = & - dqR_prim_dy_vf(i)%sf(j, 0, l) + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dy_vf(i)%sf(j, -1, l) = & + dqR_prim_dy_vf(i)%sf(j, 0, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dz_vf(i)%sf(j, -1, l) = & - dqR_prim_dz_vf(i)%sf(j, 0, l) + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dz_vf(i)%sf(j, -1, l) = & + dqR_prim_dz_vf(i)%sf(j, 0, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3494,50 +3494,50 @@ contains if (bc_y%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at end #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsy_vf(n + 1, k, l, i) = & - qL_prim_rsy_vf(n, k, l, i) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsy_vf(n + 1, k, l, i) = & + qL_prim_rsy_vf(n, k, l, i) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (viscous) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dx_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dx_vf(i)%sf(j, n, l) + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dx_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dx_vf(i)%sf(j, n, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dy_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dy_vf(i)%sf(j, n, l) + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dy_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dy_vf(i)%sf(j, n, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dz_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dz_vf(i)%sf(j, n, l) + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dz_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dz_vf(i)%sf(j, n, l) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3551,46 +3551,46 @@ contains if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at beginning #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsz_vf(-1, k, l, i) = & - qR_prim_rsz_vf(0, k, l, i) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsz_vf(-1, k, l, i) = & + qR_prim_rsz_vf(0, k, l, i) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (viscous) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dx_vf(i)%sf(j, k, -1) = & - dqR_prim_dx_vf(i)%sf(j, k, 0) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dx_vf(i)%sf(j, k, -1) = & + dqR_prim_dx_vf(i)%sf(j, k, 0) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dy_vf(i)%sf(j, k, -1) = & - dqR_prim_dy_vf(i)%sf(j, k, 0) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dy_vf(i)%sf(j, k, -1) = & + dqR_prim_dy_vf(i)%sf(j, k, 0) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dz_vf(i)%sf(j, k, -1) = & - dqR_prim_dz_vf(i)%sf(j, k, 0) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dz_vf(i)%sf(j, k, -1) = & + dqR_prim_dz_vf(i)%sf(j, k, 0) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3599,48 +3599,48 @@ contains if (bc_z%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. 
BC at end #:call GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsz_vf(p + 1, k, l, i) = & - qL_prim_rsz_vf(p, k, l, i) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsz_vf(p + 1, k, l, i) = & + qL_prim_rsz_vf(p, k, l, i) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (viscous) then #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dx_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dx_vf(i)%sf(j, k, p) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dx_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dx_vf(i)%sf(j, k, p) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dy_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dy_vf(i)%sf(j, k, p) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dy_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dy_vf(i)%sf(j, k, p) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dz_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dz_vf(i)%sf(j, k, p) + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dz_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dz_vf(i)%sf(j, k, p) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3686,30 +3686,30 @@ contains if (viscous .or. 
(surface_tension)) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(i)%sf(j, k, l) = 0._wp + do i = momxb, E_idx + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(i)%sf(j, k, l) = 0._wp + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (qbmm) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsx_vf(j, k, l, i) = mom_sp(i)%sf(j, k, l) + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsx_vf(j, k, l, i) = mom_sp(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3718,29 +3718,29 @@ contains if (viscous .or. (surface_tension)) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(i)%sf(k, j, l) = 0._wp + do i = momxb, E_idx + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(i)%sf(k, j, l) = 0._wp + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (qbmm) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3749,29 +3749,29 @@ contains if (viscous .or. 
(surface_tension)) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(i)%sf(l, k, j) = 0._wp + do i = momxb, E_idx + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(i)%sf(l, k, j) = 0._wp + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (qbmm) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsz_vf(j, k, l, i) = mom_sp(i)%sf(l, k, j) + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsz_vf(j, k, l, i) = mom_sp(i)%sf(l, k, j) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3797,10 +3797,10 @@ contains !! @param[in] iy Global Y-direction loop bounds (int_bounds_info). !! @param[in] iz Global Z-direction loop bounds (int_bounds_info). subroutine s_compute_cylindrical_viscous_source_flux(velL_vf, & - dvelL_dx_vf, dvelL_dy_vf, dvelL_dz_vf, & - velR_vf, & - dvelR_dx_vf, dvelR_dy_vf, dvelR_dz_vf, & - flux_src_vf, norm_dir, ix, iy, iz) + dvelL_dx_vf, dvelL_dy_vf, dvelL_dz_vf, & + velR_vf, & + dvelR_dx_vf, dvelR_dy_vf, dvelR_dz_vf, & + flux_src_vf, norm_dir, ix, iy, iz) type(scalar_field), dimension(num_dims), intent(in) :: velL_vf, velR_vf type(scalar_field), dimension(num_dims), intent(in) :: dvelL_dx_vf, dvelR_dx_vf @@ -3830,111 +3830,111 @@ contains integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging. #:call GPU_PARALLEL_LOOP(collapse=3, private='[idx_rp, avg_v_int, avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, vel_src_int, r_eff, divergence_cyl, stress_vector_shear, stress_normal_bulk, div_v_term_const]') - do l = iz%beg, iz%end - do k = iy%beg, iy%end - do j = ix%beg, ix%end - - ! 
Determine indices for the 'right' state for averaging across the interface - idx_rp = [j, k, l] - idx_rp(norm_dir) = idx_rp(norm_dir) + 1 - - ! Average velocities and their derivatives at the interface - ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) - $:GPU_LOOP(parallelism='[seq]') - do i_vel = 1, num_dims - avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - - avg_dvdx_int(i_vel) = 0.5_wp*(dvelL_dx_vf(i_vel)%sf(j, k, l) + & - dvelR_dx_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - if (num_dims > 1) then - avg_dvdy_int(i_vel) = 0.5_wp*(dvelL_dy_vf(i_vel)%sf(j, k, l) + & - dvelR_dy_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - else - avg_dvdy_int(i_vel) = 0.0_wp - end if - if (num_dims > 2) then - avg_dvdz_int(i_vel) = 0.5_wp*(dvelL_dz_vf(i_vel)%sf(j, k, l) + & - dvelR_dz_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - else - avg_dvdz_int(i_vel) = 0.0_wp - end if - end do - - ! Get Re numbers and interface velocity for viscous work - select case (norm_dir) - case (1) ! x-face (axial face in z_cyl direction) - Re_s = Re_avg_rsx_vf(j, k, l, 1) - Re_b = Re_avg_rsx_vf(j, k, l, 2) - vel_src_int = vel_src_rsx_vf(j, k, l, 1:num_dims) - r_eff = y_cc(k) - case (2) ! y-face (radial face in r_cyl direction) - Re_s = Re_avg_rsy_vf(k, j, l, 1) - Re_b = Re_avg_rsy_vf(k, j, l, 2) - vel_src_int = vel_src_rsy_vf(k, j, l, 1:num_dims) - r_eff = y_cb(k) - case (3) ! z-face (azimuthal face in theta_cyl direction) - Re_s = Re_avg_rsz_vf(l, k, j, 1) - Re_b = Re_avg_rsz_vf(l, k, j, 2) - vel_src_int = vel_src_rsz_vf(l, k, j, 1:num_dims) - r_eff = y_cc(k) - end select - - ! 
Divergence in cylindrical coordinates (vx=vz_cyl, vy=vr_cyl, vz=vtheta_cyl) - divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff - if (num_dims > 2) then - divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff - end if + do l = iz%beg, iz%end + do k = iy%beg, iy%end + do j = ix%beg, ix%end - stress_vector_shear = 0.0_wp - stress_normal_bulk = 0.0_wp + ! Determine indices for the 'right' state for averaging across the interface + idx_rp = [j, k, l] + idx_rp(norm_dir) = idx_rp(norm_dir) + 1 - if (shear_stress) then - div_v_term_const = -(2.0_wp/3.0_wp)*divergence_cyl/Re_s + ! Average velocities and their derivatives at the interface + ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) + $:GPU_LOOP(parallelism='[seq]') + do i_vel = 1, num_dims + avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - select case (norm_dir) - case (1) ! X-face (axial normal, z_cyl) - stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const - if (num_dims > 1) then - stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s - end if - if (num_dims > 2) then - stress_vector_shear(3) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s - end if - case (2) ! 
Y-face (radial normal, r_cyl) + avg_dvdx_int(i_vel) = 0.5_wp*(dvelL_dx_vf(i_vel)%sf(j, k, l) + & + dvelR_dx_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) if (num_dims > 1) then - stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s - stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const - if (num_dims > 2) then - stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s - end if + avg_dvdy_int(i_vel) = 0.5_wp*(dvelL_dy_vf(i_vel)%sf(j, k, l) + & + dvelR_dy_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) else - stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const + avg_dvdy_int(i_vel) = 0.0_wp end if - case (3) ! Z-face (azimuthal normal, theta_cyl) if (num_dims > 2) then - stress_vector_shear(1) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s - stress_vector_shear(2) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s - stress_vector_shear(3) = (2.0_wp*(avg_dvdz_int(3)/r_eff + avg_v_int(2)/r_eff))/Re_s + div_v_term_const + avg_dvdz_int(i_vel) = 0.5_wp*(dvelL_dz_vf(i_vel)%sf(j, k, l) + & + dvelR_dz_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + else + avg_dvdz_int(i_vel) = 0.0_wp end if + end do + + ! Get Re numbers and interface velocity for viscous work + select case (norm_dir) + case (1) ! x-face (axial face in z_cyl direction) + Re_s = Re_avg_rsx_vf(j, k, l, 1) + Re_b = Re_avg_rsx_vf(j, k, l, 2) + vel_src_int = vel_src_rsx_vf(j, k, l, 1:num_dims) + r_eff = y_cc(k) + case (2) ! y-face (radial face in r_cyl direction) + Re_s = Re_avg_rsy_vf(k, j, l, 1) + Re_b = Re_avg_rsy_vf(k, j, l, 2) + vel_src_int = vel_src_rsy_vf(k, j, l, 1:num_dims) + r_eff = y_cb(k) + case (3) ! 
z-face (azimuthal face in theta_cyl direction) + Re_s = Re_avg_rsz_vf(l, k, j, 1) + Re_b = Re_avg_rsz_vf(l, k, j, 2) + vel_src_int = vel_src_rsz_vf(l, k, j, 1:num_dims) + r_eff = y_cc(k) end select - $:GPU_LOOP(parallelism='[seq]') - do i_vel = 1, num_dims - flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) - end do - end if + ! Divergence in cylindrical coordinates (vx=vz_cyl, vy=vr_cyl, vz=vtheta_cyl) + divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff + if (num_dims > 2) then + divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff + end if - if (bulk_stress) then - stress_normal_bulk = divergence_cyl/Re_b + stress_vector_shear = 0.0_wp + stress_normal_bulk = 0.0_wp - flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) = flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) - stress_normal_bulk - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(norm_dir)*stress_normal_bulk - end if + if (shear_stress) then + div_v_term_const = -(2.0_wp/3.0_wp)*divergence_cyl/Re_s + + select case (norm_dir) + case (1) ! X-face (axial normal, z_cyl) + stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const + if (num_dims > 1) then + stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s + end if + if (num_dims > 2) then + stress_vector_shear(3) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s + end if + case (2) ! 
Y-face (radial normal, r_cyl) + if (num_dims > 1) then + stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s + stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const + if (num_dims > 2) then + stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s + end if + else + stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const + end if + case (3) ! Z-face (azimuthal normal, theta_cyl) + if (num_dims > 2) then + stress_vector_shear(1) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s + stress_vector_shear(2) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s + stress_vector_shear(3) = (2.0_wp*(avg_dvdz_int(3)/r_eff + avg_v_int(2)/r_eff))/Re_s + div_v_term_const + end if + end select + + $:GPU_LOOP(parallelism='[seq]') + do i_vel = 1, num_dims + flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) + end do + end if + + if (bulk_stress) then + stress_normal_bulk = divergence_cyl/Re_b + + flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) = flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) - stress_normal_bulk + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(norm_dir)*stress_normal_bulk + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_cylindrical_viscous_source_flux @@ -3956,13 +3956,13 @@ contains !! @param[in] iy Y-direction loop bounds (int_bounds_info). !! @param[in] iz Z-direction loop bounds (int_bounds_info). subroutine s_compute_cartesian_viscous_source_flux(dvelL_dx_vf, & - dvelL_dy_vf, & - dvelL_dz_vf, & - dvelR_dx_vf, & - dvelR_dy_vf, & - dvelR_dz_vf, & - flux_src_vf, & - norm_dir) + dvelL_dy_vf, & + dvelL_dz_vf, & + dvelR_dx_vf, & + dvelR_dy_vf, & + dvelR_dz_vf, & + flux_src_vf, & + norm_dir) ! 
Arguments type(scalar_field), dimension(num_dims), intent(in) :: dvelL_dx_vf, dvelR_dx_vf @@ -3990,86 +3990,86 @@ contains real(wp) :: divergence_v !< Velocity divergence at interface. #:call GPU_PARALLEL_LOOP(collapse=3, private='[idx_right_phys, vel_grad_avg, current_tau_shear, current_tau_bulk, vel_src_at_interface, Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]') - do l_loop = isz%beg, isz%end - do k_loop = isy%beg, isy%end - do j_loop = isx%beg, isx%end - - idx_right_phys(1) = j_loop - idx_right_phys(2) = k_loop - idx_right_phys(3) = l_loop - idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 - - vel_grad_avg = 0.0_wp - do vel_comp_idx = 1, num_dims - vel_grad_avg(vel_comp_idx, 1) = 0.5_wp*(dvelL_dx_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dx_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - if (num_dims > 1) then - vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - end if - if (num_dims > 2) then - vel_grad_avg(vel_comp_idx, 3) = 0.5_wp*(dvelL_dz_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dz_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - end if - end do - - divergence_v = 0.0_wp - do i_dim = 1, num_dims - divergence_v = divergence_v + vel_grad_avg(i_dim, i_dim) - end do - - vel_src_at_interface = 0.0_wp - if (norm_dir == 1) then - Re_shear = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 1) - Re_bulk = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) - end do - else if (norm_dir == 2) then - Re_shear = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 1) - Re_bulk = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsy_vf(k_loop, j_loop, l_loop, i_dim) + do l_loop = isz%beg, isz%end + do k_loop = 
isy%beg, isy%end + do j_loop = isx%beg, isx%end + + idx_right_phys(1) = j_loop + idx_right_phys(2) = k_loop + idx_right_phys(3) = l_loop + idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 + + vel_grad_avg = 0.0_wp + do vel_comp_idx = 1, num_dims + vel_grad_avg(vel_comp_idx, 1) = 0.5_wp*(dvelL_dx_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dx_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + if (num_dims > 1) then + vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + end if + if (num_dims > 2) then + vel_grad_avg(vel_comp_idx, 3) = 0.5_wp*(dvelL_dz_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dz_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + end if end do - else - Re_shear = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 1) - Re_bulk = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 2) + + divergence_v = 0.0_wp do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsz_vf(l_loop, k_loop, j_loop, i_dim) + divergence_v = divergence_v + vel_grad_avg(i_dim, i_dim) end do - end if - if (shear_stress) then - ! 
current_tau_shear = 0.0_wp - call s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, current_tau_shear) + vel_src_at_interface = 0.0_wp + if (norm_dir == 1) then + Re_shear = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 1) + Re_bulk = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 2) + do i_dim = 1, num_dims + vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) + end do + else if (norm_dir == 2) then + Re_shear = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 1) + Re_bulk = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 2) + do i_dim = 1, num_dims + vel_src_at_interface(i_dim) = vel_src_rsy_vf(k_loop, j_loop, l_loop, i_dim) + end do + else + Re_shear = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 1) + Re_bulk = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 2) + do i_dim = 1, num_dims + vel_src_at_interface(i_dim) = vel_src_rsz_vf(l_loop, k_loop, j_loop, i_dim) + end do + end if + + if (shear_stress) then + ! current_tau_shear = 0.0_wp + call s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, current_tau_shear) - do i_dim = 1, num_dims - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_shear(norm_dir, i_dim) + do i_dim = 1, num_dims + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_shear(norm_dir, i_dim) - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & - vel_src_at_interface(i_dim)*current_tau_shear(norm_dir, i_dim) - end do - end if + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & + vel_src_at_interface(i_dim)*current_tau_shear(norm_dir, i_dim) + end do + end if - if (bulk_stress) then - ! current_tau_bulk = 0.0_wp - call s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, current_tau_bulk) + if (bulk_stress) then + ! 
current_tau_bulk = 0.0_wp + call s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, current_tau_bulk) - do i_dim = 1, num_dims - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_bulk(norm_dir, i_dim) + do i_dim = 1, num_dims + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_bulk(norm_dir, i_dim) - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & - vel_src_at_interface(i_dim)*current_tau_bulk(norm_dir, i_dim) - end do - end if + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & + vel_src_at_interface(i_dim)*current_tau_bulk(norm_dir, i_dim) + end do + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_cartesian_viscous_source_flux @@ -4142,8 +4142,8 @@ contains !! @param flux_gsrc_vf Intercell geometric source fluxes !! @param norm_dir Dimensional splitting coordinate direction subroutine s_finalize_riemann_solver(flux_vf, flux_src_vf, & - flux_gsrc_vf, & - norm_dir) + flux_gsrc_vf, & + norm_dir) type(scalar_field), & dimension(sys_size), & @@ -4156,153 +4156,153 @@ contains ! 
Reshaping Outputted Data in y-direction if (norm_dir == 2) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_vf(i)%sf(k, j, l) = & - flux_rsy_vf(j, k, l, i) + do i = 1, sys_size + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_vf(i)%sf(k, j, l) = & + flux_rsy_vf(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (cyl_coord) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_gsrc_vf(i)%sf(k, j, l) = & - flux_gsrc_rsy_vf(j, k, l, i) + do i = 1, sys_size + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_gsrc_vf(i)%sf(k, j, l) = & + flux_gsrc_rsy_vf(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(advxb)%sf(k, j, l) = & - flux_src_rsy_vf(j, k, l, advxb) + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(advxb)%sf(k, j, l) = & + flux_src_rsy_vf(j, k, l, advxb) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. riemann_solver == 4) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb + 1, advxe - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(i)%sf(k, j, l) = & - flux_src_rsy_vf(j, k, l, i) + do i = advxb + 1, advxe + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(i)%sf(k, j, l) = & + flux_src_rsy_vf(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if ! 
Reshaping Outputted Data in z-direction elseif (norm_dir == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end + do i = 1, sys_size + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end - flux_vf(i)%sf(l, k, j) = & - flux_rsz_vf(j, k, l, i) + flux_vf(i)%sf(l, k, j) = & + flux_rsz_vf(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (grid_geometry == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end + do i = 1, sys_size + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end - flux_gsrc_vf(i)%sf(l, k, j) = & - flux_gsrc_rsz_vf(j, k, l, i) + flux_gsrc_vf(i)%sf(l, k, j) = & + flux_gsrc_rsz_vf(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(advxb)%sf(l, k, j) = & - flux_src_rsz_vf(j, k, l, advxb) + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(advxb)%sf(l, k, j) = & + flux_src_rsz_vf(j, k, l, advxb) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. 
riemann_solver == 4) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb + 1, advxe - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(i)%sf(l, k, j) = & - flux_src_rsz_vf(j, k, l, i) + do i = advxb + 1, advxe + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(i)%sf(l, k, j) = & + flux_src_rsz_vf(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if elseif (norm_dir == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_vf(i)%sf(j, k, l) = & - flux_rsx_vf(j, k, l, i) + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_vf(i)%sf(j, k, l) = & + flux_rsx_vf(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(advxb)%sf(j, k, l) = & - flux_src_rsx_vf(j, k, l, advxb) + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(advxb)%sf(j, k, l) = & + flux_src_rsx_vf(j, k, l, advxb) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. 
riemann_solver == 4) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = advxb + 1, advxe - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(i)%sf(j, k, l) = & - flux_src_rsx_vf(j, k, l, i) + do i = advxb + 1, advxe + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(i)%sf(j, k, l) = & + flux_src_rsx_vf(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end if diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index c817f7cce5..84c09ab6f5 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -86,139 +86,139 @@ contains if (id == 1) then #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_x(j, k, l, 1) - w2L = gL_x(j, k, l, 2) - w3L = 0._wp - if (p > 0) w3L = gL_x(j, k, l, 3) + w1L = gL_x(j, k, l, 1) + w2L = gL_x(j, k, l, 2) + w3L = 0._wp + if (p > 0) w3L = gL_x(j, k, l, 3) - w1R = gR_x(j + 1, k, l, 1) - w2R = gR_x(j + 1, k, l, 2) - w3R = 0._wp - if (p > 0) w3R = gR_x(j + 1, k, l, 3) + w1R = gR_x(j + 1, k, l, 1) + w2R = gR_x(j + 1, k, l, 2) + w3R = 0._wp + if (p > 0) w3R = gR_x(j + 1, k, l, 3) - normWL = gL_x(j, k, l, num_dims + 1) - normWR = gR_x(j + 1, k, l, num_dims + 1) + normWL = gL_x(j, k, l, num_dims + 1) + normWR = gR_x(j + 1, k, l, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp if (normW > capillary_cutoff) then @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb 
+ i - 1)%sf(j, k, l) + Omega(1, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(1, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(1, i)*vSrc_rsx_vf(j, k, l, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(1, i)*vSrc_rsx_vf(j, k, l, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsx_vf(j, k, l, 1) - end if + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsx_vf(j, k, l, 1) + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (id == 2) then #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_y(k, j, l, 1) - w2L = gL_y(k, j, l, 2) - w3L = 0._wp - if (p > 0) w3L = gL_y(k, j, l, 3) + w1L = gL_y(k, j, l, 1) + w2L = gL_y(k, j, l, 2) + w3L = 0._wp + if (p > 0) w3L = gL_y(k, j, l, 3) - w1R = gR_y(k + 1, j, l, 1) - w2R = gR_y(k + 1, j, l, 2) - w3R = 0._wp - if (p > 0) w3R = gR_y(k + 1, j, l, 3) + w1R = gR_y(k + 1, j, l, 1) + w2R = gR_y(k + 1, j, l, 2) + w3R = 0._wp + if (p > 0) w3R = gR_y(k + 1, j, l, 3) - normWL = gL_y(k, j, l, num_dims + 1) - normWR = gR_y(k + 1, j, l, num_dims + 1) + normWL = gL_y(k, j, l, num_dims + 1) + normWR = gR_y(k + 1, j, l, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp if (normW > capillary_cutoff) then @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb 
+ i - 1)%sf(j, k, l) + Omega(2, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(2, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(2, i)*vSrc_rsy_vf(k, j, l, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(2, i)*vSrc_rsy_vf(k, j, l, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) - end if + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (id == 3) then #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_z(l, k, j, 1) - w2L = gL_z(l, k, j, 2) - w3L = 0._wp - if (p > 0) w3L = gL_z(l, k, j, 3) + w1L = gL_z(l, k, j, 1) + w2L = gL_z(l, k, j, 2) + w3L = 0._wp + if (p > 0) w3L = gL_z(l, k, j, 3) - w1R = gR_z(l + 1, k, j, 1) - w2R = gR_z(l + 1, k, j, 2) - w3R = 0._wp - if (p > 0) w3R = gR_z(l + 1, k, j, 3) + w1R = gR_z(l + 1, k, j, 1) + w2R = gR_z(l + 1, k, j, 2) + w3R = 0._wp + if (p > 0) w3R = gR_z(l + 1, k, j, 3) - normWL = gL_z(l, k, j, num_dims + 1) - normWR = gR_z(l + 1, k, j, num_dims + 1) + normWL = gL_z(l, k, j, num_dims + 1) + normWR = gR_z(l + 1, k, j, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp if (normW > capillary_cutoff) then @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb 
+ i - 1)%sf(j, k, l) + Omega(3, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(3, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(3, i)*vSrc_rsz_vf(l, k, j, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(3, i)*vSrc_rsz_vf(l, k, j, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsz_vf(l, k, j, 3) - end if + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsz_vf(l, k, j, 3) + end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -241,37 +241,37 @@ contains ! compute gradient components #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(1)%sf(j, k, l) = 1._wp/(x_cc(j + 1) - x_cc(j - 1))* & - (q_prim_vf(c_idx)%sf(j + 1, k, l) - q_prim_vf(c_idx)%sf(j - 1, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(1)%sf(j, k, l) = 1._wp/(x_cc(j + 1) - x_cc(j - 1))* & + (q_prim_vf(c_idx)%sf(j + 1, k, l) - q_prim_vf(c_idx)%sf(j - 1, k, l)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(2)%sf(j, k, l) = 1._wp/(y_cc(k + 1) - y_cc(k - 1))* & - (q_prim_vf(c_idx)%sf(j, k + 1, l) - q_prim_vf(c_idx)%sf(j, k - 1, l)) + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(2)%sf(j, k, l) = 1._wp/(y_cc(k + 1) - y_cc(k - 1))* & + (q_prim_vf(c_idx)%sf(j, k + 1, l) - q_prim_vf(c_idx)%sf(j, k - 1, l)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(3)%sf(j, k, l) = 1._wp/(z_cc(l + 1) - z_cc(l - 1))* & - (q_prim_vf(c_idx)%sf(j, k, l + 1) - q_prim_vf(c_idx)%sf(j, k, l - 1)) + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(3)%sf(j, k, l) = 1._wp/(z_cc(l 
+ 1) - z_cc(l - 1))* & + (q_prim_vf(c_idx)%sf(j, k, l + 1) - q_prim_vf(c_idx)%sf(j, k, l - 1)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -340,42 +340,42 @@ contains if (recon_dir == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else if (recon_dir == 2) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else if (recon_dir == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 84aa0b4107..66c85751ff 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -391,54 +391,53 @@ contains if (bubbles_lagrange .and. .not. 
adap_dt) call s_update_lagrange_tdv_rk(stage=1) #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) & + + dt*rhs_vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP - !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(1)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_pb(j, k, l, q, i) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (qbmm .and. (.not. polytropic)) then #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(1)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i) + end do end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -501,176 +500,176 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - - !Evolve pb and mv for non-polytropic qbmm - if (qbmm .and. (.not. 
polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb + do i = 1, sys_size do l = 0, p do k = 0, n do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) - end do + q_cons_ts(2)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) & + + dt*rhs_vf(i)%sf(j, k, l) end do end do end do end do - end if #:endcall GPU_PARALLEL_LOOP + !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(2)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_pb(j, k, l, q, i) + end do end do end do end do end do - end do - #:endcall GPU_PARALLEL_LOOP - end if + end if + #:endcall GPU_PARALLEL_LOOP + + if (qbmm .and. (.not. polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(2)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i) + end do + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) - end if + if (model_eqns == 3 .and. (.not. 
relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) + end if - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) - else - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) - end if - end if - - ! Stage 2 of 2 + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) + else + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) + end if + end if - call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) + ! Stage 2 of 2 - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) + call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (q_cons_ts(1)%vf(i)%sf(j, k, l) & - + q_cons_ts(2)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l))/2._wp - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - if (qbmm .and. (.not. 
polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - (pb_ts(1)%sf(j, k, l, q, i) & - + pb_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i))/2._wp + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + (q_cons_ts(1)%vf(i)%sf(j, k, l) & + + q_cons_ts(2)%vf(i)%sf(j, k, l) & + + dt*rhs_vf(i)%sf(j, k, l))/2._wp + end do end do end do end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if + #:endcall GPU_PARALLEL_LOOP + + if (qbmm .and. (.not. polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(1)%sf(j, k, l, q, i) = & + (pb_ts(1)%sf(j, k, l, q, i) & + + pb_ts(2)%sf(j, k, l, q, i) & + + dt*rhs_pb(j, k, l, q, i))/2._wp + end do + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - (mv_ts(1)%sf(j, k, l, q, i) & - + mv_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i))/2._wp + if (qbmm .and. (.not. 
polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(1)%sf(j, k, l, q, i) = & + (mv_ts(1)%sf(j, k, l, q, i) & + + mv_ts(2)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i))/2._wp + end do + end do + end do end do end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if + #:endcall GPU_PARALLEL_LOOP + end if - if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(1)%vf) - end if + if (model_eqns == 3 .and. (.not. relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(1)%vf) + end if - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf) + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf) - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - else - call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf) - end if - end if + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) + else + call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf) + end if + end if - call nvtxEndRange + call nvtxEndRange - call cpu_time(finish) + call cpu_time(finish) - end subroutine s_2nd_order_tvd_rk + end subroutine s_2nd_order_tvd_rk - !> 3rd order TVD RK time-stepping algorithm + !> 3rd order TVD RK time-stepping algorithm !! 
@param t_step Current time-step - impure subroutine s_3rd_order_tvd_rk(t_step, time_avg) + impure subroutine s_3rd_order_tvd_rk(t_step, time_avg) - integer, intent(IN) :: t_step - real(wp), intent(INOUT) :: time_avg + integer, intent(IN) :: t_step + real(wp), intent(INOUT) :: time_avg - integer :: i, j, k, l, q !< Generic loop iterator + integer :: i, j, k, l, q !< Generic loop iterator - real(wp) :: start, finish + real(wp) :: start, finish - ! Stage 1 of 3 + ! Stage 1 of 3 - if (.not. adap_dt) then - call cpu_time(start) - call nvtxStartRange("TIMESTEP") - end if + if (.not. adap_dt) then + call cpu_time(start) + call nvtxStartRange("TIMESTEP") + end if - call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, 1) + call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, 1) if (run_time_info) then if (igr) then @@ -680,338 +679,338 @@ contains end if end if - if (probe_wrt) then - call s_time_step_cycling(t_step) - end if - - if (cfl_dt) then - if (mytime >= t_stop) return - else - if (t_step == t_step_stop) return - end if - - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + if (probe_wrt) then + call s_time_step_cycling(t_step) + end if - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + if (cfl_dt) then + if (mytime >= t_stop) return + else + if (t_step == t_step_stop) return + end if - !Evolve pb and mv for non-polytropic qbmm - if (qbmm .and. (.not. 
polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(2)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) & + + dt*rhs_vf(i)%sf(j, k, l) + end do end do end do end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if - - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) + #:endcall GPU_PARALLEL_LOOP + + !Evolve pb and mv for non-polytropic qbmm + if (qbmm .and. (.not. polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(2)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_pb(j, k, l, q, i) + end do + end do + end do end do end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if + #:endcall GPU_PARALLEL_LOOP + end if - if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) + if (qbmm .and. (.not. polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(2)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i) + end do + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) - if (model_eqns == 3 .and. (.not. 
relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) - end if + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) + if (model_eqns == 3 .and. (.not. relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) + end if - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) - else - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) - end if - end if + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) - ! Stage 2 of 3 + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) + else + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) + end if + end if - call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) + ! Stage 2 of 3 - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) + call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - (3._wp*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + q_cons_ts(2)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l))/4._wp - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - if (qbmm .and. (.not. 
polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - (3._wp*pb_ts(1)%sf(j, k, l, q, i) & - + pb_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i))/4._wp + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(2)%vf(i)%sf(j, k, l) = & + (3._wp*q_cons_ts(1)%vf(i)%sf(j, k, l) & + + q_cons_ts(2)%vf(i)%sf(j, k, l) & + + dt*rhs_vf(i)%sf(j, k, l))/4._wp + end do end do end do end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if - - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - (3._wp*mv_ts(1)%sf(j, k, l, q, i) & - + mv_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i))/4._wp + #:endcall GPU_PARALLEL_LOOP + + if (qbmm .and. (.not. polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(2)%sf(j, k, l, q, i) = & + (3._wp*pb_ts(1)%sf(j, k, l, q, i) & + + pb_ts(2)%sf(j, k, l, q, i) & + + dt*rhs_pb(j, k, l, q, i))/4._wp + end do + end do + end do end do end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if - - if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp) - - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) + #:endcall GPU_PARALLEL_LOOP + end if - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) - end if + if (qbmm .and. (.not. 
polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(2)%sf(j, k, l, q, i) = & + (3._wp*mv_ts(1)%sf(j, k, l, q, i) & + + mv_ts(2)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i))/4._wp + end do + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp) - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) - else - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) - end if - end if + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) - ! Stage 3 of 3 - call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 3) + if (model_eqns == 3 .and. (.not. relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) + end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (q_cons_ts(1)%vf(i)%sf(j, k, l) & - + 2._wp*q_cons_ts(2)%vf(i)%sf(j, k, l) & - + 2._wp*dt*rhs_vf(i)%sf(j, k, l))/3._wp - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) + else + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) + end if + end if - if (qbmm .and. (.not. 
polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - (pb_ts(1)%sf(j, k, l, q, i) & - + 2._wp*pb_ts(2)%sf(j, k, l, q, i) & - + 2._wp*dt*rhs_pb(j, k, l, q, i))/3._wp + ! Stage 3 of 3 + call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 3) + + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) + + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + (q_cons_ts(1)%vf(i)%sf(j, k, l) & + + 2._wp*q_cons_ts(2)%vf(i)%sf(j, k, l) & + + 2._wp*dt*rhs_vf(i)%sf(j, k, l))/3._wp + end do end do end do end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if + #:endcall GPU_PARALLEL_LOOP + + if (qbmm .and. (.not. polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(1)%sf(j, k, l, q, i) = & + (pb_ts(1)%sf(j, k, l, q, i) & + + 2._wp*pb_ts(2)%sf(j, k, l, q, i) & + + 2._wp*dt*rhs_pb(j, k, l, q, i))/3._wp + end do + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - (mv_ts(1)%sf(j, k, l, q, i) & - + 2._wp*mv_ts(2)%sf(j, k, l, q, i) & - + 2._wp*dt*rhs_mv(j, k, l, q, i))/3._wp + if (qbmm .and. (.not. 
polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(1)%sf(j, k, l, q, i) = & + (mv_ts(1)%sf(j, k, l, q, i) & + + 2._wp*mv_ts(2)%sf(j, k, l, q, i) & + + 2._wp*dt*rhs_mv(j, k, l, q, i))/3._wp + end do + end do + end do end do end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if + #:endcall GPU_PARALLEL_LOOP + end if - if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(1)%vf) - end if + if (model_eqns == 3 .and. (.not. relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(1)%vf) + end if - call nvtxStartRange("RHS-ELASTIC") - if (hyperelasticity) call s_hyperelastic_rmt_stress_update(q_cons_ts(1)%vf, q_prim_vf) - call nvtxEndRange + call nvtxStartRange("RHS-ELASTIC") + if (hyperelasticity) call s_hyperelastic_rmt_stress_update(q_cons_ts(1)%vf, q_prim_vf) + call nvtxEndRange - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf) + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf) - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - else - call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf) - end if - end if + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) + else + call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf) + end if + end if - if (.not. adap_dt) then - call nvtxEndRange - call cpu_time(finish) + if (.not. 
adap_dt) then + call nvtxEndRange + call cpu_time(finish) - time = time + (finish - start) - end if - end subroutine s_3rd_order_tvd_rk + time = time + (finish - start) + end if + end subroutine s_3rd_order_tvd_rk - !> Strang splitting scheme with 3rd order TVD RK time-stepping algorithm for + !> Strang splitting scheme with 3rd order TVD RK time-stepping algorithm for !! the flux term and adaptive time stepping algorithm for !! the source term !! @param t_step Current time-step - subroutine s_strang_splitting(t_step, time_avg) + subroutine s_strang_splitting(t_step, time_avg) - integer, intent(in) :: t_step - real(wp), intent(inout) :: time_avg + integer, intent(in) :: t_step + real(wp), intent(inout) :: time_avg - real(wp) :: start, finish + real(wp) :: start, finish - call cpu_time(start) + call cpu_time(start) - call nvtxStartRange("TIMESTEP") + call nvtxStartRange("TIMESTEP") - ! Stage 1 of 3 - call s_adaptive_dt_bubble(1) + ! Stage 1 of 3 + call s_adaptive_dt_bubble(1) - ! Stage 2 of 3 - call s_3rd_order_tvd_rk(t_step, time_avg) + ! Stage 2 of 3 + call s_3rd_order_tvd_rk(t_step, time_avg) - ! Stage 3 of 3 - call s_adaptive_dt_bubble(3) + ! Stage 3 of 3 + call s_adaptive_dt_bubble(3) - call nvtxEndRange + call nvtxEndRange - call cpu_time(finish) + call cpu_time(finish) - time = time + (finish - start) + time = time + (finish - start) - end subroutine s_strang_splitting + end subroutine s_strang_splitting - !> Bubble source part in Strang operator splitting scheme + !> Bubble source part in Strang operator splitting scheme !! 
@param t_step Current time-step - impure subroutine s_adaptive_dt_bubble(stage) + impure subroutine s_adaptive_dt_bubble(stage) - integer, intent(in) :: stage + integer, intent(in) :: stage - type(vector_field) :: gm_alpha_qp + type(vector_field) :: gm_alpha_qp - call s_convert_conservative_to_primitive_variables( & - q_cons_ts(1)%vf, & - q_T_sf, & - q_prim_vf, & - idwint) + call s_convert_conservative_to_primitive_variables( & + q_cons_ts(1)%vf, & + q_T_sf, & + q_prim_vf, & + idwint) - if (bubbles_euler) then + if (bubbles_euler) then call s_compute_bubble_EE_source(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, divu) call s_comp_alpha_from_n(q_cons_ts(1)%vf) - elseif (bubbles_lagrange) then - - call s_populate_variables_buffers(bc_type, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - call s_compute_bubble_EL_dynamics(q_prim_vf, stage) - call s_transfer_data_to_tmp() - call s_smear_voidfraction() - if (stage == 3) then - if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then - $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') - call s_write_lag_particles(mytime) - end if - call s_write_void_evol(mytime) - end if + elseif (bubbles_lagrange) then + + call s_populate_variables_buffers(bc_type, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) + call s_compute_bubble_EL_dynamics(q_prim_vf, stage) + call s_transfer_data_to_tmp() + call s_smear_voidfraction() + if (stage == 3) then + if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() + if (lag_params%write_bubbles) then + $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') + call s_write_lag_particles(mytime) + end if + call s_write_void_evol(mytime) + end if - end if + end if - end subroutine s_adaptive_dt_bubble + end subroutine s_adaptive_dt_bubble - impure subroutine s_compute_dt() + impure subroutine s_compute_dt() - real(wp) :: rho !< Cell-avg. density - real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity - real(wp) :: vel_sum !< Cell-avg. 
velocity sum - real(wp) :: pres !< Cell-avg. pressure - real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction - real(wp) :: gamma !< Cell-avg. sp. heat ratio - real(wp) :: pi_inf !< Cell-avg. liquid stiffness function - real(wp) :: c !< Cell-avg. sound speed - real(wp) :: H !< Cell-avg. enthalpy - real(wp), dimension(2) :: Re !< Cell-avg. Reynolds numbers - type(vector_field) :: gm_alpha_qp + real(wp) :: rho !< Cell-avg. density + real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity + real(wp) :: vel_sum !< Cell-avg. velocity sum + real(wp) :: pres !< Cell-avg. pressure + real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction + real(wp) :: gamma !< Cell-avg. sp. heat ratio + real(wp) :: pi_inf !< Cell-avg. liquid stiffness function + real(wp) :: c !< Cell-avg. sound speed + real(wp) :: H !< Cell-avg. enthalpy + real(wp), dimension(2) :: Re !< Cell-avg. Reynolds numbers + type(vector_field) :: gm_alpha_qp - real(wp) :: dt_local - integer :: j, k, l !< Generic loop iterators + real(wp) :: dt_local + integer :: j, k, l !< Generic loop iterators if (.not. igr) then call s_convert_conservative_to_primitive_variables( & @@ -1031,30 +1030,30 @@ contains call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) end if - ! Compute mixture sound speed - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) + ! 
Compute mixture sound speed + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL(copyout='[dt_local]', copyin='[max_dt]') dt_local = minval(max_dt) #:endcall GPU_PARALLEL - if (num_procs == 1) then - dt = dt_local - else - call s_mpi_allreduce_min(dt_local, dt) - end if + if (num_procs == 1) then + dt = dt_local + else + call s_mpi_allreduce_min(dt_local, dt) + end if - $:GPU_UPDATE(device='[dt]') + $:GPU_UPDATE(device='[dt]') - end subroutine s_compute_dt + end subroutine s_compute_dt - !> This subroutine applies the body forces source term at each + !> This subroutine applies the body forces source term at each !! Runge-Kutta stage subroutine s_apply_bodyforces(q_cons_vf, q_prim_vf_in, rhs_vf_in, ldt) @@ -1062,9 +1061,9 @@ contains type(scalar_field), dimension(1:sys_size), intent(in) :: q_prim_vf_in type(scalar_field), dimension(1:sys_size), intent(inout) :: rhs_vf_in - real(wp), intent(in) :: ldt !< local dt + real(wp), intent(in) :: ldt !< local dt - integer :: i, j, k, l + integer :: i, j, k, l call nvtxStartRange("RHS-BODYFORCES") call s_compute_body_forces_rhs(q_prim_vf_in, q_cons_vf, rhs_vf_in) @@ -1082,21 +1081,21 @@ contains end do #:endcall GPU_PARALLEL_LOOP - call nvtxEndRange + call nvtxEndRange - end subroutine s_apply_bodyforces + end subroutine s_apply_bodyforces - !> This subroutine saves the temporary q_prim_vf vector + !> This subroutine saves the temporary q_prim_vf vector !! into the q_prim_ts vector that is then used in p_main !! 
@param t_step current time-step - subroutine s_time_step_cycling(t_step) + subroutine s_time_step_cycling(t_step) - integer, intent(in) :: t_step + integer, intent(in) :: t_step integer :: i, j, k, l !< Generic loop iterator if (t_step == t_step_start) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -1106,8 +1105,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 1) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -1117,8 +1117,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 2) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -1128,8 +1129,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -1139,8 +1141,9 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP else ! All other timesteps - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -1153,14 +1156,15 @@ contains end do end do end do + #:endcall GPU_PARALLEL_LOOP end if - end subroutine s_time_step_cycling + end subroutine s_time_step_cycling - !> Module deallocation and/or disassociation procedures - impure subroutine s_finalize_time_steppers_module + !> Module deallocation and/or disassociation procedures + impure subroutine s_finalize_time_steppers_module - integer :: i, j !< Generic loop iterators + integer :: i, j !< Generic loop iterators ! Deallocating the cell-average conservative variables do i = 1, num_ts @@ -1171,18 +1175,18 @@ contains @:DEALLOCATE(q_cons_ts(i)%vf) end do - @:DEALLOCATE(q_cons_ts) + @:DEALLOCATE(q_cons_ts) - ! 
Deallocating the cell-average primitive ts variables - if (probe_wrt) then - do i = 0, 3 - do j = 1, sys_size - @:DEALLOCATE(q_prim_ts(i)%vf(j)%sf) - end do - @:DEALLOCATE(q_prim_ts(i)%vf) - end do - @:DEALLOCATE(q_prim_ts) - end if + ! Deallocating the cell-average primitive ts variables + if (probe_wrt) then + do i = 0, 3 + do j = 1, sys_size + @:DEALLOCATE(q_prim_ts(i)%vf(j)%sf) + end do + @:DEALLOCATE(q_prim_ts(i)%vf) + end do + @:DEALLOCATE(q_prim_ts) + end if if (.not. igr) then ! Deallocating the cell-average primitive variables @@ -1225,20 +1229,20 @@ contains end if end if - @:DEALLOCATE(q_prim_vf) + @:DEALLOCATE(q_prim_vf) - ! Deallocating the cell-average RHS variables - do i = 1, sys_size - @:DEALLOCATE(rhs_vf(i)%sf) - end do + ! Deallocating the cell-average RHS variables + do i = 1, sys_size + @:DEALLOCATE(rhs_vf(i)%sf) + end do - @:DEALLOCATE(rhs_vf) + @:DEALLOCATE(rhs_vf) - ! Writing the footer of and closing the run-time information file - if (proc_rank == 0 .and. run_time_info) then - call s_close_run_time_information_file() - end if + ! Writing the footer of and closing the run-time information file + if (proc_rank == 0 .and. 
run_time_info) then + call s_close_run_time_information_file() + end if - end subroutine s_finalize_time_steppers_module + end subroutine s_finalize_time_steppers_module -end module m_time_steppers + end module m_time_steppers diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index d22fa9c5dd..b31285690f 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -78,226 +78,226 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - tau_Re_vf(i)%sf(j, k, l) = 0._wp + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + tau_Re_vf(i)%sf(j, k, l) = 0._wp + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (shear_stress) then ! Shear stresses #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + else + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do + + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + if (mpp_lim .and. 
(model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + + end if - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do - else if ((model_eqns == 2) .and. 
(num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if - else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - - end if + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) + end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) end do - - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - - end do + end if end if - end if - - tau_Re(2, 1) = (grad_y_vf(1)%sf(j, k, l) + & - grad_x_vf(2)%sf(j, k, l))/ & - Re_visc(1) - tau_Re(2, 2) = (4._wp*grad_y_vf(2)%sf(j, k, l) & - - 2._wp*grad_x_vf(1)%sf(j, k, l) & - - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & - (3._wp*Re_visc(1)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - tau_Re_vf(contxe + i)%sf(j, k, l) = & - 
tau_Re_vf(contxe + i)%sf(j, k, l) - & - tau_Re(2, i) + tau_Re(2, 1) = (grad_y_vf(1)%sf(j, k, l) + & + grad_x_vf(2)%sf(j, k, l))/ & + Re_visc(1) - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) + tau_Re(2, 2) = (4._wp*grad_y_vf(2)%sf(j, k, l) & + - 2._wp*grad_x_vf(1)%sf(j, k, l) & + - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + (3._wp*Re_visc(1)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + tau_Re_vf(contxe + i)%sf(j, k, l) = & + tau_Re_vf(contxe + i)%sf(j, k, l) - & + tau_Re(2, i) + + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (bulk_stress) then ! Bulk stresses #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + else + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do + + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + if (mpp_lim .and. (model_eqns == 2) .and. 
(num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + + end if - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do - else if ((model_eqns == 2) .and. 
(num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if - else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - - end if + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) + end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) end do - - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - - end do + end if end if - end if - tau_Re(2, 2) = (grad_x_vf(1)%sf(j, k, l) + & - grad_y_vf(2)%sf(j, k, l) + & - q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & - Re_visc(2) + tau_Re(2, 2) = (grad_x_vf(1)%sf(j, k, l) + & + grad_y_vf(2)%sf(j, k, l) + & + q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + Re_visc(2) - tau_Re_vf(momxb + 1)%sf(j, k, l) = & - tau_Re_vf(momxb + 1)%sf(j, k, l) - & - 
tau_Re(2, 2) + tau_Re_vf(momxb + 1)%sf(j, k, l) = & + tau_Re_vf(momxb + 1)%sf(j, k, l) - & + tau_Re(2, 2) - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -305,211 +305,211 @@ contains if (shear_stress) then ! Shear stresses #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + else + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do + + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else if ((model_eqns == 2) .and. 
(num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + end if - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do - else if ((model_eqns == 2) .and. 
(num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if - else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real - end if + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) end do - - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - - end do + end if end if - end if - tau_Re(2, 2) = -(2._wp/3._wp)*grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & - Re_visc(1) + tau_Re(2, 2) = -(2._wp/3._wp)*grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & + Re_visc(1) - tau_Re(2, 3) = ((grad_z_vf(2)%sf(j, k, l) - & - q_prim_vf(momxe)%sf(j, k, l))/ & - y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & - Re_visc(1) + tau_Re(2, 3) = ((grad_z_vf(2)%sf(j, k, l) - & + 
q_prim_vf(momxe)%sf(j, k, l))/ & + y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & + Re_visc(1) - $:GPU_LOOP(parallelism='[seq]') - do i = 2, 3 - tau_Re_vf(contxe + i)%sf(j, k, l) = & - tau_Re_vf(contxe + i)%sf(j, k, l) - & - tau_Re(2, i) + $:GPU_LOOP(parallelism='[seq]') + do i = 2, 3 + tau_Re_vf(contxe + i)%sf(j, k, l) = & + tau_Re_vf(contxe + i)%sf(j, k, l) - & + tau_Re(2, i) + + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) + end do - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) end do - end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (bulk_stress) then ! Bulk stresses #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + else + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do + + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else if ((model_eqns == 2) .and. 
(num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + + end if - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do - else if ((model_eqns == 2) .and. 
(num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if - else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - - end if + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) + end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) end do - - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - - end do + end if end if - end if - tau_Re(2, 2) = grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & - Re_visc(2) + tau_Re(2, 2) = grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & + Re_visc(2) - tau_Re_vf(momxb + 1)%sf(j, k, l) = & - tau_Re_vf(momxb + 1)%sf(j, k, l) - & - tau_Re(2, 2) + tau_Re_vf(momxb + 1)%sf(j, k, l) = & + tau_Re_vf(momxb + 1)%sf(j, k, l) - & + tau_Re(2, 2) - tau_Re_vf(E_idx)%sf(j, k, l) = & - 
tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end subroutine s_compute_viscous_stress_tensor @@ -596,361 +596,361 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = iy%beg, iy%end - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & - (q_prim_qp%vf(i)%sf(j, k, l) - & - q_prim_qp%vf(i)%sf(j - 1, k, l))/ & - (x_cc(j) - x_cc(j - 1)) + do l = is3_viscous%beg, is3_viscous%end + do k = iy%beg, iy%end + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & + (q_prim_qp%vf(i)%sf(j, k, l) - & + q_prim_qp%vf(i)%sf(j - 1, k, l))/ & + (x_cc(j) - x_cc(j - 1)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & - (q_prim_qp%vf(i)%sf(j + 1, k, l) - & - q_prim_qp%vf(i)%sf(j, k, l))/ & - (x_cc(j + 1) - x_cc(j)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & + (q_prim_qp%vf(i)%sf(j + 1, k, l) - & + q_prim_qp%vf(i)%sf(j, k, l))/ & + (x_cc(j + 1) - x_cc(j)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (n > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = 
is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & - (q_prim_qp%vf(i)%sf(k, j, l) - & - q_prim_qp%vf(i)%sf(k, j - 1, l))/ & - (y_cc(j) - y_cc(j - 1)) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & + (q_prim_qp%vf(i)%sf(k, j, l) - & + q_prim_qp%vf(i)%sf(k, j - 1, l))/ & + (y_cc(j) - y_cc(j - 1)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & - (q_prim_qp%vf(i)%sf(k, j + 1, l) - & - q_prim_qp%vf(i)%sf(k, j, l))/ & - (y_cc(j + 1) - y_cc(j)) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & + (q_prim_qp%vf(i)%sf(k, j + 1, l) - & + q_prim_qp%vf(i)%sf(k, j, l))/ & + (y_cc(j + 1) - y_cc(j)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, j - 1, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j - 1, l)) - - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) + do l = is3_viscous%beg, 
is3_viscous%end + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, j - 1, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j - 1, l)) + + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j, l)) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j, l)) - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - 
dqL_prim_dy_n(2)%vf(i)%sf(j - 1, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j - 1, k, l)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqL_prim_dy_n(2)%vf(i)%sf(j - 1, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j - 1, k, l)) - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & - dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j, k, l)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & + dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j, k, l)) - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg, 
is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & - (q_prim_qp%vf(i)%sf(k, l, j) - & - q_prim_qp%vf(i)%sf(k, l, j - 1))/ & - (z_cc(j) - z_cc(j - 1)) + dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & + (q_prim_qp%vf(i)%sf(k, l, j) - & + q_prim_qp%vf(i)%sf(k, l, j - 1))/ & + (z_cc(j) - z_cc(j - 1)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & - (q_prim_qp%vf(i)%sf(k, l, j + 1) - & - q_prim_qp%vf(i)%sf(k, l, j))/ & - (z_cc(j + 1) - z_cc(j)) + dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & + (q_prim_qp%vf(i)%sf(k, l, j + 1) - & + q_prim_qp%vf(i)%sf(k, l, j))/ & + (z_cc(j + 1) - z_cc(j)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(j - 1, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j - 
1, k, l)) + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(j - 1, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j - 1, k, l)) - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j, k, l)) + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j, k, l)) - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - 
dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(k, j - 1, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j - 1, l)) + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(k, j - 1, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j - 1, l)) - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j, l)) + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j, l)) - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg + 1, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do 
j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg + 1, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqL_prim_dy_n(2)%vf(i)%sf(k, l, j - 1) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j - 1)) + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqL_prim_dy_n(2)%vf(i)%sf(k, l, j - 1) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j - 1)) - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg + 1, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg + 1, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & - dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j)) + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & + dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j)) - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do j = 
is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, l, j - 1) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j - 1)) + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, l, j - 1) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j - 1)) - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j)) + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j)) - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) + 
dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP do i = iv%beg, iv%end @@ -1044,42 +1044,42 @@ contains if (weno_Re_flux) then if (norm_dir == 2) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (norm_dir == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) + do i = iv%beg, iv%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do l = is3_viscous%beg, is3_viscous%end + vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) + vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (norm_dir == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) - vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) + vR_prim_vf(i)%sf(j, k, 
l) = vR_x(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end if @@ -1146,42 +1146,42 @@ contains if (weno_Re_flux) then if (norm_dir == 2) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (norm_dir == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) + do i = iv%beg, iv%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do l = is3_viscous%beg, is3_viscous%end + vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) + vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (norm_dir == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) - vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) + vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) + end do end do end do end do - end do #:endcall 
GPU_PARALLEL_LOOP end if end if @@ -1237,21 +1237,21 @@ contains ! spatial derivatives inside the cell. #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(j)) & - *(wa_flg*vL_vf(i)%sf(j + 1, k, l) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j - 1, k, l)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(j)) & + *(wa_flg*vL_vf(i)%sf(j + 1, k, l) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j - 1, k, l)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP ! END: First-Order Spatial Derivatives in x-direction @@ -1266,21 +1266,21 @@ contains ! spatial derivatives inside the cell. #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(k)) & - *(wa_flg*vL_vf(i)%sf(j, k + 1, l) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j, k - 1, l)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(k)) & + *(wa_flg*vL_vf(i)%sf(j, k + 1, l) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j, k - 1, l)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP ! 
END: First-Order Spatial Derivatives in y-direction @@ -1295,21 +1295,21 @@ contains ! spatial derivatives inside the cell. #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(l)) & - *(wa_flg*vL_vf(i)%sf(j, k, l + 1) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j, k, l - 1)) + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(l)) & + *(wa_flg*vL_vf(i)%sf(j, k, l + 1) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j, k, l - 1)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1351,148 +1351,148 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_x%sf(j, k, l) = & - (var%sf(j + 1, k, l) - var%sf(j - 1, k, l))/ & - (x_cc(j + 1) - x_cc(j - 1)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_x%sf(j, k, l) = & + (var%sf(j + 1, k, l) - var%sf(j - 1, k, l))/ & + (x_cc(j + 1) - x_cc(j - 1)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (n > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_y%sf(j, k, l) = & - (var%sf(j, k + 1, l) - var%sf(j, k - 1, l))/ & - (y_cc(k + 1) - y_cc(k - 1)) + do l = is3_viscous%beg, is3_viscous%end + do k = 
is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_y%sf(j, k, l) = & + (var%sf(j, k + 1, l) - var%sf(j, k - 1, l))/ & + (y_cc(k + 1) - y_cc(k - 1)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_z%sf(j, k, l) = & - (var%sf(j, k, l + 1) - var%sf(j, k, l - 1))/ & - (z_cc(l + 1) - z_cc(l - 1)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_z%sf(j, k, l) = & + (var%sf(j, k, l + 1) - var%sf(j, k, l - 1))/ & + (z_cc(l + 1) - z_cc(l - 1)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(idwbuff(1)%beg, k, l) = & - (-3._wp*var%sf(idwbuff(1)%beg, k, l) + 4._wp*var%sf(idwbuff(1)%beg + 1, k, l) - var%sf(idwbuff(1)%beg + 2, k, l))/ & - (x_cc(idwbuff(1)%beg + 2) - x_cc(idwbuff(1)%beg)) - grad_x%sf(idwbuff(1)%end, k, l) = & - (+3._wp*var%sf(idwbuff(1)%end, k, l) - 4._wp*var%sf(idwbuff(1)%end - 1, k, l) + var%sf(idwbuff(1)%end - 2, k, l))/ & - (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(idwbuff(1)%beg, k, l) = & + (-3._wp*var%sf(idwbuff(1)%beg, k, l) + 4._wp*var%sf(idwbuff(1)%beg + 1, k, l) - var%sf(idwbuff(1)%beg + 2, k, l))/ & + (x_cc(idwbuff(1)%beg + 2) - x_cc(idwbuff(1)%beg)) + grad_x%sf(idwbuff(1)%end, k, l) = & + (+3._wp*var%sf(idwbuff(1)%end, k, l) - 4._wp*var%sf(idwbuff(1)%end - 1, k, l) + var%sf(idwbuff(1)%end - 2, k, l))/ & + (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) + end do end do - end do #:endcall GPU_PARALLEL_LOOP if (n > 0) then #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, 
idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, idwbuff(2)%beg, l) = & - (-3._wp*var%sf(j, idwbuff(2)%beg, l) + 4._wp*var%sf(j, idwbuff(2)%beg + 1, l) - var%sf(j, idwbuff(2)%beg + 2, l))/ & - (y_cc(idwbuff(2)%beg + 2) - y_cc(idwbuff(2)%beg)) - grad_y%sf(j, idwbuff(2)%end, l) = & - (+3._wp*var%sf(j, idwbuff(2)%end, l) - 4._wp*var%sf(j, idwbuff(2)%end - 1, l) + var%sf(j, idwbuff(2)%end - 2, l))/ & - (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, idwbuff(2)%beg, l) = & + (-3._wp*var%sf(j, idwbuff(2)%beg, l) + 4._wp*var%sf(j, idwbuff(2)%beg + 1, l) - var%sf(j, idwbuff(2)%beg + 2, l))/ & + (y_cc(idwbuff(2)%beg + 2) - y_cc(idwbuff(2)%beg)) + grad_y%sf(j, idwbuff(2)%end, l) = & + (+3._wp*var%sf(j, idwbuff(2)%end, l) - 4._wp*var%sf(j, idwbuff(2)%end - 1, l) + var%sf(j, idwbuff(2)%end - 2, l))/ & + (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) + end do end do - end do #:endcall GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, idwbuff(3)%beg) = & - (-3._wp*var%sf(j, k, idwbuff(3)%beg) + 4._wp*var%sf(j, k, idwbuff(3)%beg + 1) - var%sf(j, k, idwbuff(3)%beg + 2))/ & - (z_cc(idwbuff(3)%beg + 2) - z_cc(is3_viscous%beg)) - grad_z%sf(j, k, idwbuff(3)%end) = & - (+3._wp*var%sf(j, k, idwbuff(3)%end) - 4._wp*var%sf(j, k, idwbuff(3)%end - 1) + var%sf(j, k, idwbuff(3)%end - 2))/ & - (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, idwbuff(3)%beg) = & + (-3._wp*var%sf(j, k, idwbuff(3)%beg) + 4._wp*var%sf(j, k, idwbuff(3)%beg + 1) - var%sf(j, k, idwbuff(3)%beg + 2))/ & + (z_cc(idwbuff(3)%beg + 2) - z_cc(is3_viscous%beg)) + grad_z%sf(j, k, idwbuff(3)%end) = & + (+3._wp*var%sf(j, k, idwbuff(3)%end) - 4._wp*var%sf(j, k, idwbuff(3)%end - 1) + var%sf(j, k, 
idwbuff(3)%end - 2))/ & + (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end if if (bc_x%beg <= BC_GHOST_EXTRAP) then #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & - (x_cc(2) - x_cc(0)) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & + (x_cc(2) - x_cc(0)) + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (bc_x%end <= BC_GHOST_EXTRAP) then #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & - (x_cc(m) - x_cc(m - 2)) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & + (x_cc(m) - x_cc(m - 2)) + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAP .and. 
bc_y%beg /= BC_NULL) then #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & - (y_cc(2) - y_cc(0)) + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & + (y_cc(2) - y_cc(0)) + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (bc_y%end <= BC_GHOST_EXTRAP) then #:call GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & - (y_cc(n) - y_cc(n - 2)) + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & + (y_cc(n) - y_cc(n - 2)) + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAP) then #:call GPU_PARALLEL_LOOP(collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, 0) = & - (-3._wp*var%sf(j, k, 0) + 4._wp*var%sf(j, k, 1) - var%sf(j, k, 2))/ & - (z_cc(2) - z_cc(0)) + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, 0) = & + (-3._wp*var%sf(j, k, 0) + 4._wp*var%sf(j, k, 1) - var%sf(j, k, 2))/ & + (z_cc(2) - z_cc(0)) + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (bc_z%end <= BC_GHOST_EXTRAP) then #:call GPU_PARALLEL_LOOP(collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, p) = & - (3._wp*var%sf(j, k, p) - 4._wp*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ & - (z_cc(p) - z_cc(p - 2)) + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, p) = & 
+ (3._wp*var%sf(j, k, p) - 4._wp*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ & + (z_cc(p) - z_cc(p - 2)) + end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end if diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 9dd9dcfbf7..bbaf3648a3 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -669,120 +669,120 @@ contains if (weno_order == 1) then if (weno_dir == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else if (weno_dir == 2) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else if (weno_dir == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, 
is1_weno%end + vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then #:call GPU_PARALLEL_LOOP(collapse=4,private='[beta,dvd,poly,omega,alpha,tau]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - do i = 1, v_size - ! reconstruct from left side - - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(-1) - - beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(0)*dvd(0) & - + weno_eps - beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) & - + weno_eps - - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + do i = 1, v_size + ! reconstruct from left side - elseif (wenoz) then - ! Borges, et al. 
(2008) + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j, k, l, i) + dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - tau = abs(beta(1) - beta(0)) - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(-1) - end if + beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(0)*dvd(0) & + + weno_eps + beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) & + + weno_eps - omega = alpha/sum(alpha) + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - ! reconstruct from right side + elseif (wenoz) then + ! Borges, et al. (2008) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1) + tau = abs(beta(1) - beta(0)) + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + end if - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - elseif (wenoz) then + vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + + ! 
reconstruct from right side + + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1) + + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) + + elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) - end if + end if - omega = alpha/sum(alpha) + omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:endfor @@ -790,114 +790,114 @@ contains #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then #:call GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, v_size - ! 
reconstruct from left side - - dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 1, k, l, i) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 2, k, l, i) - - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(-1) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-2) - - beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 1)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 2)*dvd(0)*dvd(0) & - + weno_eps - beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 1)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 2)*dvd(-1)*dvd(-1) & - + weno_eps - beta(2) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 1)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) & - + weno_eps - - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, v_size + ! reconstruct from left side - elseif (wenoz) then - ! Borges, et al. 
(2008) + dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 1, k, l, i) + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j, k, l, i) + dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 2, k, l, i) - tau = abs(beta(2) - beta(0)) ! Equation 25 - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) ! Equation 28 (note: weno_eps was already added to beta) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(-1) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-2) - elseif (teno) then - ! Fu, et al. (2016) - ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 - tau = abs(beta(2) - beta(0)) - alpha = 1._wp + tau/beta ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) - alpha = (alpha*alpha*alpha)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) - omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) - delta = merge(0._wp, 1._wp, omega < teno_CT)! Equation 26 - alpha = delta*d_cbL_${XYZ}$ (:, j) ! 
Equation 27 + beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 1)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 2)*dvd(0)*dvd(0) & + + weno_eps + beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 1)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 2)*dvd(-1)*dvd(-1) & + + weno_eps + beta(2) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 1)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) & + + weno_eps - end if + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + elseif (wenoz) then + ! Borges, et al. (2008) - ! reconstruct from right side + tau = abs(beta(2) - beta(0)) ! Equation 25 + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) ! Equation 28 (note: weno_eps was already added to beta) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(-1) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) + elseif (teno) then + ! Fu, et al. (2016) + ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 + tau = abs(beta(2) - beta(0)) + alpha = 1._wp + tau/beta ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) + alpha = (alpha*alpha*alpha)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) + omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) + delta = merge(0._wp, 1._wp, omega < teno_CT)! 
Equation 26 + alpha = delta*d_cbL_${XYZ}$ (:, j) ! Equation 27 - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + end if - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - elseif (wenoz) then + vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + + ! reconstruct from right side + + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(-1) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) + + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) + + elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) - elseif (teno) then - alpha = delta*d_cbR_${XYZ}$ (:, j) + elseif (teno) then + alpha = delta*d_cbR_${XYZ}$ (:, j) - end if + end if - omega = alpha/sum(alpha) + omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (mp_weno) then @@ -910,190 +910,190 @@ contains #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then #:call GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v]') - do l = is3_weno%beg, 
is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, v_size - - if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity - - if (.not. teno) then - dvd(2) = v_rs_ws_${XYZ}$ (j + 3, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 2, k, l, i) - dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 1, k, l, i) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 2, k, l, i) - dvd(-3) = v_rs_ws_${XYZ}$ (j - 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 3, k, l, i) - - poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(2) & - + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 0, 2)*dvd(0) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 1, 2)*dvd(-1) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 2, 2)*dvd(-2) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 3, 0)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 3, 1)*dvd(-2) & - + poly_coef_cbL_${XYZ}$ (j, 3, 2)*dvd(-3) - - else - ! (Fu, et al., 2016) Table 1 - ! Note: Unlike TENO5, TENO7 stencils differ from WENO7 stencils - ! See Figure 2 (right) for right-sided flux (at i+1/2) - ! Here we need the left-sided flux, so we flip the weights with respect to the x=i point - ! 
But we need to keep the stencil order to reuse the beta coefficients - poly(0) = ( 2._wp*v(-1) + 5._wp*v( 0) - 1._wp*v( 1)) / 6._wp !& - poly(1) = (11._wp*v( 0) - 7._wp*v( 1) + 2._wp*v( 2)) / 6._wp !& - poly(2) = (-1._wp*v(-2) + 5._wp*v(-1) + 2._wp*v( 0)) / 6._wp !& - poly(3) = (25._wp*v( 0) - 23._wp*v( 1) + 13._wp*v( 2) - 3._wp*v( 3)) / 12._wp !& - poly(4) = ( 1._wp*v(-3) - 5._wp*v(-2) + 13._wp*v(-1) + 3._wp*v( 0)) / 12._wp !& - end if - - if (.not. teno) then - - beta(3) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(2)*dvd(2) & - + beta_coef_${XYZ}$ (j, 0, 1)*dvd(2)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 2)*dvd(2)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 3)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 4)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 5)*dvd(0)*dvd(0) & - + weno_eps - - beta(2) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 1, 1)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 2)*dvd(1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 3)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 4)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 5)*dvd(-1)*dvd(-1) & - + weno_eps + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, v_size + + if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity + + if (.not. 
teno) then + dvd(2) = v_rs_ws_${XYZ}$ (j + 3, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 2, k, l, i) + dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 1, k, l, i) + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j, k, l, i) + dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 2, k, l, i) + dvd(-3) = v_rs_ws_${XYZ}$ (j - 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 3, k, l, i) + + poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(2) & + + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 0, 2)*dvd(0) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 1, 2)*dvd(-1) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 2, 2)*dvd(-2) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 3, 0)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 3, 1)*dvd(-2) & + + poly_coef_cbL_${XYZ}$ (j, 3, 2)*dvd(-3) + + else + ! (Fu, et al., 2016) Table 1 + ! Note: Unlike TENO5, TENO7 stencils differ from WENO7 stencils + ! See Figure 2 (right) for right-sided flux (at i+1/2) + ! Here we need the left-sided flux, so we flip the weights with respect to the x=i point + ! But we need to keep the stencil order to reuse the beta coefficients + poly(0) = ( 2._wp*v(-1) + 5._wp*v( 0) - 1._wp*v( 1)) / 6._wp !& + poly(1) = (11._wp*v( 0) - 7._wp*v( 1) + 2._wp*v( 2)) / 6._wp !& + poly(2) = (-1._wp*v(-2) + 5._wp*v(-1) + 2._wp*v( 0)) / 6._wp !& + poly(3) = (25._wp*v( 0) - 23._wp*v( 1) + 13._wp*v( 2) - 3._wp*v( 3)) / 12._wp !& + poly(4) = ( 1._wp*v(-3) - 5._wp*v(-2) + 13._wp*v(-1) + 3._wp*v( 0)) / 12._wp !& + end if + + if (.not. 
teno) then + + beta(3) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(2)*dvd(2) & + + beta_coef_${XYZ}$ (j, 0, 1)*dvd(2)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 2)*dvd(2)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 3)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 4)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 5)*dvd(0)*dvd(0) & + + weno_eps + + beta(2) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 1, 1)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 2)*dvd(1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 3)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 4)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 5)*dvd(-1)*dvd(-1) & + + weno_eps + + beta(1) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 2, 1)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 2)*dvd(0)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 3)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 4)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 5)*dvd(-2)*dvd(-2) & + + weno_eps + + beta(0) = beta_coef_${XYZ}$ (j, 3, 0)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 3, 1)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 3, 2)*dvd(-1)*dvd(-3) & + + beta_coef_${XYZ}$ (j, 3, 3)*dvd(-2)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 3, 4)*dvd(-2)*dvd(-3) & + + beta_coef_${XYZ}$ (j, 3, 5)*dvd(-3)*dvd(-3) & + + weno_eps + + else ! TENO + ! 
High-Order Low-Dissipation Targeted ENO Schemes for Ideal Magnetohydrodynamics (Fu & Tang, 2019) Section 3.2 + beta(0) = 13._wp/12._wp*(v(-1) - 2._wp*v( 0) + v( 1))**2._wp + (( v(-1) - v( 1))**2._wp)/4._wp + weno_eps !& + beta(1) = 13._wp/12._wp*(v( 0) - 2._wp*v( 1) + v( 2))**2._wp + ((3._wp*v( 0) - 4._wp*v( 1) + v( 2))**2._wp)/4._wp + weno_eps !& + beta(2) = 13._wp/12._wp*(v(-2) - 2._wp*v(-1) + v( 0))**2._wp + (( v(-2) - 4._wp*v(-1) + 3._wp*v( 0))**2._wp)/4._wp + weno_eps !& + + beta(3) = ( v( 0)*(2107._wp*v( 0) - 9402._wp*v( 1) + 7042._wp*v( 2) - 1854._wp*v( 3)) & !& + + v( 1)*( 11003._wp*v( 1) - 17246._wp*v( 2) + 4642._wp*v( 3)) & !& + + v( 2)*( 7043._wp*v( 2) - 3882._wp*v( 3)) & !& + + v( 3)*( 547._wp*v( 3)) ) / 240._wp & !& + + weno_eps !& + + beta(4) = ( v(-3)*(547._wp*v(-3) - 3882._wp*v(-2) + 4642._wp*v(-1) - 1854._wp*v( 0)) & !& + + v(-2)*( 7043._wp*v(-2) - 17246._wp*v(-1) + 7042._wp*v( 0)) & !& + + v(-1)*( 11003._wp*v(-1) - 9402._wp*v( 0)) & !& + + v( 0)*( 2107._wp*v( 0)) ) / 240._wp & !& + + weno_eps !& + end if + + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + + elseif (wenoz) then + ! Castro, et al. (2010) + ! Don & Borges (2013) also helps + tau = abs(beta(3) - beta(0)) ! Equation 50 + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) ! q = 2,3,4 for stability + + elseif (teno) then + tau = abs(beta(4) - beta(3)) ! Note the reordering of stencils + alpha = 1._wp + tau/beta + alpha = (alpha*alpha*alpha)**2._wp ! 
some CPU compilers cannot optimize x**6.0 + omega = alpha/sum(alpha) + delta = merge(0._wp, 1._wp, omega < teno_CT) + alpha = delta*d_cbL_${XYZ}$ (:, j) + + end if - beta(1) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 2, 1)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 2)*dvd(0)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 3)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 4)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 5)*dvd(-2)*dvd(-2) & - + weno_eps - - beta(0) = beta_coef_${XYZ}$ (j, 3, 0)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 3, 1)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 3, 2)*dvd(-1)*dvd(-3) & - + beta_coef_${XYZ}$ (j, 3, 3)*dvd(-2)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 3, 4)*dvd(-2)*dvd(-3) & - + beta_coef_${XYZ}$ (j, 3, 5)*dvd(-3)*dvd(-3) & - + weno_eps - - else ! TENO - ! High-Order Low-Dissipation Targeted ENO Schemes for Ideal Magnetohydrodynamics (Fu & Tang, 2019) Section 3.2 - beta(0) = 13._wp/12._wp*(v(-1) - 2._wp*v( 0) + v( 1))**2._wp + (( v(-1) - v( 1))**2._wp)/4._wp + weno_eps !& - beta(1) = 13._wp/12._wp*(v( 0) - 2._wp*v( 1) + v( 2))**2._wp + ((3._wp*v( 0) - 4._wp*v( 1) + v( 2))**2._wp)/4._wp + weno_eps !& - beta(2) = 13._wp/12._wp*(v(-2) - 2._wp*v(-1) + v( 0))**2._wp + (( v(-2) - 4._wp*v(-1) + 3._wp*v( 0))**2._wp)/4._wp + weno_eps !& - - beta(3) = ( v( 0)*(2107._wp*v( 0) - 9402._wp*v( 1) + 7042._wp*v( 2) - 1854._wp*v( 3)) & !& - + v( 1)*( 11003._wp*v( 1) - 17246._wp*v( 2) + 4642._wp*v( 3)) & !& - + v( 2)*( 7043._wp*v( 2) - 3882._wp*v( 3)) & !& - + v( 3)*( 547._wp*v( 3)) ) / 240._wp & !& - + weno_eps !& - - beta(4) = ( v(-3)*(547._wp*v(-3) - 3882._wp*v(-2) + 4642._wp*v(-1) - 1854._wp*v( 0)) & !& - + v(-2)*( 7043._wp*v(-2) - 17246._wp*v(-1) + 7042._wp*v( 0)) & !& - + v(-1)*( 11003._wp*v(-1) - 9402._wp*v( 0)) & !& - + v( 0)*( 2107._wp*v( 0)) ) / 240._wp & !& - + weno_eps !& - end if - - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) 
- omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - - elseif (wenoz) then - ! Castro, et al. (2010) - ! Don & Borges (2013) also helps - tau = abs(beta(3) - beta(0)) ! Equation 50 - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) ! q = 2,3,4 for stability - - elseif (teno) then - tau = abs(beta(4) - beta(3)) ! Note the reordering of stencils - alpha = 1._wp + tau/beta - alpha = (alpha*alpha*alpha)**2._wp ! some CPU compilers cannot optimize x**6.0 omega = alpha/sum(alpha) - delta = merge(0._wp, 1._wp, omega < teno_CT) - alpha = delta*d_cbL_${XYZ}$ (:, j) - - end if - omega = alpha/sum(alpha) + vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + + if (.not. teno) then + poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(2) & + + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 0, 2)*dvd(0) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 1, 2)*dvd(-1) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 2, 2)*dvd(-2) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 3, 0)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 3, 1)*dvd(-2) & + + poly_coef_cbR_${XYZ}$ (j, 3, 2)*dvd(-3) + else + poly(0) = (-1._wp*v(-1) + 5._wp*v( 0) + 2._wp*v( 1)) / 6._wp !& + poly(1) = ( 2._wp*v( 0) + 5._wp*v( 1) - 1._wp*v( 2)) / 6._wp !& + poly(2) = ( 2._wp*v(-2) - 7._wp*v(-1) + 11._wp*v( 0)) / 6._wp !& + poly(3) = ( 3._wp*v( 0) + 13._wp*v( 1) - 5._wp*v( 2) + 1._wp*v( 3)) / 12._wp !& + poly(4) = (-3._wp*v(-3) + 13._wp*v(-2) - 23._wp*v(-1) + 25._wp*v( 0)) / 12._wp !& + end if + + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + 
+ elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) + + elseif (wenoz) then + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) + + elseif (teno) then + alpha = delta*d_cbR_${XYZ}$ (:, j) + + end if - vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - - if (.not. teno) then - poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(2) & - + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 0, 2)*dvd(0) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 1, 2)*dvd(-1) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 2, 2)*dvd(-2) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 3, 0)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 3, 1)*dvd(-2) & - + poly_coef_cbR_${XYZ}$ (j, 3, 2)*dvd(-3) - else - poly(0) = (-1._wp*v(-1) + 5._wp*v( 0) + 2._wp*v( 1)) / 6._wp !& - poly(1) = ( 2._wp*v( 0) + 5._wp*v( 1) - 1._wp*v( 2)) / 6._wp !& - poly(2) = ( 2._wp*v(-2) - 7._wp*v(-1) + 11._wp*v( 0)) / 6._wp !& - poly(3) = ( 3._wp*v( 0) + 13._wp*v( 1) - 5._wp*v( 2) + 1._wp*v( 3)) / 12._wp !& - poly(4) = (-3._wp*v(-3) + 13._wp*v(-2) - 23._wp*v(-1) + 25._wp*v( 0)) / 12._wp !& - end if - - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - - elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, 
j)*(1._wp + (tau/beta)**wenoz_q) - elseif (teno) then - alpha = delta*d_cbR_${XYZ}$ (:, j) - - end if - - omega = alpha/sum(alpha) - - vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1133,15 +1133,15 @@ contains if (weno_dir == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1227,130 +1227,130 @@ contains real(wp), parameter :: beta_mp = 4._wp/3._wp #:call GPU_PARALLEL_LOOP(collapse=4,private='[d]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - do i = 1, v_size - d(-1) = v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 2, k, l, i) & - - v_rs_ws(j - 1, k, l, i) & - *2._wp - d(0) = v_rs_ws(j + 1, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - v_rs_ws(j, k, l, i) & - *2._wp - d(1) = v_rs_ws(j + 2, k, l, i) & - + v_rs_ws(j, k, l, i) & - - v_rs_ws(j + 1, k, l, i) & - *2._wp - - d_MD = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & - *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & - *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & - *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & - abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp - - d_LC = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & - *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & - *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & - *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & - abs(4._wp*d(1) - d(0)), 
abs(d(1)))/8._wp - - vL_UL = v_rs_ws(j, k, l, i) & - - (v_rs_ws(j + 1, k, l, i) & - - v_rs_ws(j, k, l, i))*alpha_mp - - vL_MD = (v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - d_MD)*5.e-1_wp - - vL_LC = v_rs_ws(j, k, l, i) & - - (v_rs_ws(j + 1, k, l, i) & - - v_rs_ws(j, k, l, i))*5.e-1_wp + beta_mp*d_LC - - vL_min = max(min(v_rs_ws(j, k, l, i), & - v_rs_ws(j - 1, k, l, i), & - vL_MD), & - min(v_rs_ws(j, k, l, i), & - vL_UL, & - vL_LC)) - - vL_max = min(max(v_rs_ws(j, k, l, i), & - v_rs_ws(j - 1, k, l, i), & - vL_MD), & - max(v_rs_ws(j, k, l, i), & - vL_UL, & - vL_LC)) - - vL_rs_vf(j, k, l, i) = vL_rs_vf(j, k, l, i) & - + (sign(5.e-1_wp, vL_min - vL_rs_vf(j, k, l, i)) & - + sign(5.e-1_wp, vL_max - vL_rs_vf(j, k, l, i))) & - *min(abs(vL_min - vL_rs_vf(j, k, l, i)), & - abs(vL_max - vL_rs_vf(j, k, l, i))) - ! END: Left Monotonicity Preserving Bound - - ! Right Monotonicity Preserving Bound - d(-1) = v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 2, k, l, i) & - - v_rs_ws(j - 1, k, l, i) & - *2._wp - d(0) = v_rs_ws(j + 1, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - v_rs_ws(j, k, l, i) & - *2._wp - d(1) = v_rs_ws(j + 2, k, l, i) & - + v_rs_ws(j, k, l, i) & - - v_rs_ws(j + 1, k, l, i) & - *2._wp - - d_MD = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & - *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & - *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & - *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & - abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp - - d_LC = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & - *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & - *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & - *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & - abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp - - vR_UL = v_rs_ws(j, k, l, i) & - + (v_rs_ws(j, k, l, i) & - - v_rs_ws(j - 1, k, l, i))*alpha_mp - - vR_MD = (v_rs_ws(j, k, l, i) & - + v_rs_ws(j + 1, k, l, i) & - - d_MD)*5.e-1_wp - - vR_LC = v_rs_ws(j, 
k, l, i) & - + (v_rs_ws(j, k, l, i) & - - v_rs_ws(j - 1, k, l, i))*5.e-1_wp + beta_mp*d_LC - - vR_min = max(min(v_rs_ws(j, k, l, i), & - v_rs_ws(j + 1, k, l, i), & - vR_MD), & - min(v_rs_ws(j, k, l, i), & - vR_UL, & - vR_LC)) - - vR_max = min(max(v_rs_ws(j, k, l, i), & - v_rs_ws(j + 1, k, l, i), & - vR_MD), & - max(v_rs_ws(j, k, l, i), & - vR_UL, & - vR_LC)) - - vR_rs_vf(j, k, l, i) = vR_rs_vf(j, k, l, i) & - + (sign(5.e-1_wp, vR_min - vR_rs_vf(j, k, l, i)) & - + sign(5.e-1_wp, vR_max - vR_rs_vf(j, k, l, i))) & - *min(abs(vR_min - vR_rs_vf(j, k, l, i)), & - abs(vR_max - vR_rs_vf(j, k, l, i))) - ! END: Right Monotonicity Preserving Bound + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + do i = 1, v_size + d(-1) = v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 2, k, l, i) & + - v_rs_ws(j - 1, k, l, i) & + *2._wp + d(0) = v_rs_ws(j + 1, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - v_rs_ws(j, k, l, i) & + *2._wp + d(1) = v_rs_ws(j + 2, k, l, i) & + + v_rs_ws(j, k, l, i) & + - v_rs_ws(j + 1, k, l, i) & + *2._wp + + d_MD = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & + *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & + *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & + *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & + abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp + + d_LC = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & + *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & + *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & + *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & + abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp + + vL_UL = v_rs_ws(j, k, l, i) & + - (v_rs_ws(j + 1, k, l, i) & + - v_rs_ws(j, k, l, i))*alpha_mp + + vL_MD = (v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - d_MD)*5.e-1_wp + + vL_LC = v_rs_ws(j, k, l, i) & + - (v_rs_ws(j + 1, k, l, i) & + - v_rs_ws(j, k, l, i))*5.e-1_wp + beta_mp*d_LC + + vL_min = max(min(v_rs_ws(j, k, l, i), & + 
v_rs_ws(j - 1, k, l, i), & + vL_MD), & + min(v_rs_ws(j, k, l, i), & + vL_UL, & + vL_LC)) + + vL_max = min(max(v_rs_ws(j, k, l, i), & + v_rs_ws(j - 1, k, l, i), & + vL_MD), & + max(v_rs_ws(j, k, l, i), & + vL_UL, & + vL_LC)) + + vL_rs_vf(j, k, l, i) = vL_rs_vf(j, k, l, i) & + + (sign(5.e-1_wp, vL_min - vL_rs_vf(j, k, l, i)) & + + sign(5.e-1_wp, vL_max - vL_rs_vf(j, k, l, i))) & + *min(abs(vL_min - vL_rs_vf(j, k, l, i)), & + abs(vL_max - vL_rs_vf(j, k, l, i))) + ! END: Left Monotonicity Preserving Bound + + ! Right Monotonicity Preserving Bound + d(-1) = v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 2, k, l, i) & + - v_rs_ws(j - 1, k, l, i) & + *2._wp + d(0) = v_rs_ws(j + 1, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - v_rs_ws(j, k, l, i) & + *2._wp + d(1) = v_rs_ws(j + 2, k, l, i) & + + v_rs_ws(j, k, l, i) & + - v_rs_ws(j + 1, k, l, i) & + *2._wp + + d_MD = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & + *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & + *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & + *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & + abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp + + d_LC = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & + *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & + *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & + *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & + abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp + + vR_UL = v_rs_ws(j, k, l, i) & + + (v_rs_ws(j, k, l, i) & + - v_rs_ws(j - 1, k, l, i))*alpha_mp + + vR_MD = (v_rs_ws(j, k, l, i) & + + v_rs_ws(j + 1, k, l, i) & + - d_MD)*5.e-1_wp + + vR_LC = v_rs_ws(j, k, l, i) & + + (v_rs_ws(j, k, l, i) & + - v_rs_ws(j - 1, k, l, i))*5.e-1_wp + beta_mp*d_LC + + vR_min = max(min(v_rs_ws(j, k, l, i), & + v_rs_ws(j + 1, k, l, i), & + vR_MD), & + min(v_rs_ws(j, k, l, i), & + vR_UL, & + vR_LC)) + + vR_max = min(max(v_rs_ws(j, k, l, i), & + v_rs_ws(j + 1, k, l, i), & + vR_MD), & + max(v_rs_ws(j, k, l, i), & + vR_UL, & + vR_LC)) 
+ + vR_rs_vf(j, k, l, i) = vR_rs_vf(j, k, l, i) & + + (sign(5.e-1_wp, vR_min - vR_rs_vf(j, k, l, i)) & + + sign(5.e-1_wp, vR_max - vR_rs_vf(j, k, l, i))) & + *min(abs(vR_min - vR_rs_vf(j, k, l, i)), & + abs(vR_max - vR_rs_vf(j, k, l, i))) + ! END: Right Monotonicity Preserving Bound + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end subroutine s_preserve_monotonicity From cec9867b75c5fb0a36ce358c9fc4d0e60aa85d72 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 21 Jul 2025 18:01:02 -0400 Subject: [PATCH 10/60] Fixed some issues with matching start and end of parallel loop macros, started work on enter and exit data, compiles --- src/common/include/acc_macros.fpp | 34 ++ src/common/include/omp_macros.fpp | 36 +- src/common/include/parallel_macros.fpp | 20 +- src/simulation/m_rhs.fpp | 574 +++++++++--------- src/simulation/m_time_steppers.fpp | 798 ++++++++++++------------- 5 files changed, 766 insertions(+), 696 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index 4da5ab0231..77e896f91e 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -140,4 +140,38 @@ $:code $:end_acc_directive #:enddef + +#:def ACC_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + #:set create_val = GEN_CREATE_STR(create) + #:set attach_val = GEN_ATTACH_STR(attach) + #! #:set to_val = GEN_TO_STR(copyin) + #! #:set alloc_val = GEN_ALLOC_STR(create) + #! #:set alloc_val2 = GEN_ALLOC_STR(attach) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #! #:set extraMpArgs_val = '' + #:set acc_clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') + #! 
#:set mp_clause_val = to_val.strip('\n') + alloc_val.strip('\n') + alloc_val2.strip('\n') + #:set acc_directive = '!$acc enter data ' + acc_clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ACC_EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None) + #:set copyout_val = GEN_COPYOUT_STR(copyout) + #:set delete_val = GEN_DELETE_STR(delete) + #:set detach_val = GEN_DETACH_STR(detach) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n') + #:set acc_directive = '!$acc exit data ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ACC_UPDATE(host=None, device=None, extraAccArgs=None) + #:set host_val = GEN_HOST_STR(host) + #:set device_val = GEN_DEVICE_STR(device) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = host_val.strip('\n') + device_val.strip('\n') + #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef ! New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 04b1c25465..af89d0b5a8 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -48,6 +48,11 @@ $:create_val #:enddef +#:def OMP_DELETE_STR(delete) + #:set create_val = OMP_MAP_STR('release', delete) + $:create_val +#:enddef + #:def OMP_NOCREATE_STR(no_create) #:if no_create is not None #:stop 'no_create is not supported yet' @@ -76,7 +81,7 @@ #:def OMP_TO_STR(to) #! 
Not yet implemented - #:set to_val = '' + #:set to_val = GEN_PARENTHESES_CLAUSE('to', to) $:to_val #:enddef @@ -214,4 +219,33 @@ $:code $:end_omp_directive #:enddef + +#:def OMP_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraOmpArgs=None) + #:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n') + #:set create_val = OMP_CREATE_STR(create) + #:set attach_val = OMP_MAP_STR('to', attach) + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set omp_clause_val = to_val.strip('\n') + alloc_val.strip('\n') + alloc_val2.strip('\n') + #:set omp_directive = '!$omp target enter data ' + omp_clause_val + extraOmpArgs_val.strip('\n') + $:omp_directive +#:enddef + +#:def OMP_EXIT_DATA(copyout=None, delete=None, detach=None, extraOmpArgs=None) + #:set copyout_val = OMP_COPYOUT_STR(copyout) + #:set delete_val = OMP_DELETE_STR(delete) + #:set detach_val = OMP_MAP_STR('from', detach) + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n') + #:set omp_directive = '!$omp target exit data ' + clause_val + extraOmpArgs.strip('\n') + $:omp_directive +#:enddef + +#:def OMP_UPDATE(host=None, device=None, extraAccArgs=None) + #:set host_val = OMP_FROM_STR(host) + #:set device_val = OMP_TO_STR(device) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = host_val.strip('\n') + device_val.strip('\n') + #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef ! 
New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index a2199f9cfd..5f0f95b3a6 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -274,6 +274,17 @@ $:end_acc_directive #:enddef +#:def TEMP_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None, extraOmpArgs=None) + #:set acc_code = ACC_ENTER_DATA(copy=copy, copyin=copyin, copyinReadOnly=copyinReadOnly, create=create, no_create=no_create, attach=attach, extraAccArgs=extraAccArgs) + #:set omp_code = OMP_ENTER_DATA(copy=copy, copyin=copyin, copyinReadOnly=copyinReadOnly, create=create, no_create=no_create, attach=attach, extraOmpArgs=extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#endif +#:enddef + #:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') #:set create_val = GEN_CREATE_STR(create) @@ -286,7 +297,6 @@ #:set acc_clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') #! #:set mp_clause_val = to_val.strip('\n') + alloc_val.strip('\n') + alloc_val2.strip('\n') #:set acc_directive = '!$acc enter data ' + acc_clause_val + extraAccArgs_val.strip('\n') - #! 
#:set mp_directive = '!$omp target enter data ' + mp_clause_val + extraMpArgs_val.strip('\n') $:acc_directive #:enddef @@ -300,14 +310,6 @@ $:acc_directive #:enddef -#:def GPU_CACHE(cache, extraAccArgs=None) - #:set cache_val = GEN_PARENTHESES_CLAUSE('cache', cache) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = cache_val.strip('\n') - #:set acc_directive = '!$acc ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - #:def GPU_ATOMIC(atomic, extraAccArgs=None) #:assert isinstance(atomic, str) #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture') diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 9761f469ef..cf3822dbf3 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -1318,155 +1318,155 @@ contains rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - #:endcall GPU_PARALLEL_LOOP + end do + #:endcall GPU_PARALLEL_LOOP + end if + end if + + case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) + use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) + if (use_standard_riemann) then + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do l_idx = 0, p ! z_extent + do k_idx = 0, n ! y_extent + do q_idx = 0, m ! 
x_extent + local_inv_ds = 1._wp/dy(k_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(q_idx, k_idx, l_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do end do - end if + end do + end do + #:endcall GPU_PARALLEL_LOOP + else ! Other Riemann solvers + if (alt_soundspeed) then + if (bubbles_euler .neqv. .true.) then + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) + local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + if (cyl_coord) then + rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) - & + (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) + end if + end do; end do; end do + #:endcall GPU_PARALLEL_LOOP + + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) + local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! 
Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + if (cyl_coord) then + rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & + (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) + end if + end do; end do; end do + #:endcall GPU_PARALLEL_LOOP end if + else ! NOT alt_soundspeed + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + end do + #:endcall GPU_PARALLEL_LOOP + end if + end if + + case (3) ! z-direction: loops l_idx (x), q_idx (y), k_idx (z); sf(l_idx, q_idx, k_idx); dz(k_idx); Kterm(l_idx,q_idx,k_idx) + if (grid_geometry == 3) then + use_standard_riemann = (riemann_solver == 1) + else + use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) + end if - case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) - use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) - if (use_standard_riemann) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do l_idx = 0, p ! 
z_extent - do k_idx = 0, n ! y_extent - do q_idx = 0, m ! x_extent - local_inv_ds = 1._wp/dy(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(q_idx, k_idx, l_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do - end do + if (use_standard_riemann) then + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do k_idx = 0, p ! z_extent + do q_idx = 0, n ! y_extent + do l_idx = 0, m ! x_extent + local_inv_ds = 1._wp/dz(k_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(l_idx, q_idx, k_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + else ! Other Riemann solvers + if (alt_soundspeed) then + if (bubbles_euler .neqv. .true.) then + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) + local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! 
Access is safe + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do #:endcall GPU_PARALLEL_LOOP - else ! Other Riemann solvers - if (alt_soundspeed) then - if (bubbles_euler .neqv. .true.) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) - local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx - 1, l_idx) - rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - if (cyl_coord) then - rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) - & - (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) - end if - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) - local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! 
Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx - 1, l_idx) - rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - if (cyl_coord) then - rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & - (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) - end if - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - end if - else ! NOT alt_soundspeed - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) - rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - end do - end if - end if - - case (3) ! z-direction: loops l_idx (x), q_idx (y), k_idx (z); sf(l_idx, q_idx, k_idx); dz(k_idx); Kterm(l_idx,q_idx,k_idx) - if (grid_geometry == 3) then - use_standard_riemann = (riemann_solver == 1) - else - use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) - end if - - if (use_standard_riemann) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do k_idx = 0, p ! z_extent - do q_idx = 0, n ! y_extent - do l_idx = 0, m ! 
x_extent - local_inv_ds = 1._wp/dz(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(l_idx, q_idx, k_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else ! Other Riemann solvers - if (alt_soundspeed) then - if (bubbles_euler .neqv. .true.) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) - local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) - local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! 
Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - end if - else ! NOT alt_soundspeed - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - #:endcall GPU_PARALLEL_LOOP - end do - end if - end if - end select - end subroutine s_add_directional_advection_source_terms - end subroutine s_compute_advection_source_term + #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) + local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! 
Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + #:endcall GPU_PARALLEL_LOOP + end if + else ! NOT alt_soundspeed + #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + do j_adv = advxb, advxe + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + end do + #:endcall GPU_PARALLEL_LOOP + end if + end if + end select + end subroutine s_add_directional_advection_source_terms + + end subroutine s_compute_advection_source_term subroutine s_compute_additional_physics_rhs(idir, q_prim_vf, rhs_vf, flux_src_n_in, & dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf) @@ -1477,9 +1477,9 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: flux_src_n_in type(scalar_field), dimension(sys_size), intent(in) :: dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf - integer :: i, j, k, l + integer :: i, j, k, l - if (idir == 1) then ! x-direction + if (idir == 1) then ! x-direction if (surface_tension) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -1513,7 +1513,7 @@ contains end do #:endcall GPU_PARALLEL_LOOP - elseif (idir == 2) then ! y-direction + elseif (idir == 2) then ! 
y-direction if (surface_tension) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -1549,21 +1549,21 @@ contains idwbuff(1), idwbuff(2), idwbuff(3)) end if - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, 0, l) = & - rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & - (tau_Re_vf(i)%sf(j, -1, l) & - - tau_Re_vf(i)%sf(j, 1, l)) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - - end if + #:call GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, 0, l) = & + rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & + (tau_Re_vf(i)%sf(j, -1, l) & + - tau_Re_vf(i)%sf(j, 1, l)) + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + + end if #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p @@ -1620,21 +1620,21 @@ contains end do #:endcall GPU_PARALLEL_LOOP - if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, 0, l) = & - rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & - tau_Re_vf(i)%sf(j, 0, l) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if - else + if (viscous) then + #:call GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, 0, l) = & + rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & + tau_Re_vf(i)%sf(j, 0, l) + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if + else #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p @@ -1654,7 +1654,7 @@ contains end if end if - elseif (idir == 3) then ! z-direction + elseif (idir == 3) then ! 
z-direction if (surface_tension) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -1709,16 +1709,16 @@ contains end if end if - end subroutine s_compute_additional_physics_rhs + end subroutine s_compute_additional_physics_rhs - !> The purpose of this procedure is to infinitely relax + !> The purpose of this procedure is to infinitely relax !! the pressures from the internal-energy equations to a !! unique pressure, from which the corresponding volume !! fraction of each phase are recomputed. For conservation !! purpose, this pressure is finally corrected using the !! mixture-total-energy equation. - !> The purpose of this subroutine is to WENO-reconstruct the + !> The purpose of this subroutine is to WENO-reconstruct the !! left and the right cell-boundary values, including values !! at the Gaussian quadrature points, from the cell-averaged !! variables. @@ -1728,34 +1728,34 @@ contains !! @param vR_qp Right WENO-reconstructed, cell-boundary values including !! the values at the quadrature points, of the cell-average variables !! 
@param norm_dir Splitting coordinate direction - subroutine s_reconstruct_cell_boundary_values(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, & - norm_dir) + subroutine s_reconstruct_cell_boundary_values(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, & + norm_dir) - type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_x, vL_y, vL_z - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_x, vR_y, vR_z - integer, intent(in) :: norm_dir + type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf + real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_x, vL_y, vL_z + real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_x, vR_y, vR_z + integer, intent(in) :: norm_dir - integer :: weno_dir !< Coordinate direction of the WENO reconstruction + integer :: weno_dir !< Coordinate direction of the WENO reconstruction - ! Reconstruction in s1-direction + ! 
Reconstruction in s1-direction - if (norm_dir == 1) then - is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) - weno_dir = 1; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn + if (norm_dir == 1) then + is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) + weno_dir = 1; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn - elseif (norm_dir == 2) then - is1 = idwbuff(2); is2 = idwbuff(1); is3 = idwbuff(3) - weno_dir = 2; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn + elseif (norm_dir == 2) then + is1 = idwbuff(2); is2 = idwbuff(1); is3 = idwbuff(3) + weno_dir = 2; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn - else - is1 = idwbuff(3); is2 = idwbuff(2); is3 = idwbuff(1) - weno_dir = 3; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn + else + is1 = idwbuff(3); is2 = idwbuff(2); is3 = idwbuff(1) + weno_dir = 3; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn - end if + end if if (n > 0) then if (p > 0) then @@ -1776,89 +1776,89 @@ contains is1, is2, is3) end if - end subroutine s_reconstruct_cell_boundary_values - - subroutine s_reconstruct_cell_boundary_values_first_order(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, & - norm_dir) - - type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_x, vL_y, vL_z - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_x, vR_y, vR_z - integer, intent(in) :: norm_dir - - integer :: recon_dir !< Coordinate direction of the WENO reconstruction - - integer :: i, j, k, l - ! 
Reconstruction in s1-direction - - if (norm_dir == 1) then - is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) - recon_dir = 1; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn - - elseif (norm_dir == 2) then - is1 = idwbuff(2); is2 = idwbuff(1); is3 = idwbuff(3) - recon_dir = 2; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn - - else - is1 = idwbuff(3); is2 = idwbuff(2); is3 = idwbuff(1) - recon_dir = 3; is1%beg = is1%beg + weno_polyn - is1%end = is1%end - weno_polyn - - end if - - $:GPU_UPDATE(device='[is1,is2,is3,iv]') - - if (recon_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else if (recon_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - else if (recon_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - end if - - end subroutine s_reconstruct_cell_boundary_values_first_order - - !> Module deallocation and/or disassociation procedures - impure subroutine s_finalize_rhs_module - - integer :: i, j, l - - call s_finalize_pressure_relaxation_module + end subroutine s_reconstruct_cell_boundary_values + + subroutine s_reconstruct_cell_boundary_values_first_order(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, & + norm_dir) + + type(scalar_field), dimension(iv%beg:iv%end), 
intent(in) :: v_vf + real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_x, vL_y, vL_z + real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_x, vR_y, vR_z + integer, intent(in) :: norm_dir + + integer :: recon_dir !< Coordinate direction of the WENO reconstruction + + integer :: i, j, k, l + ! Reconstruction in s1-direction + + if (norm_dir == 1) then + is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) + recon_dir = 1; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn + + elseif (norm_dir == 2) then + is1 = idwbuff(2); is2 = idwbuff(1); is3 = idwbuff(3) + recon_dir = 2; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn + + else + is1 = idwbuff(3); is2 = idwbuff(2); is3 = idwbuff(1) + recon_dir = 3; is1%beg = is1%beg + weno_polyn + is1%end = is1%end - weno_polyn + + end if + + $:GPU_UPDATE(device='[is1,is2,is3,iv]') + + if (recon_dir == 1) then + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + else if (recon_dir == 2) then + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + else if (recon_dir == 3) then + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if + + end subroutine s_reconstruct_cell_boundary_values_first_order + + !> Module 
deallocation and/or disassociation procedures + impure subroutine s_finalize_rhs_module + + integer :: i, j, l + + call s_finalize_pressure_relaxation_module if (.not. igr) then do j = cont_idx%beg, cont_idx%end @@ -1975,8 +1975,8 @@ contains deallocate (alf_sum%sf) end if - @:DEALLOCATE(dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n) - @:DEALLOCATE(dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n) + @:DEALLOCATE(dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n) + @:DEALLOCATE(dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n) if (.not. igr) then do i = num_dims, 1, -1 @@ -2017,7 +2017,7 @@ contains @:DEALLOCATE(flux_n, flux_src_n, flux_gsrc_n) end if - end subroutine s_finalize_rhs_module + end subroutine s_finalize_rhs_module - end module m_rhs +end module m_rhs diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 66c85751ff..cef74f406d 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -529,147 +529,147 @@ contains end do end do end do - end if - #:endcall GPU_PARALLEL_LOOP - - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) - end do - end do + #:endcall GPU_PARALLEL_LOOP + end if + + if (qbmm .and. (.not. 
polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(2)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP - end if + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) - end if + if (model_eqns == 3 .and. (.not. relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) + end if - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) - else - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) - end if - end if + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) + else + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) + end if + end if - ! Stage 2 of 2 + ! Stage 2 of 2 - call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) + call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) + if (bubbles_lagrange .and. .not. 
adap_dt) call s_update_lagrange_tdv_rk(stage=2) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (q_cons_ts(1)%vf(i)%sf(j, k, l) & - + q_cons_ts(2)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l))/2._wp - end do - end do + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + (q_cons_ts(1)%vf(i)%sf(j, k, l) & + + q_cons_ts(2)%vf(i)%sf(j, k, l) & + + dt*rhs_vf(i)%sf(j, k, l))/2._wp end do end do - #:endcall GPU_PARALLEL_LOOP - - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - (pb_ts(1)%sf(j, k, l, q, i) & - + pb_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i))/2._wp - end do - end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + + if (qbmm .and. (.not. polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(1)%sf(j, k, l, q, i) = & + (pb_ts(1)%sf(j, k, l, q, i) & + + pb_ts(2)%sf(j, k, l, q, i) & + + dt*rhs_pb(j, k, l, q, i))/2._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP - end if + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - (mv_ts(1)%sf(j, k, l, q, i) & - + mv_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i))/2._wp - end do - end do + if (qbmm .and. (.not. 
polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(1)%sf(j, k, l, q, i) = & + (mv_ts(1)%sf(j, k, l, q, i) & + + mv_ts(2)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i))/2._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP - end if + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(1)%vf) - end if + if (model_eqns == 3 .and. (.not. relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(1)%vf) + end if - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf) + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf) - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - else - call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf) - end if - end if + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) + else + call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf) + end if + end if - call nvtxEndRange + call nvtxEndRange - call cpu_time(finish) + call cpu_time(finish) - end subroutine s_2nd_order_tvd_rk + end subroutine s_2nd_order_tvd_rk - !> 3rd order TVD RK time-stepping algorithm + !> 3rd order TVD RK time-stepping algorithm !! 
@param t_step Current time-step - impure subroutine s_3rd_order_tvd_rk(t_step, time_avg) + impure subroutine s_3rd_order_tvd_rk(t_step, time_avg) - integer, intent(IN) :: t_step - real(wp), intent(INOUT) :: time_avg + integer, intent(IN) :: t_step + real(wp), intent(INOUT) :: time_avg - integer :: i, j, k, l, q !< Generic loop iterator + integer :: i, j, k, l, q !< Generic loop iterator - real(wp) :: start, finish + real(wp) :: start, finish - ! Stage 1 of 3 + ! Stage 1 of 3 - if (.not. adap_dt) then - call cpu_time(start) - call nvtxStartRange("TIMESTEP") - end if + if (.not. adap_dt) then + call cpu_time(start) + call nvtxStartRange("TIMESTEP") + end if - call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, 1) + call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, 1) if (run_time_info) then if (igr) then @@ -679,338 +679,338 @@ contains end if end if - if (probe_wrt) then - call s_time_step_cycling(t_step) - end if + if (probe_wrt) then + call s_time_step_cycling(t_step) + end if - if (cfl_dt) then - if (mytime >= t_stop) return - else - if (t_step == t_step_stop) return - end if + if (cfl_dt) then + if (mytime >= t_stop) return + else + if (t_step == t_step_stop) return + end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + if (bubbles_lagrange .and. .not. 
adap_dt) call s_update_lagrange_tdv_rk(stage=1) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l) - end do - end do + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(2)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) & + + dt*rhs_vf(i)%sf(j, k, l) end do end do - #:endcall GPU_PARALLEL_LOOP - - !Evolve pb and mv for non-polytropic qbmm - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i) - end do - end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + + !Evolve pb and mv for non-polytropic qbmm + if (qbmm .and. (.not. polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(2)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_pb(j, k, l, q, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP - end if + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i) - end do - end do + if (qbmm .and. (.not. 
polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(2)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i) end do end do end do - #:endcall GPU_PARALLEL_LOOP - end if + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) - end if + if (model_eqns == 3 .and. (.not. relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) + end if - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) - else - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) - end if - end if + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) + else + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) + end if + end if - ! Stage 2 of 3 + ! Stage 2 of 3 - call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) + call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) + if (bubbles_lagrange .and. .not. 
adap_dt) call s_update_lagrange_tdv_rk(stage=2) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - (3._wp*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + q_cons_ts(2)%vf(i)%sf(j, k, l) & - + dt*rhs_vf(i)%sf(j, k, l))/4._wp - end do - end do + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(2)%vf(i)%sf(j, k, l) = & + (3._wp*q_cons_ts(1)%vf(i)%sf(j, k, l) & + + q_cons_ts(2)%vf(i)%sf(j, k, l) & + + dt*rhs_vf(i)%sf(j, k, l))/4._wp end do end do - #:endcall GPU_PARALLEL_LOOP - - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(2)%sf(j, k, l, q, i) = & - (3._wp*pb_ts(1)%sf(j, k, l, q, i) & - + pb_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_pb(j, k, l, q, i))/4._wp - end do - end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + + if (qbmm .and. (.not. polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(2)%sf(j, k, l, q, i) = & + (3._wp*pb_ts(1)%sf(j, k, l, q, i) & + + pb_ts(2)%sf(j, k, l, q, i) & + + dt*rhs_pb(j, k, l, q, i))/4._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP - end if + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(2)%sf(j, k, l, q, i) = & - (3._wp*mv_ts(1)%sf(j, k, l, q, i) & - + mv_ts(2)%sf(j, k, l, q, i) & - + dt*rhs_mv(j, k, l, q, i))/4._wp - end do - end do + if (qbmm .and. (.not. 
polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(2)%sf(j, k, l, q, i) = & + (3._wp*mv_ts(1)%sf(j, k, l, q, i) & + + mv_ts(2)%sf(j, k, l, q, i) & + + dt*rhs_mv(j, k, l, q, i))/4._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP - end if + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp) - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) - end if + if (model_eqns == 3 .and. (.not. relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(2)%vf) + end if - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf) - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) - else - call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) - end if - end if + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf) + else + call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf) + end if + end if - ! Stage 3 of 3 - call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 3) + ! Stage 3 of 3 + call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 3) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) + if (bubbles_lagrange .and. .not. 
adap_dt) call s_update_lagrange_tdv_rk(stage=3) - #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (q_cons_ts(1)%vf(i)%sf(j, k, l) & - + 2._wp*q_cons_ts(2)%vf(i)%sf(j, k, l) & - + 2._wp*dt*rhs_vf(i)%sf(j, k, l))/3._wp - end do - end do + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + (q_cons_ts(1)%vf(i)%sf(j, k, l) & + + 2._wp*q_cons_ts(2)%vf(i)%sf(j, k, l) & + + 2._wp*dt*rhs_vf(i)%sf(j, k, l))/3._wp end do end do - #:endcall GPU_PARALLEL_LOOP - - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - pb_ts(1)%sf(j, k, l, q, i) = & - (pb_ts(1)%sf(j, k, l, q, i) & - + 2._wp*pb_ts(2)%sf(j, k, l, q, i) & - + 2._wp*dt*rhs_pb(j, k, l, q, i))/3._wp - end do - end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + + if (qbmm .and. (.not. polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + pb_ts(1)%sf(j, k, l, q, i) = & + (pb_ts(1)%sf(j, k, l, q, i) & + + 2._wp*pb_ts(2)%sf(j, k, l, q, i) & + + 2._wp*dt*rhs_pb(j, k, l, q, i))/3._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP - end if + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - mv_ts(1)%sf(j, k, l, q, i) = & - (mv_ts(1)%sf(j, k, l, q, i) & - + 2._wp*mv_ts(2)%sf(j, k, l, q, i) & - + 2._wp*dt*rhs_mv(j, k, l, q, i))/3._wp - end do - end do + if (qbmm .and. (.not. 
polytropic)) then + #:call GPU_PARALLEL_LOOP(collapse=5) + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + mv_ts(1)%sf(j, k, l, q, i) = & + (mv_ts(1)%sf(j, k, l, q, i) & + + 2._wp*mv_ts(2)%sf(j, k, l, q, i) & + + 2._wp*dt*rhs_mv(j, k, l, q, i))/3._wp end do end do end do - #:endcall GPU_PARALLEL_LOOP - end if + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if - if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) + if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp) - if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) + if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) - if (model_eqns == 3 .and. (.not. relax)) then - call s_pressure_relaxation_procedure(q_cons_ts(1)%vf) - end if + if (model_eqns == 3 .and. (.not. relax)) then + call s_pressure_relaxation_procedure(q_cons_ts(1)%vf) + end if - call nvtxStartRange("RHS-ELASTIC") - if (hyperelasticity) call s_hyperelastic_rmt_stress_update(q_cons_ts(1)%vf, q_prim_vf) - call nvtxEndRange + call nvtxStartRange("RHS-ELASTIC") + if (hyperelasticity) call s_hyperelastic_rmt_stress_update(q_cons_ts(1)%vf, q_prim_vf) + call nvtxEndRange - if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf) + if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf) - if (ib) then - if (qbmm .and. .not. polytropic) then - call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - else - call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf) - end if - end if + if (ib) then + if (qbmm .and. .not. polytropic) then + call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) + else + call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf) + end if + end if - if (.not. adap_dt) then - call nvtxEndRange - call cpu_time(finish) + if (.not. 
adap_dt) then + call nvtxEndRange + call cpu_time(finish) - time = time + (finish - start) - end if - end subroutine s_3rd_order_tvd_rk + time = time + (finish - start) + end if + end subroutine s_3rd_order_tvd_rk - !> Strang splitting scheme with 3rd order TVD RK time-stepping algorithm for + !> Strang splitting scheme with 3rd order TVD RK time-stepping algorithm for !! the flux term and adaptive time stepping algorithm for !! the source term !! @param t_step Current time-step - subroutine s_strang_splitting(t_step, time_avg) + subroutine s_strang_splitting(t_step, time_avg) - integer, intent(in) :: t_step - real(wp), intent(inout) :: time_avg + integer, intent(in) :: t_step + real(wp), intent(inout) :: time_avg - real(wp) :: start, finish + real(wp) :: start, finish - call cpu_time(start) + call cpu_time(start) - call nvtxStartRange("TIMESTEP") + call nvtxStartRange("TIMESTEP") - ! Stage 1 of 3 - call s_adaptive_dt_bubble(1) + ! Stage 1 of 3 + call s_adaptive_dt_bubble(1) - ! Stage 2 of 3 - call s_3rd_order_tvd_rk(t_step, time_avg) + ! Stage 2 of 3 + call s_3rd_order_tvd_rk(t_step, time_avg) - ! Stage 3 of 3 - call s_adaptive_dt_bubble(3) + ! Stage 3 of 3 + call s_adaptive_dt_bubble(3) - call nvtxEndRange + call nvtxEndRange - call cpu_time(finish) + call cpu_time(finish) - time = time + (finish - start) + time = time + (finish - start) - end subroutine s_strang_splitting + end subroutine s_strang_splitting - !> Bubble source part in Strang operator splitting scheme + !> Bubble source part in Strang operator splitting scheme !! 
@param t_step Current time-step - impure subroutine s_adaptive_dt_bubble(stage) + impure subroutine s_adaptive_dt_bubble(stage) - integer, intent(in) :: stage + integer, intent(in) :: stage - type(vector_field) :: gm_alpha_qp + type(vector_field) :: gm_alpha_qp - call s_convert_conservative_to_primitive_variables( & - q_cons_ts(1)%vf, & - q_T_sf, & - q_prim_vf, & - idwint) + call s_convert_conservative_to_primitive_variables( & + q_cons_ts(1)%vf, & + q_T_sf, & + q_prim_vf, & + idwint) - if (bubbles_euler) then + if (bubbles_euler) then call s_compute_bubble_EE_source(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, divu) call s_comp_alpha_from_n(q_cons_ts(1)%vf) - elseif (bubbles_lagrange) then - - call s_populate_variables_buffers(bc_type, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - call s_compute_bubble_EL_dynamics(q_prim_vf, stage) - call s_transfer_data_to_tmp() - call s_smear_voidfraction() - if (stage == 3) then - if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then - $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') - call s_write_lag_particles(mytime) - end if - call s_write_void_evol(mytime) - end if - + elseif (bubbles_lagrange) then + + call s_populate_variables_buffers(bc_type, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) + call s_compute_bubble_EL_dynamics(q_prim_vf, stage) + call s_transfer_data_to_tmp() + call s_smear_voidfraction() + if (stage == 3) then + if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() + if (lag_params%write_bubbles) then + $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') + call s_write_lag_particles(mytime) end if + call s_write_void_evol(mytime) + end if - end subroutine s_adaptive_dt_bubble + end if + + end subroutine s_adaptive_dt_bubble - impure subroutine s_compute_dt() + impure subroutine s_compute_dt() - real(wp) :: rho !< Cell-avg. density - real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity - real(wp) :: vel_sum !< Cell-avg. 
velocity sum - real(wp) :: pres !< Cell-avg. pressure - real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction - real(wp) :: gamma !< Cell-avg. sp. heat ratio - real(wp) :: pi_inf !< Cell-avg. liquid stiffness function - real(wp) :: c !< Cell-avg. sound speed - real(wp) :: H !< Cell-avg. enthalpy - real(wp), dimension(2) :: Re !< Cell-avg. Reynolds numbers - type(vector_field) :: gm_alpha_qp + real(wp) :: rho !< Cell-avg. density + real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity + real(wp) :: vel_sum !< Cell-avg. velocity sum + real(wp) :: pres !< Cell-avg. pressure + real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction + real(wp) :: gamma !< Cell-avg. sp. heat ratio + real(wp) :: pi_inf !< Cell-avg. liquid stiffness function + real(wp) :: c !< Cell-avg. sound speed + real(wp) :: H !< Cell-avg. enthalpy + real(wp), dimension(2) :: Re !< Cell-avg. Reynolds numbers + type(vector_field) :: gm_alpha_qp - real(wp) :: dt_local - integer :: j, k, l !< Generic loop iterators + real(wp) :: dt_local + integer :: j, k, l !< Generic loop iterators if (.not. igr) then call s_convert_conservative_to_primitive_variables( & @@ -1030,30 +1030,30 @@ contains call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) end if - ! Compute mixture sound speed - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) + ! 
Compute mixture sound speed + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) - end do - end do + call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) end do - #:endcall GPU_PARALLEL_LOOP + end do + end do + #:endcall GPU_PARALLEL_LOOP #:call GPU_PARALLEL(copyout='[dt_local]', copyin='[max_dt]') dt_local = minval(max_dt) #:endcall GPU_PARALLEL - if (num_procs == 1) then - dt = dt_local - else - call s_mpi_allreduce_min(dt_local, dt) - end if + if (num_procs == 1) then + dt = dt_local + else + call s_mpi_allreduce_min(dt_local, dt) + end if - $:GPU_UPDATE(device='[dt]') + $:GPU_UPDATE(device='[dt]') - end subroutine s_compute_dt + end subroutine s_compute_dt - !> This subroutine applies the body forces source term at each + !> This subroutine applies the body forces source term at each !! Runge-Kutta stage subroutine s_apply_bodyforces(q_cons_vf, q_prim_vf_in, rhs_vf_in, ldt) @@ -1061,9 +1061,9 @@ contains type(scalar_field), dimension(1:sys_size), intent(in) :: q_prim_vf_in type(scalar_field), dimension(1:sys_size), intent(inout) :: rhs_vf_in - real(wp), intent(in) :: ldt !< local dt + real(wp), intent(in) :: ldt !< local dt - integer :: i, j, k, l + integer :: i, j, k, l call nvtxStartRange("RHS-BODYFORCES") call s_compute_body_forces_rhs(q_prim_vf_in, q_cons_vf, rhs_vf_in) @@ -1081,16 +1081,16 @@ contains end do #:endcall GPU_PARALLEL_LOOP - call nvtxEndRange + call nvtxEndRange - end subroutine s_apply_bodyforces + end subroutine s_apply_bodyforces - !> This subroutine saves the temporary q_prim_vf vector + !> This subroutine saves the temporary q_prim_vf vector !! into the q_prim_ts vector that is then used in p_main !! 
@param t_step current time-step - subroutine s_time_step_cycling(t_step) + subroutine s_time_step_cycling(t_step) - integer, intent(in) :: t_step + integer, intent(in) :: t_step integer :: i, j, k, l !< Generic loop iterator @@ -1159,12 +1159,12 @@ contains #:endcall GPU_PARALLEL_LOOP end if - end subroutine s_time_step_cycling + end subroutine s_time_step_cycling - !> Module deallocation and/or disassociation procedures - impure subroutine s_finalize_time_steppers_module + !> Module deallocation and/or disassociation procedures + impure subroutine s_finalize_time_steppers_module - integer :: i, j !< Generic loop iterators + integer :: i, j !< Generic loop iterators ! Deallocating the cell-average conservative variables do i = 1, num_ts @@ -1175,18 +1175,18 @@ contains @:DEALLOCATE(q_cons_ts(i)%vf) end do - @:DEALLOCATE(q_cons_ts) + @:DEALLOCATE(q_cons_ts) - ! Deallocating the cell-average primitive ts variables - if (probe_wrt) then - do i = 0, 3 - do j = 1, sys_size - @:DEALLOCATE(q_prim_ts(i)%vf(j)%sf) - end do - @:DEALLOCATE(q_prim_ts(i)%vf) - end do - @:DEALLOCATE(q_prim_ts) - end if + ! Deallocating the cell-average primitive ts variables + if (probe_wrt) then + do i = 0, 3 + do j = 1, sys_size + @:DEALLOCATE(q_prim_ts(i)%vf(j)%sf) + end do + @:DEALLOCATE(q_prim_ts(i)%vf) + end do + @:DEALLOCATE(q_prim_ts) + end if if (.not. igr) then ! Deallocating the cell-average primitive variables @@ -1229,20 +1229,20 @@ contains end if end if - @:DEALLOCATE(q_prim_vf) + @:DEALLOCATE(q_prim_vf) - ! Deallocating the cell-average RHS variables - do i = 1, sys_size - @:DEALLOCATE(rhs_vf(i)%sf) - end do + ! Deallocating the cell-average RHS variables + do i = 1, sys_size + @:DEALLOCATE(rhs_vf(i)%sf) + end do - @:DEALLOCATE(rhs_vf) + @:DEALLOCATE(rhs_vf) - ! Writing the footer of and closing the run-time information file - if (proc_rank == 0 .and. run_time_info) then - call s_close_run_time_information_file() - end if + ! 
Writing the footer of and closing the run-time information file + if (proc_rank == 0 .and. run_time_info) then + call s_close_run_time_information_file() + end if - end subroutine s_finalize_time_steppers_module + end subroutine s_finalize_time_steppers_module - end module m_time_steppers +end module m_time_steppers From 33eca5ff17fd2a7c76160e8ef8862cbe2ffea090 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 21 Jul 2025 19:11:35 -0400 Subject: [PATCH 11/60] Moved macro code to their corresponding file, and finished enter data, exit data, and update --- src/common/include/acc_macros.fpp | 60 +++++++ src/common/include/omp_macros.fpp | 16 +- src/common/include/parallel_macros.fpp | 158 +++--------------- src/common/include/shared_parallel_macros.fpp | 8 + 4 files changed, 98 insertions(+), 144 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index 77e896f91e..c70d065f90 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -1,5 +1,65 @@ #:include 'shared_parallel_macros.fpp' +#:def GEN_COPY_STR(copy) + #:set copy_val = GEN_PARENTHESES_CLAUSE('copy', copy) + $:copy_val +#:enddef + +#:def GEN_COPYIN_STR(copyin, readonly) + #:assert isinstance(readonly, bool) + #:set copyin_val = GEN_PARENTHESES_CLAUSE('copyin', copyin) + #:if copyin is not None and readonly == True + #:set index = copyin_val.find('copyin(') + len('copyin(') + #:set copyin_val = copyin_val[:index] + 'readonly:' + copyin_val[index:] + #:endif + $:copyin_val +#:enddef + +#:def GEN_COPYOUT_STR(copyout) + #:set copyout_val = GEN_PARENTHESES_CLAUSE('copyout', copyout) + $:copyout_val +#:enddef + +#:def GEN_CREATE_STR(create) + #:set create_val = GEN_PARENTHESES_CLAUSE('create', create) + $:create_val +#:enddef + +#:def GEN_NOCREATE_STR(no_create) + #:set nocreate_val = GEN_PARENTHESES_CLAUSE('no_create', no_create) + $:nocreate_val +#:enddef + +#:def GEN_DELETE_STR(delete) + #:set delete_val = 
GEN_PARENTHESES_CLAUSE('delete', delete) + $:delete_val +#:enddef + +#:def GEN_PRESENT_STR(present) + #:set present_val = GEN_PARENTHESES_CLAUSE('present', present) + $:present_val +#:enddef + +#:def GEN_DEVICEPTR_STR(deviceptr) + #:set deviceptr_val = GEN_PARENTHESES_CLAUSE('deviceptr', deviceptr) + $:deviceptr_val +#:enddef + +#:def GEN_ATTACH_STR(attach) + #:set attach_val = GEN_PARENTHESES_CLAUSE('attach', attach) + $:attach_val +#:enddef + +#:def GEN_DETACH_STR(detach) + #:set detach_val = GEN_PARENTHESES_CLAUSE('detach', detach) + $:detach_val +#:enddef + +#:def GEN_LINK_STR(link) + #:set link_val = GEN_PARENTHESES_CLAUSE('link', link) + $:link_val +#:enddef + #:def ACC_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index af89d0b5a8..70786ad38a 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -80,11 +80,15 @@ #:enddef #:def OMP_TO_STR(to) - #! 
Not yet implemented #:set to_val = GEN_PARENTHESES_CLAUSE('to', to) $:to_val #:enddef +#:def OMP_FROM_STR(to) + #:set from_val = GEN_PARENTHESES_CLAUSE('from', to) + $:from_val +#:enddef + #:def OMP_PARALLELISM_STR(parallelism) #:set temp = '' $:temp @@ -225,7 +229,7 @@ #:set create_val = OMP_CREATE_STR(create) #:set attach_val = OMP_MAP_STR('to', attach) #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) - #:set omp_clause_val = to_val.strip('\n') + alloc_val.strip('\n') + alloc_val2.strip('\n') + #:set omp_clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') #:set omp_directive = '!$omp target enter data ' + omp_clause_val + extraOmpArgs_val.strip('\n') $:omp_directive #:enddef @@ -236,16 +240,16 @@ #:set detach_val = OMP_MAP_STR('from', detach) #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n') - #:set omp_directive = '!$omp target exit data ' + clause_val + extraOmpArgs.strip('\n') + #:set omp_directive = '!$omp target exit data ' + clause_val + extraOmpArgs_val.strip('\n') $:omp_directive #:enddef -#:def OMP_UPDATE(host=None, device=None, extraAccArgs=None) +#:def OMP_UPDATE(host=None, device=None, extraOmpArgs=None) #:set host_val = OMP_FROM_STR(host) #:set device_val = OMP_TO_STR(device) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set clause_val = host_val.strip('\n') + device_val.strip('\n') - #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') + #:set acc_directive = '!$omp target update ' + clause_val + extraOmpArgs_val.strip('\n') $:acc_directive #:enddef ! 
New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 5f0f95b3a6..7dfacd0829 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -2,112 +2,6 @@ #:include 'omp_macros.fpp' #:include 'acc_macros.fpp' -#:def ASSERT_LIST(data, datatype) - #:assert data is not None - #:assert isinstance(data, list) - #:assert len(data) != 0 - #:assert all(isinstance(element, datatype) for element in data) -#:enddef - -#:def GEN_PARENTHESES_CLAUSE(clause_name, clause_str) - #:set clause_regex = re.compile(',(?![^(]*\\))') - #:assert isinstance(clause_name, str) - #:if clause_str is not None - #:set count = 0 - #:assert isinstance(clause_str, str) - #:assert clause_str[0] == '[' and clause_str[-1] == ']' - #:for c in clause_str - #:if c == '(' - #:set count = count + 1 - #:elif c == ')' - #:set count = count - 1 - #:endif - #:if c == ',' and count > 1 - #:stop 'Nested parentheses with comma inside is not supported. Incorrect clause: {}'.format(clause_str) - #:elif count < 0 - #:stop 'Missing parentheses. 
Incorrect clause: {}'.format(clause_str) - #:endif - #:endfor - #:set clause_str = re.sub(clause_regex, ';', clause_str) - #:set clause_list = [x.strip() for x in clause_str.strip('[]').split(';')] - $:ASSERT_LIST(clause_list, str) - #:set clause_str = clause_name + '(' + ', '.join(clause_list) + ') ' - #:else - #:set clause_str = '' - #:endif - $:clause_str -#:enddef - -#:def GEN_PRIVATE_STR(private, initialized_values) - #:assert isinstance(initialized_values, bool) - #:if initialized_values == True - #:set private_val = GEN_PARENTHESES_CLAUSE('firstprivate', private) - #:else - #:set private_val = GEN_PARENTHESES_CLAUSE('private', private) - #:endif - $:private_val -#:enddef - -#:def GEN_COPY_STR(copy) - #:set copy_val = GEN_PARENTHESES_CLAUSE('copy', copy) - $:copy_val -#:enddef - -#:def GEN_COPYIN_STR(copyin, readonly) - #:assert isinstance(readonly, bool) - #:set copyin_val = GEN_PARENTHESES_CLAUSE('copyin', copyin) - #:if copyin is not None and readonly == True - #:set index = copyin_val.find('copyin(') + len('copyin(') - #:set copyin_val = copyin_val[:index] + 'readonly:' + copyin_val[index:] - #:endif - $:copyin_val -#:enddef - -#:def GEN_COPYOUT_STR(copyout) - #:set copyout_val = GEN_PARENTHESES_CLAUSE('copyout', copyout) - $:copyout_val -#:enddef - -#:def GEN_CREATE_STR(create) - #:set create_val = GEN_PARENTHESES_CLAUSE('create', create) - $:create_val -#:enddef - -#:def GEN_NOCREATE_STR(no_create) - #:set nocreate_val = GEN_PARENTHESES_CLAUSE('no_create', no_create) - $:nocreate_val -#:enddef - -#:def GEN_DELETE_STR(delete) - #:set delete_val = GEN_PARENTHESES_CLAUSE('delete', delete) - $:delete_val -#:enddef - -#:def GEN_PRESENT_STR(present) - #:set present_val = GEN_PARENTHESES_CLAUSE('present', present) - $:present_val -#:enddef - -#:def GEN_DEVICEPTR_STR(deviceptr) - #:set deviceptr_val = GEN_PARENTHESES_CLAUSE('deviceptr', deviceptr) - $:deviceptr_val -#:enddef - -#:def GEN_ATTACH_STR(attach) - #:set attach_val = GEN_PARENTHESES_CLAUSE('attach', 
attach) - $:attach_val -#:enddef - -#:def GEN_DETACH_STR(detach) - #:set detach_val = GEN_PARENTHESES_CLAUSE('detach', detach) - $:detach_val -#:enddef - -#:def GEN_LINK_STR(link) - #:set link_val = GEN_PARENTHESES_CLAUSE('link', link) - $:link_val -#:enddef - #:def GEN_PARALLELISM_STR(parallelism) #:if parallelism is not None #:assert isinstance(parallelism, str) @@ -274,9 +168,9 @@ $:end_acc_directive #:enddef -#:def TEMP_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None, extraOmpArgs=None) - #:set acc_code = ACC_ENTER_DATA(copy=copy, copyin=copyin, copyinReadOnly=copyinReadOnly, create=create, no_create=no_create, attach=attach, extraAccArgs=extraAccArgs) - #:set omp_code = OMP_ENTER_DATA(copy=copy, copyin=copyin, copyinReadOnly=copyinReadOnly, create=create, no_create=no_create, attach=attach, extraOmpArgs=extraOmpArgs) +#:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None, extraOmpArgs=None) + #:set acc_code = ACC_ENTER_DATA(copyin=copyin, copyinReadOnly=copyinReadOnly, create=create, attach=attach, extraAccArgs=extraAccArgs) + #:set omp_code = OMP_ENTER_DATA(copyin=copyin, copyinReadOnly=copyinReadOnly, create=create, attach=attach, extraOmpArgs=extraOmpArgs) #if defined(MFC_OpenACC) $:acc_code @@ -285,29 +179,15 @@ #endif #:enddef -#:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set create_val = GEN_CREATE_STR(create) - #:set attach_val = GEN_ATTACH_STR(attach) - #! #:set to_val = GEN_TO_STR(copyin) - #! #:set alloc_val = GEN_ALLOC_STR(create) - #! #:set alloc_val2 = GEN_ALLOC_STR(attach) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #! #:set extraMpArgs_val = '' - #:set acc_clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') - #! 
#:set mp_clause_val = to_val.strip('\n') + alloc_val.strip('\n') + alloc_val2.strip('\n') - #:set acc_directive = '!$acc enter data ' + acc_clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef +#:def GPU_EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None, extraOmpArgs=None) + #:set acc_code = ACC_EXIT_DATA(copyout=copyout, delete=delete, detach=detach, extraAccArgs=extraAccArgs) + #:set omp_code = OMP_EXIT_DATA(copyout=copyout, delete=delete, detach=detach, extraOmpArgs=extraOmpArgs) -#:def GPU_EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None) - #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set delete_val = GEN_DELETE_STR(delete) - #:set detach_val = GEN_DETACH_STR(detach) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n') - #:set acc_directive = '!$acc exit data ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#endif #:enddef #:def GPU_ATOMIC(atomic, extraAccArgs=None) @@ -320,13 +200,15 @@ $:acc_directive #:enddef -#:def GPU_UPDATE(host=None, device=None, extraAccArgs=None) - #:set host_val = GEN_HOST_STR(host) - #:set device_val = GEN_DEVICE_STR(device) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = host_val.strip('\n') + device_val.strip('\n') - #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive +#:def GPU_UPDATE(host=None, device=None, extraAccArgs=None, extraOmpArgs=None) + #:set acc_code = ACC_UPDATE(host=host, device=device, extraAccArgs=extraAccArgs) + #:set omp_code = OMP_UPDATE(host=host, device=device, extraOmpArgs=extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#endif #:enddef #:def GPU_WAIT(extraAccArgs=None) diff --git a/src/common/include/shared_parallel_macros.fpp 
b/src/common/include/shared_parallel_macros.fpp index eadb2211f4..d82f671d5a 100644 --- a/src/common/include/shared_parallel_macros.fpp +++ b/src/common/include/shared_parallel_macros.fpp @@ -1,3 +1,10 @@ +#:def ASSERT_LIST(data, datatype) + #:assert data is not None + #:assert isinstance(data, list) + #:assert len(data) != 0 + #:assert all(isinstance(element, datatype) for element in data) +#:enddef + #:def GEN_CLAUSE(clause_name, clause_str) #:set clause_regex = re.compile(',(?![^(]*\\))') #:assert isinstance(clause_name, str) @@ -39,6 +46,7 @@ $:clause_str #:enddef + #:def GEN_PRIVATE_STR(private, initialized_values) #:assert isinstance(initialized_values, bool) #:if initialized_values == True From 11822f5f27ba31abe501c58fe20fdf06f341f627 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 21 Jul 2025 21:46:29 -0400 Subject: [PATCH 12/60] remove line that sets default_val to empty string --- src/common/include/omp_macros.fpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 70786ad38a..5632a34fc3 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -132,7 +132,6 @@ #:set collapse_val = GEN_COLLAPSE_STR(collapse) #:set parallelism_val = OMP_PARALLELISM_STR(parallelism) #:set default_val = OMP_DEFAULT_STR(default) - #:set default_val = '' #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) #:set copy_val = OMP_COPY_STR(copy) From 314fa1393dcd7d954d579d95f21f91932de44adf Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 22 Jul 2025 14:53:24 -0400 Subject: [PATCH 13/60] Fixed GPU_PARALLEL for omp and ran formatter --- src/common/include/omp_macros.fpp | 8 +- src/common/include/shared_parallel_macros.fpp | 1 - src/common/m_boundary_common.fpp | 230 +- src/common/m_variables_conversion.fpp | 42 +- 
src/simulation/m_derived_variables.fpp | 418 +- src/simulation/m_igr.fpp | 3472 ++++++++--------- src/simulation/m_rhs.fpp | 286 +- src/simulation/m_surface_tension.fpp | 34 +- src/simulation/m_time_steppers.fpp | 96 +- src/simulation/m_weno.fpp | 28 +- 10 files changed, 2315 insertions(+), 2300 deletions(-) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 5632a34fc3..5566dba088 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -17,8 +17,8 @@ #:assert isinstance(default, str) #:assert (default == 'present' or default == 'none') #:if default == 'present' - #! #:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) defaultmap(present:pointer)' - #:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer)' + #! #:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) defaultmap(present:pointer) ' + #:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) ' #:elif default == 'none' #:stop 'Not Supported Yet' #:endif @@ -113,12 +113,12 @@ & copy_val.strip('\n') + copyin_val.strip('\n') + & & copyout_val.strip('\n') + create_val.strip('\n') + & & no_create_val.strip('\n') + present_val.strip('\n') + & - & deviceptr_val.strip('\n') + attach_val.strip('\n')) + & deviceptr_val.strip('\n') + attach_val.strip('\n') #:set omp_clause_val = omp_clause_val.strip('\n') #:set omp_directive = '!$omp target teams ' + omp_clause_val + extraOmpArgs_val.strip('\n') - #:set end_omp_directive = '!$omp end target teams' + #:set omp_end_directive = '!$omp end target teams' $:omp_directive $:code $:omp_end_directive diff --git a/src/common/include/shared_parallel_macros.fpp b/src/common/include/shared_parallel_macros.fpp index d82f671d5a..a2039fd2a4 100644 --- a/src/common/include/shared_parallel_macros.fpp +++ b/src/common/include/shared_parallel_macros.fpp @@ 
-46,7 +46,6 @@ $:clause_str #:enddef - #:def GEN_PRIVATE_STR(private, initialized_values) #:assert isinstance(initialized_values, bool) #:if initialized_values == True diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index bb92ec70c1..3c21279314 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -1482,56 +1482,59 @@ contains integer :: j, k, l - #:call GPU_PARALLEL() - jac_sf(1)%sf => jac - #:endcall GPU_PARALLEL + jac_sf(1)%sf => jac + $:GPU_UPDATE(device='[jac_sf(1)%sf]') if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 1, -1, 1) else - $:GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, -1)%sf(0, k, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac(-j, k, l) = jac(m - j + 1, k, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac(-j, k, l) = jac(j - 1, k, l) - end do - case default - do j = 1, buff_size - jac(-j, k, l) = jac(0, k, l) - end do - end select + #:call GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do k = 0, n + select case (bc_type(1, -1)%sf(0, k, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac(-j, k, l) = jac(m - j + 1, k, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac(-j, k, l) = jac(j - 1, k, l) + end do + case default + do j = 1, buff_size + jac(-j, k, l) = jac(0, k, l) + end do + end select + end do end do - end do + #:endcall GPU_PARALLEL_LOOP + end if if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 1, 1, 1) else - $:GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, 1)%sf(0, k, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac(m + j, k, l) = jac(j - 1, k, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac(m + j, k, l) = jac(m - (j - 1), k, l) - end do - case default - do j = 1, buff_size - jac(m + j, k, l) = jac(m, k, l) - end do - end select + #:call GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do k = 0, n + select 
case (bc_type(1, 1)%sf(0, k, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac(m + j, k, l) = jac(j - 1, k, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac(m + j, k, l) = jac(m - (j - 1), k, l) + end do + case default + do j = 1, buff_size + jac(m + j, k, l) = jac(m, k, l) + end do + end select + end do end do - end do + #:endcall GPU_PARALLEL_LOOP + end if if (n == 0) then @@ -1539,49 +1542,52 @@ contains else if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 2, -1, 1) else - $:GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(2, -1)%sf(k, 0, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac(k, -j, l) = jac(k, n - j + 1, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac(k, -j, l) = jac(k, j - 1, l) - end do - case default - do j = 1, buff_size - jac(k, -j, l) = jac(k, 0, l) - end do - end select + #:call GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(2, -1)%sf(k, 0, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac(k, -j, l) = jac(k, n - j + 1, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac(k, -j, l) = jac(k, j - 1, l) + end do + case default + do j = 1, buff_size + jac(k, -j, l) = jac(k, 0, l) + end do + end select + end do end do - end do + #:endcall GPU_PARALLEL_LOOP + end if if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 2, 1, 1) else - $:GPU_PARALLEL_LOOP(collapse=2) - do l = 0, p - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(2, 1)%sf(k, 0, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac(k, n + j, l) = jac(k, j - 1, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac(k, n + j, l) = jac(k, n - (j - 1), l) - end do - case default - do j = 1, buff_size - jac(k, n + j, l) = jac(k, n, l) - end do - end select + #:call GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do k = idwbuff(1)%beg, idwbuff(1)%end + select case 
(bc_type(2, 1)%sf(k, 0, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac(k, n + j, l) = jac(k, j - 1, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac(k, n + j, l) = jac(k, n - (j - 1), l) + end do + case default + do j = 1, buff_size + jac(k, n + j, l) = jac(k, n, l) + end do + end select + end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if if (p == 0) then @@ -1589,49 +1595,51 @@ contains else if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 3, -1, 1) else - $:GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(2)%beg, idwbuff(2)%end - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(3, -1)%sf(k, l, 0)) - case (BC_PERIODIC) - do j = 1, buff_size - jac(k, l, -j) = jac(k, l, p - j + 1) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac(k, l, -j) = jac(k, l, j - 1) - end do - case default - do j = 1, buff_size - jac(k, l, -j) = jac(k, l, 0) - end do - end select + #:call GPU_PARALLEL_LOOP(collapse=2) + do l = idwbuff(2)%beg, idwbuff(2)%end + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(3, -1)%sf(k, l, 0)) + case (BC_PERIODIC) + do j = 1, buff_size + jac(k, l, -j) = jac(k, l, p - j + 1) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac(k, l, -j) = jac(k, l, j - 1) + end do + case default + do j = 1, buff_size + jac(k, l, -j) = jac(k, l, 0) + end do + end select + end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 3, 1, 1) else - $:GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(2)%beg, idwbuff(2)%end - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(3, 1)%sf(k, l, 0)) - case (BC_PERIODIC) - do j = 1, buff_size - jac(k, l, p + j) = jac(k, l, j - 1) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac(k, l, p + j) = jac(k, l, p - (j - 1)) - end do - case default - do j = 1, buff_size - jac(k, l, p + j) = jac(k, l, p) - end do - end select + #:call GPU_PARALLEL_LOOP(collapse=2) + do l 
= idwbuff(2)%beg, idwbuff(2)%end + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(3, 1)%sf(k, l, 0)) + case (BC_PERIODIC) + do j = 1, buff_size + jac(k, l, p + j) = jac(k, l, j - 1) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac(k, l, p + j) = jac(k, l, p - (j - 1)) + end do + case default + do j = 1, buff_size + jac(k, l, p + j) = jac(k, l, p) + end do + end select + end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_populate_F_igr_buffers diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 1a49e7e5b6..0bffeee879 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -879,27 +879,27 @@ contains do j = ibounds(1)%beg, ibounds(1)%end dyn_pres_K = 0._wp - if (igr) then - if (num_fluids == 1) then - alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) - alpha_K(1) = qK_cons_vf(advxb)%sf(j, k, l) - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) - alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) - end do - - alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) - alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) - end if - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) - alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) - end do - end if + if (igr) then + if (num_fluids == 1) then + alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) + alpha_K(1) = qK_cons_vf(advxb)%sf(j, k, l) + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) + alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + + alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) + alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) + end if + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) + 
alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + end if if (model_eqns /= 4) then #ifdef MFC_SIMULATION diff --git a/src/simulation/m_derived_variables.fpp b/src/simulation/m_derived_variables.fpp index 18c88f85c3..f4653a9366 100644 --- a/src/simulation/m_derived_variables.fpp +++ b/src/simulation/m_derived_variables.fpp @@ -146,23 +146,24 @@ contains z_accel) end if - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (p > 0) then - accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & - y_accel(i, j, k)**2._wp + & - z_accel(i, j, k)**2._wp) - elseif (n > 0) then - accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & - y_accel(i, j, k)**2._wp) - else - accel_mag(i, j, k) = x_accel(i, j, k) - end if + #:call GPU_PARALLEL_LOOP(collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + if (p > 0) then + accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & + y_accel(i, j, k)**2._wp + & + z_accel(i, j, k)**2._wp) + elseif (n > 0) then + accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & + y_accel(i, j, k)**2._wp) + else + accel_mag(i, j, k) = x_accel(i, j, k) + end if + end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP $:GPU_UPDATE(host='[accel_mag]') @@ -203,66 +204,35 @@ contains ! 
Computing the acceleration component in the x-coordinate direction if (i == 1) then - $:GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb)%sf(j, k, l) & - - 18._wp*q_prim_vf1(momxb)%sf(j, k, l) & - + 9._wp*q_prim_vf2(momxb)%sf(j, k, l) & - - 2._wp*q_prim_vf3(momxb)%sf(j, k, l))/(6._wp*dt) - end do - end do - end do - - if (n == 0) then - $:GPU_PARALLEL_LOOP(collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb)%sf(r + j, k, l) - end do - end do - end do - end do - elseif (p == 0) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb)%sf(j, r + k, l) - end do + q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb)%sf(j, k, l) & + - 18._wp*q_prim_vf1(momxb)%sf(j, k, l) & + + 9._wp*q_prim_vf2(momxb)%sf(j, k, l) & + - 2._wp*q_prim_vf3(momxb)%sf(j, k, l))/(6._wp*dt) end do end do end do - else - if (grid_geometry == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:endcall GPU_PARALLEL_LOOP + + if (n == 0) then + #:call GPU_PARALLEL_LOOP(collapse=4) do l = 0, p do k = 0, n do j = 0, m do r = -fd_number, fd_number q_sf(j, k, l) = q_sf(j, k, l) & + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb)%sf(j, k, r + l)/y_cc(k) + q_prim_vf0(momxb)%sf(r + j, k, l) end do end do end do end do - else - $:GPU_PARALLEL_LOOP(collapse=4) + #:endcall GPU_PARALLEL_LOOP + elseif (p == 0) then + #:call 
GPU_PARALLEL_LOOP(collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -271,47 +241,68 @@ contains + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & q_prim_vf0(momxb)%sf(r + j, k, l) & + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb)%sf(j, k, r + l) + q_prim_vf0(momxb)%sf(j, r + k, l) end do end do end do end do + #:endcall GPU_PARALLEL_LOOP + else + if (grid_geometry == 3) then + #:call GPU_PARALLEL_LOOP(collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb)%sf(j, k, r + l)/y_cc(k) + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + else + #:call GPU_PARALLEL_LOOP(collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb)%sf(j, k, r + l) + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP end if end if ! 
Computing the acceleration component in the y-coordinate direction elseif (i == 2) then - $:GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb + 1)%sf(j, k, l) & - - 18._wp*q_prim_vf1(momxb + 1)%sf(j, k, l) & - + 9._wp*q_prim_vf2(momxb + 1)%sf(j, k, l) & - - 2._wp*q_prim_vf3(momxb + 1)%sf(j, k, l))/(6._wp*dt) - end do - end do - end do - - if (p == 0) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb + 1)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb + 1)%sf(j, r + k, l) - end do + q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb + 1)%sf(j, k, l) & + - 18._wp*q_prim_vf1(momxb + 1)%sf(j, k, l) & + + 9._wp*q_prim_vf2(momxb + 1)%sf(j, k, l) & + - 2._wp*q_prim_vf3(momxb + 1)%sf(j, k, l))/(6._wp*dt) end do end do end do - else - if (grid_geometry == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) + #:endcall GPU_PARALLEL_LOOP + + if (p == 0) then + #:call GPU_PARALLEL_LOOP(collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -320,83 +311,105 @@ contains + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & q_prim_vf0(momxb + 1)%sf(r + j, k, l) & + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb + 1)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb + 1)%sf(j, k, r + l)/y_cc(k) & - - (q_prim_vf0(momxe)%sf(j, k, l)**2._wp)/y_cc(k) + q_prim_vf0(momxb + 1)%sf(j, r + k, l) end do end do end do end do + #:endcall GPU_PARALLEL_LOOP + else + if (grid_geometry == 3) then + #:call GPU_PARALLEL_LOOP(collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb + 1)%sf(r + j, k, l) & + + 
q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb + 1)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb + 1)%sf(j, k, r + l)/y_cc(k) & + - (q_prim_vf0(momxe)%sf(j, k, l)**2._wp)/y_cc(k) + end do + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb + 1)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb + 1)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb + 1)%sf(j, k, r + l) + #:call GPU_PARALLEL_LOOP(collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb + 1)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb + 1)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb + 1)%sf(j, k, r + l) + end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if end if ! 
Computing the acceleration component in the z-coordinate direction else - $:GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - q_sf(j, k, l) = (11._wp*q_prim_vf0(momxe)%sf(j, k, l) & - - 18._wp*q_prim_vf1(momxe)%sf(j, k, l) & - + 9._wp*q_prim_vf2(momxe)%sf(j, k, l) & - - 2._wp*q_prim_vf3(momxe)%sf(j, k, l))/(6._wp*dt) + #:call GPU_PARALLEL_LOOP(collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + q_sf(j, k, l) = (11._wp*q_prim_vf0(momxe)%sf(j, k, l) & + - 18._wp*q_prim_vf1(momxe)%sf(j, k, l) & + + 9._wp*q_prim_vf2(momxe)%sf(j, k, l) & + - 2._wp*q_prim_vf3(momxe)%sf(j, k, l))/(6._wp*dt) + end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP if (grid_geometry == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxe)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxe)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxe)%sf(j, k, r + l)/y_cc(k) & - + (q_prim_vf0(momxe)%sf(j, k, l)* & - q_prim_vf0(momxb + 1)%sf(j, k, l))/y_cc(k) + #:call GPU_PARALLEL_LOOP(collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxe)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxe)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxe)%sf(j, k, r + l)/y_cc(k) & + + (q_prim_vf0(momxe)%sf(j, k, l)* & + q_prim_vf0(momxb + 1)%sf(j, k, l))/y_cc(k) + end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP else - $:GPU_PARALLEL_LOOP(collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, 
l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxe)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxe)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxe)%sf(j, k, r + l) + #:call GPU_PARALLEL_LOOP(collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxe)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxe)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxe)%sf(j, k, r + l) + end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if end if @@ -425,78 +438,81 @@ contains end do if (n == 0) then !1D simulation - $:GPU_PARALLEL_LOOP(collapse=3,private='[dV]') - do l = 0, p !Loop over grid - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids !Loop over individual fluids - dV = dx(j) - ! Mass - $:GPU_ATOMIC(atomic='update') - c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV - ! x-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) - ! Volume fraction - $:GPU_ATOMIC(atomic='update') - c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV + #:call GPU_PARALLEL_LOOP(collapse=3,private='[dV]') + do l = 0, p !Loop over grid + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids !Loop over individual fluids + dV = dx(j) + ! Mass + $:GPU_ATOMIC(atomic='update') + c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV + ! x-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) + ! 
Volume fraction + $:GPU_ATOMIC(atomic='update') + c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV + end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP elseif (p == 0) then !2D simulation - $:GPU_PARALLEL_LOOP(collapse=3,private='[dV]') - do l = 0, p !Loop over grid - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids !Loop over individual fluids - dV = dx(j)*dy(k) - ! Mass - $:GPU_ATOMIC(atomic='update') - c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV - ! x-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) - ! y-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) - ! Volume fraction - $:GPU_ATOMIC(atomic='update') - c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV + #:call GPU_PARALLEL_LOOP(collapse=3,private='[dV]') + do l = 0, p !Loop over grid + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids !Loop over individual fluids + dV = dx(j)*dy(k) + ! Mass + $:GPU_ATOMIC(atomic='update') + c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV + ! x-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) + ! y-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) + ! Volume fraction + $:GPU_ATOMIC(atomic='update') + c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV + end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP else !3D simulation - $:GPU_PARALLEL_LOOP(collapse=3,private='[dV]') - do l = 0, p !Loop over grid - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids !Loop over individual fluids - - dV = dx(j)*dy(k)*dz(l) - ! Mass - $:GPU_ATOMIC(atomic='update') - c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV - ! 
x-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) - ! y-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) - ! z-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 4) = c_m(i, 4) + q_vf(i)%sf(j, k, l)*dV*z_cc(l) - ! Volume fraction - $:GPU_ATOMIC(atomic='update') - c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV + #:call GPU_PARALLEL_LOOP(collapse=3,private='[dV]') + do l = 0, p !Loop over grid + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids !Loop over individual fluids + + dV = dx(j)*dy(k)*dz(l) + ! Mass + $:GPU_ATOMIC(atomic='update') + c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV + ! x-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) + ! y-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) + ! z-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 4) = c_m(i, 4) + q_vf(i)%sf(j, k, l)*dV*z_cc(l) + ! 
Volume fraction + $:GPU_ATOMIC(atomic='update') + c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV + end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if $:GPU_UPDATE(host='[c_m]') diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index db80bb8346..59684c9e05 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -104,15 +104,16 @@ contains idwbuff(3)%beg:idwbuff(3)%end)) end if - $:GPU_PARALLEL_LOOP(collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - jac(j, k, l) = 0._wp - if (igr_iter_solver == 1) jac_old(j, k, l) = 0._wp + #:call GPU_PARALLEL_LOOP(collapse=3) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + jac(j, k, l) = 0._wp + if (igr_iter_solver == 1) jac_old(j, k, l) = 0._wp + end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP if (p == 0) then alf_igr = alf_factor*max(dx(1), dy(1))**2._wp @@ -181,82 +182,83 @@ contains end if do q = 1, num_iters - $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_lx, rho_rx, rho_ly, & - & rho_ry, rho_lz, rho_rz, fd_coeff]') - do l = 0, p - do k = 0, n - do j = 0, m - rho_lx = 0._wp - rho_rx = 0._wp - rho_ly = 0._wp - rho_ry = 0._wp - rho_lz = 0._wp - rho_rz = 0._wp - fd_coeff = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_lx = rho_lx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j - 1, k, l))/2._wp - rho_rx = rho_rx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j + 1, k, l))/2._wp - rho_ly = rho_ly + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k - 1, l))/2._wp - rho_ry = rho_ry + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k + 1, l))/2._wp - if (p > 0) then - rho_lz = rho_lz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l - 1))/2._wp - rho_rz = rho_rz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l + 1))/2._wp - end if - fd_coeff = fd_coeff + q_cons_vf(i)%sf(j, 
k, l) - end do + #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_lx, rho_rx, rho_ly, rho_ry, rho_lz, rho_rz, fd_coeff]') + do l = 0, p + do k = 0, n + do j = 0, m + rho_lx = 0._wp + rho_rx = 0._wp + rho_ly = 0._wp + rho_ry = 0._wp + rho_lz = 0._wp + rho_rz = 0._wp + fd_coeff = 0._wp - fd_coeff = 1._wp/fd_coeff + alf_igr* & - ((1._wp/dx(j)**2._wp)*(1._wp/rho_lx + 1._wp/rho_rx) + & - (1._wp/dy(k)**2._wp)*(1._wp/rho_ly + 1._wp/rho_ry)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_lx = rho_lx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j - 1, k, l))/2._wp + rho_rx = rho_rx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j + 1, k, l))/2._wp + rho_ly = rho_ly + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k - 1, l))/2._wp + rho_ry = rho_ry + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k + 1, l))/2._wp + if (p > 0) then + rho_lz = rho_lz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l - 1))/2._wp + rho_rz = rho_rz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l + 1))/2._wp + end if + fd_coeff = fd_coeff + q_cons_vf(i)%sf(j, k, l) + end do - if (num_dims == 3) then - fd_coeff = fd_coeff + alf_igr*(1._wp/dz(l)**2._wp)*(1._wp/rho_lz + 1._wp/rho_rz) - end if + fd_coeff = 1._wp/fd_coeff + alf_igr* & + ((1._wp/dx(j)**2._wp)*(1._wp/rho_lx + 1._wp/rho_rx) + & + (1._wp/dy(k)**2._wp)*(1._wp/rho_ly + 1._wp/rho_ry)) - if (igr_iter_solver == 1) then ! 
Jacobi iteration if (num_dims == 3) then - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry) + & - (1._wp/dz(l)**2._wp)*(jac_old(j, k, l - 1)/rho_lz + jac_old(j, k, l + 1)/rho_rz)) + & - jac_rhs(j, k, l)/fd_coeff - else - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry)) + & - jac_rhs(j, k, l)/fd_coeff + fd_coeff = fd_coeff + alf_igr*(1._wp/dz(l)**2._wp)*(1._wp/rho_lz + 1._wp/rho_rz) end if - else ! Gauss Seidel iteration - if (num_dims == 3) then - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry) + & - (1._wp/dz(l)**2._wp)*(jac(j, k, l - 1)/rho_lz + jac(j, k, l + 1)/rho_rz)) + & - jac_rhs(j, k, l)/fd_coeff - else - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry)) + & - jac_rhs(j, k, l)/fd_coeff + + if (igr_iter_solver == 1) then ! Jacobi iteration + if (num_dims == 3) then + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry) + & + (1._wp/dz(l)**2._wp)*(jac_old(j, k, l - 1)/rho_lz + jac_old(j, k, l + 1)/rho_rz)) + & + jac_rhs(j, k, l)/fd_coeff + else + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry)) + & + jac_rhs(j, k, l)/fd_coeff + end if + else ! 
Gauss Seidel iteration + if (num_dims == 3) then + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry) + & + (1._wp/dz(l)**2._wp)*(jac(j, k, l - 1)/rho_lz + jac(j, k, l + 1)/rho_rz)) + & + jac_rhs(j, k, l)/fd_coeff + else + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry)) + & + jac_rhs(j, k, l)/fd_coeff + end if end if - end if + end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP call s_populate_F_igr_buffers(bc_type, jac) if (igr_iter_solver == 1) then ! Jacobi iteration - $:GPU_PARALLEL_LOOP(collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - jac_old(j, k, l) = jac(j, k, l) + #:call GPU_PARALLEL_LOOP(collapse=3) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + jac_old(j, k, l) = jac(j, k, l) + end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if end do @@ -276,58 +278,58 @@ contains real(wp) :: F_L, vel_L, rho_L, F_R, vel_R, rho_R real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R - $:GPU_PARALLEL_LOOP(collapse=3, private='[F_L, vel_L, alpha_rho_L, & - & F_R, vel_R, alpha_rho_R]') - do l = 0, p - do k = 0, n - do j = -1, m + #:call GPU_PARALLEL_LOOP(collapse=3, private='[F_L, vel_L, alpha_rho_L, F_R, vel_R, alpha_rho_R]') + do l = 0, p + do k = 0, n + do j = -1, m - F_L = 0._wp; F_R = 0._wp - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + F_L = 0._wp; F_R = 0._wp + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = 
alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) - end do + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) + end do - vel_L = vel_L + coeff_L(q)*q_cons_vf(momxb)%sf(j + q, k, l) - F_L = F_L + coeff_L(q)*jac(j + q, k, l) - end do + vel_L = vel_L + coeff_L(q)*q_cons_vf(momxb)%sf(j + q, k, l) + F_L = F_L + coeff_L(q)*jac(j + q, k, l) + end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) + end do + + vel_R = vel_R + coeff_R(q)*q_cons_vf(momxb)%sf(j + q, k, l) + F_R = F_R + coeff_R(q)*jac(j + q, k, l) end do - vel_R = vel_R + coeff_R(q)*q_cons_vf(momxb)%sf(j + q, k, l) - F_R = F_R + coeff_R(q)*jac(j + q, k, l) - end do + vel_L = vel_L/sum(alpha_rho_L) + vel_R = vel_R/sum(alpha_rho_R) - vel_L = vel_L/sum(alpha_rho_L) - vel_R = vel_R/sum(alpha_rho_R) - - #:for LR in ['L', 'R'] - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - 0.5_wp*F_${LR}$*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - 0.5_wp*F_${LR}$*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j)) - #:endfor + #:for LR in ['L', 'R'] + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + 0.5_wp*F_${LR}$*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = 
rhs_vf(E_idx)%sf(j + 1, k, l) + & + 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + 0.5_wp*F_${LR}$*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j)) + #:endfor + end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_igr_sigma_x @@ -355,1244 +357,1688 @@ contains if (idir == 1) then if (p == 0) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, & - & gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, & - & pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, & - & F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, & - & vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = 0, n - do j = -1, m + #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = 0, n + do j = -1, m - dvel = 0._wp - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + dvel = 0._wp + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) + end do + rho_sf_small(i) = rho_L + end do + + 
dvel_small(1) = (1/(2._wp*dx(j)))*( & + 1._wp*q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + 1._wp*q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + + if (q == 0) dvel(:, 1) = dvel_small + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp + end if + + !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) + end do + rho_sf_small(i) = rho_L end do - rho_sf_small(i) = rho_L + + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + + if (q == 0) dvel(:, 2) = dvel_small + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp + end if + + if (q == 0) then + jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1)) & + + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & + + (dvel(1, 1) + dvel(2, 2))**2._wp) + end if end do - dvel_small(1) = (1/(2._wp*dx(j)))*( & - 
1._wp*q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - 1._wp*q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - - if (q == 0) dvel(:, 1) = dvel_small - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp - end if + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp - !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_L(1) = 1._wp + end if + $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) end do - rho_sf_small(i) = rho_L end do - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + 
q, k, l) + end do - if (q == 0) dvel(:, 2) = dvel_small + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_R(1) = 1._wp + end if - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do - if (q == 0) then - jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1)) & - + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & - + (dvel(1, 1) + dvel(2, 2))**2._wp) + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) end if - end do - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) - end do + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) - if (num_fluids > 1) then + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R + + if (viscous) then + mu_L = 0._wp; mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + do i = 1, num_fluids + mu_L = alpha_L(i)/Res(1, i) + mu_L + mu_R = alpha_R(i)/Res(1, i) + mu_R end do - else - alpha_L(1) = 1._wp + + 
$:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) 
= rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) end if + E_L = 0._wp; E_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do - end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do + + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) + do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, 
k, l)*vel_L(1)*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) end do - else - alpha_R(1) = 1._wp end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) - end do - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & + 
0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) - if (viscous) then - mu_L = 0._wp; mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - mu_L = alpha_L(i)/Res(1, i) + mu_L - mu_R = alpha_R(i)/Res(1, i) + mu_R + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) - 
$:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) + end do + end if $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 
0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) - end if - - E_L = 0._wp; E_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) - end do + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + else + #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = 0, n + do j = -1, m - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + dvel = 0._wp + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - do i = 1, num_fluids - 
$:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) + end do + rho_sf_small(i) = rho_L + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) - end do + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 2)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) + if (q == 0) dvel(:, 1) = dvel_small + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + 
vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) + !y-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) + end do + rho_sf_small(i) = rho_L + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + if (q == 0) dvel_small(3) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 2)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + if (q == 0) dvel(:, 2) = dvel_small - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) - end do - end if + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - 
pres_L)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) + !z-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k, l + i) + end do + rho_sf_small(i) = rho_L + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) + dvel_small(1) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + if (q == 0) dvel_small(2) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 2)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + if (q == 0) dvel(:, 3) = dvel_small - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) + if (q > vidxb) then + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) + if (q == 0) then + jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1) & + + dvel(1, 3)*dvel(3, 1) & + + dvel(2, 3)*dvel(3, 2)) & + + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & + + 
dvel(3, 3)**2._wp & + + (dvel(1, 1) + dvel(2, 2) + dvel(3, 3))**2._wp) + end if + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_L(1) = 1._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do - if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 
1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_R(1) = 1._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do end do - end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) - 
$:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + mu_L = alpha_L(i)/Res(1, i) + mu_L + mu_R = alpha_R(i)/Res(1, i) + mu_R + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) - end do - end do - end do - else - $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, & - & gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, & - & pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, & - & F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, & - & vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = 0, n - do j = -1, m + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) - dvel = 0._wp - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) + 
$:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) - end do - rho_sf_small(i) = rho_L - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 2)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - - if (q == 0) dvel(:, 1) = dvel_small - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & + 
0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j + 1)) - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) - end do - rho_sf_small(i) = rho_L - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j)) - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - if (q == 0) dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - if (q == 0) dvel(:, 2) = dvel_small - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j + 1)) - !z-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 
- rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k, l + i) - end do - rho_sf_small(i) = rho_L - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j)) - dvel_small(1) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - if (q == 0) dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - if (q == 0) dvel(:, 3) = dvel_small - - if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) - if (q == 0) then - jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1) & - + dvel(1, 3)*dvel(3, 1) & - + dvel(2, 3)*dvel(3, 2)) & - + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & - + dvel(3, 3)**2._wp & - + (dvel(1, 1) + dvel(2, 2) + dvel(3, 3))**2._wp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 
0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) end if - end do - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + E_L = 0._wp; E_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_L(1) = 1._wp - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do - end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) + 
$:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) end do - else - alpha_R(1) = 1._wp end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) - end do - end do - - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) - end if - - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) - - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) - - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R - - if (viscous) then - mu_L = 0._wp - mu_R = 
0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res(1, i) + mu_L - mu_R = alpha_R(i)/Res(1, i) + mu_R - end do - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_L(1)*(E_L + & + 
pres_L))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j + 1)) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j)) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j + 1)) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j)) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, 
k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) + end do + end if $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) + $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 
0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j + 1))) + $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) + $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) - end if + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) - E_L = 0._wp; E_R = 0._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j))) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j)) 
+ & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + end if + else if (idir == 2) then + if (p == 0) then + #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = -1, n + do j = 0, m + + if (viscous) then + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp + + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) + end do + rho_sf_small(i) = rho_L + end do - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp + end if + + !y-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + 
$:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) + end do + rho_sf_small(i) = rho_L + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp + end if + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) - end do + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp - if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = 
alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + end do + else + alpha_L(1) = 1._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + end do end do - end if - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j + 1))) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + 
coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + end do + else + alpha_R(1) = 1._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j))) + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) - end do + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + mu_L = 
alpha_L(i)/Res(1, i) + mu_L + mu_R = alpha_R(i)/Res(1, i) + mu_R + end do - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) - + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) - + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) - end do - end if - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j + 1))) + rhs_vf(E_idx)%sf(j, k, l) = 
rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, 
l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) - end do - end do - end do - end if - else if (idir == 2) then - if (p == 0) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, & - & gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, & - & pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, & - & F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, & - & vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = -1, n - do j = 0, m + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) + end if - if (viscous) then - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + E_L = 0._wp; E_R = 0._wp + F_L = 0._wp; F_R = 0._wp - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - 
dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_L = F_L + coeff_L(q)*jac(j, k + q, l) + end do - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp - end if + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_R = F_R + coeff_R(q)*jac(j, k + q, l) end do - end if - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) - $:GPU_LOOP(parallelism='[seq]') 
- do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) end do - else - alpha_L(1) = 1._wp end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) - end do - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k 
+ 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = 
rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) end do - else - alpha_R(1) = 1._wp end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + (0.5_wp*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + else + 
#:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = -1, n + do j = 0, m + + if (viscous) then + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp + + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) + end do + rho_sf_small(i) = rho_L + end do - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) - end if + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp + end if + + !y-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) + end do + rho_sf_small(i) = rho_L + end do - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) + 
dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 2)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp + end if + + !z-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + q, l + i) + end do + rho_sf_small(i) = rho_L + end do - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) + dvel_small(2) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 1)%sf(j, k + q, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k + q, l - 1)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 2)%sf(j, k + q, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k + q, l - 1)/rho_sf_small(-1)) + if (q > vidxb) then + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp + end if + end do + end if - vel_L = vel_L/rho_L - 
vel_R = vel_R/rho_R + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res(1, i) + mu_L - mu_R = alpha_R(i)/Res(1, i) + mu_R + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + end do + else + alpha_L(1) = 1._wp + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + end do end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + end do + else + alpha_R(1) = 1._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) - 
$:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 
0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) - end if + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + mu_L = alpha_L(i)/Res(1, i) + mu_L + mu_R = alpha_R(i)/Res(1, i) + mu_R + end do - E_L = 0._wp; E_R = 0._wp - F_L = 0._wp; F_R = 0._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_L = F_L + coeff_L(q)*jac(j, k + q, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_R = F_R + coeff_R(q)*jac(j, k + q, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = 
rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k)) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 
0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) + end if + + E_L = 0._wp; E_R = 0._wp + F_L = 0._wp; F_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_L = F_L + coeff_L(q)*jac(j, k + q, l) end do - end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 
1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_R = F_R + coeff_R(q)*jac(j, k + q, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_L(i)* & + 
vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) + end do + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) - end do + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & + (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, 
k + 1, l) + & + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) - if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + 
(0.5_wp*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) + end do + end if + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + (0.5_wp*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) - end do - end if - $:GPU_ATOMIC(atomic='update') 
- rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + pres_R + F_R)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) + pres_R + F_R))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) + + end do end do end do - end do - else - 
$:GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, & - & gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, & - & pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, & - & F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, & - & vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = -1, n + #:endcall GPU_PARALLEL_LOOP + end if + elseif (idir == 3) then + #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = -1, p + do k = 0, n do j = 0, m if (viscous) then @@ -1615,24 +2061,24 @@ contains rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k, l + q) end do rho_sf_small(i) = rho_L end do dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + q_cons_vf(momxb)%sf(j + 1, k, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k, l + q)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 2)%sf(j + 1, k, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j - 1, k, l + q)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(3)) vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(3)) vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp end if @@ 
-1642,30 +2088,25 @@ contains rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i, l + q) end do rho_sf_small(i) = rho_L end do - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + q_cons_vf(momxb + 1)%sf(j, k + 1, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1, l + q)/rho_sf_small(-1)) dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + q_cons_vf(momxb + 2)%sf(j, k + 1, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k - 1, l + q)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp end if !z-direction contributions @@ -1674,24 +2115,28 @@ contains rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + q, l + i) + rho_L = rho_L + q_cons_vf(r)%sf(j, k, l + i + q) end do rho_sf_small(i) = rho_L end do - + dvel_small(1) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) 
dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j, k + q, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k + q, l - 1)/rho_sf_small(-1)) + q_cons_vf(momxb + 1)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j, k + q, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k + q, l - 1)/rho_sf_small(-1)) + q_cons_vf(momxb + 2)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(3))/3._wp end if if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(3))/3._wp end if end do end if @@ -1704,13 +2149,13 @@ contains do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k, l + q) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) end do else alpha_L(1) = 1._wp @@ -1718,7 +2163,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) end do end do @@ -1726,13 +2171,13 @@ 
contains do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k, l + q) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) end do else alpha_R(1) = 1._wp @@ -1740,7 +2185,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) end do end do @@ -1770,88 +2215,88 @@ contains end do $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') - 
rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k + 1)) + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k + 1)) + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k)) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k)) + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k + 1)) + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 
0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k + 1)) + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k)) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k)) + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 
0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l)) end if E_L = 0._wp; E_R = 0._wp @@ -1859,14 +2304,14 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_L = F_L + coeff_L(q)*jac(j, k + q, l) + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k, l + q) + F_L = F_L + coeff_L(q)*jac(j, k, l + q) end do $:GPU_LOOP(parallelism='[seq]') do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_R = F_R + coeff_R(q)*jac(j, k + q, l) + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k, l + q) + F_R = F_R + coeff_R(q)*jac(j, k, l + q) end do call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & @@ -1876,633 +2321,175 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) + vel_L(3))*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) + vel_L(3))*(1._wp/dz(l)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l))) end do if (num_fluids > 1) then 
$:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) + vel_L(3))*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_L(3)*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) + vel_L(3))*(1._wp/dz(l)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(3)*(1._wp/dz(l))) end do end if $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & + (0.5_wp*(rho_L*(vel_L(3))**2.0 + & + pres_L + F_L)*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) + rhs_vf(momxb)%sf(j, k, l + 1) = 
rhs_vf(momxb)%sf(j, k, l + 1) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & - (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k + 1))) + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & + (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & + (0.5_wp*(vel_L(3)*(E_L + & + pres_L + F_L))*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(3))**2.0 + & + pres_L + F_L)*(1._wp/dz(l)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l))) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l))) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, 
k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) + (0.5_wp*(vel_L(3)*(E_L + & + pres_L + F_L))*(1._wp/dz(l)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dz(l))) $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) + vel_R(3))*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) + vel_R(3))*(1._wp/dz(l)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l))) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) + vel_R(3))*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_R(3)*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) + vel_R(3))*(1._wp/dz(l)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l))) 
$:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(3)*(1._wp/dz(l))) end do end if $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & + (0.5_wp*(rho_R*(vel_R(3))**2.0 + & + pres_R + F_R)*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k + 1))) + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & + (0.5_wp*(vel_R(3)*(E_R + & + pres_R + F_R))*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dz(l + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + 
F_R)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(3))**2.0 + & + pres_R + F_R)*(1._wp/dz(l)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l))) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l))) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) - - end do - end do - end do - end if - elseif (idir == 3) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, & - & gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, & - & pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, & - & F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, & - & vflux_L_arr, vflux_R_arr]') - do l = -1, p - do k = 0, n - do j = 0, m - - if (viscous) then - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp - - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k, l + q) - end do - rho_sf_small(i) = rho_L - end do - - 
dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k, l + q)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 2)%sf(j + 1, k, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j - 1, k, l + q)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i, l + q) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1, l + q)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j, k + 1, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k - 1, l + q)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp - end if - - !z-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k, l + i + q) - end do - rho_sf_small(i) = rho_L - end do - dvel_small(1) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) - dvel_small(2) 
= (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(3))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(3))/3._wp - end if - end do - end if - - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k, l + q) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) - end do - else - alpha_L(1) = 1._wp - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) - end do - end do - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k, l + q) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) - end do - else - alpha_R(1) = 1._wp - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i 
- 1)%sf(j, k, l + q) - end do - end do - - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) - end if - - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) - - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) - - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R - - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res(1, i) + mu_L - mu_R = alpha_R(i)/Res(1, i) + mu_R - end do - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l + 1)) - $:GPU_ATOMIC(atomic='update') - 
rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = 
rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l)) - end if - - E_L = 0._wp; E_R = 0._wp - F_L = 0._wp; F_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k, l + q) - F_L = F_L + coeff_L(q)*jac(j, k, l + q) - end do - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k, l + q) - F_R = F_R + coeff_R(q)*jac(j, k, l + q) - end do - - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(3))*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(3))*(1._wp/dz(l)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l))) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & - (0.5_wp*(alpha_L(i)* & - vel_L(3))*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_L(3)*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(3))*(1._wp/dz(l)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 
1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(3)*(1._wp/dz(l))) - end do - end if - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & - (0.5_wp*(rho_L*(vel_L(3))**2.0 + & - pres_L + F_L)*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & - (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & - (0.5_wp*(vel_L(3)*(E_L + & - pres_L + F_L))*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(3))**2.0 + & - pres_L + F_L)*(1._wp/dz(l)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(3)*(E_L + & - pres_L + F_L))*(1._wp/dz(l)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dz(l))) - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & - (0.5_wp*(alpha_rho_R(i)* & - 
vel_R(3))*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(3))*(1._wp/dz(l)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l))) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & - (0.5_wp*(alpha_R(i)* & - vel_R(3))*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_R(3)*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(3))*(1._wp/dz(l)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(3)*(1._wp/dz(l))) - end do - end if - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & - (0.5_wp*(rho_R*(vel_R(3))**2.0 + & - pres_R + F_R)*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & (0.5_wp*(vel_R(3)*(E_R + & - pres_R + 
F_R))*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dz(l + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(3))**2.0 + & - pres_R + F_R)*(1._wp/dz(l)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(3)*(E_R + & - pres_R + F_R))*(1._wp/dz(l)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dz(l))) + pres_R + F_R))*(1._wp/dz(l)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dz(l))) + end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_igr_riemann_solver @@ -2562,46 +2549,49 @@ contains integer, intent(in) :: idir if (idir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = 1._wp/dx(j)* & - (flux_vf(i)%sf(j - 1, k, l) & - - flux_vf(i)%sf(j, k, l)) + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = 1._wp/dx(j)* & + (flux_vf(i)%sf(j - 1, k, l) & + - flux_vf(i)%sf(j, k, l)) + end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP elseif (idir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_vf(i)%sf(j, k - 1, l) & - - flux_vf(i)%sf(j, k, l)) + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 
1._wp/dy(k)* & + (flux_vf(i)%sf(j, k - 1, l) & + - flux_vf(i)%sf(j, k, l)) + end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP elseif (idir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_vf(i)%sf(j, k, l - 1) & - - flux_vf(i)%sf(j, k, l)) + #:call GPU_PARALLEL_LOOP(collapse=4) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_vf(i)%sf(j, k, l - 1) & + - flux_vf(i)%sf(j, k, l)) + end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_igr_flux_add diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index cf3822dbf3..910420e10e 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -554,18 +554,19 @@ contains end do end if ! end allocation of viscous variables - $:GPU_PARALLEL_LOOP(collapse=4) - do id = 1, num_dims - do i = 1, sys_size - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - flux_gsrc_n(id)%vf(i)%sf(j, k, l) = 0._wp + #:call GPU_PARALLEL_LOOP(collapse=4) + do id = 1, num_dims + do i = 1, sys_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + flux_gsrc_n(id)%vf(i)%sf(j, k, l) = 0._wp + end do end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if ! end allocation for .not. igr @@ -637,37 +638,37 @@ contains if (.not. igr) then ! 
Association/Population of Working Variables #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_cons_qp%vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) + do i = 1, sys_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_cons_qp%vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) + end do end do end do end do - end do - #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP ! Converting Conservative to Primitive Variables if (mpp_lim .and. bubbles_euler) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - alf_sum%sf(j, k, l) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - 1 - alf_sum%sf(j, k, l) = alf_sum%sf(j, k, l) + q_cons_qp%vf(i)%sf(j, k, l) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - 1 - q_cons_qp%vf(i)%sf(j, k, l) = q_cons_qp%vf(i)%sf(j, k, l)*(1._wp - q_cons_qp%vf(alf_idx)%sf(j, k, l)) & - /alf_sum%sf(j, k, l) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + alf_sum%sf(j, k, l) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe - 1 + alf_sum%sf(j, k, l) = alf_sum%sf(j, k, l) + q_cons_qp%vf(i)%sf(j, k, l) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe - 1 + q_cons_qp%vf(i)%sf(j, k, l) = q_cons_qp%vf(i)%sf(j, k, l)*(1._wp - q_cons_qp%vf(alf_idx)%sf(j, k, l)) & + /alf_sum%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end if @@ -728,16 +729,17 @@ contains if (igr) then if (id == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) - do l = -1, p + 1 - do k = -1, n + 1 - do j = -1, m + 1 - do i = 1, sys_size - rhs_vf(i)%sf(j, k, l) = 0._wp + #:call GPU_PARALLEL_LOOP(collapse=4) + do l = 
-1, p + 1 + do k = -1, n + 1 + do j = -1, m + 1 + do i = 1, sys_size + rhs_vf(i)%sf(j, k, l) = 0._wp + end do end do end do end do - end do + #:endcall GPU_PARALLEL_LOOP end if call nvtxStartRange("IGR_RIEMANN") @@ -978,16 +980,16 @@ contains if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then if (.not. igr) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_prim_vf(i)%sf(j, k, l) = q_prim_qp%vf(i)%sf(j, k, l) + do i = 1, sys_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_prim_vf(i)%sf(j, k, l) = q_prim_qp%vf(i)%sf(j, k, l) + end do end do end do end do - end do - #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP end if end if @@ -1483,51 +1485,51 @@ contains if (surface_tension) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(c_idx)%sf(j, k, l) = & - rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dx(j)* & - q_prim_vf(c_idx)%sf(j, k, l)* & - (flux_src_n_in(advxb)%sf(j, k, l) - & - flux_src_n_in(advxb)%sf(j - 1, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(c_idx)%sf(j, k, l) = & + rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dx(j)* & + q_prim_vf(c_idx)%sf(j, k, l)* & + (flux_src_n_in(advxb)%sf(j, k, l) - & + flux_src_n_in(advxb)%sf(j - 1, k, l)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & - (flux_src_n_in(i)%sf(j - 1, k, l) & - - flux_src_n_in(i)%sf(j, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & + (flux_src_n_in(i)%sf(j - 1, k, l) & + - 
flux_src_n_in(i)%sf(j, k, l)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (idir == 2) then ! y-direction if (surface_tension) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(c_idx)%sf(j, k, l) = & - rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dy(k)* & - q_prim_vf(c_idx)%sf(j, k, l)* & - (flux_src_n_in(advxb)%sf(j, k, l) - & - flux_src_n_in(advxb)%sf(j, k - 1, l)) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(c_idx)%sf(j, k, l) = & + rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dy(k)* & + q_prim_vf(c_idx)%sf(j, k, l)* & + (flux_src_n_in(advxb)%sf(j, k, l) - & + flux_src_n_in(advxb)%sf(j, k - 1, l)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1566,36 +1568,36 @@ contains end if #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 1, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - - flux_src_n_in(i)%sf(j, k, l)) + do l = 0, p + do k = 1, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - - flux_src_n_in(i)%sf(j, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1605,19 +1607,19 @@ contains if ((bc_y%beg == 
-2) .or. (bc_y%beg == -14)) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 1, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - + flux_src_n_in(i)%sf(j, k, l)) + do l = 0, p + do k = 1, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + + flux_src_n_in(i)%sf(j, k, l)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (viscous) then @@ -1637,19 +1639,19 @@ contains else #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - + flux_src_n_in(i)%sf(j, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + + flux_src_n_in(i)%sf(j, k, l)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end if @@ -1658,53 +1660,53 @@ contains if (surface_tension) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(c_idx)%sf(j, k, l) = & - rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dz(l)* & - q_prim_vf(c_idx)%sf(j, k, l)* & - (flux_src_n_in(advxb)%sf(j, k, l) - & - flux_src_n_in(advxb)%sf(j, k, l - 1)) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(c_idx)%sf(j, k, l) = & + rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dz(l)* & + q_prim_vf(c_idx)%sf(j, k, l)* & + (flux_src_n_in(advxb)%sf(j, k, l) - & + flux_src_n_in(advxb)%sf(j, k, l - 1)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - 
$:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_src_n_in(i)%sf(j, k, l - 1) & - - flux_src_n_in(i)%sf(j, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_src_n_in(i)%sf(j, k, l - 1) & + - flux_src_n_in(i)%sf(j, k, l)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (grid_geometry == 3) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxb + 1)%sf(j, k, l) = & - rhs_vf(momxb + 1)%sf(j, k, l) + 5.e-1_wp* & - (flux_src_n_in(momxe)%sf(j, k, l - 1) & - + flux_src_n_in(momxe)%sf(j, k, l)) - - rhs_vf(momxe)%sf(j, k, l) = & - rhs_vf(momxe)%sf(j, k, l) - 5.e-1_wp* & - (flux_src_n_in(momxb + 1)%sf(j, k, l - 1) & - + flux_src_n_in(momxb + 1)%sf(j, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxb + 1)%sf(j, k, l) = & + rhs_vf(momxb + 1)%sf(j, k, l) + 5.e-1_wp* & + (flux_src_n_in(momxe)%sf(j, k, l - 1) & + + flux_src_n_in(momxe)%sf(j, k, l)) + + rhs_vf(momxe)%sf(j, k, l) = & + rhs_vf(momxe)%sf(j, k, l) - 5.e-1_wp* & + (flux_src_n_in(momxb + 1)%sf(j, k, l - 1) & + + flux_src_n_in(momxb + 1)%sf(j, k, l)) + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end if diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 84c09ab6f5..a1dd5aeda1 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -108,8 +108,8 @@ contains w3 = (w3L + w3R)/2._wp normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() do i = 1, num_dims @@ -154,8 +154,8 @@ contains w3 = (w3L + w3R)/2._wp normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() 
+ if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() do i = 1, num_dims @@ -200,8 +200,8 @@ contains w3 = (w3L + w3R)/2._wp normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() do i = 1, num_dims @@ -276,21 +276,21 @@ contains end if #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(num_dims + 1)%sf(j, k, l) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(num_dims + 1)%sf(j, k, l) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + c_divs(num_dims + 1)%sf(j, k, l) = & + c_divs(num_dims + 1)%sf(j, k, l) + & + c_divs(i)%sf(j, k, l)**2._wp + end do c_divs(num_dims + 1)%sf(j, k, l) = & - c_divs(num_dims + 1)%sf(j, k, l) + & - c_divs(i)%sf(j, k, l)**2._wp + sqrt(c_divs(num_dims + 1)%sf(j, k, l)) end do - c_divs(num_dims + 1)%sf(j, k, l) = & - sqrt(c_divs(num_dims + 1)%sf(j, k, l)) end do end do - end do #:endcall GPU_PARALLEL_LOOP call s_populate_capillary_buffers(c_divs, bc_type) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index cef74f406d..832c50792f 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -1021,14 +1021,14 @@ contains end if #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') - do l = 0, p - do k = 0, n - do j = 0, m - if (igr) then - call s_compute_enthalpy(q_cons_ts(1)%vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - else - call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - end if + do l = 0, p + do k = 0, n + do j = 0, m + if (igr) then + call s_compute_enthalpy(q_cons_ts(1)%vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + else + call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, 
vel_sum, j, k, l) + end if ! Compute mixture sound speed call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) @@ -1069,16 +1069,16 @@ contains call s_compute_body_forces_rhs(q_prim_vf_in, q_cons_vf, rhs_vf_in) #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) + & - ldt*rhs_vf_in(i)%sf(j, k, l) + do i = momxb, E_idx + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) + & + ldt*rhs_vf_in(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP call nvtxEndRange @@ -1096,66 +1096,66 @@ contains if (t_step == t_step_start) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 1) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 2) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 3) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do 
j = 0, m - q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP else ! All other timesteps #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_ts(2)%vf(i)%sf(j, k, l) - q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_ts(1)%vf(i)%sf(j, k, l) - q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_ts(0)%vf(i)%sf(j, k, l) - q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_ts(2)%vf(i)%sf(j, k, l) + q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_ts(1)%vf(i)%sf(j, k, l) + q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_ts(0)%vf(i)%sf(j, k, l) + q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index bbaf3648a3..7399d84993 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -1150,16 +1150,16 @@ contains if (weno_dir == 2) then #:call GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) + end do end do end do end do - end do - #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP end if ! 
Reshaping/Projecting onto Characteristic Fields in z-direction @@ -1167,16 +1167,16 @@ contains if (weno_dir == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) + end do end do end do end do - end do - #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP end if end subroutine s_initialize_weno From 9d23036b05af0fa74c0df40d29aea8097ff71315 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 22 Jul 2025 17:24:16 -0400 Subject: [PATCH 14/60] Add syscheck of OpenMP, add omp support for GPU_HOST_DATA, ATOMIC, and WAIT --- src/common/include/acc_macros.fpp | 33 +++++++++++++ src/common/include/omp_macros.fpp | 44 +++++++++++++++++ src/common/include/parallel_macros.fpp | 67 ++++++++++++++++---------- src/common/m_mpi_common.fpp | 2 +- src/simulation/m_body_forces.fpp | 4 +- src/simulation/m_fftw.fpp | 8 +-- src/simulation/m_global_parameters.fpp | 4 +- src/simulation/m_start_up.fpp | 5 -- src/simulation/m_weno.fpp | 4 +- src/syscheck/syscheck.fpp | 23 +++++++++ 10 files changed, 150 insertions(+), 44 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index c70d065f90..d5efabb67f 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -234,4 +234,37 @@ #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef + +#:def ACC_HOST_DATA(code, use_device=None, extraAccArgs=None) + #:assert code is not None + #:assert isinstance(code, str) + #:if code == '' or code.isspace() + #:stop 'GPU_HOST_DATA macro has no effect on the code as it is not surrounding any 
code' + #:endif + #:set use_device_val = GEN_USE_DEVICE_STR(use_device) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = use_device_val.strip('\n') + #:set acc_directive = '!$acc host_data ' + clause_val + extraAccArgs_val.strip('\n') + #:set end_acc_directive = '!$acc end host_data' + $:acc_directive + $:code + $:end_acc_directive +#:enddef + +#:def ACC_ATOMIC(atomic, extraAccArgs=None) + #:assert isinstance(atomic, str) + #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture') + #:set atomic_val = atomic + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = atomic_val.strip('\n') + #:set acc_directive = '!$acc atomic ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ACC_WAIT(extraAccArgs=None) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = '' + #:set acc_directive = '!$acc wait ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef ! 
New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 5566dba088..e37c94ee5f 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -94,6 +94,16 @@ $:temp #:enddef +#:def OMP_USE_DEVICE_ADDR_STR(use_device_addr) + #:set use_device_addr_val = GEN_PARENTHESES_CLAUSE('use_device_addr', use_device_addr) + $:use_device_addr_val +#:enddef + +#:def OMP_USE_DEVICE_PTR_STR(use_device_ptr) + #:set use_device_ptr_val = GEN_PARENTHESES_CLAUSE('use_device_ptr', use_device_ptr) + $:use_device_ptr_val +#:enddef + #:def OMP_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None) @@ -251,4 +261,38 @@ #:set acc_directive = '!$omp target update ' + clause_val + extraOmpArgs_val.strip('\n') $:acc_directive #:enddef + +#:def OMP_HOST_DATA(code, use_device_addr, use_device_ptr, extraOmpArgs) + #:assert code is not None + #:assert isinstance(code, str) + #:if code == '' or code.isspace() + #:stop 'GPU_HOST_DATA macro has no effect on the code as it is not surrounding any code' + #:endif + #:set use_device_addr_val = OMP_USE_DEVICE_ADDR_STR(use_device_addr) + #:set use_device_ptr_val = OMP_USE_DEVICE_PTR_STR(use_device_ptr) + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set clause_val = use_device_addr_val.strip('\n') + use_device_ptr_val.strip('\n') + #:set omp_directive = '!$omp target data ' + clause_val + extraOmpArgs_val.strip('\n') + #:set omp_end_directive = '!$omp end target data' + $:omp_directive + $:code + $:omp_end_directive +#:enddef + +#:def OMP_ATOMIC(atomic, extraOmpArgs=None) + #:assert isinstance(atomic, str) + #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture') + #:set 
atomic_val = atomic + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set clause_val = atomic_val.strip('\n') + #:set omp_directive = '!$omp atomic ' + clause_val + extraOmpArgs_val.strip('\n') + $:omp_directive +#:enddef + +#:def OMP_WAIT(extraOmpArgs=None) + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set clause_val = '' + #:set omp_directive = '!$omp barrier ' + clause_val + extraOmpArgs_val.strip('\n') + $:omp_directive +#:enddef ! New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 7dfacd0829..b93035ebbc 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -152,20 +152,32 @@ #endif #:enddef -#:def GPU_HOST_DATA(code, use_device=None, extraAccArgs=None) - #:assert code is not None - #:assert isinstance(code, str) - #:if code == '' or code.isspace() - #:stop 'GPU_HOST_DATA macro has no effect on the code as it is not surrounding any code' +#:def GPU_HOST_DATA(code, use_device_addr=None, use_device_ptr=None, extraAccArgs=None, extraOmpArgs=None) + + #:if use_device_addr is not None and use_device_ptr is not None + #:set use_device_addr_end_index = len(use_device_addr) - 1 + #:set use_device = use_device_addr + use_device_ptr + $:use_device[use_device_addr_end_index] = ',' + $:use_device[use_device_addr_end_index + 1] = ' ' + #:elif use_device_addr is not None or use_device_ptr is not None + #:if use_device_addr is not None + #:set use_device = use_device_addr + #:else + #:set use_device = use_device_ptr + #:endif + #:else + #:set use_device = None #:endif - #:set use_device_val = GEN_USE_DEVICE_STR(use_device) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = use_device_val.strip('\n') - #:set acc_directive = '!$acc host_data ' + clause_val + extraAccArgs_val.strip('\n') - #:set end_acc_directive = '!$acc end host_data' - $:acc_directive 
+ #:set acc_code = ACC_HOST_DATA(code=code, use_device=use_device, extraAccArgs=extraAccArgs) + #:set omp_code = OMP_HOST_DATA(code=code, use_device_addr=use_device_addr, use_device_ptr=use_device_ptr, extraOmpArgs=extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#else $:code - $:end_acc_directive +#endif #:enddef #:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None, extraOmpArgs=None) @@ -190,14 +202,15 @@ #endif #:enddef -#:def GPU_ATOMIC(atomic, extraAccArgs=None) - #:assert isinstance(atomic, str) - #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture') - #:set atomic_val = atomic - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = atomic_val.strip('\n') - #:set acc_directive = '!$acc atomic ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive +#:def GPU_ATOMIC(atomic, extraAccArgs=None, extraOmpArgs=None) + #:set acc_code = ACC_ATOMIC(atomic=atomic, extraAccArgs=extraAccArgs) + #:set omp_code = OMP_ATOMIC(atomic=atomic, extraOmpArgs=extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#endif #:enddef #:def GPU_UPDATE(host=None, device=None, extraAccArgs=None, extraOmpArgs=None) @@ -211,11 +224,15 @@ #endif #:enddef -#:def GPU_WAIT(extraAccArgs=None) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = '' - #:set acc_directive = '!$acc wait ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive +#:def GPU_WAIT(extraAccArgs=None, extraOmpArgs=None) + #:set acc_code = ACC_WAIT(extraAccArgs=extraAccArgs) + #:set omp_code = OMP_WAIT(extraOmpArgs=extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#endif #:enddef #:def USE_GPU_MODULE() diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 098dbe82d6..c2697e475a 100644 --- a/src/common/m_mpi_common.fpp 
+++ b/src/common/m_mpi_common.fpp @@ -845,7 +845,7 @@ contains #:for rdma_mpi in [False, True] if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then #:if rdma_mpi - #:call GPU_HOST_DATA(use_device='[buff_send, buff_recv]') + #:call GPU_HOST_DATA(use_device_addr='[buff_send, buff_recv]') call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") call MPI_SENDRECV( & diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 213d9b7fc1..a5b95a4b41 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -10,9 +10,7 @@ module m_body_forces use m_nvtx -#ifdef MFC_OpenACC - use openacc -#endif +! $:USE_GPU_MODULE() implicit none diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index be081f44d9..3211fce682 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -166,7 +166,7 @@ contains p_fltr_cmplx => data_fltr_cmplx_gpu #:call GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') - #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]') + #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else @@ -187,7 +187,7 @@ contains end do #:endcall GPU_PARALLEL_LOOP - #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') + #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else @@ -229,7 +229,7 @@ contains end do #:endcall GPU_PARALLEL_LOOP - #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx]') + #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx]') #if defined(__PGI) ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else @@ -251,7 +251,7 @@ contains end do #:endcall GPU_PARALLEL_LOOP - #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') + #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) ierr = 
cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 9ae61c5112..c8a3b09d56 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -21,9 +21,7 @@ module m_global_parameters use m_helper_basic !< Functions to compare floating point numbers -#ifdef MFC_OpenACC - use openacc -#endif + ! $:USE_GPU_MODULE() implicit none diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 4d83080b19..95c7233738 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -69,11 +69,6 @@ module m_start_up use m_helper_basic !< Functions to compare floating point numbers $:USE_GPU_MODULE() -! #if defined(MFC_OpenACC) -! use openacc -! #elif defined(MFC_OpenMP) -! use omp_lib -! #endif use m_nvtx diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 7399d84993..79efa5e6c9 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -24,9 +24,7 @@ module m_weno use m_variables_conversion !< State variables type conversion procedures -#ifdef MFC_OPENACC - use openacc -#endif + ! 
$:USE_GPU_MODULE() use m_mpi_proxy diff --git a/src/syscheck/syscheck.fpp b/src/syscheck/syscheck.fpp index ca2641057e..dc7a354268 100644 --- a/src/syscheck/syscheck.fpp +++ b/src/syscheck/syscheck.fpp @@ -30,6 +30,15 @@ #endif #:enddef ACCC +#:def OMPC(*args) +#ifdef MFC_OpenMP + @:LOG("[TEST] OMP: ${','.join([ x.replace("'", '') for x in args ])}$") + ${','.join([ x.replace("'", '') for x in args ])}$ +#else + @:LOG("[SKIP] OMP: ${','.join([ x.replace("'", '') for x in args ])}$") +#endif +#:enddef OMPC + #:def MPI(*args) #ifdef MFC_MPI ${','.join([ x.replace("'", '') for x in args ])}$ @@ -42,10 +51,17 @@ #endif #:enddef ACC +#:def OMP(*args) +#ifdef MFC_OpenMP + ${','.join([ x.replace("'", '') for x in args ])}$ +#endif +#:enddef OMP + program syscheck @:MPI(use mpi) @:ACC(use openacc) + @:OMP(use omp_lib) implicit none @@ -55,6 +71,8 @@ program syscheck @:ACC(integer :: i, num_devices) @:ACC(real(8), allocatable, dimension(:) :: arr) @:ACC(integer, parameter :: N = 100) + @:OMP(integer :: num_devices_omp) + @:MPIC(call mpi_init(ierr)) @:MPIC(call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)) @@ -76,6 +94,11 @@ program syscheck @:ACCC('!$acc update host(arr(1:N))') @:ACCC('!$acc exit data delete(arr)') + @:OMPC('num_devices_omp = omp_get_num_devices()') + @:OMPC(call assert(num_devices_omp > 0)) + @:OMPC(call omp_set_default_device(mod(rank, nRanks))) + + @:MPIC(call mpi_barrier(MPI_COMM_WORLD, ierr)) @:MPIC(call mpi_finalize(ierr)) From de586ad71331cc3c308dcbc6b42db91a4f1bbe7a Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 23 Jul 2025 14:14:25 -0400 Subject: [PATCH 15/60] Update var name --- src/common/include/omp_macros.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index e37c94ee5f..84aa6c9387 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -258,8 +258,8 @@ #:set device_val = OMP_TO_STR(device) #:set 
extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set clause_val = host_val.strip('\n') + device_val.strip('\n') - #:set acc_directive = '!$omp target update ' + clause_val + extraOmpArgs_val.strip('\n') - $:acc_directive + #:set omp_directive = '!$omp target update ' + clause_val + extraOmpArgs_val.strip('\n') + $:omp_directive #:enddef #:def OMP_HOST_DATA(code, use_device_addr, use_device_ptr, extraOmpArgs) From 84ddc012cc73bbfe2be44e8ab66868507ef51280 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 23 Jul 2025 15:40:32 -0400 Subject: [PATCH 16/60] Change how parallel loop is translated --- src/common/include/omp_macros.fpp | 4 ++-- src/simulation/m_acoustic_src.fpp | 2 +- src/simulation/m_data_output.fpp | 4 ++-- src/simulation/m_start_up.fpp | 6 +++--- src/syscheck/syscheck.fpp | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 84aa6c9387..7719a33411 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -160,9 +160,9 @@ & no_create_val.strip('\n') + present_val.strip('\n') + & & deviceptr_val.strip('\n') + attach_val.strip('\n') #! Hardcoding the parallelism for now - #:set omp_directive = '!$omp target teams distribute parallel do simd ' + & + #:set omp_directive = '!$omp target teams loop bind(teams) ' + & & clause_val + extraOmpArgs_val.strip('\n') - #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' + #:set omp_end_directive = '!$omp end target teams loop' $:omp_directive $:code $:omp_end_directive diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 8b2efa6bcf..27f9cc504b 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -183,7 +183,7 @@ contains ! Keep outer loop sequel because different sources can have very different number of points do ai = 1, num_source ! 
Skip if the pulse has not started yet for sine and square waves - if (sim_time < delay(ai) .and. (pulse(ai) == 1 .or. pulse(ai) == 3)) cycle + ! if (sim_time < delay(ai) .and. (pulse(ai) == 1 .or. pulse(ai) == 3)) cycle ! Decide if frequency need to be converted from wavelength freq_conv_flag = f_is_default(frequency(ai)) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index ecae911da3..37af5bd696 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -279,7 +279,7 @@ contains integer :: j, k, l ! Computing Stability Criteria at Current Time-step - #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') + ! #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') do l = 0, p do k = 0, n do j = 0, m @@ -296,7 +296,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + ! #:endcall GPU_PARALLEL_LOOP ! end: Computing Stability Criteria at Current Time-step diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 95c7233738..61e04dee6f 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1361,9 +1361,9 @@ contains call acc_set_device_num(dev, devtype) #elif defined(MFC_OpenMP) - devNum = omp_get_num_devices() - dev = mod(local_rank, devNum) - call omp_set_default_device(dev) + ! devNum = omp_get_num_devices() + ! dev = mod(local_rank, devNum) + ! call omp_set_default_device(dev) #endif #endif diff --git a/src/syscheck/syscheck.fpp b/src/syscheck/syscheck.fpp index dc7a354268..6a89b3c028 100644 --- a/src/syscheck/syscheck.fpp +++ b/src/syscheck/syscheck.fpp @@ -94,9 +94,9 @@ program syscheck @:ACCC('!$acc update host(arr(1:N))') @:ACCC('!$acc exit data delete(arr)') - @:OMPC('num_devices_omp = omp_get_num_devices()') - @:OMPC(call assert(num_devices_omp > 0)) - @:OMPC(call omp_set_default_device(mod(rank, nRanks))) + ! @:OMPC('num_devices_omp = omp_get_num_devices()') + ! @:OMPC(call assert(num_devices_omp > 0)) + ! 
@:OMPC(call omp_set_default_device(mod(rank, nRanks))) @:MPIC(call mpi_barrier(MPI_COMM_WORLD, ierr)) From 7246e9b7b74a61aaba8ab62dac276956107e3ba6 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 23 Jul 2025 15:41:19 -0400 Subject: [PATCH 17/60] Ran formatter --- src/common/include/parallel_macros.fpp | 20 ++++++++++---------- src/simulation/m_data_output.fpp | 22 +++++++++++----------- src/syscheck/syscheck.fpp | 2 -- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index b93035ebbc..7fbb346bb9 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -155,18 +155,18 @@ #:def GPU_HOST_DATA(code, use_device_addr=None, use_device_ptr=None, extraAccArgs=None, extraOmpArgs=None) #:if use_device_addr is not None and use_device_ptr is not None - #:set use_device_addr_end_index = len(use_device_addr) - 1 - #:set use_device = use_device_addr + use_device_ptr - $:use_device[use_device_addr_end_index] = ',' - $:use_device[use_device_addr_end_index + 1] = ' ' + #:set use_device_addr_end_index = len(use_device_addr) - 1 + #:set use_device = use_device_addr + use_device_ptr + $:use_device[use_device_addr_end_index] = ',' + $:use_device[use_device_addr_end_index + 1] = ' ' #:elif use_device_addr is not None or use_device_ptr is not None - #:if use_device_addr is not None - #:set use_device = use_device_addr - #:else - #:set use_device = use_device_ptr - #:endif + #:if use_device_addr is not None + #:set use_device = use_device_addr + #:else + #:set use_device = use_device_ptr + #:endif #:else - #:set use_device = None + #:set use_device = None #:endif #:set acc_code = ACC_HOST_DATA(code=code, use_device=use_device, extraAccArgs=extraAccArgs) #:set omp_code = OMP_HOST_DATA(code=code, use_device_addr=use_device_addr, use_device_ptr=use_device_ptr, extraOmpArgs=extraOmpArgs) diff --git a/src/simulation/m_data_output.fpp 
b/src/simulation/m_data_output.fpp index 37af5bd696..f39bdfc940 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -280,22 +280,22 @@ contains ! Computing Stability Criteria at Current Time-step ! #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') - do l = 0, p - do k = 0, n - do j = 0, m - call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + do l = 0, p + do k = 0, n + do j = 0, m + call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - if (viscous) then - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) - else - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) - end if + if (viscous) then + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + else + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) + end if - end do end do end do + end do ! #:endcall GPU_PARALLEL_LOOP ! end: Computing Stability Criteria at Current Time-step diff --git a/src/syscheck/syscheck.fpp b/src/syscheck/syscheck.fpp index 6a89b3c028..80fe745491 100644 --- a/src/syscheck/syscheck.fpp +++ b/src/syscheck/syscheck.fpp @@ -73,7 +73,6 @@ program syscheck @:ACC(integer, parameter :: N = 100) @:OMP(integer :: num_devices_omp) - @:MPIC(call mpi_init(ierr)) @:MPIC(call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)) @:MPIC(call mpi_barrier(MPI_COMM_WORLD, ierr)) @@ -98,7 +97,6 @@ program syscheck ! @:OMPC(call assert(num_devices_omp > 0)) ! 
@:OMPC(call omp_set_default_device(mod(rank, nRanks))) - @:MPIC(call mpi_barrier(MPI_COMM_WORLD, ierr)) @:MPIC(call mpi_finalize(ierr)) From d5381aafd81c66012f4374d5393a1fbb4011a58e Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 23 Jul 2025 17:58:50 -0400 Subject: [PATCH 18/60] Remove extraneous build flags --- CMakeLists.txt | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c7a35187f..b46571094e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,18 +194,18 @@ elseif ((CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_Fortran_COMPILER_ add_compile_options( $<$:-Mfreeform> $<$:-cpp> - $<$:-Minfo=inline> + # $<$:-Minfo=inline> $<$:-Minfo=accel> ) if (CMAKE_BUILD_TYPE STREQUAL "Debug") add_compile_options( $<$:-O0> - $<$:-C> - $<$:-g> - $<$:-traceback> + # $<$:-C> + # $<$:-g> + # $<$:-traceback> $<$:-Minform=inform> - $<$:-Mbounds> + # $<$:-Mbounds> ) endif() @@ -480,11 +480,19 @@ function(MFC_SETUP_TARGET) if (NOT TARGET OpenMP::OpenMP_Fortran) message(FATAL_ERROR "OpenMP + Fortran is unsupported.") endif() - + set(ENV{OMP_TARGET_OFFLOAD} [MANDATORY]) target_link_libraries(${a_target} PRIVATE OpenMP::OpenMP_Fortran) target_compile_definitions(${a_target} PRIVATE MFC_OpenMP MFC_GPU) + if(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") - target_compile_options(${a_target} PRIVATE "-mp=gpu") + target_compile_options(${a_target} PRIVATE "-target=gpu" "-Minfo=mp") + set_target_properties(${a_target} PROPERTIES Fortran_FLAGS "-mp=gpu -gpu=ccall") + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") + target_compile_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64) + target_link_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64) + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") + target_compile_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64 ) + target_link_options(${a_target} PRIVATE -fopenmp 
-fopenmp-targets=spir64) endif() endif() @@ -502,7 +510,7 @@ function(MFC_SETUP_TARGET) endforeach() target_compile_options(${a_target} - PRIVATE -gpu=keep,ptxinfo,lineinfo + PRIVATE -gpu=lineinfo ) # GH-200 Unified Memory Support @@ -518,7 +526,7 @@ function(MFC_SETUP_TARGET) if (CMAKE_BUILD_TYPE STREQUAL "Debug") target_compile_options(${a_target} - PRIVATE -gpu=autocompare,debug + PRIVATE -gpu=debug ) endif() elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") From dbcb6f3cfaa549f00cf48acb4f61f4dbae15975c Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 24 Jul 2025 11:20:32 -0400 Subject: [PATCH 19/60] Remove thermochem function calls --- src/common/m_chemistry.fpp | 4 +-- src/common/m_variables_conversion.fpp | 12 +++---- src/simulation/include/inline_riemann.fpp | 8 ++--- src/simulation/m_acoustic_src.fpp | 4 +-- src/simulation/m_cbc.fpp | 18 +++++----- src/simulation/m_riemann_solvers.fpp | 40 +++++++++++------------ src/simulation/m_start_up.fpp | 6 ++-- src/syscheck/syscheck.fpp | 6 ++-- 8 files changed, 49 insertions(+), 49 deletions(-) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index e9e5bc5ee8..92587d9de7 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -99,7 +99,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') + ! #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end @@ -126,7 +126,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + ! 
#:endcall GPU_PARALLEL_LOOP end subroutine s_compute_chemistry_reaction_flux diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 0bffeee879..e68097df7b 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -181,8 +181,8 @@ contains T_guess = T - call get_temperature(e_Per_Kg - Pdyn_Per_Kg, T_guess, Y_rs, .true., T) - call get_pressure(rho, T, Y_rs, pres) + ! call get_temperature(e_Per_Kg - Pdyn_Per_Kg, T_guess, Y_rs, .true., T) + ! call get_pressure(rho, T, Y_rs, pres) #:endif @@ -1306,9 +1306,9 @@ contains q_cons_vf(i)%sf(j, k, l) = rho*q_prim_vf(i)%sf(j, k, l) end do - call get_mixture_molecular_weight(Ys, mix_mol_weight) + ! call get_mixture_molecular_weight(Ys, mix_mol_weight) T = q_prim_vf(E_idx)%sf(j, k, l)*mix_mol_weight/(gas_constant*rho) - call get_mixture_energy_mass(T, Ys, e_mix) + ! call get_mixture_energy_mass(T, Ys, e_mix) q_cons_vf(E_idx)%sf(j, k, l) = & dyn_pres + rho*e_mix @@ -1534,10 +1534,10 @@ contains Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) end do !Computing the energy from the internal energy of the mixture - call get_mixture_molecular_weight(Y_k, mix_mol_weight) + ! call get_mixture_molecular_weight(Y_k, mix_mol_weight) R_gas = gas_constant/mix_mol_weight T_K = pres_K/rho_K/R_gas - call get_mixture_energy_mass(T_K, Y_K, E_K) + ! call get_mixture_energy_mass(T_K, Y_K, E_K) E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum else ! Computing the energy from the pressure diff --git a/src/simulation/include/inline_riemann.fpp b/src/simulation/include/inline_riemann.fpp index 9972799b02..69b79abba5 100644 --- a/src/simulation/include/inline_riemann.fpp +++ b/src/simulation/include/inline_riemann.fpp @@ -34,13 +34,13 @@ if (chemistry) then eps = 0.001_wp - call get_species_enthalpies_rt(T_L, h_iL) - call get_species_enthalpies_rt(T_R, h_iR) + ! call get_species_enthalpies_rt(T_L, h_iL) + ! 
call get_species_enthalpies_rt(T_R, h_iR) h_iL = h_iL*gas_constant/molecular_weights*T_L h_iR = h_iR*gas_constant/molecular_weights*T_R - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + ! call get_species_specific_heats_r(T_L, Cp_iL) + ! call get_species_specific_heats_r(T_R, Cp_iR) h_avg_2 = (sqrt(rho_L)*h_iL + sqrt(rho_R)*h_iR)/(sqrt(rho_L) + sqrt(rho_R)) Yi_avg = (sqrt(rho_L)*Ys_L + sqrt(rho_R)*Ys_R)/(sqrt(rho_L) + sqrt(rho_R)) diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 27f9cc504b..e92b61a58f 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -201,8 +201,8 @@ contains allocate (phi_rn(1:bb_num_freq(ai))) if (pulse(ai) == 4) then - ! call random_number(phi_rn(1:bb_num_freq(ai))) - phi_rn(1:bb_num_freq(ai)) = 1 + call random_number(phi_rn(1:bb_num_freq(ai))) + ! phi_rn(1:bb_num_freq(ai)) = 1 ! Ensure all the ranks have the same random phase shift call s_mpi_send_random_number(phi_rn, bb_num_freq(ai)) end if diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 14d81fdf31..020833d6c2 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -773,7 +773,7 @@ contains end if ! FD2 or FD4 of RHS at j = 0 - #:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') + ! #:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -818,21 +818,21 @@ contains Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do - call get_mixture_molecular_weight(Ys, Mw) + ! 
call get_mixture_molecular_weight(Ys, Mw) R_gas = gas_constant/Mw T = pres/rho/R_gas - call get_mixture_specific_heat_cp_mass(T, Ys, Cp) - call get_mixture_energy_mass(T, Ys, e_mix) + ! call get_mixture_specific_heat_cp_mass(T, Ys, Cp) + ! call get_mixture_energy_mass(T, Ys, e_mix) E = rho*e_mix + 5.e-1_wp*rho*vel_K_sum if (chem_params%gamma_method == 1) then !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - call get_mole_fractions(Mw, Ys, Xs) - call get_species_specific_heats_r(T, Cp_i) + ! call get_mole_fractions(Mw, Ys, Xs) + ! call get_species_specific_heats_r(T, Cp_i) Gamma_i = Cp_i/(Cp_i - 1.0_wp) gamma = sum(Xs(:)/(Gamma_i(:) - 1.0_wp)) else if (chem_params%gamma_method == 2) then !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cv_mass(T, Ys, Cv) + ! call get_mixture_specific_heat_cv_mass(T, Ys, Cv) gamma = 1.0_wp/(Cp/Cv - 1.0_wp) end if else @@ -1045,7 +1045,7 @@ contains if (chemistry) then ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 - call get_species_enthalpies_rt(T, h_k) + ! call get_species_enthalpies_rt(T, h_k) sum_Enthalpies = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species @@ -1104,7 +1104,7 @@ contains end do end do - #:endcall GPU_PARALLEL_LOOP + ! #:endcall GPU_PARALLEL_LOOP end if #:endfor diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 4a82254c3d..d577f8a8b9 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -477,8 +477,8 @@ contains Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) + ! call get_mixture_molecular_weight(Ys_L, MW_L) + ! 
call get_mixture_molecular_weight(Ys_R, MW_R) Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) @@ -488,8 +488,8 @@ contains T_L = pres_L/rho_L/R_gas_L T_R = pres_R/rho_R/R_gas_R - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + ! call get_species_specific_heats_r(T_L, Cp_iL) + ! call get_species_specific_heats_r(T_R, Cp_iR) if (chem_params%gamma_method == 1) then ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. @@ -500,10 +500,10 @@ contains gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) else if (chem_params%gamma_method == 2) then ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + ! call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + ! call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + ! call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + ! call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) Gamm_L = Cp_L/Cv_L gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) @@ -511,8 +511,8 @@ contains gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) end if - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + ! call get_mixture_energy_mass(T_L, Ys_L, E_L) + ! call get_mixture_energy_mass(T_R, Ys_R, E_R) E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms @@ -2445,8 +2445,8 @@ contains Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) + ! call get_mixture_molecular_weight(Ys_L, MW_L) + ! 
call get_mixture_molecular_weight(Ys_R, MW_R) Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) @@ -2457,8 +2457,8 @@ contains T_L = pres_L/rho_L/R_gas_L T_R = pres_R/rho_R/R_gas_R - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + ! call get_species_specific_heats_r(T_L, Cp_iL) + ! call get_species_specific_heats_r(T_R, Cp_iR) if (chem_params%gamma_method == 1) then !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. @@ -2469,10 +2469,10 @@ contains gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) else if (chem_params%gamma_method == 2) then !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + ! call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + ! call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + ! call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + ! call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) Gamm_L = Cp_L/Cv_L gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) @@ -2480,8 +2480,8 @@ contains gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) end if - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + ! call get_mixture_energy_mass(T_L, Ys_L, E_L) + ! call get_mixture_energy_mass(T_R, Ys_R, E_R) E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 61e04dee6f..b425a3a5cd 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1361,9 +1361,9 @@ contains call acc_set_device_num(dev, devtype) #elif defined(MFC_OpenMP) - ! devNum = omp_get_num_devices() - ! dev = mod(local_rank, devNum) - ! 
call omp_set_default_device(dev) + devNum = omp_get_num_devices() + dev = mod(local_rank, devNum) + call omp_set_default_device(dev) #endif #endif diff --git a/src/syscheck/syscheck.fpp b/src/syscheck/syscheck.fpp index 80fe745491..75e18efc33 100644 --- a/src/syscheck/syscheck.fpp +++ b/src/syscheck/syscheck.fpp @@ -93,9 +93,9 @@ program syscheck @:ACCC('!$acc update host(arr(1:N))') @:ACCC('!$acc exit data delete(arr)') - ! @:OMPC('num_devices_omp = omp_get_num_devices()') - ! @:OMPC(call assert(num_devices_omp > 0)) - ! @:OMPC(call omp_set_default_device(mod(rank, nRanks))) + @:OMPC('num_devices_omp = omp_get_num_devices()') + @:OMPC(call assert(num_devices_omp > 0)) + @:OMPC(call omp_set_default_device(mod(rank, nRanks))) @:MPIC(call mpi_barrier(MPI_COMM_WORLD, ierr)) @:MPIC(call mpi_finalize(ierr)) From 94222f4c3a336d9ddca3d30cfe8ab4fa74301d74 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 24 Jul 2025 14:38:24 -0400 Subject: [PATCH 20/60] Remove LTO add always to data allocation omp, switch delete to release, add mappers to derived types, change how allocate is done --- CMakeLists.txt | 37 +- src/common/include/acc_macros.fpp | 26 ++ src/common/include/macros.fpp | 19 +- src/common/include/omp_macros.fpp | 2 +- src/common/include/parallel_macros.fpp | 38 -- src/common/include/shared_parallel_macros.fpp | 12 + src/common/m_derived_types.fpp | 395 ++++++++++++++---- 7 files changed, 376 insertions(+), 153 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b46571094e..af2941b7ae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,18 +194,18 @@ elseif ((CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_Fortran_COMPILER_ add_compile_options( $<$:-Mfreeform> $<$:-cpp> - # $<$:-Minfo=inline> + $<$:-Minfo=inline> $<$:-Minfo=accel> ) if (CMAKE_BUILD_TYPE STREQUAL "Debug") add_compile_options( $<$:-O0> - # $<$:-C> - # $<$:-g> - # $<$:-traceback> + $<$:-C> + $<$:-g> + $<$:-traceback> $<$:-Minform=inform> - # $<$:-Mbounds> + $<$:-Mbounds> ) 
endif() @@ -234,14 +234,14 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release") elseif(CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "23.11") message(STATUS "LTO/IPO is not supported in NVHPC Version < 23.11. Use a newer version of NVHPC for best performance.") else() - message(STATUS "Performing IPO using -Mextract followed by -Minline") - set(NVHPC_USE_TWO_PASS_IPO TRUE) + # message(STATUS "Performing IPO using -Mextract followed by -Minline") + # set(NVHPC_USE_TWO_PASS_IPO TRUE) endif() else() CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR) if (SUPPORTS_IPO) message(STATUS "Enabled IPO / LTO") - set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + # set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) else() message(STATUS "IPO / LTO is NOT available") endif() @@ -401,14 +401,14 @@ function(MFC_SETUP_TARGET) # Here we need to split into "library" and "executable" to perform IPO on the NVIDIA compiler. # A little hacky, but it *is* an edge-case for *one* compiler. if (NVHPC_USE_TWO_PASS_IPO) - add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES}) - target_compile_options(${ARGS_TARGET}_lib PRIVATE - $<$:-Mextract=lib:${ARGS_TARGET}_lib> - $<$:-Minline> - ) - add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib) - target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib) - list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib) + # add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES}) + # target_compile_options(${ARGS_TARGET}_lib PRIVATE + # $<$:-Mextract=lib:${ARGS_TARGET}_lib> + # $<$:-Minline> + # ) + # add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib) + # target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib) + # list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib) endif() foreach (a_target ${IPO_TARGETS}) @@ -481,11 +481,12 @@ function(MFC_SETUP_TARGET) message(FATAL_ERROR "OpenMP + Fortran is unsupported.") endif() set(ENV{OMP_TARGET_OFFLOAD} [MANDATORY]) - target_link_libraries(${a_target} PRIVATE OpenMP::OpenMP_Fortran) + # 
target_link_libraries(${a_target} PRIVATE OpenMP::OpenMP_Fortran) target_compile_definitions(${a_target} PRIVATE MFC_OpenMP MFC_GPU) if(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") - target_compile_options(${a_target} PRIVATE "-target=gpu" "-Minfo=mp") + target_compile_options(${a_target} PRIVATE "-mp=gpu" "-Minfo=mp") + target_link_options(${a_target} PRIVATE "-mp=gpu") set_target_properties(${a_target} PROPERTIES Fortran_FLAGS "-mp=gpu -gpu=ccall") elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") target_compile_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index d5efabb67f..acb8672660 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -60,6 +60,32 @@ $:link_val #:enddef +#:def GEN_DEFAULT_STR(default) + #:if default is not None + #:assert isinstance(default, str) + #:assert (default == 'present' or default == 'none') + #:set default_val = 'default(' + default + ') ' + #:else + #:set default_val = '' + #:endif + $:default_val +#:enddef + +#:def GEN_HOST_STR(host) + #:set host_val = GEN_PARENTHESES_CLAUSE('host', host) + $:host_val +#:enddef + +#:def GEN_DEVICE_STR(device) + #:set device_val = GEN_PARENTHESES_CLAUSE('device', device) + $:device_val +#:enddef + +#:def GEN_USE_DEVICE_STR(use_device) + #:set use_device_val = GEN_PARENTHESES_CLAUSE('use_device', use_device) + $:use_device_val +#:enddef + #:def ACC_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index c1652388c3..d508d1f98f 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -13,10 +13,21 @@ #:enddef #:def 
ALLOCATE(*args) - @:LOG({'@:ALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'}) - #:set allocated_variables = ', '.join(args) - allocate (${allocated_variables}$) - $:GPU_ENTER_DATA(create=('[' + allocated_variables + ']')) + @:LOG({'@:ALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'}) + #:set allocated_variables = ', '.join(args) + allocate (${allocated_variables}$) + #:set cleaned = [] + #:for a in args + #:set s = a.rstrip() + #:if s.endswith(')') + #:set rev = s[::-1] + #:set pos = next(i for i, ch, d in ( (j, c, sum(1 if t==')' else -1 if t=='(' else 0 for t in rev[:j+1])) for j, c in enumerate(rev) ) if ch == '(' and d == 0 ) + #:set s = s[:len(s)-1-pos] + #:endif + $:cleaned.append(s) + #:endfor + #:set joined = ', '.join(cleaned) + $:GPU_ENTER_DATA(create='[' + joined + ']') #:enddef ALLOCATE #:def DEALLOCATE(*args) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 7719a33411..1b5bac0126 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -44,7 +44,7 @@ #:enddef #:def OMP_CREATE_STR(create) - #:set create_val = OMP_MAP_STR('alloc', create) + #:set create_val = OMP_MAP_STR('always,alloc', create) $:create_val #:enddef diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 7fbb346bb9..afc6d83651 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -17,44 +17,6 @@ $:parallelism_val #:enddef -#:def GEN_COLLAPSE_STR(collapse) - #:if collapse is not None - #:set collapse = int(collapse) - #:assert isinstance(collapse, int) - #:assert collapse > 1 - #:set collapse_val = 'collapse(' + str(collapse) + ') ' - #:else - #:set collapse_val = '' - #:endif - $:collapse_val -#:enddef - -#:def GEN_DEFAULT_STR(default) - #:if default is not None - #:assert isinstance(default, str) - #:assert (default == 'present' or default == 'none') - #:set default_val = 'default(' + default + ') ' - #:else - #:set 
default_val = '' - #:endif - $:default_val -#:enddef - -#:def GEN_HOST_STR(host) - #:set host_val = GEN_PARENTHESES_CLAUSE('host', host) - $:host_val -#:enddef - -#:def GEN_DEVICE_STR(device) - #:set device_val = GEN_PARENTHESES_CLAUSE('device', device) - $:device_val -#:enddef - -#:def GEN_USE_DEVICE_STR(use_device) - #:set use_device_val = GEN_PARENTHESES_CLAUSE('use_device', use_device) - $:use_device_val -#:enddef - #:def GPU_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) diff --git a/src/common/include/shared_parallel_macros.fpp b/src/common/include/shared_parallel_macros.fpp index a2039fd2a4..61134a3df3 100644 --- a/src/common/include/shared_parallel_macros.fpp +++ b/src/common/include/shared_parallel_macros.fpp @@ -86,6 +86,18 @@ $:reduction_val #:enddef +#:def GEN_COLLAPSE_STR(collapse) + #:if collapse is not None + #:set collapse = int(collapse) + #:assert isinstance(collapse, int) + #:assert collapse > 1 + #:set collapse_val = 'collapse(' + str(collapse) + ') ' + #:else + #:set collapse_val = '' + #:endif + $:collapse_val +#:enddef + #:def GEN_EXTRA_ARGS_STR(extraArgs) #:if extraArgs is not None #:assert isinstance(extraArgs, str) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 38674af615..e069567bfd 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -1,75 +1,101 @@ !> !! @file m_derived_types.f90 !! @brief Contains module m_derived_types - -#:include "macros.fpp" - +! New line at end of file is required for FYPP# 2 "/storage/home/hcoda1/5/tprathi3/OSPO/MFC-prathi.git/openmp/src/common/include/parallel_macros.fpp" 2 +! 
New line at end of file is required for FYPP# 2 "/storage/home/hcoda1/5/tprathi3/OSPO/MFC-prathi.git/openmp/src/common/include/omp_macros.fpp" 2 +! New line at end of file is required for FYPP# 3 "/storage/home/hcoda1/5/tprathi3/OSPO/MFC-prathi.git/openmp/src/common/include/parallel_macros.fpp" 2 +! New line at end of file is required for FYPP# 2 "/storage/home/hcoda1/5/tprathi3/OSPO/MFC-prathi.git/openmp/src/common/include/acc_macros.fpp" 2 +! New line at end of file is required for FYPP# 4 "/storage/home/hcoda1/5/tprathi3/OSPO/MFC-prathi.git/openmp/src/common/include/parallel_macros.fpp" 2 +! New line at end of file is required for FYPP +! New line at end of file is required for FYPP !> @brief This file contains the definitions of all of the custom-defined !! types used in the pre-process code. module m_derived_types - use m_constants !< Constants - use m_precision_select use m_thermochem, only: num_species - implicit none - !> Derived type adding the field position (fp) as an attribute type field_position real(wp), allocatable, dimension(:, :, :) :: fp !< Field position end type field_position - +!$omp declare mapper (field_position::x) map ( & +!$omp x%fp & +!$omp ) !> Derived type annexing a scalar field (SF) type scalar_field real(wp), pointer, dimension(:, :, :) :: sf => null() end type scalar_field - +!$omp declare mapper (scalar_field::x) map ( & +!$omp x%sf & +!$omp ) !> Derived type for bubble variables pb and mv at quadrature nodes (qbmm) type pres_field real(wp), pointer, dimension(:, :, :, :, :) :: sf => null() end type pres_field - +!$omp declare mapper (pres_field::x) map ( & +!$omp x%sf & +!$omp ) !> Derived type annexing an integer scalar field (SF) type integer_field integer, pointer, dimension(:, :, :) :: sf => null() end type integer_field - +!$omp declare mapper (integer_field::x) map ( & +!$omp x%sf & +!$omp ) !> Derived type for levelset type levelset_field real(wp), pointer, dimension(:, :, :, :) :: sf => null() end type levelset_field - 
+!$omp declare mapper (levelset_field::x) map ( & +!$omp x%sf & +!$omp ) !> Derived type for levelset norm type levelset_norm_field real(wp), pointer, dimension(:, :, :, :, :) :: sf => null() end type levelset_norm_field - +!$omp declare mapper (levelset_norm_field::x) map ( & +!$omp x%sf & +!$omp ) type mpi_io_var integer, allocatable, dimension(:) :: view type(scalar_field), allocatable, dimension(:) :: var end type mpi_io_var - +!$omp declare mapper (mpi_io_var::x) map ( & +!$omp x%view & +!$omp , x%var & +!$omp ) type mpi_io_ib_var integer :: view type(integer_field) :: var end type mpi_io_ib_var - +!$omp declare mapper (mpi_io_ib_var::x) map ( & +!$omp x%view & +!$omp , x%var & +!$omp ) type mpi_io_levelset_var integer :: view type(levelset_field) :: var end type mpi_io_levelset_var - +!$omp declare mapper (mpi_io_levelset_var::x) map ( & +!$omp x%view & +!$omp , x%var & +!$omp ) type mpi_io_levelset_norm_var integer :: view type(levelset_norm_field) :: var end type mpi_io_levelset_norm_var - +!$omp declare mapper (mpi_io_levelset_norm_var::x) map ( & +!$omp x%view & +!$omp , x%var & +!$omp ) !> Derived type annexing a vector field (VF) type vector_field type(scalar_field), allocatable, dimension(:) :: vf !< Vector field end type vector_field - +!$omp declare mapper (vector_field::x) map ( & +!$omp x%vf & +!$omp ) !> Generic 3-component vector (e.g., spatial coordinates or field components) !! Named _dt (derived types: x,y,z) to differentiate from t_vec3 (3-component vector) type vec3_dt ! 
dt for derived types @@ -77,24 +103,33 @@ module m_derived_types real(wp) :: y real(wp) :: z end type vec3_dt - +!$omp declare mapper (vec3_dt::x) map ( & +!$omp x%x & +!$omp , x%y & +!$omp , x%z & +!$omp ) !> Left and right Riemann states type riemann_states real(wp) :: L real(wp) :: R end type riemann_states - +!$omp declare mapper (riemann_states::x) map ( & +!$omp x%L & +!$omp , x%R & +!$omp ) !> Left and right Riemann states for 3-component vectors type riemann_states_vec3 real(wp) :: L(3) real(wp) :: R(3) end type riemann_states_vec3 - +!$omp declare mapper (riemann_states_vec3::x) map ( & +!$omp x%L(3) & +!$omp , x%R(3) & +!$omp ) !> Integer bounds for variables type int_bounds_info integer :: beg integer :: end - real(wp) :: vb1 real(wp) :: vb2 real(wp) :: vb3 @@ -105,9 +140,26 @@ module m_derived_types real(wp), dimension(3) :: vel_in, vel_out real(wp), dimension(num_fluids_max) :: alpha_rho_in, alpha_in logical :: grcbc_in, grcbc_out, grcbc_vel_out - end type int_bounds_info - +!$omp declare mapper (int_bounds_info::x) map ( & +!$omp x%beg & +!$omp , x%end & +!$omp , x%vb1 & +!$omp , x%vb2 & +!$omp , x%vb3 & +!$omp , x%ve1 & +!$omp , x%ve2 & +!$omp , x%ve3 & +!$omp , x%pres_in & +!$omp , x%pres_out & +!$omp , x%vel_in & +!$omp , x%vel_out & +!$omp , x%alpha_rho_in & +!$omp , x%alpha_in & +!$omp , x%grcbc_in & +!$omp , x%grcbc_out & +!$omp , x%grcbc_vel_out & +!$omp ) type bc_patch_parameters integer :: geometry integer :: type @@ -117,13 +169,24 @@ module m_derived_types real(wp), dimension(3) :: length real(wp) :: radius end type bc_patch_parameters - +!$omp declare mapper (bc_patch_parameters::x) map ( & +!$omp x%geometry & +!$omp , x%type & +!$omp , x%dir & +!$omp , x%loc & +!$omp , x%centroid & +!$omp , x%length & +!$omp , x%radius & +!$omp ) !> Derived type adding beginning (beg) and end bounds info as attributes type bounds_info real(wp) :: beg real(wp) :: end end type bounds_info - +!$omp declare mapper (bounds_info::x) map ( & +!$omp x%beg & 
+!$omp , x%end & +!$omp ) !> bounds for the bubble dynamic variables type bub_bounds_info integer :: beg @@ -135,99 +198,108 @@ module m_derived_types integer, dimension(:, :), allocatable :: moms !< Moment indices for qbmm integer, dimension(:, :, :), allocatable :: fullmom !< Moment indices for qbmm end type bub_bounds_info - +!$omp declare mapper (bub_bounds_info::x) map ( & +!$omp x%beg & +!$omp , x%end & +!$omp , x%rs & +!$omp , x%vs & +!$omp , x%ps & +!$omp , x%ms & +!$omp , x%moms & +!$omp , x%fullmom & +!$omp ) !> Defines parameters for a Model Patch type ic_model_parameters character(LEN=pathlen_max) :: filepath !< !! Path the STL file relative to case_dir. - real(wp), dimension(1:3) :: translate !< !! Translation of the STL object. - real(wp), dimension(1:3) :: scale !< !! Scale factor for the STL object. - real(wp), dimension(1:3) :: rotate !< !! Angle to rotate the STL object along each cartesian coordinate axis, !! in radians. - integer :: spc !< !! Number of samples per cell to use when discretizing the STL object. - real(wp) :: threshold !< !! Threshold to turn on smoothen STL patch. end type ic_model_parameters - +!$omp declare mapper (ic_model_parameters::x) map ( & +!$omp x%filepath & +!$omp , x%translate & +!$omp , x%scale & +!$omp , x%rotate & +!$omp , x%spc & +!$omp , x%threshold & +!$omp ) type :: t_triangle real(wp), dimension(1:3, 1:3) :: v ! Vertices of the triangle real(wp), dimension(1:3) :: n ! Normal vector end type t_triangle - +!$omp declare mapper (t_triangle::x) map ( & +!$omp x%v & +!$omp , x%n & +!$omp ) type :: t_ray real(wp), dimension(1:3) :: o ! Origin real(wp), dimension(1:3) :: d ! Direction end type t_ray - +!$omp declare mapper (t_ray::x) map ( & +!$omp x%o & +!$omp , x%d & +!$omp ) type :: t_bbox real(wp), dimension(1:3) :: min ! Minimum coordinates real(wp), dimension(1:3) :: max ! 
Maximum coordinates end type t_bbox - +!$omp declare mapper (t_bbox::x) map ( & +!$omp x%min & +!$omp , x%max & +!$omp ) type :: t_model integer :: ntrs ! Number of triangles type(t_triangle), allocatable :: trs(:) ! Triangles end type t_model - +!$omp declare mapper (t_model::x) map ( & +!$omp x%ntrs & +!$omp , x%trs(:) & +!$omp ) !> Derived type adding initial condition (ic) patch parameters as attributes !! NOTE: The requirements for the specification of the above parameters !! are strongly dependent on both the choice of the multicomponent flow !! model as well as the choice of the patch geometry. type ic_patch_parameters - integer :: geometry !< Type of geometry for the patch - real(wp) :: x_centroid, y_centroid, z_centroid !< !! Location of the geometric center, i.e. the centroid, of the patch. It !! is specified through its x-, y- and z-coordinates, respectively. - real(wp) :: length_x, length_y, length_z !< Dimensions of the patch. x,y,z Lengths. real(wp) :: radius !< Dimensions of the patch. radius. - real(wp), dimension(3) :: radii !< !! Vector indicating the various radii for the elliptical and ellipsoidal !! patch geometries. It is specified through its x-, y-, and z-components !! respectively. - real(wp) :: epsilon, beta !< !! The isentropic vortex parameters for the amplitude of the disturbance and !! domain of influence. - real(wp), dimension(2:9) :: a !< !! The parameters needed for the spherical harmonic patch - logical :: non_axis_sym - real(wp), dimension(3) :: normal !< !! Normal vector indicating the orientation of the patch. It is specified !! through its x-, y- and z-components, respectively. - logical, dimension(0:num_patches_max - 1) :: alter_patch !< - !! List of permissions that indicate to the current patch which preceding !! patches it is allowed to overwrite when it is in process of being laid !! out in the domain - logical :: smoothen !< !! Permission indicating to the current patch whether its boundaries will !! 
be smoothed out across a few cells or whether they are to remain sharp - integer :: smooth_patch_id !< !! Identity (id) of the patch with which current patch is to get smoothed - real(wp) :: smooth_coeff !< !! Smoothing coefficient (coeff) for the size of the stencil of !! cells across which boundaries of the current patch will be smeared out - real(wp), dimension(num_fluids_max) :: alpha_rho real(wp) :: rho real(wp), dimension(3) :: vel @@ -238,88 +310,130 @@ module m_derived_types real(wp) :: cv !< real(wp) :: qv !< real(wp) :: qvp !< - !! Primitive variables associated with the patch. In order, these include !! the partial densities, density, velocity, pressure, volume fractions, !! specific heat ratio function and the liquid stiffness function. - real(wp) :: Bx, By, Bz !< !! Magnetic field components; B%x is not used for 1D - real(wp), dimension(6) :: tau_e !< !! Elastic stresses added to primitive variables if hypoelasticity = True - real(wp) :: R0 !< Bubble size real(wp) :: V0 !< Bubble velocity - real(wp) :: p0 !< Bubble size real(wp) :: m0 !< Bubble velocity - integer :: hcid !! id for hard coded initial condition - real(wp) :: cf_val !! color function value real(wp) :: Y(1:num_species) - !! STL or OBJ model input parameter character(LEN=pathlen_max) :: model_filepath !< !! Path the STL file relative to case_dir. - real(wp), dimension(1:3) :: model_translate !< !! Translation of the STL object. - real(wp), dimension(1:3) :: model_scale !< !! Scale factor for the STL object. - real(wp), dimension(1:3) :: model_rotate !< !! Angle to rotate the STL object along each cartesian coordinate axis, !! in radians. - integer :: model_spc !< !! Number of samples per cell to use when discretizing the STL object. - real(wp) :: model_threshold !< !! Threshold to turn on smoothen STL patch. 
- end type ic_patch_parameters - +!$omp declare mapper (ic_patch_parameters::x) map ( & +!$omp x%geometry & +!$omp , x%x_centroid & +!$omp , x%y_centroid & +!$omp , x%z_centroid & +!$omp , x%length_x & +!$omp , x%length_y & +!$omp , x%length_z & +!$omp , x%radius & +!$omp , x%radii & +!$omp , x%epsilon & +!$omp , x%beta & +!$omp , x%a & +!$omp , x%non_axis_sym & +!$omp , x%normal & +!$omp , x%alter_patch & +!$omp , x%smoothen & +!$omp , x%smooth_patch_id & +!$omp , x%smooth_coeff & +!$omp , x%alpha_rho & +!$omp , x%rho & +!$omp , x%vel & +!$omp , x%pres & +!$omp , x%alpha & +!$omp , x%gamma & +!$omp , x%pi_inf & +!$omp , x%cv & +!$omp , x%qv & +!$omp , x%qvp & +!$omp , x%Bx & +!$omp , x%By & +!$omp , x%Bz & +!$omp , x%tau_e & +!$omp , x%R0 & +!$omp , x%V0 & +!$omp , x%p0 & +!$omp , x%m0 & +!$omp , x%hcid & +!$omp , x%cf_val & +!$omp , x%Y(1:num_species) & +!$omp , x%model_filepath & +!$omp , x%model_translate & +!$omp , x%model_scale & +!$omp , x%model_rotate & +!$omp , x%model_spc & +!$omp , x%model_threshold & +!$omp ) type ib_patch_parameters - integer :: geometry !< Type of geometry for the patch - real(wp) :: x_centroid, y_centroid, z_centroid !< !! Location of the geometric center, i.e. the centroid, of the patch. It !! is specified through its x-, y- and z-coordinates, respectively. - real(wp) :: c, p, t, m - real(wp) :: length_x, length_y, length_z !< Dimensions of the patch. x,y,z Lengths. real(wp) :: radius !< Dimensions of the patch. radius. real(wp) :: theta - logical :: slip - !! STL or OBJ model input parameter character(LEN=pathlen_max) :: model_filepath !< !! Path the STL file relative to case_dir. - real(wp), dimension(1:3) :: model_translate !< !! Translation of the STL object. - real(wp), dimension(1:3) :: model_scale !< !! Scale factor for the STL object. - real(wp), dimension(1:3) :: model_rotate !< !! Angle to rotate the STL object along each cartesian coordinate axis, !! in radians. - integer :: model_spc !< !! 
Number of samples per cell to use when discretizing the STL object. - real(wp) :: model_threshold !< !! Threshold to turn on smoothen STL patch. end type ib_patch_parameters - +!$omp declare mapper (ib_patch_parameters::x) map ( & +!$omp x%geometry & +!$omp , x%x_centroid & +!$omp , x%y_centroid & +!$omp , x%z_centroid & +!$omp , x%c & +!$omp , x%p & +!$omp , x%t & +!$omp , x%m & +!$omp , x%length_x & +!$omp , x%length_y & +!$omp , x%length_z & +!$omp , x%radius & +!$omp , x%theta & +!$omp , x%slip & +!$omp , x%model_filepath & +!$omp , x%model_translate & +!$omp , x%model_scale & +!$omp , x%model_rotate & +!$omp , x%model_spc & +!$omp , x%model_threshold & +!$omp ) !> Derived type annexing the physical parameters (PP) of the fluids. These !! include the specific heat ratio function and liquid stiffness function. type physical_parameters @@ -339,12 +453,31 @@ module m_derived_types real(wp) :: cp_v real(wp) :: G end type physical_parameters - +!$omp declare mapper (physical_parameters::x) map ( & +!$omp x%gamma & +!$omp , x%pi_inf & +!$omp , x%Re & +!$omp , x%cv & +!$omp , x%qv & +!$omp , x%qvp & +!$omp , x%mul0 & +!$omp , x%ss & +!$omp , x%pv & +!$omp , x%gamma_v & +!$omp , x%M_v & +!$omp , x%mu_v & +!$omp , x%k_v & +!$omp , x%cp_v & +!$omp , x%G & +!$omp ) type mpi_io_airfoil_ib_var integer, dimension(2) :: view type(vec3_dt), allocatable, dimension(:) :: var end type mpi_io_airfoil_ib_var - +!$omp declare mapper (mpi_io_airfoil_ib_var::x) map ( & +!$omp x%view & +!$omp , x%var & +!$omp ) !> Derived type annexing integral regions type integral_parameters real(wp) :: xmin !< Min. boundary first coordinate direction @@ -354,7 +487,14 @@ module m_derived_types real(wp) :: zmin !< Min. boundary third coordinate direction real(wp) :: zmax !< Max. 
boundary third coordinate direction end type integral_parameters - +!$omp declare mapper (integral_parameters::x) map ( & +!$omp x%xmin & +!$omp , x%xmax & +!$omp , x%ymin & +!$omp , x%ymax & +!$omp , x%zmin & +!$omp , x%zmax & +!$omp ) !> Acoustic source parameters type acoustic_parameters integer :: pulse !< Type of pulse @@ -382,7 +522,32 @@ module m_derived_types integer :: element_on !< Element in the acoustic array to turn on integer :: bb_num_freq !< Number of frequencies in the broadband wave end type acoustic_parameters - +!$omp declare mapper (acoustic_parameters::x) map ( & +!$omp x%pulse & +!$omp , x%support & +!$omp , x%dipole & +!$omp , x%loc & +!$omp , x%mag & +!$omp , x%length & +!$omp , x%height & +!$omp , x%wavelength & +!$omp , x%frequency & +!$omp , x%gauss_sigma_dist & +!$omp , x%gauss_sigma_time & +!$omp , x%npulse & +!$omp , x%dir & +!$omp , x%delay & +!$omp , x%foc_length & +!$omp , x%aperture & +!$omp , x%element_spacing_angle & +!$omp , x%element_polygon_ratio & +!$omp , x%rotate_angle & +!$omp , x%bb_bandwidth & +!$omp , x%bb_lowest_freq & +!$omp , x%num_elements & +!$omp , x%element_on & +!$omp , x%bb_num_freq & +!$omp ) !> Acoustic source source_spatial pre-calculated values type source_spatial_type integer, dimension(:, :), allocatable :: coord !< List of grid points indices with non-zero source_spatial values @@ -390,7 +555,12 @@ module m_derived_types real(wp), dimension(:), allocatable :: angle !< List of angles with x-axis for mom source term vector real(wp), dimension(:, :), allocatable :: xyz_to_r_ratios !< List of [xyz]/r for mom source term vector end type source_spatial_type - +!$omp declare mapper (source_spatial_type::x) map ( & +!$omp x%coord & +!$omp , x%val & +!$omp , x%angle & +!$omp , x%xyz_to_r_ratios & +!$omp ) !> Ghost Point for Immersed Boundaries type ghost_point integer, dimension(3) :: loc !< Physical location of the ghost point @@ -401,28 +571,40 @@ module m_derived_types logical :: slip integer, dimension(3) :: 
DB end type ghost_point - +!$omp declare mapper (ghost_point::x) map ( & +!$omp x%loc & +!$omp , x%ip_loc & +!$omp , x%ip_grid & +!$omp , x%interp_coeffs & +!$omp , x%ib_patch_id & +!$omp , x%slip & +!$omp , x%DB & +!$omp ) !> Species parameters type species_parameters character(LEN=name_len) :: name !< Name of species end type species_parameters - +!$omp declare mapper (species_parameters::x) map ( & +!$omp x%name & +!$omp ) !> Chemistry parameters type chemistry_parameters character(LEN=name_len) :: cantera_file !< Path to Cantera file - logical :: diffusion logical :: reactions - !> Method of determining gamma. !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. integer :: gamma_method end type chemistry_parameters - +!$omp declare mapper (chemistry_parameters::x) map ( & +!$omp x%cantera_file & +!$omp , x%diffusion & +!$omp , x%reactions & +!$omp , x%gamma_method & +!$omp ) !> Lagrangian bubble parameters type bubbles_lagrange_parameters - integer :: solver_approach !< 1: One-way coupling, 2: two-way coupling integer :: cluster_type !< Cluster model to find p_inf logical :: pressure_corrector !< Cell pressure correction term @@ -440,13 +622,42 @@ module m_derived_types real(wp) :: T0, Thost !< Reference temperature and host temperature real(wp) :: x0 !< Reference length real(wp) :: diffcoefvap !< Vapor diffusivity in the gas - end type bubbles_lagrange_parameters - +!$omp declare mapper (bubbles_lagrange_parameters::x) map ( & +!$omp x%solver_approach & +!$omp , x%cluster_type & +!$omp , x%pressure_corrector & +!$omp , x%smooth_type & +!$omp , x%heatTransfer_model & +!$omp , x%massTransfer_model & +!$omp , x%write_bubbles & +!$omp , x%write_bubbles_stats & +!$omp , x%nBubs_glb & +!$omp , x%epsilonb & +!$omp , x%charwidth & +!$omp , x%valmaxvoid & +!$omp , x%c0 & +!$omp , x%rho0 & +!$omp , x%T0 & +!$omp , x%Thost & +!$omp , x%x0 & +!$omp , x%diffcoefvap & +!$omp ) !> Max 
and min number of cells in a direction of each combination of x-,y-, and z- type cell_num_bounds integer :: mn_max, np_max, mp_max, mnp_max integer :: mn_min, np_min, mp_min, mnp_min end type cell_num_bounds - +!$omp declare mapper (cell_num_bounds::x) map ( & +!$omp x%mn_max & +!$omp , x%np_max & +!$omp , x%mp_max & +!$omp , x%mnp_max & +!$omp , x%mn_min & +!$omp , x%np_min & +!$omp , x%mp_min & +!$omp , x%mnp_min & +!$omp ) end module m_derived_types + +! Code was translated using: /media/shared/Documents/GitHub/OSPO/intel-application-migration-tool-for-openacc-to-openmp/simulation/src/intel-application-migration-tool-for-openacc-to-openmp -keep-binding-clauses=all simulation/p_main.fpp.f90 From 473d19d8976c3f737bf9cedb90f1e4619238849b Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 24 Jul 2025 16:26:20 -0400 Subject: [PATCH 21/60] Fixed parallel loop when no OpenMP or OpenACC --- src/common/include/acc_macros.fpp | 15 +++++++++++++++ src/common/include/parallel_macros.fpp | 16 ++-------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index acb8672660..147473250d 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -86,6 +86,21 @@ $:use_device_val #:enddef +#:def GEN_PARALLELISM_STR(parallelism) + #:if parallelism is not None + #:assert isinstance(parallelism, str) + #:assert parallelism[0] == '[' and parallelism[-1] == ']' + #:set parallelism_list = [x.strip() for x in parallelism.strip('[]').split(',')] + $:ASSERT_LIST(parallelism_list, str) + #:assert all((element == 'gang' or element == 'worker' or & + & element == 'vector' or element == 'seq') for element in parallelism_list) + #:set parallelism_val = ' '.join(parallelism_list) + ' ' + #:else + #:set parallelism_val = '' + #:endif + $:parallelism_val +#:enddef + #:def ACC_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & 
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index afc6d83651..114ebf7c47 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -2,20 +2,6 @@ #:include 'omp_macros.fpp' #:include 'acc_macros.fpp' -#:def GEN_PARALLELISM_STR(parallelism) - #:if parallelism is not None - #:assert isinstance(parallelism, str) - #:assert parallelism[0] == '[' and parallelism[-1] == ']' - #:set parallelism_list = [x.strip() for x in parallelism.strip('[]').split(',')] - $:ASSERT_LIST(parallelism_list, str) - #:assert all((element == 'gang' or element == 'worker' or & - & element == 'vector' or element == 'seq') for element in parallelism_list) - #:set parallelism_val = ' '.join(parallelism_list) + ' ' - #:else - #:set parallelism_val = '' - #:endif - $:parallelism_val -#:enddef #:def GPU_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & @@ -46,6 +32,8 @@ $:acc_code #elif defined(MFC_OpenMP) $:omp_code +#else + $:code #endif #:enddef From 5703c19948870a5eda6fafdde1fe9249ca1a0833 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Fri, 25 Jul 2025 10:31:13 -0400 Subject: [PATCH 22/60] Update how allocate macro works, update riemann solver, and parallel loop --- src/common/include/macros.fpp | 30 ++++++++++++++-------------- src/common/include/omp_macros.fpp | 4 ++-- src/simulation/m_rhs.fpp | 8 +++++++- src/simulation/m_riemann_solvers.fpp | 23 +++++++++++---------- 4 files changed, 36 insertions(+), 29 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index d508d1f98f..f44aa96a63 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -13,21 +13,21 @@ 
#:enddef #:def ALLOCATE(*args) - @:LOG({'@:ALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'}) - #:set allocated_variables = ', '.join(args) - allocate (${allocated_variables}$) - #:set cleaned = [] - #:for a in args - #:set s = a.rstrip() - #:if s.endswith(')') - #:set rev = s[::-1] - #:set pos = next(i for i, ch, d in ( (j, c, sum(1 if t==')' else -1 if t=='(' else 0 for t in rev[:j+1])) for j, c in enumerate(rev) ) if ch == '(' and d == 0 ) - #:set s = s[:len(s)-1-pos] - #:endif - $:cleaned.append(s) - #:endfor - #:set joined = ', '.join(cleaned) - $:GPU_ENTER_DATA(create='[' + joined + ']') + @:LOG({'@:ALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'}) + #:set allocated_variables = ', '.join(args) + allocate (${allocated_variables}$) + #:set cleaned = [] + #:for a in args + #:set s = a.rstrip() + #:if s.endswith(')') + #:set rev = s[::-1] + #:set pos = next(i for i, ch, d in ( (j, c, sum(1 if t==')' else -1 if t=='(' else 0 for t in rev[:j+1])) for j, c in enumerate(rev) ) if ch == '(' and d == 0 ) + #:set s = s[:len(s)-1-pos] + #:endif + $:cleaned.append(s) + #:endfor + #:set joined = ', '.join(cleaned) + $:GPU_ENTER_DATA(create='[' + joined + ']') #:enddef ALLOCATE #:def DEALLOCATE(*args) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 1b5bac0126..e61c56ffac 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -160,7 +160,7 @@ & no_create_val.strip('\n') + present_val.strip('\n') + & & deviceptr_val.strip('\n') + attach_val.strip('\n') #! Hardcoding the parallelism for now - #:set omp_directive = '!$omp target teams loop bind(teams) ' + & + #:set omp_directive = '!$omp target teams loop ' + & & clause_val + extraOmpArgs_val.strip('\n') #:set omp_end_directive = '!$omp end target teams loop' $:omp_directive @@ -201,7 +201,7 @@ #! 
Not implemented yet #:def OMP_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraOmpArgs=None) #! loop is going to be ignored since all loops right now are seq - #:set temp = '' + #:set temp = '!$omp loop bind(thread)' $:temp #:enddef diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 910420e10e..3dbf37386b 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -830,6 +830,9 @@ contains irx%beg = 0; iry%beg = 0; irz%beg = -1 end if irx%end = m; iry%end = n; irz%end = p + $:GPU_UPDATE(host='[qL_rsx_vf,qR_rsx_vf]') + print *, "L", qL_rsx_vf(100:300, 0, 0, 1) + print *, "R", qR_rsx_vf(100:300, 0, 0, 1) !Computing Riemann Solver Flux and Source Flux call nvtxStartRange("RHS-RIEMANN-SOLVER") @@ -849,6 +852,8 @@ contains flux_gsrc_n(id)%vf, & id, irx, iry, irz) call nvtxEndRange + $:GPU_UPDATE(host='[flux_n(1)%vf(1)%sf]') + print *, "FLUX", flux_n(1)%vf(1)%sf(100:300, 0, 0) ! Additional physics and source terms ! 
RHS addition for advection source @@ -1070,7 +1075,8 @@ contains end do end do #:endcall GPU_PARALLEL_LOOP - + $:GPU_UPDATE(host='[rhs_vf(1)%sf]') + print *, "RHS", rhs_vf(1)%sf(100:300, 0, 0) if (model_eqns == 3) then #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') do q_loop = 0, p diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index d577f8a8b9..0406fd99bb 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -357,9 +357,9 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]', extraOmpArgs='defaultmap(firstprivate:scalar) bind(teams, parallel)') + do l = is3%beg, is3%end + do k = is2%beg, is2%end do j = is1%beg, is1%end $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe @@ -480,8 +480,8 @@ contains ! call get_mixture_molecular_weight(Ys_L, MW_L) ! call get_mixture_molecular_weight(Ys_R, MW_R) - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + ! Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + ! 
Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) R_gas_L = gas_constant/MW_L R_gas_R = gas_constant/MW_R @@ -496,8 +496,8 @@ contains Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + ! gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + ! gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) else if (chem_params%gamma_method == 2) then ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. ! call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) @@ -524,8 +524,8 @@ contains vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) - b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L - b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R + ! b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L + ! b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp @@ -536,8 +536,8 @@ contains H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R - cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) - cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) + ! cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) + ! 
cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R @@ -570,6 +570,7 @@ contains G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) end if + $:GPU_LOOP(parallelism='[seq]') do i = 1, strxe - strxb + 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) From 7021690665f870c6fa8eec447e87f9bd2972df28 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Fri, 25 Jul 2025 13:48:12 -0400 Subject: [PATCH 23/60] Passing most 1D cases --- src/common/include/omp_macros.fpp | 2 +- src/common/m_chemistry.fpp | 6 +- src/simulation/m_acoustic_src.fpp | 312 ++++++++++++------------- src/simulation/m_fftw.fpp | 1 - src/simulation/m_global_parameters.fpp | 2 +- src/simulation/m_rhs.fpp | 14 +- src/simulation/m_riemann_solvers.fpp | 18 +- 7 files changed, 177 insertions(+), 178 deletions(-) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index e61c56ffac..00735eae2c 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -160,7 +160,7 @@ & no_create_val.strip('\n') + present_val.strip('\n') + & & deviceptr_val.strip('\n') + attach_val.strip('\n') #! 
Hardcoding the parallelism for now - #:set omp_directive = '!$omp target teams loop ' + & + #:set omp_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams)' + & & clause_val + extraOmpArgs_val.strip('\n') #:set omp_end_directive = '!$omp end target teams loop' $:omp_directive diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 92587d9de7..97dbcf2632 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -99,7 +99,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - ! #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end @@ -112,7 +112,7 @@ contains rho = q_cons_qp(contxe)%sf(x, y, z) T = q_T_sf%sf(x, y, z) - call get_net_production_rates(rho, T, Ys, omega) + ! call get_net_production_rates(rho, T, Ys, omega) $:GPU_LOOP(parallelism='[seq]') do eqn = chemxb, chemxe @@ -126,7 +126,7 @@ contains end do end do end do - ! #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP end subroutine s_compute_chemistry_reaction_flux diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index e92b61a58f..39f671ba7f 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -183,162 +183,162 @@ contains ! Keep outer loop sequel because different sources can have very different number of points do ai = 1, num_source ! Skip if the pulse has not started yet for sine and square waves - ! if (sim_time < delay(ai) .and. (pulse(ai) == 1 .or. pulse(ai) == 3)) cycle - - ! Decide if frequency need to be converted from wavelength - freq_conv_flag = f_is_default(frequency(ai)) - gauss_conv_flag = f_is_default(gauss_sigma_time(ai)) - - num_points = source_spatials_num_points(ai) ! Use scalar to force firstprivate to prevent GPU bug - - ! 
Calculate the broadband source - period_BB = 0._wp - sl_BB = 0._wp - ffre_BB = 0._wp - sum_BB = 0._wp - - ! Allocate buffers for random phase shift - allocate (phi_rn(1:bb_num_freq(ai))) - - if (pulse(ai) == 4) then - call random_number(phi_rn(1:bb_num_freq(ai))) - ! phi_rn(1:bb_num_freq(ai)) = 1 - ! Ensure all the ranks have the same random phase shift - call s_mpi_send_random_number(phi_rn, bb_num_freq(ai)) - end if - - $:GPU_LOOP(reduction='[[sum_BB]]', reductionOp='[+]') - do k = 1, bb_num_freq(ai) - ! Acoustic period of the wave at each discrete frequency - period_BB = 1._wp/(bb_lowest_freq(ai) + k*bb_bandwidth(ai)) - ! Spectral level at each frequency - sl_BB = broadband_spectral_level_constant*mag(ai) + k*mag(ai)/broadband_spectral_level_growth_rate - ! Source term corresponding to each frequencies - ffre_BB = sqrt((2._wp*sl_BB*bb_bandwidth(ai)))*cos((sim_time)*2._wp*pi/period_BB + 2._wp*pi*phi_rn(k)) - ! Sum up the source term of each frequency to obtain the total source term for broadband wave - sum_BB = sum_BB + ffre_BB - end do - - deallocate (phi_rn) - - #:call GPU_PARALLEL_LOOP(private='[myalpha,myalpha_rho]') - do i = 1, num_points - j = source_spatials(ai)%coord(1, i) - k = source_spatials(ai)%coord(2, i) - l = source_spatials(ai)%coord(3, i) - - ! Compute speed of sound - myRho = 0._wp - B_tait = 0._wp - small_gamma = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l) - myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l) - end do - - if (bubbles_euler) then - if (num_fluids > 2) then - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - 1 - myRho = myRho + myalpha_rho(q) - B_tait = B_tait + myalpha(q)*pi_infs(q) - small_gamma = small_gamma + myalpha(q)*gammas(q) - end do - else - myRho = myalpha_rho(1) - B_tait = pi_infs(1) - small_gamma = gammas(1) - end if - end if - - if ((.not. bubbles_euler) .or. (mpp_lim .and. 
(num_fluids > 2))) then - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - myRho = myRho + myalpha_rho(q) - B_tait = B_tait + myalpha(q)*pi_infs(q) - small_gamma = small_gamma + myalpha(q)*gammas(q) - end do - end if - - small_gamma = 1._wp/small_gamma + 1._wp - c = sqrt(small_gamma*(q_prim_vf(E_idx)%sf(j, k, l) + ((small_gamma - 1._wp)/small_gamma)*B_tait)/myRho) - - ! Wavelength to frequency conversion - if (pulse(ai) == 1 .or. pulse(ai) == 3) frequency_local = f_frequency_local(freq_conv_flag, ai, c) - if (pulse(ai) == 2) gauss_sigma_time_local = f_gauss_sigma_time_local(gauss_conv_flag, ai, c) - - ! Update momentum source term - call s_source_temporal(sim_time, c, ai, mom_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) - mom_src_diff = source_temporal*source_spatials(ai)%val(i) - - if (dipole(ai)) then ! Double amplitude & No momentum source term (only works for Planar) - mass_src(j, k, l) = mass_src(j, k, l) + 2._wp*mom_src_diff/c - if (model_eqns /= 4) E_src(j, k, l) = E_src(j, k, l) + 2._wp*mom_src_diff*c/(small_gamma - 1._wp) - cycle - end if - - if (n == 0) then ! 1D - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*sign(1._wp, dir(ai)) ! Left or right-going wave - - elseif (p == 0) then ! 2D - if (support(ai) < 5) then ! Planar - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) - else - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(source_spatials(ai)%angle(i)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(source_spatials(ai)%angle(i)) - end if - - else ! 3D - if (support(ai) < 5) then ! 
Planar - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) - else - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(1, i) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(2, i) - mom_src(3, j, k, l) = mom_src(3, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(3, i) - end if - end if - - ! Update mass source term - if (support(ai) < 5) then ! Planar - mass_src_diff = mom_src_diff/c - else ! Spherical or cylindrical support - ! Mass source term must be calculated differently using a correction term for spherical and cylindrical support - call s_source_temporal(sim_time, c, ai, mass_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) - mass_src_diff = source_temporal*source_spatials(ai)%val(i) - end if - mass_src(j, k, l) = mass_src(j, k, l) + mass_src_diff - - ! Update energy source term - if (model_eqns /= 4) then - E_src(j, k, l) = E_src(j, k, l) + mass_src_diff*c**2._wp/(small_gamma - 1._wp) - end if - - end do - #:endcall GPU_PARALLEL_LOOP - end do - - ! Update the rhs variables - #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do q = contxb, contxe - rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) - end do - $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) - end do - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + e_src(j, k, l) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP + if (not (sim_time < delay(ai) .and. (pulse(ai) == 1 .or. pulse(ai) == 3))) then + + ! Decide if frequency need to be converted from wavelength + freq_conv_flag = f_is_default(frequency(ai)) + gauss_conv_flag = f_is_default(gauss_sigma_time(ai)) + + num_points = source_spatials_num_points(ai) ! 
Use scalar to force firstprivate to prevent GPU bug + + ! Calculate the broadband source + period_BB = 0._wp + sl_BB = 0._wp + ffre_BB = 0._wp + sum_BB = 0._wp + + ! Allocate buffers for random phase shift + allocate (phi_rn(1:bb_num_freq(ai))) + + if (pulse(ai) == 4) then + call random_number(phi_rn(1:bb_num_freq(ai))) + ! Ensure all the ranks have the same random phase shift + call s_mpi_send_random_number(phi_rn, bb_num_freq(ai)) + end if + + $:GPU_LOOP(reduction='[[sum_BB]]', reductionOp='[+]') + do k = 1, bb_num_freq(ai) + ! Acoustic period of the wave at each discrete frequency + period_BB = 1._wp/(bb_lowest_freq(ai) + k*bb_bandwidth(ai)) + ! Spectral level at each frequency + sl_BB = broadband_spectral_level_constant*mag(ai) + k*mag(ai)/broadband_spectral_level_growth_rate + ! Source term corresponding to each frequencies + ffre_BB = sqrt((2._wp*sl_BB*bb_bandwidth(ai)))*cos((sim_time)*2._wp*pi/period_BB + 2._wp*pi*phi_rn(k)) + ! Sum up the source term of each frequency to obtain the total source term for broadband wave + sum_BB = sum_BB + ffre_BB + end do + + deallocate (phi_rn) + + #:call GPU_PARALLEL_LOOP(private='[myalpha,myalpha_rho]') + do i = 1, num_points + j = source_spatials(ai)%coord(1, i) + k = source_spatials(ai)%coord(2, i) + l = source_spatials(ai)%coord(3, i) + + ! Compute speed of sound + myRho = 0._wp + B_tait = 0._wp + small_gamma = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l) + myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l) + end do + + if (bubbles_euler) then + if (num_fluids > 2) then + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids - 1 + myRho = myRho + myalpha_rho(q) + B_tait = B_tait + myalpha(q)*pi_infs(q) + small_gamma = small_gamma + myalpha(q)*gammas(q) + end do + else + myRho = myalpha_rho(1) + B_tait = pi_infs(1) + small_gamma = gammas(1) + end if + end if + + if ((.not. bubbles_euler) .or. (mpp_lim .and. 
(num_fluids > 2))) then + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + myRho = myRho + myalpha_rho(q) + B_tait = B_tait + myalpha(q)*pi_infs(q) + small_gamma = small_gamma + myalpha(q)*gammas(q) + end do + end if + + small_gamma = 1._wp/small_gamma + 1._wp + c = sqrt(small_gamma*(q_prim_vf(E_idx)%sf(j, k, l) + ((small_gamma - 1._wp)/small_gamma)*B_tait)/myRho) + + ! Wavelength to frequency conversion + if (pulse(ai) == 1 .or. pulse(ai) == 3) frequency_local = f_frequency_local(freq_conv_flag, ai, c) + if (pulse(ai) == 2) gauss_sigma_time_local = f_gauss_sigma_time_local(gauss_conv_flag, ai, c) + + ! Update momentum source term + call s_source_temporal(sim_time, c, ai, mom_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) + mom_src_diff = source_temporal*source_spatials(ai)%val(i) + + if (dipole(ai)) then ! Double amplitude & No momentum source term (only works for Planar) + mass_src(j, k, l) = mass_src(j, k, l) + 2._wp*mom_src_diff/c + if (model_eqns /= 4) E_src(j, k, l) = E_src(j, k, l) + 2._wp*mom_src_diff*c/(small_gamma - 1._wp) + cycle + end if + + if (n == 0) then ! 1D + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*sign(1._wp, dir(ai)) ! Left or right-going wave + + elseif (p == 0) then ! 2D + if (support(ai) < 5) then ! Planar + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) + else + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(source_spatials(ai)%angle(i)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(source_spatials(ai)%angle(i)) + end if + + else ! 3D + if (support(ai) < 5) then ! 
Planar + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) + else + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(1, i) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(2, i) + mom_src(3, j, k, l) = mom_src(3, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(3, i) + end if + end if + + ! Update mass source term + if (support(ai) < 5) then ! Planar + mass_src_diff = mom_src_diff/c + else ! Spherical or cylindrical support + ! Mass source term must be calculated differently using a correction term for spherical and cylindrical support + call s_source_temporal(sim_time, c, ai, mass_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) + mass_src_diff = source_temporal*source_spatials(ai)%val(i) + end if + mass_src(j, k, l) = mass_src(j, k, l) + mass_src_diff + + ! Update energy source term + if (model_eqns /= 4) then + E_src(j, k, l) = E_src(j, k, l) + mass_src_diff*c**2._wp/(small_gamma - 1._wp) + end if + + end do + #:endcall GPU_PARALLEL_LOOP + end if + end do + + ! 
Update the rhs variables + #:call GPU_PARALLEL_LOOP(collapse=3) + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do q = contxb, contxe + rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) + end do + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) + end do + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + e_src(j, k, l) + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP end subroutine s_acoustic_src_calculations !> This subroutine gives the temporally varying amplitude of the pulse diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 3211fce682..4ea2ac6934 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -52,7 +52,6 @@ module m_fftw complex(dp), allocatable, target :: data_cmplx_gpu(:) complex(dp), allocatable, target :: data_fltr_cmplx_gpu(:) $:GPU_DECLARE(create='[data_real_gpu,data_cmplx_gpu,data_fltr_cmplx_gpu]') - !$omp declare target (data_real_gpu,data_cmplx_gpu,data_fltr_cmplx_gpu) #if defined(__PGI) integer :: fwd_plan_gpu, bwd_plan_gpu diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index c8a3b09d56..854e350781 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -182,7 +182,7 @@ module m_global_parameters #:endfor real(wp), dimension(3) :: accel_bf $:GPU_DECLARE(create='[accel_bf]') - $:GPU_DECLARE(create='[k_x,w_x,p_x,g_x,k_y,w_y,p_y,g_y,k_z,w_z,p_z,g_z]') + ! 
$:GPU_DECLARE(create='[k_x,w_x,p_x,g_x,k_y,w_y,p_y,g_y,k_z,w_z,p_z,g_z]') integer :: cpu_start, cpu_end, cpu_rate diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 3dbf37386b..bb6ba2871e 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -830,9 +830,9 @@ contains irx%beg = 0; iry%beg = 0; irz%beg = -1 end if irx%end = m; iry%end = n; irz%end = p - $:GPU_UPDATE(host='[qL_rsx_vf,qR_rsx_vf]') - print *, "L", qL_rsx_vf(100:300, 0, 0, 1) - print *, "R", qR_rsx_vf(100:300, 0, 0, 1) + ! $:GPU_UPDATE(host='[qL_rsx_vf,qR_rsx_vf]') + ! print *, "L", qL_rsx_vf(100:300, 0, 0, 1) + ! print *, "R", qR_rsx_vf(100:300, 0, 0, 1) !Computing Riemann Solver Flux and Source Flux call nvtxStartRange("RHS-RIEMANN-SOLVER") @@ -852,8 +852,8 @@ contains flux_gsrc_n(id)%vf, & id, irx, iry, irz) call nvtxEndRange - $:GPU_UPDATE(host='[flux_n(1)%vf(1)%sf]') - print *, "FLUX", flux_n(1)%vf(1)%sf(100:300, 0, 0) + ! $:GPU_UPDATE(host='[flux_n(1)%vf(1)%sf]') + ! print *, "FLUX", flux_n(1)%vf(1)%sf(100:300, 0, 0) ! Additional physics and source terms ! RHS addition for advection source @@ -1075,8 +1075,8 @@ contains end do end do #:endcall GPU_PARALLEL_LOOP - $:GPU_UPDATE(host='[rhs_vf(1)%sf]') - print *, "RHS", rhs_vf(1)%sf(100:300, 0, 0) + ! $:GPU_UPDATE(host='[rhs_vf(1)%sf]') + ! 
print *, "RHS", rhs_vf(1)%sf(100:300, 0, 0) if (model_eqns == 3) then #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') do q_loop = 0, p diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 0406fd99bb..039020248b 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -357,7 +357,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]', extraOmpArgs='defaultmap(firstprivate:scalar) bind(teams, parallel)') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -480,8 +480,8 @@ contains ! call get_mixture_molecular_weight(Ys_L, MW_L) ! call get_mixture_molecular_weight(Ys_R, MW_R) - ! Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - ! Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) R_gas_L = gas_constant/MW_L R_gas_R = gas_constant/MW_R @@ -496,8 +496,8 @@ contains Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) - ! gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - ! 
gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) else if (chem_params%gamma_method == 2) then ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. ! call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) @@ -524,8 +524,8 @@ contains vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) - ! b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L - ! b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R + b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L + b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp @@ -536,8 +536,8 @@ contains H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R - ! cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) - ! cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) + cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) + cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R From 828d9d89f54941286bf5d24362250e462ccd3a11 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Fri, 25 Jul 2025 16:25:02 -0400 Subject: [PATCH 24/60] Fixed IGR 2D, readded parallel loop in cbc, undid changes in derived types, removed rest of pure functions, fix issue with acoustic on nvfortran --- src/common/m_boundary_common.fpp | 46 ++- src/common/m_derived_types.fpp | 397 ++++++----------------- src/common/m_helper_basic.fpp | 2 +- src/post_process/m_derived_variables.fpp | 14 +- src/pre_process/m_assign_variables.fpp | 4 +- src/pre_process/m_compute_levelset.fpp | 14 +- src/pre_process/m_model.fpp | 24 +- 
src/pre_process/m_patches.fpp | 4 +- src/simulation/m_acoustic_src.fpp | 2 +- src/simulation/m_cbc.fpp | 4 +- 10 files changed, 147 insertions(+), 364 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 3c21279314..9c33009050 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -26,9 +26,6 @@ module m_boundary_common type(scalar_field), dimension(:, :), allocatable :: bc_buffers $:GPU_DECLARE(create='[bc_buffers]') - type(scalar_field), dimension(1) :: jac_sf - $:GPU_DECLARE(create='[jac_sf]') - #ifdef MFC_MPI integer, dimension(1:3, -1:1) :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE #endif @@ -1475,16 +1472,13 @@ contains end subroutine s_color_function_ghost_cell_extrapolation - impure subroutine s_populate_F_igr_buffers(bc_type, jac) + impure subroutine s_populate_F_igr_buffers(bc_type, jac_sf) type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type - real(wp), target, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:), intent(inout) :: jac + type(scalar_field), dimension(1:), intent(inout) :: jac_sf integer :: j, k, l - jac_sf(1)%sf => jac - $:GPU_UPDATE(device='[jac_sf(1)%sf]') - if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 1, -1, 1) else @@ -1494,15 +1488,15 @@ contains select case (bc_type(1, -1)%sf(0, k, l)) case (BC_PERIODIC) do j = 1, buff_size - jac(-j, k, l) = jac(m - j + 1, k, l) + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(m - j + 1, k, l) end do case (BC_REFLECTIVE) do j = 1, buff_size - jac(-j, k, l) = jac(j - 1, k, l) + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(j - 1, k, l) end do case default do j = 1, buff_size - jac(-j, k, l) = jac(0, k, l) + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(0, k, l) end do end select end do @@ -1520,15 +1514,15 @@ contains select case (bc_type(1, 1)%sf(0, k, l)) case (BC_PERIODIC) do j = 1, buff_size - jac(m + j, k, l) = jac(j - 1, k, l) + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(j - 1, k, l) end do case 
(BC_REFLECTIVE) do j = 1, buff_size - jac(m + j, k, l) = jac(m - (j - 1), k, l) + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m - (j - 1), k, l) end do case default do j = 1, buff_size - jac(m + j, k, l) = jac(m, k, l) + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m, k, l) end do end select end do @@ -1548,15 +1542,15 @@ contains select case (bc_type(2, -1)%sf(k, 0, l)) case (BC_PERIODIC) do j = 1, buff_size - jac(k, -j, l) = jac(k, n - j + 1, l) + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l) end do case (BC_REFLECTIVE) do j = 1, buff_size - jac(k, -j, l) = jac(k, j - 1, l) + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l) end do case default do j = 1, buff_size - jac(k, -j, l) = jac(k, 0, l) + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l) end do end select end do @@ -1574,15 +1568,15 @@ contains select case (bc_type(2, 1)%sf(k, 0, l)) case (BC_PERIODIC) do j = 1, buff_size - jac(k, n + j, l) = jac(k, j - 1, l) + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l) end do case (BC_REFLECTIVE) do j = 1, buff_size - jac(k, n + j, l) = jac(k, n - (j - 1), l) + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l) end do case default do j = 1, buff_size - jac(k, n + j, l) = jac(k, n, l) + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l) end do end select end do @@ -1601,15 +1595,15 @@ contains select case (bc_type(3, -1)%sf(k, l, 0)) case (BC_PERIODIC) do j = 1, buff_size - jac(k, l, -j) = jac(k, l, p - j + 1) + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, p - j + 1) end do case (BC_REFLECTIVE) do j = 1, buff_size - jac(k, l, -j) = jac(k, l, j - 1) + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, j - 1) end do case default do j = 1, buff_size - jac(k, l, -j) = jac(k, l, 0) + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, 0) end do end select end do @@ -1626,15 +1620,15 @@ contains select case (bc_type(3, 1)%sf(k, l, 0)) case (BC_PERIODIC) do j = 1, buff_size - jac(k, l, p + j) = jac(k, l, j - 1) + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, j - 1) end do 
case (BC_REFLECTIVE) do j = 1, buff_size - jac(k, l, p + j) = jac(k, l, p - (j - 1)) + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p - (j - 1)) end do case default do j = 1, buff_size - jac(k, l, p + j) = jac(k, l, p) + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p) end do end select end do diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index e069567bfd..23fcc87c13 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -1,101 +1,75 @@ !> !! @file m_derived_types.f90 !! @brief Contains module m_derived_types -! New line at end of file is required for FYPP# 2 "/storage/home/hcoda1/5/tprathi3/OSPO/MFC-prathi.git/openmp/src/common/include/parallel_macros.fpp" 2 -! New line at end of file is required for FYPP# 2 "/storage/home/hcoda1/5/tprathi3/OSPO/MFC-prathi.git/openmp/src/common/include/omp_macros.fpp" 2 -! New line at end of file is required for FYPP# 3 "/storage/home/hcoda1/5/tprathi3/OSPO/MFC-prathi.git/openmp/src/common/include/parallel_macros.fpp" 2 -! New line at end of file is required for FYPP# 2 "/storage/home/hcoda1/5/tprathi3/OSPO/MFC-prathi.git/openmp/src/common/include/acc_macros.fpp" 2 -! New line at end of file is required for FYPP# 4 "/storage/home/hcoda1/5/tprathi3/OSPO/MFC-prathi.git/openmp/src/common/include/parallel_macros.fpp" 2 -! New line at end of file is required for FYPP -! New line at end of file is required for FYPP + +#:include "macros.fpp" + !> @brief This file contains the definitions of all of the custom-defined !! types used in the pre-process code. 
module m_derived_types + use m_constants !< Constants + use m_precision_select use m_thermochem, only: num_species + implicit none + !> Derived type adding the field position (fp) as an attribute type field_position real(wp), allocatable, dimension(:, :, :) :: fp !< Field position end type field_position -!$omp declare mapper (field_position::x) map ( & -!$omp x%fp & -!$omp ) + !> Derived type annexing a scalar field (SF) type scalar_field real(wp), pointer, dimension(:, :, :) :: sf => null() end type scalar_field -!$omp declare mapper (scalar_field::x) map ( & -!$omp x%sf & -!$omp ) + !> Derived type for bubble variables pb and mv at quadrature nodes (qbmm) type pres_field real(wp), pointer, dimension(:, :, :, :, :) :: sf => null() end type pres_field -!$omp declare mapper (pres_field::x) map ( & -!$omp x%sf & -!$omp ) + !> Derived type annexing an integer scalar field (SF) type integer_field integer, pointer, dimension(:, :, :) :: sf => null() end type integer_field -!$omp declare mapper (integer_field::x) map ( & -!$omp x%sf & -!$omp ) + !> Derived type for levelset type levelset_field real(wp), pointer, dimension(:, :, :, :) :: sf => null() end type levelset_field -!$omp declare mapper (levelset_field::x) map ( & -!$omp x%sf & -!$omp ) + !> Derived type for levelset norm type levelset_norm_field real(wp), pointer, dimension(:, :, :, :, :) :: sf => null() end type levelset_norm_field -!$omp declare mapper (levelset_norm_field::x) map ( & -!$omp x%sf & -!$omp ) + type mpi_io_var integer, allocatable, dimension(:) :: view type(scalar_field), allocatable, dimension(:) :: var end type mpi_io_var -!$omp declare mapper (mpi_io_var::x) map ( & -!$omp x%view & -!$omp , x%var & -!$omp ) + type mpi_io_ib_var integer :: view type(integer_field) :: var end type mpi_io_ib_var -!$omp declare mapper (mpi_io_ib_var::x) map ( & -!$omp x%view & -!$omp , x%var & -!$omp ) + type mpi_io_levelset_var integer :: view type(levelset_field) :: var end type mpi_io_levelset_var -!$omp 
declare mapper (mpi_io_levelset_var::x) map ( & -!$omp x%view & -!$omp , x%var & -!$omp ) + type mpi_io_levelset_norm_var integer :: view type(levelset_norm_field) :: var end type mpi_io_levelset_norm_var -!$omp declare mapper (mpi_io_levelset_norm_var::x) map ( & -!$omp x%view & -!$omp , x%var & -!$omp ) + !> Derived type annexing a vector field (VF) type vector_field type(scalar_field), allocatable, dimension(:) :: vf !< Vector field end type vector_field -!$omp declare mapper (vector_field::x) map ( & -!$omp x%vf & -!$omp ) + !> Generic 3-component vector (e.g., spatial coordinates or field components) !! Named _dt (derived types: x,y,z) to differentiate from t_vec3 (3-component vector) type vec3_dt ! dt for derived types @@ -103,33 +77,24 @@ module m_derived_types real(wp) :: y real(wp) :: z end type vec3_dt -!$omp declare mapper (vec3_dt::x) map ( & -!$omp x%x & -!$omp , x%y & -!$omp , x%z & -!$omp ) + !> Left and right Riemann states type riemann_states real(wp) :: L real(wp) :: R end type riemann_states -!$omp declare mapper (riemann_states::x) map ( & -!$omp x%L & -!$omp , x%R & -!$omp ) + !> Left and right Riemann states for 3-component vectors type riemann_states_vec3 real(wp) :: L(3) real(wp) :: R(3) end type riemann_states_vec3 -!$omp declare mapper (riemann_states_vec3::x) map ( & -!$omp x%L(3) & -!$omp , x%R(3) & -!$omp ) + !> Integer bounds for variables type int_bounds_info integer :: beg integer :: end + real(wp) :: vb1 real(wp) :: vb2 real(wp) :: vb3 @@ -140,26 +105,9 @@ module m_derived_types real(wp), dimension(3) :: vel_in, vel_out real(wp), dimension(num_fluids_max) :: alpha_rho_in, alpha_in logical :: grcbc_in, grcbc_out, grcbc_vel_out + end type int_bounds_info -!$omp declare mapper (int_bounds_info::x) map ( & -!$omp x%beg & -!$omp , x%end & -!$omp , x%vb1 & -!$omp , x%vb2 & -!$omp , x%vb3 & -!$omp , x%ve1 & -!$omp , x%ve2 & -!$omp , x%ve3 & -!$omp , x%pres_in & -!$omp , x%pres_out & -!$omp , x%vel_in & -!$omp , x%vel_out & -!$omp , 
x%alpha_rho_in & -!$omp , x%alpha_in & -!$omp , x%grcbc_in & -!$omp , x%grcbc_out & -!$omp , x%grcbc_vel_out & -!$omp ) + type bc_patch_parameters integer :: geometry integer :: type @@ -169,24 +117,13 @@ module m_derived_types real(wp), dimension(3) :: length real(wp) :: radius end type bc_patch_parameters -!$omp declare mapper (bc_patch_parameters::x) map ( & -!$omp x%geometry & -!$omp , x%type & -!$omp , x%dir & -!$omp , x%loc & -!$omp , x%centroid & -!$omp , x%length & -!$omp , x%radius & -!$omp ) + !> Derived type adding beginning (beg) and end bounds info as attributes type bounds_info real(wp) :: beg real(wp) :: end end type bounds_info -!$omp declare mapper (bounds_info::x) map ( & -!$omp x%beg & -!$omp , x%end & -!$omp ) + !> bounds for the bubble dynamic variables type bub_bounds_info integer :: beg @@ -198,108 +135,99 @@ module m_derived_types integer, dimension(:, :), allocatable :: moms !< Moment indices for qbmm integer, dimension(:, :, :), allocatable :: fullmom !< Moment indices for qbmm end type bub_bounds_info -!$omp declare mapper (bub_bounds_info::x) map ( & -!$omp x%beg & -!$omp , x%end & -!$omp , x%rs & -!$omp , x%vs & -!$omp , x%ps & -!$omp , x%ms & -!$omp , x%moms & -!$omp , x%fullmom & -!$omp ) + !> Defines parameters for a Model Patch type ic_model_parameters character(LEN=pathlen_max) :: filepath !< !! Path the STL file relative to case_dir. + real(wp), dimension(1:3) :: translate !< !! Translation of the STL object. + real(wp), dimension(1:3) :: scale !< !! Scale factor for the STL object. + real(wp), dimension(1:3) :: rotate !< !! Angle to rotate the STL object along each cartesian coordinate axis, !! in radians. + integer :: spc !< !! Number of samples per cell to use when discretizing the STL object. + real(wp) :: threshold !< !! Threshold to turn on smoothen STL patch. 
end type ic_model_parameters -!$omp declare mapper (ic_model_parameters::x) map ( & -!$omp x%filepath & -!$omp , x%translate & -!$omp , x%scale & -!$omp , x%rotate & -!$omp , x%spc & -!$omp , x%threshold & -!$omp ) + type :: t_triangle real(wp), dimension(1:3, 1:3) :: v ! Vertices of the triangle real(wp), dimension(1:3) :: n ! Normal vector end type t_triangle -!$omp declare mapper (t_triangle::x) map ( & -!$omp x%v & -!$omp , x%n & -!$omp ) + type :: t_ray real(wp), dimension(1:3) :: o ! Origin real(wp), dimension(1:3) :: d ! Direction end type t_ray -!$omp declare mapper (t_ray::x) map ( & -!$omp x%o & -!$omp , x%d & -!$omp ) + type :: t_bbox real(wp), dimension(1:3) :: min ! Minimum coordinates real(wp), dimension(1:3) :: max ! Maximum coordinates end type t_bbox -!$omp declare mapper (t_bbox::x) map ( & -!$omp x%min & -!$omp , x%max & -!$omp ) + type :: t_model integer :: ntrs ! Number of triangles type(t_triangle), allocatable :: trs(:) ! Triangles end type t_model -!$omp declare mapper (t_model::x) map ( & -!$omp x%ntrs & -!$omp , x%trs(:) & -!$omp ) + !> Derived type adding initial condition (ic) patch parameters as attributes !! NOTE: The requirements for the specification of the above parameters !! are strongly dependent on both the choice of the multicomponent flow !! model as well as the choice of the patch geometry. type ic_patch_parameters + integer :: geometry !< Type of geometry for the patch + real(wp) :: x_centroid, y_centroid, z_centroid !< !! Location of the geometric center, i.e. the centroid, of the patch. It !! is specified through its x-, y- and z-coordinates, respectively. + real(wp) :: length_x, length_y, length_z !< Dimensions of the patch. x,y,z Lengths. real(wp) :: radius !< Dimensions of the patch. radius. + real(wp), dimension(3) :: radii !< !! Vector indicating the various radii for the elliptical and ellipsoidal !! patch geometries. It is specified through its x-, y-, and z-components !! respectively. 
+ real(wp) :: epsilon, beta !< !! The isentropic vortex parameters for the amplitude of the disturbance and !! domain of influence. + real(wp), dimension(2:9) :: a !< !! The parameters needed for the spherical harmonic patch + logical :: non_axis_sym + real(wp), dimension(3) :: normal !< !! Normal vector indicating the orientation of the patch. It is specified !! through its x-, y- and z-components, respectively. + logical, dimension(0:num_patches_max - 1) :: alter_patch !< + !! List of permissions that indicate to the current patch which preceding !! patches it is allowed to overwrite when it is in process of being laid !! out in the domain + logical :: smoothen !< !! Permission indicating to the current patch whether its boundaries will !! be smoothed out across a few cells or whether they are to remain sharp + integer :: smooth_patch_id !< !! Identity (id) of the patch with which current patch is to get smoothed + real(wp) :: smooth_coeff !< !! Smoothing coefficient (coeff) for the size of the stencil of !! cells across which boundaries of the current patch will be smeared out + real(wp), dimension(num_fluids_max) :: alpha_rho real(wp) :: rho real(wp), dimension(3) :: vel @@ -310,130 +238,88 @@ module m_derived_types real(wp) :: cv !< real(wp) :: qv !< real(wp) :: qvp !< + !! Primitive variables associated with the patch. In order, these include !! the partial densities, density, velocity, pressure, volume fractions, !! specific heat ratio function and the liquid stiffness function. + real(wp) :: Bx, By, Bz !< !! Magnetic field components; B%x is not used for 1D + real(wp), dimension(6) :: tau_e !< !! Elastic stresses added to primitive variables if hypoelasticity = True + real(wp) :: R0 !< Bubble size real(wp) :: V0 !< Bubble velocity + real(wp) :: p0 !< Bubble size real(wp) :: m0 !< Bubble velocity + integer :: hcid !! id for hard coded initial condition + real(wp) :: cf_val !! color function value real(wp) :: Y(1:num_species) + !! 
STL or OBJ model input parameter character(LEN=pathlen_max) :: model_filepath !< !! Path the STL file relative to case_dir. + real(wp), dimension(1:3) :: model_translate !< !! Translation of the STL object. + real(wp), dimension(1:3) :: model_scale !< !! Scale factor for the STL object. + real(wp), dimension(1:3) :: model_rotate !< !! Angle to rotate the STL object along each cartesian coordinate axis, !! in radians. + integer :: model_spc !< !! Number of samples per cell to use when discretizing the STL object. + real(wp) :: model_threshold !< !! Threshold to turn on smoothen STL patch. + end type ic_patch_parameters -!$omp declare mapper (ic_patch_parameters::x) map ( & -!$omp x%geometry & -!$omp , x%x_centroid & -!$omp , x%y_centroid & -!$omp , x%z_centroid & -!$omp , x%length_x & -!$omp , x%length_y & -!$omp , x%length_z & -!$omp , x%radius & -!$omp , x%radii & -!$omp , x%epsilon & -!$omp , x%beta & -!$omp , x%a & -!$omp , x%non_axis_sym & -!$omp , x%normal & -!$omp , x%alter_patch & -!$omp , x%smoothen & -!$omp , x%smooth_patch_id & -!$omp , x%smooth_coeff & -!$omp , x%alpha_rho & -!$omp , x%rho & -!$omp , x%vel & -!$omp , x%pres & -!$omp , x%alpha & -!$omp , x%gamma & -!$omp , x%pi_inf & -!$omp , x%cv & -!$omp , x%qv & -!$omp , x%qvp & -!$omp , x%Bx & -!$omp , x%By & -!$omp , x%Bz & -!$omp , x%tau_e & -!$omp , x%R0 & -!$omp , x%V0 & -!$omp , x%p0 & -!$omp , x%m0 & -!$omp , x%hcid & -!$omp , x%cf_val & -!$omp , x%Y(1:num_species) & -!$omp , x%model_filepath & -!$omp , x%model_translate & -!$omp , x%model_scale & -!$omp , x%model_rotate & -!$omp , x%model_spc & -!$omp , x%model_threshold & -!$omp ) + type ib_patch_parameters + integer :: geometry !< Type of geometry for the patch + real(wp) :: x_centroid, y_centroid, z_centroid !< !! Location of the geometric center, i.e. the centroid, of the patch. It !! is specified through its x-, y- and z-coordinates, respectively. 
+ real(wp) :: c, p, t, m + real(wp) :: length_x, length_y, length_z !< Dimensions of the patch. x,y,z Lengths. real(wp) :: radius !< Dimensions of the patch. radius. real(wp) :: theta + logical :: slip + !! STL or OBJ model input parameter character(LEN=pathlen_max) :: model_filepath !< !! Path the STL file relative to case_dir. + real(wp), dimension(1:3) :: model_translate !< !! Translation of the STL object. + real(wp), dimension(1:3) :: model_scale !< !! Scale factor for the STL object. + real(wp), dimension(1:3) :: model_rotate !< !! Angle to rotate the STL object along each cartesian coordinate axis, !! in radians. + integer :: model_spc !< !! Number of samples per cell to use when discretizing the STL object. + real(wp) :: model_threshold !< !! Threshold to turn on smoothen STL patch. end type ib_patch_parameters -!$omp declare mapper (ib_patch_parameters::x) map ( & -!$omp x%geometry & -!$omp , x%x_centroid & -!$omp , x%y_centroid & -!$omp , x%z_centroid & -!$omp , x%c & -!$omp , x%p & -!$omp , x%t & -!$omp , x%m & -!$omp , x%length_x & -!$omp , x%length_y & -!$omp , x%length_z & -!$omp , x%radius & -!$omp , x%theta & -!$omp , x%slip & -!$omp , x%model_filepath & -!$omp , x%model_translate & -!$omp , x%model_scale & -!$omp , x%model_rotate & -!$omp , x%model_spc & -!$omp , x%model_threshold & -!$omp ) + !> Derived type annexing the physical parameters (PP) of the fluids. These !! include the specific heat ratio function and liquid stiffness function. 
type physical_parameters @@ -453,31 +339,12 @@ module m_derived_types real(wp) :: cp_v real(wp) :: G end type physical_parameters -!$omp declare mapper (physical_parameters::x) map ( & -!$omp x%gamma & -!$omp , x%pi_inf & -!$omp , x%Re & -!$omp , x%cv & -!$omp , x%qv & -!$omp , x%qvp & -!$omp , x%mul0 & -!$omp , x%ss & -!$omp , x%pv & -!$omp , x%gamma_v & -!$omp , x%M_v & -!$omp , x%mu_v & -!$omp , x%k_v & -!$omp , x%cp_v & -!$omp , x%G & -!$omp ) + type mpi_io_airfoil_ib_var integer, dimension(2) :: view type(vec3_dt), allocatable, dimension(:) :: var end type mpi_io_airfoil_ib_var -!$omp declare mapper (mpi_io_airfoil_ib_var::x) map ( & -!$omp x%view & -!$omp , x%var & -!$omp ) + !> Derived type annexing integral regions type integral_parameters real(wp) :: xmin !< Min. boundary first coordinate direction @@ -487,14 +354,7 @@ module m_derived_types real(wp) :: zmin !< Min. boundary third coordinate direction real(wp) :: zmax !< Max. boundary third coordinate direction end type integral_parameters -!$omp declare mapper (integral_parameters::x) map ( & -!$omp x%xmin & -!$omp , x%xmax & -!$omp , x%ymin & -!$omp , x%ymax & -!$omp , x%zmin & -!$omp , x%zmax & -!$omp ) + !> Acoustic source parameters type acoustic_parameters integer :: pulse !< Type of pulse @@ -522,32 +382,7 @@ module m_derived_types integer :: element_on !< Element in the acoustic array to turn on integer :: bb_num_freq !< Number of frequencies in the broadband wave end type acoustic_parameters -!$omp declare mapper (acoustic_parameters::x) map ( & -!$omp x%pulse & -!$omp , x%support & -!$omp , x%dipole & -!$omp , x%loc & -!$omp , x%mag & -!$omp , x%length & -!$omp , x%height & -!$omp , x%wavelength & -!$omp , x%frequency & -!$omp , x%gauss_sigma_dist & -!$omp , x%gauss_sigma_time & -!$omp , x%npulse & -!$omp , x%dir & -!$omp , x%delay & -!$omp , x%foc_length & -!$omp , x%aperture & -!$omp , x%element_spacing_angle & -!$omp , x%element_polygon_ratio & -!$omp , x%rotate_angle & -!$omp , x%bb_bandwidth 
& -!$omp , x%bb_lowest_freq & -!$omp , x%num_elements & -!$omp , x%element_on & -!$omp , x%bb_num_freq & -!$omp ) + !> Acoustic source source_spatial pre-calculated values type source_spatial_type integer, dimension(:, :), allocatable :: coord !< List of grid points indices with non-zero source_spatial values @@ -555,12 +390,7 @@ module m_derived_types real(wp), dimension(:), allocatable :: angle !< List of angles with x-axis for mom source term vector real(wp), dimension(:, :), allocatable :: xyz_to_r_ratios !< List of [xyz]/r for mom source term vector end type source_spatial_type -!$omp declare mapper (source_spatial_type::x) map ( & -!$omp x%coord & -!$omp , x%val & -!$omp , x%angle & -!$omp , x%xyz_to_r_ratios & -!$omp ) + !> Ghost Point for Immersed Boundaries type ghost_point integer, dimension(3) :: loc !< Physical location of the ghost point @@ -571,40 +401,28 @@ module m_derived_types logical :: slip integer, dimension(3) :: DB end type ghost_point -!$omp declare mapper (ghost_point::x) map ( & -!$omp x%loc & -!$omp , x%ip_loc & -!$omp , x%ip_grid & -!$omp , x%interp_coeffs & -!$omp , x%ib_patch_id & -!$omp , x%slip & -!$omp , x%DB & -!$omp ) + !> Species parameters type species_parameters character(LEN=name_len) :: name !< Name of species end type species_parameters -!$omp declare mapper (species_parameters::x) map ( & -!$omp x%name & -!$omp ) + !> Chemistry parameters type chemistry_parameters character(LEN=name_len) :: cantera_file !< Path to Cantera file + logical :: diffusion logical :: reactions + !> Method of determining gamma. !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. 
integer :: gamma_method end type chemistry_parameters -!$omp declare mapper (chemistry_parameters::x) map ( & -!$omp x%cantera_file & -!$omp , x%diffusion & -!$omp , x%reactions & -!$omp , x%gamma_method & -!$omp ) + !> Lagrangian bubble parameters type bubbles_lagrange_parameters + integer :: solver_approach !< 1: One-way coupling, 2: two-way coupling integer :: cluster_type !< Cluster model to find p_inf logical :: pressure_corrector !< Cell pressure correction term @@ -622,42 +440,13 @@ module m_derived_types real(wp) :: T0, Thost !< Reference temperature and host temperature real(wp) :: x0 !< Reference length real(wp) :: diffcoefvap !< Vapor diffusivity in the gas + end type bubbles_lagrange_parameters -!$omp declare mapper (bubbles_lagrange_parameters::x) map ( & -!$omp x%solver_approach & -!$omp , x%cluster_type & -!$omp , x%pressure_corrector & -!$omp , x%smooth_type & -!$omp , x%heatTransfer_model & -!$omp , x%massTransfer_model & -!$omp , x%write_bubbles & -!$omp , x%write_bubbles_stats & -!$omp , x%nBubs_glb & -!$omp , x%epsilonb & -!$omp , x%charwidth & -!$omp , x%valmaxvoid & -!$omp , x%c0 & -!$omp , x%rho0 & -!$omp , x%T0 & -!$omp , x%Thost & -!$omp , x%x0 & -!$omp , x%diffcoefvap & -!$omp ) + !> Max and min number of cells in a direction of each combination of x-,y-, and z- type cell_num_bounds integer :: mn_max, np_max, mp_max, mnp_max integer :: mn_min, np_min, mp_min, mnp_min end type cell_num_bounds -!$omp declare mapper (cell_num_bounds::x) map ( & -!$omp x%mn_max & -!$omp , x%np_max & -!$omp , x%mp_max & -!$omp , x%mnp_max & -!$omp , x%mn_min & -!$omp , x%np_min & -!$omp , x%mp_min & -!$omp , x%mnp_min & -!$omp ) -end module m_derived_types - -! 
Code was translated using: /media/shared/Documents/GitHub/OSPO/intel-application-migration-tool-for-openacc-to-openmp/simulation/src/intel-application-migration-tool-for-openacc-to-openmp -keep-binding-clauses=all simulation/p_main.fpp.f90 + +end module m_derived_types \ No newline at end of file diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index 91eebe1992..e2dc0e8b14 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -161,7 +161,7 @@ contains !! @param m Number of cells in x-axis !! @param n Number of cells in y-axis !! @param p Number of cells in z-axis - pure elemental subroutine s_update_cell_bounds(bounds, m, n, p) + elemental subroutine s_update_cell_bounds(bounds, m, n, p) type(cell_num_bounds), intent(out) :: bounds integer, intent(in) :: m, n, p diff --git a/src/post_process/m_derived_variables.fpp b/src/post_process/m_derived_variables.fpp index d3f27f6e81..ce5e4bc07b 100644 --- a/src/post_process/m_derived_variables.fpp +++ b/src/post_process/m_derived_variables.fpp @@ -117,7 +117,7 @@ contains !! ratio. The latter is stored in the derived flow quantity !! storage variable, q_sf. !! @param q_sf Specific heat ratio - pure subroutine s_derive_specific_heat_ratio(q_sf) + subroutine s_derive_specific_heat_ratio(q_sf) real(wp), & dimension(-offset_x%beg:m + offset_x%end, & @@ -144,7 +144,7 @@ contains !! values of the liquid stiffness, which are stored in the !! derived flow quantity storage variable, q_sf. !! @param q_sf Liquid stiffness - pure subroutine s_derive_liquid_stiffness(q_sf) + subroutine s_derive_liquid_stiffness(q_sf) real(wp), & dimension(-offset_x%beg:m + offset_x%end, & @@ -173,7 +173,7 @@ contains !! derived flow quantity storage variable, q_sf. !! @param q_prim_vf Primitive variables !! 
@param q_sf Speed of sound - pure subroutine s_derive_sound_speed(q_prim_vf, q_sf) + subroutine s_derive_sound_speed(q_prim_vf, q_sf) type(scalar_field), & dimension(sys_size), & @@ -230,7 +230,7 @@ contains !! @param i Component indicator !! @param q_prim_vf Primitive variables !! @param q_sf Flux limiter - pure subroutine s_derive_flux_limiter(i, q_prim_vf, q_sf) + subroutine s_derive_flux_limiter(i, q_prim_vf, q_sf) integer, intent(in) :: i @@ -324,7 +324,7 @@ contains !! @param b right-hane-side !! @param sol Solution !! @param ndim Problem size - pure subroutine s_solve_linear_system(A, b, sol, ndim) + subroutine s_solve_linear_system(A, b, sol, ndim) integer, intent(in) :: ndim real(wp), dimension(ndim, ndim), intent(inout) :: A @@ -374,7 +374,7 @@ contains !! @param i Vorticity component indicator !! @param q_prim_vf Primitive variables !! @param q_sf Vorticity component - pure subroutine s_derive_vorticity_component(i, q_prim_vf, q_sf) + subroutine s_derive_vorticity_component(i, q_prim_vf, q_sf) integer, intent(in) :: i @@ -476,7 +476,7 @@ contains !! quantity storage variable, q_sf. !! @param q_prim_vf Primitive variables !! @param q_sf Q_M - pure subroutine s_derive_qm(q_prim_vf, q_sf) + subroutine s_derive_qm(q_prim_vf, q_sf) type(scalar_field), & dimension(sys_size), & intent(in) :: q_prim_vf diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index f1bfe06fa3..a4407266b3 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -102,7 +102,7 @@ contains !! @param eta pseudo volume fraction !! @param q_prim_vf Primitive variables !! @param patch_id_fp Array to track patch ids - pure subroutine s_assign_patch_mixture_primitive_variables(patch_id, j, k, l, & + subroutine s_assign_patch_mixture_primitive_variables(patch_id, j, k, l, & eta, q_prim_vf, patch_id_fp) $:GPU_ROUTINE(parallelism='[seq]') @@ -191,7 +191,7 @@ contains !! @param k the y-dir node index !! 
@param l the z-dir node index !! @param q_prim_vf Primitive variables - pure subroutine s_perturb_primitive(j, k, l, q_prim_vf) + subroutine s_perturb_primitive(j, k, l, q_prim_vf) integer, intent(in) :: j, k, l type(scalar_field), dimension(1:sys_size), intent(inout) :: q_prim_vf diff --git a/src/pre_process/m_compute_levelset.fpp b/src/pre_process/m_compute_levelset.fpp index 17f66f8d68..673a2aff5b 100644 --- a/src/pre_process/m_compute_levelset.fpp +++ b/src/pre_process/m_compute_levelset.fpp @@ -27,7 +27,7 @@ module m_compute_levelset contains - pure subroutine s_circle_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_circle_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -63,7 +63,7 @@ contains end subroutine s_circle_levelset - pure subroutine s_airfoil_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_airfoil_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -146,7 +146,7 @@ contains end subroutine s_airfoil_levelset - pure subroutine s_3D_airfoil_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_3D_airfoil_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -250,7 +250,7 @@ contains end subroutine s_3D_airfoil_levelset !> Initialize IBM module - pure subroutine s_rectangle_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_rectangle_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -347,7 +347,7 @@ contains end subroutine s_rectangle_levelset - pure subroutine s_cuboid_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_cuboid_levelset(levelset, levelset_norm, ib_patch_id) 
type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -464,7 +464,7 @@ contains end subroutine s_cuboid_levelset - pure subroutine s_sphere_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_sphere_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -501,7 +501,7 @@ contains end subroutine s_sphere_levelset - pure subroutine s_cylinder_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_cylinder_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm diff --git a/src/pre_process/m_model.fpp b/src/pre_process/m_model.fpp index 734e83f4af..03225a4b76 100644 --- a/src/pre_process/m_model.fpp +++ b/src/pre_process/m_model.fpp @@ -424,7 +424,7 @@ contains end subroutine s_model_write !> This procedure frees the memory allocated for an STL mesh. - pure subroutine s_model_free(model) + subroutine s_model_free(model) type(t_model), intent(inout) :: model @@ -532,7 +532,7 @@ contains !! @param ray Ray. !! @param triangle Triangle. !! @return True if the ray intersects the triangle, false otherwise. - pure elemental function f_intersects_triangle(ray, triangle) result(intersects) + elemental function f_intersects_triangle(ray, triangle) result(intersects) type(t_ray), intent(in) :: ray type(t_triangle), intent(in) :: triangle @@ -592,7 +592,7 @@ contains !! @param boundary_v Output boundary vertices/normals. !! @param boundary_vertex_count Output total boundary vertex count !! 
@param boundary_edge_count Output total boundary edge counts - pure subroutine f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) + subroutine f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) type(t_model), intent(in) :: model real(wp), allocatable, intent(out), dimension(:, :, :) :: boundary_v !< Output boundary vertices/normals integer, intent(out) :: boundary_vertex_count, boundary_edge_count !< Output boundary vertex/edge count @@ -705,7 +705,7 @@ contains !! @param edge Edges end points to be registered !! @param edge_index Edge index iterator !! @param edge_count Total number of edges - pure subroutine f_register_edge(temp_boundary_v, edge, edge_index, edge_count) + subroutine f_register_edge(temp_boundary_v, edge, edge_index, edge_count) integer, intent(inout) :: edge_index !< Edge index iterator integer, intent(inout) :: edge_count !< Total number of edges real(wp), intent(in), dimension(1:2, 1:2) :: edge !< Edges end points to be registered @@ -723,7 +723,7 @@ contains !! @param boundary_edge_count Output total number of boundary edges !! @param spacing Dimensions of the current levelset cell !! @param interpolate Logical output - pure subroutine f_check_interpolation_2D(boundary_v, boundary_edge_count, spacing, interpolate) + subroutine f_check_interpolation_2D(boundary_v, boundary_edge_count, spacing, interpolate) logical, intent(inout) :: interpolate !< Logical indicator of interpolation integer, intent(in) :: boundary_edge_count !< Number of boundary edges real(wp), intent(in), dimension(1:boundary_edge_count, 1:3, 1:2) :: boundary_v @@ -753,7 +753,7 @@ contains !! @param model Model to search in. !! @param spacing Dimensions of the current levelset cell !! 
@param interpolate Logical output - pure subroutine f_check_interpolation_3D(model, spacing, interpolate) + subroutine f_check_interpolation_3D(model, spacing, interpolate) logical, intent(inout) :: interpolate type(t_model), intent(in) :: model real(wp), dimension(1:3), intent(in) :: spacing @@ -799,7 +799,7 @@ contains !! @param spacing Dimensions of the current levelset cell !! @param interpolated_boundary_v Output all the boundary vertices of the interpolated 2D model !! @param total_vertices Total number of vertices after interpolation - pure subroutine f_interpolate_2D(boundary_v, boundary_edge_count, spacing, interpolated_boundary_v, total_vertices) + subroutine f_interpolate_2D(boundary_v, boundary_edge_count, spacing, interpolated_boundary_v, total_vertices) real(wp), intent(in), dimension(:, :, :) :: boundary_v real(wp), dimension(1:3), intent(in) :: spacing real(wp), allocatable, intent(inout), dimension(:, :) :: interpolated_boundary_v @@ -1042,7 +1042,7 @@ contains !! @param point The cell centers of the current level cell !! @param normals The output levelset normals !! @param distance The output levelset distance - pure subroutine f_distance_normals_3D(model, point, normals, distance) + subroutine f_distance_normals_3D(model, point, normals, distance) type(t_model), intent(IN) :: model real(wp), dimension(1:3), intent(in) :: point real(wp), dimension(1:3), intent(out) :: normals @@ -1104,7 +1104,7 @@ contains !! @param point The cell centers of the current levelset cell !! @param spacing Dimensions of the current levelset cell !! 
@return Distance which the levelset distance without interpolation - pure function f_distance(boundary_v, boundary_edge_count, point) result(distance) + function f_distance(boundary_v, boundary_edge_count, point) result(distance) integer, intent(in) :: boundary_edge_count real(wp), intent(in), dimension(1:boundary_edge_count, 1:3, 1:2) :: boundary_v real(wp), dimension(1:3), intent(in) :: point @@ -1134,7 +1134,7 @@ contains !! @param boundary_edge_count Output the total number of boundary edges !! @param point The cell centers of the current levelset cell !! @param normals Output levelset normals without interpolation - pure subroutine f_normals(boundary_v, boundary_edge_count, point, normals) + subroutine f_normals(boundary_v, boundary_edge_count, point, normals) integer, intent(in) :: boundary_edge_count real(wp), intent(in), dimension(1:boundary_edge_count, 1:3, 1:2) :: boundary_v real(wp), dimension(1:3), intent(in) :: point @@ -1169,7 +1169,7 @@ contains end subroutine f_normals !> This procedure calculates the barycentric facet area - pure subroutine f_tri_area(tri, tri_area) + subroutine f_tri_area(tri, tri_area) real(wp), dimension(1:3, 1:3), intent(in) :: tri real(wp), intent(out) :: tri_area real(wp), dimension(1:3) :: AB, AC, cross @@ -1192,7 +1192,7 @@ contains !! @param total_vertices Total number of vertices after interpolation !! @param point The cell centers of the current levelset cell !! 
@return Distance which the levelset distance without interpolation - pure function f_interpolated_distance(interpolated_boundary_v, total_vertices, point) result(distance) + function f_interpolated_distance(interpolated_boundary_v, total_vertices, point) result(distance) integer, intent(in) :: total_vertices real(wp), intent(in), dimension(1:total_vertices, 1:3) :: interpolated_boundary_v real(wp), dimension(1:3), intent(in) :: point diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp index db09af9d1f..f1427a255f 100644 --- a/src/pre_process/m_patches.fpp +++ b/src/pre_process/m_patches.fpp @@ -2266,7 +2266,7 @@ contains end subroutine s_convert_cylindrical_to_cartesian_coord - pure function f_convert_cyl_to_cart(cyl) result(cart) + function f_convert_cyl_to_cart(cyl) result(cart) $:GPU_ROUTINE(parallelism='[seq]') @@ -2292,7 +2292,7 @@ contains !! @param myth Angle !! @param offset Thickness !! @param a Starting position - pure elemental function f_r(myth, offset, a) + elemental function f_r(myth, offset, a) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: myth, offset, a real(wp) :: b diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 39f671ba7f..e75b6629e0 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -183,7 +183,7 @@ contains ! Keep outer loop sequel because different sources can have very different number of points do ai = 1, num_source ! Skip if the pulse has not started yet for sine and square waves - if (not (sim_time < delay(ai) .and. (pulse(ai) == 1 .or. pulse(ai) == 3))) then + if (.not. (sim_time < delay(ai) .and. (pulse(ai) == 1 .or. pulse(ai) == 3))) then ! 
Decide if frequency need to be converted from wavelength freq_conv_flag = f_is_default(frequency(ai)) diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 020833d6c2..e99f073205 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -773,7 +773,7 @@ contains end if ! FD2 or FD4 of RHS at j = 0 - ! #:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') + #:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1104,7 +1104,7 @@ contains end do end do - ! #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP end if #:endfor From cb025117812e2baab8207955a2e72b63a219a452 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Fri, 25 Jul 2025 16:29:59 -0400 Subject: [PATCH 25/60] Forgot to add something for IGR and add back parallel loop for data output --- src/simulation/m_data_output.fpp | 4 ++-- src/simulation/m_igr.fpp | 12 ++++++++++-- src/simulation/m_start_up.fpp | 5 ++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index f39bdfc940..9f80df014e 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -279,7 +279,7 @@ contains integer :: j, k, l ! Computing Stability Criteria at Current Time-step - ! #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') do l = 0, p do k = 0, n do j = 0, m @@ -296,7 +296,7 @@ contains end do end do end do - ! #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP ! 
end: Computing Stability Criteria at Current Time-step diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index 59684c9e05..0bf904b220 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -23,10 +23,14 @@ module m_igr s_igr_sigma_x, & s_igr_flux_add, & s_finalize_igr_module + real(wp), allocatable, target, dimension(:, :, :) :: jac - real(wp), allocatable, dimension(:, :, :) :: jac, jac_rhs, jac_old + real(wp), allocatable, dimension(:, :, :) :: jac_rhs, jac_old $:GPU_DECLARE(create='[jac, jac_rhs, jac_old]') + type(scalar_field), dimension(1) :: jac_sf + $:GPU_DECLARE(create='[jac_sf]') + real(wp), allocatable, dimension(:, :) :: Res $:GPU_DECLARE(create='[Res]') @@ -161,6 +165,10 @@ contains end if #:endif + jac_sf(1)%sf => jac + $:GPU_ENTER_DATA(copyin='[jac_sf(1)%sf]') + $:GPU_ENTER_DATA(attach='[jac_sf(1)%sf]') + end subroutine s_initialize_igr_module subroutine s_igr_iterative_solve(q_cons_vf, bc_type, t_step) @@ -247,7 +255,7 @@ contains end do #:endcall GPU_PARALLEL_LOOP - call s_populate_F_igr_buffers(bc_type, jac) + call s_populate_F_igr_buffers(bc_type, jac_sf) if (igr_iter_solver == 1) then ! Jacobi iteration #:call GPU_PARALLEL_LOOP(collapse=3) diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index b425a3a5cd..8f950c6b50 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1430,7 +1430,7 @@ contains $:GPU_UPDATE(device='[sigma, surface_tension]') $:GPU_UPDATE(device='[dx,dy,dz,x_cb,x_cc,y_cb,y_cc,z_cb,z_cc]') - +! 
#if defined(MFC_OpenACC) $:GPU_UPDATE(device='[bc_x%vb1,bc_x%vb2,bc_x%vb3,bc_x%ve1,bc_x%ve2,bc_x%ve3]') $:GPU_UPDATE(device='[bc_y%vb1,bc_y%vb2,bc_y%vb3,bc_y%ve1,bc_y%ve2,bc_y%ve3]') $:GPU_UPDATE(device='[bc_z%vb1,bc_z%vb2,bc_z%vb3,bc_z%ve1,bc_z%ve2,bc_z%ve3]') @@ -1438,6 +1438,9 @@ contains $:GPU_UPDATE(device='[bc_x%grcbc_in,bc_x%grcbc_out,bc_x%grcbc_vel_out]') $:GPU_UPDATE(device='[bc_y%grcbc_in,bc_y%grcbc_out,bc_y%grcbc_vel_out]') $:GPU_UPDATE(device='[bc_z%grcbc_in,bc_z%grcbc_out,bc_z%grcbc_vel_out]') +! #elif defined(MFC_OpenMP) +! $:GPU_UPDATE(device='[bc_x,bc_y,bc_z]') +! #endif $:GPU_UPDATE(device='[relax, relax_model]') if (relax) then From 69b579247eac95799a3e25b222e0976107d047d6 Mon Sep 17 00:00:00 2001 From: Anand Radhakrishnan Date: Mon, 28 Jul 2025 10:01:01 -0400 Subject: [PATCH 26/60] change binding + test suite works --- src/common/include/omp_macros.fpp | 4 ++-- src/simulation/m_ibm.fpp | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 00735eae2c..3ae7927943 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -160,7 +160,7 @@ & no_create_val.strip('\n') + present_val.strip('\n') + & & deviceptr_val.strip('\n') + attach_val.strip('\n') #! Hardcoding the parallelism for now - #:set omp_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams)' + & + #:set omp_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel)' + & & clause_val + extraOmpArgs_val.strip('\n') #:set omp_end_directive = '!$omp end target teams loop' $:omp_directive @@ -295,4 +295,4 @@ #:set omp_directive = '!$omp barrier ' + clause_val + extraOmpArgs_val.strip('\n') $:omp_directive #:enddef -! New line at end of file is required for FYPP \ No newline at end of file +! 
New line at end of file is required for FYPP diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 668a15d41e..d7972188d7 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -175,9 +175,9 @@ contains gp = ghost_points(i) j = gp%loc(1) k = gp%loc(2) - l = gp%loc(3) - patch_id = ghost_points(i)%ib_patch_id - + l = gp%loc(3) + patch_id = ghost_points(i)%ib_patch_id + ! Calculate physical location of GP if (p > 0) then physical_loc = [x_cc(j), y_cc(k), z_cc(l)] @@ -202,7 +202,6 @@ contains call s_interpolate_image_point(q_prim_vf, gp, & alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) end if - dyn_pres = 0._wp ! Set q_prim_vf params at GP so that mixture vars calculated properly @@ -214,8 +213,7 @@ contains if (surface_tension) then q_prim_vf(c_idx)%sf(j, k, l) = c_IP - end if - + end if if (model_eqns /= 4) then ! If in simulation, use acc mixture subroutines if (elasticity) then @@ -259,7 +257,7 @@ contains ! Set color function if (surface_tension) then q_cons_vf(c_idx)%sf(j, k, l) = c_IP - end if + end if ! Set Energy if (bubbles_euler) then @@ -267,10 +265,10 @@ contains else q_cons_vf(E_idx)%sf(j, k, l) = gamma*pres_IP + pi_inf + dyn_pres end if - ! Set bubble vars if (bubbles_euler .and. .not. qbmm) then call s_comp_n_from_prim(alpha_IP(1), r_IP, nbub, weight) + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb q_cons_vf(bubxb + (q - 1)*2)%sf(j, k, l) = nbub*r_IP(q) q_cons_vf(bubxb + (q - 1)*2 + 1)%sf(j, k, l) = nbub*v_IP(q) @@ -286,15 +284,20 @@ contains if (qbmm) then nbub = nmom_IP(1) + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb*nmom q_cons_vf(bubxb + q - 1)%sf(j, k, l) = nbub*nmom_IP(q) end do + + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb q_cons_vf(bubxb + (q - 1)*nmom)%sf(j, k, l) = nbub end do if (.not. 
polytropic) then + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb + $:GPU_LOOP(parallelism='[seq]') do r = 1, nnode pb_in(j, k, l, r, q) = presb_IP((q - 1)*nnode + r) mv_in(j, k, l, r, q) = massv_IP((q - 1)*nnode + r) From a01f2627580ef4d4b31c758f9016101ab3819189 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 28 Jul 2025 15:11:06 -0400 Subject: [PATCH 27/60] Added missing space --- src/common/include/omp_macros.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 3ae7927943..bf71a73914 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -160,7 +160,7 @@ & no_create_val.strip('\n') + present_val.strip('\n') + & & deviceptr_val.strip('\n') + attach_val.strip('\n') #! Hardcoding the parallelism for now - #:set omp_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel)' + & + #:set omp_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + & & clause_val + extraOmpArgs_val.strip('\n') #:set omp_end_directive = '!$omp end target teams loop' $:omp_directive From d7fbcab573c8d80f83b0d4851918f6ec3fcd13ee Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 28 Jul 2025 22:19:43 -0400 Subject: [PATCH 28/60] Chemistry works with OpenACC and almost works with OpenMP --- src/common/m_chemistry.fpp | 2 +- src/common/m_variables_conversion.fpp | 12 +++---- src/simulation/include/inline_riemann.fpp | 8 ++--- src/simulation/m_cbc.fpp | 14 ++++---- src/simulation/m_compute_cbc.fpp | 4 +++ src/simulation/m_riemann_solvers.fpp | 40 +++++++++++------------ src/simulation/m_start_up.fpp | 6 ++-- toolchain/mfc/run/input.py | 8 ++++- toolchain/pyproject.toml | 3 +- 9 files changed, 54 insertions(+), 43 deletions(-) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 97dbcf2632..e9e5bc5ee8 100644 --- a/src/common/m_chemistry.fpp +++ 
b/src/common/m_chemistry.fpp @@ -112,7 +112,7 @@ contains rho = q_cons_qp(contxe)%sf(x, y, z) T = q_T_sf%sf(x, y, z) - ! call get_net_production_rates(rho, T, Ys, omega) + call get_net_production_rates(rho, T, Ys, omega) $:GPU_LOOP(parallelism='[seq]') do eqn = chemxb, chemxe diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index e68097df7b..0bffeee879 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -181,8 +181,8 @@ contains T_guess = T - ! call get_temperature(e_Per_Kg - Pdyn_Per_Kg, T_guess, Y_rs, .true., T) - ! call get_pressure(rho, T, Y_rs, pres) + call get_temperature(e_Per_Kg - Pdyn_Per_Kg, T_guess, Y_rs, .true., T) + call get_pressure(rho, T, Y_rs, pres) #:endif @@ -1306,9 +1306,9 @@ contains q_cons_vf(i)%sf(j, k, l) = rho*q_prim_vf(i)%sf(j, k, l) end do - ! call get_mixture_molecular_weight(Ys, mix_mol_weight) + call get_mixture_molecular_weight(Ys, mix_mol_weight) T = q_prim_vf(E_idx)%sf(j, k, l)*mix_mol_weight/(gas_constant*rho) - ! call get_mixture_energy_mass(T, Ys, e_mix) + call get_mixture_energy_mass(T, Ys, e_mix) q_cons_vf(E_idx)%sf(j, k, l) = & dyn_pres + rho*e_mix @@ -1534,10 +1534,10 @@ contains Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) end do !Computing the energy from the internal energy of the mixture - ! call get_mixture_molecular_weight(Y_k, mix_mol_weight) + call get_mixture_molecular_weight(Y_k, mix_mol_weight) R_gas = gas_constant/mix_mol_weight T_K = pres_K/rho_K/R_gas - ! call get_mixture_energy_mass(T_K, Y_K, E_K) + call get_mixture_energy_mass(T_K, Y_K, E_K) E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum else ! Computing the energy from the pressure diff --git a/src/simulation/include/inline_riemann.fpp b/src/simulation/include/inline_riemann.fpp index 69b79abba5..9972799b02 100644 --- a/src/simulation/include/inline_riemann.fpp +++ b/src/simulation/include/inline_riemann.fpp @@ -34,13 +34,13 @@ if (chemistry) then eps = 0.001_wp - ! 
call get_species_enthalpies_rt(T_L, h_iL) - ! call get_species_enthalpies_rt(T_R, h_iR) + call get_species_enthalpies_rt(T_L, h_iL) + call get_species_enthalpies_rt(T_R, h_iR) h_iL = h_iL*gas_constant/molecular_weights*T_L h_iR = h_iR*gas_constant/molecular_weights*T_R - ! call get_species_specific_heats_r(T_L, Cp_iL) - ! call get_species_specific_heats_r(T_R, Cp_iR) + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) h_avg_2 = (sqrt(rho_L)*h_iL + sqrt(rho_R)*h_iR)/(sqrt(rho_L) + sqrt(rho_R)) Yi_avg = (sqrt(rho_L)*Ys_L + sqrt(rho_R)*Ys_R)/(sqrt(rho_L) + sqrt(rho_R)) diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index e99f073205..14d81fdf31 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -818,21 +818,21 @@ contains Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do - ! call get_mixture_molecular_weight(Ys, Mw) + call get_mixture_molecular_weight(Ys, Mw) R_gas = gas_constant/Mw T = pres/rho/R_gas - ! call get_mixture_specific_heat_cp_mass(T, Ys, Cp) - ! call get_mixture_energy_mass(T, Ys, e_mix) + call get_mixture_specific_heat_cp_mass(T, Ys, Cp) + call get_mixture_energy_mass(T, Ys, e_mix) E = rho*e_mix + 5.e-1_wp*rho*vel_K_sum if (chem_params%gamma_method == 1) then !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - ! call get_mole_fractions(Mw, Ys, Xs) - ! call get_species_specific_heats_r(T, Cp_i) + call get_mole_fractions(Mw, Ys, Xs) + call get_species_specific_heats_r(T, Cp_i) Gamma_i = Cp_i/(Cp_i - 1.0_wp) gamma = sum(Xs(:)/(Gamma_i(:) - 1.0_wp)) else if (chem_params%gamma_method == 2) then !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - ! call get_mixture_specific_heat_cv_mass(T, Ys, Cv) + call get_mixture_specific_heat_cv_mass(T, Ys, Cv) gamma = 1.0_wp/(Cp/Cv - 1.0_wp) end if else @@ -1045,7 +1045,7 @@ contains if (chemistry) then ! 
Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 - ! call get_species_enthalpies_rt(T, h_k) + call get_species_enthalpies_rt(T, h_k) sum_Enthalpies = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index a6e19c0ed4..5f361ee61b 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -37,6 +37,7 @@ contains real(wp), intent(in) :: dpres_ds integer :: i + $:GPU_LOOP(parallelism='[seq]') do i = 2, momxb L(i) = lambda_factor*lambda2*(c*c*dalpha_rho_ds(i - 1) - mf(i - 1)*dpres_ds) end do @@ -50,6 +51,7 @@ contains real(wp), dimension(num_dims), intent(in) :: dvel_ds integer :: i + $:GPU_LOOP(parallelism='[seq]') do i = momxb + 1, momxe L(i) = lambda_factor*lambda2*dvel_ds(dir_idx(i - contxe)) end do @@ -63,6 +65,7 @@ contains real(wp), dimension(num_fluids), intent(in) :: dadv_ds integer :: i + $:GPU_LOOP(parallelism='[seq]') do i = E_idx, advxe - 1 L(i) = lambda_factor*lambda2*dadv_ds(i - momxe) end do @@ -78,6 +81,7 @@ contains if (.not. chemistry) return + $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe L(i) = lambda_factor*lambda2*dYs_ds(i - chemxb + 1) end do diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 039020248b..03b9f683d2 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -477,8 +477,8 @@ contains Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - ! call get_mixture_molecular_weight(Ys_L, MW_L) - ! call get_mixture_molecular_weight(Ys_R, MW_R) + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) @@ -488,8 +488,8 @@ contains T_L = pres_L/rho_L/R_gas_L T_R = pres_R/rho_R/R_gas_R - ! call get_species_specific_heats_r(T_L, Cp_iL) - ! 
call get_species_specific_heats_r(T_R, Cp_iR) + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) if (chem_params%gamma_method == 1) then ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. @@ -500,10 +500,10 @@ contains gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) else if (chem_params%gamma_method == 2) then ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - ! call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - ! call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - ! call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - ! call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) Gamm_L = Cp_L/Cv_L gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) @@ -511,8 +511,8 @@ contains gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) end if - ! call get_mixture_energy_mass(T_L, Ys_L, E_L) - ! call get_mixture_energy_mass(T_R, Ys_R, E_R) + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms @@ -2446,8 +2446,8 @@ contains Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - ! call get_mixture_molecular_weight(Ys_L, MW_L) - ! call get_mixture_molecular_weight(Ys_R, MW_R) + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) @@ -2458,8 +2458,8 @@ contains T_L = pres_L/rho_L/R_gas_L T_R = pres_R/rho_R/R_gas_R - ! call get_species_specific_heats_r(T_L, Cp_iL) - ! 
call get_species_specific_heats_r(T_R, Cp_iR) + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) if (chem_params%gamma_method == 1) then !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. @@ -2470,10 +2470,10 @@ contains gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) else if (chem_params%gamma_method == 2) then !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - ! call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - ! call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - ! call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - ! call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) Gamm_L = Cp_L/Cv_L gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) @@ -2481,8 +2481,8 @@ contains gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) end if - ! call get_mixture_energy_mass(T_L, Ys_L, E_L) - ! 
call get_mixture_energy_mass(T_R, Ys_R, E_R) + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 8f950c6b50..f6a955ad38 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1383,10 +1383,10 @@ contains "case-optimized", & #:endif m, n, p, num_procs, & -#ifdef MFC_OpenACC -!&< +#if defined(MFC_OpenACC) "with OpenACC offloading" -!&> +#elif defined(MFC_OpenMP) + "with OpenMP offloading" #else "on CPUs" #endif diff --git a/toolchain/mfc/run/input.py b/toolchain/mfc/run/input.py index 1b4ea004d9..a220ca635d 100644 --- a/toolchain/mfc/run/input.py +++ b/toolchain/mfc/run/input.py @@ -73,13 +73,19 @@ def generate_fpp(self, target) -> None: # Determine the real type based on the single precision flag real_type = 'real(sp)' if ARG('single') else 'real(dp)' + gpu_type = None + if (ARG("gpu") == "mp"): + directive_str = 'mp' + elif (ARG("gpu") == "acc"): + directive_str = 'acc' + # Write the generated Fortran code to the m_thermochem.f90 file with the chosen precision common.file_write( os.path.join(modules_dir, "m_thermochem.f90"), pyro.FortranCodeGenerator().generate( "m_thermochem", self.get_cantera_solution(), - pyro.CodeGenerationOptions(scalar_type = real_type) + pyro.CodeGenerationOptions(scalar_type = real_type, directive_offload = directive_str) ), True ) diff --git a/toolchain/pyproject.toml b/toolchain/pyproject.toml index cb1bc8fa07..927b8277bd 100644 --- a/toolchain/pyproject.toml +++ b/toolchain/pyproject.toml @@ -39,7 +39,8 @@ dependencies = [ # Chemistry "cantera==3.1.0", - "pyrometheus == 1.0.3", + #"pyrometheus == 1.0.4", + "pyrometheus @ git+https://github.com/wilfonba/pyrometheus-wilfong.git@OpenMPTest", # Frontier Profiling "astunparse==1.6.2", From 0d550aaff6082f3c82a840e15933ac0b5ab91c3f Mon Sep 17 00:00:00 2001 From: 
Tanush Prathi Date: Tue, 29 Jul 2025 11:44:35 -0400 Subject: [PATCH 29/60] Readd LTO to cmake --- CMakeLists.txt | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index af2941b7ae..304fce3725 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,7 +194,7 @@ elseif ((CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_Fortran_COMPILER_ add_compile_options( $<$:-Mfreeform> $<$:-cpp> - $<$:-Minfo=inline> + $<$:-Minfo=inline> $<$:-Minfo=accel> ) @@ -234,14 +234,14 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release") elseif(CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "23.11") message(STATUS "LTO/IPO is not supported in NVHPC Version < 23.11. Use a newer version of NVHPC for best performance.") else() - # message(STATUS "Performing IPO using -Mextract followed by -Minline") - # set(NVHPC_USE_TWO_PASS_IPO TRUE) + message(STATUS "Performing IPO using -Mextract followed by -Minline") + set(NVHPC_USE_TWO_PASS_IPO TRUE) endif() else() CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR) if (SUPPORTS_IPO) message(STATUS "Enabled IPO / LTO") - # set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) else() message(STATUS "IPO / LTO is NOT available") endif() @@ -401,14 +401,14 @@ function(MFC_SETUP_TARGET) # Here we need to split into "library" and "executable" to perform IPO on the NVIDIA compiler. # A little hacky, but it *is* an edge-case for *one* compiler. 
if (NVHPC_USE_TWO_PASS_IPO) - # add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES}) - # target_compile_options(${ARGS_TARGET}_lib PRIVATE - # $<$:-Mextract=lib:${ARGS_TARGET}_lib> - # $<$:-Minline> - # ) - # add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib) - # target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib) - # list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib) + add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES}) + target_compile_options(${ARGS_TARGET}_lib PRIVATE + $<$:-Mextract=lib:${ARGS_TARGET}_lib> + $<$:-Minline> + ) + add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib) + target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib) + list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib) endif() foreach (a_target ${IPO_TARGETS}) @@ -511,7 +511,7 @@ function(MFC_SETUP_TARGET) endforeach() target_compile_options(${a_target} - PRIVATE -gpu=lineinfo + PRIVATE -gpu=keep,ptxinfo,lineinfo ) # GH-200 Unified Memory Support @@ -527,7 +527,7 @@ function(MFC_SETUP_TARGET) if (CMAKE_BUILD_TYPE STREQUAL "Debug") target_compile_options(${a_target} - PRIVATE -gpu=debug + PRIVATE -gpu=autocompare,debug ) endif() elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") From bac089d35dfe519c828323babc32962f0d6b2f54 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 29 Jul 2025 11:58:38 -0400 Subject: [PATCH 30/60] Update toolchain to support swapping between OpenACC and OpenMP --- toolchain/mfc/args.py | 2 +- toolchain/mfc/state.py | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py index b73bf93763..3e39750d88 100644 --- a/toolchain/mfc/args.py +++ b/toolchain/mfc/args.py @@ -59,7 +59,7 @@ def add_common_arguments(p: argparse.ArgumentParser, mask = None): if "m" not in mask: for f in dataclasses.fields(config): if f.name == 'gpu': - p.add_argument(f"--{f.name}", action="store", nargs='?', default=gpuConfigOptions.ACC.value, choices=[e.value for e in 
gpuConfigOptions]) + p.add_argument(f"--{f.name}", action="store", nargs='?', default=gpuConfigOptions.ACC.value, dest=f.name, choices=[e.value for e in gpuConfigOptions], help=f"Turn the {f.name} option to OpenACC or OpenMP.") p.add_argument(f"--no-{f.name}", action="store_const", const = gpuConfigOptions.NONE.value, dest=f.name, help=f"Turn the {f.name} option OFF.") continue p.add_argument( f"--{f.name}", action="store_true", help=f"Turn the {f.name} option ON.") diff --git a/toolchain/mfc/state.py b/toolchain/mfc/state.py index 383a458ec9..d801cb7654 100644 --- a/toolchain/mfc/state.py +++ b/toolchain/mfc/state.py @@ -35,12 +35,24 @@ def items(self) -> typing.Iterable[typing.Tuple[str, typing.Any]]: def make_options(self) -> typing.List[str]: """ Returns a list of options that could be passed to mfc.sh again. Example: --no-debug --mpi --no-gpu --no-gcov --no-unified""" - return [ f"--{'no-' if not v else ''}{k}" for k, v in self.items() ] + options = [] + for k, v in self.items(): + if (k == 'gpu'): + options.append(f"--{v}-{k}") + else: + options.append(f"--{'no-' if not v else ''}{k}") + return options def make_slug(self) -> str: """ Sort the items by key, then join them with underscores. This uniquely identifies the configuration. Example: no-debug_no-gpu_no_mpi_no-gcov """ - return '_'.join([ f"{'no-' if not v else ''}{k}" for k, v in sorted(self.items(), key=lambda x: x[0]) ]) + options = [] + for k, v in sorted(self.items(), key=lambda x: x[0]): + if (k == 'gpu'): + options.append(f"--{v}-{k}") + else: + options.append(f"--{'no-' if not v else ''}{k}") + return '_'.join(options) def __eq__(self, other) -> bool: """ Check if two MFCConfig objects are equal, field by field. 
""" From 555cf062a17e411426489a9a335770904c1080a9 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 29 Jul 2025 11:59:18 -0400 Subject: [PATCH 31/60] Add themochem build for CPU --- toolchain/mfc/run/input.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/toolchain/mfc/run/input.py b/toolchain/mfc/run/input.py index a220ca635d..dac7072afe 100644 --- a/toolchain/mfc/run/input.py +++ b/toolchain/mfc/run/input.py @@ -5,7 +5,7 @@ from ..printer import cons from .. import common, build -from ..state import ARGS, ARG +from ..state import ARGS, ARG, gpuConfigOptions from ..case import Case @dataclasses.dataclass(init=False) @@ -73,10 +73,10 @@ def generate_fpp(self, target) -> None: # Determine the real type based on the single precision flag real_type = 'real(sp)' if ARG('single') else 'real(dp)' - gpu_type = None - if (ARG("gpu") == "mp"): + directive_str = None + if (ARG("gpu") == gpuConfigOptions.MP.value): directive_str = 'mp' - elif (ARG("gpu") == "acc"): + elif (ARG("gpu") == gpuConfigOptions.ACC.value): directive_str = 'acc' # Write the generated Fortran code to the m_thermochem.f90 file with the chosen precision From 16c9ce3ea7fe2d1317c1cbaf0f94c37b84506489 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 29 Jul 2025 12:33:29 -0400 Subject: [PATCH 32/60] pyro updates --- toolchain/mfc/run/input.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/toolchain/mfc/run/input.py b/toolchain/mfc/run/input.py index dac7072afe..7f14441fd6 100644 --- a/toolchain/mfc/run/input.py +++ b/toolchain/mfc/run/input.py @@ -73,11 +73,12 @@ def generate_fpp(self, target) -> None: # Determine the real type based on the single precision flag real_type = 'real(sp)' if ARG('single') else 'real(dp)' - directive_str = None if (ARG("gpu") == gpuConfigOptions.MP.value): directive_str = 'mp' elif (ARG("gpu") == gpuConfigOptions.ACC.value): directive_str = 'acc' + else: + directive_str = None # Write the generated Fortran code to the 
m_thermochem.f90 file with the chosen precision common.file_write( From d7dfc0d4677ce0edc3083d4353fb27ed2f27cfee Mon Sep 17 00:00:00 2001 From: wilfonba Date: Wed, 30 Jul 2025 08:18:08 -0400 Subject: [PATCH 33/60] compiles and runs with OpenMP on CCE but fails all tests --- CMakeLists.txt | 4 +- src/common/include/omp_macros.fpp | 9 +- src/common/m_derived_types.fpp | 3 +- src/common/m_helper.fpp | 22 +- src/common/m_helper_basic.fpp | 7 +- src/common/m_phase_change.fpp | 2 +- src/common/m_variables_conversion.fpp | 50 ++--- src/pre_process/m_assign_variables.fpp | 4 +- src/pre_process/m_compute_levelset.fpp | 14 +- src/pre_process/m_model.fpp | 34 +-- src/pre_process/m_patches.fpp | 5 +- src/simulation/m_fftw.fpp | 2 + src/simulation/m_global_parameters.fpp | 2 - src/simulation/m_hyperelastic.fpp | 62 +++--- src/simulation/m_hypoelastic.fpp | 250 +++++++++++------------ src/simulation/m_igr.fpp | 40 ++-- src/simulation/m_mhd.fpp | 20 +- src/simulation/m_pressure_relaxation.fpp | 14 +- src/simulation/m_qbmm.fpp | 10 +- src/simulation/m_rhs.fpp | 5 +- src/simulation/m_riemann_solvers.fpp | 66 +++--- 21 files changed, 319 insertions(+), 306 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 304fce3725..0a005beb16 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -492,8 +492,8 @@ function(MFC_SETUP_TARGET) target_compile_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64) target_link_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64) elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") - target_compile_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64 ) - target_link_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64) + target_compile_options(${a_target} PRIVATE -fopenmp) + target_link_options(${a_target} PRIVATE -fopenmp) endif() endif() diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index bf71a73914..bcc9b5533e 100644 --- a/src/common/include/omp_macros.fpp +++ 
b/src/common/include/omp_macros.fpp @@ -160,9 +160,12 @@ & no_create_val.strip('\n') + present_val.strip('\n') + & & deviceptr_val.strip('\n') + attach_val.strip('\n') #! Hardcoding the parallelism for now - #:set omp_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + & + !#:set omp_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + & + !& clause_val + extraOmpArgs_val.strip('\n') + !#:set omp_end_directive = '!$omp end target teams loop' + #:set omp_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) ' + & & clause_val + extraOmpArgs_val.strip('\n') - #:set omp_end_directive = '!$omp end target teams loop' + #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' $:omp_directive $:code $:omp_end_directive @@ -201,7 +204,7 @@ #! Not implemented yet #:def OMP_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraOmpArgs=None) #! loop is going to be ignored since all loops right now are seq - #:set temp = '!$omp loop bind(thread)' + #:set temp = '' $:temp #:enddef diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 23fcc87c13..0d3cfebee8 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -449,4 +449,5 @@ module m_derived_types integer :: mn_min, np_min, mp_min, mnp_min end type cell_num_bounds -end module m_derived_types \ No newline at end of file +end module m_derived_types + diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index c8a17a1875..68c75b1388 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -14,7 +14,7 @@ module m_helper implicit none - private; + private; public :: s_comp_n_from_prim, & s_comp_n_from_cons, & s_initialize_nonpoly, & @@ -292,7 +292,7 @@ contains !! @param a First vector. !! @param b Second vector. !! 
@return The cross product of the two vectors. - function f_cross(a, b) result(c) + pure function f_cross(a, b) result(c) real(wp), dimension(3), intent(in) :: a, b real(wp), dimension(3) :: c @@ -513,11 +513,11 @@ contains real(wp) :: Y, prefactor, local_pi local_pi = acos(-1._wp) - prefactor = sqrt((2*l + 1)/(4*local_pi)*factorial(l - m_order)/factorial(l + m_order)); + prefactor = sqrt((2*l + 1)/(4*local_pi)*factorial(l - m_order)/factorial(l + m_order)); if (m_order == 0) then - Y = prefactor*associated_legendre(x, l, m_order); + Y = prefactor*associated_legendre(x, l, m_order); elseif (m_order > 0) then - Y = (-1._wp)**m_order*sqrt(2._wp)*prefactor*associated_legendre(x, l, m_order)*cos(m_order*phi); + Y = (-1._wp)**m_order*sqrt(2._wp)*prefactor*associated_legendre(x, l, m_order)*cos(m_order*phi); end if end function spherical_harmonic_func @@ -535,17 +535,17 @@ contains real(wp) :: result_P if (m_order <= 0 .and. l <= 0) then - result_P = 1; + result_P = 1; elseif (l == 1 .and. m_order <= 0) then - result_P = x; + result_P = x; elseif (l == 1 .and. 
m_order == 1) then - result_P = -(1 - x**2)**(1._wp/2._wp); + result_P = -(1 - x**2)**(1._wp/2._wp); elseif (m_order == l) then - result_P = (-1)**l*double_factorial(2*l - 1)*(1 - x**2)**(l/2); + result_P = (-1)**l*double_factorial(2*l - 1)*(1 - x**2)**(l/2); elseif (m_order == l - 1) then - result_P = x*(2*l - 1)*associated_legendre(x, l - 1, l - 1); + result_P = x*(2*l - 1)*associated_legendre(x, l - 1, l - 1); else - result_P = ((2*l - 1)*x*associated_legendre(x, l - 1, m_order) - (l + m_order - 1)*associated_legendre(x, l - 2, m_order))/(l - m_order); + result_P = ((2*l - 1)*x*associated_legendre(x, l - 1, m_order) - (l + m_order - 1)*associated_legendre(x, l - 2, m_order))/(l - m_order); end if end function associated_legendre diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index e2dc0e8b14..9606140c76 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -89,11 +89,12 @@ contains !! @param var_array Array to check. logical function f_all_default(var_array) result(res) real(wp), intent(in) :: var_array(:) - ! logical :: res_array(size(var_array)) - ! integer :: i res = all(f_is_default(var_array)) + !logical :: res_array(size(var_array)) + !integer :: i + ! do i = 1, size(var_array) ! res_array(i) = f_is_default(var_array(i)) ! end do @@ -161,7 +162,7 @@ contains !! @param m Number of cells in x-axis !! @param n Number of cells in y-axis !! 
@param p Number of cells in z-axis - elemental subroutine s_update_cell_bounds(bounds, m, n, p) + elemental subroutine s_update_cell_bounds(bounds, m, n, p) type(cell_num_bounds), intent(out) :: bounds integer, intent(in) :: m, n, p diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index c36cff840d..a395b5c553 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -42,7 +42,7 @@ module m_phase_change real(wp) :: A, B, C, D !> @} - $:GPU_DECLARE(create='[max_iter,pCr,TCr,mixM,lp,vp,A,B,C,D]') + $:GPU_DECLARE(create='[A,B,C,D]') contains diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 0bffeee879..bcccd47e73 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -53,10 +53,10 @@ module m_variables_conversion $:GPU_DECLARE(create='[gammas,gs_min,pi_infs,ps_inf,cvs,qvs,qvps]') #endif - real(wp), allocatable, dimension(:) :: Gs - integer, allocatable, dimension(:) :: bubrs - real(wp), allocatable, dimension(:, :) :: Res - $:GPU_DECLARE(create='[bubrs,Gs,Res]') + real(wp), allocatable, dimension(:) :: Gs_vc + integer, allocatable, dimension(:) :: bubrs_vc + real(wp), allocatable, dimension(:, :) :: Res_vc + $:GPU_DECLARE(create='[bubrs_vc,Gs_vc,Res_vc]') integer :: is1b, is2b, is3b, is1e, is2e, is3e $:GPU_DECLARE(create='[is1b,is2b,is3b,is1e,is2e,is3e]') @@ -516,7 +516,7 @@ contains if (present(G_K)) then G_K = 0._wp do i = 1, num_fluids - !TODO: change to use Gs directly here? + !TODO: change to use Gs_vc directly here? 
!TODO: Make this changes as well for GPUs G_K = G_K + alpha_K(i)*G(i) end do @@ -531,7 +531,7 @@ contains if (Re_size(i) > 0) Re_K(i) = 0._wp do j = 1, Re_size(i) - Re_K(i) = alpha_K(Re_idx(i, j))/Res(i, j) & + Re_K(i) = alpha_K(Re_idx(i, j))/Res_vc(i, j) & + Re_K(i) end do @@ -594,7 +594,7 @@ contains if (Re_size(i) > 0) Re_K(i) = 0._wp do j = 1, Re_size(i) - Re_K(i) = (1._wp - alpha_K(Re_idx(i, j)))/Res(i, j) & + Re_K(i) = (1._wp - alpha_K(Re_idx(i, j)))/Res_vc(i, j) & + Re_K(i) end do @@ -624,7 +624,7 @@ contains @:ALLOCATE(cvs (1:num_fluids)) @:ALLOCATE(qvs (1:num_fluids)) @:ALLOCATE(qvps (1:num_fluids)) - @:ALLOCATE(Gs (1:num_fluids)) + @:ALLOCATE(Gs_vc (1:num_fluids)) #else @:ALLOCATE(gammas (1:num_fluids)) @:ALLOCATE(gs_min (1:num_fluids)) @@ -633,46 +633,46 @@ contains @:ALLOCATE(cvs (1:num_fluids)) @:ALLOCATE(qvs (1:num_fluids)) @:ALLOCATE(qvps (1:num_fluids)) - @:ALLOCATE(Gs (1:num_fluids)) + @:ALLOCATE(Gs_vc (1:num_fluids)) #endif do i = 1, num_fluids gammas(i) = fluid_pp(i)%gamma gs_min(i) = 1.0_wp/gammas(i) + 1.0_wp pi_infs(i) = fluid_pp(i)%pi_inf - Gs(i) = fluid_pp(i)%G + Gs_vc(i) = fluid_pp(i)%G ps_inf(i) = pi_infs(i)/(1.0_wp + gammas(i)) cvs(i) = fluid_pp(i)%cv qvs(i) = fluid_pp(i)%qv qvps(i) = fluid_pp(i)%qvp end do - $:GPU_UPDATE(device='[gammas,gs_min,pi_infs,ps_inf,cvs,qvs,qvps,Gs]') + $:GPU_UPDATE(device='[gammas,gs_min,pi_infs,ps_inf,cvs,qvs,qvps,Gs_vc]') #ifdef MFC_SIMULATION if (viscous) then - @:ALLOCATE(Res(1:2, 1:Re_size_max)) + @:ALLOCATE(Res_vc(1:2, 1:Re_size_max)) do i = 1, 2 do j = 1, Re_size(i) - Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) + Res_vc(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - $:GPU_UPDATE(device='[Res,Re_idx,Re_size]') + $:GPU_UPDATE(device='[Res_vc,Re_idx,Re_size]') end if #endif if (bubbles_euler) then #ifdef MFC_SIMULATION - @:ALLOCATE(bubrs(1:nb)) + @:ALLOCATE(bubrs_vc(1:nb)) #else - @:ALLOCATE(bubrs(1:nb)) + @:ALLOCATE(bubrs_vc(1:nb)) #endif do i = 1, nb - bubrs(i) = bub_idx%rs(i) + bubrs_vc(i) = 
bub_idx%rs(i) end do - $:GPU_UPDATE(device='[bubrs]') + $:GPU_UPDATE(device='[bubrs_vc]') end if #ifdef MFC_POST_PROCESS @@ -906,7 +906,7 @@ contains ! If in simulation, use acc mixture subroutines if (elasticity) then call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & - alpha_rho_K, Re_K, G_K, Gs) + alpha_rho_K, Re_K, G_K, Gs_vc) else if (bubbles_euler) then call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & alpha_K, alpha_rho_K, Re_K) @@ -1073,7 +1073,7 @@ contains if (bubbles_euler) then $:GPU_LOOP(parallelism='[seq]') do i = 1, nb - nRtmp(i) = qK_cons_vf(bubrs(i))%sf(j, k, l) + nRtmp(i) = qK_cons_vf(bubrs_vc(i))%sf(j, k, l) end do vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) @@ -1517,7 +1517,7 @@ contains if (elasticity) then call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & alpha_K, alpha_rho_K, Re_K, & - G_K, Gs) + G_K, Gs_vc) else if (bubbles_euler) then call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) @@ -1607,14 +1607,14 @@ contains #endif #ifdef MFC_SIMULATION - @:DEALLOCATE(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps, Gs) + @:DEALLOCATE(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps, Gs_vc) if (bubbles_euler) then - @:DEALLOCATE(bubrs) + @:DEALLOCATE(bubrs_vc) end if #else - @:DEALLOCATE(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps, Gs) + @:DEALLOCATE(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps, Gs_vc) if (bubbles_euler) then - @:DEALLOCATE(bubrs) + @:DEALLOCATE(bubrs_vc) end if #endif diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index a4407266b3..a6b65e1ebc 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -102,7 +102,7 @@ contains !! @param eta pseudo volume fraction !! @param q_prim_vf Primitive variables !! 
@param patch_id_fp Array to track patch ids - subroutine s_assign_patch_mixture_primitive_variables(patch_id, j, k, l, & + subroutine s_assign_patch_mixture_primitive_variables(patch_id, j, k, l, & eta, q_prim_vf, patch_id_fp) $:GPU_ROUTINE(parallelism='[seq]') @@ -191,7 +191,7 @@ contains !! @param k the y-dir node index !! @param l the z-dir node index !! @param q_prim_vf Primitive variables - subroutine s_perturb_primitive(j, k, l, q_prim_vf) + subroutine s_perturb_primitive(j, k, l, q_prim_vf) integer, intent(in) :: j, k, l type(scalar_field), dimension(1:sys_size), intent(inout) :: q_prim_vf diff --git a/src/pre_process/m_compute_levelset.fpp b/src/pre_process/m_compute_levelset.fpp index 673a2aff5b..483045c553 100644 --- a/src/pre_process/m_compute_levelset.fpp +++ b/src/pre_process/m_compute_levelset.fpp @@ -27,7 +27,7 @@ module m_compute_levelset contains - subroutine s_circle_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_circle_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -63,7 +63,7 @@ contains end subroutine s_circle_levelset - subroutine s_airfoil_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_airfoil_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -146,7 +146,7 @@ contains end subroutine s_airfoil_levelset - subroutine s_3D_airfoil_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_3D_airfoil_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -250,7 +250,7 @@ contains end subroutine s_3D_airfoil_levelset !> Initialize IBM module - subroutine s_rectangle_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_rectangle_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), 
intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -347,7 +347,7 @@ contains end subroutine s_rectangle_levelset - subroutine s_cuboid_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_cuboid_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -464,7 +464,7 @@ contains end subroutine s_cuboid_levelset - subroutine s_sphere_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_sphere_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm @@ -501,7 +501,7 @@ contains end subroutine s_sphere_levelset - subroutine s_cylinder_levelset(levelset, levelset_norm, ib_patch_id) + subroutine s_cylinder_levelset(levelset, levelset_norm, ib_patch_id) type(levelset_field), intent(INOUT) :: levelset type(levelset_norm_field), intent(INOUT) :: levelset_norm diff --git a/src/pre_process/m_model.fpp b/src/pre_process/m_model.fpp index 03225a4b76..0ae508a172 100644 --- a/src/pre_process/m_model.fpp +++ b/src/pre_process/m_model.fpp @@ -424,7 +424,7 @@ contains end subroutine s_model_write !> This procedure frees the memory allocated for an STL mesh. - subroutine s_model_free(model) + subroutine s_model_free(model) type(t_model), intent(inout) :: model @@ -532,7 +532,7 @@ contains !! @param ray Ray. !! @param triangle Triangle. !! @return True if the ray intersects the triangle, false otherwise. - elemental function f_intersects_triangle(ray, triangle) result(intersects) + elemental function f_intersects_triangle(ray, triangle) result(intersects) type(t_ray), intent(in) :: ray type(t_triangle), intent(in) :: triangle @@ -592,7 +592,8 @@ contains !! @param boundary_v Output boundary vertices/normals. !! @param boundary_vertex_count Output total boundary vertex count !! 
@param boundary_edge_count Output total boundary edge counts - subroutine f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) + subroutine f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) + type(t_model), intent(in) :: model real(wp), allocatable, intent(out), dimension(:, :, :) :: boundary_v !< Output boundary vertices/normals integer, intent(out) :: boundary_vertex_count, boundary_edge_count !< Output boundary vertex/edge count @@ -705,7 +706,8 @@ contains !! @param edge Edges end points to be registered !! @param edge_index Edge index iterator !! @param edge_count Total number of edges - subroutine f_register_edge(temp_boundary_v, edge, edge_index, edge_count) + subroutine f_register_edge(temp_boundary_v, edge, edge_index, edge_count) + integer, intent(inout) :: edge_index !< Edge index iterator integer, intent(inout) :: edge_count !< Total number of edges real(wp), intent(in), dimension(1:2, 1:2) :: edge !< Edges end points to be registered @@ -723,7 +725,8 @@ contains !! @param boundary_edge_count Output total number of boundary edges !! @param spacing Dimensions of the current levelset cell !! @param interpolate Logical output - subroutine f_check_interpolation_2D(boundary_v, boundary_edge_count, spacing, interpolate) + subroutine f_check_interpolation_2D(boundary_v, boundary_edge_count, spacing, interpolate) + logical, intent(inout) :: interpolate !< Logical indicator of interpolation integer, intent(in) :: boundary_edge_count !< Number of boundary edges real(wp), intent(in), dimension(1:boundary_edge_count, 1:3, 1:2) :: boundary_v @@ -753,7 +756,8 @@ contains !! @param model Model to search in. !! @param spacing Dimensions of the current levelset cell !! 
@param interpolate Logical output - subroutine f_check_interpolation_3D(model, spacing, interpolate) + subroutine f_check_interpolation_3D(model, spacing, interpolate) + logical, intent(inout) :: interpolate type(t_model), intent(in) :: model real(wp), dimension(1:3), intent(in) :: spacing @@ -799,7 +803,8 @@ contains !! @param spacing Dimensions of the current levelset cell !! @param interpolated_boundary_v Output all the boundary vertices of the interpolated 2D model !! @param total_vertices Total number of vertices after interpolation - subroutine f_interpolate_2D(boundary_v, boundary_edge_count, spacing, interpolated_boundary_v, total_vertices) + subroutine f_interpolate_2D(boundary_v, boundary_edge_count, spacing, interpolated_boundary_v, total_vertices) + real(wp), intent(in), dimension(:, :, :) :: boundary_v real(wp), dimension(1:3), intent(in) :: spacing real(wp), allocatable, intent(inout), dimension(:, :) :: interpolated_boundary_v @@ -1042,7 +1047,8 @@ contains !! @param point The cell centers of the current level cell !! @param normals The output levelset normals !! @param distance The output levelset distance - subroutine f_distance_normals_3D(model, point, normals, distance) + subroutine f_distance_normals_3D(model, point, normals, distance) + type(t_model), intent(IN) :: model real(wp), dimension(1:3), intent(in) :: point real(wp), dimension(1:3), intent(out) :: normals @@ -1104,7 +1110,8 @@ contains !! @param point The cell centers of the current levelset cell !! @param spacing Dimensions of the current levelset cell !! @return Distance which the levelset distance without interpolation - function f_distance(boundary_v, boundary_edge_count, point) result(distance) + function f_distance(boundary_v, boundary_edge_count, point) result(distance) + integer, intent(in) :: boundary_edge_count real(wp), intent(in), dimension(1:boundary_edge_count, 1:3, 1:2) :: boundary_v real(wp), dimension(1:3), intent(in) :: point @@ -1134,7 +1141,8 @@ contains !! 
@param boundary_edge_count Output the total number of boundary edges !! @param point The cell centers of the current levelset cell !! @param normals Output levelset normals without interpolation - subroutine f_normals(boundary_v, boundary_edge_count, point, normals) + subroutine f_normals(boundary_v, boundary_edge_count, point, normals) + integer, intent(in) :: boundary_edge_count real(wp), intent(in), dimension(1:boundary_edge_count, 1:3, 1:2) :: boundary_v real(wp), dimension(1:3), intent(in) :: point @@ -1169,7 +1177,8 @@ contains end subroutine f_normals !> This procedure calculates the barycentric facet area - subroutine f_tri_area(tri, tri_area) + subroutine f_tri_area(tri, tri_area) + real(wp), dimension(1:3, 1:3), intent(in) :: tri real(wp), intent(out) :: tri_area real(wp), dimension(1:3) :: AB, AC, cross @@ -1192,7 +1201,8 @@ contains !! @param total_vertices Total number of vertices after interpolation !! @param point The cell centers of the current levelset cell !! @return Distance which the levelset distance without interpolation - function f_interpolated_distance(interpolated_boundary_v, total_vertices, point) result(distance) + function f_interpolated_distance(interpolated_boundary_v, total_vertices, point) result(distance) + integer, intent(in) :: total_vertices real(wp), intent(in), dimension(1:total_vertices, 1:3) :: interpolated_boundary_v real(wp), dimension(1:3), intent(in) :: point diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp index f1427a255f..3a25346eab 100644 --- a/src/pre_process/m_patches.fpp +++ b/src/pre_process/m_patches.fpp @@ -2266,7 +2266,7 @@ contains end subroutine s_convert_cylindrical_to_cartesian_coord - function f_convert_cyl_to_cart(cyl) result(cart) + function f_convert_cyl_to_cart(cyl) result(cart) $:GPU_ROUTINE(parallelism='[seq]') @@ -2292,7 +2292,8 @@ contains !! @param myth Angle !! @param offset Thickness !! 
@param a Starting position - elemental function f_r(myth, offset, a) + elemental function f_r(myth, offset, a) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: myth, offset, a real(wp) :: b diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 4ea2ac6934..fa29f5bac3 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -136,6 +136,7 @@ contains integer :: i, j, k, l !< Generic loop iterators integer :: ierr !< Generic flag used to identify and report GPU errors +#if 0 ! Restrict filter to processors that have cells adjacent to axis if (bc_y%beg >= 0) return #if defined(MFC_GPU) @@ -302,6 +303,7 @@ contains end do end do end do +#endif #endif end subroutine s_apply_fourier_filter diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 854e350781..36d78bd379 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -170,8 +170,6 @@ module m_global_parameters integer :: num_igr_warm_start_iters !< number of warm start iterations for elliptic solve real(wp) :: alf_factor !< alpha factor for IGR - $:GPU_DECLARE(create='[chemistry]') - logical :: bodyForces logical :: bf_x, bf_y, bf_z !< body force toggle in three directions !< amplitude, frequency, and phase shift sinusoid in each direction diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index bd79e0f7fb..2b171016cb 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -28,12 +28,12 @@ module m_hyperelastic type(vector_field) :: btensor !< $:GPU_DECLARE(create='[btensor]') - real(wp), allocatable, dimension(:, :) :: fd_coeff_x - real(wp), allocatable, dimension(:, :) :: fd_coeff_y - real(wp), allocatable, dimension(:, :) :: fd_coeff_z - $:GPU_DECLARE(create='[fd_coeff_x,fd_coeff_y, fd_coeff_z]') - real(wp), allocatable, dimension(:) :: Gs - $:GPU_DECLARE(create='[Gs]') + real(wp), allocatable, dimension(:, :) :: 
fd_coeff_x_hyper + real(wp), allocatable, dimension(:, :) :: fd_coeff_y_hyper + real(wp), allocatable, dimension(:, :) :: fd_coeff_z_hyper + $:GPU_DECLARE(create='[fd_coeff_x_hyper,fd_coeff_y_hyper, fd_coeff_z_hyper]') + real(wp), allocatable, dimension(:) :: Gs_hyper + $:GPU_DECLARE(create='[Gs_hyper]') contains @@ -54,34 +54,34 @@ contains end do @:ACC_SETUP_VFs(btensor) - @:ALLOCATE(Gs(1:num_fluids)) + @:ALLOCATE(Gs_hyper(1:num_fluids)) $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - Gs(i) = fluid_pp(i)%G + Gs_hyper(i) = fluid_pp(i)%G end do - $:GPU_UPDATE(device='[Gs]') + $:GPU_UPDATE(device='[Gs_hyper]') - @:ALLOCATE(fd_coeff_x(-fd_number:fd_number, 0:m)) + @:ALLOCATE(fd_coeff_x_hyper(-fd_number:fd_number, 0:m)) if (n > 0) then - @:ALLOCATE(fd_coeff_y(-fd_number:fd_number, 0:n)) + @:ALLOCATE(fd_coeff_y_hyper(-fd_number:fd_number, 0:n)) end if if (p > 0) then - @:ALLOCATE(fd_coeff_z(-fd_number:fd_number, 0:p)) + @:ALLOCATE(fd_coeff_z_hyper(-fd_number:fd_number, 0:p)) end if ! 
Computing centered finite difference coefficients - call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x, buff_size, & + call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_hyper, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device='[fd_coeff_x]') + $:GPU_UPDATE(device='[fd_coeff_x_hyper]') if (n > 0) then - call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y, buff_size, & + call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_hyper, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device='[fd_coeff_y]') + $:GPU_UPDATE(device='[fd_coeff_y_hyper]') end if if (p > 0) then - call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z, buff_size, & + call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_hyper, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device='[fd_coeff_z]') + $:GPU_UPDATE(device='[fd_coeff_z_hyper]') end if end subroutine s_initialize_hyperelastic_module @@ -117,7 +117,7 @@ contains end do ! If in simulation, use acc mixture subroutines call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha_k, & - alpha_rho_k, Re, G_local, Gs) + alpha_rho_k, Re, G_local, Gs_hyper) rho = max(rho, sgm_eps) G_local = max(G_local, sgm_eps) !if ( G_local <= verysmall ) G_K = 0._wp @@ -135,17 +135,17 @@ contains $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number ! derivatives in the x-direction - tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x(r, j) - tensora(2) = tensora(2) + q_prim_vf(xibeg + 1)%sf(j + r, k, l)*fd_coeff_x(r, j) - tensora(3) = tensora(3) + q_prim_vf(xiend)%sf(j + r, k, l)*fd_coeff_x(r, j) + tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + tensora(2) = tensora(2) + q_prim_vf(xibeg + 1)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + tensora(3) = tensora(3) + q_prim_vf(xiend)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) ! 
derivatives in the y-direction - tensora(4) = tensora(4) + q_prim_vf(xibeg)%sf(j, k + r, l)*fd_coeff_y(r, k) - tensora(5) = tensora(5) + q_prim_vf(xibeg + 1)%sf(j, k + r, l)*fd_coeff_y(r, k) - tensora(6) = tensora(6) + q_prim_vf(xiend)%sf(j, k + r, l)*fd_coeff_y(r, k) + tensora(4) = tensora(4) + q_prim_vf(xibeg)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + tensora(5) = tensora(5) + q_prim_vf(xibeg + 1)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + tensora(6) = tensora(6) + q_prim_vf(xiend)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) ! derivatives in the z-direction - tensora(7) = tensora(7) + q_prim_vf(xibeg)%sf(j, k, l + r)*fd_coeff_z(r, l) - tensora(8) = tensora(8) + q_prim_vf(xibeg + 1)%sf(j, k, l + r)*fd_coeff_z(r, l) - tensora(9) = tensora(9) + q_prim_vf(xiend)%sf(j, k, l + r)*fd_coeff_z(r, l) + tensora(7) = tensora(7) + q_prim_vf(xibeg)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + tensora(8) = tensora(8) + q_prim_vf(xibeg + 1)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + tensora(9) = tensora(9) + q_prim_vf(xiend)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) end do ! 
STEP 2a: computing the adjoint of the grad_xi tensor for the inverse tensorb(1) = tensora(5)*tensora(9) - tensora(6)*tensora(8) @@ -298,11 +298,11 @@ contains do i = 1, b_size @:DEALLOCATE(btensor%vf(i)%sf) end do - @:DEALLOCATE(fd_coeff_x) + @:DEALLOCATE(fd_coeff_x_hyper) if (n > 0) then - @:DEALLOCATE(fd_coeff_y) + @:DEALLOCATE(fd_coeff_y_hyper) if (p > 0) then - @:DEALLOCATE(fd_coeff_z) + @:DEALLOCATE(fd_coeff_z_hyper) end if end if diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index e4c781ebe9..8748478c85 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -19,21 +19,21 @@ module m_hypoelastic s_compute_hypoelastic_rhs, & s_compute_damage_state - real(wp), allocatable, dimension(:) :: Gs - $:GPU_DECLARE(create='[Gs]') + real(wp), allocatable, dimension(:) :: Gs_hypo + $:GPU_DECLARE(create='[Gs_hypo]') - real(wp), allocatable, dimension(:, :, :) :: du_dx, du_dy, du_dz - real(wp), allocatable, dimension(:, :, :) :: dv_dx, dv_dy, dv_dz - real(wp), allocatable, dimension(:, :, :) :: dw_dx, dw_dy, dw_dz - $:GPU_DECLARE(create='[du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz]') + real(wp), allocatable, dimension(:, :, :) :: du_dx_hypo, du_dy_hypo, du_dz_hypo + real(wp), allocatable, dimension(:, :, :) :: dv_dx_hypo, dv_dy_hypo, dv_dz_hypo + real(wp), allocatable, dimension(:, :, :) :: dw_dx_hypo, dw_dy_hypo, dw_dz_hypo + $:GPU_DECLARE(create='[du_dx_hypo,du_dy_hypo,du_dz_hypo,dv_dx_hypo,dv_dy_hypo,dv_dz_hypo,dw_dx_hypo,dw_dy_hypo,dw_dz_hypo]') real(wp), allocatable, dimension(:, :, :) :: rho_K_field, G_K_field $:GPU_DECLARE(create='[rho_K_field,G_K_field]') - real(wp), allocatable, dimension(:, :) :: fd_coeff_x_h - real(wp), allocatable, dimension(:, :) :: fd_coeff_y_h - real(wp), allocatable, dimension(:, :) :: fd_coeff_z_h - $:GPU_DECLARE(create='[fd_coeff_x_h,fd_coeff_y_h,fd_coeff_z_h]') + real(wp), allocatable, dimension(:, :) :: fd_coeff_x_hypo + real(wp), allocatable, dimension(:, :) 
:: fd_coeff_y_hypo + real(wp), allocatable, dimension(:, :) :: fd_coeff_z_hypo + $:GPU_DECLARE(create='[fd_coeff_x_hypo,fd_coeff_y_hypo,fd_coeff_z_hypo]') contains @@ -41,43 +41,43 @@ contains integer :: i - @:ALLOCATE(Gs(1:num_fluids)) + @:ALLOCATE(Gs_hypo(1:num_fluids)) @:ALLOCATE(rho_K_field(0:m,0:n,0:p), G_K_field(0:m,0:n,0:p)) - @:ALLOCATE(du_dx(0:m,0:n,0:p)) + @:ALLOCATE(du_dx_hypo(0:m,0:n,0:p)) if (n > 0) then - @:ALLOCATE(du_dy(0:m,0:n,0:p), dv_dx(0:m,0:n,0:p), dv_dy(0:m,0:n,0:p)) + @:ALLOCATE(du_dy_hypo(0:m,0:n,0:p), dv_dx_hypo(0:m,0:n,0:p), dv_dy_hypo(0:m,0:n,0:p)) if (p > 0) then - @:ALLOCATE(du_dz(0:m,0:n,0:p), dv_dz(0:m,0:n,0:p)) - @:ALLOCATE(dw_dx(0:m,0:n,0:p), dw_dy(0:m,0:n,0:p), dw_dz(0:m,0:n,0:p)) + @:ALLOCATE(du_dz_hypo(0:m,0:n,0:p), dv_dz_hypo(0:m,0:n,0:p)) + @:ALLOCATE(dw_dx_hypo(0:m,0:n,0:p), dw_dy_hypo(0:m,0:n,0:p), dw_dz_hypo(0:m,0:n,0:p)) end if end if do i = 1, num_fluids - Gs(i) = fluid_pp(i)%G + Gs_hypo(i) = fluid_pp(i)%G end do - $:GPU_UPDATE(device='[Gs]') + $:GPU_UPDATE(device='[Gs_hypo]') - @:ALLOCATE(fd_coeff_x_h(-fd_number:fd_number, 0:m)) + @:ALLOCATE(fd_coeff_x_hypo(-fd_number:fd_number, 0:m)) if (n > 0) then - @:ALLOCATE(fd_coeff_y_h(-fd_number:fd_number, 0:n)) + @:ALLOCATE(fd_coeff_y_hypo(-fd_number:fd_number, 0:n)) end if if (p > 0) then - @:ALLOCATE(fd_coeff_z_h(-fd_number:fd_number, 0:p)) + @:ALLOCATE(fd_coeff_z_hypo(-fd_number:fd_number, 0:p)) end if ! 
Computing centered finite difference coefficients - call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_h, buff_size, & + call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_hypo, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device='[fd_coeff_x_h]') + $:GPU_UPDATE(device='[fd_coeff_x_hypo]') if (n > 0) then - call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_h, buff_size, & + call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_hypo, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device='[fd_coeff_y_h]') + $:GPU_UPDATE(device='[fd_coeff_y_hypo]') end if if (p > 0) then - call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_h, buff_size, & + call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_hypo, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device='[fd_coeff_z_h]') + $:GPU_UPDATE(device='[fd_coeff_z_hypo]') end if end subroutine s_initialize_hypoelastic_module @@ -108,7 +108,7 @@ contains do q = 0, p do l = 0, n do k = 0, m - du_dx(k, l, q) = 0._wp + du_dx_hypo(k, l, q) = 0._wp end do end do end do @@ -120,8 +120,8 @@ contains do k = 0, m $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number - du_dx(k, l, q) = du_dx(k, l, q) & - + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_h(r, k) + du_dx_hypo(k, l, q) = du_dx_hypo(k, l, q) & + + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) end do end do @@ -134,7 +134,7 @@ contains do q = 0, p do l = 0, n do k = 0, m - du_dy(k, l, q) = 0._wp; dv_dx(k, l, q) = 0._wp; dv_dy(k, l, q) = 0._wp + du_dy_hypo(k, l, q) = 0._wp; dv_dx_hypo(k, l, q) = 0._wp; dv_dy_hypo(k, l, q) = 0._wp end do end do end do @@ -146,12 +146,12 @@ contains do k = 0, m $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number - du_dy(k, l, q) = du_dy(k, l, q) & - + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_h(r, l) - dv_dx(k, l, q) = dv_dx(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k + r, l, q)*fd_coeff_x_h(r, k) - dv_dy(k, l, q) = dv_dy(k, l, q) & - 
+ q_prim_vf(momxb + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + du_dy_hypo(k, l, q) = du_dy_hypo(k, l, q) & + + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) + dv_dx_hypo(k, l, q) = dv_dx_hypo(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) + dv_dy_hypo(k, l, q) = dv_dy_hypo(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) end do end do end do @@ -165,8 +165,8 @@ contains do q = 0, p do l = 0, n do k = 0, m - du_dz(k, l, q) = 0._wp; dv_dz(k, l, q) = 0._wp; dw_dx(k, l, q) = 0._wp; - dw_dy(k, l, q) = 0._wp; dw_dz(k, l, q) = 0._wp; + du_dz_hypo(k, l, q) = 0._wp; dv_dz_hypo(k, l, q) = 0._wp; dw_dx_hypo(k, l, q) = 0._wp; + dw_dy_hypo(k, l, q) = 0._wp; dw_dz_hypo(k, l, q) = 0._wp; end do end do end do @@ -178,16 +178,16 @@ contains do k = 0, m $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number - du_dz(k, l, q) = du_dz(k, l, q) & - + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_h(r, q) - dv_dz(k, l, q) = dv_dz(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k, l, q + r)*fd_coeff_z_h(r, q) - dw_dx(k, l, q) = dw_dx(k, l, q) & - + q_prim_vf(momxe)%sf(k + r, l, q)*fd_coeff_x_h(r, k) - dw_dy(k, l, q) = dw_dy(k, l, q) & - + q_prim_vf(momxe)%sf(k, l + r, q)*fd_coeff_y_h(r, l) - dw_dz(k, l, q) = dw_dz(k, l, q) & - + q_prim_vf(momxe)%sf(k, l, q + r)*fd_coeff_z_h(r, q) + du_dz_hypo(k, l, q) = du_dz_hypo(k, l, q) & + + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) + dv_dz_hypo(k, l, q) = dv_dz_hypo(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) + dw_dx_hypo(k, l, q) = dw_dx_hypo(k, l, q) & + + q_prim_vf(momxe)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) + dw_dy_hypo(k, l, q) = dw_dy_hypo(k, l, q) & + + q_prim_vf(momxe)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) + dw_dz_hypo(k, l, q) = dw_dz_hypo(k, l, q) & + + q_prim_vf(momxe)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) end do end do end do @@ -203,7 +203,7 @@ contains rho_K = 0._wp; G_K = 0._wp do i = 1, num_fluids rho_K = rho_K + q_prim_vf(i)%sf(k, l, q) 
!alpha_rho_K(1) - G_K = G_K + q_prim_vf(advxb - 1 + i)%sf(k, l, q)*Gs(i) !alpha_K(1) * Gs(1) + G_K = G_K + q_prim_vf(advxb - 1 + i)%sf(k, l, q)*Gs_hypo(i) !alpha_K(1) * Gs_hypo(1) end do if (cont_damage) G_K = G_K*max((1._wp - q_prim_vf(damage_idx)%sf(k, l, q)), 0._wp) @@ -229,7 +229,7 @@ contains rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & ((4._wp*G_K_field(k, l, q)/3._wp) + & q_prim_vf(strxb)%sf(k, l, q))* & - du_dx(k, l, q) + du_dx_hypo(k, l, q) end do end do end do @@ -241,31 +241,31 @@ contains do l = 0, n do k = 0, m rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy(k, l, q) - & - q_prim_vf(strxb)%sf(k, l, q)*dv_dy(k, l, q) - & - 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dv_dy(k, l, q)) + (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy_hypo(k, l, q) - & + q_prim_vf(strxb)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & + 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dv_dy_hypo(k, l, q)) rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb)%sf(k, l, q)*dv_dx(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*du_dy(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dy(k, l, q) + & - dv_dx(k, l, q))) + (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb)%sf(k, l, q)*dv_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*du_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dy_hypo(k, l, q) + & + dv_dx_hypo(k, l, q))) 
rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(dv_dy(k, l, q) - (1._wp/3._wp)* & - (du_dx(k, l, q) + & - dv_dy(k, l, q)))) + (q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(dv_dy_hypo(k, l, q) - (1._wp/3._wp)* & + (du_dx_hypo(k, l, q) + & + dv_dy_hypo(k, l, q)))) end do end do end do @@ -277,62 +277,62 @@ contains do l = 0, n do k = 0, m rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz(k, l, q) - & - q_prim_vf(strxb)%sf(k, l, q)*dw_dz(k, l, q) - & - 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz(k, l, q)) + (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz_hypo(k, l, q) - & + q_prim_vf(strxb)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz_hypo(k, l, q)) rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 4)%sf(k, l, q)*du_dz(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dz(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dz(k, l, q)) + (q_prim_vf(strxb + 4)%sf(k, l, q)*du_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, 
q)*dw_dz_hypo(k, l, q)) rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dz(k, l, q) - & - 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz(k, l, q)) + (q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz_hypo(k, l, q)) rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb)%sf(k, l, q)*dw_dx(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*du_dy(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dy(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dy(k, l, q) + & - q_prim_vf(strxb + 5)%sf(k, l, q)*du_dz(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dz(k, l, q) + & - dw_dx(k, l, q))) + (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*du_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 5)%sf(k, l, q)*du_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dz_hypo(k, l, q) + & + dw_dx_hypo(k, l, q))) rhs_vf(strxb + 4)%sf(k, l, q) = rhs_vf(strxb + 4)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dx(k, l, q) + & - 
q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dx(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dy(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy(k, l, q) + & - q_prim_vf(strxb + 5)%sf(k, l, q)*dv_dz(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(dv_dz(k, l, q) + & - dw_dy(k, l, q))) + (q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 5)%sf(k, l, q)*dv_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(dv_dz_hypo(k, l, q) + & + dw_dy_hypo(k, l, q))) rhs_vf(strxe)%sf(k, l, q) = rhs_vf(strxe)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx(k, l, q) + & - q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*du_dx(k, l, q) + & - q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy(k, l, q) + & - q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*dv_dy(k, l, q) + & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz(k, l, q) + & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(dw_dz(k, l, q) - (1._wp/3._wp)* & - (du_dx(k, l, q) + & - dv_dy(k, l, q) + & - dw_dz(k, l, q)))) + (q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx_hypo(k, l, q) + & + q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxe - 
1)%sf(k, l, q)*dw_dy_hypo(k, l, q) + & + q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(dw_dz_hypo(k, l, q) - (1._wp/3._wp)* & + (du_dx_hypo(k, l, q) + & + dv_dy_hypo(k, l, q) + & + dw_dz_hypo(k, l, q)))) end do end do end do @@ -364,7 +364,7 @@ contains rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + & rho_K_field(k, l, q)*( & -(q_prim_vf(strxb + 3)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q))* & - (du_dx(k, l, q) + dv_dy(k, l, q) + q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) & + (du_dx_hypo(k, l, q) + dv_dy_hypo(k, l, q) + q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) & + 2._wp*(q_prim_vf(strxb + 3)%sf(k, l, q) + G_K_field(k, l, q))*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) end do end do @@ -377,16 +377,16 @@ contains impure subroutine s_finalize_hypoelastic_module() - @:DEALLOCATE(Gs) + @:DEALLOCATE(Gs_hypo) @:DEALLOCATE(rho_K_field, G_K_field) - @:DEALLOCATE(du_dx) - @:DEALLOCATE(fd_coeff_x_h) + @:DEALLOCATE(du_dx_hypo) + @:DEALLOCATE(fd_coeff_x_hypo) if (n > 0) then - @:DEALLOCATE(du_dy,dv_dx,dv_dy) - @:DEALLOCATE(fd_coeff_y_h) + @:DEALLOCATE(du_dy_hypo,dv_dx_hypo,dv_dy_hypo) + @:DEALLOCATE(fd_coeff_y_hypo) if (p > 0) then - @:DEALLOCATE(du_dz, dv_dz, dw_dx, dw_dy, dw_dz) - @:DEALLOCATE(fd_coeff_z_h) + @:DEALLOCATE(du_dz_hypo, dv_dz_hypo, dw_dx_hypo, dw_dy_hypo, dw_dz_hypo) + @:DEALLOCATE(fd_coeff_z_hypo) end if end if diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index 0bf904b220..e868b2f0a7 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -23,16 +23,16 @@ module m_igr s_igr_sigma_x, & s_igr_flux_add, & s_finalize_igr_module - real(wp), allocatable, target, dimension(:, :, :) :: jac + real(wp), allocatable, target, dimension(: ,:, :) :: jac real(wp), 
allocatable, dimension(:, :, :) :: jac_rhs, jac_old $:GPU_DECLARE(create='[jac, jac_rhs, jac_old]') type(scalar_field), dimension(1) :: jac_sf $:GPU_DECLARE(create='[jac_sf]') - real(wp), allocatable, dimension(:, :) :: Res - $:GPU_DECLARE(create='[Res]') + real(wp), allocatable, dimension(:, :) :: Res_igr + $:GPU_DECLARE(create='[Res_igr]') real(wp) :: alf_igr $:GPU_DECLARE(create='[alf_igr]') @@ -88,13 +88,13 @@ contains subroutine s_initialize_igr_module() if (viscous) then - @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) + @:ALLOCATE(Res_igr(1:2, 1:maxval(Re_size))) do i = 1, 2 do j = 1, Re_size(i) - Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) + Res_igr(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - $:GPU_UPDATE(device='[Res, Re_idx, Re_size]') + $:GPU_UPDATE(device='[Res_igr, Re_idx, Re_size]') end if @:ALLOCATE(jac(idwbuff(1)%beg:idwbuff(1)%end, & @@ -128,7 +128,7 @@ contains #:if not MFC_CASE_OPTIMIZATION if (igr_order == 3) then - vidxb = -1; vidxe = 2; + vidxb = -1; vidxe = 2; $:GPU_UPDATE(device='[vidxb, vidxe]') @:ALLOCATE(coeff_L(0:2)) @@ -144,7 +144,7 @@ contains $:GPU_UPDATE(device='[coeff_R]') elseif (igr_order == 5) then - vidxb = -2; vidxe = 3; + vidxb = -2; vidxe = 3; $:GPU_UPDATE(device='[vidxb, vidxe]') @:ALLOCATE(coeff_L(-1:3)) @@ -516,8 +516,8 @@ contains mu_L = 0._wp; mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - mu_L = alpha_L(i)/Res(1, i) + mu_L - mu_R = alpha_R(i)/Res(1, i) + mu_R + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do $:GPU_ATOMIC(atomic='update') @@ -938,8 +938,8 @@ contains mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - mu_L = alpha_L(i)/Res(1, i) + mu_L - mu_R = alpha_R(i)/Res(1, i) + mu_R + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do $:GPU_ATOMIC(atomic='update') @@ -1362,8 +1362,8 @@ contains mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - mu_L = alpha_L(i)/Res(1, i) + mu_L - mu_R = 
alpha_R(i)/Res(1, i) + mu_R + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do $:GPU_ATOMIC(atomic='update') @@ -1762,8 +1762,8 @@ contains mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - mu_L = alpha_L(i)/Res(1, i) + mu_L - mu_R = alpha_R(i)/Res(1, i) + mu_R + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do $:GPU_ATOMIC(atomic='update') @@ -2218,8 +2218,8 @@ contains mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - mu_L = alpha_L(i)/Res(1, i) + mu_L - mu_R = alpha_R(i)/Res(1, i) + mu_R + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do $:GPU_ATOMIC(atomic='update') @@ -2502,7 +2502,7 @@ contains end subroutine s_igr_riemann_solver - pure subroutine s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + subroutine s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & E_R, gamma_R, pi_inf_R, rho_R, vel_R, & pres_L, pres_R, cfl) $:GPU_ROUTINE(parallelism='[seq]') @@ -2607,7 +2607,7 @@ contains subroutine s_finalize_igr_module() if (viscous) then - @:DEALLOCATE(Res) + @:DEALLOCATE(Res_igr) end if @:DEALLOCATE(jac, jac_rhs) diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 1680efdb6f..12ba72809c 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -21,10 +21,10 @@ module m_mhd s_finalize_mhd_powell_module, & s_compute_mhd_powell_rhs - real(wp), allocatable, dimension(:, :, :) :: du_dx, du_dy, du_dz - real(wp), allocatable, dimension(:, :, :) :: dv_dx, dv_dy, dv_dz - real(wp), allocatable, dimension(:, :, :) :: dw_dx, dw_dy, dw_dz - $:GPU_DECLARE(create='[du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz]') + real(wp), allocatable, dimension(:, :, :) :: du_dx_mhd, du_dy_mhd, du_dz_mhd + real(wp), allocatable, dimension(:, :, :) :: dv_dx_mhd, dv_dy_mhd, dv_dz_mhd + real(wp), allocatable, dimension(:, :, :) :: dw_dx_mhd, dw_dy_mhd, dw_dz_mhd + 
$:GPU_DECLARE(create='[du_dx_mhd,du_dy_mhd,du_dz_mhd,dv_dx_mhd,dv_dy_mhd,dv_dz_mhd,dw_dx_mhd,dw_dy_mhd,dw_dz_mhd]') real(wp), allocatable, dimension(:, :) :: fd_coeff_x_h real(wp), allocatable, dimension(:, :) :: fd_coeff_y_h @@ -38,10 +38,10 @@ contains ! Additional safety check beyond m_checker if (n == 0) call s_mpi_abort('Fatal Error: Powell correction is not applicable for 1D') - @:ALLOCATE(du_dx(0:m,0:n,0:p), dv_dx(0:m,0:n,0:p), dw_dx(0:m,0:n,0:p)) - @:ALLOCATE(du_dy(0:m,0:n,0:p), dv_dy(0:m,0:n,0:p), dw_dy(0:m,0:n,0:p)) + @:ALLOCATE(du_dx_mhd(0:m,0:n,0:p), dv_dx_mhd(0:m,0:n,0:p), dw_dx_mhd(0:m,0:n,0:p)) + @:ALLOCATE(du_dy_mhd(0:m,0:n,0:p), dv_dy_mhd(0:m,0:n,0:p), dw_dy_mhd(0:m,0:n,0:p)) if (p > 0) then - @:ALLOCATE(dw_dx(0:m,0:n,0:p), dw_dy(0:m,0:n,0:p), dw_dz(0:m,0:n,0:p)) + @:ALLOCATE(dw_dx_mhd(0:m,0:n,0:p), dw_dy_mhd(0:m,0:n,0:p), dw_dz_mhd(0:m,0:n,0:p)) end if @:ALLOCATE(fd_coeff_x_h(-fd_number:fd_number, 0:m)) @@ -135,12 +135,12 @@ contains impure subroutine s_finalize_mhd_powell_module - @:DEALLOCATE(du_dx, dv_dx, dw_dx) + @:DEALLOCATE(du_dx_mhd, dv_dx_mhd, dw_dx_mhd) @:DEALLOCATE(fd_coeff_x_h) - @:DEALLOCATE(du_dy, dv_dy, dw_dy) + @:DEALLOCATE(du_dy_mhd, dv_dy_mhd, dw_dy_mhd) @:DEALLOCATE(fd_coeff_y_h) if (p > 0) then - @:DEALLOCATE(dw_dx, dw_dy, dw_dz) + @:DEALLOCATE(dw_dx_mhd, dw_dy_mhd, dw_dz_mhd) @:DEALLOCATE(fd_coeff_z_h) end if diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index 52340ccbb2..407c01ff10 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -23,8 +23,8 @@ module m_pressure_relaxation real(wp), allocatable, dimension(:) :: gamma_min, pres_inf $:GPU_DECLARE(create='[gamma_min, pres_inf]') - real(wp), allocatable, dimension(:, :) :: Res - $:GPU_DECLARE(create='[Res]') + real(wp), allocatable, dimension(:, :) :: Res_pr + $:GPU_DECLARE(create='[Res_pr]') contains @@ -42,13 +42,13 @@ contains $:GPU_UPDATE(device='[gamma_min, pres_inf]') if 
(viscous) then - @:ALLOCATE(Res(1:2, 1:Re_size_max)) + @:ALLOCATE(Res_pr(1:2, 1:Re_size_max)) do i = 1, 2 do j = 1, Re_size(i) - Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) + Res_pr(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - $:GPU_UPDATE(device='[Res, Re_idx, Re_size]') + $:GPU_UPDATE(device='[Res_pr, Re_idx, Re_size]') end if end subroutine s_initialize_pressure_relaxation_module @@ -58,7 +58,7 @@ contains @:DEALLOCATE(gamma_min, pres_inf) if (viscous) then - @:DEALLOCATE(Res) + @:DEALLOCATE(Res_pr) end if end subroutine s_finalize_pressure_relaxation_module @@ -288,7 +288,7 @@ contains if (Re_size(i) > 0) Re(i) = 0._wp $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re(i) = alpha(Re_idx(i, q))/Res(i, q) + Re(i) + Re(i) = alpha(Re_idx(i, q))/Res_pr(i, q) + Re(i) end do Re(i) = 1._wp/max(Re(i), sgm_eps) end do diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 6ce6005e87..2da72f2c53 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -37,9 +37,9 @@ module m_qbmm type(int_bounds_info) :: is1_qbmm, is2_qbmm, is3_qbmm $:GPU_DECLARE(create='[is1_qbmm,is2_qbmm,is3_qbmm]') - integer, allocatable, dimension(:) :: bubrs + integer, allocatable, dimension(:) :: bubrs_qbmm integer, allocatable, dimension(:, :) :: bubmoms - $:GPU_DECLARE(create='[bubrs,bubmoms]') + $:GPU_DECLARE(create='[bubrs_qbmm,bubmoms]') contains @@ -394,13 +394,13 @@ contains $:GPU_UPDATE(device='[momrhs]') - @:ALLOCATE(bubrs(1:nb)) + @:ALLOCATE(bubrs_qbmm(1:nb)) @:ALLOCATE(bubmoms(1:nb, 1:nmom)) do i = 1, nb - bubrs(i) = bub_idx%rs(i) + bubrs_qbmm(i) = bub_idx%rs(i) end do - $:GPU_UPDATE(device='[bubrs]') + $:GPU_UPDATE(device='[bubrs_qbmm]') do j = 1, nmom do i = 1, nb diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index bb6ba2871e..ac36e2bff2 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -852,8 +852,9 @@ contains flux_gsrc_n(id)%vf, & id, irx, iry, irz) call nvtxEndRange - ! 
$:GPU_UPDATE(host='[flux_n(1)%vf(1)%sf]') - ! print *, "FLUX", flux_n(1)%vf(1)%sf(100:300, 0, 0) + + !$:GPU_UPDATE(host='[flux_n(1)%vf(1)%sf]') + !print *, "FLUX", flux_n(1)%vf(1)%sf(100:300, 0, 0) ! Additional physics and source terms ! RHS addition for advection source diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 03b9f683d2..41001dbe3c 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -107,11 +107,11 @@ module m_riemann_solvers $:GPU_DECLARE(create='[is1,is2,is3,isx,isy,isz]') - real(wp), allocatable, dimension(:) :: Gs - $:GPU_DECLARE(create='[Gs]') + real(wp), allocatable, dimension(:) :: Gs_rs + $:GPU_DECLARE(create='[Gs_rs]') - real(wp), allocatable, dimension(:, :) :: Res - $:GPU_DECLARE(create='[Res]') + real(wp), allocatable, dimension(:, :) :: Res_gs + $:GPU_DECLARE(create='[Res_gs]') contains @@ -357,7 +357,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -459,9 +459,9 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, 
q))/Res(i, q) & + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res(i, q) & + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + Re_R(i) end do @@ -561,8 +561,8 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs(i) - G_R = G_R + alpha_R(i)*Gs(i) + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) end do if (cont_damage) then @@ -595,8 +595,8 @@ contains ! ! $:GPU_LOOP(parallelism='[seq]') ! do i = 1, num_fluids - ! G_L = G_L + alpha_L(i)*Gs(i) - ! G_R = G_R + alpha_R(i)*Gs(i) + ! G_L = G_L + alpha_L(i)*Gs_rs(i) + ! G_R = G_R + alpha_R(i)*Gs_rs(i) ! end do ! ! Elastic contribution to energy if G large enough ! if ((G_L > 1.e-3_wp) .and. (G_R > 1.e-3_wp)) then @@ -1267,7 +1267,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + Re_L(i) end do @@ -1283,7 +1283,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + Re_R(i) end do @@ -1305,8 +1305,8 @@ contains G_L = 0._wp; G_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs(i) - G_R = G_R + alpha_R(i)*Gs(i) + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) end do $:GPU_LOOP(parallelism='[seq]') do i = 1, strxe - strxb + 1 @@ -1334,8 +1334,8 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids ! Mixture left and right shear modulus - G_L = G_L + alpha_L(i)*Gs(i) - G_R = G_R + alpha_R(i)*Gs(i) + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) end do ! Elastic contribution to energy if G large enough if (G_L > verysmall .and. 
G_R > verysmall) then @@ -1951,7 +1951,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + Re_L(i) end do @@ -1967,7 +1967,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + Re_R(i) end do @@ -2414,7 +2414,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + Re_L(i) end do @@ -2430,7 +2430,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + Re_R(i) end do @@ -2508,8 +2508,8 @@ contains G_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs(i) - G_R = G_R + alpha_R(i)*Gs(i) + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) end do $:GPU_LOOP(parallelism='[seq]') do i = 1, strxe - strxb + 1 @@ -2538,8 +2538,8 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids ! Mixture left and right shear modulus - G_L = G_L + alpha_L(i)*Gs(i) - G_R = G_R + alpha_R(i)*Gs(i) + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) end do ! Elastic contribution to energy if G large enough if (G_L > verysmall .and. G_R > verysmall) then @@ -3133,24 +3133,24 @@ contains ! 
the Riemann problem solution integer :: i, j - @:ALLOCATE(Gs(1:num_fluids)) + @:ALLOCATE(Gs_rs(1:num_fluids)) do i = 1, num_fluids - Gs(i) = fluid_pp(i)%G + Gs_rs(i) = fluid_pp(i)%G end do - $:GPU_UPDATE(device='[Gs]') + $:GPU_UPDATE(device='[Gs_rs]') if (viscous) then - @:ALLOCATE(Res(1:2, 1:Re_size_max)) + @:ALLOCATE(Res_gs(1:2, 1:Re_size_max)) end if if (viscous) then do i = 1, 2 do j = 1, Re_size(i) - Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) + Res_gs(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - $:GPU_UPDATE(device='[Res,Re_idx,Re_size]') + $:GPU_UPDATE(device='[Res_gs,Re_idx,Re_size]') end if $:GPU_ENTER_DATA(copyin='[is1,is2,is3,isx,isy,isz]') @@ -4084,8 +4084,6 @@ contains subroutine s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, tau_shear_out) $:GPU_ROUTINE(parallelism='[seq]') - implicit none - ! Arguments real(wp), dimension(num_dims, num_dims), intent(in) :: vel_grad_avg real(wp), intent(in) :: Re_shear @@ -4118,8 +4116,6 @@ contains subroutine s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, tau_bulk_out) $:GPU_ROUTINE(parallelism='[seq]') - implicit none - ! 
Arguments real(wp), intent(in) :: Re_bulk real(wp), intent(in) :: divergence_v From d52abd2bf20213100da5a53b735525bdcb416fdb Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 31 Jul 2025 12:31:18 -0400 Subject: [PATCH 34/60] Add AMD compiler support, different macro expansions based on compiler --- CMakeLists.txt | 6 +++ src/common/include/omp_macros.fpp | 78 +++++++++++++++++++++++----- src/common/m_chemistry.fpp | 2 + src/simulation/m_cbc.fpp | 2 + src/simulation/m_fftw.fpp | 5 +- src/simulation/m_riemann_solvers.fpp | 2 + 6 files changed, 80 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a005beb16..92f26ac417 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -494,6 +494,9 @@ function(MFC_SETUP_TARGET) elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") target_compile_options(${a_target} PRIVATE -fopenmp) target_link_options(${a_target} PRIVATE -fopenmp) + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) + target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) endif() endif() @@ -533,6 +536,9 @@ function(MFC_SETUP_TARGET) elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") find_package(hipfort COMPONENTS hip CONFIG REQUIRED) target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn) + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + find_package(hipfort COMPONENTS hip CONFIG REQUIRED) + target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn flang_rt.hostdevice) endif() elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc") diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index bcc9b5533e..f595546a5d 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -1,5 +1,11 @@ #:include 'shared_parallel_macros.fpp' +#:set 
NVIDIA_COMPILER_ID="NVHPC" +#:set PGI_COMPILER_ID="PGI" +#:set INTEL_COMPILER_ID="Intel" +#:set CCE_COMPILER_ID="Cray" +#:set AMD_COMPILER_ID="LLVMFlang" + #:def OMP_MAP_STR(map_type, var_list) #:assert map_type is not None #:assert isinstance(map_type, str) @@ -17,8 +23,15 @@ #:assert isinstance(default, str) #:assert (default == 'present' or default == 'none') #:if default == 'present' - #! #:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) defaultmap(present:pointer) ' - #:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) ' + #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID + #:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) ' + #:elif MFC_COMPILER == CCE_COMPILER_ID + #:set default_val = 'defaultmap(present:aggregate) defaultmap(present:allocatable) defaultmap(present:pointer) ' + #:elif MFC_COMPILER == AMD_COMPILER_ID + #:set default_val = '' + #:else + #:set default_val = 'defaultmap(tofrom:aggregate) defaultmap(tofrom:allocatable) defaultmap(tofrom:pointer) ' + #:endif #:elif default == 'none' #:stop 'Not Supported Yet' #:endif @@ -160,12 +173,22 @@ & no_create_val.strip('\n') + present_val.strip('\n') + & & deviceptr_val.strip('\n') + attach_val.strip('\n') #! 
Hardcoding the parallelism for now - !#:set omp_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + & - !& clause_val + extraOmpArgs_val.strip('\n') - !#:set omp_end_directive = '!$omp end target teams loop' - #:set omp_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) ' + & - & clause_val + extraOmpArgs_val.strip('\n') - #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' + + #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID + #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + #:set omp_end_directive = '!$omp end target teams loop' + #:elif MFC_COMPILER == CCE_COMPILER_ID + #:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) ' + #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' + #:elif MFC_COMPILER == AMD_COMPILER_ID + #:set omp_start_directive = '!$omp target teams distribute parallel do ' + #:set omp_end_directive = '!$omp end target teams distribute parallel do' + #:else + #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + #:set omp_end_directive = '!$omp end target teams loop' + #:endif + + #:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n') $:omp_directive $:code $:omp_end_directive @@ -184,7 +207,13 @@ #:else #:set function_name_val = '' #:endif - #:set clause_val = nohost_val.strip('\n') + + #:if MFC_COMPILER == AMD_COMPILER_ID + #:set clause_val = '' + #:else + #:set clause_val = nohost_val.strip('\n') + #:endif + #:set omp_directive = '!$omp declare target ' + & & clause_val + extraOmpArgs_val.strip('\n') $:omp_directive @@ -201,11 +230,16 @@ $:omp_directive #:enddef -#! Not implemented yet +#! 
Not fully implemented yet (ignores most args right now) #:def OMP_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraOmpArgs=None) - #! loop is going to be ignored since all loops right now are seq - #:set temp = '' - $:temp + #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID + #:set omp_directive = '!$omp loop bind(thread)' + #:elif MFC_COMPILER == CRAY_COMPILER_ID or MFC_COMPILER == AMD_COMPILER_ID + #:set omp_directive = '' + #:else + #:set omp_directive = '' + #:endif + $:omp_directive #:enddef #:def OMP_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraOmpArgs=None) @@ -298,4 +332,22 @@ #:set omp_directive = '!$omp barrier ' + clause_val + extraOmpArgs_val.strip('\n') $:omp_directive #:enddef + +#:def UNDEF_AMD(code) + #:if MFC_COMPILER != AMD_COMPILER_ID + $:code + #:endif +#:enddef + +#:def UNDEF_CCE(code) + #:if MFC_COMPILER != CCE_COMPILER_ID + $:code + #:endif +#:enddef + +#:def UNDEF_NVIDIA(code) + #:if MFC_COMPILER != NVIDIA_COMPILER_ID and MFC_COMPILER != PGI_COMPILER_ID + $:code + #:endif +#:enddef ! 
New line at end of file is required for FYPP diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index e9e5bc5ee8..cd1cfc984e 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -99,6 +99,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega + #:block UNDEF_AMD #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end @@ -127,6 +128,7 @@ contains end do end do #:endcall GPU_PARALLEL_LOOP + #:endblock UNDEF_AMD end subroutine s_compute_chemistry_reaction_flux diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 14d81fdf31..ac7829eb0a 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -773,6 +773,7 @@ contains end if ! FD2 or FD4 of RHS at j = 0 + #:block UNDEF_AMD #:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1105,6 +1106,7 @@ contains end do end do #:endcall GPU_PARALLEL_LOOP + #:endblock UNDEF_AMD end if #:endfor diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index fa29f5bac3..8e81af6fa6 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -136,7 +136,7 @@ contains integer :: i, j, k, l !< Generic loop iterators integer :: ierr !< Generic flag used to identify and report GPU errors -#if 0 +#:block UNDEF_CCE ! 
Restrict filter to processors that have cells adjacent to axis if (bc_y%beg >= 0) return #if defined(MFC_GPU) @@ -304,7 +304,8 @@ contains end do end do #endif -#endif +#:endblock UNDEF_CCE + end subroutine s_apply_fourier_filter !> The purpose of this subroutine is to destroy the fftw plan diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 41001dbe3c..436a8e8029 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -2943,6 +2943,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then + #:block UNDEF_AMD #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld]') do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3116,6 +3117,7 @@ contains end do end do #:endcall GPU_PARALLEL_LOOP + #:endblock UNDEF_AMD end if #:endfor From cc8cd816423e2e4c4b42c81c7a2d130b2c9f327c Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 31 Jul 2025 19:08:24 -0400 Subject: [PATCH 35/60] Remove autocompare as failing debug cases, fix COMPILER_ID bug --- CMakeLists.txt | 2 +- src/common/include/omp_macros.fpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 92f26ac417..e51c33e481 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -530,7 +530,7 @@ function(MFC_SETUP_TARGET) if (CMAKE_BUILD_TYPE STREQUAL "Debug") target_compile_options(${a_target} - PRIVATE -gpu=autocompare,debug + PRIVATE -gpu=debug ) endif() elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index f595546a5d..61e45056bc 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -234,7 +234,7 @@ #:def OMP_LOOP(collapse=None, 
parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraOmpArgs=None) #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID #:set omp_directive = '!$omp loop bind(thread)' - #:elif MFC_COMPILER == CRAY_COMPILER_ID or MFC_COMPILER == AMD_COMPILER_ID + #:elif MFC_COMPILER == CCE_COMPILER_ID or MFC_COMPILER == AMD_COMPILER_ID #:set omp_directive = '' #:else #:set omp_directive = '' From e28423a47e4eafdd2956b7e361c3816d6b31078a Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Fri, 1 Aug 2025 16:09:41 -0400 Subject: [PATCH 36/60] made nonparameterized molecular_weights to compile few AMD kernels, make attach an always map --- src/common/include/omp_macros.fpp | 38 +++++++++++++++++++++---------- src/common/m_chemistry.fpp | 16 +++++++++---- src/simulation/m_cbc.fpp | 12 ++++++++-- src/simulation/m_start_up.fpp | 9 ++++++++ 4 files changed, 57 insertions(+), 18 deletions(-) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 61e45056bc..522bb62167 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -84,13 +84,15 @@ $:deviceptr_val #:enddef -#:def OMP_ATTACH_STR(attach) - #! #:if attach is not None - #! #:stop 'attach is not supported yet' - #! #:endif - #:set attach_val = '' - $:attach_val -#:enddef +#! #:def OMP_ATTACH_STR(attach) + #! #:set attach_val = OMP_MAP_STR('always,to', attach) + #! $:attach_val +#! #:enddef + +#! #:def OMP_DETACH_STR(detach) + #! #:set detach_val = OMP_MAP_STR('always,from', detach) + #! $:detach_val +#! 
#:enddef #:def OMP_TO_STR(to) #:set to_val = GEN_PARENTHESES_CLAUSE('to', to) @@ -130,7 +132,7 @@ #:set no_create_val = OMP_NOCREATE_STR(no_create) #:set present_val = OMP_PRESENT_STR(present) #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) - #:set attach_val = OMP_MAP_STR('tofrom', attach) + #:set attach_val = OMP_MAP_STR('always,tofrom', attach) #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set omp_clause_val = default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & & copy_val.strip('\n') + copyin_val.strip('\n') + & @@ -164,7 +166,7 @@ #:set no_create_val = OMP_NOCREATE_STR(no_create) #:set present_val = OMP_PRESENT_STR(present) #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) - #:set attach_val = OMP_MAP_STR('tofrom', attach) + #:set attach_val = OMP_MAP_STR('always,tofrom', attach) #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & @@ -255,7 +257,7 @@ #:set no_create_val = OMP_NOCREATE_STR(no_create) #:set present_val = OMP_PRESENT_STR(present) #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) - #:set attach_val = OMP_MAP_STR('tofrom', attach) + #:set attach_val = OMP_MAP_STR('always,tofrom', attach) #:set default_val = OMP_DEFAULT_STR(default) #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & @@ -273,7 +275,7 @@ #:def OMP_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraOmpArgs=None) #:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n') #:set create_val = OMP_CREATE_STR(create) - #:set attach_val = OMP_MAP_STR('to', attach) + #:set attach_val = OMP_MAP_STR('always,to', attach) #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set omp_clause_val = copyin_val.strip('\n') + 
create_val.strip('\n') + attach_val.strip('\n') #:set omp_directive = '!$omp target enter data ' + omp_clause_val + extraOmpArgs_val.strip('\n') @@ -283,7 +285,7 @@ #:def OMP_EXIT_DATA(copyout=None, delete=None, detach=None, extraOmpArgs=None) #:set copyout_val = OMP_COPYOUT_STR(copyout) #:set delete_val = OMP_DELETE_STR(delete) - #:set detach_val = OMP_MAP_STR('from', detach) + #:set detach_val = OMP_MAP_STR('always,from', detach) #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) #:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n') #:set omp_directive = '!$omp target exit data ' + clause_val + extraOmpArgs_val.strip('\n') @@ -339,12 +341,24 @@ #:endif #:enddef +#:def DEF_AMD(code) + #:if MFC_COMPILER == AMD_COMPILER_ID + $:code + #:endif +#:enddef + #:def UNDEF_CCE(code) #:if MFC_COMPILER != CCE_COMPILER_ID $:code #:endif #:enddef +#:def DEF_CCE(code) + #:if MFC_COMPILER == CCE_COMPILER_ID + $:code + #:endif +#:enddef + #:def UNDEF_NVIDIA(code) #:if MFC_COMPILER != NVIDIA_COMPILER_ID and MFC_COMPILER != PGI_COMPILER_ID $:code diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index cd1cfc984e..2e5b13ac43 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -15,6 +15,13 @@ module m_chemistry use m_global_parameters implicit none + + #:block DEF_AMD + real(dp) :: molecular_weights_nonparameter(10) = & + (/ 2.016d0, 1.008d0, 15.999d0, 31.998d0, 17.007d0, 18.015d0, 33.006d0, & + 34.014d0, 39.95d0, 28.014d0 /) + $:GPU_DECLARE(create='[molecular_weights_nonparameter]') + #:endblock DEF_AMD contains @@ -99,7 +106,6 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - #:block UNDEF_AMD #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end @@ -117,9 +123,12 @@ contains $:GPU_LOOP(parallelism='[seq]') do eqn = chemxb, chemxe - + #:block UNDEF_AMD omega_m = 
molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) - + #:endblock UNDEF_AMD + #:block DEF_AMD + omega_m = molecular_weights_nonparameter(eqn - chemxb + 1)*omega(eqn - chemxb + 1) + #:endblock DEF_AMD rhs_vf(eqn)%sf(x, y, z) = rhs_vf(eqn)%sf(x, y, z) + omega_m end do @@ -128,7 +137,6 @@ contains end do end do #:endcall GPU_PARALLEL_LOOP - #:endblock UNDEF_AMD end subroutine s_compute_chemistry_reaction_flux diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index ac7829eb0a..1d3b84229a 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -37,6 +37,9 @@ module m_cbc molecular_weights, get_species_specific_heats_r, & get_mole_fractions, get_species_specific_heats_r + #:block DEF_AMD + use m_chemistry, only: molecular_weights_nonparameter + #:endblock DEF_AMD implicit none private; public :: s_initialize_cbc_module, s_cbc, s_finalize_cbc_module @@ -773,7 +776,6 @@ contains end if ! FD2 or FD4 of RHS at j = 0 - #:block UNDEF_AMD #:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1050,8 +1052,15 @@ contains sum_Enthalpies = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species + #:block UNDEF_AMD h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) + #:endblock UNDEF_AMD + + #:block DEF_AMD + h_k(i) = h_k(i)*gas_constant/molecular_weights_nonparameter(i)*T + sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights_nonparameter(i)*Cp/R_gas)*dYs_dt(i) + #:endblock DEF_AMD end do flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) @@ -1106,7 +1115,6 @@ contains end do end do #:endcall 
GPU_PARALLEL_LOOP - #:endblock UNDEF_AMD end if #:endfor diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index f6a955ad38..85c7d727ea 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1453,6 +1453,15 @@ contains $:GPU_UPDATE(device='[igr, igr_order]') + #:block DEF_AMD + block + use m_thermochem, only: molecular_weights + use m_chemistry, only: molecular_weights_nonparameter + molecular_weights_nonparameter(:) = molecular_weights(:) + $:GPU_UPDATE(device='[molecular_weights_nonparameter]') + end block + #:endblock + end subroutine s_initialize_gpu_vars impure subroutine s_finalize_modules From aaa0ddfd2d44be6671529e2897e57493646b86ff Mon Sep 17 00:00:00 2001 From: Anand Radhakrishnan Date: Fri, 1 Aug 2025 19:22:36 -0400 Subject: [PATCH 37/60] Test suite passes fully on Phoenix (NVHPC + OPENMP), Fixes failing chemistry cases --- src/common/m_constants.fpp | 1 + src/common/m_variables_conversion.fpp | 3 +-- src/simulation/m_riemann_solvers.fpp | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 3682bbcf56..16570aff0f 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -10,6 +10,7 @@ module m_constants real(wp), parameter :: dflt_real = -1.e6_wp !< Default real value real(wp), parameter :: sgm_eps = 1.e-16_wp !< Segmentation tolerance + real(wp), parameter :: Chem_Tolerance = 1.e-16_wp !< Speed of Sound Tolerance in Chemistry real(wp), parameter :: small_alf = 1.e-11_wp !< Small alf tolerance real(wp), parameter :: pi = 3.141592653589793_wp !< Pi real(wp), parameter :: verysmall = 1.e-12_wp !< Very small number diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index bcccd47e73..36a0562d0e 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -1634,12 +1634,11 @@ contains real(wp), intent(out) :: c real(wp) 
:: blkmod1, blkmod2 - real(wp) :: Tolerance integer :: q if (chemistry) then - if (avg_state == 1 .and. abs(c_c) > Tolerance) then + if (avg_state == 1 .and. abs(c_c) > Chem_Tolerance) then c = sqrt(c_c - (gamma - 1.0_wp)*(vel_sum - H)) else c = sqrt((1.0_wp + 1.0_wp/gamma)*pres/rho) diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 436a8e8029..d2258a1dec 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -2326,12 +2326,12 @@ contains #:endcall GPU_PARALLEL_LOOP else ! 5-EQUATION MODEL WITH HLLC - #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[T_L, T_R, vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end - !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 + idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids @@ -2459,7 +2459,7 @@ contains T_R = pres_R/rho_R/R_gas_R call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + call get_species_specific_heats_r(T_R, Cp_iR) if (chem_params%gamma_method == 1) then !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. 
@@ -2554,20 +2554,20 @@ contains end if H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + H_R = (E_R + pres_R)/rho_R @:compute_average_state() call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + vel_L_rms, 0._wp, c_L) call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + vel_R_rms, 0._wp, c_R) !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. + ! variables are placeholders to call the subroutine. call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, c_sum_Yi_Phi, c_avg) + vel_avg_rms, c_sum_Yi_Phi, c_avg) if (viscous) then $:GPU_LOOP(parallelism='[seq]') From ced589a056e4156c4c07e57e672211ea43157c88 Mon Sep 17 00:00:00 2001 From: Tanush Date: Sun, 14 Sep 2025 23:57:22 -0400 Subject: [PATCH 38/60] OpenMP CI added --- .github/workflows/bench.yml | 37 ++++++++++++++++------ .github/workflows/frontier/bench.sh | 10 ++++-- .github/workflows/frontier/build.sh | 14 ++++++-- .github/workflows/frontier/submit-bench.sh | 1 + .github/workflows/frontier/submit.sh | 1 + .github/workflows/frontier/test.sh | 15 ++++++++- .github/workflows/phoenix/bench.sh | 10 +++++- .github/workflows/phoenix/submit-bench.sh | 3 +- .github/workflows/phoenix/submit.sh | 6 ++-- .github/workflows/phoenix/test.sh | 5 +++ .github/workflows/test.yml | 22 +++++++++---- toolchain/mfc/args.py | 2 +- 12 files changed, 98 insertions(+), 28 deletions(-) mode change 100644 => 100755 .github/workflows/phoenix/submit.sh diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index cadfe220f3..563566788e 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -36,6 +36,7 @@ jobs: labels: gt flag: p device: cpu + interface: none build_script: "" - cluster: phoenix name: Georgia Tech | Phoenix (NVHPC) @@ -43,14 +44,32 @@ 
jobs: labels: gt flag: p device: gpu + interface: acc build_script: "" + - cluster: phoenix + name: Georgia Tech | Phoenix (NVHPC) + group: phoenix + labels: gt + flag: p + device: gpu + interface: omp + build_script: "" + - cluster: frontier + name: Oak Ridge | Frontier (CCE) + group: phoenix + labels: frontier + flag: f + device: gpu + interface: acc + build_script: "bash .github/workflows/frontier/build.sh gpu acc bench" - cluster: frontier name: Oak Ridge | Frontier (CCE) group: phoenix labels: frontier flag: f device: gpu - build_script: "bash .github/workflows/frontier/build.sh gpu bench" + interface: omp + build_script: "bash .github/workflows/frontier/build.sh gpu omp bench" runs-on: group: ${{ matrix.group }} labels: ${{ matrix.labels }} @@ -80,29 +99,29 @@ jobs: - name: Bench (Master v. PR) run: | - (cd pr && bash .github/workflows/${{ matrix.cluster }}/submit-bench.sh .github/workflows/${{ matrix.cluster }}/bench.sh ${{ matrix.device }}) & - (cd master && bash .github/workflows/${{ matrix.cluster }}/submit-bench.sh .github/workflows/${{ matrix.cluster }}/bench.sh ${{ matrix.device }}) & + (cd pr && bash .github/workflows/${{ matrix.cluster }}/submit-bench.sh .github/workflows/${{ matrix.cluster }}/bench.sh ${{ matrix.device }} ${{ matrix.interface }}) & + (cd master && bash .github/workflows/${{ matrix.cluster }}/submit-bench.sh .github/workflows/${{ matrix.cluster }}/bench.sh ${{ matrix.device }} ${{ matrix.interface }}) & wait %1 && wait %2 - name: Generate & Post Comment run: | (cd pr && . 
./mfc.sh load -c ${{ matrix.flag }} -m g) - (cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml) + (cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}-${{ matrix.interface }}.yaml ../pr/bench-${{ matrix.device }}-${{ matrix.interface }}.yaml) - name: Print Logs if: always() run: | - cat pr/bench-${{ matrix.device }}.* 2>/dev/null || true - cat master/bench-${{ matrix.device }}.* 2>/dev/null || true + cat pr/bench-${{ matrix.device }}-${{ matrix.interface }}.* 2>/dev/null || true + cat master/bench-${{ matrix.device }}-${{ matrix.interface }}.* 2>/dev/null || true # All other runners (non-Phoenix) just run without special env - name: Archive Logs (Frontier) if: always() && matrix.cluster != 'phoenix' uses: actions/upload-artifact@v4 with: - name: ${{ matrix.cluster }}-${{ matrix.device }} + name: ${{ matrix.cluster }}-${{ matrix.device }}-${{ matrix.interface }} path: | - pr/bench-${{ matrix.device }}.* + pr/bench-${{ matrix.device }}-${{ matrix.interface }}.* pr/build/benchmarks/* - master/bench-${{ matrix.device }}.* + master/bench-${{ matrix.device }}-${{ matrix.interface }}.* master/build/benchmarks/* diff --git a/.github/workflows/frontier/bench.sh b/.github/workflows/frontier/bench.sh index 31a514d45d..35b4c5950e 100644 --- a/.github/workflows/frontier/bench.sh +++ b/.github/workflows/frontier/bench.sh @@ -1,12 +1,18 @@ #!/bin/bash n_ranks=12 - +device_opts="" if [ "$job_device" = "gpu" ]; then gpus=$(rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' ') n_ranks=$(echo "$gpus" | wc -w) # number of GPUs on node gpu_ids=$(echo "$gpus" | tr ' ' '\n' | tr '\n' ' ' | sed 's/ $//') # GPU IDs from rocm-smi - device_opts="--gpu -g $gpu_ids" + device_opts+="--gpu" + if [ "$job_interface" = "acc" ]; then + device_opts+=" acc" + elif [ "$job_interface" = "omp" ]; then + device_opts+=" mp" + fi + device_opts+=" -g $gpu_ids" fi if [ "$job_device" = "gpu" ]; then diff --git 
a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh index af272564e8..010976f3ac 100644 --- a/.github/workflows/frontier/build.sh +++ b/.github/workflows/frontier/build.sh @@ -1,13 +1,21 @@ #!/bin/bash +job_device=$1 +job_interface=$2 +run_bench=$3 build_opts="" -if [ "$1" = "gpu" ]; then - build_opts="--gpu" +if [ "$job_device" = "gpu" ]; then + build_opts+="--gpu" + if [ "$job_interface" = "acc" ]; then + build_opts+=" acc" + elif [ "$job_interface" = "omp" ]; then + build_opts+=" mp" + fi fi . ./mfc.sh load -c f -m g -if [ "$2" == "bench" ]; then +if [ "$run_bench" == "bench" ]; then for dir in benchmarks/*/; do dirname=$(basename "$dir") ./mfc.sh run "$dir/case.py" --case-optimization -j 8 --dry-run $build_opts diff --git a/.github/workflows/frontier/submit-bench.sh b/.github/workflows/frontier/submit-bench.sh index ff3fabdb37..693a5fdcf5 100644 --- a/.github/workflows/frontier/submit-bench.sh +++ b/.github/workflows/frontier/submit-bench.sh @@ -45,6 +45,7 @@ echo "Running in $(pwd):" job_slug="$job_slug" job_device="$2" +job_interface="$3" . ./mfc.sh load -c f -m g diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh index 8a4ce336c8..043432cca1 100644 --- a/.github/workflows/frontier/submit.sh +++ b/.github/workflows/frontier/submit.sh @@ -46,6 +46,7 @@ echo "Running in $(pwd):" job_slug="$job_slug" job_device="$2" +job_interface="$3" . 
./mfc.sh load -c f -m g diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh index aa977aa004..464a26cf16 100644 --- a/.github/workflows/frontier/test.sh +++ b/.github/workflows/frontier/test.sh @@ -3,8 +3,21 @@ gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' '` ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c` +device_opts="" if [ "$job_device" = "gpu" ]; then - ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j $ngpus -- -c frontier + n_ranks=$(echo "$gpus" | wc -w) # number of GPUs on node + gpu_ids=$(echo "$gpus" | tr ' ' '\n' | tr '\n' ' ' | sed 's/ $//') # GPU IDs from rocm-smi + device_opts+="--gpu" + if [ "$job_interface" = "acc" ]; then + device_opts+=" acc" + elif [ "$job_interface" = "omp" ]; then + device_opts+=" mp" + fi + device_opts+=" -g $gpu_ids" +fi + +if [ "$job_device" = "gpu" ]; then + ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j $ngpus $job_device -- -c frontier else ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j 32 -- -c frontier fi diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh index f58ef44721..99d0a07562 100644 --- a/.github/workflows/phoenix/bench.sh +++ b/.github/workflows/phoenix/bench.sh @@ -2,10 +2,18 @@ n_ranks=12 +echo "My interface is:" $job_interface +device_opts="" if [ "$job_device" = "gpu" ]; then n_ranks=$(nvidia-smi -L | wc -l) # number of GPUs on node gpu_ids=$(seq -s ' ' 0 $(($n_ranks-1))) # 0,1,2,...,gpu_count-1 - device_opts="--gpu -g $gpu_ids" + device_opts+="--gpu" + if [ "$job_interface" = "acc" ]; then + device_opts+=" acc" + elif [ "$job_interface" = "omp" ]; then + device_opts+=" mp" + fi + device_opts+=" -g $gpu_ids" fi tmpbuild=/storage/scratch1/6/sbryngelson3/mytmp_build diff --git a/.github/workflows/phoenix/submit-bench.sh b/.github/workflows/phoenix/submit-bench.sh index e8b6dd3484..eba1570bd7 100644 --- a/.github/workflows/phoenix/submit-bench.sh +++ b/.github/workflows/phoenix/submit-bench.sh @@ -3,7 +3,7 
@@ set -e usage() { - echo "Usage: $0 [script.sh] [cpu|gpu]" + echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp]" } if [ ! -z "$1" ]; then @@ -55,6 +55,7 @@ echo "Running in $(pwd):" job_slug="$job_slug" job_device="$2" +job_interface="$3" . ./mfc.sh load -c p -m $2 diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh old mode 100644 new mode 100755 index 6700e38c50..38483d189f --- a/.github/workflows/phoenix/submit.sh +++ b/.github/workflows/phoenix/submit.sh @@ -3,7 +3,7 @@ set -e usage() { - echo "Usage: $0 [script.sh] [cpu|gpu]" + echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp]" } if [ ! -z "$1" ]; then @@ -55,10 +55,10 @@ echo "Running in $(pwd):" job_slug="$job_slug" job_device="$2" +job_interface="$3" . ./mfc.sh load -c p -m $2 $sbatch_script_contents -EOT - +EOT \ No newline at end of file diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh index 74d1d1265a..47a25cf596 100644 --- a/.github/workflows/phoenix/test.sh +++ b/.github/workflows/phoenix/test.sh @@ -3,6 +3,11 @@ build_opts="" if [ "$job_device" = "gpu" ]; then build_opts="--gpu" + if [ "$job_interface" = "omp" ]; then + build_opts+=" mp" + elif [ "$job_interface" = "acc" ]; then + build_opts+=" acc" + fi fi ./mfc.sh test --dry-run -j 8 $build_opts diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3031858eb1..74f3f5c1cb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -96,8 +96,16 @@ jobs: timeout-minutes: 1400 strategy: matrix: - device: ['cpu', 'gpu'] + device: ['gpu'] + interface: ['acc', 'omp'] lbl: ['gt', 'frontier'] + include: + - device: 'cpu' + interface: 'none' + lbl: 'gt' + - device: 'cpu' + interface: 'none' + lbl: 'frontier' runs-on: group: phoenix labels: ${{ matrix.lbl }} @@ -111,23 +119,23 @@ jobs: - name: Build & Test if: matrix.lbl == 'gt' - run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }} + run: bash 
.github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }} ${{ matrix.interface }} - name: Build if: matrix.lbl == 'frontier' - run: bash .github/workflows/frontier/build.sh ${{ matrix.device }} + run: bash .github/workflows/frontier/build.sh ${{ matrix.device }} ${{ matrix.interface }} - name: Test if: matrix.lbl == 'frontier' - run: bash .github/workflows/frontier/submit.sh .github/workflows/frontier/test.sh ${{matrix.device}} + run: bash .github/workflows/frontier/submit.sh .github/workflows/frontier/test.sh ${{matrix.device}} ${{ matrix.interface }} - name: Print Logs if: always() - run: cat test-${{ matrix.device }}.out + run: cat test-${{ matrix.device }}-${{ matrix.interface }}.out - name: Archive Logs uses: actions/upload-artifact@v4 if: matrix.lbl == 'frontier' with: - name: logs-${{ strategy.job-index }}-${{ matrix.device }} - path: test-${{ matrix.device }}.out + name: logs-${{ strategy.job-index }}-${{ matrix.device }}-${{ matrix.interface }} + path: test-${{ matrix.device }}-${{ matrix.interface }}.out diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py index bae5cb6b24..169ee076a4 100644 --- a/toolchain/mfc/args.py +++ b/toolchain/mfc/args.py @@ -59,7 +59,7 @@ def add_common_arguments(p: argparse.ArgumentParser, mask = None): if "m" not in mask: for f in dataclasses.fields(config): if f.name == 'gpu': - p.add_argument(f"--{f.name}", action="store", nargs='?', default=gpuConfigOptions.ACC.value, dest=f.name, choices=[e.value for e in gpuConfigOptions], help=f"Turn the {f.name} option to OpenACC or OpenMP.") + p.add_argument(f"--{f.name}", action="store", nargs='?', const= gpuConfigOptions.ACC.value,default=gpuConfigOptions.ACC.value, dest=f.name, choices=[e.value for e in gpuConfigOptions], help=f"Turn the {f.name} option to OpenACC or OpenMP.") p.add_argument(f"--no-{f.name}", action="store_const", const = gpuConfigOptions.NONE.value, dest=f.name, help=f"Turn the {f.name} option OFF.") continue p.add_argument( 
f"--{f.name}", action="store_true", help=f"Turn the {f.name} option ON.") From 11a98bff39158fc2782fa61e220b49608fd67420 Mon Sep 17 00:00:00 2001 From: Tanush Date: Mon, 15 Sep 2025 00:18:57 -0400 Subject: [PATCH 39/60] Fixed issues from merge --- src/simulation/m_riemann_solvers.fpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 52330456e3..d6549e2538 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -1251,9 +1251,9 @@ contains if (Re_size(i) > 0) Re_R(i) = 0._wp $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + Re_L(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + Re_R(i) end do Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) @@ -1923,9 +1923,9 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + Re_L(i) - Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + Re_R(i) end do @@ -2363,9 +2363,9 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + Re_L(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + Re_R(i) end do Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) 
From 3ee82f55d24fcabca185bf332a5f4172cbc6f0b6 Mon Sep 17 00:00:00 2001 From: Tanush Date: Mon, 15 Sep 2025 00:20:10 -0400 Subject: [PATCH 40/60] Ran formatter --- src/simulation/m_riemann_solvers.fpp | 404 +++++++++++++-------------- 1 file changed, 202 insertions(+), 202 deletions(-) diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index d6549e2538..27abbf8313 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -1190,12 +1190,12 @@ contains idx1 = dir_idx(1) - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp - alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + alpha_L_sum = 0._wp; alpha_R_sum = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims @@ -1205,26 +1205,26 @@ contains vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i 
= 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) + end do + end if $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids @@ -1242,24 +1242,24 @@ contains alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, advxb + i - 1) end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + if (viscous) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = 
qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) + end do + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if + end if if (viscous) then $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 @@ -1293,8 +1293,8 @@ contains end do end if - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY if (hypoelasticity) then @@ -1616,11 +1616,11 @@ contains do k = is2%beg, is2%end do j = is1%beg, is1%end - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe @@ -1628,13 +1628,13 @@ contains alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids @@ -1642,24 +1642,24 @@ 
contains alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) + end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R H_L = (E_L + pres_L)/rho_L H_R = (E_R + pres_R)/rho_R @@ -1855,92 +1855,92 @@ contains do k = is2%beg, is2%end do j = is1%beg, is1%end - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = 
vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp - ! Retain this in the refactor - if (mpp_lim .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do - else if (num_fluids > 2) then + $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - else - rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) - gamma_L = gammas(1) - pi_inf_L = pi_infs(1) - qv_L = qvs(1) - rho_R = 
qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) - gamma_R = gammas(1) - pi_inf_R = pi_infs(1) - qv_R = qvs(1) - end if - if (viscous) then - if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2 + ! Retain this in the refactor + if (mpp_lim .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + end do + else if (num_fluids > 2) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + end do + else + rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) + gamma_L = gammas(1) + pi_inf_L = pi_infs(1) + qv_L = qvs(1) + rho_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) + gamma_R = gammas(1) + pi_inf_R = pi_infs(1) + qv_R = qvs(1) + end if + if (viscous) then + if (num_fluids == 1) then ! 
Need to consider case with num_fluids >= 2 $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & - + Re_R(i) - end do + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + + Re_R(i) + end do + + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + + end do + end if end if - end if - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms H_L = (E_L + pres_L)/rho_L H_R = (E_R + pres_R)/rho_R @@ -2293,14 +2293,14 @@ contains do k = is2%beg, is2%end do j = is1%beg, is1%end - !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 + !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp - alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; 
pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + alpha_L_sum = 0._wp; alpha_R_sum = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids @@ -2308,36 +2308,36 @@ contains alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do - - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - ! Change this by splitting it into the cases - ! present in the bubbles_euler - if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) - end do - end if + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + + ! Change this by splitting it into the cases + ! 
present in the bubbles_euler + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) + end do + end if $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids @@ -2352,26 +2352,26 @@ contains qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) + end do + 
Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if + end if if (chemistry) then c_sum_Yi_Phi = 0.0_wp @@ -2410,20 +2410,20 @@ contains call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) - Gamm_L = Cp_L/Cv_L; Gamm_R = Cp_R/Cv_R - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp); gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if + Gamm_L = Cp_L/Cv_L; Gamm_R = Cp_R/Cv_R + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp); gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) + end if call get_mixture_energy_mass(T_L, Ys_L, E_L) call get_mixture_energy_mass(T_R, Ys_R, E_R) - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R H_L = (E_L + pres_L)/rho_L H_R = (E_R + pres_R)/rho_R From 116fe8343c4beaea042c8eac0c64c6e6a5342434 Mon Sep 17 00:00:00 2001 From: Tanush Date: Mon, 15 Sep 2025 00:23:31 -0400 Subject: [PATCH 41/60] Update lint to ignore other gpu macro files --- .github/workflows/lint-source.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint-source.yml b/.github/workflows/lint-source.yml index 10b73169b8..bc35b887ea 100644 --- a/.github/workflows/lint-source.yml +++ b/.github/workflows/lint-source.yml @@ -30,7 +30,7 @@ jobs: - name: Looking for raw directives run: | - ! grep -iR '!\$acc\|!\$omp' --exclude="parallel_macros.fpp" --exclude="syscheck.fpp" ./src/* + ! 
grep -iR '!\$acc\|!\$omp' --exclude="parallel_macros.fpp" --exclude="acc_macros.fpp" --exclude="omp_macros.fpp" --exclude="shared_parallel_macros.fpp" --exclude="syscheck.fpp" ./src/* - name: No double precision intrinsics run: | From 14cac1f43f97d52ceae5b1ae5f29e82b55f84fb9 Mon Sep 17 00:00:00 2001 From: Tanush Date: Mon, 15 Sep 2025 00:30:16 -0400 Subject: [PATCH 42/60] Fixed linting toolchain issues --- toolchain/mfc/run/input.py | 4 ++-- toolchain/mfc/state.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/toolchain/mfc/run/input.py b/toolchain/mfc/run/input.py index 7f14441fd6..ac34dae9d4 100644 --- a/toolchain/mfc/run/input.py +++ b/toolchain/mfc/run/input.py @@ -73,9 +73,9 @@ def generate_fpp(self, target) -> None: # Determine the real type based on the single precision flag real_type = 'real(sp)' if ARG('single') else 'real(dp)' - if (ARG("gpu") == gpuConfigOptions.MP.value): + if ARG("gpu") == gpuConfigOptions.MP.value: directive_str = 'mp' - elif (ARG("gpu") == gpuConfigOptions.ACC.value): + elif ARG("gpu") == gpuConfigOptions.ACC.value: directive_str = 'acc' else: directive_str = None diff --git a/toolchain/mfc/state.py b/toolchain/mfc/state.py index 90b8b1daed..ec5bd002dc 100644 --- a/toolchain/mfc/state.py +++ b/toolchain/mfc/state.py @@ -38,7 +38,7 @@ def make_options(self) -> typing.List[str]: Example: --no-debug --mpi --no-gpu --no-gcov --no-unified""" options = [] for k, v in self.items(): - if (k == 'gpu'): + if k == 'gpu': options.append(f"--{v}-{k}") else: options.append(f"--{'no-' if not v else ''}{k}") @@ -49,7 +49,7 @@ def make_slug(self) -> str: identifies the configuration. 
Example: no-debug_no-gpu_no_mpi_no-gcov """ options = [] for k, v in sorted(self.items(), key=lambda x: x[0]): - if (k == 'gpu'): + if k == 'gpu': options.append(f"--{v}-{k}") else: options.append(f"--{'no-' if not v else ''}{k}") From 063fd64e9b909eb7a1cd1e41f7c852d0dc7f6789 Mon Sep 17 00:00:00 2001 From: Tanush Date: Mon, 15 Sep 2025 14:31:59 -0400 Subject: [PATCH 43/60] Updated job slug --- .github/workflows/frontier/submit-bench.sh | 2 +- .github/workflows/frontier/submit.sh | 2 +- .github/workflows/phoenix/submit-bench.sh | 2 +- .github/workflows/phoenix/submit.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/frontier/submit-bench.sh b/.github/workflows/frontier/submit-bench.sh index 693a5fdcf5..9dbc5e5817 100644 --- a/.github/workflows/frontier/submit-bench.sh +++ b/.github/workflows/frontier/submit-bench.sh @@ -24,7 +24,7 @@ else fi -job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2" +job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3" sbatch < Date: Sun, 21 Sep 2025 22:55:47 -0400 Subject: [PATCH 44/60] Fixed cylindrical --- src/common/include/omp_macros.fpp | 24 ------ src/common/include/parallel_macros.fpp | 27 +++++++ src/simulation/m_fftw.fpp | 108 +++++++++++++------------ 3 files changed, 82 insertions(+), 77 deletions(-) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 522bb62167..1c2fb9c985 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -340,28 +340,4 @@ $:code #:endif #:enddef - -#:def DEF_AMD(code) - #:if MFC_COMPILER == AMD_COMPILER_ID - $:code - #:endif -#:enddef - -#:def UNDEF_CCE(code) - #:if MFC_COMPILER != CCE_COMPILER_ID - $:code - #:endif -#:enddef - -#:def DEF_CCE(code) - #:if MFC_COMPILER == CCE_COMPILER_ID - $:code - #:endif -#:enddef - -#:def UNDEF_NVIDIA(code) - #:if MFC_COMPILER != NVIDIA_COMPILER_ID and MFC_COMPILER != PGI_COMPILER_ID - $:code - #:endif 
-#:enddef ! New line at end of file is required for FYPP diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 7729e056d7..61bc30b431 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -193,4 +193,31 @@ #endif #:enddef + +#:def DEF_AMD(code) + #:if MFC_COMPILER == AMD_COMPILER_ID + $:code + #:endif +#:enddef + +#:def UNDEF_CCE(code) + #:if MFC_COMPILER != CCE_COMPILER_ID + $:code + #:endif +#:enddef + +#:def DEF_CCE(code) + #:if MFC_COMPILER == CCE_COMPILER_ID + $:code + #:endif +#:enddef + +#:def UNDEF_NVIDIA(code) + #:if MFC_COMPILER != NVIDIA_COMPILER_ID and MFC_COMPILER != PGI_COMPILER_ID + $:code + #:endif +#:enddef + +#:set USING_NVHPC = (MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID) +#:set USING_CCE = (MFC_COMPILER == CCE_COMPILER_ID) ! New line at end of file is required for FYPP diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 8044fe2b25..4b3eeaaf82 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -161,117 +161,119 @@ contains end do #:endcall GPU_PARALLEL_LOOP +#:if not USING_NVHPC p_real => data_real_gpu p_cmplx => data_cmplx_gpu p_fltr_cmplx => data_fltr_cmplx_gpu +#:endif #:call GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx, p_fltr_cmplx]') #if defined(__PGI) - ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) + ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) call hipCheck(hipDeviceSynchronize()) #endif #:endcall GPU_HOST_DATA - Nfq = 3 - $:GPU_UPDATE(device='[Nfq]') + Nfq = 3 + $:GPU_UPDATE(device='[Nfq]') - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - 
end do + #:call GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) - ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) call hipCheck(hipDeviceSynchronize()) #endif #:endcall GPU_HOST_DATA + #:call GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP + + do i = 1, fourier_rings + #:call GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) end do end do end do #:endcall GPU_PARALLEL_LOOP - do i = 1, fourier_rings - - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - - #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 
1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) - end do + #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx]') #if defined(__PGI) - ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) + ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) call hipCheck(hipDeviceSynchronize()) #endif #:endcall GPU_HOST_DATA - Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) - $:GPU_UPDATE(device='[Nfq]') + Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) + $:GPU_UPDATE(device='[Nfq]') - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - end do + #:call GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) - ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) call hipCheck(hipDeviceSynchronize()) #endif #:endcall GPU_HOST_DATA - #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 
1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do + #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do - #:endcall GPU_PARALLEL_LOOP + end do + #:endcall GPU_PARALLEL_LOOP - end do + end do #:endcall GPU_DATA #else From 2cad73951f0d02fad6115276ef6e54fb159aa606 Mon Sep 17 00:00:00 2001 From: Tanush Date: Sun, 21 Sep 2025 23:30:13 -0400 Subject: [PATCH 45/60] Ran formatter --- src/simulation/m_fftw.fpp | 116 +++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 4b3eeaaf82..39c8bd493e 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -161,119 +161,119 @@ contains end do #:endcall GPU_PARALLEL_LOOP -#:if not USING_NVHPC - p_real => data_real_gpu - p_cmplx => data_cmplx_gpu - p_fltr_cmplx => data_fltr_cmplx_gpu -#:endif + #:if not USING_NVHPC + p_real => data_real_gpu + p_cmplx => data_cmplx_gpu + p_fltr_cmplx => data_fltr_cmplx_gpu + #:endif #:call GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx, p_fltr_cmplx]') #if defined(__PGI) - ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) + ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) call hipCheck(hipDeviceSynchronize()) #endif #:endcall GPU_HOST_DATA - Nfq = 3 - $:GPU_UPDATE(device='[Nfq]') + Nfq = 3 + $:GPU_UPDATE(device='[Nfq]') - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l 
+ j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + #:call GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + end do end do end do - end do - #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) - ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) call hipCheck(hipDeviceSynchronize()) #endif #:endcall GPU_HOST_DATA - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do - end do - end do - #:endcall GPU_PARALLEL_LOOP - - do i = 1, fourier_rings - #:call GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do end do #:endcall GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) + do i = 1, fourier_rings + + #:call GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, 
cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + end do end do end do - end do - #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP + + #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) + end do + end do + end do + #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx]') #if defined(__PGI) - ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) + ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) call hipCheck(hipDeviceSynchronize()) #endif #:endcall GPU_HOST_DATA - Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) - $:GPU_UPDATE(device='[Nfq]') + Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) + $:GPU_UPDATE(device='[Nfq]') - #:call GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + #:call GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + end do end do end do - end do - #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) - ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) call hipCheck(hipDeviceSynchronize()) #endif #:endcall GPU_HOST_DATA - #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + 
j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + end do end do end do - end do - #:endcall GPU_PARALLEL_LOOP + #:endcall GPU_PARALLEL_LOOP - end do + end do #:endcall GPU_DATA #else From 4b913aff741de2b175cf1f17ceec746636059002 Mon Sep 17 00:00:00 2001 From: Tanush Date: Mon, 22 Sep 2025 00:04:55 -0400 Subject: [PATCH 46/60] Change doubles to wp --- src/common/m_chemistry.fpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 94fbe1cc0b..98706bf3c7 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -17,9 +17,9 @@ module m_chemistry implicit none #:block DEF_AMD - real(dp) :: molecular_weights_nonparameter(10) = & - (/2.016d0, 1.008d0, 15.999d0, 31.998d0, 17.007d0, 18.015d0, 33.006d0, & - 34.014d0, 39.95d0, 28.014d0/) + real(wp) :: molecular_weights_nonparameter(10) = & + (/2.016, 1.008, 15.999, 31.998, 17.007, 18.015, 33.006, & + 34.014, 39.95, 28.014/) $:GPU_DECLARE(create='[molecular_weights_nonparameter]') #:endblock DEF_AMD From 110e49713ab8e6b683d62e196e226911fe9bc40a Mon Sep 17 00:00:00 2001 From: Tanush Date: Mon, 22 Sep 2025 15:07:55 -0400 Subject: [PATCH 47/60] Mostly fixed IBM test cases --- src/simulation/m_ibm.fpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 0095b5415b..487e6a5951 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -317,8 
+317,9 @@ contains #:endcall GPU_PARALLEL_LOOP !Correct the state of the inner points in IBs + if (num_inner_gps > 0) then #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]') - do i = 1, num_inner_gps + do i = 1, num_inner_gps innerp = inner_points(i) j = innerp%loc(1) @@ -331,6 +332,7 @@ contains end do end do #:endcall GPU_PARALLEL_LOOP + end if end subroutine s_ibm_correct_state From d69a62b6140b6f9964d989a221cea9d8637fd2b7 Mon Sep 17 00:00:00 2001 From: Tanush Date: Mon, 22 Sep 2025 15:08:57 -0400 Subject: [PATCH 48/60] Ran formatter --- src/simulation/m_ibm.fpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 487e6a5951..d676484b82 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -318,21 +318,21 @@ contains !Correct the state of the inner points in IBs if (num_inner_gps > 0) then - #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]') - do i = 1, num_inner_gps + #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]') + do i = 1, num_inner_gps - innerp = inner_points(i) - j = innerp%loc(1) - k = innerp%loc(2) - l = innerp%loc(3) + innerp = inner_points(i) + j = innerp%loc(1) + k = innerp%loc(2) + l = innerp%loc(3) - $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - q_cons_vf(q)%sf(j, k, l) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + q_cons_vf(q)%sf(j, k, l) = 0._wp + end do end do - end do - #:endcall GPU_PARALLEL_LOOP - end if + #:endcall GPU_PARALLEL_LOOP + end if end subroutine s_ibm_correct_state From 10f33611f099cbc37cdda2910c90b83e31f6d79f Mon Sep 17 00:00:00 2001 From: Tanush Date: Mon, 22 Sep 2025 17:54:16 -0400 Subject: [PATCH 49/60] Fixed case when num_gps is 0 for OpenMP --- 
src/simulation/m_ibm.fpp | 255 ++++++++++++++++++++------------------- 1 file changed, 128 insertions(+), 127 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index d676484b82..49993ca706 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -168,153 +168,154 @@ contains real(wp) :: buf type(ghost_point) :: gp type(ghost_point) :: innerp - - #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, j,k,l,q]') - do i = 1, num_gps - - gp = ghost_points(i) - j = gp%loc(1) - k = gp%loc(2) - l = gp%loc(3) - patch_id = ghost_points(i)%ib_patch_id - - ! Calculate physical location of GP - if (p > 0) then - physical_loc = [x_cc(j), y_cc(k), z_cc(l)] - else - physical_loc = [x_cc(j), y_cc(k), 0._wp] - end if - - !Interpolate primitive variables at image point associated w/ GP - if (bubbles_euler .and. .not. qbmm) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP) - else if (qbmm .and. polytropic) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP, nmom_IP) - else if (qbmm .and. .not. polytropic) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) - else - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) - end if - dyn_pres = 0._wp - - ! 
Set q_prim_vf params at GP so that mixture vars calculated properly - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) - q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) - end do - - if (surface_tension) then - q_prim_vf(c_idx)%sf(j, k, l) = c_IP - end if - if (model_eqns /= 4) then - ! If in simulation, use acc mixture subroutines - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K, G_K, Gs) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K) + if (num_gps > 0) then + #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, j,k,l,q]') + do i = 1, num_gps + + gp = ghost_points(i) + j = gp%loc(1) + k = gp%loc(2) + l = gp%loc(3) + patch_id = ghost_points(i)%ib_patch_id + + ! Calculate physical location of GP + if (p > 0) then + physical_loc = [x_cc(j), y_cc(k), z_cc(l)] else - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K) + physical_loc = [x_cc(j), y_cc(k), 0._wp] end if - end if - ! Calculate velocity of ghost cell - if (gp%slip) then - norm(1:3) = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, 1:3) - buf = sqrt(sum(norm**2)) - norm = norm/buf - vel_norm_IP = sum(vel_IP*norm)*norm - vel_g = vel_IP - vel_norm_IP - else - vel_g = 0._wp - end if - - ! Set momentum - $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) - dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & - vel_g(q - momxb + 1)/2._wp - end do - - ! 
Set continuity and adv vars - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) - q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) - end do - - ! Set color function - if (surface_tension) then - q_cons_vf(c_idx)%sf(j, k, l) = c_IP - end if + !Interpolate primitive variables at image point associated w/ GP + if (bubbles_euler .and. .not. qbmm) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP) + else if (qbmm .and. polytropic) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP, nmom_IP) + else if (qbmm .and. .not. polytropic) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) + else + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) + end if + dyn_pres = 0._wp - ! Set Energy - if (bubbles_euler) then - q_cons_vf(E_idx)%sf(j, k, l) = (1 - alpha_IP(1))*(gamma*pres_IP + pi_inf + dyn_pres) - else - q_cons_vf(E_idx)%sf(j, k, l) = gamma*pres_IP + pi_inf + dyn_pres - end if - ! Set bubble vars - if (bubbles_euler .and. .not. qbmm) then - call s_comp_n_from_prim(alpha_IP(1), r_IP, nbub, weight) + ! Set q_prim_vf params at GP so that mixture vars calculated properly $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - q_cons_vf(bubxb + (q - 1)*2)%sf(j, k, l) = nbub*r_IP(q) - q_cons_vf(bubxb + (q - 1)*2 + 1)%sf(j, k, l) = nbub*v_IP(q) - if (.not. 
polytropic) then - q_cons_vf(bubxb + (q - 1)*4)%sf(j, k, l) = nbub*r_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 1)%sf(j, k, l) = nbub*v_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 2)%sf(j, k, l) = nbub*pb_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 3)%sf(j, k, l) = nbub*mv_IP(q) - end if + do q = 1, num_fluids + q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) + q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) end do - end if - if (qbmm) then + if (surface_tension) then + q_prim_vf(c_idx)%sf(j, k, l) = c_IP + end if + if (model_eqns /= 4) then + ! If in simulation, use acc mixture subroutines + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K, G_K, Gs) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K) + end if + end if - nbub = nmom_IP(1) + ! Calculate velocity of ghost cell + if (gp%slip) then + norm(1:3) = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, 1:3) + buf = sqrt(sum(norm**2)) + norm = norm/buf + vel_norm_IP = sum(vel_IP*norm)*norm + vel_g = vel_IP - vel_norm_IP + else + vel_g = 0._wp + end if + + ! Set momentum $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb*nmom - q_cons_vf(bubxb + q - 1)%sf(j, k, l) = nbub*nmom_IP(q) + do q = momxb, momxe + q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) + dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & + vel_g(q - momxb + 1)/2._wp end do + ! Set continuity and adv vars $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - q_cons_vf(bubxb + (q - 1)*nmom)%sf(j, k, l) = nbub + do q = 1, num_fluids + q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) + q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) end do - if (.not. polytropic) then + ! Set color function + if (surface_tension) then + q_cons_vf(c_idx)%sf(j, k, l) = c_IP + end if + + ! 
Set Energy + if (bubbles_euler) then + q_cons_vf(E_idx)%sf(j, k, l) = (1 - alpha_IP(1))*(gamma*pres_IP + pi_inf + dyn_pres) + else + q_cons_vf(E_idx)%sf(j, k, l) = gamma*pres_IP + pi_inf + dyn_pres + end if + ! Set bubble vars + if (bubbles_euler .and. .not. qbmm) then + call s_comp_n_from_prim(alpha_IP(1), r_IP, nbub, weight) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + q_cons_vf(bubxb + (q - 1)*2)%sf(j, k, l) = nbub*r_IP(q) + q_cons_vf(bubxb + (q - 1)*2 + 1)%sf(j, k, l) = nbub*v_IP(q) + if (.not. polytropic) then + q_cons_vf(bubxb + (q - 1)*4)%sf(j, k, l) = nbub*r_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 1)%sf(j, k, l) = nbub*v_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 2)%sf(j, k, l) = nbub*pb_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 3)%sf(j, k, l) = nbub*mv_IP(q) + end if + end do + end if + + if (qbmm) then + + nbub = nmom_IP(1) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb*nmom + q_cons_vf(bubxb + q - 1)%sf(j, k, l) = nbub*nmom_IP(q) + end do + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb + q_cons_vf(bubxb + (q - 1)*nmom)%sf(j, k, l) = nbub + end do + + if (.not. 
polytropic) then $:GPU_LOOP(parallelism='[seq]') - do r = 1, nnode - pb_in(j, k, l, r, q) = presb_IP((q - 1)*nnode + r) - mv_in(j, k, l, r, q) = massv_IP((q - 1)*nnode + r) + do q = 1, nb + $:GPU_LOOP(parallelism='[seq]') + do r = 1, nnode + pb_in(j, k, l, r, q) = presb_IP((q - 1)*nnode + r) + mv_in(j, k, l, r, q) = massv_IP((q - 1)*nnode + r) + end do end do - end do + end if end if - end if - if (model_eqns == 3) then - $:GPU_LOOP(parallelism='[seq]') - do q = intxb, intxe - q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & - + pi_infs(q - intxb + 1)) - end do - end if - end do - #:endcall GPU_PARALLEL_LOOP + if (model_eqns == 3) then + $:GPU_LOOP(parallelism='[seq]') + do q = intxb, intxe + q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & + + pi_infs(q - intxb + 1)) + end do + end if + end do + #:endcall GPU_PARALLEL_LOOP + end if !Correct the state of the inner points in IBs if (num_inner_gps > 0) then From 1db105f8cd2dcd6d99b7930d4642371820d97819 Mon Sep 17 00:00:00 2001 From: Tanush Date: Fri, 3 Oct 2025 16:14:29 -0400 Subject: [PATCH 50/60] Ran formatter --- src/common/m_chemistry.fpp | 206 +++++++++++++-------------- src/simulation/m_rhs.fpp | 128 ++++++++--------- src/simulation/m_riemann_solvers.fpp | 116 +++++++-------- src/simulation/m_start_up.fpp | 2 +- 4 files changed, 226 insertions(+), 226 deletions(-) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 396394558f..afd5f25ab6 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -192,112 +192,112 @@ contains offsets(idir) = 1 #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux]', copyin='[offsets]') - do z = isc3%beg, isc3%end - do y = isc2%beg, isc2%end - do x = isc1%beg, isc1%end - ! 
Calculate grid spacing using direction-based indexing - select case (idir) - case (1) - grid_spacing = x_cc(x + 1) - x_cc(x) - case (2) - grid_spacing = y_cc(y + 1) - y_cc(y) - case (3) - grid_spacing = z_cc(z + 1) - z_cc(z) - end select - - ! Extract species mass fractions - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) - Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) - end do - - ! Calculate molecular weights and mole fractions - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - MW_cell = 0.5_wp*(MW_L + MW_R) - - call get_mole_fractions(MW_L, Ys_L, Xs_L) - call get_mole_fractions(MW_R, Ys_R, Xs_R) - - ! Calculate gas constants and thermodynamic properties - Rgas_L = gas_constant/MW_L - Rgas_R = gas_constant/MW_R - - P_L = q_prim_qp(E_idx)%sf(x, y, z) - P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - rho_L = q_prim_qp(1)%sf(x, y, z) - rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - T_L = P_L/rho_L/Rgas_L - T_R = P_R/rho_R/Rgas_R - - rho_cell = 0.5_wp*(rho_L + rho_R) - dT_dxi = (T_R - T_L)/grid_spacing - - ! Get transport properties - call get_species_mass_diffusivities_mixavg(P_L, T_L, Ys_L, mass_diffusivities_mixavg1) - call get_species_mass_diffusivities_mixavg(P_R, T_R, Ys_R, mass_diffusivities_mixavg2) - - call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) - call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) - - call get_species_enthalpies_rt(T_L, h_l) - call get_species_enthalpies_rt(T_R, h_r) - - ! 
Calculate species properties and gradients - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) - h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) - Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) - h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) - dXk_dxi(i - chemxb + 1) = (Xs_R(i - chemxb + 1) - Xs_L(i - chemxb + 1))/grid_spacing - end do - - ! Calculate mixture-averaged diffusivities - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - mass_diffusivities_mixavg_Cell(i - chemxb + 1) = & - (mass_diffusivities_mixavg2(i - chemxb + 1) + mass_diffusivities_mixavg1(i - chemxb + 1))/2.0_wp - end do - - lambda_Cell = 0.5_wp*(lambda_R + lambda_L) - - ! Calculate mass diffusion fluxes - rho_Vic = 0.0_wp - Mass_Diffu_Energy = 0.0_wp - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & - molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) - rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) - Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) - end do - - ! Apply corrections for mass conservation - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Energy = Mass_Diffu_Energy - h_k(eqn - chemxb + 1)*Ys_cell(eqn - chemxb + 1)*rho_Vic - Mass_Diffu_Flux(eqn - chemxb + 1) = Mass_Diffu_Flux(eqn - chemxb + 1) - rho_Vic*Ys_cell(eqn - chemxb + 1) - end do - - ! Add thermal conduction contribution - Mass_Diffu_Energy = lambda_Cell*dT_dxi + Mass_Diffu_Energy - - ! 
Update flux arrays - flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_diffu_Flux(eqn - chemxb + 1) + do z = isc3%beg, isc3%end + do y = isc2%beg, isc2%end + do x = isc1%beg, isc1%end + ! Calculate grid spacing using direction-based indexing + select case (idir) + case (1) + grid_spacing = x_cc(x + 1) - x_cc(x) + case (2) + grid_spacing = y_cc(y + 1) - y_cc(y) + case (3) + grid_spacing = z_cc(z + 1) - z_cc(z) + end select + + ! Extract species mass fractions + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) + Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) + end do + + ! Calculate molecular weights and mole fractions + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + MW_cell = 0.5_wp*(MW_L + MW_R) + + call get_mole_fractions(MW_L, Ys_L, Xs_L) + call get_mole_fractions(MW_R, Ys_R, Xs_R) + + ! Calculate gas constants and thermodynamic properties + Rgas_L = gas_constant/MW_L + Rgas_R = gas_constant/MW_R + + P_L = q_prim_qp(E_idx)%sf(x, y, z) + P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + rho_L = q_prim_qp(1)%sf(x, y, z) + rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + T_L = P_L/rho_L/Rgas_L + T_R = P_R/rho_R/Rgas_R + + rho_cell = 0.5_wp*(rho_L + rho_R) + dT_dxi = (T_R - T_L)/grid_spacing + + ! 
Get transport properties + call get_species_mass_diffusivities_mixavg(P_L, T_L, Ys_L, mass_diffusivities_mixavg1) + call get_species_mass_diffusivities_mixavg(P_R, T_R, Ys_R, mass_diffusivities_mixavg2) + + call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) + call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) + + call get_species_enthalpies_rt(T_L, h_l) + call get_species_enthalpies_rt(T_R, h_r) + + ! Calculate species properties and gradients + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) + h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) + Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) + h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) + dXk_dxi(i - chemxb + 1) = (Xs_R(i - chemxb + 1) - Xs_L(i - chemxb + 1))/grid_spacing + end do + + ! Calculate mixture-averaged diffusivities + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + mass_diffusivities_mixavg_Cell(i - chemxb + 1) = & + (mass_diffusivities_mixavg2(i - chemxb + 1) + mass_diffusivities_mixavg1(i - chemxb + 1))/2.0_wp + end do + + lambda_Cell = 0.5_wp*(lambda_R + lambda_L) + + ! Calculate mass diffusion fluxes + rho_Vic = 0.0_wp + Mass_Diffu_Energy = 0.0_wp + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & + molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) + rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) + Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) + end do + + ! 
Apply corrections for mass conservation + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Mass_Diffu_Energy = Mass_Diffu_Energy - h_k(eqn - chemxb + 1)*Ys_cell(eqn - chemxb + 1)*rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) = Mass_Diffu_Flux(eqn - chemxb + 1) - rho_Vic*Ys_cell(eqn - chemxb + 1) + end do + + ! Add thermal conduction contribution + Mass_Diffu_Energy = lambda_Cell*dT_dxi + Mass_Diffu_Energy + + ! Update flux arrays + flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_diffu_Flux(eqn - chemxb + 1) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index e45c37e32c..7e2cc240e8 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -1566,38 +1566,38 @@ contains if ((surface_tension .or. viscous) .or. chem_params%diffusion) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (surface_tension .or. viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & - (flux_src_n_in(i)%sf(j - 1, k, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - end if + do l = 0, p + do k = 0, n + do j = 0, m + if (surface_tension .or. 
viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & + (flux_src_n_in(i)%sf(j - 1, k, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + end if - if (chem_params%diffusion) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & - (flux_src_n_in(i)%sf(j - 1, k, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do + if (chem_params%diffusion) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & + (flux_src_n_in(i)%sf(j - 1, k, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do - if (.not. viscous) then - rhs_vf(E_idx)%sf(j, k, l) = & - rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dx(j)* & - (flux_src_n_in(E_idx)%sf(j - 1, k, l) & - - flux_src_n_in(E_idx)%sf(j, k, l)) + if (.not. viscous) then + rhs_vf(E_idx)%sf(j, k, l) = & + rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dx(j)* & + (flux_src_n_in(E_idx)%sf(j - 1, k, l) & + - flux_src_n_in(E_idx)%sf(j, k, l)) + end if end if - end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -1673,37 +1673,37 @@ contains if ((surface_tension .or. viscous) .or. chem_params%diffusion) then #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (surface_tension .or. viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - end if + do l = 0, p + do k = 0, n + do j = 0, m + if (surface_tension .or. 
viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + end if - if (chem_params%diffusion) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - if (.not. viscous) then - rhs_vf(E_idx)%sf(j, k, l) = & - rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(E_idx)%sf(j, k - 1, l) & - - flux_src_n_in(E_idx)%sf(j, k, l)) + if (chem_params%diffusion) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + if (.not. viscous) then + rhs_vf(E_idx)%sf(j, k, l) = & + rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(E_idx)%sf(j, k - 1, l) & + - flux_src_n_in(E_idx)%sf(j, k, l)) + end if end if - end if + end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if end if @@ -1782,19 +1782,19 @@ contains end if #:call GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_src_n_in(i)%sf(j, k, l - 1) & - - flux_src_n_in(i)%sf(j, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_src_n_in(i)%sf(j, k, l - 1) & + - flux_src_n_in(i)%sf(j, k, l)) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP if (grid_geometry == 3) then diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 9e362ff0f1..6cbc16e978 100644 --- a/src/simulation/m_riemann_solvers.fpp 
+++ b/src/simulation/m_riemann_solvers.fpp @@ -641,9 +641,9 @@ contains if (viscous) then if (chemistry) then - call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) - end if - $:GPU_LOOP(parallelism='[seq]') + call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) + end if + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -2506,9 +2506,9 @@ contains if (viscous) then if (chemistry) then - call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) - end if - $:GPU_LOOP(parallelism='[seq]') + call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) + end if + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -3627,47 +3627,47 @@ contains if (viscous .or. (surface_tension)) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(i)%sf(j, k, l) = 0._wp + do i = momxb, E_idx + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(i)%sf(j, k, l) = 0._wp + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (chem_params%diffusion) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(j, k, l) = 0._wp - end if + do i = E_idx, chemxe + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + if (i == E_idx .or. 
i >= chemxb) then + flux_src_vf(i)%sf(j, k, l) = 0._wp + end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (chem_params%diffusion) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(j, k, l) = 0._wp - end if + do i = E_idx, chemxe + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(j, k, l) = 0._wp + end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3704,31 +3704,31 @@ contains if (chem_params%diffusion) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(k, j, l) = 0._wp - end if + do i = E_idx, chemxe + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(k, j, l) = 0._wp + end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (qbmm) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if @@ -3751,33 +3751,33 @@ contains if (chem_params%diffusion) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - if (i == E_idx .or. 
i >= chemxb) then - flux_src_vf(i)%sf(l, k, j) = 0._wp - end if + do i = E_idx, chemxe + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(l, k, j) = 0._wp + end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if if (chem_params%diffusion) then #:call GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(l, k, j) = 0._wp - end if + do i = E_idx, chemxe + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(l, k, j) = 0._wp + end if + end do end do end do end do - end do #:endcall GPU_PARALLEL_LOOP end if diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 1aced4f017..426b9b28b6 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1491,7 +1491,7 @@ contains end if $:GPU_UPDATE(device='[chem_params]') - + $:GPU_UPDATE(device='[nb,R0ref,Ca,Web,Re_inv,weight,R0, & & bubbles_euler,polytropic,polydisperse,qbmm, & & ptil,bubble_model,thermal,poly_sigma,adv_n,adap_dt, & From e2b11f97d92a69a56b4eacef7f4a00c96da51671 Mon Sep 17 00:00:00 2001 From: Tanush Date: Fri, 3 Oct 2025 16:30:00 -0400 Subject: [PATCH 51/60] Exit with OMP and Frontier test --- .github/workflows/frontier/test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh index ec6e4d32e1..434fa01753 100644 --- a/.github/workflows/frontier/test.sh +++ b/.github/workflows/frontier/test.sh @@ -12,6 +12,7 @@ if [ "$job_device" = "gpu" ]; then device_opts+=" acc" elif [ "$job_interface" = "omp" ]; then device_opts+=" mp" + exit 0 fi device_opts+=" -g $gpu_ids" fi From d7f4386308b97a32d1bab86a345c027a15f732c3 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Fri, 3 
Oct 2025 16:35:54 -0400 Subject: [PATCH 52/60] Fixed typos --- .typos.toml | 1 + docs/documentation/gpuParallelization.md | 2 +- examples/1D_shuosher_analytical/case.py | 2 +- examples/1D_shuosher_old/case.py | 2 +- examples/1D_shuosher_teno5/case.py | 2 +- examples/1D_shuosher_teno7/case.py | 2 +- examples/1D_shuosher_wenojs5/case.py | 2 +- examples/1D_shuosher_wenom5/case.py | 2 +- examples/1D_shuosher_wenoz5/case.py | 2 +- src/common/m_boundary_common.fpp | 2 +- src/common/m_phase_change.fpp | 4 ++-- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 2 +- 13 files changed, 14 insertions(+), 13 deletions(-) diff --git a/.typos.toml b/.typos.toml index 492855221f..1fb0c90272 100644 --- a/.typos.toml +++ b/.typos.toml @@ -19,6 +19,7 @@ strang = "strang" Strang = "Strang" TKE = "TKE" HSA = "HSA" +infp = "infp" [files] extend-exclude = ["docs/documentation/references*", "tests/", "toolchain/cce_simulation_workgroup_256.sh"] diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index c40d3c57d9..8c64996599 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -31,7 +31,7 @@ Note: Ordering is not guaranteed or stable, so use key-value pairing when using - Data on the GPU has a reference counter - When data is referred to being allocated, it means that GPU memory is allocated if it is not already present in GPU memory. If a variable is already present, the reference counter is just incremented. -- When data is referred to being dellocated, it means that the reference counter is decremented. If the reference counter is zero, then the data is actually deallocated from GPU memory +- When data is referred to being deallocated, it means that the reference counter is decremented. 
If the reference counter is zero, then the data is actually deallocated from GPU memory - When data is referred to being attached, it means that the device pointer attaches to target if it not already attached. If pointer is already attached, then the attachment counter is just incremented - When data is referred to being detached, it means that the attachment counter is decremented. If attachment counter is zero, then actually detached diff --git a/examples/1D_shuosher_analytical/case.py b/examples/1D_shuosher_analytical/case.py index 30e50acff5..8126714ff4 100644 --- a/examples/1D_shuosher_analytical/case.py +++ b/examples/1D_shuosher_analytical/case.py @@ -57,7 +57,7 @@ "patch_icpp(1)%pres": 10.3333, "patch_icpp(1)%alpha_rho(1)": 3.957143, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of -4 < x < 5 + # One analytic patch to take care of -4 < x < 5 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 0.5, diff --git a/examples/1D_shuosher_old/case.py b/examples/1D_shuosher_old/case.py index 24dd240ddf..7ad71e733a 100644 --- a/examples/1D_shuosher_old/case.py +++ b/examples/1D_shuosher_old/case.py @@ -57,7 +57,7 @@ "patch_icpp(1)%pres": 10.3333, "patch_icpp(1)%alpha_rho(1)": 3.957143, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of -4 < x < 5 + # One analytic patch to take care of -4 < x < 5 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 0.5, diff --git a/examples/1D_shuosher_teno5/case.py b/examples/1D_shuosher_teno5/case.py index 73189b3cec..2477ba6a87 100644 --- a/examples/1D_shuosher_teno5/case.py +++ b/examples/1D_shuosher_teno5/case.py @@ -59,7 +59,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git 
a/examples/1D_shuosher_teno7/case.py b/examples/1D_shuosher_teno7/case.py index 8a21aa1309..9bac5d82a0 100644 --- a/examples/1D_shuosher_teno7/case.py +++ b/examples/1D_shuosher_teno7/case.py @@ -59,7 +59,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_wenojs5/case.py b/examples/1D_shuosher_wenojs5/case.py index b99e5ef7e2..52763938fd 100644 --- a/examples/1D_shuosher_wenojs5/case.py +++ b/examples/1D_shuosher_wenojs5/case.py @@ -58,7 +58,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_wenom5/case.py b/examples/1D_shuosher_wenom5/case.py index 8c96fea5b6..c3dde3a589 100644 --- a/examples/1D_shuosher_wenom5/case.py +++ b/examples/1D_shuosher_wenom5/case.py @@ -58,7 +58,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_wenoz5/case.py b/examples/1D_shuosher_wenoz5/case.py index a7e8e3e3cd..f959f363ae 100644 --- a/examples/1D_shuosher_wenoz5/case.py +++ b/examples/1D_shuosher_wenoz5/case.py @@ -58,7 +58,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 
5.5, diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 4304b74a8e..81d4bc8d65 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_boundary_conditions_common !> @brief The purpose of the module is to apply noncharacteristic and processor -!! boundary condiitons +!! boundary conditions #:include 'macros.fpp' diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index a395b5c553..82d8f41389 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -162,7 +162,7 @@ contains ! depleting the mass of liquid q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM - ! tranferring the total mass to vapor + ! transferring the total mass to vapor q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM ! calling pT-equilibrium for overheated vapor, which is MFL = 0 @@ -172,7 +172,7 @@ contains call s_TSat(pSOV, TSatOV, TSOV) ! subcooled liquid case - ! tranferring the total mass to liquid + ! transferring the total mass to liquid q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM ! depleting the mass of vapor diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index a5b95a4b41..e3fbeb822e 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -24,7 +24,7 @@ module m_body_forces contains - !> This subroutine inializes the module global array of mixture + !> This subroutine initializes the module global array of mixture !! densities in each grid cell impure subroutine s_initialize_body_forces_module diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 9b3beb01a0..73c7141171 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1518,7 +1518,7 @@ contains lsizes(1) = max(1, bub_id) lsizes(2) = 21 - ! if the partcle number is zero, put 1 since MPI cannot deal with writing + ! 
if the particle number is zero, put 1 since MPI cannot deal with writing ! zero particle part_order(:) = 1 part_order(proc_rank + 1) = max(1, bub_id) From 01a4dea523f8a4ae18f75181bd8c05c395369fe6 Mon Sep 17 00:00:00 2001 From: Tanush Date: Fri, 3 Oct 2025 16:57:10 -0400 Subject: [PATCH 53/60] Update docs --- docs/documentation/gpuParallelization.md | 122 ++++++++++++++++------- 1 file changed, 88 insertions(+), 34 deletions(-) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index 8c64996599..f3d2089057 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -44,14 +44,19 @@ Note: Ordering is not guaranteed or stable, so use key-value pairing when using **Macro Invocation** -Uses FYPP eval directive using `$:` +Uses FYPP eval directive using `#:call` -`$:GPU_PARALLEL_LOOP(...)` +```C +#:call GPU_PARALLEL_LOOP(...) + {code} +#:endcall GPU_PARALLEL_LOOP +``` **Parameters** | name | data type | Default Value | description | |------------------|---------------------|-------------------|-------------------------------------------------------------------------------------------| +| `code` | code | Required | Region of code where the GPU parallelizes loops | | `collapse` | integer | None | Number of loops to combine into 1 loop | | `parallelism` | string list | '\[gang,vector\]' | Parallelism granularity to use for this loop | | `default` | string | 'present' | Implicit assumptions compiler should make | @@ -69,6 +74,7 @@ Uses FYPP eval directive using `$:` | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | | `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | **Parameter 
Restrictions** @@ -88,9 +94,15 @@ Uses FYPP eval directive using `$:` **Example** -```python - $:GPU_PARALLEL_LOOP(collapse=3, private='[tmp, r]', reduction='[[vol, avg], [max_val]]', reductionOp='[+, MAX]') - $:GPU_PARALLEL_LOOP(collapse=2, private='[sum_holder]', copyin='[starting_sum]', copyout='[eigenval,C]') +```C + #:call GPU_PARALLEL_LOOP(collapse=3, private='[tmp, r]', reduction='[[vol, avg], [max_val]]', reductionOp='[+, MAX]') + {code} + ... + #:endcall GPU_PARALLEL_LOOP + #:call GPU_PARALLEL_LOOP(collapse=2, private='[sum_holder]', copyin='[starting_sum]', copyout='[eigenval,C]') + {code} + ... + #:endcall GPU_PARALLEL_LOOP ``` @@ -115,6 +127,7 @@ Uses FYPP eval directive using `$:` | `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | | `reductionOp` | string list | None | Operator that each list of reduction will reduce with | | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | **Parameter Restrictions** @@ -154,23 +167,25 @@ Uses FYPP call directive using `#:call` **Parameters** -| name | data type | Default Value | description | -|------------------|---------------------|-------------------|-------------------------------------------------------------------------------------------| -| `default` | string | 'present' | Implicit assumptions compiler should make | -| `private` | string list | None | Variables that are private to each iteration/thread | -| `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread | -| `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | -| `reductionOp` | string list | None | Operator that each list of reduction will reduce with | -| `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to 
CPU on exit | -| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | -| `copyinReadOnly` | string list | None | Allocates and copies readonly data to GPU and then deallocated on exit | -| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | -| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | -| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | -| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | -| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | -| `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|------------------|---------------------|---------------|-------------------------------------------------------------------------------------------| +| `code` | code | Required | Region of code where a kernel is launched on the GPU | +| `default` | string | 'present' | Implicit assumptions compiler should make | +| `private` | string list | None | Variables that are private to each iteration/thread | +| `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread | +| `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | +| `reductionOp` | string list | None | Operator that each list of reduction will reduce with | +| `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | +| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit 
| +| `copyinReadOnly` | string list | None | Allocates and copies readonly data to GPU and then deallocated on exit | +| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | +| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | +| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +| `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | **Parameter Restrictions** @@ -234,6 +249,8 @@ Uses FYPP call directive using `#:call` | `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | | `default` | string | None | Implicit assumptions compiler should make | | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | + **Parameter Restrictions** @@ -274,6 +291,8 @@ Uses FYPP eval directive using `$:` | `create` | string list | None | Allocates data on GPU on entrance | | `attach` | string list | None | Attaches device pointer to device targets on entrance | | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | + **Example** @@ -301,6 +320,8 @@ Uses FYPP eval directive using `$:` | `delete` | string list | None | Deallocates 
data on GPU on exit | | `detach` | string list | None | Detach device pointer from device targets on exit | | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | + **Example** @@ -333,6 +354,8 @@ Uses FYPP eval directive using `$:` | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | | `link` | string list | None | Declare global link, and only allocate when variable used in data clause. | | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | + **Additional information** @@ -366,6 +389,8 @@ Uses FYPP eval directive using `$:` | `host` | string list | None | Updates data from GPU to CPU | | `device` | string list | None | Updates data from CPU to GPU | | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | + **Example** @@ -391,11 +416,14 @@ Uses FYPP call directive using `#:call` **Parameters** -| name | data type | Default Value | description | -|----------------|-------------|---------------|------------------------------------------------------------------| -| `code` | code | Required | Region of code where GPU memory addresses is accessible | -| `use_device` | string list | None | Use GPU memory address of variable instead of CPU memory address | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|-------------------|-------------|---------------|------------------------------------------------------------------| +| `code` | code | Required | Region of code where GPU memory addresses is accessible | 
+| `use_device_addr` | string list | None | Use GPU memory address of variable instead of CPU memory address | +| `use_device_ptr` | string list | None | Use GPU pointer of pointers instead of CPU pointer | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | + **Parameter Restrictions** @@ -436,6 +464,8 @@ Uses FYPP eval directive using `$:` | name | data type | Default Value | description | |----------------|-----------|---------------|--------------------------------------------------------------| | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | + **Example** @@ -460,6 +490,8 @@ Uses FYPP eval directive using `$:` |----------------|-----------|---------------|--------------------------------------------------------------| | `atomic` | string | Required | Which atomic operation is performed | | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | + **Parameter Restrictions** @@ -517,6 +549,9 @@ Uses FYPP eval directive using `$:` | `nohost` | boolean | False | Do not compile procedure code for CPU | | `cray_inline` | boolean | False | Inline procedure on cray compiler | | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| `extraOmpArgs` | string | None | String of any extra arguments added to the OpenMP directive | + + **Parameter Restrictions** @@ -555,6 +590,9 @@ Uses FYPP eval directive using `$:` | `cache` | string list | Required | Data that should to stored in cache | | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +**NOTE** +Does not do 
anything for OpenMP currently + **Example** ```python @@ -570,27 +608,32 @@ Uses FYPP eval directive using `$:` ## Compiler agnostic tools ## OpenMP tools + ```bash OMP_DISPLAY_ENV=true | false | verbose ``` + - Prints out the internal control values and environment variables at the beginning of the program if `true` or `verbose` - `verbose` will also print out vendor-specific internal control values and environment variables ```bash OMP_TARGET_OFFLOAD = MANDATORY | DISABLED | DEFAULT ``` + - Quick way to turn off off-load (`DISABLED`) or make it abort if a GPU isn't found (`MANDATORY`) -- Great first test: does the problem disappear when you drop back to the CPU? +- Great first test: does the problem disappear when you drop back to the CPU? ```bash OMP_THREAD_LIMIT= ``` + - Sets the maximum number of OpenMP threads to use in a contention group - Might be useful in checking for issues with contention or race conditions ```bash OMP_DISPLAY_AFFINITY=TRUE ``` + - Will display affinity bindings for each OpenMP thread, containing hostname, process identifier, OS thread identifier, OpenMP thread identifier, and affinity binding. 
## Cray Compiler Tools @@ -620,6 +663,7 @@ CRAY_ACC_FORCE_EARLY_INIT=1 ```bash CRAY_ACC_PRESENT_DUMP_SAVE_NAMES=1 ``` + - Will cause `acc_present_dump()` to output variable names and file locations in addition to variable mappings - Add `acc_present_dump()` around hotspots to help find problems with data movements - Helps more if adding `CRAY_ACC_DEBUG` environment variable @@ -631,12 +675,14 @@ CRAY_ACC_PRESENT_DUMP_SAVE_NAMES=1 ```bash STATIC_RANDOM_SEED=1 ``` -- Forces the seed returned by `RANDOM_SEED` to be constant, so it generates the same sequence of random numbers + +- Forces the seed returned by `RANDOM_SEED` to be constant, so it generates the same sequence of random numbers - Useful for testing issues with randomized data ```bash NVCOMPILER_TERM=option[,option] ``` + - `[no]debug`: Enables/disables just-in-time debugging (debugging invoked on error) - `[no]trace`: Enables/disables stack traceback on error @@ -645,17 +691,19 @@ NVCOMPILER_TERM=option[,option] ```bash NVCOMPILER_ACC_NOTIFY= ``` + - Assign the environment variable to a bitmask to print out information to stderr for the following - kernel launches: 1 - data transfers: 2 - region entry/exit: 4 - wait operation of synchronizations with the device: 8 - device memory allocations and deallocations: 16 -- 1 (kernels only) is the usual first step.3 (kernels + copies) is great for "why is it so slow?" +- 1 (kernels only) is the usual first step.3 (kernels + copies) is great for "why is it so slow?" ```bash NVCOMPILER_ACC_TIME=1 ``` + - Lightweight profiler - prints a tidy end-of-run table with per-region and per-kernel times and bytes moved - Do not use with CUDA profiler at the same time @@ -663,8 +711,9 @@ NVCOMPILER_ACC_TIME=1 ```bash NVCOMPILER_ACC_DEBUG=1 ``` + - Spews everything the runtime sees: host/device addresses, mapping events, present-table look-ups, etc. -- Great for "partially present" or "pointer went missing" errors. 
+- Great for "partially present" or "pointer went missing" errors. - [Doc for NVCOMPILER_ACC_DEBUG](https://docs.nvidia.com/hpc-sdk/archive/20.9/pdf/hpc209openacc_gs.pdf) - Ctrl+F for `NVCOMPILER_ACC_DEBUG` @@ -673,6 +722,7 @@ NVCOMPILER_ACC_DEBUG=1 ```bash LIBOMPTARGET_PROFILE=run.json ``` + - Emits a Chrome-trace (JSON) timeline you can open in chrome://tracing or Speedscope - Great lightweight profiler when Nsight is overkill. - Granularity in µs via `LIBOMPTARGET_PROFILE_GRANULARITY` (default 500). @@ -680,9 +730,10 @@ LIBOMPTARGET_PROFILE=run.json ```bash LIBOMPTARGET_INFO= ``` + - Prints out different types of runtime information - Human-readable log of data-mapping inserts/updates, kernel launches, copies, waits. -- Perfect first stop for "why is nothing copied?" +- Perfect first stop for "why is nothing copied?" - Flags - Print all data arguments upon entering an OpenMP device kernel: 0x01 - Indicate when a mapped address already exists in the device mapping table: 0x02 @@ -694,6 +745,7 @@ LIBOMPTARGET_INFO= ```bash LIBOMPTARGET_DEBUG=1 ``` + - Developer-level trace (host-side) - Much noisier than `INFO` - Only works if the runtime was built with `-DOMPTARGET_DEBUG`. @@ -701,12 +753,14 @@ LIBOMPTARGET_DEBUG=1 ```bash LIBOMPTARGET_JIT_OPT_LEVEL=-O{0,1,2,3} ``` -- This environment variable can be used to change the optimization pipeline used to optimize the embedded device code as part of the device JIT. + +- This environment variable can be used to change the optimization pipeline used to optimize the embedded device code as part of the device JIT. - The value corresponds to the `-O{0,1,2,3}` command line argument passed to clang. ```bash LIBOMPTARGET_JIT_SKIP_OPT=1 ``` + - This environment variable can be used to skip the optimization pipeline during JIT compilation. - If set, the image will only be passed through the backend. - The backend is invoked with the `LIBOMPTARGET_JIT_OPT_LEVEL` flag. 
@@ -718,6 +772,6 @@ LIBOMPTARGET_JIT_SKIP_OPT=1 - [NVHPC & OpenACC Docs](https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html?highlight=NVCOMPILER_#environment-variables) - [NVHPC & OpenMP Docs](https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html?highlight=NVCOMPILER_#id2) - [LLVM & OpenMP Docs](https://openmp.llvm.org/design/Runtimes.html) - - NVHPC is built on top of LLVM + - NVHPC is built on top of LLVM - [OpenMP Docs](https://www.openmp.org/spec-html/5.1/openmp.html) - [OpenACC Docs](https://www.openacc.org/sites/default/files/inline-files/OpenACC.2.7.pdf) From 6ca76049bdd53f73cdd51940ea5894130804d713 Mon Sep 17 00:00:00 2001 From: Tanush Date: Fri, 3 Oct 2025 17:05:51 -0400 Subject: [PATCH 54/60] Exit on frontier openmp build --- .github/workflows/frontier/build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh index 70c29204c1..4e9fde2f15 100644 --- a/.github/workflows/frontier/build.sh +++ b/.github/workflows/frontier/build.sh @@ -10,6 +10,7 @@ if [ "$job_device" = "gpu" ]; then build_opts+=" acc" elif [ "$job_interface" = "omp" ]; then build_opts+=" mp" + exit 0 fi fi From 31f8cd2296840dc3ca53e71b502d12ffdea4f17c Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 7 Oct 2025 01:02:10 -0500 Subject: [PATCH 55/60] Chemistry variable was not autoprivatized --- src/common/m_chemistry.fpp | 2 +- src/common/m_variables_conversion.fpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index afd5f25ab6..4ba51e9564 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -129,7 +129,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega, T]') do z = bounds(3)%beg, bounds(3)%end do y = 
bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 52f17a8845..4d4bf60fa3 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -873,7 +873,7 @@ contains end if #:endif - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, dyn_pres_K, rhoYks, B]') + #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, dyn_pres_K, rhoYks, B, T]') do l = ibounds(3)%beg, ibounds(3)%end do k = ibounds(2)%beg, ibounds(2)%end do j = ibounds(1)%beg, ibounds(1)%end From e81b88412ee41ee39ee36a0fbae61252ee008b04 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 7 Oct 2025 01:04:15 -0500 Subject: [PATCH 56/60] Don't run frontier omp test --- .github/workflows/frontier/build.sh | 1 - .github/workflows/frontier/test.sh | 1 - .github/workflows/test.yml | 4 ++++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh index 4e9fde2f15..70c29204c1 100644 --- a/.github/workflows/frontier/build.sh +++ b/.github/workflows/frontier/build.sh @@ -10,7 +10,6 @@ if [ "$job_device" = "gpu" ]; then build_opts+=" acc" elif [ "$job_interface" = "omp" ]; then build_opts+=" mp" - exit 0 fi fi diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh index 434fa01753..ec6e4d32e1 100644 --- a/.github/workflows/frontier/test.sh +++ b/.github/workflows/frontier/test.sh @@ -12,7 +12,6 @@ if [ "$job_device" = "gpu" ]; then device_opts+=" acc" elif [ "$job_interface" = "omp" ]; then device_opts+=" mp" - exit 0 fi device_opts+=" -g $gpu_ids" fi diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 74f3f5c1cb..0e9c8d1b5a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -106,6 +106,10 @@ jobs: 
- device: 'cpu' interface: 'none' lbl: 'frontier' + exclude: + - device: 'gpu' + interface: 'omp' + lbl: 'frontier' runs-on: group: phoenix labels: ${{ matrix.lbl }} From 4cdabd9ffc249a893b817a64de0cabf904d88072 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 7 Oct 2025 01:15:22 -0500 Subject: [PATCH 57/60] Ran formatter --- src/common/m_phase_change.fpp | 10 +++++----- src/simulation/m_ibm.fpp | 36 +++++++++++++++++------------------ 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 36f0193c48..82d8f41389 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -162,8 +162,8 @@ contains ! depleting the mass of liquid q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM - ! transferring the total mass to vapor - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! transferring the total mass to vapor + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM ! calling pT-equilibrium for overheated vapor, which is MFL = 0 call s_infinite_pt_relaxation_k(j, k, l, 0, pSOV, p_infOV, q_cons_vf, rhoe, TSOV) @@ -171,9 +171,9 @@ contains ! calculating Saturation temperature call s_TSat(pSOV, TSatOV, TSOV) - ! subcooled liquid case - ! transferring the total mass to liquid - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! subcooled liquid case + ! transferring the total mass to liquid + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM ! depleting the mass of vapor q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 74c780257f..5353aed125 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -250,24 +250,24 @@ contains end if end if - ! 
Calculate velocity of ghost cell - if (gp%slip) then - norm(1:3) = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, 1:3) - buf = sqrt(sum(norm**2)) - norm = norm/buf - vel_norm_IP = sum(vel_IP*norm)*norm - vel_g = vel_IP - vel_norm_IP - else - if (patch_ib(patch_id)%moving_ibm == 0) then - ! we know the object is not moving if moving_ibm is 0 (false) - vel_g = 0._wp - else - do q = 1, 3 - ! if mibm is 1 or 2, then the boundary may be moving - vel_g(q) = patch_ib(patch_id)%vel(q) - end do - end if - end if + ! Calculate velocity of ghost cell + if (gp%slip) then + norm(1:3) = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, 1:3) + buf = sqrt(sum(norm**2)) + norm = norm/buf + vel_norm_IP = sum(vel_IP*norm)*norm + vel_g = vel_IP - vel_norm_IP + else + if (patch_ib(patch_id)%moving_ibm == 0) then + ! we know the object is not moving if moving_ibm is 0 (false) + vel_g = 0._wp + else + do q = 1, 3 + ! if mibm is 1 or 2, then the boundary may be moving + vel_g(q) = patch_ib(patch_id)%vel(q) + end do + end if + end if ! 
Set momentum $:GPU_LOOP(parallelism='[seq]') From 4db238b6482ce8ed9f9a8c33e0b360a805c5c070 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 7 Oct 2025 10:07:23 -0500 Subject: [PATCH 58/60] Fix issue in frontier test script --- .github/workflows/frontier/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh index ec6e4d32e1..3950f67398 100644 --- a/.github/workflows/frontier/test.sh +++ b/.github/workflows/frontier/test.sh @@ -17,7 +17,7 @@ if [ "$job_device" = "gpu" ]; then fi if [ "$job_device" = "gpu" ]; then - ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j $ngpus $job_device -- -c frontier + ./mfc.sh test -a --rdma-mpi --max-attempts 3 -j $ngpus $device_opts -- -c frontier else ./mfc.sh test -a --max-attempts 3 -j 32 -- -c frontier fi From 865c6e2b846495bcc1012e51b5f0ca8b4d238771 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 7 Oct 2025 12:25:13 -0500 Subject: [PATCH 59/60] 1D Chemistry seems to have an issue with two-pass IPO --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b20c7c66b2..c588ae47ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -412,7 +412,7 @@ function(MFC_SETUP_TARGET) set(IPO_TARGETS ${ARGS_TARGET}) # Here we need to split into "library" and "executable" to perform IPO on the NVIDIA compiler. # A little hacky, but it *is* an edge-case for *one* compiler. 
- if (NVHPC_USE_TWO_PASS_IPO) + if (NVHPC_USE_TWO_PASS_IPO AND NOT(MFC_OpenMP AND ARGS_OpenMP)) add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES}) target_compile_options(${ARGS_TARGET}_lib PRIVATE $<$:-Mextract=lib:${ARGS_TARGET}_lib> From cc25407ee32b5c0c3a88a4f8b44d2242528425fb Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 7 Oct 2025 12:39:45 -0500 Subject: [PATCH 60/60] Issue with frontier test fix --- .github/workflows/frontier/test.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh index 3950f67398..8c51afc320 100644 --- a/.github/workflows/frontier/test.sh +++ b/.github/workflows/frontier/test.sh @@ -5,15 +5,12 @@ ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c` device_opts="" if [ "$job_device" = "gpu" ]; then - n_ranks=$(echo "$gpus" | wc -w) # number of GPUs on node - gpu_ids=$(echo "$gpus" | tr ' ' '\n' | tr '\n' ' ' | sed 's/ $//') # GPU IDs from rocm-smi device_opts+="--gpu" if [ "$job_interface" = "acc" ]; then device_opts+=" acc" elif [ "$job_interface" = "omp" ]; then device_opts+=" mp" fi - device_opts+=" -g $gpu_ids" fi if [ "$job_device" = "gpu" ]; then