Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions docs/documentation/gpuParallelization.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,16 @@ Note: Ordering is not guaranteed or stable, so use key-value pairing when using

**Macro Invocation**

Uses FYPP eval directive using `#:call`
To parallelize a loop, place one macro call at each end of the loop:

```C
#:call GPU_PARALLEL_LOOP(...)
$:GPU_PARALLEL_LOOP(...)
{code}
#:endcall GPU_PARALLEL_LOOP
$:END_GPU_PARALLEL_LOOP()
```

This wraps the lines in `code` with parallelization calls to OpenACC or OpenMP, depending on environment and compiler settings.

**Parameters**

| name | data type | Default Value | description |
Expand Down
7 changes: 2 additions & 5 deletions src/common/include/acc_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@
$:end_acc_directive
#:enddef

#:def ACC_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', &
#:def ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
& no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None)
Expand All @@ -155,10 +155,7 @@
& deviceptr_val.strip('\n') + attach_val.strip('\n')
#:set acc_directive = '!$acc parallel loop ' + &
& clause_val + extraAccArgs_val.strip('\n')
#:set acc_end_directive = '!$acc end parallel loop'
$:acc_directive
$:code
$:acc_end_directive
#:enddef

#:def ACC_ROUTINE(function_name=None, parallelism=None, nohost=False, extraAccArgs=None)
Expand Down Expand Up @@ -308,4 +305,4 @@
#:set acc_directive = '!$acc wait ' + clause_val + extraAccArgs_val.strip('\n')
$:acc_directive
#:enddef
! New line at end of file is required for FYPP
! New line at end of file is required for FYPP
21 changes: 15 additions & 6 deletions src/common/include/omp_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@
$:omp_end_directive
#:enddef

#:def OMP_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', &
#:def OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
& no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None)
Expand Down Expand Up @@ -178,21 +178,30 @@

#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
#:set omp_end_directive = '!$omp end target teams loop'
#:elif MFC_COMPILER == CCE_COMPILER_ID
#:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) '
#:set omp_end_directive = '!$omp end target teams distribute parallel do simd'
#:elif MFC_COMPILER == AMD_COMPILER_ID
#:set omp_start_directive = '!$omp target teams distribute parallel do '
#:set omp_end_directive = '!$omp end target teams distribute parallel do'
#:else
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
#:set omp_end_directive = '!$omp end target teams loop'
#:endif

#:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n')
$:omp_directive
$:code
#:enddef

#! END_OMP_PARALLEL_LOOP: emits the OpenMP "end" directive that closes an
#! offloaded parallel loop. The directive text is chosen from MFC_COMPILER:
#!   - CCE            -> end target teams distribute parallel do simd
#!   - AMD            -> end target teams distribute parallel do
#!   - anything else  -> end target teams loop (this includes NVIDIA and PGI)
#! Takes no arguments.
#:def END_OMP_PARALLEL_LOOP()

#:if MFC_COMPILER == CCE_COMPILER_ID
#:set closing_directive = '!$omp end target teams distribute parallel do simd'
#:elif MFC_COMPILER == AMD_COMPILER_ID
#:set closing_directive = '!$omp end target teams distribute parallel do'
#:else
#! NVIDIA, PGI and every unrecognised compiler share the same end directive.
#:set closing_directive = '!$omp end target teams loop'
#:endif

$:closing_directive
#:enddef

Expand Down
26 changes: 19 additions & 7 deletions src/common/include/parallel_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,33 @@

#:enddef

#:def GPU_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', &
#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
& no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None)

#:set acc_code = ACC_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs)
#:set omp_code = OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs)
#:set acc_directive = ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs)
#:set omp_directive = OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs)

#if defined(MFC_OpenACC)
$:acc_code
$:acc_directive
#elif defined(MFC_OpenMP)
$:omp_code
#else
$:code
$:omp_directive
#endif

#:enddef

#! END_GPU_PARALLEL_LOOP: emits the directive that closes a GPU parallel loop.
#! Both candidate end directives are computed here; the emitted C-preprocessor
#! guards keep the OpenACC one when MFC_OpenACC is defined, the OpenMP one when
#! MFC_OpenMP is defined, and neither otherwise.
#! Takes no arguments.
#:def END_GPU_PARALLEL_LOOP()

#:set omp_loop_end = END_OMP_PARALLEL_LOOP()
#:set acc_loop_end = '!$acc end parallel loop'

#if defined(MFC_OpenACC)
$:acc_loop_end
#elif defined(MFC_OpenMP)
$:omp_loop_end
#endif

#:enddef

#:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None, extraOmpArgs=None)
Expand Down
2 changes: 1 addition & 1 deletion src/common/include/shared_parallel_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,4 @@
#:endif
$:extraArgs_val
#:enddef
! New line at end of file is required for FYPP
! New line at end of file is required for FYPP
Loading
Loading