diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md
new file mode 100644
index 0000000000..8579914485
--- /dev/null
+++ b/docs/documentation/gpuParallelization.md
@@ -0,0 +1,566 @@
+# GPU Parallelization
+
+MFC compiles GPU code via OpenACC and, in the future, OpenMP as well.
+
+To allow swapping between OpenACC and OpenMP, custom GPU macros are used that translate into the equivalent OpenACC or OpenMP directives.
+FYPP is used to process the GPU macros.
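+
+For example, with the default `parallelism` and `default` values described below, a macro call such as
+
+```python
+ $:GPU_PARALLEL_LOOP(collapse=2)
+```
+
+expands to roughly the following OpenACC directive:
+
+```Fortran
+ !$acc parallel loop collapse(2) gang vector default(present)
+```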
+
+[OpenACC Quick Start Guide](https://openacc-best-practices-guide.readthedocs.io/en/latest/01-Introduction.html)
+
+[OpenACC API Documentation](https://www.openacc.org/sites/default/files/inline-files/API%20Guide%202.7.pdf)
+
+------------------------------------------------------------------------------------------
+
+## Macro API Documentation
+
+Note: Argument ordering is not guaranteed to be stable, so pass arguments by keyword (key-value pairing) when invoking the macros.
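+
+For instance, prefer the keyword form below over relying on argument position (a minimal sketch with a hypothetical variable `tmp`):
+
+```python
+ $:GPU_PARALLEL_LOOP(collapse=2, private='[tmp]')
+```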
+
+### Data Type Meanings
+
+- Integer is a whole number (e.g. `3`)
+
+- Boolean is a pythonic boolean - Valid options: `True` or `False`
+
+- String List is given as a comma-separated list surrounded by brackets and enclosed in quotations
+ - Ex: ``'[hello, world, Fortran]'``
+
+- 2-level string list is given as a comma-separated list of string lists, surrounded by brackets and enclosed in quotations
+ - Ex: ``'[[hello, world], [Fortran, MFC]]'`` or ``'[[hello]]'``
+
+### Data Flow
+
+- Data on the GPU has a reference counter
+- When data is said to be allocated, GPU memory is allocated for it if it is not already present in GPU memory. If the variable is already present, the reference counter is simply incremented.
+- When data is said to be deallocated, the reference counter is decremented. If the reference counter reaches zero, the data is actually deallocated from GPU memory.
+- When data is said to be attached, the device pointer is attached to its target if it is not already attached. If the pointer is already attached, the attachment counter is simply incremented.
+- When data is said to be detached, the attachment counter is decremented. If the attachment counter reaches zero, the pointer is actually detached.
+
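+As a sketch of this reference counting (using a hypothetical array `a`), nested enter/exit data pairs only free GPU memory at the outermost exit:
+
+```python
+ $:GPU_ENTER_DATA(create='[a]')
+ ! a is allocated on the GPU; reference counter is 1
+ $:GPU_ENTER_DATA(create='[a]')
+ ! a is already present; reference counter is incremented to 2
+ $:GPU_EXIT_DATA(delete='[a]')
+ ! reference counter is decremented to 1; a stays on the GPU
+ $:GPU_EXIT_DATA(delete='[a]')
+ ! reference counter reaches 0; a is deallocated from GPU memory
+```
+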
+------------------------------------------------------------------------------------------
+
+### Computation Macros
+
+
+ GPU_PARALLEL_LOOP -- (Execute the following loop on the GPU in parallel)
+
+**Macro Invocation**
+
+Uses FYPP eval directive using `$:`
+
+`$:GPU_PARALLEL_LOOP(...)`
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|------------------|---------------------|-------------------|-------------------------------------------------------------------------------------------|
+| `collapse` | integer | None | Number of loops to combine into 1 loop |
+| `parallelism` | string list | '\[gang,vector\]' | Parallelism granularity to use for this loop |
+| `default` | string | 'present' | Implicit assumptions compiler should make |
+| `private` | string list | None | Variables that are private to each iteration/thread |
+| `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread |
+| `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end |
+| `reductionOp`    | string list         | None              | Operator that each corresponding reduction list is reduced with                            |
+| `copy`           | string list         | None              | Allocates and copies data to GPU on entrance, then deallocates and copies it back to CPU on exit |
+| `copyin`         | string list         | None              | Allocates and copies data to GPU on entrance, then deallocates it on exit                  |
+| `copyinReadOnly` | string list         | None              | Allocates and copies read-only data to GPU on entrance, then deallocates it on exit        |
+| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit |
+| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit |
+| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory |
+| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit |
+| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory |
+| `attach`         | string list         | None              | Attaches device pointers to device targets on entrance, then detaches them on exit         |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Parameter Restrictions**
+
+| name | Restricted range |
+|---------------|---------------------------------------------------|
+| `collapse` | Must be greater than 1 |
+| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' |
+| `default` | 'present' or 'none' |
+
+**Additional information**
+
+- `default='present'` means that any non-scalar data is assumed to be present on the GPU
+- `default='none'` means that the compiler should not implicitly determine the data attributes for any variable
+- reduction and reductionOp must match in length
+- With ``reduction='[[sum1, sum2], [largest]]'`` and ``reductionOp='[+, max]'``, `sum1` and `sum2` will be the sums of `sum1`/`sum2` over all loop iterations, and `largest` will be the maximum value of `largest` across all loop iterations
+- A reduction implies a copy, so it does not need to be added for both
+
+**Example**
+
+```python
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[tmp, r]', reduction='[[vol, avg], [max_val]]', reductionOp='[+, MAX]')
+ $:GPU_PARALLEL_LOOP(collapse=2, private='[sum_holder]', copyin='[starting_sum]', copyout='[eigenval,C]')
+```
+
+
+
+
+ GPU_LOOP -- (Execute loop on GPU)
+
+**Macro Invocation**
+
+Uses FYPP eval directive using `$:`
+
+`$:GPU_LOOP(...)`
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|-------------------|---------------------|---------------|--------------------------------------------------------------------------------------------------|
+| `collapse` | integer | None | Number of loops to combine into 1 loop |
+| `parallelism` | string list | None | Parallelism granularity to use for this loop |
+| `data_dependency` | string              | None          | 'independent' -> assert loop iterations are independent; 'auto' -> let the compiler analyze dependencies |
+| `private` | string list | None | Variables that are private to each iteration/thread |
+| `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end |
+| `reductionOp` | string list | None | Operator that each list of reduction will reduce with |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Parameter Restrictions**
+
+| name | Restricted range |
+|-------------------|---------------------------------------------------|
+| `collapse` | Must be greater than 1 |
+| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' |
+| `data_dependency` | 'auto' or 'independent' |
+
+**Additional information**
+
+- Loop parallelism is most commonly ``'[seq]'``
+- reduction and reductionOp must match in length
+- With ``reduction='[[sum1, sum2], [largest]]'`` and ``reductionOp='[+, max]'``, `sum1` and `sum2` will be the sums of `sum1`/`sum2` over all loop iterations, and `largest` will be the maximum value of `largest` across all loop iterations
+
+**Example**
+
+```python
+ $:GPU_LOOP(parallelism='[seq]')
+ $:GPU_LOOP(collapse=3, parallelism='[seq]',private='[tmp, r]')
+```
+
+
+
+
+ GPU_PARALLEL -- (Execute the following on the GPU in parallel)
+
+**Macro Invocation**
+
+Uses FYPP call directive using `#:call`
+
+```C
+#:call GPU_PARALLEL(...)
+ {code}
+#:endcall GPU_PARALLEL
+```
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|------------------|---------------------|-------------------|-------------------------------------------------------------------------------------------|
+| `default` | string | 'present' | Implicit assumptions compiler should make |
+| `private` | string list | None | Variables that are private to each iteration/thread |
+| `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread |
+| `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end |
+| `reductionOp`    | string list         | None              | Operator that each corresponding reduction list is reduced with                            |
+| `copy`           | string list         | None              | Allocates and copies data to GPU on entrance, then deallocates and copies it back to CPU on exit |
+| `copyin`         | string list         | None              | Allocates and copies data to GPU on entrance, then deallocates it on exit                  |
+| `copyinReadOnly` | string list         | None              | Allocates and copies read-only data to GPU on entrance, then deallocates it on exit        |
+| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit |
+| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit |
+| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory |
+| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit |
+| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory |
+| `attach`         | string list         | None              | Attaches device pointers to device targets on entrance, then detaches them on exit         |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Parameter Restrictions**
+
+| name | Restricted range |
+|---------------|---------------------------------------------------|
+| `default` | 'present' or 'none' |
+
+**Additional information**
+
+- `default='present'` means that any non-scalar data is assumed to be present on the GPU
+- `default='none'` means that the compiler should not implicitly determine the data attributes for any variable
+- reduction and reductionOp must match in length
+- With ``reduction='[[sum1, sum2], [largest]]'`` and ``reductionOp='[+, max]'``, `sum1` and `sum2` will be the sums of `sum1`/`sum2` over all loop iterations, and `largest` will be the maximum value of `largest` across all loop iterations
+- A reduction implies a copy, so it does not need to be added for both
+
+**Example**
+
+```C
+ #:call GPU_PARALLEL()
+ {code}
+ ...
+ #:endcall GPU_PARALLEL
+ #:call GPU_PARALLEL(create='[pixel_arr]', copyin='[initial_index]')
+ {code}
+ ...
+ #:endcall
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+### Data Control Macros
+
+
+ GPU_DATA -- (Make data accessible on GPU in specified region)
+
+**Macro Invocation**
+
+Uses FYPP call directive using `#:call`
+
+```C
+#:call GPU_DATA(...)
+ {code}
+#:endcall GPU_DATA
+```
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|------------------|-------------|---------------|----------------------------------------------------------------------------------------------|
+| `code` | code | Required | Region of code where defined data is accessible |
+| `copy`           | string list | None          | Allocates and copies a variable to GPU on entrance, then deallocates and copies it back to CPU on exit |
+| `copyin`         | string list | None          | Allocates and copies data to GPU on entrance, then deallocates it on exit                     |
+| `copyinReadOnly` | string list | None          | Allocates and copies a read-only variable to GPU on entrance, then deallocates it on exit     |
+| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit |
+| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit |
+| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory |
+| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit |
+| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory |
+| `attach`         | string list | None          | Attaches device pointers to device targets on entrance, then detaches them on exit            |
+| `default` | string | None | Implicit assumptions compiler should make |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Parameter Restrictions**
+
+| name | Restricted range |
+|--------|--------------------------------------------------|
+| `code` | Do not assign it manually with key-value pairing; it is filled with the body of the `#:call` block |
+
+**Example**
+
+```C
+ #:call GPU_DATA(copy='[pixel_arr]', copyin='[starting_pixels, initial_index]',attach='[p_real, p_cmplx, p_fltr_cmplx]')
+ {code}
+ ...
+ #:endcall GPU_DATA
+ #:call GPU_DATA(create='[pixel_arr]', copyin='[initial_index]')
+ {code}
+ ...
+ #:endcall
+```
+
+
+
+
+ GPU_ENTER_DATA -- (Allocate/move data to GPU until matching GPU_EXIT_DATA or program termination)
+
+**Macro Invocation**
+
+Uses FYPP eval directive using `$:`
+
+`$:GPU_ENTER_DATA(...)`
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|------------------|-------------|---------------|--------------------------------------------------------------|
+| `copyin` | string list | None | Allocates and copies data to GPU on entrance |
+| `copyinReadOnly` | string list | None          | Allocates and copies a read-only variable to GPU on entrance |
+| `create` | string list | None | Allocates data on GPU on entrance |
+| `attach` | string list | None | Attaches device pointer to device targets on entrance |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Example**
+
+```python
+ $:GPU_ENTER_DATA(copyin='[pixels_arr]', copyinReadOnly='[starting_pixels, initial_index]')
+ $:GPU_ENTER_DATA(create='[bc_buffers(1:num_dims, -1:1)]', copyin='[initial_index]')
+```
+
+
+
+
+ GPU_EXIT_DATA -- (Deallocate/move data from GPU created by GPU_ENTER_DATA)
+
+**Macro Invocation**
+
+Uses FYPP eval directive using `$:`
+
+`$:GPU_EXIT_DATA(...)`
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|----------------|-------------|---------------|--------------------------------------------------------------|
+| `copyout` | string list | None | Deallocates and copies data from GPU to CPU on exit |
+| `delete` | string list | None | Deallocates data on GPU on exit |
+| `detach` | string list | None | Detach device pointer from device targets on exit |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Example**
+
+```python
+ $:GPU_EXIT_DATA(copyout='[pixels_arr]', delete='[starting_pixels, initial_index]')
+ $:GPU_EXIT_DATA(delete='[bc_buffers(1:num_dims, -1:1)]', copyout='[initial_index]')
+```
+
+
+
+
+ GPU_DECLARE -- (Allocate module variables on GPU or for implicit data region )
+
+**Macro Invocation**
+
+Uses FYPP eval directive using `$:`
+
+`$:GPU_DECLARE(...)`
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|------------------|-------------|---------------|-------------------------------------------------------------------------------------------|
+| `copy`           | string list | None          | Allocates and copies data to GPU on entrance, then deallocates and copies it back to CPU on exit |
+| `copyin`         | string list | None          | Allocates and copies data to GPU on entrance, then deallocates it on exit                  |
+| `copyinReadOnly` | string list | None          | Allocates and copies a read-only variable to GPU on entrance, then deallocates it on exit  |
+| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit |
+| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit |
+| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit |
+| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory |
+| `link`           | string list | None          | Declares a global link; memory is only allocated when the variable is used in a data clause |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Additional information**
+
+- An implicit data region is created at the start of each procedure and ends after the last executable statement in that procedure.
+- Use only the create, copyin, device_resident, or link clauses for module variables
+- Data declared with GPU_DECLARE is released at the end of the implicit data region
+- Link is useful for large global static data objects
+
+**Example**
+
+```python
+ $:GPU_DECLARE(create='[x_cb,y_cb,z_cb,x_cc,y_cc,z_cc,dx,dy,dz,dt,m,n,p]')
+ $:GPU_DECLARE(create='[x_cb,y_cb,z_cb]', copyin='[x_cc,y_cc,z_cc]', link='[dx,dy,dz,dt,m,n,p]')
+```
+
+
+
+
+ GPU_UPDATE -- (Updates data from CPU to GPU or GPU to CPU)
+
+**Macro Invocation**
+
+Uses FYPP eval directive using `$:`
+
+`$:GPU_UPDATE(...)`
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|----------------|-------------|---------------|--------------------------------------------------------------|
+| `host` | string list | None | Updates data from GPU to CPU |
+| `device` | string list | None | Updates data from CPU to GPU |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Example**
+
+```python
+ $:GPU_UPDATE(host='[arr1, arr2]')
+ $:GPU_UPDATE(host='[updated_gpu_val]', device='[updated_cpu_val]')
+```
+
+
+
+
+ GPU_HOST_DATA -- (Make GPU memory address available on CPU)
+
+**Macro Invocation**
+
+Uses FYPP call directive using `#:call`
+
+```C
+ #:call GPU_HOST_DATA(...)
+ {code}
+ #:endcall GPU_HOST_DATA
+```
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|----------------|-------------|---------------|------------------------------------------------------------------|
+| `code`         | code        | Required      | Region of code where GPU memory addresses are accessible          |
+| `use_device` | string list | None | Use GPU memory address of variable instead of CPU memory address |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Parameter Restrictions**
+
+| name | Restricted range |
+|--------|--------------------------------------------------|
+| `code` | Do not assign it manually with key-value pairing; it is filled with the body of the `#:call` block |
+
+**Example**
+
+```C
+ #:call GPU_HOST_DATA(use_device='[addr1, addr2]')
+ {code}
+ ...
+ #:endcall GPU_HOST_DATA
+ #:call GPU_HOST_DATA(use_device='[display_arr]')
+ {code}
+ ...
+ #:endcall
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+### Synchronization Macros
+
+
+ GPU_WAIT -- (Makes CPU wait for async GPU activities)
+
+**Macro Invocation**
+
+Uses FYPP eval directive using `$:`
+
+`$:GPU_WAIT(...)`
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|----------------|-----------|---------------|--------------------------------------------------------------|
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Example**
+
+```python
+ $:GPU_WAIT()
+```
+
+
+
+
+ GPU_ATOMIC -- (Do an atomic operation on the GPU)
+
+**Macro Invocation**
+
+Uses FYPP eval directive using `$:`
+
+`$:GPU_ATOMIC(...)`
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|----------------|-----------|---------------|--------------------------------------------------------------|
+| `atomic` | string | Required | Which atomic operation is performed |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Parameter Restrictions**
+
+| name | Restricted range |
+|----------|-----------------------------------------|
+| `atomic` | 'read', 'write', 'update', or 'capture' |
+
+**Additional information**
+
+- read atomic is reading in a value
+ - Ex: `v=x`
+- write atomic is writing a value to a variable
+  - Ex: `x=square(tmp)`
+- update atomic is updating a variable in-place
+  - Ex: `x = x .and. 1`
+- capture atomic is a pair of read/write/update operations with one dependent on the other
+ - Ex:
+
+ ```Fortran
+ x=x .and. 1
+ v=x
+ ```
+
+**Example**
+
+```python
+ $:GPU_ATOMIC(atomic='update')
+ x = square(x)
+ $:GPU_ATOMIC(atomic='capture')
+ x = square(x)
+ v = x
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+### Miscellaneous Macros
+
+
+ GPU_ROUTINE -- (Compile a procedure for the GPU)
+
+**Macro Invocation**
+
+Uses FYPP eval directive using `$:`
+
+`$:GPU_ROUTINE(...)`
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|-----------------|-------------|---------------|--------------------------------------------------------------|
+| `function_name` | string | None | Name of subroutine/function |
+| `parallelism` | string list | None | Parallelism granularity to use for this routine |
+| `nohost` | boolean | False | Do not compile procedure code for CPU |
+| `cray_inline` | boolean | False | Inline procedure on cray compiler |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Parameter Restrictions**
+
+| name | Restricted range |
+|---------------|---------------------------------------------------|
+| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' |
+
+**Additional information**
+
+- The function name only needs to be given when `cray_inline` is True
+- A future capability is to parse the procedure header for the function name
+- Routine parallelism is most commonly ``'[seq]'``
+
+**Example**
+
+```python
+ $:GPU_ROUTINE(parallelism='[seq]')
+ $:GPU_ROUTINE(function_name='s_matmult', parallelism='[seq]', cray_inline=True)
+```
+
+
+
+
+    GPU_CACHE -- (Data to be cached in the software-managed cache)
+
+**Macro Invocation**
+
+Uses FYPP eval directive using `$:`
+
+`$:GPU_CACHE(...)`
+
+**Parameters**
+
+| name | data type | Default Value | description |
+|------------------|-------------|---------------|--------------------------------------------------------------|
+| `cache`          | string list | Required      | Data that should be stored in cache                          |
+| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive |
+
+**Example**
+
+```python
+ $:GPU_CACHE(cache='[pixels_arr]')
+```
+
+
+
+------------------------------------------------------------------------------------------
diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp
index 02ceb8fe29..fea730cbd1 100644
--- a/src/common/include/macros.fpp
+++ b/src/common/include/macros.fpp
@@ -1,3 +1,5 @@
+#:include 'parallel_macros.fpp'
+
#:def LOG(expr)
#ifdef MFC_DEBUG
block
@@ -12,14 +14,16 @@
#:def ALLOCATE(*args)
@:LOG({'@:ALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'})
- allocate (${', '.join(args)}$)
- !$acc enter data create(${', '.join(args)}$)
+ #:set allocated_variables = ', '.join(args)
+ allocate (${allocated_variables}$)
+ $:GPU_ENTER_DATA(create=('[' + allocated_variables + ']'))
#:enddef ALLOCATE
#:def DEALLOCATE(*args)
@:LOG({'@:DEALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'})
- !$acc exit data delete(${', '.join(args)}$)
- deallocate (${', '.join(args)}$)
+ #:set allocated_variables = ', '.join(args)
+ $:GPU_EXIT_DATA(delete=('[' + allocated_variables + ']'))
+ deallocate (${allocated_variables}$)
#:enddef DEALLOCATE
#:def ACC_SETUP_VFs(*args)
@@ -30,13 +34,13 @@
@:LOG({'@:ACC_SETUP_VFs(${', '.join(args)}$)'})
#:for arg in args
- !$acc enter data copyin(${arg}$)
- !$acc enter data copyin(${arg}$%vf)
+ $:GPU_ENTER_DATA(copyin=('[' + arg + ']'))
+ $:GPU_ENTER_DATA(copyin=('[' + arg + '%vf]'))
if (allocated(${arg}$%vf)) then
do macros_setup_vfs_i = lbound(${arg}$%vf, 1), ubound(${arg}$%vf, 1)
if (associated(${arg}$%vf(macros_setup_vfs_i)%sf)) then
- !$acc enter data copyin(${arg}$%vf(macros_setup_vfs_i))
- !$acc enter data create(${arg}$%vf(macros_setup_vfs_i)%sf)
+ $:GPU_ENTER_DATA(copyin=('[' + arg + '%vf(macros_setup_vfs_i)]'))
+ $:GPU_ENTER_DATA(copyin=('[' + arg + '%vf(macros_setup_vfs_i)%sf]'))
end if
end do
end if
@@ -52,9 +56,9 @@
@:LOG({'@:ACC_SETUP_SFs(${', '.join(args)}$)'})
#:for arg in args
- !$acc enter data copyin(${arg}$)
+ $:GPU_ENTER_DATA(copyin=('[' + arg + ']'))
if (associated(${arg}$%sf)) then
- !$acc enter data create(${arg}$%sf)
+ $:GPU_ENTER_DATA(copyin=('[' + arg + '%sf]'))
end if
#:endfor
end block
@@ -68,18 +72,18 @@
@:LOG({'@:ACC_SETUP_source_spatials(${', '.join(args)}$)'})
#:for arg in args
- !$acc enter data copyin(${arg}$)
+ $:GPU_ENTER_DATA(copyin=('[' + arg + ']'))
if (allocated(${arg}$%coord)) then
- !$acc enter data create(${arg}$%coord)
+ $:GPU_ENTER_DATA(copyin=('[' + arg + '%coord]'))
end if
if (allocated(${arg}$%val)) then
- !$acc enter data create(${arg}$%val)
+ $:GPU_ENTER_DATA(copyin=('[' + arg + '%val]'))
end if
if (allocated(${arg}$%angle)) then
- !$acc enter data create(${arg}$%angle)
+ $:GPU_ENTER_DATA(copyin=('[' + arg + '%angle]'))
end if
if (allocated(${arg}$%xyz_to_r_ratios)) then
- !$acc enter data create(${arg}$%xyz_to_r_ratios)
+ $:GPU_ENTER_DATA(copyin=('[' + arg + '%xyz_to_r_ratios]'))
end if
#:endfor
end block
@@ -102,3 +106,4 @@
//${message or '"No error description."'}$)
end if
#:enddef
+! New line at end of file is required for FYPP
diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp
new file mode 100644
index 0000000000..8d0a5a673b
--- /dev/null
+++ b/src/common/include/parallel_macros.fpp
@@ -0,0 +1,425 @@
+#:mute
+
+#:def ASSERT_LIST(data, datatype)
+ #:assert data is not None
+ #:assert isinstance(data, list)
+ #:assert len(data) != 0
+ #:assert all(isinstance(element, datatype) for element in data)
+#:enddef
+
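+#! Splits a bracketed, comma-separated variable list such as '[a, b(1:n), c]' on
+#! top-level commas (commas nested one level deep in parentheses are preserved) and
+#! emits an OpenACC clause of the form "clause_name(a, b(1:n), c)". Emits an empty
+#! string when clause_str is None.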
+#:def GEN_PARENTHESES_CLAUSE(clause_name, clause_str)
+ #:set clause_regex = re.compile(',(?![^(]*\\))')
+ #:assert isinstance(clause_name, str)
+ #:if clause_str is not None
+ #:set count = 0
+ #:assert isinstance(clause_str, str)
+ #:assert clause_str[0] == '[' and clause_str[-1] == ']'
+ #:for c in clause_str
+ #:if c == '('
+ #:set count = count + 1
+ #:elif c == ')'
+ #:set count = count - 1
+ #:endif
+ #:if c == ',' and count > 1
+ #:stop 'Nested parentheses with comma inside is not supported. Incorrect clause: {}'.format(clause_str)
+ #:elif count < 0
+ #:stop 'Missing parentheses. Incorrect clause: {}'.format(clause_str)
+ #:endif
+ #:endfor
+ #:set clause_str = re.sub(clause_regex, ';', clause_str)
+ #:set clause_list = [x.strip() for x in clause_str.strip('[]').split(';')]
+ $:ASSERT_LIST(clause_list, str)
+ #:set clause_str = clause_name + '(' + ', '.join(clause_list) + ') '
+ #:else
+ #:set clause_str = ''
+ #:endif
+ $:clause_str
+#:enddef
+
+#:def GEN_PRIVATE_STR(private, initialized_values)
+ #:assert isinstance(initialized_values, bool)
+ #:if initialized_values == True
+ #:set private_val = GEN_PARENTHESES_CLAUSE('firstprivate', private)
+ #:else
+ #:set private_val = GEN_PARENTHESES_CLAUSE('private', private)
+ #:endif
+ $:private_val
+#:enddef
+
+#:def GEN_COPY_STR(copy)
+ #:set copy_val = GEN_PARENTHESES_CLAUSE('copy', copy)
+ $:copy_val
+#:enddef
+
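+#! Generates a copyin(...) clause; when readonly is True, the variable list is
+#! prefixed with the OpenACC 'readonly:' modifier.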
+#:def GEN_COPYIN_STR(copyin, readonly)
+ #:assert isinstance(readonly, bool)
+ #:set copyin_val = GEN_PARENTHESES_CLAUSE('copyin', copyin)
+ #:if copyin is not None and readonly == True
+ #:set index = copyin_val.find('copyin(') + len('copyin(')
+ #:set copyin_val = copyin_val[:index] + 'readonly:' + copyin_val[index:]
+ #:endif
+ $:copyin_val
+#:enddef
+
+#:def GEN_COPYOUT_STR(copyout)
+ #:set copyout_val = GEN_PARENTHESES_CLAUSE('copyout', copyout)
+ $:copyout_val
+#:enddef
+
+#:def GEN_CREATE_STR(create)
+ #:set create_val = GEN_PARENTHESES_CLAUSE('create', create)
+ $:create_val
+#:enddef
+
+#:def GEN_NOCREATE_STR(no_create)
+ #:set nocreate_val = GEN_PARENTHESES_CLAUSE('no_create', no_create)
+ $:nocreate_val
+#:enddef
+
+#:def GEN_DELETE_STR(delete)
+ #:set delete_val = GEN_PARENTHESES_CLAUSE('delete', delete)
+ $:delete_val
+#:enddef
+
+#:def GEN_PRESENT_STR(present)
+ #:set present_val = GEN_PARENTHESES_CLAUSE('present', present)
+ $:present_val
+#:enddef
+
+#:def GEN_DEVICEPTR_STR(deviceptr)
+ #:set deviceptr_val = GEN_PARENTHESES_CLAUSE('deviceptr', deviceptr)
+ $:deviceptr_val
+#:enddef
+
+#:def GEN_ATTACH_STR(attach)
+ #:set attach_val = GEN_PARENTHESES_CLAUSE('attach', attach)
+ $:attach_val
+#:enddef
+
+#:def GEN_DETACH_STR(detach)
+ #:set detach_val = GEN_PARENTHESES_CLAUSE('detach', detach)
+ $:detach_val
+#:enddef
+
+#:def GEN_LINK_STR(link)
+ #:set link_val = GEN_PARENTHESES_CLAUSE('link', link)
+ $:link_val
+#:enddef
+
+#:def GEN_EXTRA_ARGS_STR(extraArgs)
+ #:if extraArgs is not None
+ #:assert isinstance(extraArgs, str)
+ #:set extraArgs_val = extraArgs
+ #:else
+ #:set extraArgs_val = ''
+ #:endif
+ $:extraArgs_val
+#:enddef
+
+#:def GEN_PARALLELISM_STR(parallelism)
+ #:if parallelism is not None
+ #:assert isinstance(parallelism, str)
+ #:assert parallelism[0] == '[' and parallelism[-1] == ']'
+ #:set parallelism_list = [x.strip() for x in parallelism.strip('[]').split(',')]
+ $:ASSERT_LIST(parallelism_list, str)
+ #:assert all((element == 'gang' or element == 'worker' or &
+ & element == 'vector' or element == 'seq') for element in parallelism_list)
+ #:set parallelism_val = ' '.join(parallelism_list) + ' '
+ #:else
+ #:set parallelism_val = ''
+ #:endif
+ $:parallelism_val
+#:enddef
+
+#:def GEN_COLLAPSE_STR(collapse)
+ #:if collapse is not None
+ #:set collapse = int(collapse)
+ #:assert isinstance(collapse, int)
+ #:assert collapse > 1
+ #:set collapse_val = 'collapse(' + str(collapse) + ') '
+ #:else
+ #:set collapse_val = ''
+ #:endif
+ $:collapse_val
+#:enddef
+
+#:def GEN_DEFAULT_STR(default)
+ #:if default is not None
+ #:assert isinstance(default, str)
+ #:assert (default == 'present' or default == 'none')
+ #:set default_val = 'default(' + default + ') '
+ #:else
+ #:set default_val = ''
+ #:endif
+ $:default_val
+#:enddef
+
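+#! Pairs each sub-list of 'reduction' with the operator at the same index in
+#! 'reductionOp' and emits one 'reduction(op:vars)' clause per pair.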
+#:def GEN_REDUCTION_STR(reduction, reductionOp)
+ #:if reduction is not None and reductionOp is not None
+ #:assert isinstance(reduction, str)
+ #:assert isinstance(reductionOp, str)
+ #:assert reduction[0] == '[' and reduction[-1] == ']'
+ #:assert reductionOp[0] == '[' and reductionOp[-1] == ']'
+ #:set reduction = reduction.replace(' ', '')
+ #:set reduction = reduction[1:-1]
+ #:set reduction_list = reduction.split('],')
+ #:set reduction_list = [str + ']' for str in reduction_list]
+ #:assert all(str[0] == '[' and str[-1] == ']' for str in reduction_list)
+
+ #:set reductionOp_list = [x.strip() for x in reductionOp.strip('[]').split(',')]
+ $:ASSERT_LIST(reduction_list, str)
+ $:ASSERT_LIST(reductionOp_list, str)
+ #:assert len(reduction_list) == len(reductionOp_list)
+ #:set reduction_val = ''
+ #:for i in range(len(reduction_list))
+ #:set temp_clause = GEN_PARENTHESES_CLAUSE('reduction', reduction_list[i]).strip('\n')
+ #:set ind = temp_clause.find('reduction(') + len('reduction(')
+ #:set reduction_val = reduction_val.strip('\n') + temp_clause[:ind] + reductionOp_list[i] + ':' + temp_clause[ind:]
+ #:endfor
+ #:elif reduction is not None or reductionOp is not None
+ #:stop 'Cannot set the reduction list or reduction operation without setting the other'
+ #:else
+ #:set reduction_val = ''
+ #:endif
+ $:reduction_val
+#:enddef
+
+#:def GEN_HOST_STR(host)
+ #:set host_val = GEN_PARENTHESES_CLAUSE('host', host)
+ $:host_val
+#:enddef
+
+#:def GEN_DEVICE_STR(device)
+ #:set device_val = GEN_PARENTHESES_CLAUSE('device', device)
+ $:device_val
+#:enddef
+
+#:def GEN_USE_DEVICE_STR(use_device)
+ #:set use_device_val = GEN_PARENTHESES_CLAUSE('use_device', use_device)
+ $:use_device_val
+#:enddef
+
+#:def GPU_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, &
+ & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
+ & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None)
+ #:set default_val = GEN_DEFAULT_STR(default)
+ #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n')
+ #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp)
+ #:set copy_val = GEN_COPY_STR(copy)
+ #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n')
+ #:set copyout_val = GEN_COPYOUT_STR(copyout)
+ #:set create_val = GEN_CREATE_STR(create)
+ #:set no_create_val = GEN_NOCREATE_STR(no_create)
+ #:set present_val = GEN_PRESENT_STR(present)
+ #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr)
+ #:set attach_val = GEN_ATTACH_STR(attach)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + &
+ & copy_val.strip('\n') + copyin_val.strip('\n') + &
+ & copyout_val.strip('\n') + create_val.strip('\n') + &
+ & no_create_val.strip('\n') + present_val.strip('\n') + &
+ & deviceptr_val.strip('\n') + attach_val.strip('\n')
+ #:set acc_directive = '!$acc parallel ' + &
+ & clause_val + extraAccArgs_val.strip('\n')
+ #:set end_acc_directive = '!$acc end parallel'
+ $:acc_directive
+ $:code
+ $:end_acc_directive
+#:enddef
+
+
+#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
+ & default='present', firstprivate=None, reduction=None, reductionOp=None, &
+ & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
+ & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None)
+ #:set collapse_val = GEN_COLLAPSE_STR(collapse)
+ #:set parallelism_val = GEN_PARALLELISM_STR(parallelism)
+ #:set default_val = GEN_DEFAULT_STR(default)
+ #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n')
+ #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp)
+ #:set copy_val = GEN_COPY_STR(copy)
+ #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n')
+ #:set copyout_val = GEN_COPYOUT_STR(copyout)
+ #:set create_val = GEN_CREATE_STR(create)
+ #:set no_create_val = GEN_NOCREATE_STR(no_create)
+ #:set present_val = GEN_PRESENT_STR(present)
+ #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr)
+ #:set attach_val = GEN_ATTACH_STR(attach)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + &
+ & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + &
+ & copy_val.strip('\n') + copyin_val.strip('\n') + &
+ & copyout_val.strip('\n') + create_val.strip('\n') + &
+ & no_create_val.strip('\n') + present_val.strip('\n') + &
+ & deviceptr_val.strip('\n') + attach_val.strip('\n')
+ #:set acc_directive = '!$acc parallel loop ' + &
+ & clause_val + extraAccArgs_val.strip('\n')
+ $:acc_directive
+#:enddef
+
+#:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None)
+ #:assert isinstance(cray_inline, bool)
+ #:set parallelism_val = GEN_PARALLELISM_STR(parallelism)
+ #:assert isinstance(nohost, bool)
+ #:if nohost == True
+ #:set nohost_val = 'nohost'
+ #:else
+ #:set nohost_val = ''
+ #:endif
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = parallelism_val.strip('\n') + nohost_val.strip('\n')
+ #:set acc_directive = '!$acc routine ' + &
+ & clause_val + extraAccArgs_val.strip('\n')
+ #:if cray_inline == True
+ #:if not isinstance(function_name, str)
+ #:stop "When inlining for Cray Compiler, function name must be given and given as a string"
+ #:endif
+ #:set cray_directive = ('!DIR$ INLINEALWAYS ' + function_name).strip('\n')
+#ifdef _CRAYFTN
+ $:cray_directive
+#else
+ $:acc_directive
+#endif
+ #:else
+ $:acc_directive
+ #:endif
+#:enddef
+
+#:def GPU_DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None)
+ #:set copy_val = GEN_COPY_STR(copy)
+ #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n')
+ #:set copyout_val = GEN_COPYOUT_STR(copyout)
+ #:set create_val = GEN_CREATE_STR(create)
+ #:set present_val = GEN_PRESENT_STR(present)
+ #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr)
+ #:set link_val = GEN_LINK_STR(link)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + &
+ & copyout_val.strip('\n') + create_val.strip('\n') + &
+ & present_val.strip('\n') + deviceptr_val.strip('\n') + &
+ & link_val.strip('\n')
+ #:set acc_directive = '!$acc declare ' + clause_val + extraAccArgs_val.strip('\n')
+ $:acc_directive
+#:enddef
+
+#:def GPU_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None)
+ #:set collapse_val = GEN_COLLAPSE_STR(collapse)
+ #:set parallelism_val = GEN_PARALLELISM_STR(parallelism)
+ #:if data_dependency is not None
+ #:assert isinstance(data_dependency, str)
+ #:assert (data_dependency == 'auto' or data_dependency == 'independent')
+ #:set data_dependency_val = data_dependency
+ #:else
+ #:set data_dependency_val = ''
+ #:endif
+ #:set private_val = GEN_PRIVATE_STR(private, False)
+ #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + &
+ & data_dependency_val.strip('\n') + private_val.strip('\n') + &
+ & reduction_val.strip('\n')
+ #:set acc_directive = '!$acc loop ' + &
+ & clause_val + extraAccArgs_val.strip('\n')
+ $:acc_directive
+#:enddef
+
+#:def GPU_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None)
+ #:assert code is not None
+ #:assert isinstance(code, str)
+ #:if code == '' or code.isspace()
+ #:stop 'GPU_DATA macro has no effect on the code as it is not surrounding any code'
+ #:endif
+ #:set copy_val = GEN_COPY_STR(copy)
+ #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n')
+ #:set copyout_val = GEN_COPYOUT_STR(copyout)
+ #:set create_val = GEN_CREATE_STR(create)
+ #:set no_create_val = GEN_NOCREATE_STR(no_create)
+ #:set present_val = GEN_PRESENT_STR(present)
+ #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr)
+ #:set attach_val = GEN_ATTACH_STR(attach)
+ #:set default_val = GEN_DEFAULT_STR(default)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + &
+ & copyout_val.strip('\n') + create_val.strip('\n') + &
+ & no_create_val.strip('\n') + present_val.strip('\n') + &
+ & deviceptr_val.strip('\n') + attach_val.strip('\n') + &
+ & default_val.strip('\n')
+ #:set acc_directive = '!$acc data ' + clause_val + extraAccArgs_val.strip('\n')
+ #:set end_acc_directive = '!$acc end data'
+ $:acc_directive
+ $:code
+ $:end_acc_directive
+#:enddef
+
+#:def GPU_HOST_DATA(code, use_device=None, extraAccArgs=None)
+ #:assert code is not None
+ #:assert isinstance(code, str)
+ #:if code == '' or code.isspace()
+ #:stop 'GPU_HOST_DATA macro has no effect on the code as it is not surrounding any code'
+ #:endif
+ #:set use_device_val = GEN_USE_DEVICE_STR(use_device)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = use_device_val.strip('\n')
+ #:set acc_directive = '!$acc host_data ' + clause_val + extraAccArgs_val.strip('\n')
+ #:set end_acc_directive = '!$acc end host_data'
+ $:acc_directive
+ $:code
+ $:end_acc_directive
+#:enddef
+
+#:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None)
+ #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n')
+ #:set create_val = GEN_CREATE_STR(create)
+ #:set attach_val = GEN_ATTACH_STR(attach)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n')
+ #:set acc_directive = '!$acc enter data ' + clause_val + extraAccArgs_val.strip('\n')
+ $:acc_directive
+#:enddef
+
+#:def GPU_EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None)
+ #:set copyout_val = GEN_COPYOUT_STR(copyout)
+ #:set delete_val = GEN_DELETE_STR(delete)
+ #:set detach_val = GEN_DETACH_STR(detach)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n')
+ #:set acc_directive = '!$acc exit data ' + clause_val + extraAccArgs_val.strip('\n')
+ $:acc_directive
+#:enddef
+
+#:def GPU_CACHE(cache, extraAccArgs=None)
+ #:set cache_val = GEN_PARENTHESES_CLAUSE('cache', cache)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = cache_val.strip('\n')
+ #:set acc_directive = '!$acc ' + clause_val + extraAccArgs_val.strip('\n')
+ $:acc_directive
+#:enddef
+
+#:def GPU_ATOMIC(atomic, extraAccArgs=None)
+ #:assert isinstance(atomic, str)
+ #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture')
+ #:set atomic_val = atomic
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = atomic_val.strip('\n')
+ #:set acc_directive = '!$acc atomic ' + clause_val + extraAccArgs_val.strip('\n')
+ $:acc_directive
+#:enddef
+
+#:def GPU_UPDATE(host=None, device=None, extraAccArgs=None)
+ #:set host_val = GEN_HOST_STR(host)
+ #:set device_val = GEN_DEVICE_STR(device)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = host_val.strip('\n') + device_val.strip('\n')
+ #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n')
+ $:acc_directive
+#:enddef
+
+#:def GPU_WAIT(extraAccArgs=None)
+ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
+ #:set clause_val = ''
+ #:set acc_directive = '!$acc wait ' + clause_val + extraAccArgs_val.strip('\n')
+ $:acc_directive
+#:enddef
+
+#:endmute
+! New line at end of file is required for FYPP
diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp
index 4087a0045e..2c48c760f2 100644
--- a/src/common/m_boundary_common.fpp
+++ b/src/common/m_boundary_common.fpp
@@ -24,7 +24,7 @@ module m_boundary_common
implicit none
type(scalar_field), dimension(:, :), allocatable :: bc_buffers
-!$acc declare create(bc_buffers)
+ $:GPU_DECLARE(create='[bc_buffers]')
#ifdef MFC_MPI
integer, dimension(1:3, -1:1) :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE
@@ -87,7 +87,7 @@ contains
if (bc_x%beg >= 0) then
call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb, mv)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, p
do k = 0, n
select case (int(bc_type(1, -1)%sf(0, k, l)))
@@ -116,7 +116,7 @@ contains
if (bc_x%end >= 0) then
call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb, mv)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, p
do k = 0, n
select case (int(bc_type(1, 1)%sf(0, k, l)))
@@ -149,7 +149,7 @@ contains
if (bc_y%beg >= 0) then
call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb, mv)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, p
do k = -buff_size, m + buff_size
select case (int(bc_type(2, -1)%sf(k, 0, l)))
@@ -181,7 +181,7 @@ contains
if (bc_y%end >= 0) then
call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb, mv)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, p
do k = -buff_size, m + buff_size
select case (int(bc_type(2, 1)%sf(k, 0, l)))
@@ -214,7 +214,7 @@ contains
if (bc_z%beg >= 0) then
call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb, mv)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = -buff_size, n + buff_size
do k = -buff_size, m + buff_size
select case (int(bc_type(3, -1)%sf(k, l, 0)))
@@ -243,7 +243,7 @@ contains
if (bc_z%end >= 0) then
call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb, mv)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = -buff_size, n + buff_size
do k = -buff_size, m + buff_size
select case (int(bc_type(3, 1)%sf(k, l, 0)))
@@ -273,11 +273,8 @@ contains
end subroutine s_populate_variables_buffers
pure subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_ghost_cell_extrapolation
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_ghost_cell_extrapolation', &
+ & parallelism='[seq]', cray_inline=True)
type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
integer, intent(in) :: bc_dir, bc_loc
integer, intent(in) :: k, l
@@ -337,7 +334,7 @@ contains
end subroutine s_ghost_cell_extrapolation
pure subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv
integer, intent(in) :: bc_dir, bc_loc
@@ -597,7 +594,7 @@ contains
end subroutine s_symmetry
pure subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv
integer, intent(in) :: bc_dir, bc_loc
@@ -736,7 +733,7 @@ contains
end subroutine s_periodic
pure subroutine s_axis(q_prim_vf, pb, mv, k, l)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv
integer, intent(in) :: k, l
@@ -795,11 +792,8 @@ contains
end subroutine s_axis
pure subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_slip_wall
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_slip_wall',parallelism='[seq]', &
+ & cray_inline=True)
type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
integer, intent(in) :: bc_dir, bc_loc
integer, intent(in) :: k, l
@@ -889,11 +883,9 @@ contains
end subroutine s_slip_wall
pure subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_no_slip_wall
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_no_slip_wall',parallelism='[seq]', &
+ & cray_inline=True)
+
type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
integer, intent(in) :: bc_dir, bc_loc
integer, intent(in) :: k, l
@@ -1019,11 +1011,8 @@ contains
end subroutine s_no_slip_wall
pure subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_dirichlet
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_dirichlet',parallelism='[seq]', &
+ & cray_inline=True)
type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
integer, intent(in) :: bc_dir, bc_loc
integer, intent(in) :: k, l
@@ -1087,7 +1076,7 @@ contains
end subroutine s_dirichlet
pure subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb, mv)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv
integer, intent(in) :: bc_dir, bc_loc
integer, intent(in) :: k, l
@@ -1169,7 +1158,7 @@ contains
if (bc_x%beg >= 0) then
call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, p
do k = 0, n
select case (bc_type(1, -1)%sf(0, k, l))
@@ -1187,7 +1176,7 @@ contains
if (bc_x%end >= 0) then
call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, p
do k = 0, n
select case (bc_type(1, 1)%sf(0, k, l))
@@ -1208,7 +1197,7 @@ contains
if (bc_y%beg >= 0) then
call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, p
do k = -buff_size, m + buff_size
select case (bc_type(2, -1)%sf(k, 0, l))
@@ -1226,7 +1215,7 @@ contains
if (bc_y%end >= 0) then
call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, p
do k = -buff_size, m + buff_size
select case (bc_type(2, 1)%sf(k, 0, l))
@@ -1247,7 +1236,7 @@ contains
if (bc_z%beg >= 0) then
call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = -buff_size, n + buff_size
do k = -buff_size, m + buff_size
select case (bc_type(3, -1)%sf(k, l, 0))
@@ -1265,7 +1254,7 @@ contains
if (bc_z%end >= 0) then
call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1)
else
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = -buff_size, n + buff_size
do k = -buff_size, m + buff_size
select case (bc_type(3, 1)%sf(k, l, 0))
@@ -1282,11 +1271,8 @@ contains
end subroutine s_populate_capillary_buffers
pure subroutine s_color_function_periodic(c_divs, bc_dir, bc_loc, k, l)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_color_function_periodic
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_color_function_periodic', &
+ & parallelism='[seq]', cray_inline=True)
type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
integer, intent(in) :: bc_dir, bc_loc
integer, intent(in) :: k, l
@@ -1340,11 +1326,8 @@ contains
end subroutine s_color_function_periodic
pure subroutine s_color_function_reflective(c_divs, bc_dir, bc_loc, k, l)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_color_function_reflective
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_color_function_reflective', &
+ & parallelism='[seq]', cray_inline=True)
type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
integer, intent(in) :: bc_dir, bc_loc
integer, intent(in) :: k, l
@@ -1422,11 +1405,8 @@ contains
end subroutine s_color_function_reflective
pure subroutine s_color_function_ghost_cell_extrapolation(c_divs, bc_dir, bc_loc, k, l)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_color_function_ghost_cell_extrapolation
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_color_function_ghost_cell_extrapolation', &
+ & parallelism='[seq]', cray_inline=True)
type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
integer, intent(in) :: bc_dir, bc_loc
integer, intent(in) :: k, l
@@ -1638,7 +1618,7 @@ contains
do dir = 1, num_dims
do loc = -1, 1, 2
read (1) bc_type(dir, loc)%sf
- !$acc update device(bc_type(dir, loc)%sf)
+ $:GPU_UPDATE(device='[bc_type(dir, loc)%sf]')
end do
end do
close (1)
@@ -1654,7 +1634,7 @@ contains
do dir = 1, num_dims
do loc = -1, 1, 2
read (1) bc_buffers(dir, loc)%sf
- !$acc update device(bc_buffers(dir, loc)%sf)
+ $:GPU_UPDATE(device='[bc_buffers(dir, loc)%sf]')
end do
end do
close (1)
@@ -1704,7 +1684,7 @@ contains
call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr)
call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr)
offset = offset + sizeof(bc_type(dir, loc)%sf)
- !$acc update device(bc_type(dir, loc)%sf)
+ $:GPU_UPDATE(device='[bc_type(dir, loc)%sf]')
end do
end do
@@ -1714,7 +1694,7 @@ contains
call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr)
call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr)
offset = offset + sizeof(bc_buffers(dir, loc)%sf)
- !$acc update device(bc_buffers(dir, loc)%sf)
+ $:GPU_UPDATE(device='[bc_buffers(dir, loc)%sf]')
end do
end do
@@ -1767,17 +1747,17 @@ contains
bc_type(1, -1)%sf(:, :, :) = bc_x%beg
bc_type(1, 1)%sf(:, :, :) = bc_x%end
- !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf)
+ $:GPU_UPDATE(device='[bc_type(1,-1)%sf,bc_type(1,1)%sf]')
if (n > 0) then
bc_type(2, -1)%sf(:, :, :) = bc_y%beg
bc_type(2, 1)%sf(:, :, :) = bc_y%end
- !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf)
+ $:GPU_UPDATE(device='[bc_type(2,-1)%sf,bc_type(2,1)%sf]')
if (p > 0) then
bc_type(3, -1)%sf(:, :, :) = bc_z%beg
bc_type(3, 1)%sf(:, :, :) = bc_z%end
- !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf)
+ $:GPU_UPDATE(device='[bc_type(3,-1)%sf,bc_type(3,1)%sf]')
end if
end if
diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp
index d61c42b1af..71aa890e87 100644
--- a/src/common/m_chemistry.fpp
+++ b/src/common/m_chemistry.fpp
@@ -35,7 +35,7 @@ contains
do z = bounds(3)%beg, bounds(3)%end
do y = bounds(2)%beg, bounds(2)%end
do x = bounds(1)%beg, bounds(1)%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do eqn = chemxb, chemxe
Ys(eqn - chemxb + 1) = &
q_cons_vf(eqn)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z)
@@ -46,7 +46,7 @@ contains
! cons. contxb = \rho (1-fluid model)
! cons. momxb + i = \rho u_i
energy = q_cons_vf(E_idx)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do eqn = momxb, momxe
energy = energy - &
0.5_wp*(q_cons_vf(eqn)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z))**2._wp
@@ -72,7 +72,7 @@ contains
do z = bounds(3)%beg, bounds(3)%end
do y = bounds(2)%beg, bounds(2)%end
do x = bounds(1)%beg, bounds(1)%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = chemxb, chemxe
Ys(i - chemxb + 1) = q_prim_vf(i)%sf(x, y, z)
end do
@@ -99,13 +99,12 @@ contains
real(wp), dimension(num_species) :: Ys
real(wp), dimension(num_species) :: omega
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(Ys, omega)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]')
do z = bounds(3)%beg, bounds(3)%end
do y = bounds(2)%beg, bounds(2)%end
do x = bounds(1)%beg, bounds(1)%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do eqn = chemxb, chemxe
Ys(eqn - chemxb + 1) = q_prim_qp(eqn)%sf(x, y, z)
end do
@@ -115,7 +114,7 @@ contains
call get_net_production_rates(rho, T, Ys, omega)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do eqn = chemxb, chemxe
omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1)
diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp
index 2eb7920422..1857a31cd8 100644
--- a/src/common/m_finite_differences.fpp
+++ b/src/common/m_finite_differences.fpp
@@ -1,3 +1,5 @@
+#:include 'macros.fpp'
+
module m_finite_differences
use m_global_parameters
@@ -15,7 +17,8 @@ contains
integer :: x, y, z !< Generic loop iterators
real(wp) :: divergence
- !$acc parallel loop collapse(3) gang vector default(present) private(divergence)
+
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[divergence]')
do x = ix_s%beg, ix_s%end
do y = iy_s%beg, iy_s%end
do z = iz_s%beg, iz_s%end
diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp
index 6a376d7da5..f222e24d50 100644
--- a/src/common/m_helper.fpp
+++ b/src/common/m_helper.fpp
@@ -1,4 +1,5 @@
#:include 'macros.fpp'
+
!>
!! @file m_helper.f90
!! @brief Contains module m_helper
@@ -43,7 +44,7 @@ contains
!! @param Rtmp is the bubble radii
!! @param ntmp is the output number bubble density
pure subroutine s_comp_n_from_prim(vftmp, Rtmp, ntmp, weights)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: vftmp
real(wp), dimension(nb), intent(in) :: Rtmp
real(wp), intent(out) :: ntmp
@@ -57,7 +58,7 @@ contains
end subroutine s_comp_n_from_prim
pure subroutine s_comp_n_from_cons(vftmp, nRtmp, ntmp, weights)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: vftmp
real(wp), dimension(nb), intent(in) :: nRtmp
real(wp), intent(out) :: ntmp
diff --git a/src/common/m_helper_basic.f90 b/src/common/m_helper_basic.fpp
similarity index 96%
rename from src/common/m_helper_basic.f90
rename to src/common/m_helper_basic.fpp
index 74cb61f2ab..c78140c94b 100644
--- a/src/common/m_helper_basic.f90
+++ b/src/common/m_helper_basic.fpp
@@ -2,6 +2,8 @@
!! @file m_helper_basic.f90
!! @brief Contains module m_helper_basic
+#:include 'macros.fpp'
+
module m_helper_basic
use m_derived_types !< Definitions of the derived types
@@ -24,7 +26,7 @@ module m_helper_basic
!! @param tol_input Relative error (default = 1.e-10_wp).
!! @return Result of the comparison.
logical pure elemental function f_approx_equal(a, b, tol_input) result(res)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: a, b
real(wp), optional, intent(in) :: tol_input
real(wp) :: tol
@@ -50,7 +52,7 @@ end function f_approx_equal
!! @param tol_input Relative error (default = 1e-10_wp).
!! @return Result of the comparison.
logical pure function f_approx_in_array(a, b, tol_input) result(res)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: a
real(wp), intent(in) :: b(:)
real(wp), optional, intent(in) :: tol_input
@@ -76,7 +78,7 @@ end function f_approx_in_array
!> Checks if a real(wp) variable is of default value.
!! @param var Variable to check.
logical pure elemental function f_is_default(var) result(res)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: var
res = f_approx_equal(var, dflt_real)
@@ -101,7 +103,7 @@ end function f_all_default
!> Checks if a real(wp) variable is an integer.
!! @param var Variable to check.
logical pure elemental function f_is_integer(var) result(res)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: var
res = f_approx_equal(var, real(nint(var), wp))
diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp
index b920151488..568dddb299 100644
--- a/src/common/m_mpi_common.fpp
+++ b/src/common/m_mpi_common.fpp
@@ -25,7 +25,7 @@ module m_mpi_common
implicit none
integer, private :: ierr, v_size !<
- !$acc declare create(v_size)
+ $:GPU_DECLARE(create='[v_size]')
!! Generic flags used to identify and report MPI errors
real(wp), private, allocatable, dimension(:) :: buff_send !<
@@ -38,10 +38,10 @@ module m_mpi_common
!! average primitive variables, for a single computational domain boundary
!! at the time, from the relevant neighboring processor.
- !$acc declare create(buff_send, buff_recv)
+ $:GPU_DECLARE(create='[buff_send, buff_recv]')
integer :: halo_size
- !$acc declare create(halo_size)
+ $:GPU_DECLARE(create='[halo_size]')
contains
@@ -76,7 +76,7 @@ contains
halo_size = -1 + buff_size*(v_size)
end if
- !$acc update device(halo_size, v_size)
+ $:GPU_UPDATE(device='[halo_size, v_size]')
@:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size))
#endif
@@ -631,7 +631,7 @@ contains
/)
end if
- !$acc update device(v_size)
+ $:GPU_UPDATE(device='[v_size]')
buffer_count = buffer_counts(mpi_dir)
boundary_conditions = (/bc_x, bc_y, bc_z/)
@@ -667,7 +667,7 @@ contains
#:for mpi_dir in [1, 2, 3]
if (mpi_dir == ${mpi_dir}$) then
#:if mpi_dir == 1
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
do l = 0, p
do k = 0, n
do j = 0, buff_size - 1
@@ -680,7 +680,7 @@ contains
end do
if (qbmm_comm) then
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
do l = 0, p
do k = 0, n
do j = 0, buff_size - 1
@@ -695,7 +695,7 @@ contains
end do
end do
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do l = 0, p
do k = 0, n
do j = 0, buff_size - 1
@@ -711,7 +711,7 @@ contains
end do
end if
#:elif mpi_dir == 2
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
do i = 1, nVar
do l = 0, p
do k = 0, buff_size - 1
@@ -726,7 +726,7 @@ contains
end do
if (qbmm_comm) then
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do i = nVar + 1, nVar + 4
do l = 0, p
do k = 0, buff_size - 1
@@ -742,7 +742,7 @@ contains
end do
end do
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do i = nVar + 1, nVar + 4
do l = 0, p
do k = 0, buff_size - 1
@@ -759,7 +759,7 @@ contains
end do
end if
#:else
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
do i = 1, nVar
do l = 0, buff_size - 1
do k = -buff_size, n + buff_size
@@ -774,7 +774,7 @@ contains
end do
if (qbmm_comm) then
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do i = nVar + 1, nVar + 4
do l = 0, buff_size - 1
do k = -buff_size, n + buff_size
@@ -790,7 +790,7 @@ contains
end do
end do
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do i = nVar + 1, nVar + 4
do l = 0, buff_size - 1
do k = -buff_size, n + buff_size
@@ -816,28 +816,33 @@ contains
#:for rdma_mpi in [False, True]
if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then
#:if rdma_mpi
- !$acc host_data use_device(buff_send, buff_recv)
- call nvtxStartRange("RHS-COMM-SENDRECV-RDMA")
+ #:call GPU_HOST_DATA(use_device='[buff_send, buff_recv]')
+ call nvtxStartRange("RHS-COMM-SENDRECV-RDMA")
+
+ call MPI_SENDRECV( &
+ buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
+ buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
+ MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+
+ call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA
+
+ #:endcall GPU_HOST_DATA
+ $:GPU_WAIT()
#:else
call nvtxStartRange("RHS-COMM-DEV2HOST")
- !$acc update host(buff_send)
+ $:GPU_UPDATE(host='[buff_send]')
call nvtxEndRange
call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA")
- #:endif
- call MPI_SENDRECV( &
- buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
- buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+ call MPI_SENDRECV( &
+ buff_send, buffer_count, mpi_p, dst_proc, send_tag, &
+ buff_recv, buffer_count, mpi_p, src_proc, recv_tag, &
+ MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
- call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA
+ call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA
- #:if rdma_mpi
- !$acc end host_data
- !$acc wait
- #:else
call nvtxStartRange("RHS-COMM-HOST2DEV")
- !$acc update device(buff_recv)
+ $:GPU_UPDATE(device='[buff_recv]')
call nvtxEndRange
#:endif
end if
@@ -854,7 +859,7 @@ contains
#:for mpi_dir in [1, 2, 3]
if (mpi_dir == ${mpi_dir}$) then
#:if mpi_dir == 1
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
do l = 0, p
do k = 0, n
do j = -buff_size, -1
@@ -874,7 +879,7 @@ contains
end do
if (qbmm_comm) then
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do l = 0, p
do k = 0, n
do j = -buff_size, -1
@@ -889,7 +894,7 @@ contains
end do
end do
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do l = 0, p
do k = 0, n
do j = -buff_size, -1
@@ -905,7 +910,7 @@ contains
end do
end if
#:elif mpi_dir == 2
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
do i = 1, nVar
do l = 0, p
do k = -buff_size, -1
@@ -926,7 +931,7 @@ contains
end do
if (qbmm_comm) then
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do i = nVar + 1, nVar + 4
do l = 0, p
do k = -buff_size, -1
@@ -942,7 +947,7 @@ contains
end do
end do
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do i = nVar + 1, nVar + 4
do l = 0, p
do k = -buff_size, -1
@@ -960,7 +965,7 @@ contains
end if
#:else
! Unpacking buffer from bc_z%beg
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[r]')
do i = 1, nVar
do l = -buff_size, -1
do k = -buff_size, n + buff_size
@@ -982,7 +987,7 @@ contains
end do
if (qbmm_comm) then
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do i = nVar + 1, nVar + 4
do l = -buff_size, -1
do k = -buff_size, n + buff_size
@@ -999,7 +1004,7 @@ contains
end do
end do
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[r]')
do i = nVar + 1, nVar + 4
do l = -buff_size, -1
do k = -buff_size, n + buff_size
diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp
index d1dca0f6ca..e04242a787 100644
--- a/src/common/m_phase_change.fpp
+++ b/src/common/m_phase_change.fpp
@@ -42,7 +42,7 @@ module m_phase_change
real(wp) :: A, B, C, D
!> @}
- !$acc declare create(max_iter,pCr,TCr,mixM,lp,vp,A,B,C,D)
+ $:GPU_DECLARE(create='[max_iter,pCr,TCr,mixM,lp,vp,A,B,C,D]')
contains
@@ -89,23 +89,26 @@ contains
real(wp) :: rho, rM, m1, m2, MCT !< total density, total reacting mass, individual reacting masses
real(wp) :: TvF !< total volume fraction
- !$acc declare create(pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF)
+ $:GPU_DECLARE(create='[pS,pSOV,pSSL,TS,TSOV,TSSL,TSatOV,TSatSL]')
+ $:GPU_DECLARE(create='[rhoe,dynE,rhos,rho,rM,m1,m2,MCT,TvF]')
real(wp), dimension(num_fluids) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok
+ $:GPU_DECLARE(create='[p_infOV,p_infpT,p_infSL,sk,hk,gk,ek,rhok]')
!< Generic loop iterators
integer :: i, j, k, l
- !$acc declare create(p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok)
-
! starting equilibrium solver
- !$acc parallel loop collapse(3) gang vector default(present) private(p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[p_infOV, p_infpT, p_infSL, &
+ & sk, hk, gk, ek, rhok,pS, pSOV, pSSL, &
+ & TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, &
+ & dynE, rhos, rho, rM, m1, m2, MCT, TvF]')
do j = 0, m
do k = 0, n
do l = 0, p
rho = 0.0_wp; TvF = 0.0_wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
! Mixture density
@@ -131,7 +134,7 @@ contains
! kinetic energy as an auxiliary variable to the calculation of the total internal energy
dynE = 0.0_wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, momxe
dynE = dynE + 5.0e-1_wp*q_cons_vf(i)%sf(j, k, l)**2/rho
@@ -253,7 +256,7 @@ contains
! calculating volume fractions, internal energies, and total entropy
rhos = 0.0_wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
! volume fractions
@@ -284,12 +287,8 @@ contains
!! @param rhoe mixture energy
!! @param TS equilibrium temperature at the interface
pure subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, q_cons_vf, rhoe, TS)
-
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_infinite_pt_relaxation_k
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_infinite_pt_relaxation_k', &
+ & parallelism='[seq]', cray_inline=True)
! initializing variables
integer, intent(in) :: j, k, l, MFL
@@ -305,7 +304,7 @@ contains
! auxiliary variables for the pT-equilibrium solver
mCP = 0.0_wp; mQ = 0.0_wp; p_infpT = ps_inf;
! Performing tests before initializing the pT-equilibrium
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
! sum of the total alpha*rho*cp of the system
@@ -354,7 +353,7 @@ contains
! updating functions used in the Newton's solver
gpp = 0.0_wp; gp = 0.0_wp; hp = 0.0_wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
gp = gp + (gs_min(i) - 1.0_wp)*q_cons_vf(i + contxb - 1)%sf(j, k, l)*cvs(i) &
@@ -388,12 +387,8 @@ contains
!! @param q_cons_vf Cell-average conservative variables
!! @param TS equilibrium temperature at the interface
pure subroutine s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS)
-
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_infinite_ptg_relaxation_k
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_infinite_ptg_relaxation_k', &
+ & parallelism='[seq]', cray_inline=True)
integer, intent(in) :: j, k, l
real(wp), intent(inout) :: pS
@@ -448,7 +443,7 @@ contains
mCP = 0.0_wp; mCPD = 0.0_wp; mCVGP = 0.0_wp; mCVGP2 = 0.0_wp; mQ = 0.0_wp; mQD = 0.0_wp
! Those must be updated through the iterations, as they either depend on
! the partial masses for all fluids, or on the equilibrium pressure
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
! sum of the total alpha*rho*cp of the system
@@ -513,12 +508,8 @@ contains
!! @param k generic loop iterator for y direction
!! @param l generic loop iterator for z direction
pure subroutine s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l)
-
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_correct_partial_densities
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_correct_partial_densities', &
+ & parallelism='[seq]', cray_inline=True)
!> @name variables for the correction of the reacting partial densities
!> @{
@@ -576,12 +567,8 @@ contains
!! @param q_cons_vf Cell-average conservative variables
!! @param TJac Transpose of the Jacobian Matrix
pure subroutine s_compute_jacobian_matrix(InvJac, j, Jac, k, l, mCPD, mCVGP, mCVGP2, pS, q_cons_vf, TJac)
-
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_jacobian_matrix
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_jacobian_matrix', &
+ & parallelism='[seq]', cray_inline=True)
real(wp), dimension(2, 2), intent(out) :: InvJac
integer, intent(in) :: j
@@ -683,12 +670,8 @@ contains
!! @param rhoe mixture energy
!! @param R2D (2D) residue array
pure subroutine s_compute_pTg_residue(j, k, l, mCPD, mCVGP, mQD, q_cons_vf, pS, rhoe, R2D)
-
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_pTg_residue
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_pTg_residue', &
+ & parallelism='[seq]', cray_inline=True)
integer, intent(in) :: j, k, l
real(wp), intent(in) :: mCPD, mCVGP, mQD
@@ -734,12 +717,8 @@ contains
!! @param TSat Saturation Temperature
!! @param TSIn equilibrium Temperature
pure elemental subroutine s_TSat(pSat, TSat, TSIn)
-
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_TSat
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_TSat',parallelism='[seq]', &
+ & cray_inline=True)
real(wp), intent(in) :: pSat
real(wp), intent(out) :: TSat
diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp
index 949eac92cb..6a67b56315 100644
--- a/src/common/m_variables_conversion.fpp
+++ b/src/common/m_variables_conversion.fpp
@@ -50,16 +50,16 @@ module m_variables_conversion
!! In simulation, gammas, pi_infs, and qvs are already declared in m_global_variables
#ifndef MFC_SIMULATION
real(wp), allocatable, public, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps
- !$acc declare create(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps)
+ $:GPU_DECLARE(create='[gammas,gs_min,pi_infs,ps_inf,cvs,qvs,qvps]')
#endif
real(wp), allocatable, dimension(:) :: Gs
integer, allocatable, dimension(:) :: bubrs
real(wp), allocatable, dimension(:, :) :: Res
- !$acc declare create(bubrs, Gs, Res)
+ $:GPU_DECLARE(create='[bubrs,Gs,Res]')
integer :: is1b, is2b, is3b, is1e, is2e, is3e
- !$acc declare create(is1b, is2b, is3b, is1e, is2e, is3e)
+ $:GPU_DECLARE(create='[is1b,is2b,is3b,is1e,is2e,is3e]')
real(wp), allocatable, dimension(:, :, :), public :: rho_sf !< Scalar density function
real(wp), allocatable, dimension(:, :, :), public :: gamma_sf !< Scalar sp. heat ratio function
@@ -116,12 +116,8 @@ contains
!! @param stress Shear Stress
!! @param mom Momentum
subroutine s_compute_pressure(energy, alf, dyn_p, pi_inf, gamma, rho, qv, rhoYks, pres, T, stress, mom, G, pres_mag)
-
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_pressure
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_pressure',parallelism='[seq]', &
+ & cray_inline=True)
real(wp), intent(in) :: energy, alf
real(wp), intent(in) :: dyn_p
@@ -458,11 +454,8 @@ contains
gamma_K, pi_inf_K, qv_K, &
alpha_K, alpha_rho_K, Re_K, &
G_K, G)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_acc
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_convert_species_to_mixture_variables_acc', &
+ & parallelism='[seq]', cray_inline=True)
real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K
@@ -539,11 +532,8 @@ contains
pure subroutine s_convert_species_to_mixture_variables_bubbles_acc(rho_K, &
gamma_K, pi_inf_K, qv_K, &
alpha_K, alpha_rho_K, Re_K)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_bubbles_acc
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_convert_species_to_mixture_variables_bubbles_acc', &
+ & parallelism='[seq]', cray_inline=True)
real(wp), intent(inout) :: rho_K, gamma_K, pi_inf_K, qv_K
@@ -610,7 +600,7 @@ contains
integer :: i, j
-!$acc enter data copyin(is1b, is1e, is2b, is2e, is3b, is3e)
+ $:GPU_ENTER_DATA(copyin='[is1b,is1e,is2b,is2e,is3b,is3e]')
#ifdef MFC_SIMULATION
@:ALLOCATE(gammas (1:num_fluids))
@@ -642,7 +632,7 @@ contains
qvs(i) = fluid_pp(i)%qv
qvps(i) = fluid_pp(i)%qvp
end do
-!$acc update device(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps, Gs)
+ $:GPU_UPDATE(device='[gammas,gs_min,pi_infs,ps_inf,cvs,qvs,qvps,Gs]')
#ifdef MFC_SIMULATION
@@ -654,7 +644,7 @@ contains
end do
end do
- !$acc update device(Res, Re_idx, Re_size)
+ $:GPU_UPDATE(device='[Res,Re_idx,Re_size]')
end if
#endif
@@ -668,7 +658,7 @@ contains
do i = 1, nb
bubrs(i) = bub_idx%rs(i)
end do
- !$acc update device(bubrs)
+ $:GPU_UPDATE(device='[bubrs]')
end if
#ifdef MFC_POST_PROCESS
@@ -748,7 +738,7 @@ contains
nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, nb
mu = qK_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc
sig = (qK_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc - mu**2)**0.5_wp
@@ -781,7 +771,7 @@ contains
nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, nb
mu = qK_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc
sig = (qK_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc - mu**2)**0.5_wp
@@ -869,15 +859,15 @@ contains
end if
#:endif
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, &
- !$acc pi_inf_K, qv_K, dyn_pres_K, rhoYks, B)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, Re_K, &
+ & nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, &
+ & dyn_pres_K, rhoYks, B]')
do l = ibounds(3)%beg, ibounds(3)%end
do k = ibounds(2)%beg, ibounds(2)%end
do j = ibounds(1)%beg, ibounds(1)%end
dyn_pres_K = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l)
alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l)
@@ -921,13 +911,13 @@ contains
B2 = B(1)**2 + B(2)**2 + B(3)**2
m2 = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, momxe
m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2
end do
S = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 3
S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i)
end do
@@ -935,14 +925,14 @@ contains
E = qK_cons_vf(E_idx)%sf(j, k, l)
D = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
D = D + qK_cons_vf(i)%sf(j, k, l)
end do
! Newton-Raphson
W = E + D
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do iter = 1, relativity_cons_to_prim_max_iter
Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2)))
pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS
@@ -968,13 +958,13 @@ contains
qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2)
! Recover the other primitive variables
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 3
qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2)
end do
qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = B_idx%beg, B_idx%end
qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)
end do
@@ -984,22 +974,22 @@ contains
if (chemistry) then
rho_K = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = chemxb, chemxe
rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l))
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
qK_prim_vf(i)%sf(j, k, l) = rho_K
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = chemxb, chemxe
qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K)
end do
else
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)
end do
@@ -1009,7 +999,7 @@ contains
rho_K = max(rho_K, sgm_eps)
#endif
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, momxe
if (model_eqns /= 4) then
qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) &
@@ -1023,7 +1013,7 @@ contains
end do
if (chemistry) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_species
rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l)
end do
@@ -1053,7 +1043,7 @@ contains
end if
if (bubbles_euler) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, nb
nRtmp(i) = qK_cons_vf(bubrs(i))%sf(j, k, l)
end do
@@ -1065,7 +1055,7 @@ contains
nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l)
!Convert cons to prim
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = bubxb, bubxe
qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc
end do
@@ -1082,7 +1072,7 @@ contains
call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight)
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = bubxb, bubxe
qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc
end do
@@ -1090,21 +1080,21 @@ contains
end if
if (mhd) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = B_idx%beg, B_idx%end
qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)
end do
end if
if (elasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = strxb, strxe
qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K
end do
end if
if (hypoelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = strxb, strxe
! subtracting elastic contribution for pressure calculation
if (G_K > verysmall) then
@@ -1121,13 +1111,13 @@ contains
end if
if (hyperelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = xibeg, xiend
qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K
end do
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)
end do
@@ -1145,7 +1135,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end subroutine s_convert_conservative_to_primitive_variables
@@ -1465,32 +1454,33 @@ contains
is2b = is2%beg; is2e = is2%end
is3b = is3%beg; is3e = is3%end
- !$acc update device(is1b, is2b, is3b, is1e, is2e, is3e)
+ $:GPU_UPDATE(device='[is1b,is2b,is3b,is1e,is2e,is3e]')
! Computing the flux variables from the primitive variables, without
! accounting for the contribution of either viscosity or capillarity
#ifdef MFC_SIMULATION
- !$acc parallel loop collapse(3) gang vector default(present) private(alpha_rho_K, vel_K, alpha_K, Re_K, Y_K)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_K, vel_K, &
+ & alpha_K, Re_K, Y_K]')
do l = is3b, is3e
do k = is2b, is2e
do j = is1b, is1e
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
alpha_rho_K(i) = qK_prim_vf(j, k, l, i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
vel_K(i) = qK_prim_vf(j, k, l, contxe + i)
end do
vel_K_sum = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
vel_K_sum = vel_K_sum + vel_K(i)**2._wp
end do
@@ -1511,7 +1501,7 @@ contains
! Computing the energy from the pressure
if (chemistry) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = chemxb, chemxe
Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i)
end do
@@ -1528,12 +1518,12 @@ contains
end if
! mass flux, this should be \alpha_i \rho_i u_i
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1))
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
FK_vf(j, k, l, contxe + dir_idx(i)) = &
rho_K*vel_K(dir_idx(1)) &
@@ -1546,14 +1536,14 @@ contains
! Species advection Flux, \rho*u*Y
if (chemistry) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_species
FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i))
end do
end if
if (riemann_solver == 1 .or. riemann_solver == 4) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
FK_vf(j, k, l, i) = 0._wp
FK_src_vf(j, k, l, i) = alpha_K(i - E_idx)
@@ -1561,12 +1551,12 @@ contains
else
! Could be bubbles_euler!
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
FK_src_vf(j, k, l, i) = vel_K(dir_idx(1))
end do
@@ -1603,11 +1593,8 @@ contains
#ifndef MFC_PRE_PROCESS
pure subroutine s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv, vel_sum, c_c, c)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_speed_of_sound
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_speed_of_sound', &
+ & parallelism='[seq]', cray_inline=True)
real(wp), intent(in) :: pres
real(wp), intent(in) :: rho, gamma, pi_inf
@@ -1640,7 +1627,7 @@ contains
c = (1._wp/(rho*(adv(1)/blkmod1 + adv(2)/blkmod2)))
elseif (model_eqns == 3) then
c = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, num_fluids
c = c + adv(q)*(1._wp/gammas(q) + 1._wp)* &
(pres + pi_infs(q)/(gammas(q) + 1._wp))
@@ -1673,11 +1660,8 @@ contains
#ifndef MFC_PRE_PROCESS
pure subroutine s_compute_fast_magnetosonic_speed(rho, c, B, norm, c_fast, h)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_fast_magnetosonic_speed
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_fast_magnetosonic_speed', &
+ & parallelism='[seq]', cray_inline=True)
real(wp), intent(in) :: B(3), rho, c
real(wp), intent(in) :: h ! only used for relativity
diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp
index 3cf39533e4..f1bfe06fa3 100644
--- a/src/pre_process/m_assign_variables.fpp
+++ b/src/pre_process/m_assign_variables.fpp
@@ -3,6 +3,7 @@
!! @brief Contains module m_assign_variables
#:include 'case.fpp'
+#:include 'macros.fpp'
module m_assign_variables
@@ -103,7 +104,7 @@ contains
!! @param patch_id_fp Array to track patch ids
pure subroutine s_assign_patch_mixture_primitive_variables(patch_id, j, k, l, &
eta, q_prim_vf, patch_id_fp)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
integer, intent(in) :: patch_id
integer, intent(in) :: j, k, l
@@ -276,7 +277,7 @@ contains
!! @param patch_id_fp Array to track patch ids
impure subroutine s_assign_patch_species_primitive_variables(patch_id, j, k, l, &
eta, q_prim_vf, patch_id_fp)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
integer, intent(in) :: patch_id
integer, intent(in) :: j, k, l
diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp
index b3f6b48feb..f7700e84ac 100644
--- a/src/pre_process/m_patches.fpp
+++ b/src/pre_process/m_patches.fpp
@@ -2389,7 +2389,7 @@ contains
end subroutine s_model
subroutine s_convert_cylindrical_to_cartesian_coord(cyl_y, cyl_z)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: cyl_y, cyl_z
@@ -2400,7 +2400,7 @@ contains
pure function f_convert_cyl_to_cart(cyl) result(cart)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
t_vec3, intent(in) :: cyl
t_vec3 :: cart
@@ -2412,7 +2412,7 @@ contains
end function f_convert_cyl_to_cart
subroutine s_convert_cylindrical_to_spherical_coord(cyl_x, cyl_y)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(IN) :: cyl_x, cyl_y
@@ -2425,7 +2425,7 @@ contains
!! @param offset Thickness
!! @param a Starting position
pure elemental function f_r(myth, offset, a)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: myth, offset, a
real(wp) :: b
real(wp) :: f_r
diff --git a/src/simulation/include/inline_riemann.fpp b/src/simulation/include/inline_riemann.fpp
index 714ec3ef21..9972799b02 100644
--- a/src/simulation/include/inline_riemann.fpp
+++ b/src/simulation/include/inline_riemann.fpp
@@ -1,7 +1,7 @@
#:def arithmetic_avg()
rho_avg = 5.e-1_wp*(rho_L + rho_R)
vel_avg_rms = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
vel_avg_rms = vel_avg_rms + (5.e-1_wp*(vel_L(i) + vel_R(i)))**2._wp
end do
@@ -17,7 +17,7 @@
vel_avg_rms = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
vel_avg_rms = vel_avg_rms + (sqrt(rho_L)*vel_L(i) + sqrt(rho_R)*vel_R(i))**2._wp/ &
(sqrt(rho_L) + sqrt(rho_R))**2._wp
diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp
index 5f7e986d89..b14528b9d5 100644
--- a/src/simulation/m_acoustic_src.fpp
+++ b/src/simulation/m_acoustic_src.fpp
@@ -23,41 +23,43 @@ module m_acoustic_src
private; public :: s_initialize_acoustic_src, s_precalculate_acoustic_spatial_sources, s_acoustic_src_calculations
integer, allocatable, dimension(:) :: pulse, support
- !$acc declare create(pulse, support)
+ $:GPU_DECLARE(create='[pulse,support]')
logical, allocatable, dimension(:) :: dipole
- !$acc declare create(dipole)
+ $:GPU_DECLARE(create='[dipole]')
real(wp), allocatable, target, dimension(:, :) :: loc_acoustic
- !$acc declare create(loc_acoustic)
+ $:GPU_DECLARE(create='[loc_acoustic]')
- real(wp), allocatable, dimension(:) :: mag, length, height, wavelength, frequency, gauss_sigma_dist, gauss_sigma_time, npulse, dir, delay
- !$acc declare create(mag, length, height, wavelength, frequency, gauss_sigma_dist, gauss_sigma_time, npulse, dir, delay)
+ real(wp), allocatable, dimension(:) :: mag, length, height, wavelength, frequency
+ real(wp), allocatable, dimension(:) :: gauss_sigma_dist, gauss_sigma_time, npulse, dir, delay
+ $:GPU_DECLARE(create='[mag,length,height,wavelength,frequency]')
+ $:GPU_DECLARE(create='[gauss_sigma_dist,gauss_sigma_time,npulse,dir,delay]')
real(wp), allocatable, dimension(:) :: foc_length, aperture
- !$acc declare create(foc_length, aperture)
+ $:GPU_DECLARE(create='[foc_length,aperture]')
real(wp), allocatable, dimension(:) :: element_spacing_angle, element_polygon_ratio, rotate_angle
- !$acc declare create(element_spacing_angle, element_polygon_ratio, rotate_angle)
+ $:GPU_DECLARE(create='[element_spacing_angle,element_polygon_ratio,rotate_angle]')
real(wp), allocatable, dimension(:) :: bb_bandwidth, bb_lowest_freq
- !$acc declare create(bb_bandwidth, bb_lowest_freq)
+ $:GPU_DECLARE(create='[bb_bandwidth,bb_lowest_freq]')
integer, allocatable, dimension(:) :: num_elements, element_on, bb_num_freq
- !$acc declare create(num_elements, element_on, bb_num_freq)
+ $:GPU_DECLARE(create='[num_elements,element_on,bb_num_freq]')
!> @name Acoustic source terms
!> @{
real(wp), allocatable, dimension(:, :, :) :: mass_src, e_src
real(wp), allocatable, dimension(:, :, :, :) :: mom_src
!> @}
- !$acc declare create(mass_src, e_src, mom_src)
+ $:GPU_DECLARE(create='[mass_src,e_src,mom_src]')
integer, dimension(:), allocatable :: source_spatials_num_points !< Number of non-zero source grid points for each source
- !$acc declare create(source_spatials_num_points)
+ $:GPU_DECLARE(create='[source_spatials_num_points]')
type(source_spatial_type), dimension(:), allocatable :: source_spatials !< Data of non-zero source grid points for each source
- !$acc declare create(source_spatials)
+ $:GPU_DECLARE(create='[source_spatials]')
contains
@@ -108,7 +110,12 @@ contains
delay(i) = acoustic(i)%delay
end if
end do
- !$acc update device(loc_acoustic, mag, dipole, support, length, height, wavelength, frequency, gauss_sigma_dist, gauss_sigma_time, foc_length, aperture, npulse, pulse, dir, delay, element_polygon_ratio, rotate_angle, element_spacing_angle, num_elements, element_on, bb_num_freq, bb_bandwidth, bb_lowest_freq)
+ $:GPU_UPDATE(device='[loc_acoustic,mag,dipole,support,length, &
+ & height,wavelength,frequency,gauss_sigma_dist, &
+ & gauss_sigma_time,foc_length,aperture,npulse,pulse, &
+ & dir,delay,element_polygon_ratio,rotate_angle, &
+ & element_spacing_angle,num_elements,element_on, &
+ & bb_num_freq,bb_bandwidth,bb_lowest_freq]')
@:ALLOCATE(mass_src(0:m, 0:n, 0:p))
@:ALLOCATE(mom_src(1:num_vels, 0:m, 0:n, 0:p))
@@ -159,7 +166,7 @@ contains
sim_time = t_step*dt
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -198,7 +205,7 @@ contains
call s_mpi_send_random_number(phi_rn, bb_num_freq(ai))
end if
- !$acc loop reduction(+:sum_BB)
+ $:GPU_LOOP(reduction='[[sum_BB]]', reductionOp='[+]')
do k = 1, bb_num_freq(ai)
! Acoustic period of the wave at each discrete frequency
period_BB = 1._wp/(bb_lowest_freq(ai) + k*bb_bandwidth(ai))
@@ -212,7 +219,7 @@ contains
deallocate (phi_rn)
- !$acc parallel loop gang vector default(present) private(myalpha, myalpha_rho)
+ $:GPU_PARALLEL_LOOP(private='[myalpha,myalpha_rho]')
do i = 1, num_points
j = source_spatials(ai)%coord(1, i)
k = source_spatials(ai)%coord(2, i)
@@ -223,7 +230,7 @@ contains
B_tait = 0._wp
small_gamma = 0._wp
- !$acc loop
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, num_fluids
myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l)
myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l)
@@ -231,7 +238,7 @@ contains
if (bubbles_euler) then
if (num_fluids > 2) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, num_fluids - 1
myRho = myRho + myalpha_rho(q)
B_tait = B_tait + myalpha(q)*pi_infs(q)
@@ -245,7 +252,7 @@ contains
end if
if ((.not. bubbles_euler) .or. (mpp_lim .and. (num_fluids > 2))) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, num_fluids
myRho = myRho + myalpha_rho(q)
B_tait = B_tait + myalpha(q)*pi_infs(q)
@@ -312,15 +319,15 @@ contains
end do
! Update the rhs variables
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = contxb, contxe
rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = momxb, momxe
rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l)
end do
@@ -339,7 +346,7 @@ contains
!! @param gauss_sigma_time_local sigma in time for Gaussian pulse
!! @param source Source term amplitude
pure elemental subroutine s_source_temporal(sim_time, c, ai, term_index, frequency_local, gauss_sigma_time_local, source, sum_BB)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
integer, intent(in) :: ai, term_index
real(wp), intent(in) :: sim_time, c, sum_BB
real(wp), intent(in) :: frequency_local, gauss_sigma_time_local
@@ -466,14 +473,14 @@ contains
call s_mpi_abort('Fatal Error: Inconsistent allocation of source_spatials')
end if
- !$acc update device(source_spatials(ai)%coord)
- !$acc update device(source_spatials(ai)%val)
+ $:GPU_UPDATE(device='[source_spatials(ai)%coord]')
+ $:GPU_UPDATE(device='[source_spatials(ai)%val]')
if (support(ai) >= 5) then
if (dim == 2) then
- !$acc update device(source_spatials(ai)%angle)
+ $:GPU_UPDATE(device='[source_spatials(ai)%angle]')
end if
if (dim == 3) then
- !$acc update device(source_spatials(ai)%xyz_to_r_ratios)
+ $:GPU_UPDATE(device='[source_spatials(ai)%xyz_to_r_ratios]')
end if
end if
@@ -691,7 +698,7 @@ contains
!! @param c Speed of sound
!! @return frequency_local Converted frequency
pure elemental function f_frequency_local(freq_conv_flag, ai, c)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
logical, intent(in) :: freq_conv_flag
integer, intent(in) :: ai
real(wp), intent(in) :: c
@@ -710,7 +717,7 @@ contains
!! @param ai Acoustic source index
!! @return gauss_sigma_time_local Converted Gaussian sigma time
pure elemental function f_gauss_sigma_time_local(gauss_conv_flag, ai, c)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
logical, intent(in) :: gauss_conv_flag
integer, intent(in) :: ai
real(wp), intent(in) :: c
diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp
index 1ef74cbcee..d5f811d273 100644
--- a/src/simulation/m_body_forces.fpp
+++ b/src/simulation/m_body_forces.fpp
@@ -22,7 +22,7 @@ module m_body_forces
s_finalize_body_forces_module
real(wp), allocatable, dimension(:, :, :) :: rhoM
- !$acc declare create(rhoM)
+ $:GPU_DECLARE(create='[rhoM]')
contains
@@ -67,7 +67,7 @@ contains
end if
end if
- !$acc update device(accel_bf)
+ $:GPU_UPDATE(device='[accel_bf]')
end subroutine s_compute_acceleration
@@ -79,7 +79,7 @@ contains
type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf
integer :: i, j, k, l !< standard iterators
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -109,7 +109,7 @@ contains
call s_compute_acceleration(mytime)
call s_compute_mixture_density(q_cons_vf)
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = momxb, E_idx
do l = 0, p
do k = 0, n
@@ -122,7 +122,7 @@ contains
if (bf_x) then ! x-direction body forces
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -137,7 +137,7 @@ contains
if (bf_y) then ! y-direction body forces
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -152,7 +152,7 @@ contains
if (bf_z) then ! z-direction body forces
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp
index 79ca5fb982..6ee19c210c 100644
--- a/src/simulation/m_bubbles.fpp
+++ b/src/simulation/m_bubbles.fpp
@@ -22,7 +22,7 @@ module m_bubbles
real(wp) :: chi_vw !< Bubble wall properties (Ando 2010)
real(wp) :: k_mw !< Bubble wall properties (Ando 2010)
real(wp) :: rho_mw !< Bubble wall properties (Ando 2010)
- !$acc declare create(chi_vw, k_mw, rho_mw)
+ $:GPU_DECLARE(create='[chi_vw,k_mw,rho_mw]')
contains
@@ -41,7 +41,7 @@ contains
!! @param f_divu Divergence of velocity
!! @param fCson Speed of sound from fP (EL)
pure elemental function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf
real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu
real(wp), intent(in) :: fCson
@@ -82,7 +82,7 @@ contains
!! @param fV Current bubble velocity
!! @param fpb Internal bubble pressure
pure elemental function f_cpbw(fR0, fR, fV, fpb)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fR0, fR, fV, fpb
real(wp) :: f_cpbw
@@ -101,7 +101,7 @@ contains
!! @param fntait Tait EOS parameter
!! @param fBtait Tait EOS parameter
pure elemental function f_H(fCpbw, fCpinf, fntait, fBtait)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fCpbw, fCpinf, fntait, fBtait
real(wp) :: tmp1, tmp2, tmp3
@@ -121,7 +121,7 @@ contains
!! @param fBtait Tait EOS parameter
!! @param fH Bubble enthalpy
pure elemental function f_cgas(fCpinf, fntait, fBtait, fH)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fCpinf, fntait, fBtait, fH
real(wp) :: tmp
@@ -144,7 +144,7 @@ contains
!! @param advsrc Advection equation source term
!! @param divu Divergence of velocity
pure elemental function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fRho, fP, falf, fntait, fBtait, advsrc, divu
real(wp) :: c2_liquid
@@ -174,7 +174,7 @@ contains
!! @param fR0 Equilibrium bubble radius
!! @param fpbdot Time derivative of the internal bubble pressure
pure elemental function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fCpbw, fCpinf, fCpinf_dot, fntait, fBtait
real(wp), intent(in) :: fR, fV, fR0, fpbdot
@@ -210,7 +210,7 @@ contains
!! @param fR0 Equilibrium bubble radius
!! @param fCpbw Boundary wall pressure
pure elemental function f_rddot_RP(fCp, fRho, fR, fV, fCpbw)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fCp, fRho, fR, fV, fCpbw
real(wp) :: f_rddot_RP
@@ -233,7 +233,7 @@ contains
!! @param fntait Tait EOS parameter
!! @param fBtait Tait EOS parameter
pure elemental function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fCpbw, fR, fV, fH, fHdot
real(wp), intent(in) :: fcgas, fntait, fBtait
@@ -256,7 +256,7 @@ contains
!! @param fV Current bubble velocity
!! @param fpb Internal bubble pressure
pure elemental function f_cpbw_KM(fR0, fR, fV, fpb)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fR0, fR, fV, fpb
real(wp) :: f_cpbw_KM
@@ -283,7 +283,7 @@ contains
!! @param fR0 Equilibrium bubble radius
!! @param fC Current sound speed
pure elemental function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fpbdot, fCp, fCpbw
real(wp), intent(in) :: fRho, fR, fV, fR0, fC
@@ -317,7 +317,7 @@ contains
!! @param pb Internal bubble pressure
!! @param iR0 Current bubble size index
pure elemental subroutine s_bwproperty(pb, iR0, chi_vw, k_mw, rho_mw)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: pb
integer, intent(in) :: iR0
real(wp), intent(out) :: chi_vw !< Bubble wall properties (Ando 2010)
@@ -347,7 +347,7 @@ contains
!! @param fR_m Mixture gas constant (EL)
!! @param fgamma_m Mixture gamma (EL)
pure elemental subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_n, fbeta_c, fR_m, fgamma_m)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fR
real(wp), intent(in) :: fV
real(wp), intent(in) :: fpb
@@ -405,7 +405,7 @@ contains
!! @param fR_m Mixture gas constant (EL)
!! @param fgamma_m Mixture gamma (EL)
pure elemental function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in) :: fvflux
real(wp), intent(in) :: fR
real(wp), intent(in) :: fV
@@ -465,11 +465,9 @@ contains
fntait, fBtait, f_bub_adv_src, f_divu, &
bub_id, fmass_v, fmass_n, fbeta_c, &
fbeta_t, fCson, adap_dt_stop)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_advance_step
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_advance_step',parallelism='[seq]', &
+ & cray_inline=True)
+
real(wp), intent(inout) :: fR, fV, fpb, fmass_v
real(wp), intent(in) :: fRho, fP, fR0, fpbdot, alf
real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu
@@ -599,11 +597,9 @@ contains
pure subroutine s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, &
fntait, fBtait, f_bub_adv_src, f_divu, &
fCson, h)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_initial_substep_h
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_initial_substep_h',parallelism='[seq]', &
+ & cray_inline=True)
+
real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf
real(wp), intent(IN) :: fntait, fBtait, f_bub_adv_src, f_divu
real(wp), intent(IN) :: fCson
@@ -685,11 +681,9 @@ contains
bub_id, fmass_v, fmass_n, fbeta_c, &
fbeta_t, fCson, h, &
myR_tmp, myV_tmp, myPb_tmp, myMv_tmp)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_advance_substep
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_advance_substep',parallelism='[seq]', &
+ & cray_inline=True)
+
real(wp), intent(OUT) :: err
real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf
real(wp), intent(IN) :: fntait, fBtait, f_bub_adv_src, f_divu, h
@@ -786,7 +780,7 @@ contains
!! @param fdMvdt_tmp Rate of change of the mass of vapor in the bubble
pure elemental subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, &
fmass_n, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(IN) :: fR_tmp, fV_tmp, fPb_tmp, fMv_tmp
real(wp), intent(IN) :: fmass_n, fbeta_c, fbeta_t
integer, intent(IN) :: bub_id
diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp
index ed3f064551..5fe6d38136 100644
--- a/src/simulation/m_bubbles_EE.fpp
+++ b/src/simulation/m_bubbles_EE.fpp
@@ -21,13 +21,13 @@ module m_bubbles_EE
real(wp), allocatable, dimension(:, :, :) :: bub_adv_src
real(wp), allocatable, dimension(:, :, :, :) :: bub_r_src, bub_v_src, bub_p_src, bub_m_src
- !$acc declare create(bub_adv_src, bub_r_src, bub_v_src, bub_p_src, bub_m_src)
+ $:GPU_DECLARE(create='[bub_adv_src,bub_r_src,bub_v_src,bub_p_src,bub_m_src]')
type(scalar_field) :: divu !< matrix for div(u)
- !$acc declare create(divu)
+ $:GPU_DECLARE(create='[divu]')
integer, allocatable, dimension(:) :: rs, vs, ms, ps
- !$acc declare create(rs, vs, ms, ps)
+ $:GPU_DECLARE(create='[rs,vs,ms,ps]')
contains
@@ -51,9 +51,9 @@ contains
end if
end do
- !$acc update device(rs, vs)
+ $:GPU_UPDATE(device='[rs, vs]')
if (.not. polytropic) then
- !$acc update device(ps, ms)
+ $:GPU_UPDATE(device='[ps, ms]')
end if
@:ALLOCATE(divu%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end))
@@ -76,12 +76,12 @@ contains
real(wp) :: nR3bar
integer(wp) :: i, j, k, l
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
nR3bar = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, nb
nR3bar = nR3bar + weight(i)*(q_cons_vf(rs(i))%sf(j, k, l))**3._wp
end do
@@ -103,7 +103,7 @@ contains
if (idir == 1) then
if (.not. qbmm) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -119,7 +119,7 @@ contains
elseif (idir == 2) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -133,7 +133,7 @@ contains
elseif (idir == 3) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -172,13 +172,13 @@ contains
integer :: dmBub_id !< Dummy variables for unified subgrid bubble subroutines
real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
bub_adv_src(j, k, l) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, nb
bub_r_src(j, k, l, q) = 0._wp
bub_v_src(j, k, l, q) = 0._wp
@@ -190,8 +190,9 @@ contains
end do
adap_dt_stop_max = 0
- !$acc parallel loop collapse(3) gang vector default(present) private(Rtmp, Vtmp, myalpha_rho, myalpha) &
- !$acc reduction(MAX:adap_dt_stop_max) copy(adap_dt_stop_max)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[Rtmp, Vtmp, myalpha_rho, myalpha]', &
+ & reduction='[[adap_dt_stop_max]]', reductionOp='[MAX]', &
+ & copy='[adap_dt_stop_max]')
do l = 0, p
do k = 0, n
do j = 0, m
@@ -199,7 +200,7 @@ contains
if (adv_n) then
nbub = q_prim_vf(n_idx)%sf(j, k, l)
else
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, nb
Rtmp(q) = q_prim_vf(rs(q))%sf(j, k, l)
Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l)
@@ -207,7 +208,7 @@ contains
R3 = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, nb
R3 = R3 + weight(q)*Rtmp(q)**3._wp
end do
@@ -218,7 +219,7 @@ contains
if (.not. adap_dt) then
R2Vav = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, nb
R2Vav = R2Vav + weight(q)*Rtmp(q)**2._wp*Vtmp(q)
end do
@@ -226,10 +227,10 @@ contains
bub_adv_src(j, k, l) = 4._wp*pi*nbub*R2Vav
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, nb
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do ii = 1, num_fluids
myalpha_rho(ii) = q_cons_vf(ii)%sf(j, k, l)
myalpha(ii) = q_cons_vf(advxb + ii - 1)%sf(j, k, l)
@@ -240,14 +241,14 @@ contains
B_tait = 0._wp
if (mpp_lim .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do ii = 1, num_fluids
myRho = myRho + myalpha_rho(ii)
n_tait = n_tait + myalpha(ii)*gammas(ii)
B_tait = B_tait + myalpha(ii)*pi_infs(ii)
end do
else if (num_fluids > 2) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do ii = 1, num_fluids - 1
myRho = myRho + myalpha_rho(ii)
n_tait = n_tait + myalpha(ii)*gammas(ii)
@@ -323,14 +324,14 @@ contains
if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.")
if (.not. adap_dt) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do q = 0, n
do i = 0, m
rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + bub_adv_src(i, q, l)
if (num_fluids > 1) rhs_vf(advxb)%sf(i, q, l) = &
rhs_vf(advxb)%sf(i, q, l) - bub_adv_src(i, q, l)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do k = 1, nb
rhs_vf(rs(k))%sf(i, q, l) = rhs_vf(rs(k))%sf(i, q, l) + bub_r_src(i, q, l, k)
rhs_vf(vs(k))%sf(i, q, l) = rhs_vf(vs(k))%sf(i, q, l) + bub_v_src(i, q, l, k)
diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp
index 854e3f63a0..20e7515c83 100644
--- a/src/simulation/m_bubbles_EL.fpp
+++ b/src/simulation/m_bubbles_EL.fpp
@@ -34,20 +34,26 @@ module m_bubbles_EL
real(wp), allocatable, dimension(:) :: bub_R0 !< Initial bubble radius
real(wp), allocatable, dimension(:) :: Rmax_stats !< Maximum radius
real(wp), allocatable, dimension(:) :: Rmin_stats !< Minimum radius
+ $:GPU_DECLARE(create='[lag_id, bub_R0, Rmax_stats, Rmin_stats]')
+
real(wp), allocatable, dimension(:) :: gas_mg !< Bubble's gas mass
real(wp), allocatable, dimension(:) :: gas_betaT !< heatflux model (Preston et al., 2007)
real(wp), allocatable, dimension(:) :: gas_betaC !< massflux model (Preston et al., 2007)
real(wp), allocatable, dimension(:) :: bub_dphidt !< subgrid velocity potential (Maeda & Colonius, 2018)
+ $:GPU_DECLARE(create='[gas_mg, gas_betaT, gas_betaC, bub_dphidt]')
+
!(nBub, 1 -> actual val or 2 -> temp val)
real(wp), allocatable, dimension(:, :) :: gas_p !< Pressure in the bubble
real(wp), allocatable, dimension(:, :) :: gas_mv !< Vapor mass in the bubble
real(wp), allocatable, dimension(:, :) :: intfc_rad !< Bubble radius
real(wp), allocatable, dimension(:, :) :: intfc_vel !< Velocity of the bubble interface
+ $:GPU_DECLARE(create='[gas_p, gas_mv, intfc_rad, intfc_vel]')
!(nBub, 1-> x or 2->y or 3 ->z, 1 -> actual or 2 -> temporal val)
real(wp), allocatable, dimension(:, :, :) :: mtn_pos !< Bubble's position
real(wp), allocatable, dimension(:, :, :) :: mtn_posPrev !< Bubble's previous position
real(wp), allocatable, dimension(:, :, :) :: mtn_vel !< Bubble's velocity
real(wp), allocatable, dimension(:, :, :) :: mtn_s !< Bubble's computational cell position in real format
+ $:GPU_DECLARE(create='[mtn_pos, mtn_posPrev, mtn_vel, mtn_s]')
!(nBub, 1-> x or 2->y or 3 ->z, time-stage)
real(wp), allocatable, dimension(:, :) :: intfc_draddt !< Time derivative of bubble's radius
real(wp), allocatable, dimension(:, :) :: intfc_dveldt !< Time derivative of bubble's interface velocity
@@ -55,21 +61,18 @@ module m_bubbles_EL
real(wp), allocatable, dimension(:, :) :: gas_dmvdt !< Time derivative of the vapor mass in the bubble
real(wp), allocatable, dimension(:, :, :) :: mtn_dposdt !< Time derivative of the bubble's position
real(wp), allocatable, dimension(:, :, :) :: mtn_dveldt !< Time derivative of the bubble's velocity
-
- !$acc declare create(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, &
- !$acc gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, &
- !$acc gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt)
+ $:GPU_DECLARE(create='[intfc_draddt, intfc_dveldt, gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt]')
integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme
- !$acc declare create(lag_num_ts)
+ $:GPU_DECLARE(create='[lag_num_ts]')
integer :: nBubs !< Number of bubbles in the local domain
real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubble size in the local domain
type(vector_field) :: q_beta !< Projection of the lagrangian particles in the Eulerian framework
integer :: q_beta_idx !< Size of the q_beta vector field
- !$acc declare create(nBubs, Rmax_glb, Rmin_glb, q_beta, q_beta_idx)
+ $:GPU_DECLARE(create='[nBubs,Rmax_glb,Rmin_glb,q_beta,q_beta_idx]')
contains
@@ -99,7 +102,7 @@ contains
call s_mpi_abort('Please check the lag_params%solver_approach input')
end if
- !$acc update device(lag_num_ts, q_beta_idx)
+ $:GPU_UPDATE(device='[lag_num_ts, q_beta_idx]')
@:ALLOCATE(q_beta%vf(1:q_beta_idx))
@@ -247,17 +250,19 @@ contains
print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id
- !$acc update device(bubbles_lagrange, lag_params)
+ $:GPU_UPDATE(device='[bubbles_lagrange, lag_params]')
- !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, &
- !$acc bub_dphidt, gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, &
- !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs)
+ $:GPU_UPDATE(device='[lag_id,bub_R0,Rmax_stats,Rmin_stats,gas_mg, &
+ & gas_betaT,gas_betaC,bub_dphidt,gas_p,gas_mv, &
+ & intfc_rad,intfc_vel,mtn_pos,mtn_posPrev,mtn_vel, &
+ & mtn_s,intfc_draddt,intfc_dveldt,gas_dpdt,gas_dmvdt, &
+ & mtn_dposdt,mtn_dveldt,nBubs]')
Rmax_glb = min(dflt_real, -dflt_real)
Rmin_glb = max(dflt_real, -dflt_real)
- !$acc update device(Rmax_glb, Rmin_glb)
+ $:GPU_UPDATE(device='[Rmax_glb, Rmin_glb]')
- !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc)
+ $:GPU_UPDATE(device='[dx,dy,dz,x_cb,x_cc,y_cb,y_cc,z_cb,z_cc]')
!Populate temporal variables
call s_transfer_data_to_tmp()
@@ -524,7 +529,7 @@ contains
! Subgrid p_inf model based on Maeda and Colonius (2018).
if (lag_params%pressure_corrector) then
! Calculate velocity potentials (valid for one bubble per cell)
- !$acc parallel loop gang vector default(present) private(k, cell)
+ $:GPU_PARALLEL_LOOP(private='[k,cell]')
do k = 1, nBubs
call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega)
myR0 = bub_R0(k)
@@ -545,8 +550,9 @@ contains
! Radial motion model
adap_dt_stop_max = 0
- !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) &
- !$acc reduction(MAX:adap_dt_stop_max) copy(adap_dt_stop_max) copyin(stage)
+ $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', &
+ & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', &
+ & copy='[adap_dt_stop_max]',copyin='[stage]')
do k = 1, nBubs
! Keller-Miksis model
@@ -569,7 +575,7 @@ contains
call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2)
! Obtain liquid density and compute speed of sound from pinf
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3))
myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3))
@@ -614,7 +620,7 @@ contains
if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.")
! Bubbles remain in a fixed position
- !$acc parallel loop collapse(2) gang vector default(present) private(k) copyin(stage)
+ $:GPU_PARALLEL_LOOP(collapse=2, private='[k]', copyin='[stage]')
do k = 1, nBubs
do l = 1, 3
mtn_dposdt(k, l, stage) = 0._wp
@@ -644,7 +650,7 @@ contains
if (lag_params%solver_approach == 2) then
if (p == 0) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do k = 0, p
do j = 0, n
do i = 0, m
@@ -660,7 +666,7 @@ contains
end do
end do
else
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do k = 0, p
do j = 0, n
do i = 0, m
@@ -680,7 +686,7 @@ contains
call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l)
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do k = 0, p
do j = 0, n
do i = 0, m
@@ -695,7 +701,7 @@ contains
end do
!source in energy
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do k = idwbuff(3)%beg, idwbuff(3)%end
do j = idwbuff(2)%beg, idwbuff(2)%end
do i = idwbuff(1)%beg, idwbuff(1)%end
@@ -706,7 +712,7 @@ contains
call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l)
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do k = 0, p
do j = 0, n
do i = 0, m
@@ -734,11 +740,9 @@ contains
!! @param pi_inf Liquid stiffness
!! @param cson Calculated speed of sound
pure subroutine s_compute_cson_from_pinf(q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_cson_from_pinf
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_cson_from_pinf', &
+ & parallelism='[seq]', cray_inline=True)
+
type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf
real(wp), intent(in) :: pinf, rhol, gamma, pi_inf
integer, dimension(3), intent(in) :: cell
@@ -748,7 +752,7 @@ contains
real(wp), dimension(num_dims) :: vel
integer :: i
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel(i) = q_prim_vf(i + contxe)%sf(cell(1), cell(2), cell(3))
end do
@@ -765,7 +769,7 @@ contains
call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS")
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, q_beta_idx
do l = idwbuff(3)%beg, idwbuff(3)%end
do k = idwbuff(2)%beg, idwbuff(2)%end
@@ -780,7 +784,7 @@ contains
mtn_s, mtn_pos, q_beta)
!Store 1-beta
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = idwbuff(3)%beg, idwbuff(3)%end
do k = idwbuff(2)%beg, idwbuff(2)%end
do j = idwbuff(1)%beg, idwbuff(1)%end
@@ -804,11 +808,9 @@ contains
!! @param cell Bubble cell
!! @param Romega Control volume radius
pure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_get_pinf
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_get_pinf',parallelism='[seq]', &
+ & cray_inline=True)
+
integer, intent(in) :: bub_id, ptype
type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf
real(wp), intent(out) :: f_pinfl
@@ -829,7 +831,7 @@ contains
!< Find current bubble cell
cell(:) = int(scoord(:))
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
if (scoord(i) < 0._wp) cell(i) = cell(i) - 1
end do
@@ -920,11 +922,11 @@ contains
charpres2 = 0._wp
vol = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, smearGrid
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do j = 1, smearGrid
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do k = 1, smearGridz
cellaux(1) = cell(1) + i - (mapCells + 1)
cellaux(2) = cell(2) + j - (mapCells + 1)
@@ -1023,7 +1025,7 @@ contains
integer :: k
if (time_stepper == 1) then ! 1st order TVD RK
- !$acc parallel loop gang vector default(present) private(k)
+ $:GPU_PARALLEL_LOOP(private='[k]')
do k = 1, nBubs
!u{1} = u{n} + dt * RHS{n}
intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1)
@@ -1039,13 +1041,13 @@ contains
if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats()
if (lag_params%write_bubbles) then
- !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel)
+ $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]')
call s_write_lag_particles(mytime)
end if
elseif (time_stepper == 2) then ! 2nd order TVD RK
if (stage == 1) then
- !$acc parallel loop gang vector default(present) private(k)
+ $:GPU_PARALLEL_LOOP(private='[k]')
do k = 1, nBubs
!u{1} = u{n} + dt * RHS{n}
intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1)
@@ -1057,7 +1059,7 @@ contains
end do
elseif (stage == 2) then
- !$acc parallel loop gang vector default(present) private(k)
+ $:GPU_PARALLEL_LOOP(private='[k]')
do k = 1, nBubs
!u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1})
intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp
@@ -1073,7 +1075,7 @@ contains
if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats()
if (lag_params%write_bubbles) then
- !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel)
+ $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]')
call s_write_lag_particles(mytime)
end if
@@ -1081,7 +1083,7 @@ contains
elseif (time_stepper == 3) then ! 3rd order TVD RK
if (stage == 1) then
- !$acc parallel loop gang vector default(present) private(k)
+ $:GPU_PARALLEL_LOOP(private='[k]')
do k = 1, nBubs
!u{1} = u{n} + dt * RHS{n}
intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1)
@@ -1093,7 +1095,7 @@ contains
end do
elseif (stage == 2) then
- !$acc parallel loop gang vector default(present) private(k)
+ $:GPU_PARALLEL_LOOP(private='[k]')
do k = 1, nBubs
!u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}]
intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp
@@ -1104,7 +1106,7 @@ contains
gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp
end do
elseif (stage == 3) then
- !$acc parallel loop gang vector default(present) private(k)
+ $:GPU_PARALLEL_LOOP(private='[k]')
do k = 1, nBubs
!u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}]
intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3))
@@ -1120,7 +1122,7 @@ contains
if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats()
if (lag_params%write_bubbles) then
- !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel)
+ $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]')
call s_write_lag_particles(mytime)
end if
@@ -1190,7 +1192,7 @@ contains
integer :: k
- !$acc parallel loop gang vector default(present) private(k)
+ $:GPU_PARALLEL_LOOP(private='[k]')
do k = 1, nBubs
gas_p(k, 2) = gas_p(k, 1)
gas_mv(k, 2) = gas_mv(k, 1)
@@ -1289,7 +1291,7 @@ contains
if (dir == 1) then
! Gradient in x dir.
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do k = 0, p
do j = 0, n
do i = 0, m
@@ -1304,7 +1306,7 @@ contains
else
if (dir == 2) then
! Gradient in y dir.
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do k = 0, p
do j = 0, n
do i = 0, m
@@ -1318,7 +1320,7 @@ contains
end do
else
! Gradient in z dir.
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do k = 0, p
do j = 0, n
do i = 0, m
@@ -1412,8 +1414,9 @@ contains
lag_void_max = 0._wp
lag_void_avg = 0._wp
lag_vol = 0._wp
- !$acc parallel loop collapse(3) gang vector default(present) reduction(+:lag_vol,lag_void_avg) &
- !$acc reduction(MAX:lag_void_max) copy(lag_vol, lag_void_avg, lag_void_max)
+ $:GPU_PARALLEL_LOOP(collapse=3, reduction='[[lag_vol, lag_void_avg], &
+ & [lag_void_max]]', reductionOp='[+, MAX]', &
+ & copy='[lag_vol, lag_void_avg, lag_void_max]')
do k = 0, p
do j = 0, n
do i = 0, m
@@ -1597,8 +1600,8 @@ contains
integer :: k
- !$acc parallel loop gang vector default(present) reduction(MAX:Rmax_glb) &
- !$acc reduction(MIN: Rmin_glb) copy(Rmax_glb, Rmin_glb)
+ $:GPU_PARALLEL_LOOP(reduction='[[Rmax_glb], [Rmin_glb]]', &
+ & reductionOp='[MAX, MIN]', copy='[Rmax_glb,Rmin_glb]')
do k = 1, nBubs
Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k))
Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k))
@@ -1617,7 +1620,7 @@ contains
write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat'
file_loc = trim(case_dir)//'/D/'//trim(file_loc)
- !$acc update host(Rmax_glb, Rmin_glb)
+ $:GPU_UPDATE(host='[Rmax_glb,Rmin_glb]')
open (13, FILE=trim(file_loc), FORM='formatted', position='rewind')
write (13, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb'
@@ -1645,7 +1648,7 @@ contains
integer :: i
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = bub_id, nBubs - 1
lag_id(i, 1) = lag_id(i + 1, 1)
bub_R0(i) = bub_R0(i + 1)
@@ -1670,7 +1673,7 @@ contains
end do
nBubs = nBubs - 1
- !$acc update device(nBubs)
+ $:GPU_UPDATE(device='[nBubs]')
end subroutine s_remove_lag_bubble
diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp
index 47566eac19..48ea3bad9a 100644
--- a/src/simulation/m_bubbles_EL_kernels.fpp
+++ b/src/simulation/m_bubbles_EL_kernels.fpp
@@ -55,7 +55,7 @@ contains
real(wp), dimension(3) :: s_coord
integer :: l
- !$acc parallel loop gang vector default(present) private(l, s_coord, cell)
+ $:GPU_PARALLEL_LOOP(private='[l,s_coord,cell]')
do l = 1, nBubs
volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp
@@ -74,19 +74,19 @@ contains
!Update void fraction field
addFun1 = strength_vol/Vol
- !$acc atomic update
+ $:GPU_ATOMIC(atomic='update')
updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) + addFun1
!Update time derivative of void fraction
addFun2 = strength_vel/Vol
- !$acc atomic update
+ $:GPU_ATOMIC(atomic='update')
updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) + addFun2
!Product of two smeared functions
!Update void fraction * time derivative of void fraction
if (lag_params%cluster_type >= 4) then
addFun3 = (strength_vol*strength_vel)/Vol
- !$acc atomic update
+ $:GPU_ATOMIC(atomic='update')
updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3
end if
end do
@@ -120,7 +120,7 @@ contains
smearGridz = smearGrid
if (p == 0) smearGridz = 1
- !$acc parallel loop gang vector default(present) private(nodecoord, l, s_coord, cell, center) copyin(smearGrid, smearGridz)
+ $:GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]')
do l = 1, nBubs
nodecoord(1:3) = 0
center(1:3) = 0._wp
@@ -134,7 +134,7 @@ contains
strength_vol = volpart
strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2)
- !$acc loop collapse(3) private(cellaux, nodecoord)
+ $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]')
do i = 1, smearGrid
do j = 1, smearGrid
do k = 1, smearGridz
@@ -170,14 +170,14 @@ contains
!Update void fraction field
addFun1 = func*strength_vol
- !$acc atomic update
+ $:GPU_ATOMIC(atomic='update')
updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = &
updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) &
+ addFun1
!Update time derivative of void fraction
addFun2 = func*strength_vel
- !$acc atomic update
+ $:GPU_ATOMIC(atomic='update')
updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = &
updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) &
+ addFun2
@@ -186,7 +186,7 @@ contains
!Update void fraction * time derivative of void fraction
if (lag_params%cluster_type >= 4) then
addFun3 = func2*strength_vol*strength_vel
- !$acc atomic update
+ $:GPU_ATOMIC(atomic='update')
updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = &
updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) &
+ addFun3
@@ -200,11 +200,9 @@ contains
!> The purpose of this subroutine is to apply the Gaussian kernel function for each bubble (Maeda and Colonius, 2018).
pure subroutine s_applygaussian(center, cellaux, nodecoord, stddsv, strength_idx, func)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_applygaussian
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_applygaussian',parallelism='[seq]', &
+ & cray_inline=True)
+
real(wp), dimension(3), intent(in) :: center
integer, dimension(3), intent(in) :: cellaux
real(wp), dimension(3), intent(in) :: nodecoord
@@ -270,11 +268,9 @@ contains
!! @param cellaux Tested cell to smear the bubble effect in.
!! @param celloutside If true, then cellaux is outside the computational domain.
pure subroutine s_check_celloutside(cellaux, celloutside)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_check_celloutside
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_check_celloutside',parallelism='[seq]', &
+ & cray_inline=True)
+
integer, dimension(3), intent(inout) :: cellaux
logical, intent(out) :: celloutside
@@ -306,11 +302,9 @@ contains
!! @param cell Cell of the current bubble
!! @param cellaux Cell to map the bubble effect in.
pure subroutine s_shift_cell_symmetric_bc(cellaux, cell)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_shift_cell_symmetric_bc
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_shift_cell_symmetric_bc', &
+ & parallelism='[seq]', cray_inline=True)
+
integer, dimension(3), intent(inout) :: cellaux
integer, dimension(3), intent(in) :: cell
@@ -347,11 +341,9 @@ contains
!! @param volpart Volume of the bubble
!! @param stddsv Standard deviation
pure subroutine s_compute_stddsv(cell, volpart, stddsv)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_stddsv
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_stddsv',parallelism='[seq]', &
+ & cray_inline=True)
+
integer, dimension(3), intent(in) :: cell
real(wp), intent(in) :: volpart
real(wp), intent(out) :: stddsv
@@ -388,11 +380,9 @@ contains
!! @param cell Computational coordinates (x, y, z)
!! @param Charvol Characteristic volume
pure elemental subroutine s_get_char_vol(cellx, celly, cellz, Charvol)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_get_char_vol
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_get_char_vol',parallelism='[seq]', &
+ & cray_inline=True)
+
integer, intent(in) :: cellx, celly, cellz
real(wp), intent(out) :: Charvol
@@ -413,11 +403,9 @@ contains
!! @param s Computational coordinates of the bubble, real type
!! @param get_cell Computational coordinates of the bubble, integer type
pure subroutine s_get_cell(s_cell, get_cell)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_get_cell
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_get_cell',parallelism='[seq]', &
+ & cray_inline=True)
+
real(wp), dimension(3), intent(in) :: s_cell
integer, dimension(3), intent(out) :: get_cell
integer :: i
diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp
index 6fb5438a2f..d655f1bbf6 100644
--- a/src/simulation/m_cbc.fpp
+++ b/src/simulation/m_cbc.fpp
@@ -48,6 +48,7 @@ module m_cbc
real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsx_vf
real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsy_vf
real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsz_vf
+ $:GPU_DECLARE(create='[q_prim_rsx_vf,q_prim_rsy_vf,q_prim_rsz_vf]')
!! Cell-average fluxes (src - source). These are directly determined from the
!! cell-average primitive variables, q_prims_rs_vf, and not a Riemann solver.
@@ -55,6 +56,7 @@ module m_cbc
real(wp), allocatable, dimension(:, :, :, :) :: F_rsx_vf, F_src_rsx_vf !<
real(wp), allocatable, dimension(:, :, :, :) :: F_rsy_vf, F_src_rsy_vf !<
real(wp), allocatable, dimension(:, :, :, :) :: F_rsz_vf, F_src_rsz_vf !<
+ $:GPU_DECLARE(create='[F_rsx_vf,F_src_rsx_vf,F_rsy_vf,F_src_rsy_vf,F_rsz_vf,F_src_rsz_vf]')
!! There is a CCE bug that is causing some subset of these variables to interfere
!! with variables of the same name in m_riemann_solvers.fpp, and giving these versions
@@ -65,9 +67,10 @@ module m_cbc
real(wp), allocatable, dimension(:, :, :, :) :: flux_rsx_vf_l, flux_src_rsx_vf_l !<
real(wp), allocatable, dimension(:, :, :, :) :: flux_rsy_vf_l, flux_src_rsy_vf_l
real(wp), allocatable, dimension(:, :, :, :) :: flux_rsz_vf_l, flux_src_rsz_vf_l
+ $:GPU_DECLARE(create='[flux_rsx_vf_l,flux_src_rsx_vf_l,flux_rsy_vf_l,flux_src_rsy_vf_l,flux_rsz_vf_l,flux_src_rsz_vf_l]')
real(wp) :: dpres_ds !< Spatial derivatives in s-dir of pressure
- !$acc declare create(dpres_ds)
+ $:GPU_DECLARE(create='[dpres_ds]')
real(wp), allocatable, dimension(:) :: ds !< Cell-width distribution in the s-direction
@@ -87,18 +90,21 @@ module m_cbc
real(wp), allocatable, dimension(:, :, :) :: pi_coef_y !< Polynomial interpolant coefficients in y-dir
real(wp), allocatable, dimension(:, :, :) :: pi_coef_z !< Polynomial interpolant coefficients in z-dir
+ $:GPU_DECLARE(create='[ds,fd_coef_x,fd_coef_y,fd_coef_z,pi_coef_x,pi_coef_y,pi_coef_z]')
+
!! The first dimension of the array identifies the polynomial, the
!! second dimension identifies the position of its coefficients and the last
!! dimension denotes the location of the CBC.
type(int_bounds_info) :: is1, is2, is3 !< Indicial bounds in the s1-, s2- and s3-directions
- !$acc declare create(is1, is2, is3)
+ $:GPU_DECLARE(create='[is1,is2,is3]')
integer :: dj
integer :: bcxb, bcxe, bcyb, bcye, bczb, bcze
integer :: cbc_dir, cbc_loc
integer :: flux_cbc_index
- !$acc declare create(dj, bcxb, bcxe, bcyb, bcye, bczb, bcze, cbc_dir, cbc_loc, flux_cbc_index)
+ $:GPU_DECLARE(create='[dj,bcxb,bcxe,bcyb,bcye,bczb,bcze]')
+ $:GPU_DECLARE(create='[cbc_dir,cbc_loc,flux_cbc_index]')
!! GRCBC inputs for subsonic inflow and outflow conditions consisting of
!! inflow velocities, pressure, density and void fraction as well as
@@ -107,14 +113,9 @@ module m_cbc
real(wp), allocatable, dimension(:) :: pres_in, pres_out, Del_in, Del_out
real(wp), allocatable, dimension(:, :) :: vel_in, vel_out
real(wp), allocatable, dimension(:, :) :: alpha_rho_in, alpha_in
- !$acc declare create(pres_in, pres_out, Del_in, Del_out)
- !$acc declare create(vel_in, vel_out)
- !$acc declare create(alpha_rho_in, alpha_in)
-
- !$acc declare create(q_prim_rsx_vf, q_prim_rsy_vf, q_prim_rsz_vf, F_rsx_vf, F_src_rsx_vf,flux_rsx_vf_l, flux_src_rsx_vf_l, &
- !$acc F_rsy_vf, F_src_rsy_vf,flux_rsy_vf_l, flux_src_rsy_vf_l, F_rsz_vf, F_src_rsz_vf,flux_rsz_vf_l, flux_src_rsz_vf_l, &
- !$acc ds,fd_coef_x,fd_coef_y,fd_coef_z, &
- !$acc pi_coef_x,pi_coef_y,pi_coef_z)
+ $:GPU_DECLARE(create='[pres_in,pres_out,Del_in,Del_out]')
+ $:GPU_DECLARE(create='[vel_in,vel_out]')
+ $:GPU_DECLARE(create='[alpha_rho_in,alpha_in]')
contains
@@ -131,7 +132,7 @@ contains
else
flux_cbc_index = adv_idx%end
end if
- !$acc update device(flux_cbc_index)
+ $:GPU_UPDATE(device='[flux_cbc_index]')
call s_any_cbc_boundaries(is_cbc)
@@ -381,7 +382,8 @@ contains
end if
- !$acc update device(fd_coef_x, fd_coef_y, fd_coef_z, pi_coef_x, pi_coef_y, pi_coef_z)
+ $:GPU_UPDATE(device='[fd_coef_x,fd_coef_y,fd_coef_z, &
+ & pi_coef_x,pi_coef_y,pi_coef_z]')
! Associating the procedural pointer to the appropriate subroutine
! that will be utilized in the conversion to the mixture variables
@@ -389,20 +391,20 @@ contains
bcxb = bc_x%beg
bcxe = bc_x%end
- !$acc update device(bcxb, bcxe)
+ $:GPU_UPDATE(device='[bcxb, bcxe]')
if (n > 0) then
bcyb = bc_y%beg
bcye = bc_y%end
- !$acc update device(bcyb, bcye)
+ $:GPU_UPDATE(device='[bcyb, bcye]')
end if
if (p > 0) then
bczb = bc_z%beg
bcze = bc_z%end
- !$acc update device(bczb, bcze)
+ $:GPU_UPDATE(device='[bczb, bcze]')
end if
! Allocate GRCBC inputs
@@ -434,7 +436,8 @@ contains
end do
end if
#:endfor
- !$acc update device(vel_in, vel_out, pres_in, pres_out, Del_in, Del_out, alpha_rho_in, alpha_in)
+ $:GPU_UPDATE(device='[vel_in,vel_out,pres_in,pres_out, &
+ & Del_in,Del_out,alpha_rho_in,alpha_in]')
end subroutine s_initialize_cbc_module
@@ -598,7 +601,7 @@ contains
end if
- !$acc update device(ds)
+ $:GPU_UPDATE(device='[ds]')
end subroutine s_associate_cbc_coefficients_pointers
@@ -674,7 +677,7 @@ contains
cbc_dir = cbc_dir_norm
cbc_loc = cbc_loc_norm
- !$acc update device(cbc_dir, cbc_loc)
+ $:GPU_UPDATE(device='[cbc_dir, cbc_loc]')
call s_initialize_cbc(q_prim_vf, flux_vf, flux_src_vf, &
ix, iy, iz)
@@ -692,7 +695,7 @@ contains
F_src_rs${XYZ}$_vf, &
is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg)
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = 1, flux_cbc_index
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -704,7 +707,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = advxb, advxe
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -723,7 +726,7 @@ contains
F_src_rs${XYZ}$_vf, &
is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg)
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, flux_cbc_index
do j = 0, 1
do r = is3%beg, is3%end
@@ -743,7 +746,7 @@ contains
end do
end do
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = advxb, advxe
do j = 0, 1
do r = is3%beg, is3%end
@@ -766,30 +769,33 @@ contains
end if
! FD2 or FD4 of RHS at j = 0
- !$acc parallel loop collapse(2) gang vector default(present) private(alpha_rho, vel, adv, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt,L, lambda,Ys,dYs_dt,dYs_ds,h_k,Cp_i,Gamma_i,Xs)
+ $:GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv, &
+ & mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds, dvel_dt, &
+ & dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, &
+ & dYs_ds, h_k, Cp_i, Gamma_i, Xs]')
do r = is3%beg, is3%end
do k = is2%beg, is2%end
! Transferring the Primitive Variables
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i)
end do
vel_K_sum = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel_K_sum = vel_K_sum + vel(i)**2._wp
end do
pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, advxe - E_idx
adv(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i)
end do
@@ -800,13 +806,13 @@ contains
call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv, alpha_rho, Re_cbc)
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
mf(i) = alpha_rho(i)/rho
end do
if (chemistry) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = chemxb, chemxe
Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i)
end do
@@ -839,39 +845,39 @@ contains
! First-Order Spatial Derivatives of Primitive Variables
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
dalpha_rho_ds(i) = 0._wp
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
dvel_ds(i) = 0._wp
end do
dpres_ds = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, advxe - E_idx
dadv_ds(i) = 0._wp
end do
if (chemistry) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_species
dYs_ds(i) = 0._wp
end do
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do j = 0, buff_size
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* &
fd_coef_${XYZ}$ (j, cbc_loc) + &
dalpha_rho_ds(i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* &
fd_coef_${XYZ}$ (j, cbc_loc) + &
@@ -881,7 +887,7 @@ contains
dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* &
fd_coef_${XYZ}$ (j, cbc_loc) + &
dpres_ds
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, advxe - E_idx
dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* &
fd_coef_${XYZ}$ (j, cbc_loc) + &
@@ -889,7 +895,7 @@ contains
end do
if (chemistry) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_species
dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* &
fd_coef_${XYZ}$ (j, cbc_loc) + &
@@ -916,7 +922,7 @@ contains
call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds)
! Add GRCBC for Subsonic Inflow
if (bc_${XYZ}$%grcbc_in) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 2, momxb
L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$)
end do
@@ -926,7 +932,7 @@ contains
L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$)
end if
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = E_idx, advxe - 1
L(i) = c*Ma*(adv(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$)
end do
@@ -966,13 +972,13 @@ contains
dpres_dt = -5.e-1_wp*(L(advxe) + L(1))
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
dalpha_rho_dt(i) = &
-(L(i + 1) - mf(i)*dpres_dt)/(c*c)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* &
(L(1) - L(advxe))/(2._wp*rho*c) + &
@@ -981,13 +987,13 @@ contains
end do
vel_dv_dt_sum = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i)
end do
if (chemistry) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_species
dYs_dt(i) = -1._wp*L(chemxb + i - 1)
end do
@@ -995,12 +1001,12 @@ contains
! The treatment of void fraction source is unclear
if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, advxe - E_idx
dadv_dt(i) = -L(momxe + i) !+ adv(i) * vel(dir_idx(1))/y_cc(n)
end do
else
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, advxe - E_idx
dadv_dt(i) = -L(momxe + i)
end do
@@ -1013,7 +1019,7 @@ contains
dgamma_dt = dadv_dt(1)
dpi_inf_dt = dadv_dt(2)
else
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
drho_dt = drho_dt + dalpha_rho_dt(i)
dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i)
@@ -1023,13 +1029,13 @@ contains
end if
! flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) &
+ ds(0)*dalpha_rho_dt(i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, momxe
flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) &
+ ds(0)*(vel(i - contxe)*drho_dt &
@@ -1040,14 +1046,14 @@ contains
! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990
call get_species_enthalpies_rt(T, h_k)
sum_Enthalpies = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_species
h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T
sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i)
end do
flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) &
+ ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_species
flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) &
+ ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i))
@@ -1063,12 +1069,12 @@ contains
end if
if (riemann_solver == 1) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = &
1._wp/max(abs(vel(dir_idx(1))), sgm_eps) &
@@ -1081,13 +1087,13 @@ contains
else
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + &
ds(0)*dadv_dt(i - E_idx)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i)
end do
@@ -1151,13 +1157,13 @@ contains
end if
dj = max(0, cbc_loc)
- !$acc update device(is1, is2, is3, dj)
- !$acc update device( dir_idx, dir_flg)
+ $:GPU_UPDATE(device='[is1,is2,is3,dj]')
+ $:GPU_UPDATE(device='[dir_idx,dir_flg]')
! Reshaping Inputted Data in x-direction
if (cbc_dir == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1169,7 +1175,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = 0, buff_size
@@ -1180,7 +1186,7 @@ contains
end do
end do
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, flux_cbc_index
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1193,7 +1199,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1204,7 +1210,7 @@ contains
end do
if (riemann_solver == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = advxb, advxe
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1216,7 +1222,7 @@ contains
end do
end do
else
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1233,7 +1239,7 @@ contains
! Reshaping Inputted Data in y-direction
elseif (cbc_dir == 2) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1245,7 +1251,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = 0, buff_size
@@ -1256,7 +1262,7 @@ contains
end do
end do
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, flux_cbc_index
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1269,7 +1275,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1280,7 +1286,7 @@ contains
end do
if (riemann_solver == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = advxb, advxe
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1292,7 +1298,7 @@ contains
end do
end do
else
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1309,7 +1315,7 @@ contains
! Reshaping Inputted Data in z-direction
else
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1321,7 +1327,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = 0, buff_size
@@ -1332,7 +1338,7 @@ contains
end do
end do
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, flux_cbc_index
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1345,7 +1351,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1356,7 +1362,7 @@ contains
end do
if (riemann_solver == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = advxb, advxe
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1368,7 +1374,7 @@ contains
end do
end do
else
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1402,12 +1408,12 @@ contains
! Determining the indicial shift based on CBC location
dj = max(0, cbc_loc)
- !$acc update device(dj)
+ $:GPU_UPDATE(device='[dj]')
! Reshaping Outputted Data in x-direction
if (cbc_dir == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, flux_cbc_index
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1419,7 +1425,7 @@ contains
end do
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1430,7 +1436,7 @@ contains
end do
if (riemann_solver == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = advxb, advxe
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1442,7 +1448,7 @@ contains
end do
end do
else
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1458,7 +1464,7 @@ contains
! Reshaping Outputted Data in y-direction
elseif (cbc_dir == 2) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, flux_cbc_index
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1471,7 +1477,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1482,7 +1488,7 @@ contains
end do
if (riemann_solver == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = advxb, advxe
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1494,7 +1500,7 @@ contains
end do
end do
else
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1511,7 +1517,7 @@ contains
! Reshaping Outputted Data in z-direction
else
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, flux_cbc_index
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1524,7 +1530,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
@@ -1535,7 +1541,7 @@ contains
end do
if (riemann_solver == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = advxb, advxe
do r = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1547,7 +1553,7 @@ contains
end do
end do
else
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do r = is3%beg, is3%end
do k = is2%beg, is2%end
do j = -1, buff_size
diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp
index 022a06175d..694f6735b2 100644
--- a/src/simulation/m_compute_cbc.fpp
+++ b/src/simulation/m_compute_cbc.fpp
@@ -2,6 +2,8 @@
!! @file m_compute_cbc.f90
!! @brief CBC computation module
+#:include 'macros.fpp'
+
module m_compute_cbc
use m_global_parameters
implicit none
@@ -18,7 +20,7 @@ module m_compute_cbc
contains
!> Base L1 calculation
pure function f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) result(L1)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), dimension(3), intent(in) :: lambda
real(wp), intent(in) :: rho, c, dpres_ds
real(wp), dimension(num_dims), intent(in) :: dvel_ds
@@ -28,7 +30,7 @@ contains
!> Fill density L variables
pure subroutine s_fill_density_L(L, lambda_factor, lambda2, c, mf, dalpha_rho_ds, dpres_ds)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: lambda_factor, lambda2, c
real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds
@@ -42,7 +44,7 @@ contains
!> Fill velocity L variables
pure subroutine s_fill_velocity_L(L, lambda_factor, lambda2, dvel_ds)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: lambda_factor, lambda2
real(wp), dimension(num_dims), intent(in) :: dvel_ds
@@ -55,7 +57,7 @@ contains
!> Fill advection L variables
pure subroutine s_fill_advection_L(L, lambda_factor, lambda2, dadv_ds)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: lambda_factor, lambda2
real(wp), dimension(num_fluids), intent(in) :: dadv_ds
@@ -68,7 +70,7 @@ contains
!> Fill chemistry L variables
pure subroutine s_fill_chemistry_L(L, lambda_factor, lambda2, dYs_ds)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: lambda_factor, lambda2
real(wp), dimension(num_species), intent(in) :: dYs_ds
@@ -83,11 +85,9 @@ contains
!> Slip wall CBC (Thompson 1990, pg. 451)
pure subroutine s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_slip_wall_L
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_slip_wall_L',parallelism='[seq]', &
+ & cray_inline=True)
+
real(wp), dimension(3), intent(in) :: lambda
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: rho, c, dpres_ds
@@ -101,11 +101,9 @@ contains
!> Nonreflecting subsonic buffer CBC (Thompson 1987, pg. 13)
pure subroutine s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_buffer_L
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_nonreflecting_subsonic_buffer_L', &
+ & parallelism='[seq]', cray_inline=True)
+
real(wp), dimension(3), intent(in) :: lambda
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: rho, c
@@ -131,11 +129,9 @@ contains
!> Nonreflecting subsonic inflow CBC (Thompson 1990, pg. 455)
pure subroutine s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_inflow_L
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_nonreflecting_subsonic_inflow_L', &
+ & parallelism='[seq]', cray_inline=True)
+
real(wp), dimension(3), intent(in) :: lambda
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: rho, c, dpres_ds
@@ -148,11 +144,9 @@ contains
!> Nonreflecting subsonic outflow CBC (Thompson 1990, pg. 454)
pure subroutine s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_outflow_L
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_nonreflecting_subsonic_outflow_L', &
+ & parallelism='[seq]', cray_inline=True)
+
real(wp), dimension(3), intent(in) :: lambda
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: rho, c
@@ -172,11 +166,9 @@ contains
!> Force-free subsonic outflow CBC (Thompson 1990, pg. 454)
pure subroutine s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_force_free_subsonic_outflow_L
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_force_free_subsonic_outflow_L', &
+ & parallelism='[seq]', cray_inline=True)
+
real(wp), dimension(3), intent(in) :: lambda
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: rho, c
@@ -194,11 +186,9 @@ contains
!> Constant pressure subsonic outflow CBC (Thompson 1990, pg. 455)
pure subroutine s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_constant_pressure_subsonic_outflow_L
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_constant_pressure_subsonic_outflow_L', &
+ & parallelism='[seq]', cray_inline=True)
+
real(wp), dimension(3), intent(in) :: lambda
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: rho, c
@@ -216,11 +206,9 @@ contains
!> Supersonic inflow CBC (Thompson 1990, pg. 453)
pure subroutine s_compute_supersonic_inflow_L(L)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_supersonic_inflow_L
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_supersonic_inflow_L', &
+ & parallelism='[seq]', cray_inline=True)
+
real(wp), dimension(sys_size), intent(inout) :: L
L(1:advxe) = 0._wp
if (chemistry) L(chemxb:chemxe) = 0._wp
@@ -228,11 +216,9 @@ contains
!> Supersonic outflow CBC (Thompson 1990, pg. 453)
pure subroutine s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_supersonic_outflow_L
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_supersonic_outflow_L', &
+ & parallelism='[seq]', cray_inline=True)
+
real(wp), dimension(3), intent(in) :: lambda
real(wp), dimension(sys_size), intent(inout) :: L
real(wp), intent(in) :: rho, c
diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp
index 552e52995b..f233bb5374 100644
--- a/src/simulation/m_data_output.fpp
+++ b/src/simulation/m_data_output.fpp
@@ -56,13 +56,14 @@ module m_data_output
real(wp), allocatable, dimension(:, :, :) :: ccfl_sf !< CCFL stability criterion
real(wp), allocatable, dimension(:, :, :) :: Rc_sf !< Rc stability criterion
real(wp), public, allocatable, dimension(:, :) :: c_mass
- !$acc declare create(icfl_sf, vcfl_sf, ccfl_sf, Rc_sf, c_mass)
+ $:GPU_DECLARE(create='[icfl_sf,vcfl_sf,ccfl_sf,Rc_sf,c_mass]')
real(wp) :: icfl_max_loc, icfl_max_glb !< ICFL stability extrema on local and global grids
real(wp) :: vcfl_max_loc, vcfl_max_glb !< VCFL stability extrema on local and global grids
real(wp) :: ccfl_max_loc, ccfl_max_glb !< CCFL stability extrema on local and global grids
real(wp) :: Rc_min_loc, Rc_min_glb !< Rc stability extrema on local and global grids
- !$acc declare create(icfl_max_loc, icfl_max_glb, vcfl_max_loc, vcfl_max_glb, ccfl_max_loc, ccfl_max_glb, Rc_min_loc, Rc_min_glb)
+ $:GPU_DECLARE(create='[icfl_max_loc,icfl_max_glb,vcfl_max_loc,vcfl_max_glb]')
+ $:GPU_DECLARE(create='[ccfl_max_loc,ccfl_max_glb,Rc_min_loc,Rc_min_glb]')
!> @name ICFL, VCFL, CCFL and Rc stability criteria extrema over all the time-steps
!> @{
@@ -279,7 +280,7 @@ contains
integer :: j, k, l
! Computing Stability Criteria at Current Time-step
- !$acc parallel loop collapse(3) gang vector default(present) private(vel, alpha, Re)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]')
do l = 0, p
do k = 0, n
do j = 0, m
@@ -296,17 +297,16 @@ contains
end do
end do
end do
- !$acc end parallel loop
! end: Computing Stability Criteria at Current Time-step
! Determining local stability criteria extrema at current time-step
#ifdef _CRAYFTN
- !$acc update host(icfl_sf)
+ $:GPU_UPDATE(host='[icfl_sf]')
if (viscous) then
- !$acc update host(vcfl_sf, Rc_sf)
+ $:GPU_UPDATE(host='[vcfl_sf,Rc_sf]')
end if
icfl_max_loc = maxval(icfl_sf)
@@ -316,15 +316,14 @@ contains
Rc_min_loc = minval(Rc_sf)
end if
#else
- !$acc kernels
- icfl_max_loc = maxval(icfl_sf)
- !$acc end kernels
-
+ #:call GPU_PARALLEL()
+ icfl_max_loc = maxval(icfl_sf)
+ #:endcall GPU_PARALLEL
if (viscous) then
- !$acc kernels
- vcfl_max_loc = maxval(vcfl_sf)
- Rc_min_loc = minval(Rc_sf)
- !$acc end kernels
+ #:call GPU_PARALLEL()
+ vcfl_max_loc = maxval(vcfl_sf)
+ Rc_min_loc = minval(Rc_sf)
+ #:endcall GPU_PARALLEL
end if
#endif
@@ -527,7 +526,7 @@ contains
if (prim_vars_wrt .or. (n == 0 .and. p == 0)) then
call s_convert_conservative_to_primitive_variables(q_cons_vf, q_T_sf, q_prim_vf, idwint)
do i = 1, sys_size
- !$acc update host(q_prim_vf(i)%sf(:,:,:))
+ $:GPU_UPDATE(host='[q_prim_vf(i)%sf(:,:,:)]')
end do
! q_prim_vf(bubxb) stores the value of nb needed in riemann solvers, so replace with true primitive value (=1._wp)
if (qbmm) then
diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp
index 3c18a8c1fe..87f612a76b 100644
--- a/src/simulation/m_fftw.fpp
+++ b/src/simulation/m_fftw.fpp
@@ -46,12 +46,12 @@ module m_fftw
!! Filtered complex data in Fourier space
#if defined(MFC_OpenACC)
- !$acc declare create(real_size, cmplx_size, x_size, batch_size, Nfq)
+ $:GPU_DECLARE(create='[real_size,cmplx_size,x_size,batch_size,Nfq]')
real(dp), allocatable, target :: data_real_gpu(:)
complex(dp), allocatable, target :: data_cmplx_gpu(:)
complex(dp), allocatable, target :: data_fltr_cmplx_gpu(:)
-!$acc declare create(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
+ $:GPU_DECLARE(create='[data_real_gpu,data_cmplx_gpu,data_fltr_cmplx_gpu]')
#if defined(__PGI)
integer :: fwd_plan_gpu, bwd_plan_gpu
@@ -89,8 +89,8 @@ contains
gpu_fft_size(1) = real_size;
iembed(1) = 0
oembed(1) = 0
- !$acc enter data copyin(real_size, cmplx_size, x_size, sys_size, batch_size, Nfq)
- !$acc update device(real_size, cmplx_size, x_size, sys_size, batch_size)
+ $:GPU_ENTER_DATA(copyin='[real_size,cmplx_size,x_size,sys_size,batch_size,Nfq]')
+ $:GPU_UPDATE(device='[real_size,cmplx_size,x_size,sys_size,batch_size]')
#else
! Allocate input and output DFT data sizes
fftw_real_data = fftw_alloc_real(int(real_size, c_size_t))
@@ -139,7 +139,7 @@ contains
if (bc_y%beg >= 0) return
#if defined(MFC_OpenACC)
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do k = 1, sys_size
do j = 0, m
do l = 1, cmplx_size
@@ -148,7 +148,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do k = 1, sys_size
do j = 0, m
do l = 0, p
@@ -161,139 +161,139 @@ contains
p_cmplx => data_cmplx_gpu
p_fltr_cmplx => data_fltr_cmplx_gpu
-!$acc data attach(p_real, p_cmplx, p_fltr_cmplx)
-!$acc host_data use_device(p_real, p_cmplx, p_fltr_cmplx)
+ #:call GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]')
+ #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]')
#if defined(__PGI)
- ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
+ ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
#else
- ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx))
- call hipCheck(hipDeviceSynchronize())
+ ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx))
+ call hipCheck(hipDeviceSynchronize())
#endif
- !$acc end host_data
- Nfq = 3
- !$acc update device(Nfq)
+ #:endcall GPU_HOST_DATA
+ Nfq = 3
+ $:GPU_UPDATE(device='[Nfq]')
- !$acc parallel loop collapse(3) gang vector default(present)
- do k = 1, sys_size
- do j = 0, m
- do l = 1, Nfq
- data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size)
+ $:GPU_PARALLEL_LOOP(collapse=3)
+ do k = 1, sys_size
+ do j = 0, m
+ do l = 1, Nfq
+ data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size)
+ end do
end do
end do
- end do
-!$acc host_data use_device(p_real, p_fltr_cmplx)
+ #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]')
#if defined(__PGI)
- ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
+ ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
#else
- ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real))
- call hipCheck(hipDeviceSynchronize())
+ ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real))
+ call hipCheck(hipDeviceSynchronize())
#endif
- !$acc end host_data
+ #:endcall GPU_HOST_DATA
- !$acc parallel loop collapse(3) gang vector default(present)
- do k = 1, sys_size
- do j = 0, m
- do l = 0, p
- data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp)
- q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
+ $:GPU_PARALLEL_LOOP(collapse=3)
+ do k = 1, sys_size
+ do j = 0, m
+ do l = 0, p
+ data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp)
+ q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
+ end do
end do
end do
- end do
- do i = 1, fourier_rings
+ do i = 1, fourier_rings
- !$acc parallel loop collapse(3) gang vector default(present)
- do k = 1, sys_size
- do j = 0, m
- do l = 1, cmplx_size
- data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp)
+ $:GPU_PARALLEL_LOOP(collapse=3)
+ do k = 1, sys_size
+ do j = 0, m
+ do l = 1, cmplx_size
+ data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp)
+ end do
end do
end do
- end do
- !$acc parallel loop collapse(3) gang vector default(present) firstprivate(i)
- do k = 1, sys_size
- do j = 0, m
- do l = 0, p
- data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l)
+ $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
+ do k = 1, sys_size
+ do j = 0, m
+ do l = 0, p
+ data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l)
+ end do
end do
end do
- end do
-!$acc host_data use_device(p_real, p_cmplx)
+ #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx]')
#if defined(__PGI)
- ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
+ ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
#else
- ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx))
- call hipCheck(hipDeviceSynchronize())
+ ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx))
+ call hipCheck(hipDeviceSynchronize())
#endif
- !$acc end host_data
+ #:endcall GPU_HOST_DATA
- Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
- !$acc update device(Nfq)
+ Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
+ $:GPU_UPDATE(device='[Nfq]')
- !$acc parallel loop collapse(3) gang vector default(present)
- do k = 1, sys_size
- do j = 0, m
- do l = 1, Nfq
- data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size)
+ $:GPU_PARALLEL_LOOP(collapse=3)
+ do k = 1, sys_size
+ do j = 0, m
+ do l = 1, Nfq
+ data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size)
+ end do
end do
end do
- end do
-!$acc host_data use_device(p_real, p_fltr_cmplx)
+ #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]')
#if defined(__PGI)
- ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
+ ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
#else
- ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real))
- call hipCheck(hipDeviceSynchronize())
+ ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real))
+ call hipCheck(hipDeviceSynchronize())
#endif
- !$acc end host_data
+ #:endcall GPU_HOST_DATA
- !$acc parallel loop collapse(3) gang vector default(present) firstprivate(i)
- do k = 1, sys_size
- do j = 0, m
- do l = 0, p
- data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp)
- q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
+ $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
+ do k = 1, sys_size
+ do j = 0, m
+ do l = 0, p
+ data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp)
+ q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
+ end do
end do
end do
- end do
- end do
-
-#else
- Nfq = 3
- do j = 0, m
- do k = 1, sys_size
- data_fltr_cmplx(:) = (0_dp, 0_dp)
- data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p)
- call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx)
- data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq)
- call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real)
- data_real(:) = data_real(:)/real(real_size, dp)
- q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1)
end do
- end do
- ! Apply Fourier filter to additional rings
- do i = 1, fourier_rings
- Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
+#else
+ Nfq = 3
do j = 0, m
do k = 1, sys_size
data_fltr_cmplx(:) = (0_dp, 0_dp)
- data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p)
+ data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p)
call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx)
data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq)
call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real)
data_real(:) = data_real(:)/real(real_size, dp)
- q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1)
+ q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1)
+ end do
+ end do
+
+ ! Apply Fourier filter to additional rings
+ do i = 1, fourier_rings
+ Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
+ do j = 0, m
+ do k = 1, sys_size
+ data_fltr_cmplx(:) = (0_dp, 0_dp)
+ data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p)
+ call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx)
+ data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq)
+ call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real)
+ data_real(:) = data_real(:)/real(real_size, dp)
+ q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1)
+ end do
end do
end do
- end do
#endif
-!$acc end data
+ #:endcall GPU_DATA
end subroutine s_apply_fourier_filter
!> The purpose of this subroutine is to destroy the fftw plan
diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp
index 9362f013ad..560f71d1a3 100644
--- a/src/simulation/m_global_parameters.fpp
+++ b/src/simulation/m_global_parameters.fpp
@@ -53,7 +53,7 @@ module m_global_parameters
logical :: cyl_coord
integer :: grid_geometry
!> @}
- !$acc declare create(cyl_coord, grid_geometry)
+ $:GPU_DECLARE(create='[cyl_coord,grid_geometry]')
!> @name Cell-boundary (CB) locations in the x-, y- and z-directions, respectively
!> @{
@@ -76,7 +76,7 @@ module m_global_parameters
real(wp) :: dt !< Size of the time-step
- !$acc declare create(x_cb, y_cb, z_cb, x_cc, y_cc, z_cc, dx, dy, dz, dt, m, n, p)
+ $:GPU_DECLARE(create='[x_cb,y_cb,z_cb,x_cc,y_cc,z_cc,dx,dy,dz,dt,m,n,p]')
!> @name Starting time-step iteration, stopping time-step iteration and the number
!! of time-step iterations between successive solution backups, respectively
@@ -90,7 +90,7 @@ module m_global_parameters
real(wp) :: t_stop, t_save, cfl_target
integer :: n_start
!> @}
- !$acc declare create(cfl_target)
+ $:GPU_DECLARE(create='[cfl_target]')
logical :: cfl_adap_dt, cfl_const_dt, cfl_dt
@@ -158,7 +158,7 @@ module m_global_parameters
logical :: bulk_stress !< Bulk stresses
logical :: cont_damage !< Continuum damage modeling
- !$acc declare create(chemistry)
+ $:GPU_DECLARE(create='[chemistry]')
logical :: bodyForces
logical :: bf_x, bf_y, bf_z !< body force toggle in three directions
@@ -169,24 +169,27 @@ module m_global_parameters
#:endfor
#:endfor
real(wp), dimension(3) :: accel_bf
- !$acc declare create(accel_bf)
+ $:GPU_DECLARE(create='[accel_bf]')
integer :: cpu_start, cpu_end, cpu_rate
#:if not MFC_CASE_OPTIMIZATION
- !$acc declare create(num_dims, num_vels, weno_polyn, weno_order, weno_num_stencils, num_fluids, wenojs, mapped_weno, wenoz, teno, wenoz_q, mhd, relativity)
+ $:GPU_DECLARE(create='[num_dims,num_vels,weno_polyn,weno_order]')
+ $:GPU_DECLARE(create='[weno_num_stencils,num_fluids,wenojs]')
+ $:GPU_DECLARE(create='[mapped_weno,wenoz,teno,wenoz_q,mhd,relativity]')
#:endif
- !$acc declare create(mpp_lim, model_eqns, mixture_err, alt_soundspeed, avg_state, mp_weno, weno_eps, teno_CT, hypoelasticity, hyperelasticity, hyper_model, elasticity, low_Mach, viscous, shear_stress, bulk_stress, cont_damage)
+ $:GPU_DECLARE(create='[mpp_lim,model_eqns,mixture_err,alt_soundspeed]')
+ $:GPU_DECLARE(create='[avg_state,mp_weno,weno_eps,teno_CT,hypoelasticity]')
+ $:GPU_DECLARE(create='[hyperelasticity,hyper_model,elasticity,low_Mach]')
+ $:GPU_DECLARE(create='[viscous,shear_stress,bulk_stress,cont_damage]')
logical :: relax !< activate phase change
integer :: relax_model !< Relaxation model
real(wp) :: palpha_eps !< trigger parameter for the p relaxation procedure, phase change model
real(wp) :: ptgalpha_eps !< trigger parameter for the pTg relaxation procedure, phase change model
-!#ifndef _CRAYFTN
-!$acc declare create(relax, relax_model, palpha_eps,ptgalpha_eps)
-!#endif
+ $:GPU_DECLARE(create='[relax,relax_model,palpha_eps,ptgalpha_eps]')
integer :: num_bc_patches
logical :: bc_io
@@ -194,6 +197,10 @@ module m_global_parameters
!> @{
type(int_bounds_info) :: bc_x, bc_y, bc_z
!> @}
+ $:GPU_DECLARE(create='[bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3]')
+ $:GPU_DECLARE(create='[bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3]')
+ $:GPU_DECLARE(create='[bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3]')
+
type(bounds_info) :: x_domain, y_domain, z_domain
real(wp) :: x_a, y_a, z_a
real(wp) :: x_b, y_b, z_b
@@ -246,19 +253,20 @@ module m_global_parameters
integer :: c_idx !< Index of color function
integer :: damage_idx !< Index of damage state variable (D) for continuum damage model
!> @}
-
- !$acc declare create(bub_idx)
+ $:GPU_DECLARE(create='[sys_size,E_idx,n_idx,bub_idx,alf_idx,gamma_idx]')
+ $:GPU_DECLARE(create='[pi_inf_idx,B_idx,stress_idx,xi_idx,b_size]')
+ $:GPU_DECLARE(create='[tensor_size,species_idx,c_idx]')
! Cell Indices for the (local) interior points (0-m, 0-n, 0-p).
! Stands for "InDices With INTerior".
type(int_bounds_info) :: idwint(1:3)
- !$acc declare create(idwint)
+ $:GPU_DECLARE(create='[idwint]')
! Cell Indices for the entire (local) domain. In simulation and post_process,
! this includes the buffer region. idwbuff and idwint are the same otherwise.
! Stands for "InDices With BUFFer".
type(int_bounds_info) :: idwbuff(1:3)
- !$acc declare create(idwbuff)
+ $:GPU_DECLARE(create='[idwbuff]')
!> @name The number of fluids, along with their identifying indexes, respectively,
!! for which viscous effects, e.g. the shear and/or the volume Reynolds (Re)
@@ -268,7 +276,7 @@ module m_global_parameters
integer, allocatable, dimension(:, :) :: Re_idx
!> @}
- !$acc declare create(Re_size, Re_idx)
+ $:GPU_DECLARE(create='[Re_size,Re_idx]')
! The WENO average (WA) flag regulates whether the calculation of any cell-
! average spatial derivatives is carried out in each cell by utilizing the
@@ -279,7 +287,7 @@ module m_global_parameters
real(wp) :: wa_flg
!> @{
- !$acc declare create(wa_flg)
+ $:GPU_DECLARE(create='[wa_flg]')
!> @name The coordinate direction indexes and flags (flg), respectively, for which
!! the configurations will be determined with respect to a working direction
@@ -291,14 +299,14 @@ module m_global_parameters
integer, dimension(3) :: dir_idx_tau !!used for hypoelasticity=true
!> @}
- !$acc declare create(dir_idx, dir_flg, dir_idx_tau)
+ $:GPU_DECLARE(create='[dir_idx,dir_flg,dir_idx_tau]')
integer :: buff_size !<
!! The number of cells that are necessary to be able to store enough boundary
!! conditions data to march the solution in the physical computational domain
!! to the next time-step.
- !$acc declare create(sys_size, buff_size, E_idx, gamma_idx, pi_inf_idx, alf_idx, n_idx, stress_idx, b_size, tensor_size, xi_idx, species_idx, B_idx, c_idx)
+ $:GPU_DECLARE(create='[buff_size]')
integer :: shear_num !! Number of shear stress components
integer, dimension(3) :: shear_indices !<
@@ -309,7 +317,7 @@ module m_global_parameters
!! Indices of shear stress components to reflect for boundary conditions.
!! Size: (1:3, 1:shear_BC_flip_num) for (x/y/z, [indices])
- !$acc declare create(shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices)
+ $:GPU_DECLARE(create='[shear_num,shear_indices,shear_BC_flip_num,shear_BC_flip_indices]')
! END: Simulation Algorithm Parameters
@@ -320,10 +328,6 @@ module m_global_parameters
!! in the flow. These include the stiffened gas equation of state parameters,
!! the Reynolds numbers and the Weber numbers.
- !$acc declare create(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3)
- !$acc declare create(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3)
- !$acc declare create(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3)
-
integer :: fd_order !<
!! The order of the finite-difference (fd) approximations of the first-order
!! derivatives that need to be evaluated when the CoM or flow probe data
@@ -333,7 +337,7 @@ module m_global_parameters
!! The finite-difference number is given by MAX(1, fd_order/2). Essentially,
!! it is a measure of the half-size of the finite-difference stencil for the
!! selected order of accuracy.
- !$acc declare create(fd_order,fd_number)
+ $:GPU_DECLARE(create='[fd_order,fd_number]')
logical :: probe_wrt
logical :: integral_wrt
@@ -346,7 +350,7 @@ module m_global_parameters
!> @{
real(wp) :: rhoref, pref
!> @}
- !$acc declare create(rhoref, pref)
+ $:GPU_DECLARE(create='[rhoref,pref]')
!> @name Immersed Boundaries
!> @{
@@ -361,7 +365,7 @@ module m_global_parameters
!! the maximum allowable number of patches, num_patches_max, may be changed
!! in the module m_derived_types.f90.
- !$acc declare create(ib, num_ibs, patch_ib)
+ $:GPU_DECLARE(create='[ib,num_ibs,patch_ib]')
!> @}
!> @name Bubble modeling
@@ -376,26 +380,31 @@ module m_global_parameters
real(wp) :: Ca !< Cavitation number
real(wp) :: Web !< Weber number
real(wp) :: Re_inv !< Inverse Reynolds number
+ $:GPU_DECLARE(create='[R0ref,Ca,Web,Re_inv]')
real(wp), dimension(:), allocatable :: weight !< Simpson quadrature weights
real(wp), dimension(:), allocatable :: R0 !< Bubble sizes
real(wp), dimension(:), allocatable :: V0 !< Bubble velocities
- !$acc declare create(weight, R0, V0)
+ $:GPU_DECLARE(create='[weight,R0,V0]')
logical :: bubbles_euler !< Bubbles euler on/off
logical :: polytropic !< Polytropic switch
logical :: polydisperse !< Polydisperse bubbles
+ $:GPU_DECLARE(create='[bubbles_euler,polytropic,polydisperse]')
+
logical :: adv_n !< Solve the number density equation and compute alpha from number density
logical :: adap_dt !< Adaptive step size control
real(wp) :: adap_dt_tol !< Tolerance to control adaptive step size
+ $:GPU_DECLARE(create='[adv_n,adap_dt,adap_dt_tol]')
integer :: bubble_model !< Gilmore or Keller--Miksis bubble model
integer :: thermal !< Thermal behavior. 1 = adiabatic, 2 = isotherm, 3 = transfer
+ $:GPU_DECLARE(create='[bubble_model,thermal]')
real(wp), allocatable, dimension(:, :, :) :: ptil !< Pressure modification
- !$acc declare create(ptil)
real(wp) :: poly_sigma !< log normal sigma for polydisperse PDF
+ $:GPU_DECLARE(create='[ptil, poly_sigma]')
logical :: qbmm !< Quadrature moment method
integer, parameter :: nmom = 6 !< Number of carried moments per R0 location
@@ -404,38 +413,39 @@ module m_global_parameters
integer :: R0_type
real(wp) :: pi_fac !< Factor for artificial pi_inf
+    $:GPU_DECLARE(create='[qbmm,nmomsp,nmomtot,R0_type,pi_fac]')
#:if not MFC_CASE_OPTIMIZATION
- !$acc declare create(nb)
+ $:GPU_DECLARE(create='[nb]')
#:endif
- !$acc declare create(R0ref, Ca, Web, Re_inv, bubbles_euler, polytropic, polydisperse, qbmm, nmomsp, nmomtot, R0_type, bubble_model, thermal, poly_sigma, adv_n, adap_dt, adap_dt_tol, pi_fac)
-
type(scalar_field), allocatable, dimension(:) :: mom_sp
type(scalar_field), allocatable, dimension(:, :, :) :: mom_3d
- !$acc declare create(mom_sp, mom_3d)
+ $:GPU_DECLARE(create='[mom_sp,mom_3d]')
!> @}
type(chemistry_parameters) :: chem_params
- !$acc declare create(chem_params)
+ $:GPU_DECLARE(create='[chem_params]')
!> @name Physical bubble parameters (see Ando 2010, Preston 2007)
!> @{
real(wp) :: R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_vl, k_nl, cp_n, cp_v
- !$acc declare create(R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_vl, k_nl, cp_n, cp_v)
+ $:GPU_DECLARE(create='[R_n,R_v,phi_vn,phi_nv,Pe_c,Tw]')
+    $:GPU_DECLARE(create='[pv,M_n,M_v,k_vl,k_nl,cp_n,cp_v]')
real(wp), dimension(:), allocatable :: k_n, k_v, pb0, mass_n0, mass_v0, Pe_T
real(wp), dimension(:), allocatable :: Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN
- !$acc declare create( k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN)
+ $:GPU_DECLARE(create='[k_n,k_v,pb0,mass_n0,mass_v0,Pe_T]')
+ $:GPU_DECLARE(create='[Re_trans_T,Re_trans_c,Im_trans_T,Im_trans_c,omegaN]')
real(wp) :: mul0, ss, gamma_v, mu_v
real(wp) :: gamma_m, gamma_n, mu_n
real(wp) :: gam
!> @}
- !$acc declare create(mul0, ss, gamma_v, mu_v, gamma_m, gamma_n, mu_n, gam)
+ $:GPU_DECLARE(create='[mul0,ss,gamma_v,mu_v,gamma_m,gamma_n,mu_n,gam]')
!> @name Acoustic acoustic_source parameters
!> @{
@@ -443,14 +453,14 @@ module m_global_parameters
type(acoustic_parameters), dimension(num_probes_max) :: acoustic !< Acoustic source parameters
integer :: num_source !< Number of acoustic sources
!> @}
- !$acc declare create(acoustic_source, acoustic, num_source)
+ $:GPU_DECLARE(create='[acoustic_source,acoustic,num_source]')
!> @name Surface tension parameters
!> @{
real(wp) :: sigma
logical :: surface_tension
- !$acc declare create(sigma, surface_tension)
+ $:GPU_DECLARE(create='[sigma,surface_tension]')
!> @}
integer :: momxb, momxe
@@ -461,11 +471,13 @@ module m_global_parameters
integer :: strxb, strxe
integer :: chemxb, chemxe
integer :: xibeg, xiend
- !$acc declare create(momxb, momxe, advxb, advxe, contxb, contxe, intxb, intxe, bubxb, bubxe, strxb, strxe, chemxb, chemxe)
- !$acc declare create(xibeg,xiend)
+ $:GPU_DECLARE(create='[momxb,momxe,advxb,advxe,contxb,contxe]')
+    $:GPU_DECLARE(create='[intxb,intxe,bubxb,bubxe]')
+ $:GPU_DECLARE(create='[strxb,strxe,chemxb,chemxe]')
+ $:GPU_DECLARE(create='[xibeg,xiend]')
real(wp), allocatable, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps
- !$acc declare create(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps)
+ $:GPU_DECLARE(create='[gammas,gs_min,pi_infs,ps_inf,cvs,qvs,qvps]')
real(wp) :: mytime !< Current simulation time
real(wp) :: finaltime !< Final simulation time
@@ -476,25 +488,25 @@ module m_global_parameters
type(pres_field), allocatable, dimension(:) :: mv_ts
- !$acc declare create(pb_ts, mv_ts)
+ $:GPU_DECLARE(create='[pb_ts,mv_ts]')
!> @name lagrangian subgrid bubble parameters
!> @{!
logical :: bubbles_lagrange !< Lagrangian subgrid bubble model switch
type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters
- !$acc declare create(bubbles_lagrange, lag_params)
+ $:GPU_DECLARE(create='[bubbles_lagrange,lag_params]')
!> @}
real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D)
    logical :: powell !< Powell correction for div B = 0
- !$acc declare create(Bx0, powell)
+ $:GPU_DECLARE(create='[Bx0,powell]')
!> @name Continuum damage model parameters
!> @{!
real(wp) :: tau_star !< Stress threshold for continuum damage modeling
real(wp) :: cont_damage_s !< Exponent s for continuum damage modeling
real(wp) :: alpha_bar !< Damage rate factor for continuum damage modeling
- !$acc declare create(tau_star, cont_damage_s, alpha_bar)
+ $:GPU_DECLARE(create='[tau_star,cont_damage_s,alpha_bar]')
!> @}
contains
@@ -785,10 +797,10 @@ contains
else
weno_num_stencils = weno_polyn
end if
- !$acc update device(weno_polyn)
- !$acc update device(weno_num_stencils)
- !$acc update device(nb)
- !$acc update device(num_dims, num_vels, num_fluids)
+ $:GPU_UPDATE(device='[weno_polyn]')
+ $:GPU_UPDATE(device='[weno_num_stencils]')
+ $:GPU_UPDATE(device='[nb]')
+ $:GPU_UPDATE(device='[num_dims,num_vels,num_fluids]')
#:endif
! Initializing the number of fluids for which viscous effects will
@@ -1024,7 +1036,7 @@ contains
if (Re_size(1) > 0._wp) shear_stress = .true.
if (Re_size(2) > 0._wp) bulk_stress = .true.
- !$acc update device(Re_size, viscous, shear_stress, bulk_stress)
+ $:GPU_UPDATE(device='[Re_size,viscous,shear_stress,bulk_stress]')
! Bookkeeping the indexes of any viscous fluids and any pairs of
! fluids whose interface will support effects of surface tension
@@ -1080,7 +1092,7 @@ contains
! y-dir: flip tau_xy and tau_yz
! z-dir: flip tau_xz and tau_yz
end if
- !$acc update device(shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices)
+ $:GPU_UPDATE(device='[shear_num,shear_indices,shear_BC_flip_num,shear_BC_flip_indices]')
end if
if (hyperelasticity) then
@@ -1147,7 +1159,7 @@ contains
! cell-boundary values or otherwise, the unaltered left and right,
! WENO-reconstructed, cell-boundary values
wa_flg = 0._wp; if (weno_avg) wa_flg = 1._wp
- !$acc update device(wa_flg)
+ $:GPU_UPDATE(device='[wa_flg]')
! Resort to default WENO-JS if no other WENO scheme is selected
#:if not MFC_CASE_OPTIMIZATION
@@ -1157,7 +1169,7 @@ contains
if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m, 0:n, 0:p))
Np = 0
- !$acc update device(Re_size)
+ $:GPU_UPDATE(device='[Re_size]')
if (elasticity) then
fd_number = max(1, fd_order/2)
@@ -1175,7 +1187,7 @@ contains
idwint, idwbuff, viscous, &
bubbles_lagrange, m, n, p, &
num_dims)
- !$acc update device(idwint, idwbuff)
+ $:GPU_UPDATE(device='[idwint, idwbuff]')
! Configuring Coordinate Direction Indexes
if (bubbles_euler) then
@@ -1185,7 +1197,7 @@ contains
& idwbuff(3)%beg:idwbuff(3)%end))
end if
- !$acc update device(fd_order,fd_number)
+ $:GPU_UPDATE(device='[fd_order, fd_number]')
if (cyl_coord .neqv. .true.) then ! Cartesian grid
grid_geometry = 1
@@ -1212,30 +1224,44 @@ contains
chemxb = species_idx%beg
chemxe = species_idx%end
- !$acc update device(momxb, momxe, advxb, advxe, contxb, contxe, bubxb, bubxe, intxb, intxe, sys_size, buff_size, E_idx, alf_idx, n_idx, adv_n, adap_dt, pi_fac, strxb, strxe, chemxb, chemxe, c_idx)
- !$acc update device(b_size, xibeg, xiend, tensor_size)
+ $:GPU_UPDATE(device='[momxb,momxe,advxb,advxe,contxb,contxe, &
+ & bubxb,bubxe,intxb,intxe,sys_size,buff_size,E_idx, &
+ & alf_idx,n_idx,adv_n,adap_dt,pi_fac,strxb,strxe, &
+ & chemxb,chemxe,c_idx]')
+ $:GPU_UPDATE(device='[b_size,xibeg,xiend,tensor_size]')
- !$acc update device(species_idx)
- !$acc update device(cfl_target, m, n, p)
+ $:GPU_UPDATE(device='[species_idx]')
+ $:GPU_UPDATE(device='[cfl_target,m,n,p]')
- !$acc update device(alt_soundspeed, acoustic_source, num_source)
- !$acc update device(dt, sys_size, buff_size, pref, rhoref, gamma_idx, pi_inf_idx, E_idx, alf_idx, stress_idx, mpp_lim, bubbles_euler, hypoelasticity, alt_soundspeed, avg_state, num_fluids, model_eqns, num_dims, num_vels, mixture_err, grid_geometry, cyl_coord, mp_weno, weno_eps, teno_CT, hyperelasticity, hyper_model, elasticity, xi_idx, B_idx, low_Mach)
+ $:GPU_UPDATE(device='[alt_soundspeed,acoustic_source,num_source]')
+ $:GPU_UPDATE(device='[dt,sys_size,buff_size,pref,rhoref, &
+ & gamma_idx,pi_inf_idx,E_idx,alf_idx,stress_idx, &
+ & mpp_lim,bubbles_euler,hypoelasticity,alt_soundspeed, &
+ & avg_state,num_fluids,model_eqns,num_dims,num_vels, &
+ & mixture_err,grid_geometry,cyl_coord,mp_weno,weno_eps, &
+ & teno_CT,hyperelasticity,hyper_model,elasticity,xi_idx, &
+ & B_idx,low_Mach]')
- !$acc update device(Bx0, powell)
+ $:GPU_UPDATE(device='[Bx0, powell]')
- !$acc update device(cont_damage, tau_star, cont_damage_s, alpha_bar)
+ $:GPU_UPDATE(device='[cont_damage,tau_star,cont_damage_s,alpha_bar]')
#:if not MFC_CASE_OPTIMIZATION
- !$acc update device(wenojs, mapped_weno, wenoz, teno)
- !$acc update device(wenoz_q)
- !$acc update device(mhd, relativity)
+ $:GPU_UPDATE(device='[wenojs,mapped_weno,wenoz,teno]')
+ $:GPU_UPDATE(device='[wenoz_q]')
+ $:GPU_UPDATE(device='[mhd, relativity]')
#:endif
- !$acc enter data copyin(nb, R0ref, Ca, Web, Re_inv, weight, R0, V0, bubbles_euler, polytropic, polydisperse, qbmm, R0_type, ptil, bubble_model, thermal, poly_sigma)
- !$acc enter data copyin(R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN, mul0, ss, gamma_v, mu_v, gamma_m, gamma_n, mu_n, gam)
- !$acc enter data copyin(dir_idx, dir_flg, dir_idx_tau)
+ $:GPU_ENTER_DATA(copyin='[nb,R0ref,Ca,Web,Re_inv,weight,R0, &
+ & V0,bubbles_euler,polytropic,polydisperse,qbmm,R0_type, &
+ & ptil,bubble_model,thermal,poly_sigma]')
+ $:GPU_ENTER_DATA(copyin='[R_n,R_v,phi_vn,phi_nv,Pe_c,Tw,pv, &
+ & M_n,M_v,k_n,k_v,pb0,mass_n0,mass_v0,Pe_T, &
+ & Re_trans_T,Re_trans_c,Im_trans_T,Im_trans_c,omegaN, &
+ & mul0,ss,gamma_v,mu_v,gamma_m,gamma_n,mu_n,gam]')
+ $:GPU_ENTER_DATA(copyin='[dir_idx,dir_flg,dir_idx_tau]')
- !$acc enter data copyin(relax, relax_model, palpha_eps,ptgalpha_eps)
+ $:GPU_ENTER_DATA(copyin='[relax,relax_model,palpha_eps,ptgalpha_eps]')
! Allocating grid variables for the x-, y- and z-directions
@:ALLOCATE(x_cb(-1 - buff_size:m + buff_size))
diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp
index 0aed395e8e..628605a652 100644
--- a/src/simulation/m_hyperelastic.fpp
+++ b/src/simulation/m_hyperelastic.fpp
@@ -26,14 +26,14 @@ module m_hyperelastic
!! The btensor at the cell-interior Gaussian quadrature points.
    !! This tensor needs to be calculated only once, which keeps the code DRY.
type(vector_field) :: btensor !<
- !$acc declare create(btensor)
+ $:GPU_DECLARE(create='[btensor]')
real(wp), allocatable, dimension(:, :) :: fd_coeff_x
real(wp), allocatable, dimension(:, :) :: fd_coeff_y
real(wp), allocatable, dimension(:, :) :: fd_coeff_z
- !$acc declare create(fd_coeff_x,fd_coeff_y,fd_coeff_z)
+    $:GPU_DECLARE(create='[fd_coeff_x,fd_coeff_y,fd_coeff_z]')
real(wp), allocatable, dimension(:) :: Gs
- !$acc declare create(Gs)
+ $:GPU_DECLARE(create='[Gs]')
contains
@@ -55,11 +55,11 @@ contains
@:ACC_SETUP_VFs(btensor)
@:ALLOCATE(Gs(1:num_fluids))
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
Gs(i) = fluid_pp(i)%G
end do
- !$acc update device(Gs)
+ $:GPU_UPDATE(device='[Gs]')
@:ALLOCATE(fd_coeff_x(-fd_number:fd_number, 0:m))
if (n > 0) then
@@ -72,16 +72,16 @@ contains
! Computing centered finite difference coefficients
call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x, buff_size, &
fd_number, fd_order)
- !$acc update device(fd_coeff_x)
+ $:GPU_UPDATE(device='[fd_coeff_x]')
if (n > 0) then
call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y, buff_size, &
fd_number, fd_order)
- !$acc update device(fd_coeff_y)
+ $:GPU_UPDATE(device='[fd_coeff_y]')
end if
if (p > 0) then
call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z, buff_size, &
fd_number, fd_order)
- !$acc update device(fd_coeff_z)
+ $:GPU_UPDATE(device='[fd_coeff_z]')
end if
end subroutine s_initialize_hyperelastic_module
@@ -106,12 +106,12 @@ contains
real(wp) :: G
integer :: j, k, l, i, r
- !$acc parallel loop collapse(3) gang vector default(present) private(alpha_K, alpha_rho_K, &
- !$acc rho, gamma, pi_inf, qv, G, Re, tensora, tensorb)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, rho, &
+ & gamma, pi_inf, qv, G, Re, tensora, tensorb]')
do l = 0, p
do k = 0, n
do j = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l)
alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l)
@@ -124,7 +124,7 @@ contains
!if ( G <= verysmall ) G_K = 0._wp
if (G > verysmall) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, tensor_size
tensora(i) = 0._wp
end do
@@ -133,7 +133,7 @@ contains
! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx
! 4-6 : dxix_dy, dxiy_dy, dxiz_dy
! 7-9 : dxix_dz, dxiy_dz, dxiz_dz
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do r = -fd_number, fd_number
! derivatives in the x-direction
tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x(r, j)
@@ -167,7 +167,7 @@ contains
if (tensorb(tensor_size) > verysmall) then
! STEP 2c: computing the inverse of grad_xi tensor = F
! tensorb is the adjoint, tensora becomes F
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, tensor_size - 1
tensora(i) = tensorb(i)/tensorb(tensor_size)
end do
@@ -198,7 +198,7 @@ contains
q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - &
G*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma
! STEP 5c: updating the Cauchy stress conservative scalar field
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, b_size - 1
q_cons_vf(strxb + i - 1)%sf(j, k, l) = &
rho*q_prim_vf(strxb + i - 1)%sf(j, k, l)
@@ -208,7 +208,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end subroutine s_hyperelastic_rmt_stress_update
!> The following subroutine handles the calculation of the btensor.
@@ -220,7 +219,7 @@ contains
    !! calculate F F-transpose to obtain the btensor; the btensor is an nxn tensor
    !! btensor is symmetric, which saves data space
pure subroutine s_neoHookean_cauchy_solver(btensor, q_prim_vf, G, j, k, l)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
type(scalar_field), dimension(b_size), intent(inout) :: btensor
real(wp), intent(in) :: G
@@ -239,7 +238,7 @@ contains
#:endfor
! dividing by the jacobian for neo-Hookean model
! setting the tensor to the stresses for riemann solver
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, b_size - 1
q_prim_vf(strxb + i - 1)%sf(j, k, l) = &
G*btensor(i)%sf(j, k, l)/btensor(b_size)%sf(j, k, l)
@@ -259,7 +258,7 @@ contains
    !! calculate F F-transpose to obtain the btensor; the btensor is an nxn tensor
    !! btensor is symmetric, which saves data space
pure subroutine s_Mooney_Rivlin_cauchy_solver(btensor, q_prim_vf, G, j, k, l)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
type(scalar_field), dimension(b_size), intent(inout) :: btensor
real(wp), intent(in) :: G
@@ -280,7 +279,7 @@ contains
! dividing by the jacobian for neo-Hookean model
! setting the tensor to the stresses for riemann solver
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, b_size - 1
q_prim_vf(strxb + i - 1)%sf(j, k, l) = &
G*btensor(i)%sf(j, k, l)/btensor(b_size)%sf(j, k, l)
diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp
index 059b5746d5..3f736b0b0b 100644
--- a/src/simulation/m_hypoelastic.fpp
+++ b/src/simulation/m_hypoelastic.fpp
@@ -20,20 +20,20 @@ module m_hypoelastic
s_compute_damage_state
real(wp), allocatable, dimension(:) :: Gs
- !$acc declare create(Gs)
+ $:GPU_DECLARE(create='[Gs]')
real(wp), allocatable, dimension(:, :, :) :: du_dx, du_dy, du_dz
real(wp), allocatable, dimension(:, :, :) :: dv_dx, dv_dy, dv_dz
real(wp), allocatable, dimension(:, :, :) :: dw_dx, dw_dy, dw_dz
- !$acc declare create(du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz)
+ $:GPU_DECLARE(create='[du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz]')
real(wp), allocatable, dimension(:, :, :) :: rho_K_field, G_K_field
- !$acc declare create(rho_K_field, G_K_field)
+ $:GPU_DECLARE(create='[rho_K_field,G_K_field]')
real(wp), allocatable, dimension(:, :) :: fd_coeff_x_h
real(wp), allocatable, dimension(:, :) :: fd_coeff_y_h
real(wp), allocatable, dimension(:, :) :: fd_coeff_z_h
- !$acc declare create(fd_coeff_x_h,fd_coeff_y_h,fd_coeff_z_h)
+ $:GPU_DECLARE(create='[fd_coeff_x_h,fd_coeff_y_h,fd_coeff_z_h]')
contains
@@ -55,7 +55,7 @@ contains
do i = 1, num_fluids
Gs(i) = fluid_pp(i)%G
end do
- !$acc update device(Gs)
+ $:GPU_UPDATE(device='[Gs]')
@:ALLOCATE(fd_coeff_x_h(-fd_number:fd_number, 0:m))
if (n > 0) then
@@ -68,16 +68,16 @@ contains
! Computing centered finite difference coefficients
call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_h, buff_size, &
fd_number, fd_order)
- !$acc update device(fd_coeff_x_h)
+ $:GPU_UPDATE(device='[fd_coeff_x_h]')
if (n > 0) then
call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_h, buff_size, &
fd_number, fd_order)
- !$acc update device(fd_coeff_y_h)
+ $:GPU_UPDATE(device='[fd_coeff_y_h]')
end if
if (p > 0) then
call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_h, buff_size, &
fd_number, fd_order)
- !$acc update device(fd_coeff_z_h)
+ $:GPU_UPDATE(device='[fd_coeff_z_h]')
end if
end subroutine s_initialize_hypoelastic_module
@@ -104,7 +104,7 @@ contains
! calculate velocity gradients + rho_K and G_K
! TODO: re-organize these loops one by one for GPU efficiency if possible?
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
@@ -112,13 +112,12 @@ contains
end do
end do
end do
- !$acc end parallel loop
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do r = -fd_number, fd_number
du_dx(k, l, q) = du_dx(k, l, q) &
+ q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_h(r, k)
@@ -127,10 +126,9 @@ contains
end do
end do
end do
- !$acc end parallel loop
if (ndirs > 1) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
@@ -138,13 +136,12 @@ contains
end do
end do
end do
- !$acc end parallel loop
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do r = -fd_number, fd_number
du_dy(k, l, q) = du_dy(k, l, q) &
+ q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_h(r, l)
@@ -156,12 +153,11 @@ contains
end do
end do
end do
- !$acc end parallel loop
! 3D
if (ndirs == 3) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
@@ -170,13 +166,12 @@ contains
end do
end do
end do
- !$acc end parallel loop
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do r = -fd_number, fd_number
du_dz(k, l, q) = du_dz(k, l, q) &
+ q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_h(r, q)
@@ -192,11 +187,10 @@ contains
end do
end do
end do
- !$acc end parallel loop
end if
end if
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
@@ -220,7 +214,7 @@ contains
end do
! apply rhs source term to elastic stress equation
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
@@ -234,7 +228,7 @@ contains
end do
elseif (idir == 2) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
@@ -269,7 +263,7 @@ contains
end do
elseif (idir == 3) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
@@ -337,7 +331,7 @@ contains
if (cyl_coord .and. idir == 2) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
@@ -399,13 +393,13 @@ contains
if (n == 0) then
l = 0; q = 0
- !$acc parallel loop gang vector default(present)
+ $:GPU_PARALLEL_LOOP()
do k = 0, m
rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s
end do
elseif (p == 0) then
q = 0
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, n
do k = 0, m
! Maximum principal stress
@@ -419,7 +413,7 @@ contains
end do
end do
else
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q = 0, p
do l = 0, n
do k = 0, m
diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp
index 3c9b0db535..f9f12161b4 100644
--- a/src/simulation/m_ibm.fpp
+++ b/src/simulation/m_ibm.fpp
@@ -37,15 +37,15 @@ module m_ibm
type(integer_field), public :: ib_markers
type(levelset_field), public :: levelset
type(levelset_norm_field), public :: levelset_norm
- !$acc declare create(ib_markers, levelset, levelset_norm)
+ $:GPU_DECLARE(create='[ib_markers,levelset,levelset_norm]')
type(ghost_point), dimension(:), allocatable :: ghost_points
type(ghost_point), dimension(:), allocatable :: inner_points
- !$acc declare create(ghost_points, inner_points)
+ $:GPU_DECLARE(create='[ghost_points,inner_points]')
integer :: num_gps !< Number of ghost points
    integer :: num_inner_gps !< Number of inner ghost points
- !$acc declare create(gp_layers, num_gps, num_inner_gps)
+ $:GPU_DECLARE(create='[gp_layers,num_gps,num_inner_gps]')
contains
@@ -72,7 +72,7 @@ contains
@:ACC_SETUP_SFs(levelset)
@:ACC_SETUP_SFs(levelset_norm)
- !$acc enter data copyin(num_gps, num_inner_gps)
+ $:GPU_ENTER_DATA(copyin='[num_gps,num_inner_gps]')
end subroutine s_initialize_ibm_module
@@ -82,31 +82,31 @@ contains
integer :: i, j, k
- !$acc update device(ib_markers%sf)
- !$acc update device(levelset%sf)
- !$acc update device(levelset_norm%sf)
+ $:GPU_UPDATE(device='[ib_markers%sf]')
+ $:GPU_UPDATE(device='[levelset%sf]')
+ $:GPU_UPDATE(device='[levelset_norm%sf]')
! Get neighboring IB variables from other processors
call s_populate_ib_buffers()
- !$acc update host(ib_markers%sf)
+ $:GPU_UPDATE(host='[ib_markers%sf]')
call s_find_num_ghost_points(num_gps, num_inner_gps)
- !$acc update device(num_gps, num_inner_gps)
+ $:GPU_UPDATE(device='[num_gps, num_inner_gps]')
@:ALLOCATE(ghost_points(1:num_gps))
@:ALLOCATE(inner_points(1:num_inner_gps))
- !$acc enter data copyin(ghost_points, inner_points)
+ $:GPU_ENTER_DATA(copyin='[ghost_points,inner_points]')
call s_find_ghost_points(ghost_points, inner_points)
- !$acc update device(ghost_points, inner_points)
+ $:GPU_UPDATE(device='[ghost_points, inner_points]')
call s_compute_image_points(ghost_points, levelset, levelset_norm)
- !$acc update device(ghost_points)
+ $:GPU_UPDATE(device='[ghost_points]')
call s_compute_interpolation_coeffs(ghost_points)
- !$acc update device(ghost_points)
+ $:GPU_UPDATE(device='[ghost_points]')
end subroutine s_ibm_setup
@@ -166,7 +166,11 @@ contains
type(ghost_point) :: gp
type(ghost_point) :: innerp
- !$acc parallel loop gang vector private(physical_loc, dyn_pres, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, vel_g, vel_norm_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, presb_IP, massv_IP, rho, gamma, pi_inf, Re_K, G_K, Gs, gp, innerp, norm, buf, j, k, l, q)
+ $:GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, &
+ & alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, &
+ & v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, &
+ & gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, &
+ & j,k,l,q]')
do i = 1, num_gps
gp = ghost_points(i)
@@ -203,7 +207,7 @@ contains
dyn_pres = 0._wp
! Set q_prim_vf params at GP so that mixture vars calculated properly
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, num_fluids
q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q)
q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q)
@@ -239,7 +243,7 @@ contains
end if
! Set momentum
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = momxb, momxe
q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1)
dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* &
@@ -247,7 +251,7 @@ contains
end do
! Set continuity and adv vars
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, num_fluids
q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q)
q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q)
@@ -301,7 +305,7 @@ contains
end if
if (model_eqns == 3) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = intxb, intxe
q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP &
+ pi_infs(q - intxb + 1))
@@ -310,7 +314,9 @@ contains
end do
!Correct the state of the inner points in IBs
- !$acc parallel loop gang vector private(physical_loc, dyn_pres, alpha_rho_IP, alpha_IP, vel_g, rho, gamma, pi_inf, Re_K, innerp, j, k, l, q)
+ $:GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, &
+ & alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp, &
+ & j,k,l,q]')
do i = 1, num_inner_gps
innerp = inner_points(i)
@@ -318,7 +324,7 @@ contains
k = innerp%loc(2)
l = innerp%loc(3)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = momxb, momxe
q_cons_vf(q)%sf(j, k, l) = 0._wp
end do
@@ -732,7 +738,7 @@ contains
!> Function that uses the interpolation coefficients and the current state
!! at the cell centers in order to estimate the state at the image point
pure subroutine s_interpolate_image_point(q_prim_vf, gp, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb, mv, presb_IP, massv_IP)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), &
dimension(sys_size), &
intent(IN) :: q_prim_vf !< Primitive Variables
@@ -785,11 +791,11 @@ contains
end if
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = i1, i2
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do j = j1, j2
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do k = k1, k2
coeff = gp%interp_coeffs(i - i1 + 1, j - j1 + 1, k - k1 + 1)
@@ -797,13 +803,13 @@ contains
pres_IP = pres_IP + coeff* &
q_prim_vf(E_idx)%sf(i, j, k)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = momxb, momxe
vel_IP(q + 1 - momxb) = vel_IP(q + 1 - momxb) + coeff* &
q_prim_vf(q)%sf(i, j, k)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do l = contxb, contxe
alpha_rho_IP(l) = alpha_rho_IP(l) + coeff* &
q_prim_vf(l)%sf(i, j, k)
@@ -816,7 +822,7 @@ contains
end if
if (bubbles_euler .and. .not. qbmm) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do l = 1, nb
if (polytropic) then
r_IP(l) = r_IP(l) + coeff*q_prim_vf(bubxb + (l - 1)*2)%sf(i, j, k)
diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp
index f5730b513f..8112b3af7e 100644
--- a/src/simulation/m_mhd.fpp
+++ b/src/simulation/m_mhd.fpp
@@ -24,12 +24,12 @@ module m_mhd
real(wp), allocatable, dimension(:, :, :) :: du_dx, du_dy, du_dz
real(wp), allocatable, dimension(:, :, :) :: dv_dx, dv_dy, dv_dz
real(wp), allocatable, dimension(:, :, :) :: dw_dx, dw_dy, dw_dz
- !$acc declare create(du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz)
+ $:GPU_DECLARE(create='[du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz]')
real(wp), allocatable, dimension(:, :) :: fd_coeff_x_h
real(wp), allocatable, dimension(:, :) :: fd_coeff_y_h
real(wp), allocatable, dimension(:, :) :: fd_coeff_z_h
- !$acc declare create(fd_coeff_x_h,fd_coeff_y_h,fd_coeff_z_h)
+ $:GPU_DECLARE(create='[fd_coeff_x_h,fd_coeff_y_h,fd_coeff_z_h]')
contains
@@ -52,12 +52,12 @@ contains
! Computing centered finite difference coefficients
call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_h, buff_size, fd_number, fd_order)
- !$acc update device(fd_coeff_x_h)
+ $:GPU_UPDATE(device='[fd_coeff_x_h]')
call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_h, buff_size, fd_number, fd_order)
- !$acc update device(fd_coeff_y_h)
+ $:GPU_UPDATE(device='[fd_coeff_y_h]')
if (p > 0) then
call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_h, buff_size, fd_number, fd_order)
- !$acc update device(fd_coeff_z_h)
+ $:GPU_UPDATE(device='[fd_coeff_z_h]')
end if
end subroutine s_initialize_mhd_powell_module
@@ -76,23 +76,22 @@ contains
real(wp), dimension(3) :: v, B
real(wp) :: divB, vdotB
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(v, B)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[v, B]')
do q = 0, p
do l = 0, n
do k = 0, m
divB = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do r = -fd_number, fd_number
divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do r = -fd_number, fd_number
divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l)
end do
if (p > 0) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do r = -fd_number, fd_number
divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q)
end do
@@ -130,7 +129,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end subroutine s_compute_mhd_powell_rhs
diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp
index 93d864c5e8..e500a00898 100644
--- a/src/simulation/m_mpi_proxy.fpp
+++ b/src/simulation/m_mpi_proxy.fpp
@@ -48,7 +48,7 @@ module m_mpi_proxy
!> @}
integer :: i_halo_size
- !$acc declare create(i_halo_size)
+ $:GPU_DECLARE(create='[i_halo_size]')
contains
@@ -71,7 +71,7 @@ contains
i_halo_size = -1 + gp_layers
end if
- !$acc update device(i_halo_size)
+ $:GPU_UPDATE(device='[i_halo_size]')
@:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size))
end if
#endif
@@ -297,7 +297,7 @@ contains
#:for mpi_dir in [1, 2, 3]
if (mpi_dir == ${mpi_dir}$) then
#:if mpi_dir == 1
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
do l = 0, p
do k = 0, n
do j = 0, gp_layers - 1
@@ -307,7 +307,7 @@ contains
end do
end do
#:elif mpi_dir == 2
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
do l = 0, p
do k = 0, gp_layers - 1
do j = -gp_layers, m + gp_layers
@@ -318,7 +318,7 @@ contains
end do
end do
#:else
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
do l = 0, gp_layers - 1
do k = -gp_layers, n + gp_layers
do j = -gp_layers, m + gp_layers
@@ -345,7 +345,7 @@ contains
#:for mpi_dir in [1, 2, 3]
if (mpi_dir == ${mpi_dir}$) then
#:if mpi_dir == 1
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
do l = 0, p
do k = 0, n
do j = -gp_layers, -1
@@ -355,7 +355,7 @@ contains
end do
end do
#:elif mpi_dir == 2
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
do l = 0, p
do k = -gp_layers, -1
do j = -gp_layers, m + gp_layers
@@ -367,7 +367,7 @@ contains
end do
#:else
! Unpacking buffer from bc_z%beg
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
+ $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
do l = -gp_layers, -1
do k = -gp_layers, n + gp_layers
do j = -gp_layers, m + gp_layers
diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp
index ced668bef2..624cfa5390 100644
--- a/src/simulation/m_pressure_relaxation.fpp
+++ b/src/simulation/m_pressure_relaxation.fpp
@@ -21,10 +21,10 @@ module m_pressure_relaxation
s_finalize_pressure_relaxation_module
real(wp), allocatable, dimension(:) :: gamma_min, pres_inf
- !$acc declare create(gamma_min, pres_inf)
+ $:GPU_DECLARE(create='[gamma_min, pres_inf]')
real(wp), allocatable, dimension(:, :) :: Res
- !$acc declare create(Res)
+ $:GPU_DECLARE(create='[Res]')
contains
@@ -39,7 +39,7 @@ contains
gamma_min(i) = 1._wp/fluid_pp(i)%gamma + 1._wp
pres_inf(i) = fluid_pp(i)%pi_inf/(1._wp + fluid_pp(i)%gamma)
end do
- !$acc update device(gamma_min, pres_inf)
+ $:GPU_UPDATE(device='[gamma_min, pres_inf]')
if (viscous) then
@:ALLOCATE(Res(1:2, 1:maxval(Re_size)))
@@ -48,7 +48,7 @@ contains
Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i)
end do
end do
- !$acc update device(Res, Re_idx, Re_size)
+ $:GPU_UPDATE(device='[Res, Re_idx, Re_size]')
end if
end subroutine s_initialize_pressure_relaxation_module
@@ -70,7 +70,7 @@ contains
type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
integer :: j, k, l
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -83,7 +83,7 @@ contains
!> Process pressure relaxation for a single cell
pure subroutine s_relax_cell_pressure(q_cons_vf, j, k, l)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
integer, intent(in) :: j, k, l
@@ -103,14 +103,14 @@ contains
!> Check if pressure relaxation is needed for this cell
pure logical function s_needs_pressure_relaxation(q_cons_vf, j, k, l)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf
integer, intent(in) :: j, k, l
integer :: i
s_needs_pressure_relaxation = .true.
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > (1._wp - sgm_eps)) then
s_needs_pressure_relaxation = .false.
@@ -121,7 +121,7 @@ contains
!> Correct volume fractions to physical bounds
pure subroutine s_correct_volume_fractions(q_cons_vf, j, k, l)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
integer, intent(in) :: j, k, l
@@ -129,7 +129,7 @@ contains
integer :: i
sum_alpha = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
if ((q_cons_vf(i + contxb - 1)%sf(j, k, l) < 0._wp) .or. &
(q_cons_vf(i + advxb - 1)%sf(j, k, l) < 0._wp)) then
@@ -142,7 +142,7 @@ contains
sum_alpha = sum_alpha + q_cons_vf(i + advxb - 1)%sf(j, k, l)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + advxb - 1)%sf(j, k, l)/sum_alpha
end do
@@ -151,7 +151,7 @@ contains
!> Main pressure equilibration using Newton-Raphson
pure subroutine s_equilibrate_pressure(q_cons_vf, j, k, l)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
integer, intent(in) :: j, k, l
@@ -164,7 +164,7 @@ contains
! Initialize pressures
pres_relax = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then
pres_K_init(i) = (q_cons_vf(i + intxb - 1)%sf(j, k, l)/ &
@@ -180,7 +180,7 @@ contains
! Newton-Raphson iteration
f_pres = 1.e-9_wp
df_pres = 1.e9_wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do iter = 0, MAX_ITER - 1
if (abs(f_pres) > TOLERANCE) then
pres_relax = pres_relax - f_pres/df_pres
@@ -194,7 +194,7 @@ contains
! Newton-Raphson step
f_pres = -1._wp
df_pres = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then
rho_K_s(i) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/ &
@@ -210,7 +210,7 @@ contains
end do
! Update volume fractions
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) &
q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/rho_K_s(i)
@@ -220,7 +220,7 @@ contains
!> Correct internal energies using equilibrated pressure
pure subroutine s_correct_internal_energies(q_cons_vf, j, k, l)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
integer, intent(in) :: j, k, l
@@ -230,7 +230,7 @@ contains
real(wp), dimension(2) :: Re
integer :: i, q
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho(i) = q_cons_vf(i)%sf(j, k, l)
alpha(i) = q_cons_vf(E_idx + i)%sf(j, k, l)
@@ -243,14 +243,14 @@ contains
if (bubbles_euler) then
if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho = rho + alpha_rho(i)
gamma = gamma + alpha(i)*gammas(i)
pi_inf = pi_inf + alpha(i)*pi_infs(i)
end do
else if ((model_eqns == 2) .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids - 1
rho = rho + alpha_rho(i)
gamma = gamma + alpha(i)*gammas(i)
@@ -264,7 +264,7 @@ contains
else
sum_alpha = 0._wp
if (mpp_lim) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho(i) = max(0._wp, alpha_rho(i))
alpha(i) = min(max(0._wp, alpha(i)), 1._wp)
@@ -273,7 +273,7 @@ contains
alpha = alpha/max(sum_alpha, sgm_eps)
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho = rho + alpha_rho(i)
gamma = gamma + alpha(i)*gammas(i)
@@ -281,11 +281,11 @@ contains
end do
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re(i) = dflt_real
if (Re_size(i) > 0) Re(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re(i) = alpha(Re_idx(i, q))/Res(i, q) + Re(i)
end do
@@ -296,7 +296,7 @@ contains
! Compute dynamic pressure and update internal energies
dyn_pres = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, momxe
dyn_pres = dyn_pres + 5.e-1_wp*q_cons_vf(i)%sf(j, k, l)* &
q_cons_vf(i)%sf(j, k, l)/max(rho, sgm_eps)
@@ -304,7 +304,7 @@ contains
pres_relax = (q_cons_vf(E_idx)%sf(j, k, l) - dyn_pres - pi_inf)/gamma
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
q_cons_vf(i + intxb - 1)%sf(j, k, l) = &
q_cons_vf(i + advxb - 1)%sf(j, k, l)*(gammas(i)*pres_relax + pi_infs(i))
diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp
index daf021c35c..027c47a567 100644
--- a/src/simulation/m_qbmm.fpp
+++ b/src/simulation/m_qbmm.fpp
@@ -25,21 +25,21 @@ module m_qbmm
private; public :: s_initialize_qbmm_module, s_mom_inv, s_coeff, s_compute_qbmm_rhs
real(wp), allocatable, dimension(:, :, :, :, :) :: momrhs
- !$acc declare create(momrhs)
+ $:GPU_DECLARE(create='[momrhs]')
#:if MFC_CASE_OPTIMIZATION
integer, parameter :: nterms = ${nterms}$
#:else
integer :: nterms
- !$acc declare create(nterms)
+ $:GPU_DECLARE(create='[nterms]')
#:endif
type(int_bounds_info) :: is1_qbmm, is2_qbmm, is3_qbmm
- !$acc declare create(is1_qbmm, is2_qbmm, is3_qbmm)
+ $:GPU_DECLARE(create='[is1_qbmm,is2_qbmm,is3_qbmm]')
integer, allocatable, dimension(:) :: bubrs
integer, allocatable, dimension(:, :) :: bubmoms
- !$acc declare create(bubrs, bubmoms)
+ $:GPU_DECLARE(create='[bubrs,bubmoms]')
contains
@@ -57,8 +57,8 @@ contains
nterms = 7
end if
- !$acc enter data copyin(nterms)
- !$acc update device(nterms)
+ $:GPU_ENTER_DATA(copyin='[nterms]')
+ $:GPU_UPDATE(device='[nterms]')
#:endif
@@ -392,7 +392,7 @@ contains
end do
end if
- !$acc update device(momrhs)
+ $:GPU_UPDATE(device='[momrhs]')
@:ALLOCATE(bubrs(1:nb))
@:ALLOCATE(bubmoms(1:nb, 1:nmom))
@@ -400,14 +400,14 @@ contains
do i = 1, nb
bubrs(i) = bub_idx%rs(i)
end do
- !$acc update device(bubrs)
+ $:GPU_UPDATE(device='[bubrs]')
do j = 1, nmom
do i = 1, nb
bubmoms(i, j) = bub_idx%moms(i, j)
end do
end do
- !$acc update device(bubmoms)
+ $:GPU_UPDATE(device='[bubmoms]')
end subroutine s_initialize_qbmm_module
@@ -433,7 +433,7 @@ contains
end select
if (.not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present) private(nb_q, nR, nR2, R, R2, nb_dot, nR_dot, nR2_dot, var, AX)
+ $:GPU_PARALLEL_LOOP(collapse=5,private='[nb_q,nR,nR2,R,R2,nb_dot,nR_dot,nR2_dot,var,AX]')
do i = 1, nb
do q = 1, nnode
do l = 0, p
@@ -538,13 +538,13 @@ contains
! The following block is not repeated and is left as is
if (idir == 1) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do q = 0, n
do i = 0, m
rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l)
j = bubxb
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do k = 1, nb
rhs_vf(j)%sf(i, q, l) = rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l)
rhs_vf(j + 1)%sf(i, q, l) = rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l)
@@ -563,11 +563,9 @@ contains
    !Coefficient array for non-polytropic model (pb and mv values are accounted for in wght_pb and wght_mv)
pure subroutine s_coeff_nonpoly(pres, rho, c, coeffs)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_coeff_nonpoly
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_coeff_nonpoly',parallelism='[seq]', &
+ & cray_inline=True)
+
real(wp), intent(in) :: pres, rho, c
real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs
@@ -636,11 +634,8 @@ contains
!Coefficient array for polytropic model (pb for each R0 bin accounted for in wght_pb)
pure subroutine s_coeff(pres, rho, c, coeffs)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_coeff
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_coeff',parallelism='[seq]', &
+ & cray_inline=True)
real(wp), intent(in) :: pres, rho, c
real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs
@@ -715,9 +710,12 @@ contains
integer :: id1, id2, id3, i1, i2, j, q, r
is1_qbmm = ix; is2_qbmm = iy; is3_qbmm = iz
- !$acc update device(is1_qbmm, is2_qbmm, is3_qbmm)
+ $:GPU_UPDATE(device='[is1_qbmm,is2_qbmm,is3_qbmm]')
- !$acc parallel loop collapse(3) gang vector default(present) private(moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[moms, msum, wght, abscX, &
+ & abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, &
+ & n_tait, B_tait, pres, rho, nbub, c, alf, momsum, &
+ & drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T]')
do id3 = is3_qbmm%beg, is3_qbmm%end
do id2 = is2_qbmm%beg, is2_qbmm%end
do id1 = is1_qbmm%beg, is1_qbmm%end
@@ -737,9 +735,10 @@ contains
if (alf > small_alf) then
nbub = q_cons_vf(bubxb)%sf(id1, id2, id3)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, nb
! Gather moments for this bubble bin
+ $:GPU_LOOP(parallelism='[seq]')
do r = 2, nmom
moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3)
end do
@@ -747,12 +746,12 @@ contains
call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q))
if (polytropic) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do j = 1, nnode
wght_pb(j, q) = wght(j, q)*(pb0(q) - pv)
end do
else
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do j = 1, nnode
chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp))
x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw)
@@ -771,13 +770,13 @@ contains
! Compute change in moments due to bubble dynamics
r = 1
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i2 = 0, 2
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i1 = 0, 2
if ((i1 + i2) <= 2) then
momsum = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do j = 1, nterms
select case (bubble_model)
case (3)
@@ -807,7 +806,7 @@ contains
! Compute change in pb and mv for non-polytropic model
if (.not. polytropic) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do j = 1, nnode
drdt = msum(2)
drdt2 = merge(-1._wp, 1._wp, j == 1 .or. j == 2)/(2._wp*sqrt(merge(moms(4) - moms(2)**2._wp, verysmall, moms(4) - moms(2)**2._wp > 0._wp)))
@@ -835,11 +834,11 @@ contains
end if
end if
else
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, nb
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i1 = 0, 2
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i2 = 0, 2
moms3d(i1, i2, q)%sf(id1, id2, id3) = 0._wp
end do
@@ -857,11 +856,8 @@ contains
contains
! Helper to select the correct coefficient routine
subroutine s_coeff_selector(pres, rho, c, coeff, polytropic)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_coeff_selector
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_coeff_selector',parallelism='[seq]', &
+ & cray_inline=True)
real(wp), intent(in) :: pres, rho, c
real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff
logical, intent(in) :: polytropic
@@ -873,11 +869,9 @@ contains
end subroutine s_coeff_selector
pure subroutine s_chyqmom(momin, wght, abscX, abscY)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_chyqmom
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_chyqmom',parallelism='[seq]', &
+ & cray_inline=True)
+
real(wp), dimension(nmom), intent(in) :: momin
real(wp), dimension(nnode), intent(inout) :: wght, abscX, abscY
@@ -933,11 +927,9 @@ contains
end subroutine s_chyqmom
pure subroutine s_hyqmom(frho, fup, fmom)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_hyqmom
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_hyqmom',parallelism='[seq]', &
+ & cray_inline=True)
+
real(wp), dimension(2), intent(inout) :: frho, fup
real(wp), dimension(3), intent(in) :: fmom
@@ -955,7 +947,7 @@ contains
end subroutine s_hyqmom
pure function f_quad(abscX, abscY, wght_in, q, r, s)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in
real(wp), intent(in) :: q, r, s
@@ -971,7 +963,7 @@ contains
end function f_quad
pure function f_quad2D(abscX, abscY, wght_in, pow)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in
real(wp), dimension(3), intent(in) :: pow
diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp
index 8aceb2dfb4..5c16a5ba9f 100644
--- a/src/simulation/m_rhs.fpp
+++ b/src/simulation/m_rhs.fpp
@@ -75,13 +75,13 @@ module m_rhs
!! conservative variables, which are located in q_cons_vf, at cell-interior
!! Gaussian quadrature points (QP).
type(vector_field) :: q_cons_qp !<
- !$acc declare create(q_cons_qp)
+ $:GPU_DECLARE(create='[q_cons_qp]')
!! The primitive variables at cell-interior Gaussian quadrature points. These
!! are calculated from the conservative variables and gradient magnitude (GM)
!! of the volume fractions, q_cons_qp and gm_alpha_qp, respectively.
type(vector_field) :: q_prim_qp !<
- !$acc declare create(q_prim_qp)
+ $:GPU_DECLARE(create='[q_prim_qp]')
!> @name The first-order spatial derivatives of the primitive variables at cell-
!! interior Gaussian quadrature points. These are WENO-reconstructed from
@@ -90,7 +90,7 @@ module m_rhs
!! of the primitive variables, located in qK_prim_n, where K = L or R.
!> @{
type(vector_field), allocatable, dimension(:) :: dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp
- !$acc declare create(dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp)
+ $:GPU_DECLARE(create='[dq_prim_dx_qp,dq_prim_dy_qp,dq_prim_dz_qp]')
!> @}
!> @name The left and right WENO-reconstructed cell-boundary values of the cell-
@@ -100,26 +100,26 @@ module m_rhs
!> @{
type(vector_field), allocatable, dimension(:) :: dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n
type(vector_field), allocatable, dimension(:) :: dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n
- !$acc declare create(dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n)
- !$acc declare create(dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n)
+ $:GPU_DECLARE(create='[dqL_prim_dx_n,dqL_prim_dy_n,dqL_prim_dz_n]')
+ $:GPU_DECLARE(create='[dqR_prim_dx_n,dqR_prim_dy_n,dqR_prim_dz_n]')
!> @}
type(scalar_field), allocatable, dimension(:) :: tau_Re_vf
- !$acc declare create(tau_Re_vf)
+ $:GPU_DECLARE(create='[tau_Re_vf]')
type(vector_field) :: gm_alpha_qp !<
!! The gradient magnitude of the volume fractions at cell-interior Gaussian
!! quadrature points. gm_alpha_qp is calculated from individual first-order
!! spatial derivatives located in dq_prim_ds_qp.
- !$acc declare create(gm_alpha_qp)
+ $:GPU_DECLARE(create='[gm_alpha_qp]')
!> @name The left and right WENO-reconstructed cell-boundary values of the cell-
!! average gradient magnitude of volume fractions, located in gm_alpha_qp.
!> @{
type(vector_field), allocatable, dimension(:) :: gm_alphaL_n
type(vector_field), allocatable, dimension(:) :: gm_alphaR_n
- !$acc declare create(gm_alphaL_n, gm_alphaR_n)
+ $:GPU_DECLARE(create='[gm_alphaL_n,gm_alphaR_n]')
!> @}
!> @name The cell-boundary values of the fluxes (src - source, gsrc - geometrical
@@ -129,38 +129,38 @@ module m_rhs
type(vector_field), allocatable, dimension(:) :: flux_n
type(vector_field), allocatable, dimension(:) :: flux_src_n
type(vector_field), allocatable, dimension(:) :: flux_gsrc_n
- !$acc declare create(flux_n, flux_src_n, flux_gsrc_n)
+ $:GPU_DECLARE(create='[flux_n,flux_src_n,flux_gsrc_n]')
!> @}
type(vector_field), allocatable, dimension(:) :: qL_prim, qR_prim
- !$acc declare create(qL_prim, qR_prim)
+ $:GPU_DECLARE(create='[qL_prim,qR_prim]')
type(int_bounds_info) :: iv !< Vector field indical bounds
- !$acc declare create(iv)
+ $:GPU_DECLARE(create='[iv]')
!> @name Indical bounds in the x-, y- and z-directions
!> @{
type(int_bounds_info) :: irx, iry, irz
- !$acc declare create(irx, iry, irz)
+ $:GPU_DECLARE(create='[irx,iry,irz]')
type(int_bounds_info) :: is1, is2, is3
- !$acc declare create(is1, is2, is3)
+ $:GPU_DECLARE(create='[is1,is2,is3]')
!> @name Saved fluxes for testing
!> @{
type(scalar_field) :: alf_sum
!> @}
- !$acc declare create(alf_sum)
+ $:GPU_DECLARE(create='[alf_sum]')
real(wp), allocatable, dimension(:, :, :) :: blkmod1, blkmod2, alpha1, alpha2, Kterm
real(wp), allocatable, dimension(:, :, :, :) :: qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf
real(wp), allocatable, dimension(:, :, :, :) :: dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf
- !$acc declare create(blkmod1, blkmod2, alpha1, alpha2, Kterm)
- !$acc declare create(qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf)
- !$acc declare create(dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf)
+ $:GPU_DECLARE(create='[blkmod1,blkmod2,alpha1,alpha2,Kterm]')
+ $:GPU_DECLARE(create='[qL_rsx_vf,qL_rsy_vf,qL_rsz_vf,qR_rsx_vf,qR_rsy_vf,qR_rsz_vf]')
+ $:GPU_DECLARE(create='[dqL_rsx_vf,dqL_rsy_vf,dqL_rsz_vf,dqR_rsx_vf,dqR_rsy_vf,dqR_rsz_vf]')
real(wp), allocatable, dimension(:, :, :) :: nbub !< Bubble number density
- !$acc declare create(nbub)
+ $:GPU_DECLARE(create='[nbub]')
contains
@@ -173,8 +173,8 @@ contains
integer :: num_eqns_after_adv
- !$acc enter data copyin(idwbuff, idwbuff)
- !$acc update device(idwbuff, idwbuff)
+ $:GPU_ENTER_DATA(copyin='[idwbuff]')
+ $:GPU_UPDATE(device='[idwbuff]')
@:ALLOCATE(q_cons_qp%vf(1:sys_size))
@:ALLOCATE(q_prim_qp%vf(1:sys_size))
@@ -201,29 +201,29 @@ contains
@:ALLOCATE(q_prim_qp%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end))
else
q_prim_qp%vf(l)%sf => q_cons_qp%vf(l)%sf
- !$acc enter data copyin(q_prim_qp%vf(l)%sf)
- !$acc enter data attach(q_prim_qp%vf(l)%sf)
+ $:GPU_ENTER_DATA(copyin='[q_prim_qp%vf(l)%sf]')
+ $:GPU_ENTER_DATA(attach='[q_prim_qp%vf(l)%sf]')
end if
end do
do l = adv_idx%beg, adv_idx%end
q_prim_qp%vf(l)%sf => q_cons_qp%vf(l)%sf
- !$acc enter data copyin(q_prim_qp%vf(l)%sf)
- !$acc enter data attach(q_prim_qp%vf(l)%sf)
+ $:GPU_ENTER_DATA(copyin='[q_prim_qp%vf(l)%sf]')
+ $:GPU_ENTER_DATA(attach='[q_prim_qp%vf(l)%sf]')
end do
if (surface_tension) then
q_prim_qp%vf(c_idx)%sf => &
q_cons_qp%vf(c_idx)%sf
- !$acc enter data copyin(q_prim_qp%vf(c_idx)%sf)
- !$acc enter data attach(q_prim_qp%vf(c_idx)%sf)
+ $:GPU_ENTER_DATA(copyin='[q_prim_qp%vf(c_idx)%sf]')
+ $:GPU_ENTER_DATA(attach='[q_prim_qp%vf(c_idx)%sf]')
end if
if (cont_damage) then
q_prim_qp%vf(damage_idx)%sf => &
q_cons_qp%vf(damage_idx)%sf
- !$acc enter data copyin(q_prim_qp%vf(damage_idx)%sf)
- !$acc enter data attach(q_prim_qp%vf(damage_idx)%sf)
+ $:GPU_ENTER_DATA(copyin='[q_prim_qp%vf(damage_idx)%sf]')
+ $:GPU_ENTER_DATA(attach='[q_prim_qp%vf(damage_idx)%sf]')
end if
if (viscous) then
@@ -548,14 +548,14 @@ contains
if (riemann_solver /= 1 .and. riemann_solver /= 4) then
do l = adv_idx%beg + 1, adv_idx%end
flux_src_n(i)%vf(l)%sf => flux_src_n(i)%vf(adv_idx%beg)%sf
- !$acc enter data attach(flux_src_n(i)%vf(l)%sf)
+ $:GPU_ENTER_DATA(attach='[flux_src_n(i)%vf(l)%sf]')
end do
end if
else
do l = 1, sys_size
flux_n(i)%vf(l)%sf => flux_n(1)%vf(l)%sf
flux_src_n(i)%vf(l)%sf => flux_src_n(1)%vf(l)%sf
- !$acc enter data attach(flux_n(i)%vf(l)%sf,flux_src_n(i)%vf(l)%sf)
+ $:GPU_ENTER_DATA(attach='[flux_n(i)%vf(l)%sf,flux_src_n(i)%vf(l)%sf]')
end do
end if
end do
@@ -568,7 +568,7 @@ contains
call s_initialize_pressure_relaxation_module
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do id = 1, num_dims
do i = 1, sys_size
do l = idwbuff(3)%beg, idwbuff(3)%end
@@ -607,7 +607,7 @@ contains
call cpu_time(t_start)
! Association/Population of Working Variables
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = idwbuff(3)%beg, idwbuff(3)%end
do k = idwbuff(2)%beg, idwbuff(2)%end
@@ -621,16 +621,16 @@ contains
! Converting Conservative to Primitive Variables
if (mpp_lim .and. bubbles_euler) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = idwbuff(3)%beg, idwbuff(3)%end
do k = idwbuff(2)%beg, idwbuff(2)%end
do j = idwbuff(1)%beg, idwbuff(1)%end
alf_sum%sf(j, k, l) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe - 1
alf_sum%sf(j, k, l) = alf_sum%sf(j, k, l) + q_cons_qp%vf(i)%sf(j, k, l)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe - 1
q_cons_qp%vf(i)%sf(j, k, l) = q_cons_qp%vf(i)%sf(j, k, l)*(1._wp - q_cons_qp%vf(alf_idx)%sf(j, k, l)) &
/alf_sum%sf(j, k, l)
@@ -839,7 +839,7 @@ contains
! END: Dimensional Splitting Loop
if (ib) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -903,7 +903,7 @@ contains
! END: Additional physics and source terms
if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = idwbuff(3)%beg, idwbuff(3)%end
do k = idwbuff(2)%beg, idwbuff(2)%end
@@ -943,7 +943,7 @@ contains
real(wp) :: advected_qty_val, pressure_val, velocity_val
if (alt_soundspeed) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do q_loop = 0, p
do l_loop = 0, n
do k_loop = 0, m
@@ -977,7 +977,7 @@ contains
call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz)
end if
- !$acc parallel loop collapse(4) gang vector default(present) private(inv_ds, flux_face1, flux_face2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]')
do j = 1, sys_size
do q_loop = 0, p
do l_loop = 0, n
@@ -992,8 +992,8 @@ contains
end do
if (model_eqns == 3) then
- !$acc parallel loop collapse(4) gang vector default(present) &
- !$acc private(inv_ds, advected_qty_val, pressure_val, flux_face1, flux_face2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, &
+ & pressure_val,flux_face1,flux_face2]')
do q_loop = 0, p
do l_loop = 0, n
do k_loop = 0, m
@@ -1022,7 +1022,7 @@ contains
call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz)
end if
- !$acc parallel loop collapse(4) gang vector default(present) private(inv_ds, flux_face1, flux_face2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]')
do j = 1, sys_size
do l = 0, p
do k = 0, n
@@ -1037,8 +1037,8 @@ contains
end do
if (model_eqns == 3) then
- !$acc parallel loop collapse(4) gang vector default(present) &
- !$acc private(inv_ds, advected_qty_val, pressure_val, flux_face1, flux_face2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, &
+ & pressure_val,flux_face1,flux_face2]')
do l = 0, p
do k = 0, n
do q = 0, m
@@ -1063,7 +1063,7 @@ contains
end if
if (cyl_coord) then
- !$acc parallel loop collapse(4) gang vector default(present) private(flux_face1, flux_face2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]')
do j = 1, sys_size
do l = 0, p
do k = 0, n
@@ -1089,8 +1089,8 @@ contains
end if
if (grid_geometry == 3) then ! Cylindrical Coordinates
- !$acc parallel loop collapse(4) gang vector default(present) &
- !$acc private(inv_ds, velocity_val, flux_face1, flux_face2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,velocity_val, &
+ & flux_face1,flux_face2]')
do j = 1, sys_size
do k = 0, p
do q = 0, n
@@ -1105,7 +1105,7 @@ contains
end do
end do
end do
- !$acc parallel loop collapse(4) gang vector default(present) private(flux_face1, flux_face2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]')
do j = 1, sys_size
do k = 0, p
do q = 0, n
@@ -1119,7 +1119,7 @@ contains
end do
end do
else ! Cartesian Coordinates
- !$acc parallel loop collapse(4) gang vector default(present) private(inv_ds, flux_face1, flux_face2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]')
do j = 1, sys_size
do k = 0, p
do q = 0, n
@@ -1135,8 +1135,8 @@ contains
end if
if (model_eqns == 3) then
- !$acc parallel loop collapse(4) gang vector default(present) &
- !$acc private(inv_ds, advected_qty_val, pressure_val, flux_face1, flux_face2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, &
+ & pressure_val,flux_face1,flux_face2]')
do k = 0, p
do q = 0, n
do l = 0, m
@@ -1180,8 +1180,8 @@ contains
case (1) ! x-direction
use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4)
if (use_standard_riemann) then
- !$acc parallel loop collapse(4) gang vector default(present) &
- !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, &
+ & local_term_coeff,local_flux1,local_flux2]')
do j_adv = advxb, advxe
do q_idx = 0, p ! z_extent
do l_idx = 0, n ! y_extent
@@ -1199,8 +1199,9 @@ contains
else ! Other Riemann solvers
if (alt_soundspeed) then
if (bubbles_euler .neqv. .true.) then
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, &
+ & local_q_cons_val, local_k_term_val, &
+ & local_term_coeff, local_flux1, local_flux2]')
do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m
local_inv_ds = 1._wp/dx(k_idx)
local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx)
@@ -1212,8 +1213,10 @@ contains
local_inv_ds*local_term_coeff*(local_flux1 - local_flux2)
end do; end do; end do
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,&
+ & local_q_cons_val, local_k_term_val, &
+ & local_term_coeff, local_flux1, &
+ & local_flux2]')
do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m
local_inv_ds = 1._wp/dx(k_idx)
local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx)
@@ -1226,8 +1229,8 @@ contains
end do; end do; end do
end if
else ! NOT alt_soundspeed
- !$acc parallel loop collapse(4) gang vector default(present) &
- !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, &
+ & local_term_coeff,local_flux1,local_flux2]')
do j_adv = advxb, advxe
do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m
local_inv_ds = 1._wp/dx(k_idx)
@@ -1244,8 +1247,8 @@ contains
case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx)
use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4)
if (use_standard_riemann) then
- !$acc parallel loop collapse(4) gang vector default(present) &
- !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, &
+ & local_term_coeff,local_flux1,local_flux2]')
do j_adv = advxb, advxe
do l_idx = 0, p ! z_extent
do k_idx = 0, n ! y_extent
@@ -1263,8 +1266,10 @@ contains
else ! Other Riemann solvers
if (alt_soundspeed) then
if (bubbles_euler .neqv. .true.) then
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, &
+ & local_q_cons_val, local_k_term_val, &
+ & local_term_coeff, local_flux1, &
+ & local_flux2]')
do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m
local_inv_ds = 1._wp/dy(k_idx)
local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx)
@@ -1280,8 +1285,10 @@ contains
end if
end do; end do; end do
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, &
+ & local_q_cons_val, local_k_term_val, &
+ & local_term_coeff, local_flux1, &
+ & local_flux2]')
do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m
local_inv_ds = 1._wp/dy(k_idx)
local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx)
@@ -1298,8 +1305,8 @@ contains
end do; end do; end do
end if
else ! NOT alt_soundspeed
- !$acc parallel loop collapse(4) gang vector default(present) &
- !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, &
+ & local_term_coeff,local_flux1,local_flux2]')
do j_adv = advxb, advxe
do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m
local_inv_ds = 1._wp/dy(k_idx)
@@ -1321,8 +1328,8 @@ contains
end if
if (use_standard_riemann) then
- !$acc parallel loop collapse(4) gang vector default(present) &
- !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, &
+ & local_term_coeff,local_flux1,local_flux2]')
do j_adv = advxb, advxe
do k_idx = 0, p ! z_extent
do q_idx = 0, n ! y_extent
@@ -1340,8 +1347,10 @@ contains
else ! Other Riemann solvers
if (alt_soundspeed) then
if (bubbles_euler .neqv. .true.) then
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, &
+ & local_q_cons_val, local_k_term_val, &
+ & local_term_coeff, local_flux1, &
+ & local_flux2]')
do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m
local_inv_ds = 1._wp/dz(k_idx)
local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx)
@@ -1353,8 +1362,10 @@ contains
local_inv_ds*local_term_coeff*(local_flux1 - local_flux2)
end do; end do; end do
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, &
+ & local_q_cons_val, local_k_term_val, &
+ & local_term_coeff, local_flux1, &
+ & local_flux2]')
do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m
local_inv_ds = 1._wp/dz(k_idx)
local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx)
@@ -1367,8 +1378,8 @@ contains
end do; end do; end do
end if
else ! NOT alt_soundspeed
- !$acc parallel loop collapse(4) gang vector default(present) &
- !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, &
+ & local_term_coeff,local_flux1,local_flux2]')
do j_adv = advxb, advxe
do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m
local_inv_ds = 1._wp/dz(k_idx)
@@ -1400,7 +1411,7 @@ contains
if (idir == 1) then ! x-direction
if (surface_tension) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -1414,11 +1425,11 @@ contains
end do
end if
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, E_idx
rhs_vf(i)%sf(j, k, l) = &
rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* &
@@ -1432,7 +1443,7 @@ contains
elseif (idir == 2) then ! y-direction
if (surface_tension) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -1464,10 +1475,10 @@ contains
idwbuff(1), idwbuff(2), idwbuff(3))
end if
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, p
do j = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, E_idx
rhs_vf(i)%sf(j, 0, l) = &
rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* &
@@ -1479,11 +1490,11 @@ contains
end if
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 1, n
do j = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, E_idx
rhs_vf(i)%sf(j, k, l) = &
rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* &
@@ -1495,11 +1506,11 @@ contains
end do
else
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, E_idx
rhs_vf(i)%sf(j, k, l) = &
rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* &
@@ -1516,11 +1527,11 @@ contains
if (cyl_coord) then
if ((bc_y%beg == BC_REFLECTIVE) .or. (bc_y%beg == BC_AXIS)) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 1, n
do j = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, E_idx
rhs_vf(i)%sf(j, k, l) = &
rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* &
@@ -1532,10 +1543,10 @@ contains
end do
if (viscous) then
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = 0, p
do j = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, E_idx
rhs_vf(i)%sf(j, 0, l) = &
rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* &
@@ -1546,11 +1557,11 @@ contains
end if
else
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, E_idx
rhs_vf(i)%sf(j, k, l) = &
rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* &
@@ -1567,7 +1578,7 @@ contains
elseif (idir == 3) then ! z-direction
if (surface_tension) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -1581,11 +1592,11 @@ contains
end do
end if
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, E_idx
rhs_vf(i)%sf(j, k, l) = &
rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* &
@@ -1597,7 +1608,7 @@ contains
end do
if (grid_geometry == 3) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -1717,10 +1728,10 @@ contains
end if
- !$acc update device(is1, is2, is3, iv)
+ $:GPU_UPDATE(device='[is1,is2,is3,iv]')
if (recon_dir == 1) then
- !$acc parallel loop collapse(4) default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1731,9 +1742,8 @@ contains
end do
end do
end do
- !$acc end parallel loop
else if (recon_dir == 2) then
- !$acc parallel loop collapse(4) default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1744,9 +1754,8 @@ contains
end do
end do
end do
- !$acc end parallel loop
else if (recon_dir == 3) then
- !$acc parallel loop collapse(4) default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -1757,7 +1766,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end if
end subroutine s_reconstruct_cell_boundary_values_first_order
@@ -1775,13 +1783,13 @@ contains
@:DEALLOCATE(q_cons_qp%vf(j)%sf)
@:DEALLOCATE(q_prim_qp%vf(j)%sf)
else
- !$acc exit data detach(q_prim_qp%vf(j)%sf)
+ $:GPU_EXIT_DATA(detach='[q_prim_qp%vf(j)%sf]')
nullify (q_prim_qp%vf(j)%sf)
end if
end do
do j = adv_idx%beg, adv_idx%end
- !$acc exit data detach(q_prim_qp%vf(j)%sf)
+ $:GPU_EXIT_DATA(detach='[q_prim_qp%vf(j)%sf]')
nullify (q_prim_qp%vf(j)%sf)
end do
@@ -1814,7 +1822,7 @@ contains
end if
if (mpp_lim .and. bubbles_euler) then
- !$acc exit data delete(alf_sum%sf)
+ $:GPU_EXIT_DATA(delete='[alf_sum%sf]')
deallocate (alf_sum%sf)
end if
diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp
index 8b9d0cf043..a189d7e3c3 100644
--- a/src/simulation/m_riemann_solvers.fpp
+++ b/src/simulation/m_riemann_solvers.fpp
@@ -67,8 +67,7 @@ module m_riemann_solvers
real(wp), allocatable, dimension(:, :, :, :) :: flux_rsx_vf, flux_src_rsx_vf
real(wp), allocatable, dimension(:, :, :, :) :: flux_rsy_vf, flux_src_rsy_vf
real(wp), allocatable, dimension(:, :, :, :) :: flux_rsz_vf, flux_src_rsz_vf
- !$acc declare create( flux_rsx_vf, flux_src_rsx_vf, flux_rsy_vf, &
- !$acc flux_src_rsy_vf, flux_rsz_vf, flux_src_rsz_vf )
+ $:GPU_DECLARE(create='[flux_rsx_vf,flux_src_rsx_vf,flux_rsy_vf,flux_src_rsy_vf,flux_rsz_vf,flux_src_rsz_vf]')
!> @}
!> The cell-boundary values of the geometrical source flux that are computed
@@ -79,7 +78,7 @@ module m_riemann_solvers
real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsx_vf !<
real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsy_vf !<
real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsz_vf !<
- !$acc declare create( flux_gsrc_rsx_vf, flux_gsrc_rsy_vf, flux_gsrc_rsz_vf )
+ $:GPU_DECLARE(create='[flux_gsrc_rsx_vf,flux_gsrc_rsy_vf,flux_gsrc_rsz_vf]')
!> @}
! The cell-boundary values of the velocity. vel_src_rs_vf is determined as
@@ -88,17 +87,17 @@ module m_riemann_solvers
real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsx_vf
real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsy_vf
real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsz_vf
- !$acc declare create(vel_src_rsx_vf, vel_src_rsy_vf, vel_src_rsz_vf)
+ $:GPU_DECLARE(create='[vel_src_rsx_vf,vel_src_rsy_vf,vel_src_rsz_vf]')
real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsx_vf
real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsy_vf
real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsz_vf
- !$acc declare create(mom_sp_rsx_vf, mom_sp_rsy_vf, mom_sp_rsz_vf)
+ $:GPU_DECLARE(create='[mom_sp_rsx_vf,mom_sp_rsy_vf,mom_sp_rsz_vf]')
real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsx_vf
real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsy_vf
real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsz_vf
- !$acc declare create(Re_avg_rsx_vf, Re_avg_rsy_vf, Re_avg_rsz_vf)
+ $:GPU_DECLARE(create='[Re_avg_rsx_vf,Re_avg_rsy_vf,Re_avg_rsz_vf]')
!> @name Indical bounds in the s1-, s2- and s3-directions
!> @{
@@ -106,13 +105,13 @@ module m_riemann_solvers
type(int_bounds_info) :: isx, isy, isz
!> @}
- !$acc declare create(is1, is2, is3, isx, isy, isz)
+ $:GPU_DECLARE(create='[is1,is2,is3,isx,isy,isz]')
real(wp), allocatable, dimension(:) :: Gs
- !$acc declare create(Gs)
+ $:GPU_DECLARE(create='[Gs]')
real(wp), allocatable, dimension(:, :) :: Res
- !$acc declare create(Res)
+ $:GPU_DECLARE(create='[Res]')
contains
@@ -358,19 +357,18 @@ contains
#:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
if (norm_dir == ${NORM_DIR}$) then
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, &
- !$acc alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, &
- !$acc rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, &
- !$acc xi_field_L, xi_field_R, &
- !$acc Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, &
- !$acc Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, &
- !$acc c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, &
- !$acc pcorr, zcoef, vel_L_tmp, vel_R_tmp)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, &
+ & vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R, &
+ & G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, &
+ & s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, &
+ & Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, &
+ & Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, &
+ & pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, &
+ & zcoef, vel_L_tmp, vel_R_tmp]')
do l = is3%beg, is3%end
do k = is2%beg, is2%end
do j = is1%beg, is1%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i)
alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)
@@ -378,7 +376,7 @@ contains
vel_L_rms = 0._wp; vel_R_rms = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i)
vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i)
@@ -386,7 +384,7 @@ contains
vel_R_rms = vel_R_rms + vel_R(i)**2._wp
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)
alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)
@@ -430,7 +428,7 @@ contains
pres_mag%R = 0._wp
if (mpp_lim) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_L(i) = max(0._wp, alpha_rho_L(i))
alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp)
@@ -444,7 +442,7 @@ contains
alpha_R = alpha_R/max(alpha_R_sum, sgm_eps)
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_L = rho_L + alpha_rho_L(i)
gamma_L = gamma_L + alpha_L(i)*gammas(i)
@@ -458,7 +456,7 @@ contains
end do
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_L(i) = dflt_real
Re_R(i) = dflt_real
@@ -466,7 +464,7 @@ contains
if (Re_size(i) > 0) Re_L(i) = 0._wp
if (Re_size(i) > 0) Re_R(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_L(i) = alpha_L(Re_idx(i, q))/Res(i, q) &
+ Re_L(i)
@@ -480,7 +478,7 @@ contains
end if
if (chemistry) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = chemxb, chemxe
Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i)
Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)
@@ -568,7 +566,7 @@ contains
if (hypoelasticity) then
G_L = 0._wp; G_R = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
G_L = G_L + alpha_L(i)*Gs(i)
G_R = G_R + alpha_R(i)*Gs(i)
@@ -601,7 +599,7 @@ contains
! G_L = 0._wp
! G_R = 0._wp
!
- ! !$acc loop seq
+ ! $:GPU_LOOP(parallelism='[seq]')
! do i = 1, num_fluids
! G_L = G_L + alpha_L(i)*Gs(i)
! G_R = G_R + alpha_R(i)*Gs(i)
@@ -610,17 +608,17 @@ contains
! if ((G_L > 1.e-3_wp) .and. (G_R > 1.e-3_wp)) then
! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1)
! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1)
- ! !$acc loop seq
+ ! $:GPU_LOOP(parallelism='[seq]')
! do i = 1, b_size-1
! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i)
! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i)
! end do
- ! !$acc loop seq
+ ! $:GPU_LOOP(parallelism='[seq]')
! do i = 1, b_size-1
! tau_e_L(i) = 0._wp
! tau_e_R(i) = 0._wp
! end do
- ! !$acc loop seq
+ ! $:GPU_LOOP(parallelism='[seq]')
! do i = 1, num_dims
! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i)
! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i)
@@ -648,7 +646,7 @@ contains
end if
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i))
end do
@@ -727,7 +725,7 @@ contains
! Mass
if (.not. relativity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
(s_M*alpha_rho_R(i)*vel_R(norm_dir) &
@@ -737,7 +735,7 @@ contains
/(s_M - s_P)
end do
elseif (relativity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
(s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) &
@@ -750,7 +748,7 @@ contains
! Momentum
if (mhd .and. (.not. relativity)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 3
! Flux of rho*v_i in the ${XYZ}$ direction
! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot
@@ -765,7 +763,7 @@ contains
/(s_M - s_P)
end do
elseif (mhd .and. relativity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 3
! Flux of m_i in the ${XYZ}$ direction
! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot
@@ -780,7 +778,7 @@ contains
/(s_M - s_P)
end do
elseif (bubbles_euler) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = &
(s_M*(rho_R*vel_R(dir_idx(1)) &
@@ -795,7 +793,7 @@ contains
+ (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i)))
end do
else if (hypoelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = &
(s_M*(rho_R*vel_R(dir_idx(1)) &
@@ -811,7 +809,7 @@ contains
/(s_M - s_P)
end do
else
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = &
(s_M*(rho_R*vel_R(dir_idx(1)) &
@@ -852,7 +850,7 @@ contains
+ (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp
else if (hypoelasticity) then
flux_tau_L = 0._wp; flux_tau_R = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i))
flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i))
@@ -885,7 +883,7 @@ contains
end if
! Advection
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
(qL_prim_rs${XYZ}$_vf(j, k, l, i) &
@@ -910,7 +908,7 @@ contains
!end if
! Div(U)?
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = &
(xi_M*(rho_L*vel_L(dir_idx(i))* &
@@ -931,7 +929,7 @@ contains
end if
if (chemistry) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = chemxb, chemxe
Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i)
Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)
@@ -948,7 +946,7 @@ contains
if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const.
! B_y flux = v_x * B_y - v_y * Bx0
! B_z flux = v_x * B_z - v_z * Bx0
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 0, 1
flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) &
- s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) &
@@ -958,7 +956,7 @@ contains
! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$)
! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$)
! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 0, 2
flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( &
s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - &
@@ -972,7 +970,7 @@ contains
#:if (NORM_DIR == 2)
if (cyl_coord) then
!Substituting the advective flux into the inviscid geometrical source flux
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, E_idx
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i)
end do
@@ -981,7 +979,7 @@ contains
flux_rs${XYZ}$_vf(j, k, l, contxe + 2) &
- (s_M*pres_R - s_P*pres_L)/(s_M - s_P)
! Geometrical source of the void fraction(s) is zero
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i)
end do
@@ -994,7 +992,7 @@ contains
(s_M*tau_e_R(4) - s_P*tau_e_L(4)) &
/(s_M - s_P)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = strxb, strxe
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i)
end do
@@ -1190,13 +1188,14 @@ contains
! 6-EQUATION MODEL WITH HLLC
if (model_eqns == 3) then
!ME3
-
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(vel_L, vel_R, vel_K_Star, Re_L, Re_R, rho_avg, h_avg, gamma_avg, &
- !$acc s_L, s_R, s_S, vel_avg_rms, alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, &
- !$acc Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, &
- !$acc tau_e_L, tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, &
- !$acc zcoef, vel_L_tmp, vel_R_tmp)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, &
+ & vel_K_Star, Re_L, Re_R, rho_avg, h_avg, &
+ & gamma_avg, s_L, s_R, s_S, vel_avg_rms, &
+ & alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, &
+ & Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, &
+ & Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, &
+ & tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, &
+ & xi_field_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]')
do l = is3%beg, is3%end
do k = is2%beg, is2%end
do j = is1%beg, is1%end
@@ -1205,7 +1204,7 @@ contains
vel_L_rms = 0._wp; vel_R_rms = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i)
vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i)
@@ -1230,32 +1229,32 @@ contains
alpha_R_sum = 0._wp
if (mpp_lim) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i))
qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp)
alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i))
qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp)
alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps)
end do
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)
gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i)
@@ -1272,13 +1271,13 @@ contains
end do
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_L(i) = dflt_real
if (Re_size(i) > 0) Re_L(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) &
+ Re_L(i)
@@ -1288,13 +1287,13 @@ contains
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_R(i) = dflt_real
if (Re_size(i) > 0) Re_R(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) &
+ Re_R(i)
@@ -1310,18 +1309,18 @@ contains
! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY
if (hypoelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, strxe - strxb + 1
tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i)
tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i)
end do
G_L = 0._wp; G_R = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
G_L = G_L + alpha_L(i)*Gs(i)
G_R = G_R + alpha_R(i)*Gs(i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, strxe - strxb + 1
! Elastic contribution to energy if G large enough
if ((G_L > verysmall) .and. (G_R > verysmall)) then
@@ -1338,13 +1337,13 @@ contains
! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY
if (hyperelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i)
xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i)
end do
G_L = 0._wp; G_R = 0._wp;
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
! Mixture left and right shear modulus
G_L = G_L + alpha_L(i)*Gs(i)
@@ -1355,7 +1354,7 @@ contains
E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1)
E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1)
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, b_size - 1
tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i)
tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i)
@@ -1379,7 +1378,7 @@ contains
vel_avg_rms, 0._wp, c_avg)
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i))
end do
@@ -1474,7 +1473,7 @@ contains
! COMPUTING FLUXES
! MASS FLUX.
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + &
@@ -1483,7 +1482,7 @@ contains
! MOMENTUM FLUX.
! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
idxi = dir_idx(i)
flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* &
@@ -1499,7 +1498,7 @@ contains
! ELASTICITY. Elastic shear stress additions for the momentum and energy flux
if (elasticity) then
flux_ene_e = 0._wp;
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
idxi = dir_idx(i)
! MOMENTUM ELASTIC FLUX.
@@ -1517,7 +1516,7 @@ contains
end if
! VOLUME FRACTION FLUX.
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + &
@@ -1525,7 +1524,7 @@ contains
end do
! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX.
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
idxi = dir_idx(i)
vel_src_rs${XYZ}$_vf(j, k, l, idxi) = &
@@ -1535,7 +1534,7 @@ contains
! INTERNAL ENERGIES ADVECTION FLUX.
! K-th pressure and velocity in preparation for the internal energy flux
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1._wp + gammas(i)))* &
xi_L**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_L) + pres_L) + &
@@ -1554,7 +1553,7 @@ contains
! HYPOELASTIC STRESS EVOLUTION FLUX.
if (hypoelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, strxe - strxb + 1
flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = &
xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + &
@@ -1564,7 +1563,7 @@ contains
! REFERENCE MAP FLUX.
if (hyperelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = &
xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) &
@@ -1585,11 +1584,11 @@ contains
#:if (NORM_DIR == 2)
if (cyl_coord) then
!Substituting the advective flux into the inviscid geometrical source flux
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, E_idx
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = intxb, intxe
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i)
end do
@@ -1597,7 +1596,7 @@ contains
flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = &
flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star
! Geometrical source of the void fraction(s) is zero
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp
end do
@@ -1605,7 +1604,7 @@ contains
#:endif
#:if (NORM_DIR == 3)
if (grid_geometry == 3) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, sys_size
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp
end do
@@ -1622,32 +1621,34 @@ contains
elseif (model_eqns == 4) then
!ME4
- !$acc parallel loop collapse(3) gang vector default(present) private(alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, &
- !$acc rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, &
+ & alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, &
+ & rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, &
+ & vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]')
do l = is3%beg, is3%end
do k = is2%beg, is2%end
do j = is1%beg, is1%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i)
alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i)
vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i)
end do
vel_L_rms = 0._wp; vel_R_rms = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel_L_rms = vel_L_rms + vel_L(i)**2._wp
vel_R_rms = vel_R_rms + vel_R(i)**2._wp
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)
alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)
@@ -1660,7 +1661,7 @@ contains
gamma_L = 0._wp
pi_inf_L = 0._wp
qv_L = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_L = rho_L + alpha_rho_L(i)
gamma_L = gamma_L + alpha_L(i)*gammas(i)
@@ -1672,7 +1673,7 @@ contains
gamma_R = 0._wp
pi_inf_R = 0._wp
qv_R = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_R = rho_R + alpha_rho_R(i)
gamma_R = gamma_R + alpha_R(i)*gammas(i)
@@ -1747,7 +1748,7 @@ contains
xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S))
xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S))
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
xi_M*alpha_rho_L(i) &
@@ -1758,7 +1759,7 @@ contains
! Momentum flux.
! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = &
xi_M*(rho_L*(vel_L(dir_idx(1))* &
@@ -1777,7 +1778,7 @@ contains
if (bubbles_euler) then
! Put p_tilde in
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = &
flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + &
@@ -1788,7 +1789,7 @@ contains
flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = alf_idx, alf_idx !only advect the void fraction
flux_rs${XYZ}$_vf(j, k, l, i) = &
xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) &
@@ -1798,7 +1799,7 @@ contains
end do
! Source for volume fraction advection equation
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp
@@ -1809,7 +1810,7 @@ contains
! Add advection flux for bubble variables
if (bubbles_euler) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = bubxb, bubxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) &
@@ -1824,7 +1825,7 @@ contains
#:if (NORM_DIR == 2)
if (cyl_coord) then
! Substituting the advective flux into the inviscid geometrical source flux
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, E_idx
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i)
end do
@@ -1841,7 +1842,7 @@ contains
(1._wp - dir_flg(dir_idx(1)))* &
vel_R(dir_idx(1))) - vel_R(dir_idx(1)))))
! Geometrical source of the void fraction(s) is zero
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp
end do
@@ -1849,7 +1850,7 @@ contains
#:endif
#:if (NORM_DIR == 3)
if (grid_geometry == 3) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, sys_size
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp
end do
@@ -1870,16 +1871,19 @@ contains
end do
end do
end do
- !$acc end parallel loop
elseif (model_eqns == 2 .and. bubbles_euler) then
- !$acc parallel loop collapse(3) gang vector default(present) private(R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, &
- !$acc rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[R0_L, R0_R, V0_L, &
+ & V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, &
+ & vel_R, rho_avg, alpha_L, alpha_R, h_avg, &
+ & gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, &
+ & ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, &
+ & pcorr, zcoef, vel_L_tmp, vel_R_tmp]')
do l = is3%beg, is3%end
do k = is2%beg, is2%end
do j = is1%beg, is1%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)
alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)
@@ -1887,7 +1891,7 @@ contains
vel_L_rms = 0._wp; vel_R_rms = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i)
vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i)
@@ -1905,7 +1909,7 @@ contains
! Retain this in the refactor
if (mpp_lim .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)
gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i)
@@ -1913,7 +1917,7 @@ contains
qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i)
end do
else if (num_fluids > 2) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids - 1
rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)
gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i)
@@ -1933,7 +1937,7 @@ contains
qv_R = 0._wp
if (mpp_lim .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)
gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i)
@@ -1941,7 +1945,7 @@ contains
qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i)
end do
else if (num_fluids > 2) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids - 1
rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)
gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i)
@@ -1957,13 +1961,13 @@ contains
if (viscous) then
if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_L(i) = dflt_real
if (Re_size(i) > 0) Re_L(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res(i, q) &
+ Re_L(i)
@@ -1973,13 +1977,13 @@ contains
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_R(i) = dflt_real
if (Re_size(i) > 0) Re_R(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res(i, q) &
+ Re_R(i)
@@ -1998,7 +2002,7 @@ contains
H_R = (E_R + pres_R)/rho_R
if (avg_state == 2) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, nb
R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i))
R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i))
@@ -2018,7 +2022,7 @@ contains
else
nbub_L_denom = 0._wp
nbub_R_denom = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, nb
nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i)
nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i)
@@ -2032,7 +2036,7 @@ contains
nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb)
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, nb
if (.not. qbmm) then
if (polytropic) then
@@ -2065,7 +2069,7 @@ contains
R3V2Lbar = 0._wp
R3V2Rbar = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, nb
PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i)
PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i)
@@ -2100,7 +2104,7 @@ contains
gamma_avg = 5.e-1_wp*(gamma_L + gamma_R)
vel_avg_rms = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel_avg_rms = vel_avg_rms + (5.e-1_wp*(vel_L(i) + vel_R(i)))**2._wp
end do
@@ -2119,7 +2123,7 @@ contains
vel_avg_rms, 0._wp, c_avg)
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i))
end do
@@ -2183,7 +2187,7 @@ contains
pcorr = 0._wp
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) &
@@ -2202,7 +2206,7 @@ contains
! Include p_tilde
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = &
xi_M*(rho_L*(vel_L(dir_idx(1))* &
@@ -2234,7 +2238,7 @@ contains
+ (s_M/s_L)*(s_P/s_R)*pcorr*s_S
! Volume fraction flux
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) &
@@ -2244,7 +2248,7 @@ contains
end do
! Source for volume fraction advection equation
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = &
xi_M*(vel_L(dir_idx(i)) + &
@@ -2260,7 +2264,7 @@ contains
flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1))
! Add advection flux for bubble variables
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = bubxb, bubxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) &
@@ -2289,7 +2293,7 @@ contains
#:if (NORM_DIR == 2)
if (cyl_coord) then
! Substituting the advective flux into the inviscid geometrical source flux
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, E_idx
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i)
end do
@@ -2306,7 +2310,7 @@ contains
(1._wp - dir_flg(dir_idx(1)))* &
vel_R(dir_idx(1))) - vel_R(dir_idx(1)))))
! Geometrical source of the void fraction(s) is zero
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp
end do
@@ -2314,7 +2318,7 @@ contains
#:endif
#:if (NORM_DIR == 3)
if (grid_geometry == 3) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, sys_size
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp
end do
@@ -2337,28 +2341,31 @@ contains
end do
end do
end do
- !$acc end parallel loop
else
! 5-EQUATION MODEL WITH HLLC
- !$acc parallel loop collapse(3) gang vector default(present) private(vel_L, vel_R, Re_L, Re_R, &
- !$acc rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, &
- !$acc vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, &
- !$acc tau_e_L, tau_e_R, xi_field_L, xi_field_R, &
- !$acc Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2) copyin(is1,is2,is3)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, &
+ & Re_L, Re_R, rho_avg, h_avg, gamma_avg, &
+ & alpha_L, alpha_R, s_L, s_R, s_S, &
+ & vel_avg_rms, pcorr, zcoef, vel_L_tmp, &
+ & vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, &
+ & Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, &
+ & tau_e_R, xi_field_L, xi_field_R, Yi_avg, &
+ & Phi_avg, h_iL, h_iR, h_avg_2]', &
+ & copyin='[is1, is2, is3]')
do l = is3%beg, is3%end
do k = is2%beg, is2%end
do j = is1%beg, is1%end
!idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)
alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)
end do
vel_L_rms = 0._wp; vel_R_rms = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i)
vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i)
@@ -2385,32 +2392,32 @@ contains
! Change this by splitting it into the cases
! present in the bubbles_euler
if (mpp_lim) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i))
qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp)
alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i))
qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp)
alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps)
end do
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)
gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i)
@@ -2424,13 +2431,13 @@ contains
end do
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_L(i) = dflt_real
if (Re_size(i) > 0) Re_L(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) &
+ Re_L(i)
@@ -2440,13 +2447,13 @@ contains
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_R(i) = dflt_real
if (Re_size(i) > 0) Re_R(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) &
+ Re_R(i)
@@ -2458,7 +2465,7 @@ contains
if (chemistry) then
c_sum_Yi_Phi = 0.0_wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = chemxb, chemxe
Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i)
Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)
@@ -2517,19 +2524,19 @@ contains
! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY
if (hypoelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, strxe - strxb + 1
tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i)
tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i)
end do
G_L = 0._wp
G_R = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
G_L = G_L + alpha_L(i)*Gs(i)
G_R = G_R + alpha_R(i)*Gs(i)
end do
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, strxe - strxb + 1
! Elastic contribution to energy if G large enough
if ((G_L > verysmall) .and. (G_R > verysmall)) then
@@ -2546,14 +2553,14 @@ contains
! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY
if (hyperelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i)
xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i)
end do
G_L = 0._wp
G_R = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
! Mixture left and right shear modulus
G_L = G_L + alpha_L(i)*Gs(i)
@@ -2564,7 +2571,7 @@ contains
E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1)
E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1)
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, b_size - 1
tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i)
tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i)
@@ -2588,7 +2595,7 @@ contains
vel_avg_rms, c_sum_Yi_Phi, c_avg)
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i))
end do
@@ -2664,7 +2671,7 @@ contains
! COMPUTING THE HLLC FLUXES
! MASS FLUX.
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, contxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) &
@@ -2675,7 +2682,7 @@ contains
! MOMENTUM FLUX.
! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
idxi = dir_idx(i)
flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = &
@@ -2710,7 +2717,7 @@ contains
! ELASTICITY. Elastic shear stress additions for the momentum and energy flux
if (elasticity) then
flux_ene_e = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
idxi = dir_idx(i)
! MOMENTUM ELASTIC FLUX.
@@ -2729,7 +2736,7 @@ contains
! HYPOELASTIC STRESS EVOLUTION FLUX.
if (hypoelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, strxe - strxb + 1
flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = &
xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + &
@@ -2738,7 +2745,7 @@ contains
end if
! VOLUME FRACTION FLUX.
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_rs${XYZ}$_vf(j, k, l, i) = &
xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) &
@@ -2748,7 +2755,7 @@ contains
end do
! VOLUME FRACTION SOURCE FLUX.
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
idxi = dir_idx(i)
vel_src_rs${XYZ}$_vf(j, k, l, idxi) = &
@@ -2771,7 +2778,7 @@ contains
! REFERENCE MAP FLUX.
if (hyperelasticity) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_dims
flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = &
xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) &
@@ -2784,7 +2791,7 @@ contains
flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1)
if (chemistry) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = chemxb, chemxe
Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i)
Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)
@@ -2799,7 +2806,7 @@ contains
#:if (NORM_DIR == 2)
if (cyl_coord) then
!Substituting the advective flux into the inviscid geometrical source flux
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, E_idx
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i)
end do
@@ -2816,7 +2823,7 @@ contains
(1._wp - dir_flg(idx1))* &
vel_R(idx1)) - vel_R(idx1))))
! Geometrical source of the void fraction(s) is zero
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp
end do
@@ -2824,7 +2831,7 @@ contains
#:endif
#:if (NORM_DIR == 3)
if (grid_geometry == 3) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, sys_size
flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp
end do
@@ -2848,7 +2855,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end if
end if
#:endfor
@@ -2961,10 +2967,12 @@ contains
#:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
if (norm_dir == ${NORM_DIR}$) then
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, &
- !$acc rho, pres, E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, &
- !$acc U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, &
+ & alpha_rho_R, vel, alpha_L, alpha_R, rho, pres, &
+ & E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, &
+ & c, c_fast, pres_mag, U_L, U_R, U_starL, &
+ & U_starR, U_doubleL, U_doubleR, F_L, F_R, &
+ & F_starL, F_starR, F_hlld]')
do l = is3%beg, is3%end
do k = is2%beg, is2%end
do j = is1%beg, is1%end
@@ -3010,7 +3018,7 @@ contains
! Sum properties of all fluid components
rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp
rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho%L = rho%L + alpha_rho_L(i)
gamma%L = gamma%L + alpha_L(i)*gammas(i)
@@ -3127,7 +3135,7 @@ contains
! Energy
flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7)
! Partial fraction
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = advxb, advxe
flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now)
end do
@@ -3136,7 +3144,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end if
#:endfor
@@ -3159,7 +3166,7 @@ contains
do i = 1, num_fluids
Gs(i) = fluid_pp(i)%G
end do
- !$acc update device(Gs)
+ $:GPU_UPDATE(device='[Gs]')
if (viscous) then
@:ALLOCATE(Res(1:2, 1:maxval(Re_size)))
@@ -3171,10 +3178,10 @@ contains
Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i)
end do
end do
- !$acc update device(Res, Re_idx, Re_size)
+ $:GPU_UPDATE(device='[Res,Re_idx,Re_size]')
end if
- !$acc enter data copyin(is1, is2, is3, isx, isy, isz)
+ $:GPU_ENTER_DATA(copyin='[is1,is2,is3,isx,isy,isz]')
is1%beg = -1; is2%beg = 0; is3%beg = 0
is1%end = m; is2%end = n; is3%end = p
@@ -3317,7 +3324,7 @@ contains
dir_idx = (/3, 1, 2/); dir_flg = (/0._wp, 0._wp, 1._wp/)
end if
- !$acc update device(is1, is2, is3)
+ $:GPU_UPDATE(device='[is1,is2,is3]')
if (elasticity) then
if (norm_dir == 1) then
@@ -3330,14 +3337,16 @@ contains
end if
isx = ix; isy = iy; isz = iz
- !$acc update device(isx, isy, isz) ! for stuff in the same module
- !$acc update device(dir_idx, dir_flg, dir_idx_tau) ! for stuff in different modules
+ ! for variables used within this module
+ $:GPU_UPDATE(device='[isx,isy,isz]')
+ ! for variables used by other modules
+ $:GPU_UPDATE(device='[dir_idx,dir_flg,dir_idx_tau]')
! Population of Buffers in x-direction
if (norm_dir == 1) then
if (bc_x%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = 1, sys_size
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -3348,7 +3357,7 @@ contains
end do
if (viscous) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do k = isy%beg, isy%end
@@ -3360,7 +3369,7 @@ contains
end do
if (n > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do k = isy%beg, isy%end
@@ -3372,7 +3381,7 @@ contains
end do
if (p > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do k = isy%beg, isy%end
@@ -3392,7 +3401,7 @@ contains
if (bc_x%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = 1, sys_size
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -3404,7 +3413,7 @@ contains
if (viscous) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do k = isy%beg, isy%end
@@ -3416,7 +3425,7 @@ contains
end do
if (n > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do k = isy%beg, isy%end
@@ -3428,7 +3437,7 @@ contains
end do
if (p > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do k = isy%beg, isy%end
@@ -3451,7 +3460,7 @@ contains
elseif (norm_dir == 2) then
if (bc_y%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = 1, sys_size
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -3463,7 +3472,7 @@ contains
if (viscous) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do j = isx%beg, isx%end
@@ -3473,7 +3482,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do j = isx%beg, isx%end
@@ -3484,7 +3493,7 @@ contains
end do
if (p > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do j = isx%beg, isx%end
@@ -3501,7 +3510,7 @@ contains
if (bc_y%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = 1, sys_size
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -3513,7 +3522,7 @@ contains
if (viscous) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do j = isx%beg, isx%end
@@ -3523,7 +3532,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do j = isx%beg, isx%end
@@ -3534,7 +3543,7 @@ contains
end do
if (p > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do l = isz%beg, isz%end
do j = isx%beg, isx%end
@@ -3554,7 +3563,7 @@ contains
else
if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = 1, sys_size
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -3565,7 +3574,7 @@ contains
end do
if (viscous) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do k = isy%beg, isy%end
do j = isx%beg, isx%end
@@ -3574,7 +3583,7 @@ contains
end do
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do k = isy%beg, isy%end
do j = isx%beg, isx%end
@@ -3583,7 +3592,7 @@ contains
end do
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do k = isy%beg, isy%end
do j = isx%beg, isx%end
@@ -3598,7 +3607,7 @@ contains
if (bc_z%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = 1, sys_size
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -3609,7 +3618,7 @@ contains
end do
if (viscous) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do k = isy%beg, isy%end
do j = isx%beg, isx%end
@@ -3619,7 +3628,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do k = isy%beg, isy%end
do j = isx%beg, isx%end
@@ -3629,7 +3638,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do i = momxb, momxe
do k = isy%beg, isy%end
do j = isx%beg, isx%end
@@ -3681,7 +3690,7 @@ contains
if (viscous .or. (surface_tension)) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = momxb, E_idx
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -3695,7 +3704,7 @@ contains
if (qbmm) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, 4
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -3711,7 +3720,7 @@ contains
elseif (norm_dir == 2) then
if (viscous .or. (surface_tension)) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = momxb, E_idx
do l = is3%beg, is3%end
do j = is1%beg, is1%end
@@ -3724,7 +3733,7 @@ contains
end if
if (qbmm) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, 4
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -3740,7 +3749,7 @@ contains
else
if (viscous .or. (surface_tension)) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = momxb, E_idx
do j = is1%beg, is1%end
do k = is2%beg, is2%end
@@ -3753,7 +3762,7 @@ contains
end if
if (qbmm) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, 4
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -3819,10 +3828,10 @@ contains
integer :: i_vel !!< Loop iterator for velocity components.
integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging.
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(idx_rp, avg_v_int, avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, &
- !$acc Re_s, Re_b, vel_src_int, r_eff, divergence_cyl, &
- !$acc stress_vector_shear, stress_normal_bulk, div_v_term_const)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[idx_rp, avg_v_int, &
+ & avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, &
+ & vel_src_int, r_eff, divergence_cyl, stress_vector_shear, &
+ & stress_normal_bulk, div_v_term_const]')
do l = iz%beg, iz%end
do k = iy%beg, iy%end
do j = ix%beg, ix%end
@@ -3833,7 +3842,7 @@ contains
! Average velocities and their derivatives at the interface
! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i_vel = 1, num_dims
avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3)))
@@ -3911,7 +3920,7 @@ contains
end if
end select
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i_vel = 1, num_dims
flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel)
flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel)
@@ -3928,7 +3937,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end subroutine s_compute_cylindrical_viscous_source_flux
@@ -3982,10 +3990,9 @@ contains
real(wp) :: divergence_v !< Velocity divergence at interface.
- !$acc parallel loop collapse(3) gang vector default(present) &
- !$acc private(idx_right_phys, vel_grad_avg, &
- !$acc current_tau_shear, current_tau_bulk, vel_src_at_interface, &
- !$acc Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[idx_right_phys, vel_grad_avg, &
+ & current_tau_shear, current_tau_bulk, vel_src_at_interface, &
+ & Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]')
do l_loop = isz%beg, isz%end
do k_loop = isy%beg, isy%end
do j_loop = isx%beg, isx%end
@@ -4066,7 +4073,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end subroutine s_compute_cartesian_viscous_source_flux
@@ -4077,7 +4083,7 @@ contains
!! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz).
!! @param[out] tau_shear_out Calculated shear stress tensor (stress on i-face, j-direction).
pure subroutine s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, tau_shear_out)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
implicit none
@@ -4111,7 +4117,7 @@ contains
!! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz).
!! @param[out] tau_bulk_out Calculated bulk stress tensor (stress on i-face, i-direction).
pure subroutine s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, tau_bulk_out)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
implicit none
@@ -4151,7 +4157,7 @@ contains
! Reshaping Outputted Data in y-direction
if (norm_dir == 2) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = is3%beg, is3%end
do j = is1%beg, is1%end
@@ -4164,7 +4170,7 @@ contains
end do
if (cyl_coord) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = is3%beg, is3%end
do j = is1%beg, is1%end
@@ -4177,7 +4183,7 @@ contains
end do
end if
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3%beg, is3%end
do j = is1%beg, is1%end
do k = is2%beg, is2%end
@@ -4188,7 +4194,7 @@ contains
end do
if (riemann_solver == 1 .or. riemann_solver == 4) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = advxb + 1, advxe
do l = is3%beg, is3%end
do j = is1%beg, is1%end
@@ -4203,7 +4209,7 @@ contains
end if
! Reshaping Outputted Data in z-direction
elseif (norm_dir == 3) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do j = is1%beg, is1%end
do k = is2%beg, is2%end
@@ -4216,7 +4222,7 @@ contains
end do
end do
if (grid_geometry == 3) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do j = is1%beg, is1%end
do k = is2%beg, is2%end
@@ -4230,7 +4236,7 @@ contains
end do
end if
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do j = is1%beg, is1%end
do k = is2%beg, is2%end
do l = is3%beg, is3%end
@@ -4241,7 +4247,7 @@ contains
end do
if (riemann_solver == 1 .or. riemann_solver == 4) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = advxb + 1, advxe
do j = is1%beg, is1%end
do k = is2%beg, is2%end
@@ -4255,7 +4261,7 @@ contains
end if
elseif (norm_dir == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -4267,7 +4273,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3%beg, is3%end
do k = is2%beg, is2%end
do j = is1%beg, is1%end
@@ -4278,7 +4284,7 @@ contains
end do
if (riemann_solver == 1 .or. riemann_solver == 4) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = advxb + 1, advxe
do l = is3%beg, is3%end
do k = is2%beg, is2%end
diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.fpp
similarity index 95%
rename from src/simulation/m_sim_helpers.f90
rename to src/simulation/m_sim_helpers.fpp
index 0ab9c5d1ba..cf8cf80bd7 100644
--- a/src/simulation/m_sim_helpers.f90
+++ b/src/simulation/m_sim_helpers.fpp
@@ -1,3 +1,5 @@
+#:include 'macros.fpp'
+
module m_sim_helpers
use m_derived_types !< Definitions of the derived types
@@ -19,7 +21,7 @@ module m_sim_helpers
!! @param l z coordinate index
!! @return fltr_dtheta Modified dtheta value for cylindrical coordinates
pure function f_compute_filtered_dtheta(k, l) result(fltr_dtheta)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
integer, intent(in) :: k, l
real(wp) :: fltr_dtheta
integer :: Nfq
@@ -46,7 +48,7 @@ end function f_compute_filtered_dtheta
!! @param l z coordinate index
!! @return cfl_terms computed CFL terms for 2D/3D cases
pure function f_compute_multidim_cfl_terms(vel, c, j, k, l) result(cfl_terms)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), dimension(num_vels), intent(in) :: vel
real(wp), intent(in) :: c
integer, intent(in) :: j, k, l
@@ -88,11 +90,8 @@ end function f_compute_multidim_cfl_terms
!! @param k y index
!! @param l z index
pure subroutine s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l)
-#ifdef _CRAYFTN
- !DIR$ INLINEALWAYS s_compute_enthalpy
-#else
- !$acc routine seq
-#endif
+ $:GPU_ROUTINE(function_name='s_compute_enthalpy',parallelism='[seq]', &
+ & cray_inline=True)
type(scalar_field), intent(in), dimension(sys_size) :: q_prim_vf
real(wp), intent(inout), dimension(num_fluids) :: alpha
@@ -106,7 +105,7 @@ pure subroutine s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, a
integer :: i
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho(i) = q_prim_vf(i)%sf(j, k, l)
alpha(i) = q_prim_vf(E_idx + i)%sf(j, k, l)
@@ -121,13 +120,13 @@ pure subroutine s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, a
call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha, alpha_rho, Re)
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
vel(i) = q_prim_vf(contxe + i)%sf(j, k, l)
end do
vel_sum = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_vels
vel_sum = vel_sum + vel(i)**2._wp
end do
@@ -156,7 +155,7 @@ end subroutine s_compute_enthalpy
!! @param vcfl_sf (optional) cell-centered viscous CFL number
!! @param Rc_sf (optional) cell centered Rc
pure subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), intent(in), dimension(num_vels) :: vel
real(wp), intent(in) :: c, rho
real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: icfl_sf
@@ -219,7 +218,7 @@ end subroutine s_compute_stability_from_dt
!! @param k y coordinate
!! @param l z coordinate
pure subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l)
- !$acc routine seq
+ $:GPU_ROUTINE(parallelism='[seq]')
real(wp), dimension(num_vels), intent(in) :: vel
real(wp), intent(in) :: c, rho
real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: max_dt
diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp
index 02b7345530..57106cfac9 100644
--- a/src/simulation/m_start_up.fpp
+++ b/src/simulation/m_start_up.fpp
@@ -3,6 +3,7 @@
!! @brief Contains module m_start_up
#:include 'case.fpp'
+#:include 'macros.fpp'
!> @brief The purpose of the module is primarily to read in the files that
!! contain the inputs, the initial condition data and the grid data
@@ -1047,12 +1048,12 @@ contains
if (cfl_dt) then
if ((mytime + dt) >= t_stop) then
dt = t_stop - mytime
- !$acc update device(dt)
+ $:GPU_UPDATE(device='[dt]')
end if
else
if ((mytime + dt) >= finaltime) then
dt = finaltime - mytime
- !$acc update device(dt)
+ $:GPU_UPDATE(device='[dt]')
end if
end if
@@ -1076,7 +1077,7 @@ contains
if (probe_wrt) then
do i = 1, sys_size
- !$acc update host(q_cons_ts(1)%vf(i)%sf)
+ $:GPU_UPDATE(host='[q_cons_ts(1)%vf(i)%sf]')
end do
end if
@@ -1179,7 +1180,7 @@ contains
call cpu_time(start)
call nvtxStartRange("SAVE-DATA")
do i = 1, sys_size
- !$acc update host(q_cons_ts(1)%vf(i)%sf)
+ $:GPU_UPDATE(host='[q_cons_ts(1)%vf(i)%sf]')
do l = 0, p
do k = 0, n
do j = 0, m
@@ -1193,8 +1194,8 @@ contains
end do
if (qbmm .and. .not. polytropic) then
- !$acc update host(pb_ts(1)%sf)
- !$acc update host(mv_ts(1)%sf)
+ $:GPU_UPDATE(host='[pb_ts(1)%sf]')
+ $:GPU_UPDATE(host='[mv_ts(1)%sf]')
end if
if (cfl_dt) then
@@ -1204,16 +1205,16 @@ contains
end if
if (bubbles_lagrange) then
- !$acc update host(intfc_rad)
+ $:GPU_UPDATE(host='[intfc_rad]')
do i = 1, nBubs
if (ieee_is_nan(intfc_rad(i, 1)) .or. intfc_rad(i, 1) <= 0._wp) then
call s_mpi_abort("Bubble radius is negative or NaN, please reduce dt.")
end if
end do
- !$acc update host(q_beta%vf(1)%sf)
+ $:GPU_UPDATE(host='[q_beta%vf(1)%sf]')
call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, q_beta%vf(1))
- !$acc update host(Rmax_stats, Rmin_stats, gas_p, gas_mv, intfc_vel)
+ $:GPU_UPDATE(host='[Rmax_stats,Rmin_stats,gas_p,gas_mv,intfc_vel]')
call s_write_restart_lag_bubbles(save_count) !parallel
if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats()
else
@@ -1411,38 +1412,44 @@ contains
integer :: i
!Update GPU DATA
do i = 1, sys_size
- !$acc update device(q_cons_ts(1)%vf(i)%sf)
+ $:GPU_UPDATE(device='[q_cons_ts(1)%vf(i)%sf]')
end do
if (qbmm .and. .not. polytropic) then
- !$acc update device(pb_ts(1)%sf, mv_ts(1)%sf)
+ $:GPU_UPDATE(device='[pb_ts(1)%sf,mv_ts(1)%sf]')
end if
if (chemistry) then
- !$acc update device(q_T_sf%sf)
+ $:GPU_UPDATE(device='[q_T_sf%sf]')
end if
- !$acc update device(nb, R0ref, Ca, Web, Re_inv, weight, R0, V0, bubbles_euler, polytropic, polydisperse, qbmm, R0_type, ptil, bubble_model, thermal, poly_sigma, adv_n, adap_dt, adap_dt_tol, n_idx, pi_fac, low_Mach)
- !$acc update device(R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN , mul0, ss, gamma_v, mu_v, gamma_m, gamma_n, mu_n, gam)
+ $:GPU_UPDATE(device='[nb,R0ref,Ca,Web,Re_inv,weight,R0,V0, &
+ & bubbles_euler,polytropic,polydisperse,qbmm,R0_type, &
+ & ptil,bubble_model,thermal,poly_sigma,adv_n,adap_dt, &
+ & adap_dt_tol,n_idx,pi_fac,low_Mach]')
+ $:GPU_UPDATE(device='[R_n,R_v,phi_vn,phi_nv,Pe_c,Tw,pv,M_n, &
+ & M_v,k_n,k_v,pb0,mass_n0,mass_v0,Pe_T,Re_trans_T, &
+ & Re_trans_c,Im_trans_T,Im_trans_c,omegaN,mul0,ss, &
+ & gamma_v,mu_v,gamma_m,gamma_n,mu_n,gam]')
- !$acc update device(acoustic_source, num_source)
- !$acc update device(sigma, surface_tension)
+ $:GPU_UPDATE(device='[acoustic_source, num_source]')
+ $:GPU_UPDATE(device='[sigma, surface_tension]')
- !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc)
+ $:GPU_UPDATE(device='[dx,dy,dz,x_cb,x_cc,y_cb,y_cc,z_cb,z_cc]')
- !$acc update device(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3)
- !$acc update device(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3)
- !$acc update device(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3)
+ $:GPU_UPDATE(device='[bc_x%vb1,bc_x%vb2,bc_x%vb3,bc_x%ve1,bc_x%ve2,bc_x%ve3]')
+ $:GPU_UPDATE(device='[bc_y%vb1,bc_y%vb2,bc_y%vb3,bc_y%ve1,bc_y%ve2,bc_y%ve3]')
+ $:GPU_UPDATE(device='[bc_z%vb1,bc_z%vb2,bc_z%vb3,bc_z%ve1,bc_z%ve2,bc_z%ve3]')
- !$acc update device(bc_x%grcbc_in, bc_x%grcbc_out, bc_x%grcbc_vel_out)
- !$acc update device(bc_y%grcbc_in, bc_y%grcbc_out, bc_y%grcbc_vel_out)
- !$acc update device(bc_z%grcbc_in, bc_z%grcbc_out, bc_z%grcbc_vel_out)
+ $:GPU_UPDATE(device='[bc_x%grcbc_in,bc_x%grcbc_out,bc_x%grcbc_vel_out]')
+ $:GPU_UPDATE(device='[bc_y%grcbc_in,bc_y%grcbc_out,bc_y%grcbc_vel_out]')
+ $:GPU_UPDATE(device='[bc_z%grcbc_in,bc_z%grcbc_out,bc_z%grcbc_vel_out]')
- !$acc update device(relax, relax_model)
+ $:GPU_UPDATE(device='[relax, relax_model]')
if (relax) then
- !$acc update device(palpha_eps, ptgalpha_eps)
+ $:GPU_UPDATE(device='[palpha_eps, ptgalpha_eps]')
end if
if (ib) then
- !$acc update device(ib_markers%sf)
+ $:GPU_UPDATE(device='[ib_markers%sf]')
end if
end subroutine s_initialize_gpu_vars
diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp
index 5cf87531aa..30b6fcec2b 100644
--- a/src/simulation/m_surface_tension.fpp
+++ b/src/simulation/m_surface_tension.fpp
@@ -29,16 +29,16 @@ module m_surface_tension
!> @{
type(scalar_field), allocatable, dimension(:) :: c_divs
!> @}
- !$acc declare create(c_divs)
+ $:GPU_DECLARE(create='[c_divs]')
!> @name cell boundary reconstructed gradient components and magnitude
!> @{
real(wp), allocatable, dimension(:, :, :, :) :: gL_x, gR_x, gL_y, gR_y, gL_z, gR_z
!> @}
- !$acc declare create(gL_x, gR_x, gL_y, gR_y, gL_z, gR_z)
+ $:GPU_DECLARE(create='[gL_x,gR_x,gL_y,gR_y,gL_z,gR_z]')
type(int_bounds_info) :: is1, is2, is3, iv
- !$acc declare create(is1, is2, is3, iv)
+ $:GPU_DECLARE(create='[is1,is2,is3,iv]')
contains
@@ -85,8 +85,9 @@ contains
integer :: j, k, l, i
if (id == 1) then
- !$acc parallel loop collapse(3) gang vector default(present) private(Omega, &
- !$acc w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, &
+ & w1R, w2R, w3R, w1, w2, w3, normWL, &
+ & normWR, normW]')
do l = isz%beg, isz%end
do k = isy%beg, isy%end
do j = isx%beg, isx%end
@@ -131,8 +132,9 @@ contains
elseif (id == 2) then
- !$acc parallel loop collapse(3) gang vector default(present) private(Omega, &
- !$acc w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, &
+ & w1R, w2R, w3R, w1, w2, w3, normWL, normWR, &
+ & normW]')
do l = isz%beg, isz%end
do k = isy%beg, isy%end
do j = isx%beg, isx%end
@@ -177,8 +179,9 @@ contains
elseif (id == 3) then
- !$acc parallel loop collapse(3) gang vector default(present) private(Omega, &
- !$acc w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, &
+ & w1R, w2R, w3R, w1, w2, w3, normWL, normWR, &
+ & normW]')
do l = isz%beg, isz%end
do k = isy%beg, isy%end
do j = isx%beg, isx%end
@@ -240,7 +243,7 @@ contains
isx%end = m; isy%end = n; isz%end = p
! compute gradient components
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -250,7 +253,7 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -261,7 +264,7 @@ contains
end do
if (p > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -272,7 +275,7 @@ contains
end do
end if
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = 0, p
do k = 0, n
do j = 0, m
@@ -332,10 +335,10 @@ contains
end if
- !$acc update device(is1, is2, is3, iv)
+ $:GPU_UPDATE(device='[is1,is2,is3,iv]')
if (recon_dir == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -346,9 +349,8 @@ contains
end do
end do
end do
- !$acc end parallel loop
else if (recon_dir == 2) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -359,9 +361,8 @@ contains
end do
end do
end do
- !$acc end parallel loop
else if (recon_dir == 3) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do l = is3%beg, is3%end
do k = is2%beg, is2%end
@@ -372,7 +373,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end if
end subroutine s_reconstruct_cell_boundary_values_capillary
diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index b1c338b5c9..381455be2b 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -75,7 +75,7 @@ module m_time_steppers
integer, private :: num_ts !<
!! Number of time stages in the time-stepping scheme
- !$acc declare create(q_cons_ts, q_prim_vf, q_T_sf, rhs_vf, q_prim_ts, rhs_mv, rhs_pb, max_dt)
+ $:GPU_DECLARE(create='[q_cons_ts,q_prim_vf,q_T_sf,rhs_vf,q_prim_ts,rhs_mv,rhs_pb,max_dt]')
contains
@@ -377,7 +377,7 @@ contains
if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1)
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = 0, p
do k = 0, n
@@ -392,7 +392,7 @@ contains
!Evolve pb and mv for non-polytropic qbmm
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -409,7 +409,7 @@ contains
end if
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -479,7 +479,7 @@ contains
if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1)
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = 0, p
do k = 0, n
@@ -494,7 +494,7 @@ contains
!Evolve pb and mv for non-polytropic qbmm
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -511,7 +511,7 @@ contains
end if
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -551,7 +551,7 @@ contains
if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2)
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = 0, p
do k = 0, n
@@ -566,7 +566,7 @@ contains
end do
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -584,7 +584,7 @@ contains
end if
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -661,7 +661,7 @@ contains
if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1)
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = 0, p
do k = 0, n
@@ -676,7 +676,7 @@ contains
!Evolve pb and mv for non-polytropic qbmm
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -693,7 +693,7 @@ contains
end if
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -733,7 +733,7 @@ contains
if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2)
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = 0, p
do k = 0, n
@@ -748,7 +748,7 @@ contains
end do
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -766,7 +766,7 @@ contains
end if
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -806,7 +806,7 @@ contains
if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3)
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, sys_size
do l = 0, p
do k = 0, n
@@ -821,7 +821,7 @@ contains
end do
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -839,7 +839,7 @@ contains
end if
if (qbmm .and. (.not. polytropic)) then
- !$acc parallel loop collapse(5) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=5)
do i = 1, nb
do l = 0, p
do k = 0, n
@@ -946,7 +946,7 @@ contains
if (stage == 3) then
if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats()
if (lag_params%write_bubbles) then
- !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel)
+ $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]')
call s_write_lag_particles(mytime)
end if
call s_write_void_evol(mytime)
@@ -979,7 +979,7 @@ contains
q_prim_vf, &
idwint)
- !$acc parallel loop collapse(3) gang vector default(present) private(vel, alpha, Re)
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]')
do l = 0, p
do k = 0, n
do j = 0, m
@@ -993,9 +993,9 @@ contains
end do
end do
- !$acc kernels
- dt_local = minval(max_dt)
- !$acc end kernels
+ #:call GPU_PARALLEL()
+ dt_local = minval(max_dt)
+ #:endcall GPU_PARALLEL
if (num_procs == 1) then
dt = dt_local
@@ -1003,7 +1003,7 @@ contains
call s_mpi_allreduce_min(dt_local, dt)
end if
- !$acc update device(dt)
+ $:GPU_UPDATE(device='[dt]')
end subroutine s_compute_dt
@@ -1022,7 +1022,7 @@ contains
call nvtxStartRange("RHS-BODYFORCES")
call s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf)
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = momxb, E_idx
do l = 0, p
do k = 0, n
@@ -1048,7 +1048,7 @@ contains
integer :: i !< Generic loop iterator
do i = 1, sys_size
- !$acc update host(q_prim_vf(i)%sf)
+ $:GPU_UPDATE(host='[q_prim_vf(i)%sf]')
end do
if (t_step == t_step_start) then
diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp
index 24b7dbb8df..ef301f5856 100644
--- a/src/simulation/m_viscous.fpp
+++ b/src/simulation/m_viscous.fpp
@@ -24,10 +24,10 @@ module m_viscous
type(int_bounds_info) :: iv
type(int_bounds_info) :: is1_viscous, is2_viscous, is3_viscous
- !$acc declare create(is1_viscous, is2_viscous, is3_viscous, iv)
+ $:GPU_DECLARE(create='[is1_viscous,is2_viscous,is3_viscous,iv]')
real(wp), allocatable, dimension(:, :) :: Res_viscous
- !$acc declare create(Res_viscous)
+ $:GPU_DECLARE(create='[Res_viscous]')
contains
@@ -42,8 +42,8 @@ contains
Res_viscous(i, j) = fluid_pp(Re_idx(i, j))%Re(i)
end do
end do
- !$acc update device(Res_viscous, Re_idx, Re_size)
- !$acc enter data copyin(is1_viscous, is2_viscous, is3_viscous, iv)
+ $:GPU_UPDATE(device='[Res_viscous,Re_idx,Re_size]')
+ $:GPU_ENTER_DATA(copyin='[is1_viscous,is2_viscous,is3_viscous,iv]')
end subroutine s_initialize_viscous_module
@@ -75,13 +75,13 @@ contains
is1_viscous = ix; is2_viscous = iy; is3_viscous = iz
- !$acc update device(is1_viscous, is2_viscous, is3_viscous)
+ $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]')
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg, is2_viscous%end
do j = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = momxb, E_idx
tau_Re_vf(i)%sf(j, k, l) = 0._wp
end do
@@ -89,12 +89,13 @@ contains
end do
end do
if (shear_stress) then ! Shear stresses
- !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re )
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, &
+ & alpha_rho_visc, Re_visc, tau_Re]')
do l = is3_viscous%beg, is3_viscous%end
do k = -1, 1
do j = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l)
if (bubbles_euler .and. num_fluids == 1) then
@@ -110,14 +111,14 @@ contains
pi_inf_visc = 0._wp
if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i)
end do
else if ((model_eqns == 2) .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids - 1
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
@@ -136,7 +137,7 @@ contains
alpha_visc_sum = 0._wp
if (mpp_lim) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i))
alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp)
@@ -147,7 +148,7 @@ contains
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
@@ -155,12 +156,12 @@ contains
end do
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_visc(i) = dflt_real
if (Re_size(i) > 0) Re_visc(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) &
+ Re_visc(i)
@@ -180,7 +181,7 @@ contains
- 2._wp*grad_x_vf(1)%sf(j, k, l) &
- 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ &
(3._wp*Re_visc(1))
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
tau_Re_vf(contxe + i)%sf(j, k, l) = &
tau_Re_vf(contxe + i)%sf(j, k, l) - &
@@ -196,12 +197,13 @@ contains
end if
if (bulk_stress) then ! Bulk stresses
- !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re )
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, &
+ & alpha_rho_visc, Re_visc, tau_Re]')
do l = is3_viscous%beg, is3_viscous%end
do k = -1, 1
do j = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l)
if (bubbles_euler .and. num_fluids == 1) then
@@ -217,14 +219,14 @@ contains
pi_inf_visc = 0._wp
if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i)
end do
else if ((model_eqns == 2) .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids - 1
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
@@ -243,7 +245,7 @@ contains
alpha_visc_sum = 0._wp
if (mpp_lim) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i))
alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp)
@@ -254,7 +256,7 @@ contains
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
@@ -262,12 +264,12 @@ contains
end do
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_visc(i) = dflt_real
if (Re_size(i) > 0) Re_visc(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) &
+ Re_visc(i)
@@ -300,12 +302,13 @@ contains
if (p == 0) return
if (shear_stress) then ! Shear stresses
- !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re )
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, &
+ & alpha_rho_visc, Re_visc, tau_Re]')
do l = is3_viscous%beg, is3_viscous%end
do k = -1, 1
do j = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l)
if (bubbles_euler .and. num_fluids == 1) then
@@ -321,14 +324,14 @@ contains
pi_inf_visc = 0._wp
if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i)
end do
else if ((model_eqns == 2) .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids - 1
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
@@ -347,7 +350,7 @@ contains
alpha_visc_sum = 0._wp
if (mpp_lim) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i))
alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp)
@@ -358,7 +361,7 @@ contains
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
@@ -366,12 +369,12 @@ contains
end do
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_visc(i) = dflt_real
if (Re_size(i) > 0) Re_visc(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) &
+ Re_visc(i)
@@ -391,7 +394,7 @@ contains
y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ &
Re_visc(1)
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 2, 3
tau_Re_vf(contxe + i)%sf(j, k, l) = &
tau_Re_vf(contxe + i)%sf(j, k, l) - &
@@ -408,12 +411,13 @@ contains
end if
if (bulk_stress) then ! Bulk stresses
- !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re )
+ $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, &
+ & alpha_rho_visc, Re_visc, tau_Re]')
do l = is3_viscous%beg, is3_viscous%end
do k = -1, 1
do j = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l)
if (bubbles_euler .and. num_fluids == 1) then
@@ -429,14 +433,14 @@ contains
pi_inf_visc = 0._wp
if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i)
end do
else if ((model_eqns == 2) .and. (num_fluids > 2)) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids - 1
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
@@ -455,7 +459,7 @@ contains
alpha_visc_sum = 0._wp
if (mpp_lim) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i))
alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp)
@@ -466,7 +470,7 @@ contains
end if
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, num_fluids
rho_visc = rho_visc + alpha_rho_visc(i)
gamma_visc = gamma_visc + alpha_visc(i)*gammas(i)
@@ -474,12 +478,12 @@ contains
end do
if (viscous) then
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, 2
Re_visc(i) = dflt_real
if (Re_size(i) > 0) Re_visc(i) = 0._wp
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) &
+ Re_visc(i)
@@ -545,7 +549,7 @@ contains
iv%beg = mom_idx%beg; iv%end = mom_idx%end
- !$acc update device(iv)
+ $:GPU_UPDATE(device='[iv]')
call s_reconstruct_cell_boundary_values_visc( &
q_prim_qp%vf(iv%beg:iv%end), &
@@ -583,17 +587,17 @@ contains
else ! Compute velocity gradient at cell centers using finite differences
iv%beg = mom_idx%beg; iv%end = mom_idx%end
- !$acc update device(iv)
+ $:GPU_UPDATE(device='[iv]')
is1_viscous = ix; is2_viscous = iy; is3_viscous = iz
- !$acc update device(is1_viscous, is2_viscous, is3_viscous)
+ $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]')
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do k = iy%beg, iy%end
do j = is1_viscous%beg + 1, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = &
(q_prim_qp%vf(i)%sf(j, k, l) - &
@@ -604,11 +608,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg, is2_viscous%end
do j = is1_viscous%beg, is1_viscous%end - 1
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = &
(q_prim_qp%vf(i)%sf(j + 1, k, l) - &
@@ -621,11 +625,11 @@ contains
if (n > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do j = is2_viscous%beg + 1, is2_viscous%end
do k = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = &
(q_prim_qp%vf(i)%sf(k, j, l) - &
@@ -636,11 +640,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do j = is2_viscous%beg, is2_viscous%end - 1
do k = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = &
(q_prim_qp%vf(i)%sf(k, j + 1, l) - &
@@ -651,11 +655,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do j = is2_viscous%beg + 1, is2_viscous%end
do k = is1_viscous%beg + 1, is1_viscous%end - 1
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = &
(dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + &
@@ -670,11 +674,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do j = is2_viscous%beg, is2_viscous%end - 1
do k = is1_viscous%beg + 1, is1_viscous%end - 1
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = &
(dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + &
@@ -690,11 +694,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg + 1, is2_viscous%end - 1
do j = is1_viscous%beg + 1, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = &
(dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + &
@@ -710,11 +714,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg + 1, is2_viscous%end - 1
do j = is1_viscous%beg, is1_viscous%end - 1
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = &
(dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + &
@@ -732,11 +736,11 @@ contains
if (p > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do j = is3_viscous%beg + 1, is3_viscous%end
do l = is2_viscous%beg, is2_viscous%end
do k = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = &
@@ -748,11 +752,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do j = is3_viscous%beg, is3_viscous%end - 1
do l = is2_viscous%beg, is2_viscous%end
do k = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = &
@@ -764,11 +768,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg + 1, is3_viscous%end - 1
do k = is2_viscous%beg, is2_viscous%end
do j = is1_viscous%beg + 1, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = &
@@ -785,11 +789,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg + 1, is3_viscous%end - 1
do k = is2_viscous%beg, is2_viscous%end
do j = is1_viscous%beg, is1_viscous%end - 1
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = &
@@ -806,11 +810,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg + 1, is3_viscous%end - 1
do j = is2_viscous%beg + 1, is2_viscous%end
do k = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = &
@@ -827,11 +831,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg + 1, is3_viscous%end - 1
do j = is2_viscous%beg, is2_viscous%end - 1
do k = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = &
@@ -848,11 +852,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do j = is3_viscous%beg + 1, is3_viscous%end
do l = is2_viscous%beg + 1, is2_viscous%end - 1
do k = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = &
@@ -869,11 +873,11 @@ contains
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do j = is3_viscous%beg, is3_viscous%end - 1
do l = is2_viscous%beg + 1, is2_viscous%end - 1
do k = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = &
@@ -889,11 +893,11 @@ contains
end do
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do j = is3_viscous%beg + 1, is3_viscous%end
do l = is2_viscous%beg, is2_viscous%end
do k = is1_viscous%beg + 1, is1_viscous%end - 1
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = &
@@ -909,11 +913,11 @@ contains
end do
end do
end do
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do j = is3_viscous%beg, is3_viscous%end - 1
do l = is2_viscous%beg, is2_viscous%end
do k = is1_viscous%beg + 1, is1_viscous%end - 1
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = &
(dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + &
@@ -995,7 +999,7 @@ contains
end if
- !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv)
+ $:GPU_UPDATE(device='[is1_viscous, is2_viscous, is3_viscous, iv]')
if (n > 0) then
if (p > 0) then
@@ -1019,7 +1023,7 @@ contains
if (viscous) then
if (weno_Re_flux) then
if (norm_dir == 2) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do l = is3_viscous%beg, is3_viscous%end
do j = is1_viscous%beg, is1_viscous%end
@@ -1031,7 +1035,7 @@ contains
end do
end do
elseif (norm_dir == 3) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do j = is1_viscous%beg, is1_viscous%end
do k = is2_viscous%beg, is2_viscous%end
@@ -1043,7 +1047,7 @@ contains
end do
end do
elseif (norm_dir == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg, is2_viscous%end
@@ -1092,7 +1096,7 @@ contains
end if
- !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv)
+ $:GPU_UPDATE(device='[is1_viscous, is2_viscous, is3_viscous, iv]')
if (n > 0) then
if (p > 0) then
@@ -1118,7 +1122,7 @@ contains
if (viscous) then
if (weno_Re_flux) then
if (norm_dir == 2) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do l = is3_viscous%beg, is3_viscous%end
do j = is1_viscous%beg, is1_viscous%end
@@ -1130,7 +1134,7 @@ contains
end do
end do
elseif (norm_dir == 3) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do j = is1_viscous%beg, is1_viscous%end
do k = is2_viscous%beg, is2_viscous%end
@@ -1142,7 +1146,7 @@ contains
end do
end do
elseif (norm_dir == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = iv%beg, iv%end
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg, is2_viscous%end
@@ -1195,7 +1199,7 @@ contains
is3_viscous = iz
iv = iv_in
- !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv)
+ $:GPU_UPDATE(device='[is1_viscous, is2_viscous, is3_viscous, iv]')
! First-Order Spatial Derivatives in x-direction
if (norm_dir == 1) then
@@ -1206,11 +1210,11 @@ contains
! cell-boundaries, to calculate the cell-averaged first-order
! spatial derivatives inside the cell.
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg, is2_viscous%end
do j = is1_viscous%beg + 1, is1_viscous%end - 1
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dv_ds_vf(i)%sf(j, k, l) = &
1._wp/((1._wp + wa_flg)*dL(j)) &
@@ -1234,11 +1238,11 @@ contains
! cell-boundaries, to calculate the cell-averaged first-order
! spatial derivatives inside the cell.
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg + 1, is2_viscous%end - 1
do j = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dv_ds_vf(i)%sf(j, k, l) = &
1._wp/((1._wp + wa_flg)*dL(k)) &
@@ -1262,11 +1266,11 @@ contains
! cell-boundaries, to calculate the cell-averaged first-order
! spatial derivatives inside the cell.
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg + 1, is3_viscous%end - 1
do k = is2_viscous%beg, is2_viscous%end
do j = is1_viscous%beg, is1_viscous%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = iv%beg, iv%end
dv_ds_vf(i)%sf(j, k, l) = &
1._wp/((1._wp + wa_flg)*dL(l)) &
@@ -1315,9 +1319,9 @@ contains
is1_viscous = ix; is2_viscous = iy; is3_viscous = iz
- !$acc update device(is1_viscous, is2_viscous, is3_viscous)
+ $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]')
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg, is2_viscous%end
do j = is1_viscous%beg, is1_viscous%end
@@ -1329,7 +1333,7 @@ contains
end do
if (n > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg, is2_viscous%end
do j = is1_viscous%beg, is1_viscous%end
@@ -1342,7 +1346,7 @@ contains
end if
if (p > 0) then
- !$acc parallel loop collapse(3) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=3)
do l = is3_viscous%beg, is3_viscous%end
do k = is2_viscous%beg, is2_viscous%end
do j = is1_viscous%beg, is1_viscous%end
@@ -1354,7 +1358,7 @@ contains
end do
end if
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = idwbuff(3)%beg, idwbuff(3)%end
do k = idwbuff(2)%beg, idwbuff(2)%end
grad_x%sf(idwbuff(1)%beg, k, l) = &
@@ -1366,7 +1370,7 @@ contains
end do
end do
if (n > 0) then
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = idwbuff(3)%beg, idwbuff(3)%end
do j = idwbuff(1)%beg, idwbuff(1)%end
grad_y%sf(j, idwbuff(2)%beg, l) = &
@@ -1378,7 +1382,7 @@ contains
end do
end do
if (p > 0) then
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do k = idwbuff(2)%beg, idwbuff(2)%end
do j = idwbuff(1)%beg, idwbuff(1)%end
grad_z%sf(j, k, idwbuff(3)%beg) = &
@@ -1393,7 +1397,7 @@ contains
end if
if (bc_x%beg <= BC_GHOST_EXTRAP) then
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = idwbuff(3)%beg, idwbuff(3)%end
do k = idwbuff(2)%beg, idwbuff(2)%end
grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ &
@@ -1402,7 +1406,7 @@ contains
end do
end if
if (bc_x%end <= BC_GHOST_EXTRAP) then
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = idwbuff(3)%beg, idwbuff(3)%end
do k = idwbuff(2)%beg, idwbuff(2)%end
grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ &
@@ -1412,7 +1416,7 @@ contains
end if
if (n > 0) then
if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_NULL) then
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = idwbuff(3)%beg, idwbuff(3)%end
do j = idwbuff(1)%beg, idwbuff(1)%end
grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ &
@@ -1421,7 +1425,7 @@ contains
end do
end if
if (bc_y%end <= BC_GHOST_EXTRAP) then
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do l = idwbuff(3)%beg, idwbuff(3)%end
do j = idwbuff(1)%beg, idwbuff(1)%end
grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ &
@@ -1431,7 +1435,7 @@ contains
end if
if (p > 0) then
if (bc_z%beg <= BC_GHOST_EXTRAP) then
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do k = idwbuff(2)%beg, idwbuff(2)%end
do j = idwbuff(1)%beg, idwbuff(1)%end
grad_z%sf(j, k, 0) = &
@@ -1441,7 +1445,7 @@ contains
end do
end if
if (bc_z%end <= BC_GHOST_EXTRAP) then
- !$acc parallel loop collapse(2) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=2)
do k = idwbuff(2)%beg, idwbuff(2)%end
do j = idwbuff(1)%beg, idwbuff(1)%end
grad_z%sf(j, k, p) = &
diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp
index fb13b45aba..f03c7c8151 100644
--- a/src/simulation/m_weno.fpp
+++ b/src/simulation/m_weno.fpp
@@ -42,6 +42,7 @@ module m_weno
!> @{
real(wp), allocatable, dimension(:, :, :, :) :: v_rs_ws_x, v_rs_ws_y, v_rs_ws_z
!> @}
+ $:GPU_DECLARE(create='[v_rs_ws_x,v_rs_ws_y,v_rs_ws_z]')
! WENO Coefficients
@@ -58,6 +59,8 @@ module m_weno
real(wp), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_y
real(wp), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_z
!> @}
+ $:GPU_DECLARE(create='[poly_coef_cbL_x,poly_coef_cbL_y,poly_coef_cbL_z]')
+ $:GPU_DECLARE(create='[poly_coef_cbR_x,poly_coef_cbR_y,poly_coef_cbR_z]')
!> @name The ideal weights at the left and the right cell-boundaries and at the
!! left and the right quadrature points, in x-, y- and z-directions. Note
@@ -72,6 +75,7 @@ module m_weno
real(wp), target, allocatable, dimension(:, :) :: d_cbR_y
real(wp), target, allocatable, dimension(:, :) :: d_cbR_z
!> @}
+ $:GPU_DECLARE(create='[d_cbL_x,d_cbL_y,d_cbL_z,d_cbR_x,d_cbR_y,d_cbR_z]')
!> @name Smoothness indicator coefficients in the x-, y-, and z-directions. Note
!! that the first array dimension identifies the smoothness indicator, the
@@ -82,25 +86,20 @@ module m_weno
real(wp), target, allocatable, dimension(:, :, :) :: beta_coef_y
real(wp), target, allocatable, dimension(:, :, :) :: beta_coef_z
!> @}
+ $:GPU_DECLARE(create='[beta_coef_x,beta_coef_y,beta_coef_z]')
! END: WENO Coefficients
integer :: v_size !< Number of WENO-reconstructed cell-average variables
- !$acc declare create(v_size)
+ $:GPU_DECLARE(create='[v_size]')
!> @name Indical bounds in the s1-, s2- and s3-directions
!> @{
type(int_bounds_info) :: is1_weno, is2_weno, is3_weno
- !$acc declare create(is1_weno, is2_weno, is3_weno)
+ $:GPU_DECLARE(create='[is1_weno,is2_weno,is3_weno]')
!
!> @}
- !$acc declare create( &
- !$acc v_rs_ws_x, v_rs_ws_y, v_rs_ws_z, &
- !$acc poly_coef_cbL_x,poly_coef_cbL_y,poly_coef_cbL_z, &
- !$acc poly_coef_cbR_x,poly_coef_cbR_y,poly_coef_cbR_z,d_cbL_x, &
- !$acc d_cbL_y,d_cbL_z,d_cbR_x,d_cbR_y,d_cbR_z,beta_coef_x,beta_coef_y,beta_coef_z)
-
contains
!> The computation of parameters, the allocation of memory,
@@ -622,11 +621,11 @@ contains
#:endfor
if (weno_dir == 1) then
- !$acc update device(poly_coef_cbL_x, poly_coef_cbR_x, d_cbL_x, d_cbR_x, beta_coef_x)
+ $:GPU_UPDATE(device='[poly_coef_cbL_x,poly_coef_cbR_x,d_cbL_x,d_cbR_x,beta_coef_x]')
elseif (weno_dir == 2) then
- !$acc update device(poly_coef_cbL_y, poly_coef_cbR_y, d_cbL_y, d_cbR_y, beta_coef_y)
+ $:GPU_UPDATE(device='[poly_coef_cbL_y,poly_coef_cbR_y,d_cbL_y,d_cbR_y,beta_coef_y]')
else
- !$acc update device(poly_coef_cbL_z, poly_coef_cbR_z, d_cbL_z, d_cbR_z, beta_coef_z)
+ $:GPU_UPDATE(device='[poly_coef_cbL_z,poly_coef_cbR_z,d_cbL_z,d_cbR_z,beta_coef_z]')
end if
! Nullifying WENO coefficients and cell-boundary locations pointers
@@ -660,7 +659,7 @@ contains
is2_weno = is2_weno_d
is3_weno = is3_weno_d
- !$acc update device(is1_weno, is2_weno, is3_weno)
+ $:GPU_UPDATE(device='[is1_weno,is2_weno,is3_weno]')
if (weno_order /= 1) then
call s_initialize_weno(v_vf, &
@@ -669,7 +668,7 @@ contains
if (weno_order == 1) then
if (weno_dir == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, ubound(v_vf, 1)
do l = is3_weno%beg, is3_weno%end
do k = is2_weno%beg, is2_weno%end
@@ -680,9 +679,8 @@ contains
end do
end do
end do
- !$acc end parallel loop
else if (weno_dir == 2) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, ubound(v_vf, 1)
do l = is3_weno%beg, is3_weno%end
do k = is2_weno%beg, is2_weno%end
@@ -693,9 +691,8 @@ contains
end do
end do
end do
- !$acc end parallel loop
else if (weno_dir == 3) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do i = 1, ubound(v_vf, 1)
do l = is3_weno%beg, is3_weno%end
do k = is2_weno%beg, is2_weno%end
@@ -706,12 +703,11 @@ contains
end do
end do
end do
- !$acc end parallel loop
end if
elseif (weno_order == 3) then
#:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
if (weno_dir == ${WENO_DIR}$) then
- !$acc parallel loop collapse(4) gang vector default(present) private(beta,dvd,poly,omega,alpha,tau)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[beta,dvd,poly,omega,alpha,tau]')
do l = is3_weno%beg, is3_weno%end
do k = is2_weno%beg, is2_weno%end
do j = is1_weno%beg, is1_weno%end
@@ -784,17 +780,16 @@ contains
end do
end do
end do
- !$acc end parallel loop
end if
#:endfor
elseif (weno_order == 5) then
#:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
if (weno_dir == ${WENO_DIR}$) then
- !$acc parallel loop vector gang collapse(3) default(present) private(dvd, poly, beta, alpha, omega, tau, delta)
+ $:GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta]')
do l = is3_weno%beg, is3_weno%end
do k = is2_weno%beg, is2_weno%end
do j = is1_weno%beg, is1_weno%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, v_size
! reconstruct from left side
@@ -899,7 +894,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
if (mp_weno) then
call s_preserve_monotonicity(v_rs_ws_${XYZ}$, vL_rs_vf_${XYZ}$, &
@@ -910,11 +904,11 @@ contains
elseif (weno_order == 7) then
#:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
if (weno_dir == ${WENO_DIR}$) then
- !$acc parallel loop vector gang collapse(3) default(present) private(poly, beta, alpha, omega, tau, delta, dvd, v)
+ $:GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v]')
do l = is3_weno%beg, is3_weno%end
do k = is2_weno%beg, is2_weno%end
do j = is1_weno%beg, is1_weno%end
- !$acc loop seq
+ $:GPU_LOOP(parallelism='[seq]')
do i = 1, v_size
if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity
@@ -1095,7 +1089,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end if
#:endfor
@@ -1130,10 +1123,10 @@ contains
! as to reshape the inputted data in the coordinate direction of
! the WENO reconstruction
v_size = ubound(v_vf, 1)
- !$acc update device(v_size)
+ $:GPU_UPDATE(device='[v_size]')
if (weno_dir == 1) then
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do j = 1, v_size
do q = is3_weno%beg, is3_weno%end
do l = is2_weno%beg, is2_weno%end
@@ -1143,7 +1136,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end if
! Reshaping/Projecting onto Characteristic Fields in y-direction
@@ -1156,22 +1148,22 @@ contains
block
use CuTensorEx
- !$acc host_data use_device(v_rs_ws_x, v_rs_ws_y)
- v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4])
- !$acc end host_data
+ #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
+ v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4])
+ #:endcall GPU_HOST_DATA
end block
else
block
use CuTensorEx
- !$acc host_data use_device(v_rs_ws_x, v_rs_ws_y)
- v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4])
- !$acc end host_data
+ #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
+ v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4])
+ #:endcall GPU_HOST_DATA
end block
end if
else
#endif
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do j = 1, v_size
do q = is3_weno%beg, is3_weno%end
do l = is2_weno%beg, is2_weno%end
@@ -1181,7 +1173,6 @@ contains
end do
end do
end do
-!$acc end parallel loop
#if MFC_cuTENSOR
end if
#endif
@@ -1195,13 +1186,13 @@ contains
block
use CuTensorEx
- !$acc host_data use_device(v_rs_ws_x, v_rs_ws_z)
- v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4])
- !$acc end host_data
+ #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]')
+ v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4])
+ #:endcall GPU_HOST_DATA
end block
else
#endif
- !$acc parallel loop collapse(4) gang vector default(present)
+ $:GPU_PARALLEL_LOOP(collapse=4)
do j = 1, v_size
do q = is3_weno%beg, is3_weno%end
do l = is2_weno%beg, is2_weno%end
@@ -1211,7 +1202,6 @@ contains
end do
end do
end do
-!$acc end parallel loop
#if MFC_cuTENSOR
end if
#endif
@@ -1264,7 +1254,7 @@ contains
real(wp), parameter :: alpha_mp = 2._wp
real(wp), parameter :: beta_mp = 4._wp/3._wp
- !$acc parallel loop gang vector collapse (4) default(present) private(d)
+ $:GPU_PARALLEL_LOOP(collapse=4,private='[d]')
do l = is3_weno%beg, is3_weno%end
do k = is2_weno%beg, is2_weno%end
do j = is1_weno%beg, is1_weno%end
@@ -1389,7 +1379,6 @@ contains
end do
end do
end do
- !$acc end parallel loop
end subroutine s_preserve_monotonicity