Summary
  * CMakeLists.txt: for LLVMFlang, add -fopenmp-target-fast,
    -fopenmp-assume-threads-oversubscription and
    -fopenmp-assume-teams-oversubscription to the compile options.
  * src/simulation/m_weno.fpp: add q to the private list of the 3rd-order
    GPU_PARALLEL_LOOP, replace the array-section assignments to alpha with
    explicit loops over q, and spell out the three-term normalisation of
    omega where omega(0), omega(1) and omega(2) are consumed.

Review notes
  * In the 3rd-order WENO-Z branch, tau = abs(beta(1) - beta(0)) is kept as a
    context line. The loops that follow (left and right side) read tau, and
    tau is in the private list of the enclosing GPU_PARALLEL_LOOP, so it has
    to be assigned inside the loop body before those loops run.
  * Leading whitespace and blank context lines in this file were
    reconstructed; if a hunk does not match the tree, re-sync it or apply
    with `git apply --ignore-whitespace`.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index db86f137e7..78c33aab3c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -636,7 +636,7 @@ exit 0
             target_compile_options(${a_target} PRIVATE -fopenmp)
             target_link_options(${a_target} PRIVATE -fopenmp)
         elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
-            target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
+            target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -fopenmp-target-fast -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription)
             target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
         endif()
     endif()
diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp
index c2b1cc5ac7..99172ccbe9 100644
--- a/src/simulation/m_weno.fpp
+++ b/src/simulation/m_weno.fpp
@@ -941,7 +941,7 @@ contains
         if (weno_order == 3 .or. dummy) then
             #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
                 if (weno_dir == ${WENO_DIR}$) then
-                    $:GPU_PARALLEL_LOOP(collapse=4,private='[beta, dvd, poly, omega, alpha, tau]')
+                    $:GPU_PARALLEL_LOOP(collapse=4,private='[beta, dvd, poly, omega, alpha, tau, q]')
                     do l = is3_weno%beg, is3_weno%end
                         do k = is2_weno%beg, is2_weno%end
                             do j = is1_weno%beg, is1_weno%end
@@ -962,24 +962,26 @@ contains
                                     beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) + weno_eps
 
                                     if (wenojs) then
-                                        alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                     else if (mapped_weno) then
-                                        alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                         omega = alpha/sum(alpha)
-                                        alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
-                                            & *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
-                                            & - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils,j))))
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = (d_cbL_${XYZ}$ (q, j)*(1._wp + d_cbL_${XYZ}$ (q, &
+                                                & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, &
+                                                & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, j))))
+                                        end do
                                     else if (wenoz) then
                                         ! Borges, et al. (2008)
 
                                         tau = abs(beta(1) - beta(0))
-                                        alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)*(1._wp + tau/beta(0:weno_num_stencils))
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbL_${XYZ}$ (q, j)*(1._wp + tau/beta(q))
+                                        end do
                                     end if
 
                                     omega = alpha/sum(alpha)
@@ -992,21 +994,23 @@ contains
                                     poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1)
 
                                     if (wenojs) then
-                                        alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                     else if (mapped_weno) then
-                                        alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                         omega = alpha/sum(alpha)
-                                        alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
-                                            & *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
-                                            & - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils,j))))
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = (d_cbR_${XYZ}$ (q, j)*(1._wp + d_cbR_${XYZ}$ (q, &
+                                                & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, &
+                                                & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, j))))
+                                        end do
                                     else if (wenoz) then
-                                        alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)*(1._wp + tau/beta(0:weno_num_stencils))
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbR_${XYZ}$ (q, j)*(1._wp + tau/beta(q))
+                                        end do
                                     end if
 
                                     omega = alpha/sum(alpha)
@@ -1057,18 +1061,19 @@ contains
                                         & 1)*dvd(-1)*dvd(-2) + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) + weno_eps
 
                                     if (wenojs) then
-                                        alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                     else if (mapped_weno) then
-                                        alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                         omega = alpha/sum(alpha)
-                                        alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
-                                            & *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
-                                            & - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils,j))))
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = (d_cbL_${XYZ}$ (q, j)*(1._wp + d_cbL_${XYZ}$ (q, &
+                                                & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, &
+                                                & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, j))))
+                                        end do
                                     else if (wenoz) then
                                         ! Borges, et al. (2008)
 
@@ -1100,7 +1105,9 @@ contains
                                         end do
                                     end if
 
-                                    omega = alpha/sum(alpha)
+                                    omega(0) = alpha(0)/(alpha(0) + alpha(1) + alpha(2))
+                                    omega(1) = alpha(1)/(alpha(0) + alpha(1) + alpha(2))
+                                    omega(2) = alpha(2)/(alpha(0) + alpha(1) + alpha(2))
 
                                     vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2)
 
@@ -1114,18 +1121,19 @@ contains
                                         & 0)*dvd(-1) + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2)
 
                                     if (wenojs) then
-                                        alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                     else if (mapped_weno) then
-                                        alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                         omega = alpha/sum(alpha)
-                                        alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
-                                            & *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
-                                            & - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils,j))))
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = (d_cbR_${XYZ}$ (q, j)*(1._wp + d_cbR_${XYZ}$ (q, &
+                                                & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, &
+                                                & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, j))))
+                                        end do
                                     else if (wenoz) then
                                         $:GPU_LOOP(parallelism='[seq]')
                                         do q = 0, weno_num_stencils
@@ -1138,7 +1146,9 @@ contains
                                         end do
                                     end if
 
-                                    omega = alpha/sum(alpha)
+                                    omega(0) = alpha(0)/(alpha(0) + alpha(1) + alpha(2))
+                                    omega(1) = alpha(1)/(alpha(0) + alpha(1) + alpha(2))
+                                    omega(2) = alpha(2)/(alpha(0) + alpha(1) + alpha(2))
 
                                     vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2)
                                 end do
@@ -1252,18 +1262,19 @@ contains
                                     end if
 
                                     if (wenojs) then
-                                        alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                     else if (mapped_weno) then
-                                        alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                         omega = alpha/sum(alpha)
-                                        alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
-                                            & *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
-                                            & - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils,j))))
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = (d_cbL_${XYZ}$ (q, j)*(1._wp + d_cbL_${XYZ}$ (q, &
+                                                & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, &
+                                                & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, j))))
+                                        end do
                                     else if (wenoz) then
                                         ! Castro, et al. (2010) Don & Borges (2013) also helps
                                         tau = abs(beta(3) - beta(0)) ! Equation 50
@@ -1327,18 +1338,19 @@ contains
                                     end if
 
                                     if (wenojs) then
-                                        alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                     else if (mapped_weno) then
-                                        alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)/(beta(0:weno_num_stencils)**2._wp)
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
+                                        end do
                                         omega = alpha/sum(alpha)
-                                        alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
-                                            & *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, &
-                                            & j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
-                                            & - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils,j))))
+                                        do q = 0, weno_num_stencils
+                                            alpha(q) = (d_cbR_${XYZ}$ (q, j)*(1._wp + d_cbR_${XYZ}$ (q, &
+                                                & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, &
+                                                & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, j))))
+                                        end do
                                     else if (wenoz) then
                                         $:GPU_LOOP(parallelism='[seq]')
                                         do q = 0, weno_num_stencils