Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -479,17 +479,6 @@ function(MFC_SETUP_TARGET)
"-foffload-options=-lgfortran\ -lm"
"-fno-exceptions")
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
find_package(cuTENSOR)
if (NOT cuTENSOR_FOUND)
message(WARNING
"Failed to locate the NVIDIA cuTENSOR library. MFC will be "
"built without support for it, disallowing the use of "
"cu_tensor=T. This can result in degraded performance.")
else()
target_link_libraries (${a_target} PRIVATE cuTENSOR::cuTENSOR)
target_compile_definitions(${a_target} PRIVATE MFC_cuTENSOR)
endif()

foreach (cc ${MFC_CUDA_CC})
target_compile_options(${a_target}
PRIVATE -gpu=cc${cc}
Expand Down
5 changes: 0 additions & 5 deletions src/simulation/m_checker.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,6 @@ contains
#if !defined(MFC_OpenACC) && !(defined(__PGI) || defined(_CRAYFTN))
@:PROHIBIT(rdma_mpi, "Unsupported value of rdma_mpi for the current compiler")
#endif

#ifndef MFC_cuTENSOR
@:PROHIBIT(cu_tensor, "MFC was not built with the NVIDIA cuTENSOR library")
#endif

end subroutine s_check_inputs_compilers

impure subroutine s_check_inputs_igr
Expand Down
8 changes: 1 addition & 7 deletions src/simulation/m_global_parameters.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ module m_global_parameters
integer :: hyper_model !< hyperelasticity solver algorithm
logical :: elasticity !< elasticity modeling, true for hyper or hypo
logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling
logical :: cu_tensor
logical :: shear_stress !< Shear stresses
logical :: bulk_stress !< Bulk stresses
logical :: cont_damage !< Continuum damage modeling
Expand Down Expand Up @@ -499,7 +498,7 @@ module m_global_parameters
real(wp) :: mytime !< Current simulation time
real(wp) :: finaltime !< Final simulation time

logical :: weno_flat, riemann_flat, rdma_mpi
logical :: rdma_mpi

type(pres_field), allocatable, dimension(:) :: pb_ts

Expand Down Expand Up @@ -591,8 +590,6 @@ contains
hyper_model = dflt_int
b_size = dflt_int
tensor_size = dflt_int
weno_flat = .true.
riemann_flat = .true.
rdma_mpi = .false.
shear_stress = .false.
bulk_stress = .false.
Expand Down Expand Up @@ -700,9 +697,6 @@ contains
sigma = dflt_real
surface_tension = .false.

! Cuda aware MPI
cu_tensor = .false.

bodyForces = .false.
bf_x = .false.; bf_y = .false.; bf_z = .false.
!< amplitude, frequency, and phase shift sinusoid in each direction
Expand Down
4 changes: 2 additions & 2 deletions src/simulation/m_mpi_proxy.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ contains
#:endfor

#:for VAR in [ 'run_time_info','cyl_coord', 'mpp_lim', &
& 'mp_weno', 'rdma_mpi', 'weno_flat', 'riemann_flat', &
& 'mp_weno', 'rdma_mpi', 'powell', 'cont_damage', 'bc_io', &
& 'weno_Re_flux', 'alt_soundspeed', 'null_weights', 'mixture_err', &
& 'parallel_io', 'hypoelasticity', 'bubbles_euler', 'polytropic', &
& 'polydisperse', 'qbmm', 'acoustic_source', 'probe_wrt', 'integral_wrt', &
Expand All @@ -119,7 +119,7 @@ contains
& 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', &
& 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', &
& 'shear_stress', 'bulk_stress', 'bubbles_lagrange', &
& 'hyperelasticity', 'bc_io', 'powell', 'cont_damage' ]
& 'hyperelasticity']
call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
#:endfor

Expand Down
5 changes: 2 additions & 3 deletions src/simulation/m_start_up.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,8 @@ contains
! Namelist of the global parameters which may be specified by user
namelist /user_inputs/ case_dir, run_time_info, m, n, p, dt, &
t_step_start, t_step_stop, t_step_save, t_step_print, &
model_eqns, mpp_lim, time_stepper, weno_eps, weno_flat, &
riemann_flat, rdma_mpi, cu_tensor, &
teno_CT, mp_weno, weno_avg, &
model_eqns, mpp_lim, time_stepper, weno_eps, &
rdma_mpi, teno_CT, mp_weno, weno_avg, &
riemann_solver, low_Mach, wave_speeds, avg_state, &
bc_x, bc_y, bc_z, &
x_a, y_a, z_a, x_b, y_b, z_b, &
Expand Down
67 changes: 15 additions & 52 deletions src/simulation/m_weno.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -1142,69 +1142,32 @@ contains
if (n == 0) return

if (weno_dir == 2) then
#if MFC_cuTENSOR
if (cu_tensor) then
if (p == 0) then
block
use CuTensorEx

#:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4])
#:endcall GPU_HOST_DATA
end block
else
block
use CuTensorEx

#:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4])
#:endcall GPU_HOST_DATA
end block
end if
else
#endif
$:GPU_PARALLEL_LOOP(collapse=4)
do j = 1, v_size
do q = is3_weno%beg, is3_weno%end
do l = is2_weno%beg, is2_weno%end
do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q)
end do
$:GPU_PARALLEL_LOOP(collapse=4)
do j = 1, v_size
do q = is3_weno%beg, is3_weno%end
do l = is2_weno%beg, is2_weno%end
do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q)
end do
end do
end do
#if MFC_cuTENSOR
end if
#endif
end do
end if

! Reshaping/Projecting onto Characteristic Fields in z-direction
if (p == 0) return

if (weno_dir == 3) then
#if MFC_cuTENSOR
if (cu_tensor) then
block
use CuTensorEx

#:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]')
v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4])
#:endcall
end block
else
#endif
$:GPU_PARALLEL_LOOP(collapse=4)
do j = 1, v_size
do q = is3_weno%beg, is3_weno%end
do l = is2_weno%beg, is2_weno%end
do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k)
end do
$:GPU_PARALLEL_LOOP(collapse=4)
do j = 1, v_size
do q = is3_weno%beg, is3_weno%end
do l = is2_weno%beg, is2_weno%end
do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k)
end do
end do
end do
#if MFC_cuTENSOR
end if
#endif
end do
end if

end subroutine s_initialize_weno
Expand Down
Loading