diff --git a/CMakeLists.txt b/CMakeLists.txt index d78345765c..8269c1cb48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -479,17 +479,6 @@ function(MFC_SETUP_TARGET) "-foffload-options=-lgfortran\ -lm" "-fno-exceptions") elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") - find_package(cuTENSOR) - if (NOT cuTENSOR_FOUND) - message(WARNING - "Failed to locate the NVIDIA cuTENSOR library. MFC will be " - "built without support for it, disallowing the use of " - "cu_tensor=T. This can result in degraded performance.") - else() - target_link_libraries (${a_target} PRIVATE cuTENSOR::cuTENSOR) - target_compile_definitions(${a_target} PRIVATE MFC_cuTENSOR) - endif() - foreach (cc ${MFC_CUDA_CC}) target_compile_options(${a_target} PRIVATE -gpu=cc${cc} diff --git a/src/simulation/m_checker.fpp b/src/simulation/m_checker.fpp index c477a4ee9a..e7bc62d4ea 100644 --- a/src/simulation/m_checker.fpp +++ b/src/simulation/m_checker.fpp @@ -57,11 +57,6 @@ contains #if !defined(MFC_OpenACC) && !(defined(__PGI) || defined(_CRAYFTN)) @:PROHIBIT(rdma_mpi, "Unsupported value of rdma_mpi for the current compiler") #endif - -#ifndef MFC_cuTENSOR - @:PROHIBIT(cu_tensor, "MFC was not built with the NVIDIA cuTENSOR library") -#endif - end subroutine s_check_inputs_compilers impure subroutine s_check_inputs_igr diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index efe71526c6..b467e40da1 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -165,7 +165,6 @@ module m_global_parameters integer :: hyper_model !< hyperelasticity solver algorithm logical :: elasticity !< elasticity modeling, true for hyper or hypo logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling - logical :: cu_tensor logical :: shear_stress !< Shear stresses logical :: bulk_stress !< Bulk stresses logical :: cont_damage !< Continuum damage modeling @@ -499,7 +498,7 @@ module m_global_parameters real(wp) :: mytime !< Current simulation time real(wp) :: finaltime !< Final simulation time - logical :: weno_flat, riemann_flat, rdma_mpi + logical :: rdma_mpi type(pres_field), allocatable, dimension(:) :: pb_ts @@ -591,8 +590,6 @@ contains hyper_model = dflt_int b_size = dflt_int tensor_size = dflt_int - weno_flat = .true. - riemann_flat = .true. rdma_mpi = .false. shear_stress = .false. bulk_stress = .false. @@ -700,9 +697,6 @@ contains sigma = dflt_real surface_tension = .false. - ! Cuda aware MPI - cu_tensor = .false. - bodyForces = .false. bf_x = .false.; bf_y = .false.; bf_z = .false. !< amplitude, frequency, and phase shift sinusoid in each direction diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 07a21e1c96..c3f1e94a15 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -108,7 +108,7 @@ contains #:endfor #:for VAR in [ 'run_time_info','cyl_coord', 'mpp_lim', & - & 'mp_weno', 'rdma_mpi', 'weno_flat', 'riemann_flat', & + & 'mp_weno', 'rdma_mpi', 'powell', 'cont_damage', 'bc_io', & & 'weno_Re_flux', 'alt_soundspeed', 'null_weights', 'mixture_err', & & 'parallel_io', 'hypoelasticity', 'bubbles_euler', 'polytropic', & & 'polydisperse', 'qbmm', 'acoustic_source', 'probe_wrt', 'integral_wrt', & @@ -119,7 +119,7 @@ contains & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & - & 'hyperelasticity', 'bc_io', 'powell', 'cont_damage' ] + & 'hyperelasticity'] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index f429b66ab2..782418b416 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -146,9 +146,8 @@ contains ! Namelist of the global parameters which may be specified by user namelist /user_inputs/ case_dir, run_time_info, m, n, p, dt, & t_step_start, t_step_stop, t_step_save, t_step_print, & - model_eqns, mpp_lim, time_stepper, weno_eps, weno_flat, & - riemann_flat, rdma_mpi, cu_tensor, & - teno_CT, mp_weno, weno_avg, & + model_eqns, mpp_lim, time_stepper, weno_eps, & + rdma_mpi, teno_CT, mp_weno, weno_avg, & riemann_solver, low_Mach, wave_speeds, avg_state, & bc_x, bc_y, bc_z, & x_a, y_a, z_a, x_b, y_b, z_b, & diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index f03c7c8151..6bc9d004d9 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -1142,69 +1142,32 @@ contains if (n == 0) return if (weno_dir == 2) then -#if MFC_cuTENSOR - if (cu_tensor) then - if (p == 0) then - block - use CuTensorEx - - #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') - v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4]) - #:endcall GPU_HOST_DATA - end block - else - block - use CuTensorEx - - #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') - v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4]) - #:endcall GPU_HOST_DATA - end block - end if - else -#endif - $:GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) - end do + $:GPU_PARALLEL_LOOP(collapse=4) + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) end do end do end do -#if MFC_cuTENSOR - end if -#endif + end do end if ! Reshaping/Projecting onto Characteristic Fields in z-direction if (p == 0) return + if (weno_dir == 3) then -#if MFC_cuTENSOR - if (cu_tensor) then - block - use CuTensorEx - - #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]') - v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4]) - #:endcall - end block - else -#endif - $:GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) - end do + $:GPU_PARALLEL_LOOP(collapse=4) + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) end do end do end do -#if MFC_cuTENSOR - end if -#endif + end do end if end subroutine s_initialize_weno