diff --git a/CMakeLists.txt b/CMakeLists.txt
index d78345765c..8269c1cb48 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -479,17 +479,6 @@ function(MFC_SETUP_TARGET)
                     "-foffload-options=-lgfortran\ -lm"
                     "-fno-exceptions")
             elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
-                find_package(cuTENSOR)
-                if (NOT cuTENSOR_FOUND)
-                    message(WARNING
-                        "Failed to locate the NVIDIA cuTENSOR library. MFC will be "
-                        "built without support for it, disallowing the use of "
-                        "cu_tensor=T. This can result in degraded performance.")
-                else()
-                    target_link_libraries     (${a_target} PRIVATE cuTENSOR::cuTENSOR)
-                    target_compile_definitions(${a_target} PRIVATE MFC_cuTENSOR)
-                endif()
-
                 foreach (cc ${MFC_CUDA_CC})
                     target_compile_options(${a_target}
                         PRIVATE -gpu=cc${cc}
diff --git a/src/simulation/m_checker.fpp b/src/simulation/m_checker.fpp
index c477a4ee9a..e7bc62d4ea 100644
--- a/src/simulation/m_checker.fpp
+++ b/src/simulation/m_checker.fpp
@@ -57,11 +57,6 @@ contains
 #if !defined(MFC_OpenACC) && !(defined(__PGI) || defined(_CRAYFTN))
         @:PROHIBIT(rdma_mpi, "Unsupported value of rdma_mpi for the current compiler")
 #endif
-
-#ifndef MFC_cuTENSOR
-        @:PROHIBIT(cu_tensor, "MFC was not built with the NVIDIA cuTENSOR library")
-#endif
-
     end subroutine s_check_inputs_compilers
 
     impure subroutine s_check_inputs_igr
diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp
index efe71526c6..b467e40da1 100644
--- a/src/simulation/m_global_parameters.fpp
+++ b/src/simulation/m_global_parameters.fpp
@@ -165,7 +165,6 @@ module m_global_parameters
     integer :: hyper_model     !< hyperelasticity solver algorithm
     logical :: elasticity      !< elasticity modeling, true for hyper or hypo
     logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling
-    logical :: cu_tensor
     logical :: shear_stress  !< Shear stresses
     logical :: bulk_stress   !< Bulk stresses
     logical :: cont_damage   !< Continuum damage modeling
@@ -499,7 +498,7 @@ module m_global_parameters
     real(wp) :: mytime       !< Current simulation time
     real(wp) :: finaltime    !< Final simulation time
 
-    logical :: weno_flat, riemann_flat, rdma_mpi
+    logical :: rdma_mpi
 
     type(pres_field), allocatable, dimension(:) :: pb_ts
 
@@ -591,8 +590,6 @@ contains
         hyper_model = dflt_int
         b_size = dflt_int
         tensor_size = dflt_int
-        weno_flat = .true.
-        riemann_flat = .true.
         rdma_mpi = .false.
         shear_stress = .false.
         bulk_stress = .false.
@@ -700,9 +697,6 @@ contains
         sigma = dflt_real
         surface_tension = .false.
 
-        ! Cuda aware MPI
-        cu_tensor = .false.
-
         bodyForces = .false.
         bf_x = .false.; bf_y = .false.; bf_z = .false.
         !< amplitude, frequency, and phase shift sinusoid in each direction
diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp
index 07a21e1c96..c3f1e94a15 100644
--- a/src/simulation/m_mpi_proxy.fpp
+++ b/src/simulation/m_mpi_proxy.fpp
@@ -108,7 +108,7 @@ contains
         #:endfor
 
         #:for VAR in [ 'run_time_info','cyl_coord', 'mpp_lim',     &
-            &  'mp_weno', 'rdma_mpi', 'weno_flat', 'riemann_flat', &
+            &  'mp_weno', 'rdma_mpi', 'powell', 'cont_damage', 'bc_io', &
             & 'weno_Re_flux', 'alt_soundspeed', 'null_weights', 'mixture_err',   &
             & 'parallel_io', 'hypoelasticity', 'bubbles_euler', 'polytropic',    &
             & 'polydisperse', 'qbmm', 'acoustic_source', 'probe_wrt', 'integral_wrt',   &
@@ -119,7 +119,7 @@ contains
             & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out',          &
             & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension',        &
             & 'shear_stress', 'bulk_stress', 'bubbles_lagrange',     &
-            & 'hyperelasticity', 'bc_io', 'powell', 'cont_damage' ]
+            & 'hyperelasticity']
             call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
         #:endfor
 
diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp
index f429b66ab2..782418b416 100644
--- a/src/simulation/m_start_up.fpp
+++ b/src/simulation/m_start_up.fpp
@@ -146,9 +146,8 @@ contains
         ! Namelist of the global parameters which may be specified by user
         namelist /user_inputs/ case_dir, run_time_info, m, n, p, dt, &
             t_step_start, t_step_stop, t_step_save, t_step_print, &
-            model_eqns, mpp_lim, time_stepper, weno_eps, weno_flat, &
-            riemann_flat, rdma_mpi, cu_tensor, &
-            teno_CT, mp_weno, weno_avg, &
+            model_eqns, mpp_lim, time_stepper, weno_eps, &
+            rdma_mpi, teno_CT, mp_weno, weno_avg, &
             riemann_solver, low_Mach, wave_speeds, avg_state, &
             bc_x, bc_y, bc_z, &
             x_a, y_a, z_a, x_b, y_b, z_b, &
diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp
index f03c7c8151..6bc9d004d9 100644
--- a/src/simulation/m_weno.fpp
+++ b/src/simulation/m_weno.fpp
@@ -1142,69 +1142,32 @@ contains
         if (n == 0) return
 
         if (weno_dir == 2) then
-#if MFC_cuTENSOR
-            if (cu_tensor) then
-                if (p == 0) then
-                    block
-                        use CuTensorEx
-
-                        #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
-                            v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4])
-                        #:endcall GPU_HOST_DATA
-                    end block
-                else
-                    block
-                        use CuTensorEx
-
-                        #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
-                            v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4])
-                        #:endcall GPU_HOST_DATA
-                    end block
-                end if
-            else
-#endif
-                $:GPU_PARALLEL_LOOP(collapse=4)
-                do j = 1, v_size
-                    do q = is3_weno%beg, is3_weno%end
-                        do l = is2_weno%beg, is2_weno%end
-                            do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
-                                v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q)
-                            end do
+            $:GPU_PARALLEL_LOOP(collapse=4)
+            do j = 1, v_size
+                do q = is3_weno%beg, is3_weno%end
+                    do l = is2_weno%beg, is2_weno%end
+                        do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
+                            v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q)
                         end do
                     end do
                 end do
-#if MFC_cuTENSOR
-            end if
-#endif
+            end do
         end if
 
         ! Reshaping/Projecting onto Characteristic Fields in z-direction
         if (p == 0) return
+
         if (weno_dir == 3) then
-#if MFC_cuTENSOR
-            if (cu_tensor) then
-                block
-                    use CuTensorEx
-
-                    #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]')
-                        v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4])
-                    #:endcall
-                end block
-            else
-#endif
-                $:GPU_PARALLEL_LOOP(collapse=4)
-                do j = 1, v_size
-                    do q = is3_weno%beg, is3_weno%end
-                        do l = is2_weno%beg, is2_weno%end
-                            do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
-                                v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k)
-                            end do
+            $:GPU_PARALLEL_LOOP(collapse=4)
+            do j = 1, v_size
+                do q = is3_weno%beg, is3_weno%end
+                    do l = is2_weno%beg, is2_weno%end
+                        do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
+                            v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k)
                         end do
                     end do
                 end do
-#if MFC_cuTENSOR
-            end if
-#endif
+            end do
         end if
 
     end subroutine s_initialize_weno