Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add pointers for intel performance, remove unused PI variable for nvhpc #845

Merged
merged 1 commit into from Jun 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion src/control/cam_history.F90
Expand Up @@ -4992,7 +4992,6 @@ subroutine h_global (f, t)
use cam_history_support, only: dim_index_2d
use shr_reprosum_mod, only: shr_reprosum_calc
use spmd_utils, only: mpicom
use shr_const_mod, only: PI => SHR_CONST_PI
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable PI is not used in this subroutine, so its import is removed. This change was required for nvhpc.

!
!-----------------------------------------------------------------------
!
Expand Down
24 changes: 15 additions & 9 deletions src/dynamics/se/dycore/fvm_consistent_se_cslam.F90
Expand Up @@ -44,7 +44,7 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
use thread_mod , only: vert_num_threads, omp_set_nested
implicit none
type (element_t) , intent(inout) :: elem(:)
type (fvm_struct) , intent(inout) :: fvm(:)
type (fvm_struct), target , intent(inout) :: fvm(:)
type (hybrid_t) , intent(in) :: hybrid ! distributed parallel structure (shared)
type (TimeLevel_t) , intent(in) :: tl ! time level struct
type (hvcoord_t) , intent(in) :: hvcoord
Expand All @@ -71,7 +71,9 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&
integer :: region_num_threads
logical :: inJetCall
logical :: ActiveJetThread


real(r8), pointer :: fcube(:,:,:,:)
real(r8), pointer :: spherecentroid(:,:,:)

llimiter = .true.

Expand Down Expand Up @@ -152,22 +154,26 @@ subroutine run_consistent_se_cslam(elem,fvm,hybrid,dt_fvm,tl,nets,nete,hvcoord,&

!call t_stopf('fvm:orthogonal_swept_areas')
do ie=nets,nete
! The Intel compiler (version 2023.0.0) on derecho showed a significant slowdown at the
! subroutine interface without these pointers.
fcube => fvm(ie)%c(:,:,:,:)
spherecentroid => fvm(ie)%spherecentroid(:,1-nhe:nc+nhe,1-nhe:nc+nhe)
do k=kmin,kmax
!call t_startf('fvm:tracers_reconstruct')
call reconstruction(fvm(ie)%c(:,:,:,:),nlev,k,&
!call t_startf('FVM:tracers_reconstruct')
call reconstruction(fcube,nlev,k,&
ctracer(:,:,:,:),irecons_tracer,llimiter,ntrac,&
nc,nhe,nhr,nhc,nht,ns,nhr+(nhe-1),&
fvm(ie)%jx_min,fvm(ie)%jx_max,fvm(ie)%jy_min,fvm(ie)%jy_max,&
fvm(ie)%cubeboundary,fvm(ie)%halo_interp_weight,fvm(ie)%ibase,&
fvm(ie)%spherecentroid(:,1-nhe:nc+nhe,1-nhe:nc+nhe),&
spherecentroid,&
fvm(ie)%recons_metrics,fvm(ie)%recons_metrics_integral,&
fvm(ie)%rot_matrix,fvm(ie)%centroid_stretch,&
fvm(ie)%vertex_recons_weights,fvm(ie)%vtx_cart,&
irecons_tracer_lev(k))
!call t_stopf('fvm:tracers_reconstruct')
!call t_startf('fvm:swept_flux')
call swept_flux(elem(ie),fvm(ie),k,ctracer,irecons_tracer_lev(k),gsweights,gspts)
!call t_stopf('fvm:swept_flux')
!call t_stopf('FVM:tracers_reconstruct')
!call t_startf('fvm:swept_flux')
call swept_flux(elem(ie),fvm(ie),k,ctracer,irecons_tracer_lev(k),gsweights,gspts)
!call t_stopf('fvm:swept_flux')
end do
end do
!
Expand Down