From 2167bc93fe2b1e84657822a323666ccc61f9a339 Mon Sep 17 00:00:00 2001 From: TingLei-NOAA Date: Wed, 20 Mar 2024 12:16:42 -0400 Subject: [PATCH] =?UTF-8?q?Issue=20694:=20Upgrade/refactoring=20for=20U=20?= =?UTF-8?q?and=20V=20write-out=20sub=20for=20FV3REG=20GSI=20failure=20?= =?UTF-8?q?=E2=80=A6=20(#698)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **DUE DATE for merger of this PR into `develop` is 3/27/2024 (six weeks after PR creation).** Resolves #693 (thanks to @edwardhartnett's suggestions). Resolves # 694 (this PR is not able to provide a stable solution; more details will be given on the issue page). Resolves # 697: With larger requested memory for each MPI task, it still showed, for some time, differences in the analysis files between loproc and hiproc for the control runs on Hercules. Whether integrating this with the refactored IO part would provide a stable solution remains to be seen. This PR resolves the newly emerged issue with IO of netCDF files in the continuous storage, with upgraded FV3REG IO for the cold start options. (Co-author: Ming Hu, @hu5970.) This PR is being worked on in collaboration with Pete Johnson (through the RDHPCS help desk), @RussTreadon-NOAA, and @DavidHuber-NOAA, with thanks for help from @ed Raghue Reddy through the RDHPCS help desk. 
--------- Co-authored-by: Ting Lei Co-authored-by: Ting.Lei-NOAA --- regression/regression_driver.sh | 2 + regression/regression_param.sh | 48 +-- src/gsi/gsi_rfv3io_mod.f90 | 542 ++++++++++++++++++++------------ ush/sub_hera | 5 +- ush/sub_hercules | 4 +- ush/sub_jet | 2 +- ush/sub_orion | 2 +- 7 files changed, 365 insertions(+), 240 deletions(-) diff --git a/regression/regression_driver.sh b/regression/regression_driver.sh index 821cc7cedb..38329778a4 100755 --- a/regression/regression_driver.sh +++ b/regression/regression_driver.sh @@ -36,10 +36,12 @@ for jn in `seq ${RSTART} ${REND}`; do export scripts=${scripts_updat:-$scripts} export fixgsi=${fixgsi_updat:-$fixgsi} export modulefiles=${modulefiles_updat:-$modulefiles} + export ush=${ush_update:-$ush} else export scripts=${scripts_contrl:-$scripts} export fixgsi=${fixgsi_contrl:-$fixgsi} export modulefiles=${modulefiles_contrl:-$modulefiles} + export ush=${ush_cntrl:-$ush} fi rm -f ${job[$jn]}.out diff --git a/regression/regression_param.sh b/regression/regression_param.sh index 2ac615fc4a..a4f5d7035c 100755 --- a/regression/regression_param.sh +++ b/regression/regression_param.sh @@ -87,17 +87,17 @@ case $regtest in rrfs_3denvar_glbens) if [[ "$machine" = "Hera" ]]; then - topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/1" + topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/1" elif [[ "$machine" = "Orion" ]]; then - topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/2" + topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/2" elif [[ "$machine" = "Hercules" ]]; then - topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/2" + topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/2" elif 
[[ "$machine" = "Jet" ]]; then - topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/1" + topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/1" elif [[ "$machine" = "Gaea" ]]; then topts[1]="0:15:00" ; popts[1]="18/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="18/2/" ; ropts[2]="/1" @@ -117,17 +117,17 @@ case $regtest in hafs_3denvar_hybens) if [[ "$machine" = "Hera" ]]; then - topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/1" + topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/1" elif [[ "$machine" = "Orion" ]]; then - topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/2" + topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/2" elif [[ "$machine" = "Hercules" ]]; then topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/2" + topts[2]="0:15:00" ; popts[2]="5/8/" ; ropts[2]="/2" elif [[ "$machine" = "Jet" ]]; then - topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/1" + topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/1" elif [[ "$machine" = "Gaea" ]]; then topts[1]="0:15:00" ; popts[1]="18/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="18/2/" ; ropts[2]="/1" @@ -146,17 +146,17 @@ case $regtest in hafs_4denvar_glbens) if [[ "$machine" = "Hera" ]]; then - topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/1" + topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/1" elif [[ "$machine" = "Orion" ]]; then - topts[1]="0:20:00" ; popts[1]="20/1/" ; ropts[1]="/1" 
+ topts[1]="0:20:00" ; popts[1]="5/4/" ; ropts[1]="/1" topts[2]="0:20:00" ; popts[2]="20/2/" ; ropts[2]="/1" elif [[ "$machine" = "Hercules" ]]; then - topts[1]="0:20:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:20:00" ; popts[2]="20/2/" ; ropts[2]="/1" + topts[1]="0:20:00" ; popts[1]="5/4/" ; ropts[1]="/1" + topts[2]="0:20:00" ; popts[2]="10/4/" ; ropts[2]="/1" elif [[ "$machine" = "Jet" ]]; then - topts[1]="0:15:00" ; popts[1]="20/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="20/2/" ; ropts[2]="/1" + topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/1" elif [[ "$machine" = "Gaea" ]]; then topts[1]="0:15:00" ; popts[1]="18/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="18/2/" ; ropts[2]="/1" @@ -300,10 +300,10 @@ if [[ "$machine" = "Hera" ]]; then export APRUN="srun" elif [[ "$machine" = "Orion" ]]; then export OMP_STACKSIZE=2048M - export APRUN="srun -n \$ntasks --cpus-per-task=\$threads" + export APRUN="srun -n \$ntasks --mem=0 --cpus-per-task=\$threads" elif [[ "$machine" = "Hercules" ]]; then export OMP_STACKSIZE=2048M - export APRUN="srun -n \$ntasks --cpus-per-task=\$threads" + export APRUN="srun -n \$ntasks --mem=0 --cpus-per-task=\$threads" elif [[ "$machine" = "Jet" ]]; then export OMP_STACKSIZE=1024M export MPI_BUFS_PER_PROC=256 diff --git a/src/gsi/gsi_rfv3io_mod.f90 b/src/gsi/gsi_rfv3io_mod.f90 index b3f3488a70..652bad9a33 100644 --- a/src/gsi/gsi_rfv3io_mod.f90 +++ b/src/gsi/gsi_rfv3io_mod.f90 @@ -547,7 +547,7 @@ subroutine gsi_rfv3io_get_ens_grid_specs(grid_spec,ierr) ! 
!$$$ end documentation block use netcdf, only: nf90_open,nf90_close,nf90_get_var,nf90_noerr - use netcdf, only: nf90_nowrite,nf90_inquire,nf90_inquire_dimension + use netcdf, only: nf90_nowrite,nf90_mpiio,nf90_inquire,nf90_inquire_dimension use netcdf, only: nf90_inquire_variable use mpimod, only: mype use mod_fv3_lola, only: definecoef_regular_grids @@ -1238,7 +1238,6 @@ subroutine read_fv3_netcdf_guess(fv3filenamegin) fv3lam_io_phymetvars3d_nouv(jphyvar)=trim(vartem) else write(6,*)'the metvarname ',vartem,' is not expected, stop' - call flush(6) call stop2(333) endif endif @@ -1253,7 +1252,6 @@ subroutine read_fv3_netcdf_guess(fv3filenamegin) endif if(jdynvar /= ndynvario3d.or.jtracer /= ntracerio3d.or.jphyvar /= nphyvario3d ) then write(6,*)'ndynvario3d is not as expected, stop' - call flush(6) call stop2(333) endif if(mype == 0) then @@ -1361,7 +1359,6 @@ subroutine read_fv3_netcdf_guess(fv3filenamegin) else if (trim(vartem) /= "pm2_5")then write(6,*)'the chemvarname ',vartem,' is not in aeronames_smoke_fv3 !!!' - call flush(6) endif endif enddo @@ -1598,7 +1595,7 @@ subroutine read_fv3_netcdf_guess(fv3filenamegin) if( fv3sar_bg_opt == 0) then call gsi_fv3ncdf_readuv(grd_fv3lam_uv,ges_u,ges_v,fv3filenamegin(it),.false.) else - call gsi_fv3ncdf_readuv_v1(grd_fv3lam_uv,ges_u,ges_v,fv3filenamegin(it),.false.) + call gsi_fv3ncdf_readuv_v1(grd_fv3lam_uv,ges_u,ges_v,fv3filenamegin(it),.false.) 
endif if( fv3sar_bg_opt == 0) then @@ -2449,7 +2446,7 @@ subroutine gsi_fv3ncdf_read(grd_ionouv,cstate_nouv,filenamein,fv3filenamegin,ens use mpimod, only: mpi_comm_world,mpi_rtype,mype,npe,setcomm,mpi_integer,mpi_max use mpimod, only: MPI_INFO_NULL use netcdf, only: nf90_open,nf90_close,nf90_get_var,nf90_noerr - use netcdf, only: nf90_nowrite,nf90_inquire,nf90_inquire_dimension + use netcdf, only: nf90_nowrite,nf90_mpiio,nf90_inquire,nf90_inquire_dimension use netcdf, only: nf90_inquire_variable use netcdf, only: nf90_inq_varid use mod_fv3_lola, only: fv3_h_to_ll,fv3_h_to_ll_ens @@ -2469,9 +2466,9 @@ subroutine gsi_fv3ncdf_read(grd_ionouv,cstate_nouv,filenamein,fv3filenamegin,ens character(len=max_varname_length) :: name character(len=max_filename_length) :: filenamein2 real(r_kind),allocatable,dimension(:,:):: uu2d_tmp - integer(i_kind) :: countloc_tmp(3),startloc_tmp(3) + integer(i_kind) :: countloc_tmp(4),startloc_tmp(4) - integer(i_kind) nlatcase,nloncase,nxcase,nycase,countloc(3),startloc(3) + integer(i_kind) nlatcase,nloncase,nxcase,nycase,countloc(4),startloc(4) integer(i_kind) ilev,ilevtot,inative integer(i_kind) kbgn,kend,len logical :: phy_smaller_domain @@ -2528,18 +2525,16 @@ subroutine gsi_fv3ncdf_read(grd_ionouv,cstate_nouv,filenamein,fv3filenamegin,ens allocate(gfile_loc_layout(0:fv3_io_layout_y-1)) do nio=0,fv3_io_layout_y-1 write(filename_layout,'(a,a,I4.4)') trim(filenamein),'.',nio - iret=nf90_open(filename_layout,nf90_nowrite,gfile_loc_layout(nio),comm=mpi_comm_read,info=MPI_INFO_NULL) !clt + iret=nf90_open(filename_layout,ior(nf90_nowrite,nf90_mpiio),gfile_loc_layout(nio),comm=mpi_comm_read,info=MPI_INFO_NULL) !clt if(iret/=nf90_noerr) then write(6,*)' gsi_fv3ncdf_read: problem opening ',trim(filename_layout),gfile_loc_layout(nio),', Status = ',iret - call flush(6) call stop2(333) endif enddo else - iret=nf90_open(filenamein,nf90_nowrite,gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) !clt + 
iret=nf90_open(filenamein,ior(nf90_nowrite,nf90_mpiio),gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) !clt if(iret/=nf90_noerr) then write(6,*)' gsi_fv3ncdf_read: problem opening ',trim(filenamein),gfile_loc,', Status = ',iret - call flush(6) call stop2(333) endif endif @@ -2554,15 +2549,14 @@ subroutine gsi_fv3ncdf_read(grd_ionouv,cstate_nouv,filenamein,fv3filenamegin,ens name=trim(varname) if(trim(filenamein) /= trim(filenamein2)) then write(6,*)'filenamein and filenamein2 are not the same as expected, stop' - call flush(6) call stop2(333) endif ilev=grd_ionouv%lnames(1,ilevtot) nz=grd_ionouv%nsig nzp1=nz+1 inative=nzp1-ilev - startloc=(/1,1,inative/) - countloc=(/nxcase,nycase,1/) + startloc=(/1,1,inative,1/) + countloc=(/nxcase,nycase,1,1/) ! Variable ref_f3d in phy_data.nc has a smaller domain size than ! dynvariables and tracers as well as a reversed order in vertical if ( trim(adjustl(varname)) == 'ref_f3d' .or. trim(adjustl(varname)) == 'flash_extent_density' )then @@ -2570,23 +2564,23 @@ subroutine gsi_fv3ncdf_read(grd_ionouv,cstate_nouv,filenamein,fv3filenamegin,ens if(trim(name)=='xaxis_1') nx_phy=len if( nx_phy == nxcase )then allocate(uu2d_tmp(nxcase,nycase)) - countloc_tmp=(/nxcase,nycase,1/) + countloc_tmp=(/nxcase,nycase,1,1/) phy_smaller_domain = .false. else allocate(uu2d_tmp(nxcase-6,nycase-6)) - countloc_tmp=(/nxcase-6,nycase-6,1/) + countloc_tmp=(/nxcase-6,nycase-6,1,1/) phy_smaller_domain = .true. 
end if - startloc_tmp=(/1,1,ilev/) + startloc_tmp=(/1,1,ilev,1/) end if if(fv3_io_layout_y > 1) then do nio=0,fv3_io_layout_y-1 if (ensgrid) then - countloc=(/nxcase,ny_layout_lenens(nio)+1,1/) + countloc=(/nxcase,ny_layout_lenens(nio)+1,1,1/) allocate(uu2d_layout(nxcase,ny_layout_lenens(nio)+1)) else - countloc=(/nxcase,ny_layout_len(nio),1/) + countloc=(/nxcase,ny_layout_len(nio),1,1/) allocate(uu2d_layout(nxcase,ny_layout_len(nio))) end if iret=nf90_inq_varid(gfile_loc_layout(nio),trim(adjustl(varname)),var_id) @@ -2671,10 +2665,10 @@ subroutine gsi_fv3ncdf_read_v1(grd_ionouv,cstate_nouv,filenamein,fv3filenamegin, use kinds, only: r_kind,i_kind - use mpimod, only: mpi_rtype,mpi_comm_world,mype,MPI_INFO_NULL - use mpimod, only: mpi_comm_world,mpi_rtype,mype + use mpimod, only: npe,mpi_rtype,mpi_comm_world,mype,MPI_INFO_NULL + use mpimod, only: mpi_comm_world,mpi_rtype,mype,setcomm,mpi_integer,mpi_max use netcdf, only: nf90_open,nf90_close,nf90_get_var,nf90_noerr - use netcdf, only: nf90_nowrite,nf90_inquire,nf90_inquire_dimension + use netcdf, only: nf90_nowrite,nf90_mpiio,nf90_inquire,nf90_inquire_dimension use netcdf, only: nf90_inquire_variable use netcdf, only: nf90_inq_varid use mod_fv3_lola, only: fv3_h_to_ll,fv3_h_to_ll_ens @@ -2694,12 +2688,18 @@ subroutine gsi_fv3ncdf_read_v1(grd_ionouv,cstate_nouv,filenamein,fv3filenamegin, character(len=max_varname_length) :: varname,vgsiname - integer(i_kind) nlatcase,nloncase,nxcase,nycase,countloc(3),startloc(3) + integer(i_kind) nlatcase,nloncase,nxcase,nycase,countloc(4),startloc(4) integer(i_kind) kbgn,kend integer(i_kind) var_id integer(i_kind) inative,ilev,ilevtot integer(i_kind) gfile_loc,iret integer(i_kind) nzp1,mm1 + + integer(i_kind):: iworld,iworld_group,nread,mpi_comm_read,i,ierror + integer(i_kind),dimension(npe):: members,members_read,mype_read_rank + logical:: procuse + + mm1=mype+1 @@ -2712,13 +2712,34 @@ subroutine gsi_fv3ncdf_read_v1(grd_ionouv,cstate_nouv,filenamein,fv3filenamegin, nxcase=nx 
nycase=ny end if + allocate(uu2d(nxcase,nycase)) + kbgn=grd_ionouv%kbegin_loc kend=grd_ionouv%kend_loc - allocate(uu2d(nxcase,nycase)) - iret=nf90_open(filenamein,nf90_nowrite,gfile_loc,comm=mpi_comm_world,info=MPI_INFO_NULL) !clt + procuse = .false. + members=-1 + members_read=-1 + if (kbgn<=kend) then + procuse = .true. + members(mm1) = mype + endif + call mpi_allreduce(members,members_read,npe,mpi_integer,mpi_max,mpi_comm_world,ierror) + + nread=0 + mype_read_rank=-1 + do i=1,npe + if (members_read(i) >= 0) then + nread=nread+1 + mype_read_rank(nread) = members_read(i) + endif + enddo + + call setcomm(iworld,iworld_group,nread,mype_read_rank,mpi_comm_read,ierror) + + if (procuse) then + iret=nf90_open(filenamein,ior(nf90_nowrite,nf90_mpiio),gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) !clt if(iret/=nf90_noerr) then write(6,*)' gsi_fv3ncdf_read_v1: problem opening ',trim(filenamein),gfile_loc,', Status = ',iret - call flush(6) call stop2(333) endif @@ -2728,15 +2749,14 @@ subroutine gsi_fv3ncdf_read_v1(grd_ionouv,cstate_nouv,filenamein,fv3filenamegin, call getfv3lamfilevname(vgsiname,fv3filenamegin,filenamein2,varname) if(trim(filenamein) /= trim(filenamein2)) then write(6,*)'filenamein and filenamein2 are not the same as expected, stop' - call flush(6) call stop2(333) endif ilev=grd_ionouv%lnames(1,ilevtot) nz=grd_ionouv%nsig nzp1=nz+1 inative=nzp1-ilev - startloc=(/1,1,inative+1/) - countloc=(/nxcase,nycase,1/) + startloc=(/1,1,inative+1,1/) + countloc=(/nxcase,nycase,1,1/) iret=nf90_inq_varid(gfile_loc,trim(adjustl(varname)),var_id) if(iret/=nf90_noerr) then write(6,*)' wrong to get var_id ',var_id @@ -2752,8 +2772,9 @@ subroutine gsi_fv3ncdf_read_v1(grd_ionouv,cstate_nouv,filenamein,fv3filenamegin, end if enddo ! 
i - call general_grid2sub(grd_ionouv,hwork,cstate_nouv%values) iret=nf90_close(gfile_loc) + endif + call general_grid2sub(grd_ionouv,hwork,cstate_nouv%values) deallocate (uu2d) @@ -2785,7 +2806,7 @@ subroutine gsi_fv3ncdf_readuv(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) use kinds, only: r_kind,i_kind use mpimod, only: mpi_comm_world,mpi_rtype,mype,mpi_info_null,npe,setcomm,mpi_integer,mpi_max use netcdf, only: nf90_open,nf90_close,nf90_get_var,nf90_noerr - use netcdf, only: nf90_nowrite,nf90_inquire,nf90_inquire_dimension + use netcdf, only: nf90_nowrite,nf90_mpiio,nf90_inquire,nf90_inquire_dimension use netcdf, only: nf90_inquire_variable use netcdf, only: nf90_inq_varid use mod_fv3_lola, only: fv3_h_to_ll,fv3uv2earth,fv3_h_to_ll_ens,fv3uv2earthens @@ -2808,7 +2829,7 @@ subroutine gsi_fv3ncdf_readuv(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) integer(i_kind) u_grd_VarId,v_grd_VarId integer(i_kind) nlatcase,nloncase integer(i_kind) nxcase,nycase - integer(i_kind) u_countloc(3),u_startloc(3),v_countloc(3),v_startloc(3) + integer(i_kind) u_countloc(4),u_startloc(4),v_countloc(4),v_startloc(4) integer(i_kind) inative,ilev,ilevtot integer(i_kind) kbgn,kend @@ -2873,15 +2894,13 @@ subroutine gsi_fv3ncdf_readuv(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) iret=nf90_open(filename_layout,nf90_nowrite,gfile_loc_layout(nio),comm=mpi_comm_read,info=MPI_INFO_NULL) if(iret/=nf90_noerr) then write(6,*)'problem opening6 ',trim(filename_layout),gfile_loc_layout(nio),', Status = ',iret - call flush(6) call stop2(333) endif enddo else - iret=nf90_open(filenamein,nf90_nowrite,gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) !clt + iret=nf90_open(filenamein,ior(nf90_nowrite,nf90_mpiio),gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) !clt if(iret/=nf90_noerr) then write(6,*)' problem opening6 ',trim(filenamein),', Status = ',iret - call flush(6) call stop2(333) endif endif @@ -2891,24 +2910,23 @@ subroutine gsi_fv3ncdf_readuv(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) call 
getfv3lamfilevname(vgsiname,fv3filenamegin,filenamein2,varname) if(trim(filenamein) /= trim(filenamein2)) then write(6,*)'filenamein and filenamein2 are not the same as expected, stop' - call flush(6) call stop2(333) endif ilev=grd_uv%lnames(1,ilevtot) nz=grd_uv%nsig nzp1=nz+1 inative=nzp1-ilev - u_countloc=(/nxcase,nycase+1,1/) - v_countloc=(/nxcase+1,nycase,1/) - u_startloc=(/1,1,inative/) - v_startloc=(/1,1,inative/) + u_countloc=(/nxcase,nycase+1,1,1/) + v_countloc=(/nxcase+1,nycase,1,1/) + u_startloc=(/1,1,inative,1/) + v_startloc=(/1,1,inative,1/) if(fv3_io_layout_y > 1) then do nio=0,fv3_io_layout_y-1 if (ensgrid) then - u_countloc=(/nxcase,ny_layout_lenens(nio)+1,1/) + u_countloc=(/nxcase,ny_layout_lenens(nio)+1,1,1/) allocate(u2d_layout(nxcase,ny_layout_lenens(nio)+1)) else - u_countloc=(/nxcase,ny_layout_len(nio)+1,1/) + u_countloc=(/nxcase,ny_layout_len(nio)+1,1,1/) allocate(u2d_layout(nxcase,ny_layout_len(nio)+1)) end if call check( nf90_inq_varid(gfile_loc_layout(nio),'u',u_grd_VarId) ) @@ -2917,13 +2935,13 @@ subroutine gsi_fv3ncdf_readuv(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) u2d(:,ny_layout_bens(nio):ny_layout_eens(nio))=u2d_layout(:,1:ny_layout_lenens(nio)) if(nio==fv3_io_layout_y-1) u2d(:,ny_layout_eens(nio)+1)=u2d_layout(:,ny_layout_lenens(nio)+1) deallocate(u2d_layout) - v_countloc=(/nxcase+1,ny_layout_lenens(nio),1/) + v_countloc=(/nxcase+1,ny_layout_lenens(nio),1,1/) allocate(v2d_layout(nxcase+1,ny_layout_lenens(nio))) else u2d(:,ny_layout_b(nio):ny_layout_e(nio))=u2d_layout(:,1:ny_layout_len(nio)) if(nio==fv3_io_layout_y-1) u2d(:,ny_layout_e(nio)+1)=u2d_layout(:,ny_layout_len(nio)+1) deallocate(u2d_layout) - v_countloc=(/nxcase+1,ny_layout_len(nio),1/) + v_countloc=(/nxcase+1,ny_layout_len(nio),1,1/) allocate(v2d_layout(nxcase+1,ny_layout_len(nio))) end if call check( nf90_inq_varid(gfile_loc_layout(nio),'v',v_grd_VarId) ) @@ -3019,9 +3037,10 @@ subroutine gsi_fv3ncdf_readuv_v1(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) !$$$ end 
documentation block use constants, only: half use kinds, only: r_kind,i_kind - use mpimod, only: mpi_comm_world,mpi_rtype,mype,mpi_info_null + use mpimod, only: setcomm,mpi_integer,mpi_max, npe,mpi_comm_world,mpi_rtype,mype,mpi_info_null use netcdf, only: nf90_open,nf90_close,nf90_get_var,nf90_noerr - use netcdf, only: nf90_nowrite,nf90_inquire,nf90_inquire_dimension + use netcdf, only: nf90_nowrite,nf90_mpiio,nf90_inquire,nf90_inquire_dimension + use netcdf, only: nf90_var_par_access,nf90_netcdf4 use netcdf, only: nf90_inquire_variable use netcdf, only: nf90_inq_varid use mod_fv3_lola, only: fv3_h_to_ll,fv3_h_to_ll_ens @@ -3051,6 +3070,9 @@ subroutine gsi_fv3ncdf_readuv_v1(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) integer(i_kind) nxcase,nycase integer(i_kind) us_countloc(3),us_startloc(3) integer(i_kind) vw_countloc(3),vw_startloc(3) + integer(i_kind):: iworld,iworld_group,nread,mpi_comm_read,i,ierror + integer(i_kind),dimension(npe):: members,members_read,mype_read_rank + logical:: procuse allocate (worksub(2,grd_uv%lat2,grd_uv%lon2,grd_uv%nsig)) mm1=mype+1 @@ -3067,11 +3089,33 @@ subroutine gsi_fv3ncdf_readuv_v1(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) kend=grd_uv%kend_loc allocate (us2d(nxcase,nycase+1),vw2d(nxcase+1,nycase)) allocate (uorv2d(nxcase,nycase)) + procuse = .false. + members=-1 + members_read=-1 + if (kbgn<=kend) then + procuse = .true. 
+ members(mm1) = mype + endif + + call mpi_allreduce(members,members_read,npe,mpi_integer,mpi_max,mpi_comm_world,ierror) + + nread=0 + mype_read_rank=-1 + do i=1,npe + if (members_read(i) >= 0) then + nread=nread+1 + mype_read_rank(nread) = members_read(i) + endif + enddo + + call setcomm(iworld,iworld_group,nread,mype_read_rank,mpi_comm_read,ierror) + + if (procuse) then + filenamein=fv3filenamegin%dynvars - iret=nf90_open(filenamein,nf90_nowrite,gfile_loc,comm=mpi_comm_world,info=MPI_INFO_NULL) !clt + iret=nf90_open(filenamein,ior(nf90_netcdf4,ior(nf90_nowrite,nf90_mpiio)),gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) !clt if(iret/=nf90_noerr) then write(6,*)' gsi_fv3ncdf_read_v1: problem opening ',trim(filenamein),gfile_loc,', Status = ',iret - call flush(6) call stop2(333) endif @@ -3080,7 +3124,6 @@ subroutine gsi_fv3ncdf_readuv_v1(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) filenamein2=fv3filenamegin%dynvars if(trim(filenamein) /= trim(filenamein2)) then write(6,*)'filenamein and filenamein2 are not the same as expected, stop' - call flush(6) call stop2(333) endif ilev=grd_uv%lnames(1,ilevtot) @@ -3099,9 +3142,9 @@ subroutine gsi_fv3ncdf_readuv_v1(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) ! transfor to earth u/v, interpolate to analysis grid, reverse vertical order - iret=nf90_inq_varid(gfile_loc,trim(adjustl("u_s")),var_id) - - iret=nf90_get_var(gfile_loc,var_id,us2d,start=us_startloc,count=us_countloc) + call check(nf90_inq_varid(gfile_loc,trim(adjustl("u_s")),var_id)) + + call check(nf90_get_var(gfile_loc,var_id,us2d,start=us_startloc,count=us_countloc)) iret=nf90_inq_varid(gfile_loc,trim(adjustl("v_w")),var_id) iret=nf90_get_var(gfile_loc,var_id,vw2d,start=vw_startloc,count=vw_countloc) do j=1,ny @@ -3123,10 +3166,11 @@ subroutine gsi_fv3ncdf_readuv_v1(grd_uv,ges_u,ges_v,fv3filenamegin,ensgrid) end if enddo ! 
iilevtoto + iret=nf90_close(gfile_loc) + endif !procuse call general_grid2sub(grd_uv,hwork,worksub) ges_u=worksub(1,:,:,:) ges_v=worksub(2,:,:,:) - iret=nf90_close(gfile_loc) deallocate (us2d,vw2d,worksub) end subroutine gsi_fv3ncdf_readuv_v1 @@ -3160,7 +3204,7 @@ subroutine gsi_fv3ncdf_read_ens_parallel_over_ens(filenamein,fv3filenamegin, & use mpimod, only: mpi_comm_world,mpi_rtype,mype use mpimod, only: MPI_INFO_NULL use netcdf, only: nf90_open,nf90_close,nf90_get_var,nf90_noerr - use netcdf, only: nf90_nowrite,nf90_inquire,nf90_inquire_dimension + use netcdf, only: nf90_nowrite,nf90_mpiio,nf90_inquire,nf90_inquire_dimension use netcdf, only: nf90_inquire_variable use netcdf, only: nf90_inq_varid use gridmod, only: nsig,nlon,nlat @@ -3179,7 +3223,7 @@ subroutine gsi_fv3ncdf_read_ens_parallel_over_ens(filenamein,fv3filenamegin, & character(len=max_varname_length) :: name character(len=max_filename_length), allocatable,dimension(:) :: varname_files - integer(i_kind) nlatcase,nloncase,nxcase,nycase,countloc(3),startloc(3),countloc_tmp(3),startloc_tmp(3) + integer(i_kind) nlatcase,nloncase,nxcase,nycase,countloc(4),startloc(4),countloc_tmp(4),startloc_tmp(4) integer(i_kind) ilev,ilevtot,inative,ivar integer(i_kind) kbgn,kend integer(i_kind) gfile_loc,iret,var_id @@ -3238,15 +3282,13 @@ subroutine gsi_fv3ncdf_read_ens_parallel_over_ens(filenamein,fv3filenamegin, & iret=nf90_open(filename_layout,nf90_nowrite,gfile_loc_layout(nio),comm=mpi_comm_world,info=MPI_INFO_NULL) if(iret/=nf90_noerr) then write(6,*)' gsi_fv3ncdf_read: problem opening ',trim(filename_layout),gfile_loc_layout(nio),', Status = ',iret - call flush(6) call stop2(333) endif enddo else - iret=nf90_open(filenamein,nf90_nowrite,gfile_loc) + iret=nf90_open(filenamein,ior(nf90_nowrite,nf90_mpiio),gfile_loc) if(iret/=nf90_noerr) then write(6,*)' gsi_fv3ncdf_read: problem opening ',trim(filenamein),gfile_loc,', Status = ',iret - call flush(6) call stop2(333) endif endif @@ -3256,8 +3298,8 @@ subroutine 
gsi_fv3ncdf_read_ens_parallel_over_ens(filenamein,fv3filenamegin, & nz=nsig nzp1=nz+1 inative=nzp1-ilev - startloc=(/1,1,inative/) - countloc=(/nxcase,nycase,1/) + startloc=(/1,1,inative,1/) + countloc=(/nxcase,nycase,1,1/) varname = trim(varname_files(ivar)) ! Variable ref_f3d in phy_data.nc has a smaller domain size than ! dynvariables and tracers as well as a reversed order in vertical @@ -3266,19 +3308,19 @@ subroutine gsi_fv3ncdf_read_ens_parallel_over_ens(filenamein,fv3filenamegin, & if(trim(name)=='xaxis_1') nx_phy=len if( nx_phy == nxcase )then allocate(uu2d_tmp(nxcase,nycase)) - countloc_tmp=(/nxcase,nycase,1/) + countloc_tmp=(/nxcase,nycase,1,1/) phy_smaller_domain = .false. else allocate(uu2d_tmp(nxcase-6,nycase-6)) - countloc_tmp=(/nxcase-6,nycase-6,1/) + countloc_tmp=(/nxcase-6,nycase-6,1,1/) phy_smaller_domain = .true. end if - startloc_tmp=(/1,1,ilev/) + startloc_tmp=(/1,1,ilev,1/) end if if(fv3_io_layout_y > 1) then do nio=0,fv3_io_layout_y-1 - countloc=(/nxcase,ny_layout_len(nio),1/) + countloc=(/nxcase,ny_layout_len(nio),1,1/) allocate(uu2d_layout(nxcase,ny_layout_len(nio))) iret=nf90_inq_varid(gfile_loc_layout(nio),trim(adjustl(varname)),var_id) iret=nf90_get_var(gfile_loc_layout(nio),var_id,uu2d_layout,start=startloc,count=countloc) @@ -3408,7 +3450,7 @@ subroutine gsi_fv3ncdf_readuv_ens_parallel_over_ens(ges_u,ges_v,fv3filenamegin,i integer(i_kind) u_grd_VarId,v_grd_VarId integer(i_kind) nlatcase,nloncase integer(i_kind) nxcase,nycase - integer(i_kind) u_countloc(3),u_startloc(3),v_countloc(3),v_startloc(3) + integer(i_kind) u_countloc(4),u_startloc(4),v_countloc(4),v_startloc(4) integer(i_kind) inative,ilev,ilevtot integer(i_kind) kbgn,kend @@ -3442,7 +3484,6 @@ subroutine gsi_fv3ncdf_readuv_ens_parallel_over_ens(ges_u,ges_v,fv3filenamegin,i iret=nf90_open(filename_layout,nf90_nowrite,gfile_loc_layout(nio),comm=mpi_comm_world,info=MPI_INFO_NULL) if(iret/=nf90_noerr) then write(6,*)'problem opening 
',trim(filename_layout),gfile_loc_layout(nio),', Status = ',iret - call flush(6) call stop2(333) endif enddo @@ -3450,7 +3491,6 @@ subroutine gsi_fv3ncdf_readuv_ens_parallel_over_ens(ges_u,ges_v,fv3filenamegin,i iret=nf90_open(filenamein,nf90_nowrite,gfile_loc) if(iret/=nf90_noerr) then write(6,*)' problem opening ',trim(filenamein),', Status = ',iret - call flush(6) call stop2(333) endif endif @@ -3459,14 +3499,14 @@ subroutine gsi_fv3ncdf_readuv_ens_parallel_over_ens(ges_u,ges_v,fv3filenamegin,i nz=nsig nzp1=nz+1 inative=nzp1-ilev - u_countloc=(/nxcase,nycase+1,1/) - v_countloc=(/nxcase+1,nycase,1/) - u_startloc=(/1,1,inative/) - v_startloc=(/1,1,inative/) + u_countloc=(/nxcase,nycase+1,1,1/) + v_countloc=(/nxcase+1,nycase,1,1/) + u_startloc=(/1,1,inative,1/) + v_startloc=(/1,1,inative,1/) if(fv3_io_layout_y > 1) then do nio=0,fv3_io_layout_y-1 - u_countloc=(/nxcase,ny_layout_len(nio)+1,1/) + u_countloc=(/nxcase,ny_layout_len(nio)+1,1,1/) allocate(u2d_layout(nxcase,ny_layout_len(nio)+1)) call check( nf90_inq_varid(gfile_loc_layout(nio),'u',u_grd_VarId) ) iret=nf90_get_var(gfile_loc_layout(nio),u_grd_VarId,u2d_layout,start=u_startloc,count=u_countloc) @@ -3474,7 +3514,7 @@ subroutine gsi_fv3ncdf_readuv_ens_parallel_over_ens(ges_u,ges_v,fv3filenamegin,i if(nio==fv3_io_layout_y-1) u2d(:,ny_layout_e(nio)+1)=u2d_layout(:,ny_layout_len(nio)+1) deallocate(u2d_layout) - v_countloc=(/nxcase+1,ny_layout_len(nio),1/) + v_countloc=(/nxcase+1,ny_layout_len(nio),1,1/) allocate(v2d_layout(nxcase+1,ny_layout_len(nio))) call check( nf90_inq_varid(gfile_loc_layout(nio),'v',v_grd_VarId) ) iret=nf90_get_var(gfile_loc_layout(nio),v_grd_VarId,v2d_layout,start=v_startloc,count=v_countloc) @@ -3965,7 +4005,7 @@ subroutine gsi_fv3ncdf_writeuv(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) use mod_fv3_lola, only: fv3_ll_to_h,fv3_h_to_ll, & fv3uv2earth,earthuv2fv3 use netcdf, only: nf90_open,nf90_close,nf90_noerr - use netcdf, only: nf90_write,nf90_inq_varid + use netcdf, only: 
nf90_write,nf90_mpiio,nf90_inq_varid,nf90_var_par_access,nf90_collective use netcdf, only: nf90_put_var,nf90_get_var use general_sub2grid_mod, only: sub2grid_info,general_sub2grid @@ -3984,11 +4024,11 @@ subroutine gsi_fv3ncdf_writeuv(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) integer(i_kind) inative,ilev,ilevtot integer(i_kind) nlatcase,nloncase integer(i_kind) nxcase,nycase - integer(i_kind) u_countloc(3),u_startloc(3),v_countloc(3),v_startloc(3) + integer(i_kind) u_countloc(4),u_startloc(4),v_countloc(4),v_startloc(4) character(:),allocatable:: filenamein ,varname real(r_kind),allocatable,dimension(:,:,:,:):: worksub real(r_kind),allocatable,dimension(:,:):: work_au,work_av - real(r_kind),allocatable,dimension(:,:):: work_bu,work_bv + real(r_kind),allocatable,dimension(:,:,:):: work_bu,work_bv real(r_kind),allocatable,dimension(:,:):: u2d,v2d,workau2,workav2 real(r_kind),allocatable,dimension(:,:):: workbu2,workbv2 @@ -3997,10 +4037,12 @@ subroutine gsi_fv3ncdf_writeuv(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) logical:: procuse ! 
for fv3_io_layout_y > 1 - real(r_kind),allocatable,dimension(:,:):: u2d_layout,v2d_layout + real(r_kind),allocatable,dimension(:,:,:):: u2d_layout,v2d_layout integer(i_kind) :: nio integer(i_kind),allocatable :: gfile_loc_layout(:) character(len=180) :: filename_layout + integer(i_kind):: kend_native,kbgn_native + integer(i_kind):: istat mm1=mype+1 @@ -4012,8 +4054,6 @@ subroutine gsi_fv3ncdf_writeuv(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) kend=grd_uv%kend_loc allocate( u2d(nlon_regional,nlat_regional+1)) allocate( v2d(nlon_regional+1,nlat_regional)) - allocate( work_bu(nlon_regional,nlat_regional+1)) - allocate( work_bv(nlon_regional+1,nlat_regional)) allocate (worksub(2,grd_uv%lat2,grd_uv%lon2,grd_uv%nsig)) allocate( work_au(nlatcase,nloncase),work_av(nlatcase,nloncase)) do k=1,grd_uv%nsig @@ -4054,59 +4094,70 @@ subroutine gsi_fv3ncdf_writeuv(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) allocate(gfile_loc_layout(0:fv3_io_layout_y-1)) do nio=0,fv3_io_layout_y-1 write(filename_layout,'(a,a,I4.4)') trim(filenamein),".",nio - call check( nf90_open(filename_layout,nf90_write,gfile_loc_layout(nio),comm=mpi_comm_read,info=MPI_INFO_NULL) ) + call check( nf90_open(filename_layout,ior(nf90_write, nf90_mpiio),gfile_loc_layout(nio),comm=mpi_comm_read,info=MPI_INFO_NULL) ) enddo gfile_loc=gfile_loc_layout(0) else - call check( nf90_open(filenamein,nf90_write,gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) ) + call check( nf90_open(filenamein,ior(nf90_write, nf90_mpiio),gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) ) + endif + nz=grd_uv%nsig + nzp1=nz+1 + kend_native=nzp1-grd_uv%lnames(1,kbgn) + kbgn_native=nzp1-grd_uv%lnames(1,kend) + allocate( work_bu(nlon_regional,nlat_regional+1,kbgn_native:kend_native)) + allocate( work_bv(nlon_regional+1,nlat_regional,kbgn_native:kend_native)) + u_startloc=(/1,1,kbgn_native,1/) + u_countloc=(/nxcase,nycase+1,kend_native-kbgn_native+1,1/) + v_startloc=(/1,1,kbgn_native,1/) + 
v_countloc=(/nxcase+1,nycase,kend_native-kbgn_native+1,1/) + if(fv3_io_layout_y > 1) then + do nio=0,fv3_io_layout_y-1 + allocate(u2d_layout(nxcase,ny_layout_len(nio)+1,kend_native-kbgn_native+1)) + u_countloc=(/nxcase,ny_layout_len(nio)+1,kend_native-kbgn_native+1,1/) + call check( nf90_get_var(gfile_loc_layout(nio),ugrd_VarId,u2d_layout,start=u_startloc,count=u_countloc) ) + work_bu(:,ny_layout_b(nio):ny_layout_e(nio),:)=u2d_layout(:,1:ny_layout_len(nio),:) + if(nio==fv3_io_layout_y-1) work_bu(:,ny_layout_e(nio)+1,:)=u2d_layout(:,ny_layout_len(nio)+1,:) + deallocate(u2d_layout) + + allocate(v2d_layout(nxcase+1,ny_layout_len(nio),kend_native-kbgn_native+1)) + v_countloc=(/nxcase+1,ny_layout_len(nio),kend_native-kbgn_native+1,1/) + call check( nf90_get_var(gfile_loc_layout(nio),vgrd_VarId,v2d_layout,start=v_startloc,count=v_countloc) ) + work_bv(:,ny_layout_b(nio):ny_layout_e(nio),:)=v2d_layout + deallocate(v2d_layout) + enddo + else + call check( nf90_inq_varid(gfile_loc,'u',ugrd_VarId) ) + call check( nf90_inq_varid(gfile_loc,'v',vgrd_VarId) ) + call check( nf90_var_par_access(gfile_loc, ugrd_VarId, nf90_collective)) + call check( nf90_var_par_access(gfile_loc, vgrd_VarId, nf90_collective)) + call check( nf90_get_var(gfile_loc,ugrd_VarId,work_bu,start=u_startloc,count=u_countloc) ) + call check( nf90_get_var(gfile_loc,vgrd_VarId,work_bv,start=v_startloc,count=v_countloc) ) endif + + do ilevtot=kbgn,kend varname=grd_uv%names(1,ilevtot) ilev=grd_uv%lnames(1,ilevtot) - nz=grd_uv%nsig - nzp1=nz+1 inative=nzp1-ilev - u_countloc=(/nxcase,nycase+1,1/) - v_countloc=(/nxcase+1,nycase,1/) - u_startloc=(/1,1,inative/) - v_startloc=(/1,1,inative/) work_au=hwork(1,:,:,ilevtot) work_av=hwork(2,:,:,ilevtot) - call check( nf90_inq_varid(gfile_loc,'u',ugrd_VarId) ) - call check( nf90_inq_varid(gfile_loc,'v',vgrd_VarId) ) if(add_saved)then allocate( workau2(nlatcase,nloncase),workav2(nlatcase,nloncase)) allocate( workbu2(nlon_regional,nlat_regional+1)) allocate( 
workbv2(nlon_regional+1,nlat_regional)) !!!!!!!! readin work_b !!!!!!!!!!!!!!!! - if(fv3_io_layout_y > 1) then - do nio=0,fv3_io_layout_y-1 - allocate(u2d_layout(nxcase,ny_layout_len(nio)+1)) - u_countloc=(/nxcase,ny_layout_len(nio)+1,1/) - call check( nf90_get_var(gfile_loc_layout(nio),ugrd_VarId,u2d_layout,start=u_startloc,count=u_countloc) ) - work_bu(:,ny_layout_b(nio):ny_layout_e(nio))=u2d_layout(:,1:ny_layout_len(nio)) - if(nio==fv3_io_layout_y-1) work_bu(:,ny_layout_e(nio)+1)=u2d_layout(:,ny_layout_len(nio)+1) - deallocate(u2d_layout) - - allocate(v2d_layout(nxcase+1,ny_layout_len(nio))) - v_countloc=(/nxcase+1,ny_layout_len(nio),1/) - call check( nf90_get_var(gfile_loc_layout(nio),vgrd_VarId,v2d_layout,start=v_startloc,count=v_countloc) ) - work_bv(:,ny_layout_b(nio):ny_layout_e(nio))=v2d_layout - deallocate(v2d_layout) - enddo - else - call check( nf90_get_var(gfile_loc,ugrd_VarId,work_bu,start=u_startloc,count=u_countloc) ) - call check( nf90_get_var(gfile_loc,vgrd_VarId,work_bv,start=v_startloc,count=v_countloc) ) - endif + +!clt for fv3_io_layout<=1 now the nf90_get_var has been moved outside of this do loop +!to avoid failure on hercules when L_MPI_EXTRA_FILESYSTEM=1 if(.not.grid_reverse_flag) then - call reverse_grid_r_uv(work_bu,nlon_regional,nlat_regional+1,1) - call reverse_grid_r_uv(work_bv,nlon_regional+1,nlat_regional,1) + call reverse_grid_r_uv(work_bu(:,:,inative),nlon_regional,nlat_regional+1,1) + call reverse_grid_r_uv(work_bv(:,:,inative),nlon_regional+1,nlat_regional,1) endif - call fv3uv2earth(work_bu,work_bv,nlon_regional,nlat_regional,u2d,v2d) + call fv3uv2earth(work_bu(:,:,inative),work_bv(:,:,inative),nlon_regional,nlat_regional,u2d,v2d) call fv3_h_to_ll(u2d,workau2,nlon_regional,nlat_regional,nloncase,nlatcase,.true.) call fv3_h_to_ll(v2d,workav2,nlon_regional,nlat_regional,nloncase,nlatcase,.true.) !!!!!!!! find analysis_inc: work_a !!!!!!!!!!!!!!!! 
@@ -4116,38 +4167,38 @@ subroutine gsi_fv3ncdf_writeuv(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) call fv3_ll_to_h(work_av(:,:),v2d,nloncase,nlatcase,nlon_regional,nlat_regional,.true.) call earthuv2fv3(u2d,v2d,nlon_regional,nlat_regional,workbu2,workbv2) !!!!!!!! add analysis_inc to readin work_b !!!!!!!!!!!!!!!! - work_bu(:,:)=work_bu(:,:)+workbu2(:,:) - work_bv(:,:)=work_bv(:,:)+workbv2(:,:) + work_bu(:,:,inative)=work_bu(:,:,inative)+workbu2(:,:) + work_bv(:,:,inative)=work_bv(:,:,inative)+workbv2(:,:) deallocate(workau2,workbu2,workav2,workbv2) else call fv3_ll_to_h(work_au(:,:),u2d,nloncase,nlatcase,nlon_regional,nlat_regional,.true.) call fv3_ll_to_h(work_av(:,:),v2d,nloncase,nlatcase,nlon_regional,nlat_regional,.true.) - call earthuv2fv3(u2d,v2d,nlon_regional,nlat_regional,work_bu(:,:),work_bv(:,:)) + call earthuv2fv3(u2d,v2d,nlon_regional,nlat_regional,work_bu(:,:,inative),work_bv(:,:,inative)) endif if(.not.grid_reverse_flag) then - call reverse_grid_r_uv(work_bu,nlon_regional,nlat_regional+1,1) - call reverse_grid_r_uv(work_bv,nlon_regional+1,nlat_regional,1) + call reverse_grid_r_uv(work_bu(:,:,inative),nlon_regional,nlat_regional+1,1) + call reverse_grid_r_uv(work_bv(:,:,inative),nlon_regional+1,nlat_regional,1) endif + enddo !ilevltot - if(fv3_io_layout_y > 1) then + if(fv3_io_layout_y > 1) then do nio=0,fv3_io_layout_y-1 - allocate(u2d_layout(nxcase,ny_layout_len(nio)+1)) - u_countloc=(/nxcase,ny_layout_len(nio)+1,1/) - u2d_layout=work_bu(:,ny_layout_b(nio):ny_layout_e(nio)+1) + allocate(u2d_layout(nxcase,ny_layout_len(nio)+1,kend_native-kbgn_native+1)) + u_countloc=(/nxcase,ny_layout_len(nio)+1,kend_native-kbgn_native+1,1/) + u2d_layout=work_bu(:,ny_layout_b(nio):ny_layout_e(nio)+1,:) call check( nf90_put_var(gfile_loc_layout(nio),ugrd_VarId,u2d_layout,start=u_startloc,count=u_countloc) ) deallocate(u2d_layout) - allocate(v2d_layout(nxcase+1,ny_layout_len(nio))) - v_countloc=(/nxcase+1,ny_layout_len(nio),1/) - 
v2d_layout=work_bv(:,ny_layout_b(nio):ny_layout_e(nio)) + allocate(v2d_layout(nxcase+1,ny_layout_len(nio),kend_native-kbgn_native+1)) + v_countloc=(/nxcase+1,ny_layout_len(nio),kend_native-kbgn_native+1,1/) + v2d_layout=work_bv(:,ny_layout_b(nio):ny_layout_e(nio),:) call check( nf90_put_var(gfile_loc_layout(nio),vgrd_VarId,v2d_layout,start=v_startloc,count=v_countloc) ) deallocate(v2d_layout) enddo - else + else call check( nf90_put_var(gfile_loc,ugrd_VarId,work_bu,start=u_startloc,count=u_countloc) ) call check( nf90_put_var(gfile_loc,vgrd_VarId,work_bv,start=v_startloc,count=v_countloc) ) - endif - enddo !ilevltot + endif if(fv3_io_layout_y > 1) then do nio=0,fv3_io_layout_y-1 @@ -4157,11 +4208,12 @@ subroutine gsi_fv3ncdf_writeuv(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) else call check( nf90_close(gfile_loc) ) endif + deallocate(work_bu,work_bv) endif call mpi_barrier(mpi_comm_world,ierror) - deallocate(work_bu,work_bv,u2d,v2d) + deallocate(u2d,v2d) deallocate(work_au,work_av) end subroutine gsi_fv3ncdf_writeuv @@ -4193,12 +4245,12 @@ subroutine gsi_fv3ncdf_writeuv_v1(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) !$$$ end documentation block use constants, only: half,zero - use mpimod, only: mpi_rtype,mpi_comm_world,mype,mpi_info_null + use mpimod, only: npe, setcomm,mpi_integer,mpi_max,mpi_rtype,mpi_comm_world,mype,mpi_info_null use gridmod, only: nlon_regional,nlat_regional use mod_fv3_lola, only: fv3_ll_to_h,fv3_h_to_ll, & fv3uv2earth,earthuv2fv3 use netcdf, only: nf90_open,nf90_close,nf90_noerr - use netcdf, only: nf90_write,nf90_inq_varid + use netcdf, only: nf90_write, nf90_mpiio,nf90_inq_varid,nf90_var_par_access,nf90_collective use netcdf, only: nf90_put_var,nf90_get_var use general_sub2grid_mod, only: sub2grid_info,general_sub2grid implicit none @@ -4220,14 +4272,20 @@ subroutine gsi_fv3ncdf_writeuv_v1(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) integer(i_kind) inative,ilev,ilevtot real(r_kind),allocatable,dimension(:,:,:,:):: worksub 
real(r_kind),allocatable,dimension(:,:):: work_au,work_av - real(r_kind),allocatable,dimension(:,:):: work_bu_s,work_bv_s - real(r_kind),allocatable,dimension(:,:):: work_bu_w,work_bv_w + real(r_kind),allocatable,dimension(:,:,:):: work_bu_s,work_bv_s + real(r_kind),allocatable,dimension(:,:,:):: work_bu_w,work_bv_w real(r_kind),allocatable,dimension(:,:):: u2d,v2d,workau2,workav2 real(r_kind),allocatable,dimension(:,:):: workbu_s2,workbv_s2 real(r_kind),allocatable,dimension(:,:):: workbu_w2,workbv_w2 integer(i_kind) nlatcase,nloncase,nxcase,nycase - integer(i_kind) uw_countloc(3),us_countloc(3),uw_startloc(3),us_startloc(3) - integer(i_kind) vw_countloc(3),vs_countloc(3),vw_startloc(3),vs_startloc(3) + integer(i_kind) uw_countloc(4),us_countloc(4),uw_startloc(4),us_startloc(4) + integer(i_kind) vw_countloc(4),vs_countloc(4),vw_startloc(4),vs_startloc(4) + integer(i_kind):: kend_native,kbgn_native,kdim_native + + + integer(i_kind):: iworld,iworld_group,nread,mpi_comm_read,ierror + integer(i_kind),dimension(npe):: members,members_read,mype_read_rank + logical:: procuse mm1=mype+1 nloncase=grd_uv%nlon @@ -4249,61 +4307,96 @@ subroutine gsi_fv3ncdf_writeuv_v1(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) allocate( u2d(nlon_regional,nlat_regional)) allocate( v2d(nlon_regional,nlat_regional)) - allocate( work_bu_s(nlon_regional,nlat_regional+1)) - allocate( work_bv_s(nlon_regional,nlat_regional+1)) - allocate( work_bu_w(nlon_regional+1,nlat_regional)) - allocate( work_bv_w(nlon_regional+1,nlat_regional)) allocate( work_au(nlatcase,nloncase),work_av(nlatcase,nloncase)) + if(add_saved) allocate( workau2(nlatcase,nloncase),workav2(nlatcase,nloncase)) - allocate( workbu_w2(nlon_regional+1,nlat_regional)) - allocate( workbv_w2(nlon_regional+1,nlat_regional)) - allocate( workbu_s2(nlon_regional,nlat_regional+1)) - allocate( workbv_s2(nlon_regional,nlat_regional+1)) + allocate( workbu_w2(nlon_regional+1,nlat_regional)) + allocate( workbv_w2(nlon_regional+1,nlat_regional)) + 
allocate( workbu_s2(nlon_regional,nlat_regional+1)) + allocate( workbv_s2(nlon_regional,nlat_regional+1)) filenamein=fv3filenamegin%dynvars - call check( nf90_open(filenamein,nf90_write,gfile_loc,comm=mpi_comm_world,info=MPI_INFO_NULL) ) - do ilevtot=kbgn,kend - varname=grd_uv%names(1,ilevtot) - ilev=grd_uv%lnames(1,ilevtot) - nz=grd_uv%nsig - nzp1=nz+1 - inative=nzp1-ilev + procuse = .false. + members=-1 + members_read=-1 + if (kbgn<=kend) then + procuse = .true. + members(mm1) = mype + endif + + call mpi_allreduce(members,members_read,npe,mpi_integer,mpi_max,mpi_comm_world,ierror) + + nread=0 + mype_read_rank=-1 + do i=1,npe + if (members_read(i) >= 0) then + nread=nread+1 + mype_read_rank(nread) = members_read(i) + endif + enddo + + call setcomm(iworld,iworld_group,nread,mype_read_rank,mpi_comm_read,ierror) + + if (procuse) then + - uw_countloc= (/nlon_regional+1,nlat_regional,1/) - us_countloc= (/nlon_regional,nlat_regional+1,1/) - vw_countloc= (/nlon_regional+1,nlat_regional,1/) - vs_countloc= (/nlon_regional,nlat_regional+1,1/) + + + call check( nf90_open(filenamein,ior(nf90_write, nf90_mpiio),gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) ) + + call check( nf90_inq_varid(gfile_loc,'u_s',u_sgrd_VarId) ) + call check( nf90_var_par_access(gfile_loc, u_sgrd_VarId, nf90_collective)) + call check( nf90_inq_varid(gfile_loc,'u_w',u_wgrd_VarId) ) + call check( nf90_var_par_access(gfile_loc, u_wgrd_VarId, nf90_collective)) + call check( nf90_inq_varid(gfile_loc,'v_s',v_sgrd_VarId) ) + call check( nf90_var_par_access(gfile_loc, v_sgrd_VarId, nf90_collective)) + call check( nf90_inq_varid(gfile_loc,'v_w',v_wgrd_VarId) ) + call check( nf90_var_par_access(gfile_loc, v_wgrd_VarId, nf90_collective)) + nz=grd_uv%nsig + nzp1=nz+1 + kend_native=nzp1-grd_uv%lnames(1,kbgn) + kbgn_native=nzp1-grd_uv%lnames(1,kend) + kdim_native=kend_native-kbgn_native+1 + + uw_countloc= (/nlon_regional+1,nlat_regional,kdim_native,1/) + us_countloc= 
(/nlon_regional,nlat_regional+1,kdim_native,1/) + vw_countloc= (/nlon_regional+1,nlat_regional,kdim_native,1/) + vs_countloc= (/nlon_regional,nlat_regional+1,kdim_native,1/) - uw_startloc=(/1,1,inative+1/) - us_startloc=(/1,1,inative+1/) - vw_startloc=(/1,1,inative+1/) - vs_startloc=(/1,1,inative+1/) + uw_startloc=(/1,1,kbgn_native+1,1/) !In the coldstart files, there is an extra top level + us_startloc=(/1,1,kbgn_native+1,1/) + vw_startloc=(/1,1,kbgn_native+1,1/) + vs_startloc=(/1,1,kbgn_native+1,1/) + allocate( work_bu_s(nlon_regional,nlat_regional+1,kbgn_native:kend_native)) + allocate( work_bv_s(nlon_regional,nlat_regional+1,kbgn_native:kend_native)) + allocate( work_bu_w(nlon_regional+1,nlat_regional,kbgn_native:kend_native)) + allocate( work_bv_w(nlon_regional+1,nlat_regional,kbgn_native:kend_native)) +!!!!!!!! readin work_b !!!!!!!!!!!!!!!! + call check( nf90_get_var(gfile_loc,u_sgrd_VarId,work_bu_s,start=us_startloc,count=us_countloc) ) + call check( nf90_get_var(gfile_loc,u_wgrd_VarId,work_bu_w,start=uw_startloc,count=uw_countloc) ) + call check( nf90_get_var(gfile_loc,v_sgrd_VarId,work_bv_s,start=vs_startloc,count=vs_countloc) ) + call check( nf90_get_var(gfile_loc,v_wgrd_VarId,work_bv_w,start=vw_startloc,count=vw_countloc) ) + do ilevtot=kbgn,kend + varname=grd_uv%names(1,ilevtot) + ilev=grd_uv%lnames(1,ilevtot) + inative=nzp1-ilev work_au=hwork(1,:,:,ilevtot) work_av=hwork(2,:,:,ilevtot) - call check( nf90_inq_varid(gfile_loc,'u_s',u_sgrd_VarId) ) - call check( nf90_inq_varid(gfile_loc,'u_w',u_wgrd_VarId) ) - call check( nf90_inq_varid(gfile_loc,'v_s',v_sgrd_VarId) ) - call check( nf90_inq_varid(gfile_loc,'v_w',v_wgrd_VarId) ) -!!!!!!!! readin work_b !!!!!!!!!!!!!!!! 
- call check( nf90_get_var(gfile_loc,u_sgrd_VarId,work_bu_s,start=us_startloc,count=us_countloc) ) - call check( nf90_get_var(gfile_loc,u_wgrd_VarId,work_bu_w,start=uw_startloc,count=uw_countloc) ) - call check( nf90_get_var(gfile_loc,v_sgrd_VarId,work_bv_s,start=vs_startloc,count=vs_countloc) ) - call check( nf90_get_var(gfile_loc,v_wgrd_VarId,work_bv_w,start=vw_startloc,count=vw_countloc) ) if(add_saved)then do j=1,nlat_regional - u2d(:,j)=half * (work_bu_s(:,j)+ work_bu_s(:,j+1)) + u2d(:,j)=half * (work_bu_s(:,j,inative)+ work_bu_s(:,j+1,inative)) enddo do i=1,nlon_regional - v2d(i,:)=half*(work_bv_w(i,:)+work_bv_w(i+1,:)) + v2d(i,:)=half*(work_bv_w(i,:,inative)+work_bv_w(i+1,:,inative)) enddo call fv3_h_to_ll(u2d,workau2,nlon_regional,nlat_regional,nloncase,nlatcase,grid_reverse_flag) call fv3_h_to_ll(v2d,workav2,nlon_regional,nlat_regional,nloncase,nlatcase,grid_reverse_flag) @@ -4333,44 +4426,46 @@ subroutine gsi_fv3ncdf_writeuv_v1(grd_uv,ges_u,ges_v,add_saved,fv3filenamegin) - work_bu_w(:,:)=work_bu_w(:,:)+workbu_w2(:,:) - work_bu_s(:,:)=work_bu_s(:,:)+workbu_s2(:,:) - work_bv_w(:,:)=work_bv_w(:,:)+workbv_w2(:,:) - work_bv_s(:,:)=work_bv_s(:,:)+workbv_s2(:,:) + work_bu_w(:,:,inative)=work_bu_w(:,:,inative)+workbu_w2(:,:) + work_bu_s(:,:,inative)=work_bu_s(:,:,inative)+workbu_s2(:,:) + work_bv_w(:,:,inative)=work_bv_w(:,:,inative)+workbv_w2(:,:) + work_bv_s(:,:,inative)=work_bv_s(:,:,inative)+workbv_s2(:,:) else call fv3_ll_to_h(work_au(:,:),u2d,nloncase,nlatcase,nlon_regional,nlat_regional,grid_reverse_flag) call fv3_ll_to_h(work_av(:,:),v2d,nloncase,nlatcase,nlon_regional,nlat_regional,grid_reverse_flag) do i=2,nlon_regional - work_bu_w(i,:)=half*(u2d(i-1,:)+u2d(i,:)) - work_bv_w(i,:)=half*(v2d(i-1,:)+v2d(i,:)) + work_bu_w(i,:,inative)=half*(u2d(i-1,:)+u2d(i,:)) + work_bv_w(i,:,inative)=half*(v2d(i-1,:)+v2d(i,:)) enddo - work_bu_w(1,:)=u2d(1,:) - work_bv_w(1,:)=v2d(1,:) - work_bu_w(nlon_regional+1,:)=u2d(nlon_regional,:) - 
work_bv_w(nlon_regional+1,:)=v2d(nlon_regional,:) + work_bu_w(1,:,inative)=u2d(1,:) + work_bv_w(1,:,inative)=v2d(1,:) + work_bu_w(nlon_regional+1,:,inative)=u2d(nlon_regional,:) + work_bv_w(nlon_regional+1,:,inative)=v2d(nlon_regional,:) do j=2,nlat_regional - work_bu_s(:,j)=half*(u2d(:,j-1)+u2d(:,j)) - work_bv_s(:,j)=half*(v2d(:,j-1)+v2d(:,j)) + work_bu_s(:,j,inative)=half*(u2d(:,j-1)+u2d(:,j)) + work_bv_s(:,j,inative)=half*(v2d(:,j-1)+v2d(:,j)) enddo - work_bu_s(:,1)=u2d(:,1) - work_bv_s(:,1)=v2d(:,1) - work_bu_s(:,nlat_regional+1)=u2d(:,nlat_regional) - work_bv_s(:,nlat_regional+1)=v2d(:,nlat_regional) + work_bu_s(:,1,inative)=u2d(:,1) + work_bv_s(:,1,inative)=v2d(:,1) + work_bu_s(:,nlat_regional+1,inative)=u2d(:,nlat_regional) + work_bv_s(:,nlat_regional+1,inative)=v2d(:,nlat_regional) endif - - call check( nf90_put_var(gfile_loc,u_wgrd_VarId,work_bu_w,start=uw_startloc,count=uw_countloc) ) - call check( nf90_put_var(gfile_loc,u_sgrd_VarId,work_bu_s,start=us_startloc,count=us_countloc) ) - call check( nf90_put_var(gfile_loc,v_wgrd_VarId,work_bv_w,start=vw_startloc,count=vw_countloc) ) - call check( nf90_put_var(gfile_loc,v_sgrd_VarId,work_bv_s,start=vs_startloc,count=vs_countloc) ) enddo ! 
+ + call check( nf90_put_var(gfile_loc,u_wgrd_VarId,work_bu_w,start=uw_startloc,count=uw_countloc) ) + call check( nf90_put_var(gfile_loc,u_sgrd_VarId,work_bu_s,start=us_startloc,count=us_countloc) ) + call check( nf90_put_var(gfile_loc,v_wgrd_VarId,work_bv_w,start=vw_startloc,count=vw_countloc) ) + call check( nf90_put_var(gfile_loc,v_sgrd_VarId,work_bv_s,start=vs_startloc,count=vs_countloc) ) call check( nf90_close(gfile_loc) ) deallocate(work_bu_w,work_bv_w) deallocate(work_bu_s,work_bv_s) + endif !procuse + deallocate(work_au,work_av,u2d,v2d) if(add_saved) deallocate(workau2,workav2) if (allocated(workbu_w2)) then @@ -4541,8 +4636,8 @@ subroutine gsi_fv3ncdf_write(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3file use mod_fv3_lola, only: fv3_ll_to_h use mod_fv3_lola, only: fv3_h_to_ll use netcdf, only: nf90_open,nf90_close - use netcdf, only: nf90_write,nf90_inq_varid - use netcdf, only: nf90_put_var,nf90_get_var + use netcdf, only: nf90_write,nf90_netcdf4, nf90_mpiio,nf90_inq_varid + use netcdf, only: nf90_put_var,nf90_get_var,nf90_independent,nf90_var_par_access use netcdf, only: nf90_inquire_dimension use gsi_bundlemod, only: gsi_bundle use general_sub2grid_mod, only: sub2grid_info,general_sub2grid @@ -4558,8 +4653,8 @@ subroutine gsi_fv3ncdf_write(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3file character(len=max_filename_length) :: filenamein2 character(len=max_varname_length) :: varname,vgsiname,name - integer(i_kind) nlatcase,nloncase,nxcase,nycase,countloc(3),startloc(3) - integer(i_kind) countloc_tmp(3),startloc_tmp(3) + integer(i_kind) nlatcase,nloncase,nxcase,nycase,countloc(4),startloc(4) + integer(i_kind) countloc_tmp(4),startloc_tmp(4) integer(i_kind) kbgn,kend integer(i_kind) inative,ilev,ilevtot integer(i_kind) :: VarId,gfile_loc @@ -4621,11 +4716,11 @@ subroutine gsi_fv3ncdf_write(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3file allocate(gfile_loc_layout(0:fv3_io_layout_y-1)) do nio=0,fv3_io_layout_y-1 write(filename_layout,'(a,a,I4.4)') 
trim(filenamein),'.',nio - call check( nf90_open(filename_layout,nf90_write,gfile_loc_layout(nio),comm=mpi_comm_read,info=MPI_INFO_NULL) ) + call check( nf90_open(filename_layout,ior(nf90_netcdf4,ior(nf90_write, nf90_mpiio)),gfile_loc_layout(nio),comm=mpi_comm_read,info=MPI_INFO_NULL) ) enddo gfile_loc=gfile_loc_layout(0) else - call check( nf90_open(filenamein,nf90_write,gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) ) + call check( nf90_open(filenamein,ior(nf90_netcdf4,ior(nf90_write, nf90_mpiio)),gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL) ) endif do ilevtot=kbgn,kend @@ -4637,15 +4732,14 @@ subroutine gsi_fv3ncdf_write(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3file call getfv3lamfilevname(vgsiname,fv3filenamegin,filenamein2,varname) if(trim(filenamein) /= trim(filenamein2)) then write(6,*)'filenamein and filenamein2 are not the same as expected, stop' - call flush(6) call stop2(333) endif ilev=grd_ionouv%lnames(1,ilevtot) nz=grd_ionouv%nsig nzp1=nz+1 inative=nzp1-ilev - countloc=(/nxcase,nycase,1/) - startloc=(/1,1,inative/) + countloc=(/nxcase,nycase,1,1/) + startloc=(/1,1,inative,1/) work_a=hwork(1,:,:,ilevtot) @@ -4654,23 +4748,24 @@ subroutine gsi_fv3ncdf_write(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3file if(trim(name)=='xaxis_1') nx_phy=len if( nx_phy == nxcase )then allocate(work_b_tmp(nxcase,nycase)) - countloc_tmp=(/nxcase,nycase,1/) + countloc_tmp=(/nxcase,nycase,1,1/) phy_smaller_domain = .false. else allocate(work_b_tmp(nxcase-6,nycase-6)) - countloc_tmp=(/nxcase-6,nycase-6,1/) + countloc_tmp=(/nxcase-6,nycase-6,1,1/) phy_smaller_domain = .true. 
end if - startloc_tmp=(/1,1,ilev/) + startloc_tmp=(/1,1,ilev,1/) end if call check( nf90_inq_varid(gfile_loc,trim(varname),VarId) ) + call check( nf90_var_par_access(gfile_loc, VarId, nf90_independent)) if(index(vgsiname,"delzinc") > 0) then if(fv3_io_layout_y > 1) then do nio=0,fv3_io_layout_y-1 - countloc=(/nxcase,ny_layout_len(nio),1/) + countloc=(/nxcase,ny_layout_len(nio),1,1/) allocate(work_b_layout(nxcase,ny_layout_len(nio))) call check( nf90_get_var(gfile_loc_layout(nio),VarId,work_b_layout,start = startloc, count = countloc) ) work_b(:,ny_layout_b(nio):ny_layout_e(nio))=work_b_layout @@ -4685,7 +4780,7 @@ subroutine gsi_fv3ncdf_write(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3file if(add_saved)then if(fv3_io_layout_y > 1) then do nio=0,fv3_io_layout_y-1 - countloc=(/nxcase,ny_layout_len(nio),1/) + countloc=(/nxcase,ny_layout_len(nio),1,1/) allocate(work_b_layout(nxcase,ny_layout_len(nio))) call check( nf90_get_var(gfile_loc_layout(nio),VarId,work_b_layout,start = startloc, count = countloc) ) work_b(:,ny_layout_b(nio):ny_layout_e(nio))=work_b_layout @@ -4721,7 +4816,7 @@ subroutine gsi_fv3ncdf_write(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3file endif if(fv3_io_layout_y > 1) then do nio=0,fv3_io_layout_y-1 - countloc=(/nxcase,ny_layout_len(nio),1/) + countloc=(/nxcase,ny_layout_len(nio),1,1/) allocate(work_b_layout(nxcase,ny_layout_len(nio))) work_b_layout=work_b(:,ny_layout_b(nio):ny_layout_e(nio)) call check( nf90_put_var(gfile_loc_layout(nio),VarId,work_b_layout, start = startloc, count = countloc) ) @@ -4795,12 +4890,13 @@ subroutine gsi_fv3ncdf_write_v1(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3f ! 
!$$$ end documentation block - use mpimod, only: mpi_rtype,mpi_comm_world,mype,mpi_info_null + use mpimod, only: npe, setcomm,mpi_integer,mpi_max,mpi_rtype,mpi_comm_world,mype,mpi_info_null use mod_fv3_lola, only: fv3_ll_to_h use mod_fv3_lola, only: fv3_h_to_ll use netcdf, only: nf90_open,nf90_close - use netcdf, only: nf90_write,nf90_inq_varid + use netcdf, only: nf90_write, nf90_netcdf4,nf90_mpiio,nf90_inq_varid use netcdf, only: nf90_put_var,nf90_get_var + use netcdf, only: nf90_independent,nf90_var_par_access use gsi_bundlemod, only: gsi_bundle use general_sub2grid_mod, only: sub2grid_info,general_sub2grid implicit none @@ -4824,6 +4920,10 @@ subroutine gsi_fv3ncdf_write_v1(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3f character(len=max_varname_length) :: varname,vgsiname integer(i_kind) nlatcase,nloncase,nxcase,nycase,countloc(3),startloc(3) + integer(i_kind):: iworld,iworld_group,nread,mpi_comm_read,i,ierror + integer(i_kind),dimension(npe):: members,members_read,mype_read_rank + logical:: procuse + mm1=mype+1 nloncase=grd_ionouv%nlon @@ -4838,7 +4938,30 @@ subroutine gsi_fv3ncdf_write_v1(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3f allocate( work_b(nlon_regional,nlat_regional)) allocate( workb2(nlon_regional,nlat_regional)) allocate( worka2(nlatcase,nloncase)) - call check ( nf90_open(filenamein,nf90_write,gfile_loc,comm=mpi_comm_world,info=MPI_INFO_NULL)) !clt + + procuse = .false. + members=-1 + members_read=-1 + if (kbgn<=kend) then + procuse = .true. 
+ members(mm1) = mype + endif + + call mpi_allreduce(members,members_read,npe,mpi_integer,mpi_max,mpi_comm_world,ierror) + + nread=0 + mype_read_rank=-1 + do i=1,npe + if (members_read(i) >= 0) then + nread=nread+1 + mype_read_rank(nread) = members_read(i) + endif + enddo + + call setcomm(iworld,iworld_group,nread,mype_read_rank,mpi_comm_read,ierror) + + if (procuse) then + call check ( nf90_open(filenamein,ior(nf90_netcdf4,ior(nf90_write, nf90_mpiio)),gfile_loc,comm=mpi_comm_read,info=MPI_INFO_NULL)) !clt do ilevtot=kbgn,kend vgsiname=grd_ionouv%names(1,ilevtot) if(trim(vgsiname)=='amassi') cycle @@ -4848,7 +4971,6 @@ subroutine gsi_fv3ncdf_write_v1(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3f call getfv3lamfilevname(vgsiname,fv3filenamegin,filenamein2,varname) if(trim(filenamein) /= trim(filenamein2)) then write(6,*)'filenamein and filenamein2 are not the same as expected, stop' - call flush(6) call stop2(333) endif ilev=grd_ionouv%lnames(1,ilevtot) @@ -4862,6 +4984,7 @@ subroutine gsi_fv3ncdf_write_v1(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3f call check( nf90_inq_varid(gfile_loc,trim(varname),VarId) ) + call check( nf90_var_par_access(gfile_loc, VarId, nf90_independent)) call check( nf90_get_var(gfile_loc,VarId,work_b,start=startloc,count=countloc) ) if(index(vgsiname,"delzinc") > 0) then write(6,*)'delz is not in the cold start fiels with this option, incompatible setup , stop' @@ -4885,6 +5008,7 @@ subroutine gsi_fv3ncdf_write_v1(grd_ionouv,cstate_nouv,add_saved,filenamein,fv3f call check( nf90_put_var(gfile_loc,VarId,work_b,start=startloc,count=countloc) ) enddo !ilevtot call check(nf90_close(gfile_loc)) + endif deallocate(work_b,work_a) deallocate(worka2,workb2) @@ -5420,7 +5544,7 @@ subroutine gsi_copy_bundle(bundi,bundo) character(len=max_varname_length),dimension(:),allocatable:: target_name_vars3d character(len=max_varname_length) ::varname real(r_kind),dimension(:,:,:),pointer:: pvar3d=>NULL() - real(r_kind),dimension(:,:,:),pointer:: pvar2d 
=>NULL() + real(r_kind),dimension(:,:),pointer:: pvar2d =>NULL() integer(i_kind):: src_nc3d,src_nc2d,target_nc3d,target_nc2d integer(i_kind):: ivar,jvar,istatus src_nc3d=bundi%n3d diff --git a/ush/sub_hera b/ush/sub_hera index 610756af00..c94b734596 100755 --- a/ush/sub_hera +++ b/ush/sub_hera @@ -120,10 +120,10 @@ echo "#SBATCH --output=$output" echo "#SBATCH --job-name=$jobname" >> $cfile echo "#SBATCH --qos=$queue" >> $cfile echo "#SBATCH --time=$timew" >> $cfile -echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --cpus-per-task=$threads" >> $cfile +echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --exclusive" >> $cfile #echo "#SBATCH -j oe" >> $cfile echo "#SBATCH --account=$accnt" >> $cfile -echo "#SBATCH --mem=0" >> $cfile +#cltorg echo "#SBATCH --mem=0" >> $cfile #echo "#SBATCH -V" >> $cfile #echo "#PBS -d" >> $cfile #. $exec >> $cfile @@ -143,7 +143,6 @@ echo "module list" >> $cfile echo "" >>$cfile cat $exec >> $cfile - if [[ $nosub = YES ]];then cat $cfile exit diff --git a/ush/sub_hercules b/ush/sub_hercules index 459b480559..78a0f5daee 100755 --- a/ush/sub_hercules +++ b/ush/sub_hercules @@ -111,7 +111,7 @@ echo "#SBATCH --job-name=$jobname" echo "#SBATCH --qos=$queue" >> $cfile echo "#SBATCH --partition=$partition" >> $cfile echo "#SBATCH --time=$timew" >> $cfile -echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --cpus-per-task=$threads" >> $cfile +echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --exclusive" >> $cfile echo "#SBATCH --account=$accnt" >> $cfile echo "" >>$cfile @@ -131,7 +131,7 @@ echo "module use $modulefiles" >> $cfile echo "module load gsi_hercules.intel" >> $cfile #TODO reenable I_MPI_EXTRA_FILESYSTEM once regional ctests can properly handle parallel I/O on Hercules echo "unset I_MPI_EXTRA_FILESYSTEM" >> $cfile -echo "" >> $cfile + cat $exec >> $cfile if [[ $nosub = YES ]];then diff --git a/ush/sub_jet b/ush/sub_jet index 9bd60486f6..96f3eae9b2 100755 --- a/ush/sub_jet +++ b/ush/sub_jet @@ -108,7 +108,7 @@ echo 
"#SBATCH --output=$output" echo "#SBATCH --job-name=$jobname" >> $cfile echo "#SBATCH --qos=$queue" >> $cfile echo "#SBATCH --time=$timew" >> $cfile -echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --cpus-per-task=$threads" >> $cfile +echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --exclusive" >> $cfile echo "#SBATCH --account=$accnt" >> $cfile echo "#SBATCH --mem=0" >> $cfile echo "#SBATCH --partition=kjet" >> $cfile diff --git a/ush/sub_orion b/ush/sub_orion index 5a13f54845..b810576379 100755 --- a/ush/sub_orion +++ b/ush/sub_orion @@ -111,7 +111,7 @@ echo "#SBATCH --job-name=$jobname" echo "#SBATCH --qos=$queue" >> $cfile echo "#SBATCH --partition=$partition" >> $cfile echo "#SBATCH --time=$timew" >> $cfile -echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --cpus-per-task=$threads" >> $cfile +echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --exclusive" >> $cfile echo "#SBATCH --account=$accnt" >> $cfile echo "" >>$cfile