Merged with neighbor_alltoallw. Default -O2 optimization. Version bumped.
John Donners committed Jan 6, 2016
2 parents e9356b0 + b0fac1b commit 13cb4fb311dd09cc088e446c7ab92742afea6dea
Showing with 126 additions and 19 deletions.
  1. +5 −7 README.md
  2. +1 −0 bou.in
  3. +36 −1 configure.ac
  4. +52 −7 decomp_2d.F90
  5. +10 −0 m4/ax_prog_fc_mpi.m4
  6. +11 −2 transpose_x_to_z.F90
  7. +11 −2 transpose_z_to_x.F90
README.md
@@ -21,26 +21,24 @@ The AFiD model has the following prerequisites:
It's recommended to download a release tarball of AFiD, which can be found [here](https://github.com/jdonners/afid/releases). To install AFiD, please
use the 'configure' script. Note that you'll need to set optimization and debugging options yourself.
A good, first guess at the configuration of AFiD would be
```
./configure FCFLAGS="-O2 -g"
```
If the configuration was successful, simply run
The easiest way to configure and build AFiD would be
```
./configure
make
make install prefix=/path/to/install/afid
```
It tries to find and configure all prerequisites automatically, although it doesn't always succeed.
By default it uses the -O2 optimization flag (if available).
The most important configuration options are:
```
./configure MPIFC=mpif90.gfortran # set MPIFC to your MPI compiler wrapper for Fortran
./configure --with-blas=/path/to/blas.lib # library with blas routines
./configure --with-lapack=/path/to/lapack.lib # library with lapack routines
./configure FCFLAGS=-O3 # very high optimization
./configure FCFLAGS="-g -O0" # debug info, no optimization
```
The configure script locates the fftw-wisdom utility to find the root path of the FFTW3 library and it uses the h5pfc compiler wrapper
bou.in
@@ -16,3 +16,4 @@
1 1e-8 0.01 1e3
SLABDUMP(STST3)
0
configure.ac
@@ -2,11 +2,15 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ([2.69])
AC_INIT([AFiD], [1.0], [john.donners@surfsara.nl])
AC_INIT([AFiD], [1.1], [r.j.a.m.stevens@utwente.nl])
AC_CONFIG_HEADERS([config.h])
AC_CONFIG_MACRO_DIR([m4])
AM_INIT_AUTOMAKE
if test "x$FCFLAGS" = "x"; then
FCFLAGS_isset="no"
fi
# AC_PROG_FC
AX_PROG_FC_MPI([true],
[ use_mpi=yes],
@@ -30,6 +34,37 @@ AC_LANG([Fortran])
#set the extension to f90, to compile the HDF5 test
AC_FC_SRCEXT(f90)
dummy_FCFLAGS=$FCFLAGS
#set default compiler optimization
if test "x$FCFLAGS_isset" = "xno"; then
#if test 0;then
FCFLAGS="-O2"
AC_MSG_CHECKING([for default optimization -O2])
AC_LINK_IFELSE(
[AC_LANG_SOURCE([[
program conftest
implicit none
end]])],
[o2_works=yes],
[o2_works=no])
AC_MSG_RESULT([$o2_works])
if test "x$o2_works" = "xno"; then
FCFLAGS=$dummy_FCFLAGS
fi
fi
mpi3_wanted=yes
AC_ARG_ENABLE([mpi3], AS_HELP_STRING([--disable-mpi3],
[disable MPI-3 functions]),
[if test "x$enableval" == "xno"; then mpi3_wanted=no; fi],[])
if test "x$mpi3_wanted" == "xyes"; then
if test "x$_ax_prog_fc_mpi_mpi3_found" == "xyes"; then
FCFLAGS="${FCFLAGS} ${FC_DEFINE}MPI3"
fi
fi
#Disable OpenMP as long as it is not efficient
#AC_OPENMP
#FCFLAGS="${FCFLAGS} ${OPENMP_FCFLAGS}"
decomp_2d.F90
@@ -110,8 +110,16 @@ module decomp_2d
! This is only for the complex datatype
integer,dimension(:),allocatable::zcnts_xz,ztypes_xz
integer,dimension(:),allocatable::xcnts_xz,xtypes_xz
#ifdef MPI3
! use MPI_ADDRESS_KIND for MPI_Neighbor_alltoallw call
integer(kind=MPI_ADDRESS_KIND),dimension(:),allocatable::zdispls_xz,xdispls_xz
integer :: xtozNeighborComm,ztoxNeighborComm
integer,dimension(:),allocatable::xranks,zranks
#else
! use default integer for MPI_Alltoallw call
integer,dimension(:),allocatable::zdispls_xz,xdispls_xz
#endif
!#endif
! evenly distributed data
logical :: even
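The #ifdef above reflects the Fortran MPI bindings: MPI_Alltoallw takes default-integer byte displacements, while the MPI-3 MPI_Neighbor_alltoallw expects displacements of kind MPI_ADDRESS_KIND, so the module keeps the matching variant of the displacement arrays. A minimal declaration sketch (illustrative names, not AFiD code):
```
! Illustrative only: the kind of the displacement arrays must match the
! collective that consumes them.
integer, dimension(:), allocatable :: displs_alltoallw              ! MPI_Alltoallw
integer(kind=MPI_ADDRESS_KIND), dimension(:), allocatable :: &
        displs_neighbor_alltoallw                                   ! MPI_Neighbor_alltoallw
```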
@@ -582,18 +590,20 @@ subroutine decomp_info_finalize(decomp)
deallocate(decomp%x1disp,decomp%y1disp,decomp%y2disp,decomp%z2disp)
deallocate(decomp%z1disp,decomp%x2disp)
do i=1,nproc
if (decomp%ztypes_xz(i).ne.MPI_DATATYPE_NULL) then
if (decomp%ztypes_xz(i).ne.MPI_INTEGER) then
call MPI_Type_free(decomp%ztypes_xz(i),ierror)
endif
if (decomp%xtypes_xz(i).ne.MPI_DATATYPE_NULL) then
if (decomp%xtypes_xz(i).ne.MPI_INTEGER) then
call MPI_Type_free(decomp%xtypes_xz(i),ierror)
endif
enddo
deallocate(decomp%xcnts_xz,decomp%zcnts_xz)
deallocate(decomp%xtypes_xz,decomp%ztypes_xz)
deallocate(decomp%xdispls_xz)
deallocate(decomp%zdispls_xz)
#ifdef MPI3
deallocate(decomp%xranks,decomp%zranks)
#endif
return
end subroutine decomp_info_finalize
@@ -732,7 +742,11 @@ subroutine prepare_buffer(decomp)
integer :: rank_x, rank_z
integer :: subsize_y, offset_y
integer :: ierror
#ifdef MPI3
integer,dimension(nproc) :: xranks,zranks
integer,dimension(nproc) :: xweights,zweights
integer :: index_src, index_dest
#endif
! MPI_ALLTOALLV buffer information
do i=0, dims(1)-1
@@ -790,8 +804,12 @@ subroutine prepare_buffer(decomp)
decomp%zdispls_xz(:)=0
decomp%xcnts_xz(:)=0
decomp%zcnts_xz(:)=0
decomp%xtypes_xz(:)=MPI_DATATYPE_NULL
decomp%ztypes_xz(:)=MPI_DATATYPE_NULL
decomp%xtypes_xz(:)=MPI_INTEGER
decomp%ztypes_xz(:)=MPI_INTEGER
#ifdef MPI3
index_src=0
index_dest=0
#endif
do k=0,dims(1)-1
do i=0,dims(2)-1
! Actually, rank_x and rank_z are the same..
@@ -805,6 +823,11 @@ subroutine prepare_buffer(decomp)
decomp%zcnts_xz(rank_z+1)=1
subsize_y=min(decomp%zen(2),decomp%y1en(k))-max(decomp%zst(2),decomp%y1st(k))+1
offset_y =max(decomp%zst(2),decomp%y1st(k))-decomp%zst(2)
#ifdef MPI3
index_src=index_src+1
zranks(index_src)=rank_z
zweights(index_src)=decomp%zsz(1)*subsize_y*decomp%z2dist(i)
#endif
call MPI_Type_create_subarray(3,decomp%zsz, &
(/decomp%zsz(1),subsize_y,decomp%z2dist(i)/), &
(/0,offset_y,decomp%z2st(i)-decomp%zst(3)/), &
@@ -824,6 +847,11 @@ subroutine prepare_buffer(decomp)
decomp%xcnts_xz(rank_x+1)=1
subsize_y=min(decomp%xen(2),decomp%y2en(i))-max(decomp%xst(2),decomp%y2st(i))+1
offset_y =max(decomp%xst(2),decomp%y2st(i))-decomp%xst(2)
#ifdef MPI3
index_dest=index_dest+1
xranks(index_dest)=rank_x
xweights(index_dest)=decomp%x1dist(k)*subsize_y*decomp%xsz(3)
#endif
call MPI_Type_create_subarray(3,decomp%xsz, &
(/decomp%x1dist(k),subsize_y,decomp%xsz(3)/), &
(/decomp%x1st(k)-decomp%xst(1),offset_y,0/), &
@@ -840,6 +868,23 @@ subroutine prepare_buffer(decomp)
enddo
enddo
#ifdef MPI3
allocate(decomp%xranks(index_dest))
allocate(decomp%zranks(index_src))
decomp%xranks=xranks(1:index_dest)+1
decomp%zranks=zranks(1:index_src)+1
call MPI_Dist_graph_create_adjacent(DECOMP_2D_COMM_CART_X, &
index_src,zranks(1:index_src),zweights(1:index_src), &
index_dest,xranks(1:index_dest),xweights(1:index_dest), &
MPI_INFO_NULL,.true.,decomp%xtozNeighborComm,ierror)
call MPI_Dist_graph_create_adjacent(DECOMP_2D_COMM_CART_X, &
index_dest,xranks(1:index_dest),xweights(1:index_dest), &
index_src,zranks(1:index_src),zweights(1:index_src), &
MPI_INFO_NULL,.true.,decomp%ztoxNeighborComm,ierror)
#endif
return
end subroutine prepare_buffer
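For readers unfamiliar with the MPI-3 calls used above, the following is a minimal, self-contained sketch of the same pattern on a periodic ring instead of the pencil decomposition (not AFiD code; all names are illustrative): build an adjacent distributed-graph communicator with MPI_Dist_graph_create_adjacent, then exchange data over it with MPI_Neighbor_alltoallw.
```
! Sketch only: each rank exchanges one integer with its left and right
! neighbour in a periodic ring.
program neighbor_alltoallw_sketch
  use mpi
  implicit none
  integer :: ierror, rank, nproc, graph_comm, sizeof_int
  integer, dimension(2) :: sources, dests, weights
  integer, dimension(2) :: scounts, rcounts, stypes, rtypes
  integer(kind=MPI_ADDRESS_KIND), dimension(2) :: sdispls, rdispls
  integer, dimension(2) :: sendbuf, recvbuf

  call MPI_Init(ierror)
  call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierror)
  call MPI_Comm_size(MPI_COMM_WORLD, nproc, ierror)

  ! left and right neighbours; in AFiD these lists come from the x/z
  ! pencil overlaps gathered in prepare_buffer
  sources(1) = mod(rank-1+nproc, nproc)
  sources(2) = mod(rank+1, nproc)
  dests = sources
  weights = 1

  call MPI_Dist_graph_create_adjacent(MPI_COMM_WORLD, &
       2, sources, weights, 2, dests, weights, &
       MPI_INFO_NULL, .false., graph_comm, ierror)

  sendbuf = rank
  scounts = 1; rcounts = 1
  stypes = MPI_INTEGER; rtypes = MPI_INTEGER
  call MPI_Type_size(MPI_INTEGER, sizeof_int, ierror)
  ! displacements are byte offsets of kind MPI_ADDRESS_KIND,
  ! unlike the default-integer displacements of MPI_Alltoallw
  sdispls(1) = 0; sdispls(2) = sizeof_int
  rdispls(1) = 0; rdispls(2) = sizeof_int

  call MPI_Neighbor_alltoallw(sendbuf, scounts, sdispls, stypes, &
                              recvbuf, rcounts, rdispls, rtypes, &
                              graph_comm, ierror)

  call MPI_Comm_free(graph_comm, ierror)
  call MPI_Finalize(ierror)
end program neighbor_alltoallw_sketch
```
In prepare_buffer the neighbour lists and weights are derived from the subarray sizes computed above, and reorder is passed as .true. so the MPI library may re-map ranks onto the graph topology.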
m4/ax_prog_fc_mpi.m4
@@ -109,6 +109,16 @@ AS_IF([test x"$_ax_prog_fc_mpi_mpi_wanted" = xno],
LIBS=$save_LIBS
done
# test if MPI-3 is available
# We do not use AC_SEARCH_LIBS here, as it caches its outcome and
# thus disallows corresponding calls in the other AX_PROG_*_MPI
# macros.
AC_MSG_CHECKING([for MPI-3])
AC_LINK_IFELSE([AC_LANG_CALL([],[MPI_NEIGHBOR_ALLTOALLW])],
[ _ax_prog_fc_mpi_mpi3_found=yes ],
[ _ax_prog_fc_mpi_mpi3_found=no ])
AC_MSG_RESULT($_ax_prog_fc_mpi_mpi3_found)
# Check for header
AS_IF([test x"$_ax_prog_fc_mpi_mpi_found" = xyes], [
AC_MSG_CHECKING([for mpif.h])
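The check above only verifies that the symbol links. Roughly speaking (a sketch, not the literal conftest that autoconf generates), AC_LANG_CALL produces a Fortran program of this shape and tries to link it with the selected MPI compiler wrapper:
```
! If this program links, MPI_NEIGHBOR_ALLTOALLW is present in the MPI
! library and MPI-3 support is assumed to be available.
      program main
      call MPI_NEIGHBOR_ALLTOALLW
      end
```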
transpose_x_to_z.F90
@@ -29,8 +29,17 @@ subroutine transpose_x_to_z_complex(src, dst, opt_decomp)
decomp = decomp_main
end if
call MPI_Alltoallw(src,decomp%xcnts_xz,decomp%xdispls_xz,decomp%xtypes_xz, &
dst,decomp%zcnts_xz,decomp%zdispls_xz,decomp%ztypes_xz,MPI_COMM_WORLD,ierror)
#if !defined(MPI3)
call MPI_Alltoallw(src,decomp%xcnts_xz,decomp%xdispls_xz,decomp%xtypes_xz, &
dst,decomp%zcnts_xz,decomp%zdispls_xz,decomp%ztypes_xz,MPI_COMM_WORLD,ierror)
#endif
#ifdef MPI3
call MPI_Neighbor_alltoallw( &
src,decomp%xcnts_xz(decomp%xranks),decomp%xdispls_xz(decomp%xranks),decomp%xtypes_xz(decomp%xranks), &
dst,decomp%zcnts_xz(decomp%zranks),decomp%zdispls_xz(decomp%zranks),decomp%ztypes_xz(decomp%zranks), &
decomp%xtozNeighborComm,ierror)
#endif
return
end subroutine transpose_x_to_z_complex
transpose_z_to_x.F90
@@ -29,8 +29,17 @@ subroutine transpose_z_to_x_complex(src, dst, opt_decomp)
decomp = decomp_main
end if
call MPI_Alltoallw(src,decomp%zcnts_xz,decomp%zdispls_xz,decomp%ztypes_xz, &
dst,decomp%xcnts_xz,decomp%xdispls_xz,decomp%xtypes_xz,MPI_COMM_WORLD,ierror)
#if !defined(MPI3)
call MPI_Alltoallw(src,decomp%zcnts_xz,decomp%zdispls_xz,decomp%ztypes_xz, &
dst,decomp%xcnts_xz,decomp%xdispls_xz,decomp%xtypes_xz,MPI_COMM_WORLD,ierror)
#endif
#ifdef MPI3
call MPI_Neighbor_alltoallw( &
src,decomp%zcnts_xz(decomp%zranks),decomp%zdispls_xz(decomp%zranks),decomp%ztypes_xz(decomp%zranks), &
dst,decomp%xcnts_xz(decomp%xranks),decomp%xdispls_xz(decomp%xranks),decomp%xtypes_xz(decomp%xranks), &
decomp%ztoxNeighborComm,ierror)
#endif
return
end subroutine transpose_z_to_x_complex
