Skip to content

Commit

Permalink
Merge branch 'ben' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
jmaassen committed May 1, 2013
2 parents 92cd351 + b933fbe commit e2bebec
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 15 deletions.
135 changes: 135 additions & 0 deletions build/linuxg95_mpi_gpu.gnu
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@

#-----------------------------------------------------------------------
#
# File: linuxg95_mpi_gpu.gnu
#
# Contains compiler and loader options for a Linux cluster using the
# MPI compiler wrappers (mpif77/mpif90) and the CUDA compiler (nvcc),
# and specifies the mpi directory for communications modules.
#
#-----------------------------------------------------------------------
# Compiler and linker drivers: Fortran is compiled and linked through the
# MPI wrapper scripts, C through the system cc.
F77 = mpif77
F90 = mpif90
LD = mpif90
CC = cc

# Helper programs used by the build.
Cp = /bin/cp
# cpp -P: preprocess without emitting linemarkers in the output.
Cpp = cpp -P
AWK = /usr/bin/gawk
# ABI flags; empty for this configuration. Used as the leading entry of
# CFLAGS, FBASE and LDFLAGS further down in this file.
ABI =
# Communications directory selected by this configuration.
COMMDIR = mpi
# CUDA compiler driver.
NVCC = nvcc

# Enable MPI library for parallel code, yes/no.
# When set to yes, $(MPI_LD_FLAGS) -lmpi is appended to LIBS further down.

MPI = yes

# Adjust these to point to where netcdf is installed

# The include and library paths below are hard-coded to one specific
# netcdf 4.1.1 (gcc, 64-bit) installation.
# NOTE(review): this comment previously said netcdf was loaded as a module
# and that no values were necessary, which contradicts the explicit paths
# set here -- confirm whether the paths are still needed on the target system.
NETCDFINC = -I/cm/shared/apps/netcdf/gcc/64/4.1.1/include
NETCDFLIB = -L/cm/shared/apps/netcdf/gcc/64/4.1.1/lib

# Enable trapping and traceback of floating point exceptions, yes/no.
# Note - Requires 'setenv TRAP_FPE "ALL=ABORT,TRACE"' for traceback.
# NOTE(review): the TRAP_FPE=yes branches in this file currently add no
# flags, so setting this to yes has no visible effect here.

TRAP_FPE = no

#------------------------------------------------------------------
# precompiler options
#------------------------------------------------------------------

# Each variable below holds one preprocessor define. Uncomment a line to
# enable that define; a commented-out variable expands to nothing in
# Cpp_opts.
#DCOUPL = -Dcoupled
DHIRES = -D_HIRES
#PRINT = -DJASON_PRINT
#PRINT_HALO = -DJASON_PRINT_HALO
#PRINT_REDIST = -DJASON_PRINT_REDIST
#PRINT_LOOP = -DJASON_PRINT_LOOP
#TIMER = -DJASON_TIMER
#FIX_DATA = -DJASON_FIX_DATA
#LOG_FILE = -DJASON_SIMPLE_LOG_FILENAME
FLOW = -D_USE_FLOW_CONTROL
#SEND = -DJASON_PRINT_SEND
FLUSH = -DJASON_FLUSH
GPU = -DBEN_GPU

# Every toggle defined above must be listed here, otherwise enabling it has
# no effect. $(PRINT_HALO) was previously missing from this list.
Cpp_opts = \
$(DCOUPL) $(DHIRES) $(TIMER) $(PRINT) $(PRINT_HALO) $(PRINT_LOOP) $(LOG_FILE) $(FLOW) $(FIX_DATA) $(SEND) $(FLUSH) $(PRINT_REDIST) $(GPU)

# Always define POSIX in addition to the selected toggles.
Cpp_opts := $(Cpp_opts) -DPOSIX

#----------------------------------------------------------------------------
#
# C Flags
#
#----------------------------------------------------------------------------

# C compiler flags. OPTIMIZE is expected to be set by the including
# makefile or on the make command line; it is not defined in this file.
CFLAGS = $(ABI)

ifeq ($(OPTIMIZE),yes)
CFLAGS := $(CFLAGS) -O3
# -mcmodel=medium
else
# Debug build: emit debug information only. The former "-check all -ftrapuv"
# options are Intel compiler options and are not accepted by the GNU C
# compiler that the rest of this configuration (gcc-built netcdf,
# -fconvert=swap, -lstdc++) is set up for.
CFLAGS := $(CFLAGS) -g
endif

#----------------------------------------------------------------------------
#
# FORTRAN Flags
#
#----------------------------------------------------------------------------

# Flags common to every Fortran build: ABI, the netcdf include path, the
# MPI compile flags and the $(DepDir) include path.
FBASE = $(ABI) $(NETCDFINC) $(MPI_COMPILE_FLAGS) -I$(DepDir)
# File suffix of compiled Fortran module files.
MODSUF = mod

# Placeholder: no floating-point trapping flags are added for this
# configuration yet, so this branch leaves the flag list unchanged.
ifeq ($(TRAP_FPE),yes)
FBASE := $(FBASE)
endif

# -fconvert=swap makes unformatted I/O use the byte order opposite to the
# host's native one; it is passed in both optimized and debug builds.
ifeq ($(OPTIMIZE),yes)
FFLAGS = $(FBASE) -O3 -fconvert=swap
#-fmax-stack-var-size=536870912
#-mcmodel=medium
else
# Debug build: debug information plus run-time array bounds checking.
# -fbounds-check is the GNU spelling; the Intel-style "-check bounds" used
# previously is rejected by the GNU Fortran driver that -fconvert=swap
# already requires.
FFLAGS = $(FBASE) -g -fbounds-check -fconvert=swap
endif

#----------------------------------------------------------------------------
#
# CUDA Flags
#
#----------------------------------------------------------------------------

# nvcc flags, identical for optimized and non-optimized builds:
#   -gencode arch=compute_35,code=sm_35  generate code for compute capability 3.5
#   -Xptxas=-v                           have ptxas print per-kernel resource usage
#   -maxrregcount=64                     limit each kernel to 64 registers per thread
CUFLAGS := -gencode arch=compute_35,code=sm_35 \
           -Xptxas=-v \
           -maxrregcount=64

# Alternative target (compute capability 2.0):
#CUFLAGS = -gencode arch=compute_20,code=sm_20 -Xptxas=-v

# Optional precision flags, currently disabled:
#-prec-sqrt=true -fmad=false

#----------------------------------------------------------------------------
#
# Loader Flags and Libraries
#
#----------------------------------------------------------------------------

# Linker flags: only the (currently empty) ABI flags.
LDFLAGS = $(ABI)

# Libraries: netcdf (plus curl), the CUDA runtime from a hard-coded CUDA 5.0
# toolkit path, and the C++ standard library.
# NOTE(review): -lstdc++ is presumably needed because the nvcc-compiled
# objects are linked with the Fortran driver -- confirm.
LIBS = $(NETCDFLIB) -L/cm/shared/apps/cuda50/toolkit/current/lib64/ -lnetcdf -lcurl -lcudart -lstdc++

# Parallel build: append the MPI link flags and library. The := assignment
# expands $(MPI_LD_FLAGS) at this point in the file.
ifeq ($(MPI),yes)
LIBS := $(LIBS) $(MPI_LD_FLAGS) -lmpi
endif

# Placeholder: no extra libraries are added for floating-point trapping.
ifeq ($(TRAP_FPE),yes)
LIBS := $(LIBS)
endif

# LDLIBS mirrors LIBS.
LDLIBS = $(LIBS)

#----------------------------------------------------------------------------
6 changes: 3 additions & 3 deletions source/gpu_cmod.cu
Original file line number Diff line number Diff line change
Expand Up @@ -540,9 +540,9 @@ void gpu_compare (double *a1, double *a2, int *pN, int *pName) {
//if (zero_one > 95*(N/100)) { fprintf(stderr, "Node %d: Error: array1 contains %d zeros\n",my_task, zero_one); }
//if (zero_two > 95*(N/100)) { fprintf(stderr, "Node %d: Error: array2 contains %d zeros\n",my_task, zero_two); }

if (zero_one != zero_two) {
fprintf(stderr, "Node %d: %s Error: number of zeros in arrays dont correspond zero1=%d, zero2=%d\n", my_task, var_names[vName], zero_one, zero_two);
}
//if (zero_one != zero_two) {
// fprintf(stderr, "Node %d: %s Error: number of zeros in arrays dont correspond zero1=%d, zero2=%d\n", my_task, var_names[vName], zero_one, zero_two);
//}

if (res > 0) {
if (vName == 0) {
Expand Down
4 changes: 2 additions & 2 deletions source/gpu_domain.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

#define KM 42
#define NX_BLOCK 904
#define NY_BLOCK 604
#define NX_BLOCK 64
#define NY_BLOCK 64
#define NSTREAMS 42
20 changes: 10 additions & 10 deletions source/gpu_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,14 @@ subroutine init_gpu_mod
!
!-----------------------------------------------------------------------

call my_cudaMallocHost(cptr, (nx_block*ny_block*km*nt*3*max_blocks_clinic))
call c_f_pointer(cptr, TRACER, (/ nx_block,ny_block,km,nt,3,max_blocks_clinic /))
call my_cudaMallocHost(cptr, (nx_block*ny_block*km*nt*3*nblocks_clinic))
call c_f_pointer(cptr, TRACER, (/ nx_block,ny_block,km,nt,3,nblocks_clinic /))

call my_cudaMallocHost(cptr, (nx_block*ny_block*km*3*max_blocks_clinic))
call c_f_pointer(cptr, RHO, (/ nx_block,ny_block,km,3,max_blocks_clinic /))
call my_cudaMallocHost(cptr, (nx_block*ny_block*km*3*nblocks_clinic))
call c_f_pointer(cptr, RHO, (/ nx_block,ny_block,km,3,nblocks_clinic /))

call my_cudaMallocHost(cptr, (nx_block*ny_block*km*max_blocks_clinic))
call c_f_pointer(cptr, RHOP, (/ nx_block,ny_block,km,max_blocks_clinic /))
call my_cudaMallocHost(cptr, (nx_block*ny_block*km*nblocks_clinic))
call c_f_pointer(cptr, RHOP, (/ nx_block,ny_block,km,nblocks_clinic /))

! real (r8), dimension(nx_block,ny_block,km) :: &
! DBLOC, &! buoyancy difference between adjacent levels
Expand All @@ -208,8 +208,8 @@ subroutine init_gpu_mod

!allocate (VDC(nx_block,ny_block,0:km+1,2,nblocks_clinic), &
! VVC(nx_block,ny_block,km, nblocks_clinic))
call my_cudaMallocHost(cptr, (nx_block*ny_block*(km+2)*2*max_blocks_clinic))
call c_f_pointer(cptr, VDC, (/ nx_block,ny_block,(km+2),2,max_blocks_clinic /))
call my_cudaMallocHost(cptr, (nx_block*ny_block*(km+2)*2*nblocks_clinic))
call c_f_pointer(cptr, VDC, (/ nx_block,ny_block,(km+2),2,nblocks_clinic /))


!VDC = RESHAPE(VDC, (/ nx_block,ny_block,0:km+1,2,max_blocks_clinic /))
Expand All @@ -219,8 +219,8 @@ subroutine init_gpu_mod
!apparently c_f_pointer doesn't like the ':' in the array shape statement
!call c_f_pointer(cptr, VDC, (/ nx_block,ny_block,0:km+1,2,max_blocks_clinic /))

call my_cudaMallocHost(cptr, (nx_block*ny_block*km*max_blocks_clinic))
call c_f_pointer(cptr, VVC, (/ nx_block,ny_block,km,max_blocks_clinic /))
call my_cudaMallocHost(cptr, (nx_block*ny_block*km*nblocks_clinic))
call c_f_pointer(cptr, VVC, (/ nx_block,ny_block,km,nblocks_clinic /))


! arrays used for correctness checks
Expand Down

0 comments on commit e2bebec

Please sign in to comment.