Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:NLeSC/eSalsa-POP into develop
Browse files Browse the repository at this point in the history
Conflicts:
	build/linuxg95_mpi_gpu.gnu
  • Loading branch information
jmaassen committed Jul 31, 2013
2 parents 5446dcd + e2bebec commit 2cb4997
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 16 deletions.
1 change: 1 addition & 0 deletions build/compile.mk
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,5 @@ include $(DEPENDS)

%.o: %.cu
@echo $(POPARCH) Compiling with implicit rule $(CUFLAGS) $<
@cd $(POPEXEDIR)/compile && $(NVCC) $(CUFLAGS) -ptx $<
@cd $(POPEXEDIR)/compile && $(NVCC) $(CUFLAGS) -c $(notdir $<)
6 changes: 3 additions & 3 deletions build/linuxg95_mpi_gpu.gnu
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ Cpp_opts := $(Cpp_opts) -DPOSIX
CFLAGS = $(ABI)

ifeq ($(OPTIMIZE),yes)
CFLAGS := $(CFLAGS) -O3 -march=corei7
CFLAGS := $(CFLAGS) -O3
# -mcmodel=medium
else
CFLAGS := $(CFLAGS) -g -check all -ftrapuv
Expand All @@ -87,7 +87,7 @@ ifeq ($(TRAP_FPE),yes)
endif

ifeq ($(OPTIMIZE),yes)
FFLAGS = $(FBASE) -O3 -march=corei7 -fconvert=swap
FFLAGS = $(FBASE) -O3 -fconvert=swap
#-fmax-stack-var-size=536870912
#-mcmodel=medium
else
Expand All @@ -100,7 +100,7 @@ endif
#
#----------------------------------------------------------------------------

CUFLAGS = -gencode arch=compute_35,code=sm_35 -Xptxas=-v -maxrregcount=64 -gencode arch=compute_20,code=sm_20
CUFLAGS = -gencode arch=compute_35,code=sm_35 -Xptxas=-v -maxrregcount=64
#CUFLAGS = -gencode arch=compute_20,code=sm_20 -Xptxas=-v

#-prec-sqrt=true -fmad=false
Expand Down
6 changes: 3 additions & 3 deletions source/gpu_cmod.cu
Original file line number Diff line number Diff line change
Expand Up @@ -540,9 +540,9 @@ void gpu_compare (double *a1, double *a2, int *pN, int *pName) {
//if (zero_one > 95*(N/100)) { fprintf(stderr, "Node %d: Error: array1 contains %d zeros\n",my_task, zero_one); }
//if (zero_two > 95*(N/100)) { fprintf(stderr, "Node %d: Error: array2 contains %d zeros\n",my_task, zero_two); }

if (zero_one != zero_two) {
fprintf(stderr, "Node %d: %s Error: number of zeros in arrays dont correspond zero1=%d, zero2=%d\n", my_task, var_names[vName], zero_one, zero_two);
}
//if (zero_one != zero_two) {
// fprintf(stderr, "Node %d: %s Error: number of zeros in arrays dont correspond zero1=%d, zero2=%d\n", my_task, var_names[vName], zero_one, zero_two);
//}

if (res > 0) {
if (vName == 0) {
Expand Down
20 changes: 10 additions & 10 deletions source/gpu_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,14 @@ subroutine init_gpu_mod
!
!-----------------------------------------------------------------------

call my_cudaMallocHost(cptr, (nx_block*ny_block*km*nt*3*max_blocks_clinic))
call c_f_pointer(cptr, TRACER, (/ nx_block,ny_block,km,nt,3,max_blocks_clinic /))
call my_cudaMallocHost(cptr, (nx_block*ny_block*km*nt*3*nblocks_clinic))
call c_f_pointer(cptr, TRACER, (/ nx_block,ny_block,km,nt,3,nblocks_clinic /))

call my_cudaMallocHost(cptr, (nx_block*ny_block*km*3*max_blocks_clinic))
call c_f_pointer(cptr, RHO, (/ nx_block,ny_block,km,3,max_blocks_clinic /))
call my_cudaMallocHost(cptr, (nx_block*ny_block*km*3*nblocks_clinic))
call c_f_pointer(cptr, RHO, (/ nx_block,ny_block,km,3,nblocks_clinic /))

call my_cudaMallocHost(cptr, (nx_block*ny_block*km*max_blocks_clinic))
call c_f_pointer(cptr, RHOP, (/ nx_block,ny_block,km,max_blocks_clinic /))
call my_cudaMallocHost(cptr, (nx_block*ny_block*km*nblocks_clinic))
call c_f_pointer(cptr, RHOP, (/ nx_block,ny_block,km,nblocks_clinic /))

! real (r8), dimension(nx_block,ny_block,km) :: &
! DBLOC, &! buoyancy difference between adjacent levels
Expand All @@ -208,8 +208,8 @@ subroutine init_gpu_mod

!allocate (VDC(nx_block,ny_block,0:km+1,2,nblocks_clinic), &
! VVC(nx_block,ny_block,km, nblocks_clinic))
call my_cudaMallocHost(cptr, (nx_block*ny_block*(km+2)*2*max_blocks_clinic))
call c_f_pointer(cptr, VDC, (/ nx_block,ny_block,(km+2),2,max_blocks_clinic /))
call my_cudaMallocHost(cptr, (nx_block*ny_block*(km+2)*2*nblocks_clinic))
call c_f_pointer(cptr, VDC, (/ nx_block,ny_block,(km+2),2,nblocks_clinic /))


!VDC = RESHAPE(VDC, (/ nx_block,ny_block,0:km+1,2,max_blocks_clinic /))
Expand All @@ -219,8 +219,8 @@ subroutine init_gpu_mod
!apparently c_f_pointer does not like the ':' in the array shape statement
!call c_f_pointer(cptr, VDC, (/ nx_block,ny_block,0:km+1,2,max_blocks_clinic /))

call my_cudaMallocHost(cptr, (nx_block*ny_block*km*max_blocks_clinic))
call c_f_pointer(cptr, VVC, (/ nx_block,ny_block,km,max_blocks_clinic /))
call my_cudaMallocHost(cptr, (nx_block*ny_block*km*nblocks_clinic))
call c_f_pointer(cptr, VVC, (/ nx_block,ny_block,km,nblocks_clinic /))


! arrays used for correctness checks
Expand Down

0 comments on commit 2cb4997

Please sign in to comment.