Merge pull request #69 from OP2/update/documentation

Update/documentation
commit 97f50f6c8ba64d55f3749e09152f30e6bb36602e 2 parents 62a4247 + 7eaf025
@gihanmudalige authored
Showing with 1,524 additions and 14,107 deletions.
  1. +44 −0 .gitignore
  2. +2 −2 apps/c/CMakeLists.txt
  3. +64 −0 apps/c/airfoil/README.md
  4. +0 −183 apps/c/airfoil/airfoil_hdf5_simple/dp/Makefile
  5. +0 −26 apps/c/airfoil/airfoil_hdf5_simple/dp/adt_calc.h
  6. +0 −172 apps/c/airfoil/airfoil_hdf5_simple/dp/adt_calc_kernel.cpp
  7. +0 −191 apps/c/airfoil/airfoil_hdf5_simple/dp/adt_calc_kernel.cu
  8. +0 −232 apps/c/airfoil/airfoil_hdf5_simple/dp/airfoil.cpp
  9. +0 −25 apps/c/airfoil/airfoil_hdf5_simple/dp/airfoil_kernels.cpp
  10. +0 −52 apps/c/airfoil/airfoil_hdf5_simple/dp/airfoil_kernels.cu
  11. +0 −285 apps/c/airfoil/airfoil_hdf5_simple/dp/airfoil_op.cpp
  12. +0 −33 apps/c/airfoil/airfoil_hdf5_simple/dp/bres_calc.h
  13. +0 −224 apps/c/airfoil/airfoil_hdf5_simple/dp/bres_calc_kernel.cpp
  14. +0 −260 apps/c/airfoil/airfoil_hdf5_simple/dp/bres_calc_kernel.cu
  15. +0 −343 apps/c/airfoil/airfoil_hdf5_simple/dp/convert_mesh.cpp
  16. +0 −30 apps/c/airfoil/airfoil_hdf5_simple/dp/res_calc.h
  17. +0 −239 apps/c/airfoil/airfoil_hdf5_simple/dp/res_calc_kernel.cpp
  18. +0 −273 apps/c/airfoil/airfoil_hdf5_simple/dp/res_calc_kernel.cu
  19. +0 −3  apps/c/airfoil/airfoil_hdf5_simple/dp/save_soln.h
  20. +0 −94 apps/c/airfoil/airfoil_hdf5_simple/dp/save_soln_kernel.cpp
  21. +0 −134 apps/c/airfoil/airfoil_hdf5_simple/dp/save_soln_kernel.cu
  22. +0 −12 apps/c/airfoil/airfoil_hdf5_simple/dp/update.h
  23. +0 −123 apps/c/airfoil/airfoil_hdf5_simple/dp/update_kernel.cpp
  24. +0 −206 apps/c/airfoil/airfoil_hdf5_simple/dp/update_kernel.cu
  25. +0 −79 apps/c/airfoil/airfoil_hdf5_vector/CMakeLists.txt
  26. +0 −175 apps/c/airfoil/airfoil_hdf5_vector/dp/Makefile
  27. +0 −26 apps/c/airfoil/airfoil_hdf5_vector/dp/adt_calc.h
  28. +0 −176 apps/c/airfoil/airfoil_hdf5_vector/dp/adt_calc_kernel.cpp
  29. +0 −191 apps/c/airfoil/airfoil_hdf5_vector/dp/adt_calc_kernel.cu
  30. +0 −210 apps/c/airfoil/airfoil_hdf5_vector/dp/airfoil.cpp
  31. +0 −25 apps/c/airfoil/airfoil_hdf5_vector/dp/airfoil_kernels.cpp
  32. +0 −52 apps/c/airfoil/airfoil_hdf5_vector/dp/airfoil_kernels.cu
  33. +0 −250 apps/c/airfoil/airfoil_hdf5_vector/dp/airfoil_op.cpp
  34. +0 −32 apps/c/airfoil/airfoil_hdf5_vector/dp/bres_calc.h
  35. +0 −233 apps/c/airfoil/airfoil_hdf5_vector/dp/bres_calc_kernel.cpp
  36. +0 −264 apps/c/airfoil/airfoil_hdf5_vector/dp/bres_calc_kernel.cu
  37. +0 −29 apps/c/airfoil/airfoil_hdf5_vector/dp/res_calc.h
  38. +0 −259 apps/c/airfoil/airfoil_hdf5_vector/dp/res_calc_kernel.cpp
  39. +0 −290 apps/c/airfoil/airfoil_hdf5_vector/dp/res_calc_kernel.cu
  40. +0 −3  apps/c/airfoil/airfoil_hdf5_vector/dp/save_soln.h
  41. +0 −94 apps/c/airfoil/airfoil_hdf5_vector/dp/save_soln_kernel.cpp
  42. +0 −134 apps/c/airfoil/airfoil_hdf5_vector/dp/save_soln_kernel.cu
  43. +0 −12 apps/c/airfoil/airfoil_hdf5_vector/dp/update.h
  44. +0 −123 apps/c/airfoil/airfoil_hdf5_vector/dp/update_kernel.cpp
  45. +0 −206 apps/c/airfoil/airfoil_hdf5_vector/dp/update_kernel.cu
  46. +0 −71 apps/c/airfoil/airfoil_vector/CMakeLists.txt
  47. +0 −165 apps/c/airfoil/airfoil_vector/dp/Makefile
  48. +0 −26 apps/c/airfoil/airfoil_vector/dp/adt_calc.h
  49. +0 −176 apps/c/airfoil/airfoil_vector/dp/adt_calc_kernel.cpp
  50. +0 −191 apps/c/airfoil/airfoil_vector/dp/adt_calc_kernel.cu
  51. +0 −274 apps/c/airfoil/airfoil_vector/dp/airfoil.cpp
  52. +0 −25 apps/c/airfoil/airfoil_vector/dp/airfoil_kernels.cpp
  53. +0 −52 apps/c/airfoil/airfoil_vector/dp/airfoil_kernels.cu
  54. +0 −424 apps/c/airfoil/airfoil_vector/dp/airfoil_mpi.cpp
  55. +0 −25 apps/c/airfoil/airfoil_vector/dp/airfoil_mpi_kernels.cpp
  56. +0 −52 apps/c/airfoil/airfoil_vector/dp/airfoil_mpi_kernels.cu
  57. +0 −464 apps/c/airfoil/airfoil_vector/dp/airfoil_mpi_op.cpp
  58. +0 −314 apps/c/airfoil/airfoil_vector/dp/airfoil_op.cpp
  59. +0 −32 apps/c/airfoil/airfoil_vector/dp/bres_calc.h
  60. +0 −233 apps/c/airfoil/airfoil_vector/dp/bres_calc_kernel.cpp
  61. +0 −264 apps/c/airfoil/airfoil_vector/dp/bres_calc_kernel.cu
  62. +0 −29 apps/c/airfoil/airfoil_vector/dp/res_calc.h
  63. +0 −259 apps/c/airfoil/airfoil_vector/dp/res_calc_kernel.cpp
  64. +0 −290 apps/c/airfoil/airfoil_vector/dp/res_calc_kernel.cu
  65. +0 −3  apps/c/airfoil/airfoil_vector/dp/save_soln.h
  66. +0 −94 apps/c/airfoil/airfoil_vector/dp/save_soln_kernel.cpp
  67. +0 −134 apps/c/airfoil/airfoil_vector/dp/save_soln_kernel.cu
  68. +0 −12 apps/c/airfoil/airfoil_vector/dp/update.h
  69. +0 −123 apps/c/airfoil/airfoil_vector/dp/update_kernel.cpp
  70. +0 −206 apps/c/airfoil/airfoil_vector/dp/update_kernel.cu
  71. +0 −166 apps/c/airfoil/airfoil_vector/sp/Makefile
  72. +0 −26 apps/c/airfoil/airfoil_vector/sp/adt_calc.h
  73. +0 −176 apps/c/airfoil/airfoil_vector/sp/adt_calc_kernel.cpp
  74. +0 −191 apps/c/airfoil/airfoil_vector/sp/adt_calc_kernel.cu
  75. +0 −274 apps/c/airfoil/airfoil_vector/sp/airfoil.cpp
  76. +0 −25 apps/c/airfoil/airfoil_vector/sp/airfoil_kernels.cpp
  77. +0 −52 apps/c/airfoil/airfoil_vector/sp/airfoil_kernels.cu
  78. +0 −424 apps/c/airfoil/airfoil_vector/sp/airfoil_mpi.cpp
  79. +0 −25 apps/c/airfoil/airfoil_vector/sp/airfoil_mpi_kernels.cpp
  80. +0 −52 apps/c/airfoil/airfoil_vector/sp/airfoil_mpi_kernels.cu
  81. +0 −464 apps/c/airfoil/airfoil_vector/sp/airfoil_mpi_op.cpp
  82. +0 −314 apps/c/airfoil/airfoil_vector/sp/airfoil_op.cpp
  83. +0 −32 apps/c/airfoil/airfoil_vector/sp/bres_calc.h
  84. +0 −233 apps/c/airfoil/airfoil_vector/sp/bres_calc_kernel.cpp
  85. +0 −264 apps/c/airfoil/airfoil_vector/sp/bres_calc_kernel.cu
  86. +0 −29 apps/c/airfoil/airfoil_vector/sp/res_calc.h
  87. +0 −259 apps/c/airfoil/airfoil_vector/sp/res_calc_kernel.cpp
  88. +0 −290 apps/c/airfoil/airfoil_vector/sp/res_calc_kernel.cu
  89. +0 −3  apps/c/airfoil/airfoil_vector/sp/save_soln.h
  90. +0 −94 apps/c/airfoil/airfoil_vector/sp/save_soln_kernel.cpp
  91. +0 −134 apps/c/airfoil/airfoil_vector/sp/save_soln_kernel.cu
  92. +0 −12 apps/c/airfoil/airfoil_vector/sp/update.h
  93. +0 −123 apps/c/airfoil/airfoil_vector/sp/update_kernel.cpp
  94. +0 −206 apps/c/airfoil/airfoil_vector/sp/update_kernel.cu
  95. +13 −0 apps/c/airfoil/compare_results/Makefile
  96. +85 −0 apps/c/airfoil/compare_results/compare.cpp
  97. +83 −0 apps/c/airfoil/compare_results/comparebin.cpp
  98. +83 −0 doc/BIB.bib
  99. +141 −81 doc/C++_Users_Guide.tex
  100. +19 −0 doc/README
  101. +600 −0 doc/airfoil-doc.tex
  102. BIN  doc/airfoil_hdf5.pdf
  103. BIN  doc/airfoil_plain.pdf
  104. BIN  doc/airfoil_plain_mpi.pdf
  105. +24 −0 doc/build_docs.sh
  106. +71 −77 doc/dev.tex
  107. +288 −135 doc/mpi-dev.tex
  108. +1 −0  op2/c/Makefile
  109. +6 −3 op2/c/README
44 .gitignore
@@ -0,0 +1,44 @@
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*_cu.o
+# Compiled Dynamic libraries
+*.so
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*_seq
+*_openmp
+*_cuda
+*_mpi
+#*.txt
+#*.dat
+*_mpi
+*.h5
+*.bin
+#fortran
+*.mod
+
+#python
+*.pyc
+
+#latex
+*.log
+*.aux
+*.toc
+*.out
+*.backup
+*.blg
+*.bbl
+*~
+*.pyg.*
+
+#directories
+./op2/c/lib/
+./op2/c/build/
+./op2/c/obj/
+./op2/c/CMakeFiles/
+./op2/fortran/lib/
+./op2/fortran/obj/
4 apps/c/CMakeLists.txt
@@ -40,8 +40,8 @@ option(OP2_BUILD_ALL_AERO "Build all aero apps" ON)
set(OP2_AIRFOIL_APPS
airfoil/airfoil_plain
airfoil/airfoil_hdf5
- airfoil/airfoil_vector
- airfoil/airfoil_hdf5_vector
+ #airfoil/airfoil_vector
+ #airfoil/airfoil_hdf5_vector
airfoil/airfoil_tempdats
)
set(OP2_JACOBI_APPS
64 apps/c/airfoil/README.md
@@ -0,0 +1,64 @@
+### Airfoil
+
+Airfoil is a nonlinear 2D inviscid airfoil code that uses an unstructured grid. It is a finite volume application that
+solves the 2D Euler equations using scalar numerical dissipation. The algorithm iterates towards the steady-state
+solution, in each iteration using a control volume approach: for example, the rate at which the mass changes within a
+control volume is equal to the net flux of mass into the control volume across the four faces around the cell. This is
+representative of the 3D viscous flow calculations OP2 aims to eventually support for production-grade CFD applications
+(such as the Hydra CFD code at Rolls-Royce plc.). The application implements a predictor/corrector time-marching
+loop by (1) looping over the cells of the mesh and computing the time step for each cell, (2) computing the flux over
+internal edges, (3) computing the flux over boundary edges, (4) updating the solution and (5) saving the old solution
+before repeating. These main stages of the application are solved in Airfoil within five parallel loops: adt_calc,
+res_calc, bres_calc, update and save_soln. Of these, save_soln and update are direct loops while the other three are
+indirect loops.
+
+Please see airfoil-doc under the ../../doc directory for further OP2 application development details.
+
+### Airfoil Application Directory Structure
+
+Airfoil has been the main development, testing and benchmarking application in OP2. As such this directory contains
+several versions of Airfoil that demonstrate the use of various OP2 features.
+
+airfoil_plain -- airfoil implemented with user I/O routines (mesh file in ASCII - see ../../apps/mesh_generators
+on how to generate the mesh)
+
+airfoil_hdf5 -- airfoil implemented with OP2 HDF5 routines (mesh file in HDF5, see the ASCII to HDF5 file converter)
+
+airfoil_vector -- airfoil user kernels modified to achieve vectorization
+
+airfoil_tempdats -- airfoil using op_decl_temp, i.e. temporary dats, in the application
+
+compare_results -- small utility code to compare two files (txt or bin), used to compare the final result from airfoil
+
+### Testing the Results
+
+The various parallel versions of Airfoil should be compared against the single-threaded CPU version (also known as the
+reference implementation) to ascertain the correctness of the results. The p_q array holds the final result and as such
+will be the data array to compare. One way to achieve this is to use the following OP2 calls to write the data array to
+text or binary files, for example after the end of the 1000 iterations in the airfoil code.
+
+```
+op_print_dat_to_txtfile(p_q, "out_grid_seq.dat"); //ASCII
+op_print_dat_to_binfile(p_q, "out_grid_seq.bin"); //Binary
+```
+
+Then the code in compare.cpp and comparebin.cpp can be used to compare the text file or binary file with the reference
+implementation.
+
+For the double precision version, results can be expected to match across systems to within machine precision. For the
+single precision version, answers should be very close. A summary of the rms value of the residual is printed out by
+default every 100 iterations. In double precision, for the first 1000 iterations, this should be exactly:
+
+```
+100 5.02186e-04
+200 3.41746e-04
+300 2.63430e-04
+400 2.16288e-04
+500 1.84659e-04
+600 1.60866e-04
+700 1.42253e-04
+800 1.27627e-04
+900 1.15810e-04
+1000 1.06011e-04
+```
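The five parallel loops named in the README above correspond one-to-one to op_par_loop calls inside the predictor/corrector time-marching loop. As a sketch connecting that prose to the OP2 API, the core of airfoil.cpp (shown in full elsewhere in this commit) looks like the following; the set, map and dat declarations are assumed to have been made earlier in the program:

```
// Core time-marching structure of airfoil.cpp (sketch; set/map/dat
// declarations are assumed from earlier in the program).
for (int iter = 1; iter <= niter; iter++) {

  // (5) save old flow solution -- direct loop
  op_par_loop(save_soln, "save_soln", cells,
      op_arg_dat(p_q,    -1, OP_ID, 4, "double", OP_READ),
      op_arg_dat(p_qold, -1, OP_ID, 4, "double", OP_WRITE));

  for (int k = 0; k < 2; k++) {   // predictor/corrector update loop

    // (1) area/timestep per cell -- indirect loop
    op_par_loop(adt_calc, "adt_calc", cells,
        op_arg_dat(p_x,    0, pcell, 2, "double", OP_READ),
        op_arg_dat(p_x,    1, pcell, 2, "double", OP_READ),
        op_arg_dat(p_x,    2, pcell, 2, "double", OP_READ),
        op_arg_dat(p_x,    3, pcell, 2, "double", OP_READ),
        op_arg_dat(p_q,   -1, OP_ID, 4, "double", OP_READ),
        op_arg_dat(p_adt, -1, OP_ID, 1, "double", OP_WRITE));

    // (2) flux over internal edges -- indirect loop
    op_par_loop(res_calc, "res_calc", edges,
        op_arg_dat(p_x,   0, pedge,  2, "double", OP_READ),
        op_arg_dat(p_x,   1, pedge,  2, "double", OP_READ),
        op_arg_dat(p_q,   0, pecell, 4, "double", OP_READ),
        op_arg_dat(p_q,   1, pecell, 4, "double", OP_READ),
        op_arg_dat(p_adt, 0, pecell, 1, "double", OP_READ),
        op_arg_dat(p_adt, 1, pecell, 1, "double", OP_READ),
        op_arg_dat(p_res, 0, pecell, 4, "double", OP_INC),
        op_arg_dat(p_res, 1, pecell, 4, "double", OP_INC));

    // (3) flux over boundary edges -- indirect loop
    op_par_loop(bres_calc, "bres_calc", bedges,
        op_arg_dat(p_x,     0, pbedge,  2, "double", OP_READ),
        op_arg_dat(p_x,     1, pbedge,  2, "double", OP_READ),
        op_arg_dat(p_q,     0, pbecell, 4, "double", OP_READ),
        op_arg_dat(p_adt,   0, pbecell, 1, "double", OP_READ),
        op_arg_dat(p_res,   0, pbecell, 4, "double", OP_INC),
        op_arg_dat(p_bound, -1, OP_ID,  1, "int",    OP_READ));

    // (4) update flow field -- direct loop with a global reduction
    rms = 0.0;
    op_par_loop(update, "update", cells,
        op_arg_dat(p_qold, -1, OP_ID, 4, "double", OP_READ),
        op_arg_dat(p_q,    -1, OP_ID, 4, "double", OP_WRITE),
        op_arg_dat(p_res,  -1, OP_ID, 4, "double", OP_RW),
        op_arg_dat(p_adt,  -1, OP_ID, 1, "double", OP_READ),
        op_arg_gbl(&rms, 1, "double", OP_INC));
  }

  rms = sqrt(rms / (double)g_ncell);
  if (iter % 100 == 0) op_printf(" %d  %10.5e \n", iter, rms);
}
```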
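The compare.cpp and comparebin.cpp utilities added under compare_results are not reproduced in this excerpt. As an illustration of the binary comparison described in the Testing section, a minimal sketch could look like the following; this is a hypothetical example, not the code added by this commit, and the real comparebin.cpp may use a different file layout or tolerance:

```
/* Hypothetical sketch of a binary result comparison; NOT the comparebin.cpp
   added by this commit, which may use a different file layout or tolerance. */
#include <stdio.h>
#include <math.h>

int main(int argc, char **argv) {
  if (argc != 3) {
    fprintf(stderr, "usage: %s reference.bin test.bin\n", argv[0]);
    return 1;
  }
  FILE *f1 = fopen(argv[1], "rb");
  FILE *f2 = fopen(argv[2], "rb");
  if (!f1 || !f2) {
    fprintf(stderr, "cannot open input files\n");
    return 1;
  }
  double a, b, maxdiff = 0.0;
  long n = 0;
  /* compare the two streams of doubles value by value */
  while (fread(&a, sizeof(double), 1, f1) == 1 &&
         fread(&b, sizeof(double), 1, f2) == 1) {
    double d = fabs(a - b);
    if (d > maxdiff) maxdiff = d;
    n++;
  }
  printf("compared %ld values, max abs difference = %e\n", n, maxdiff);
  fclose(f1);
  fclose(f2);
  return 0;
}
```

Compiled with an ordinary C++ compiler, it would be run with two file names matching those passed to op_print_dat_to_binfile in the reference and test runs.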
183 apps/c/airfoil/airfoil_hdf5_simple/dp/Makefile
@@ -1,183 +0,0 @@
-#
-# The following environment variables should be predefined:
-#
-# CUDA_INSTALL_PATH
-# PARMETIS_INSTALL_PATH
-# PTSCOTCH_INSTALL_PATH
-# HDF5_INSTALL_PATH
-#
-# OP2_INSTALL_PATH
-# OP2_COMPILER (gnu,intel,etc)
-#
-
-#
-# set paths for header files and libraries
-#
-OP2_INC = -I$(OP2_INSTALL_PATH)/c/include
-OP2_LIB = -L$(OP2_INSTALL_PATH)/c/lib
-
-CUDA_INC = -I$(CUDA_INSTALL_PATH)/include
-CUDA_LIB = -L$(CUDA_INSTALL_PATH)/lib64
-
-
-ifeq ($(OP2_COMPILER),gnu)
- CPP = g++
- CPPFLAGS = -g -fPIC -DUNIX -Wall #-Wextra
- OMPFLAGS = -fopenmp
- MPICPP = /usr/bin/mpiCC
- MPIFLAGS = $(CCFLAGS)
-else
-ifeq ($(OP2_COMPILER),intel)
- CPP = icpc
- CCFLAGS = -O3 -vec-report -xSSE4.2 -parallel #-g -DCOMM_PERF #-DDEBUG
- CPPFLAGS = $(CCFLAGS)
- OMPFLAGS = -openmp -openmp-report2
- MPICPP = mpiCC
- MPIFLAGS = $(CPPFLAGS)
-else
-ifeq ($(OP2_COMPILER),cray)
- CPP = CC
- CCFLAGS = -O3
- CPPFLAGS = $(CCFLAGS)
- OMPFLAGS = -openmp -openmp-report2
- MPICPP = CC
- MPIFLAGS = $(CPPFLAGS)
-else
-print:
- @echo "unrecognised value for OP2_COMPILER"
-endif
-endif
-endif
-
-
-
-#
-# set flags for NVCC compilation and linking
-#
-NVCCFLAGS = -O3 -arch=sm_20 -Xptxas=-v -Dlcm=ca -use_fast_math #-g -G -O0
-VAR = #-DOP_PART_SIZE_1=160 -DOP_PART_SIZE_2=320 -DOP_PART_SIZE_3=64 #-DOP_BLOCK_SIZE_0=64 -DOP_BLOCK_SIZE_1=64 -DOP_BLOCK_SIZE_2=64 -DOP_BLOCK_SIZE_3=64 -DOP_BLOCK_SIZE_4=64
-
-#
-# partitioning software for MPI versions
-#
-
-PARMETIS_INC = -I$(PARMETIS_INSTALL_PATH) -DHAVE_PARMETIS
-PARMETIS_LIB = -L$(PARMETIS_INSTALL_PATH) -lparmetis \
- -L$(PARMETIS_INSTALL_PATH) -lmetis
-
-PTSCOTCH_INC = -I$(PTSCOTCH_INSTALL_PATH)/include -DHAVE_PTSCOTCH
-PTSCOTCH_LIB = -L$(PTSCOTCH_INSTALL_PATH)/lib/ -lptscotch \
- -L$(PTSCOTCH_INSTALL_PATH)/lib/ -lptscotcherr
-
-HDF5_INC = -I$(HDF5_INSTALL_PATH)/include
-HDF5_LIB = -L$(HDF5_INSTALL_PATH)/lib -lhdf5 -lz
-
-
-#
-# master to make all versions
-#
-
-all: clean airfoil_mpi airfoil_cuda airfoil_openmp airfoil_seq airfoil_mpi_cuda airfoil_mpi_openmp convert_mesh
-
-#
-# simple sequential version
-#
-
-airfoil_seq: airfoil.cpp save_soln.h adt_calc.h res_calc.h bres_calc.h
- $(MPICPP) $(CPPFLAGS) airfoil.cpp $(OP2_INC) $(HDF5_INC) $(OP2_LIB) -lop2_seq -lop2_hdf5 $(HDF5_LIB) -o airfoil_seq
-
-#
-# x86 version using kernel files generated by op2.m
-#
-
-airfoil_openmp: airfoil_op.cpp airfoil_kernels.cpp \
- save_soln_kernel.cpp save_soln.h \
- adt_calc_kernel.cpp adt_calc.h \
- res_calc_kernel.cpp res_calc.h \
- bres_calc_kernel.cpp bres_calc.h \
- update_kernel.cpp update.h \
- Makefile
- $(MPICPP) $(VAR) $(CPPFLAGS) $(OMPFLAGS) $(OP2_INC) $(OP2_LIB) $(HDF5_INC) \
- airfoil_op.cpp -lm airfoil_kernels.cpp -lm -lop2_openmp -lop2_hdf5 $(HDF5_LIB) -o airfoil_openmp
-
-#
-# CUDA version using kernel files generated by op2.m
-#
-
-airfoil_cuda: airfoil_op.cpp airfoil_kernels_cu.o Makefile
- $(MPICPP) $(VAR) $(CPPFLAGS) airfoil_op.cpp airfoil_kernels_cu.o \
- $(CUDA_INC) $(OP2_INC) $(HDF5_INC) \
- $(OP2_LIB) $(CUDA_LIB) -lcudart -lop2_cuda -lop2_hdf5 $(HDF5_LIB) -o airfoil_cuda
-
-airfoil_kernels_cu.o: airfoil_kernels.cu \
- save_soln_kernel.cu save_soln.h \
- adt_calc_kernel.cu adt_calc.h \
- res_calc_kernel.cu res_calc.h \
- bres_calc_kernel.cu bres_calc.h \
- update_kernel.cu update.h \
- Makefile
- nvcc $(VAR) $(INC) $(NVCCFLAGS) $(OP2_INC) $(HDF5_INC) -I /home/gihan/openmpi-intel/include \
- -c -o airfoil_kernels_cu.o airfoil_kernels.cu
-
-#
-# mpi with sequential-nodes version
-#
-
-airfoil_mpi: airfoil.cpp save_soln.h adt_calc.h res_calc.h bres_calc.h Makefile
- $(MPICPP) $(MPIFLAGS) airfoil.cpp $(OP2_INC) $(PARMETIS_INC) $(PTSCOTCH_INC) $(HDF5_INC) \
- $(OP2_LIB) -lop2_mpi $(PARMETIS_LIB) $(PTSCOTCH_LIB) $(HDF5_LIB) -o airfoil_mpi
-
-#
-# mpi openmp version using kernel files generated by op2.m
-#
-
-airfoil_mpi_openmp: airfoil_op.cpp airfoil_kernels.cpp \
- save_soln_kernel.cpp save_soln.h \
- adt_calc_kernel.cpp adt_calc.h \
- res_calc_kernel.cpp res_calc.h \
- bres_calc_kernel.cpp bres_calc.h \
- update_kernel.cpp update.h \
- Makefile
- $(MPICPP) $(VAR) $(CPPFLAGS) $(OMPFLAGS) $(OP2_INC) $(OP2_INC) $(HDF5_INC) \
- $(PARMETIS_INC) $(PTSCOTCH_INC) \
- airfoil_op.cpp -lm airfoil_kernels.cpp $(OP2_LIB) -lop2_mpi \
- $(PARMETIS_LIB) $(PTSCOTCH_LIB) $(HDF5_LIB) -o airfoil_mpi_openmp
-
-#
-# mpi with CUDA version
-#
-
-airfoil_mpi_cuda: airfoil_op.cpp airfoil_kernels_mpi_cu.o Makefile
- $(MPICPP) $(MPIFLAGS) airfoil_op.cpp -lm airfoil_kernels_mpi_cu.o \
- $(OP2_INC) $(PARMETIS_INC) $(PTSCOTCH_INC) $(HDF5_INC) \
- $(OP2_LIB) -lop2_mpi_cuda $(PARMETIS_LIB) $(PTSCOTCH_LIB) \
- $(HDF5_LIB) $(CUDA_LIB) -lcudart -o airfoil_mpi_cuda
-
-airfoil_kernels_mpi_cu.o: airfoil_kernels.cu \
- save_soln_kernel.cu save_soln.h \
- adt_calc_kernel.cu adt_calc.h \
- res_calc_kernel.cu res_calc.h \
- bres_calc_kernel.cu bres_calc.h \
- update_kernel.cu update.h \
- Makefile
- nvcc $(INC) $(NVCCFLAGS) $(OP2_INC) -I $(MPI_INSTALL_PATH)/include \
- -c -o airfoil_kernels_mpi_cu.o airfoil_kernels.cu
-
-
-#
-# convert ASCI new_gird.dat to HDF5 new_grid.h5
-#
-
-convert_mesh: convert_mesh.cpp
- $(MPICPP) $(MPIFLAGS) convert_mesh.cpp $(OP2_INC) $(PARMETIS_INC) $(PTSCOTCH_INC) $(HDF5_INC) \
- $(OP2_LIB) -lop2_mpi $(PARMETIS_LIB) $(PTSCOTCH_LIB) $(HDF5_LIB) -o convert_mesh
-
-
-
-
-#
-# cleanup
-#
-
-clean:
- rm -f airfoil_seq airfoil_openmp airfoil_cuda airfoil_mpi airfoil_mpi_openmp airfoil_mpi_cuda convert_mesh *.o
26 apps/c/airfoil/airfoil_hdf5_simple/dp/adt_calc.h
@@ -1,26 +0,0 @@
-inline void adt_calc(double *x1,double *x2,double *x3,double *x4,double *q,double *adt){
- double dx,dy, ri,u,v,c;
-
- ri = 1.0f/q[0];
- u = ri*q[1];
- v = ri*q[2];
- c = sqrt(gam*gm1*(ri*q[3]-0.5f*(u*u+v*v)));
-
- dx = x2[0] - x1[0];
- dy = x2[1] - x1[1];
- *adt = fabs(u*dy-v*dx) + c*sqrt(dx*dx+dy*dy);
-
- dx = x3[0] - x2[0];
- dy = x3[1] - x2[1];
- *adt += fabs(u*dy-v*dx) + c*sqrt(dx*dx+dy*dy);
-
- dx = x4[0] - x3[0];
- dy = x4[1] - x3[1];
- *adt += fabs(u*dy-v*dx) + c*sqrt(dx*dx+dy*dy);
-
- dx = x1[0] - x4[0];
- dy = x1[1] - x4[1];
- *adt += fabs(u*dy-v*dx) + c*sqrt(dx*dx+dy*dy);
-
- *adt = (*adt) / cfl;
-}
172 apps/c/airfoil/airfoil_hdf5_simple/dp/adt_calc_kernel.cpp
@@ -1,172 +0,0 @@
-//
-// auto-generated by op2.m on 16-Oct-2012 15:15:10
-//
-
-// user function
-
-#include "adt_calc.h"
-
-
-// x86 kernel function
-
-void op_x86_adt_calc(
- int blockIdx,
- double *ind_arg0,
- int *ind_map,
- short *arg_map,
- double *arg4,
- double *arg5,
- int *ind_arg_sizes,
- int *ind_arg_offs,
- int block_offset,
- int *blkmap,
- int *offset,
- int *nelems,
- int *ncolors,
- int *colors,
- int set_size) {
-
-
- int *ind_arg0_map, ind_arg0_size;
- double *ind_arg0_s;
- int nelem, offset_b;
-
- char shared[128000];
-
- if (0==0) {
-
- // get sizes and shift pointers and direct-mapped data
-
- int blockId = blkmap[blockIdx + block_offset];
- nelem = nelems[blockId];
- offset_b = offset[blockId];
-
- ind_arg0_size = ind_arg_sizes[0+blockId*1];
-
- ind_arg0_map = &ind_map[0*set_size] + ind_arg_offs[0+blockId*1];
-
- // set shared memory pointers
-
- int nbytes = 0;
- ind_arg0_s = (double *) &shared[nbytes];
- }
-
- // copy indirect datasets into shared memory or zero increment
-
- for (int n=0; n<ind_arg0_size; n++)
- for (int d=0; d<2; d++)
- ind_arg0_s[d+n*2] = ind_arg0[d+ind_arg0_map[n]*2];
-
-
- // process set elements
-
- for (int n=0; n<nelem; n++) {
-
-
- // user-supplied kernel call
-
-
- adt_calc( ind_arg0_s+arg_map[0*set_size+n+offset_b]*2,
- ind_arg0_s+arg_map[1*set_size+n+offset_b]*2,
- ind_arg0_s+arg_map[2*set_size+n+offset_b]*2,
- ind_arg0_s+arg_map[3*set_size+n+offset_b]*2,
- arg4+(n+offset_b)*4,
- arg5+(n+offset_b)*1 );
- }
-
-}
-
-
-// host stub function
-
-void op_par_loop_adt_calc(char const *name, op_set set,
- op_arg arg0,
- op_arg arg1,
- op_arg arg2,
- op_arg arg3,
- op_arg arg4,
- op_arg arg5 ){
-
-
- int nargs = 6;
- op_arg args[6];
-
- args[0] = arg0;
- args[1] = arg1;
- args[2] = arg2;
- args[3] = arg3;
- args[4] = arg4;
- args[5] = arg5;
-
- int ninds = 1;
- int inds[6] = {0,0,0,0,-1,-1};
-
- if (OP_diags>2) {
- printf(" kernel routine with indirection: adt_calc\n");
- }
-
- // get plan
-
- #ifdef OP_PART_SIZE_1
- int part_size = OP_PART_SIZE_1;
- #else
- int part_size = OP_part_size;
- #endif
-
- int set_size = op_mpi_halo_exchanges(set, nargs, args);
-
- // initialise timers
-
- double cpu_t1, cpu_t2, wall_t1=0, wall_t2=0;
- op_timing_realloc(1);
- OP_kernels[1].name = name;
- OP_kernels[1].count += 1;
-
- if (set->size >0) {
-
- op_plan *Plan = op_plan_get(name,set,part_size,nargs,args,ninds,inds);
-
- op_timers_core(&cpu_t1, &wall_t1);
-
- // execute plan
-
- int block_offset = 0;
-
- for (int col=0; col < Plan->ncolors; col++) {
- if (col==Plan->ncolors_core) op_mpi_wait_all(nargs, args);
-
- int nblocks = Plan->ncolblk[col];
-
-#pragma omp parallel for
- for (int blockIdx=0; blockIdx<nblocks; blockIdx++) {
- int blockId = Plan->blkmap[blockIdx + block_offset];
- int nelem = Plan->nelems[blockId];
- int offset_b = Plan->offset[blockId];
- for (int n=0; n<nelem; n++) {
- adt_calc( &((double *)arg0.data)[2*arg0.map_data[4*(n+offset_b)+0]],
- &((double *)arg0.data)[2*arg0.map_data[4*(n+offset_b)+1]],
- &((double *)arg0.data)[2*arg0.map_data[4*(n+offset_b)+2]],
- &((double *)arg0.data)[2*arg0.map_data[4*(n+offset_b)+3]],
- &((double *)arg4.data)[(n+offset_b)*4],
- &((double *)arg5.data)[(n+offset_b)*1]);
- }
- }
- block_offset += nblocks;
- }
-
- op_timing_realloc(1);
- OP_kernels[1].transfer += Plan->transfer;
- OP_kernels[1].transfer2 += Plan->transfer2;
-
- }
-
-
- // combine reduction data
-
- op_mpi_set_dirtybit(nargs, args);
-
- // update kernel record
-
- op_timers_core(&cpu_t2, &wall_t2);
- OP_kernels[1].time += wall_t2 - wall_t1;
-}
191 apps/c/airfoil/airfoil_hdf5_simple/dp/adt_calc_kernel.cu
@@ -1,191 +0,0 @@
-//
-// auto-generated by op2.m on 19-Oct-2012 16:21:07
-//
-
-// user function
-
-__device__
-#include "adt_calc.h"
-
-
-// CUDA kernel function
-
-__global__ void op_cuda_adt_calc(
- double *ind_arg0,
- int *ind_map,
- short *arg_map,
- double *arg4,
- double *arg5,
- int *ind_arg_sizes,
- int *ind_arg_offs,
- int block_offset,
- int *blkmap,
- int *offset,
- int *nelems,
- int *ncolors,
- int *colors,
- int nblocks,
- int set_size) {
-
-
- __shared__ int *ind_arg0_map, ind_arg0_size;
- __shared__ double *ind_arg0_s;
- __shared__ int nelem, offset_b;
-
- extern __shared__ char shared[];
-
- if (blockIdx.x+blockIdx.y*gridDim.x >= nblocks) return;
- if (threadIdx.x==0) {
-
- // get sizes and shift pointers and direct-mapped data
-
- int blockId = blkmap[blockIdx.x + blockIdx.y*gridDim.x + block_offset];
-
- nelem = nelems[blockId];
- offset_b = offset[blockId];
-
- ind_arg0_size = ind_arg_sizes[0+blockId*1];
-
- ind_arg0_map = &ind_map[0*set_size] + ind_arg_offs[0+blockId*1];
-
- // set shared memory pointers
-
- int nbytes = 0;
- ind_arg0_s = (double *) &shared[nbytes];
- }
-
- __syncthreads(); // make sure all of above completed
-
- // copy indirect datasets into shared memory or zero increment
-
- for (int n=threadIdx.x; n<ind_arg0_size*2; n+=blockDim.x)
- ind_arg0_s[n] = ind_arg0[n%2+ind_arg0_map[n/2]*2];
-
- __syncthreads();
-
- // process set elements
-
- for (int n=threadIdx.x; n<nelem; n+=blockDim.x) {
-
-
- // user-supplied kernel call
-
-
- adt_calc( ind_arg0_s+arg_map[0*set_size+n+offset_b]*2,
- ind_arg0_s+arg_map[1*set_size+n+offset_b]*2,
- ind_arg0_s+arg_map[2*set_size+n+offset_b]*2,
- ind_arg0_s+arg_map[3*set_size+n+offset_b]*2,
- arg4+(n+offset_b)*4,
- arg5+(n+offset_b)*1 );
- }
-
-}
-
-
-// host stub function
-
-void op_par_loop_adt_calc(char const *name, op_set set,
- op_arg arg0,
- op_arg arg1,
- op_arg arg2,
- op_arg arg3,
- op_arg arg4,
- op_arg arg5 ){
-
-
- int nargs = 6;
- op_arg args[6];
-
- args[0] = arg0;
- args[1] = arg1;
- args[2] = arg2;
- args[3] = arg3;
- args[4] = arg4;
- args[5] = arg5;
-
- int ninds = 1;
- int inds[6] = {0,0,0,0,-1,-1};
-
- if (OP_diags>2) {
- printf(" kernel routine with indirection: adt_calc\n");
- }
-
- // get plan
-
- #ifdef OP_PART_SIZE_1
- int part_size = OP_PART_SIZE_1;
- #else
- int part_size = OP_part_size;
- #endif
-
- int set_size = op_mpi_halo_exchanges_cuda(set, nargs, args);
-
- // initialise timers
-
- double cpu_t1, cpu_t2, wall_t1=0, wall_t2=0;
- op_timing_realloc(1);
- OP_kernels[1].name = name;
- OP_kernels[1].count += 1;
-
- if (set->size >0) {
-
- op_plan *Plan = op_plan_get(name,set,part_size,nargs,args,ninds,inds);
-
- op_timers_core(&cpu_t1, &wall_t1);
-
- // execute plan
-
- int block_offset = 0;
-
- for (int col=0; col < Plan->ncolors; col++) {
-
- if (col==Plan->ncolors_core) op_mpi_wait_all_cuda(nargs,args);
-
- #ifdef OP_BLOCK_SIZE_1
- int nthread = OP_BLOCK_SIZE_1;
- #else
- int nthread = OP_block_size;
- #endif
-
- dim3 nblocks = dim3(Plan->ncolblk[col] >= (1<<16) ? 65535 : Plan->ncolblk[col],
- Plan->ncolblk[col] >= (1<<16) ? (Plan->ncolblk[col]-1)/65535+1: 1, 1);
- if (Plan->ncolblk[col] > 0) {
- int nshared = Plan->nsharedCol[col];
- op_cuda_adt_calc<<<nblocks,nthread,nshared>>>(
- (double *)arg0.data_d,
- Plan->ind_map,
- Plan->loc_map,
- (double *)arg4.data_d,
- (double *)arg5.data_d,
- Plan->ind_sizes,
- Plan->ind_offs,
- block_offset,
- Plan->blkmap,
- Plan->offset,
- Plan->nelems,
- Plan->nthrcol,
- Plan->thrcol,
- Plan->ncolblk[col],
- set_size);
-
- cutilSafeCall(cudaDeviceSynchronize());
- cutilCheckMsg("op_cuda_adt_calc execution failed\n");
- }
-
- block_offset += Plan->ncolblk[col];
- }
-
- op_timing_realloc(1);
- OP_kernels[1].transfer += Plan->transfer;
- OP_kernels[1].transfer2 += Plan->transfer2;
-
- }
-
-
- op_mpi_set_dirtybit_cuda(nargs, args);
-
- // update kernel record
-
- op_timers_core(&cpu_t2, &wall_t2);
- OP_kernels[1].time += wall_t2 - wall_t1;
-}
232 apps/c/airfoil/airfoil_hdf5_simple/dp/airfoil.cpp
@@ -1,232 +0,0 @@
-/*
- * Open source copyright declaration based on BSD open source template:
- * http://www.opensource.org/licenses/bsd-license.php
- *
- * This file is part of the OP2 distribution.
- *
- * Copyright (c) 2011, Mike Giles and others. Please see the AUTHORS file in
- * the main source directory for a full list of copyright holders.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * The name of Mike Giles may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY Mike Giles ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Mike Giles BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-//
-// Nonlinear airfoil lift calculation
-//
-// Written by Mike Giles, 2010-2011, based on FORTRAN code
-// by Devendra Ghate and Mike Giles, 2005
-//
-
-//
-// standard headers
-//
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-
-// global constants
-
-double gam, gm1, cfl, eps, mach, alpha, qinf[4];
-
-//
-// OP header file
-//
-
-#include "op_seq.h"
-
-//
-// kernel routines for parallel loops
-//
-
-#include "save_soln.h"
-#include "adt_calc.h"
-#include "res_calc.h"
-#include "bres_calc.h"
-#include "update.h"
-
-// main program
-
-int main(int argc, char **argv)
-{
- // OP initialisation
- op_init(argc,argv,2);
-
- int niter;
- double rms;
-
- //timer
- double cpu_t1, cpu_t2, wall_t1, wall_t2;
-
- // set constants and initialise flow field and residual
- op_printf("initialising flow field \n");
-
- char file[] = "new_grid.h5";
-
- // declare sets, pointers, datasets and global constants
-
- op_set nodes = op_decl_set_hdf5(file, "nodes");
- op_set edges = op_decl_set_hdf5(file, "edges");
- op_set bedges = op_decl_set_hdf5(file, "bedges");
- op_set cells = op_decl_set_hdf5(file, "cells");
-
- op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge");
- op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell");
- op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge");
- op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell");
- op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell");
-
- op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound");
- op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x");
- op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q");
- op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold");
- op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt");
- op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res");
-
- op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5");
- op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5");
- op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5");
- op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5");
- op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5");
- op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5");
- op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5");
-
- op_decl_const(1,"double",&gam );
- op_decl_const(1,"double",&gm1 );
- op_decl_const(1,"double",&cfl );
- op_decl_const(1,"double",&eps );
- op_decl_const(1,"double",&mach );
- op_decl_const(1,"double",&alpha);
- op_decl_const(4,"double",qinf );
-
- op_diagnostic_output();
-
- //write back original data just to compare you read the file correctly
- //do an h5diff between new_grid_out.h5 and new_grid.h5 to
- //compare two hdf5 files
- op_write_hdf5("new_grid_out.h5");
-
- op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5");
- op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5");
- op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5");
- op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5");
- op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5");
- op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5");
- op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5");
-
- //trigger partitioning and halo creation routines
- op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x);
-
- int g_ncell = op_get_size(cells);
-
-
- //initialise timers for total execution wall time
- op_timers(&cpu_t1, &wall_t1);
-
- // main time-marching loop
-
- niter = 1000;
-
- for(int iter=1; iter<=niter; iter++) {
-
- // save old flow solution
-
- op_par_loop(save_soln,"save_soln", cells,
- op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ),
- op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE));
-
- // predictor/corrector update loop
-
- for(int k=0; k<2; k++) {
-
- // calculate area/timstep
-
- op_par_loop(adt_calc,"adt_calc",cells,
- op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ),
- op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ),
- op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ),
- op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ),
- op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ),
- op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE));
-
- // calculate flux residual
-
- op_par_loop(res_calc,"res_calc",edges,
- op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ),
- op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ),
- op_arg_dat(p_q, 0,pecell,4,"double",OP_READ),
- op_arg_dat(p_q, 1,pecell,4,"double",OP_READ),
- op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ),
- op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ),
- op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ),
- op_arg_dat(p_res, 1,pecell,4,"double",OP_INC ));
-
- op_par_loop(bres_calc,"bres_calc",bedges,
- op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ),
- op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ),
- op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ),
- op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ),
- op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ),
- op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ));
-
- // update flow field
-
- rms = 0.0;
-
- op_par_loop(update,"update",cells,
- op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ),
- op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE),
- op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ),
- op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ),
- op_arg_gbl(&rms,1,"double",OP_INC));
- }
-
- // print iteration history
-
- rms = sqrt(rms/(double)g_ncell);
-
- if (iter%100 == 0)
- op_printf(" %d %10.5e \n",iter,rms);
- }
-
- op_timers(&cpu_t2, &wall_t2);
-
- double* q = (double *)malloc(sizeof(double)*op_get_size(cells)*4);
- op_fetch_data_hdf5(p_q, q, 0, op_get_size(cells)-1);
- free(q);
-
- op_fetch_data_hdf5_file(p_q, "file_name.h5");
-
- //printf("Root process = %d\n",op_is_root());
-
- //output the result dat array to files
- //op_write_hdf5("new_grid_out.h5");
-
- //compress using
- // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5
-
- op_timing_output();
- op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1);
- op_exit();
-}
25 apps/c/airfoil/airfoil_hdf5_simple/dp/airfoil_kernels.cpp
@@ -1,25 +0,0 @@
-//
-// auto-generated by op2.m on 29-Oct-2012 09:32:02
-//
-
-// header
-
-#include "op_lib_cpp.h"
-
-// global constants
-
-extern double gam;
-extern double gm1;
-extern double cfl;
-extern double eps;
-extern double mach;
-extern double alpha;
-extern double qinf[4];
-
-// user kernel files
-
-#include "save_soln_kernel.cpp"
-#include "adt_calc_kernel.cpp"
-#include "res_calc_kernel.cpp"
-#include "bres_calc_kernel.cpp"
-#include "update_kernel.cpp"
52 apps/c/airfoil/airfoil_hdf5_simple/dp/airfoil_kernels.cu
@@ -1,52 +0,0 @@
-//
-// auto-generated by op2.m on 29-Oct-2012 09:32:02
-//
-
-// header
-
-#include "op_lib_cpp.h"
-
-#include "op_cuda_rt_support.h"
-#include "op_cuda_reduction.h"
-// global constants
-
-#ifndef MAX_CONST_SIZE
-#define MAX_CONST_SIZE 128
-#endif
-
-__constant__ double gam;
-__constant__ double gm1;
-__constant__ double cfl;
-__constant__ double eps;
-__constant__ double mach;
-__constant__ double alpha;
-__constant__ double qinf[4];
-
-void op_decl_const_char(int dim, char const *type,
- int size, char *dat, char const *name) {
- if (!strcmp(name,"gam")) {
- cutilSafeCall(cudaMemcpyToSymbol(gam, dat, dim*size));
- } else if (!strcmp(name,"gm1")) {
- cutilSafeCall(cudaMemcpyToSymbol(gm1, dat, dim*size));
- } else if (!strcmp(name,"cfl")) {
- cutilSafeCall(cudaMemcpyToSymbol(cfl, dat, dim*size));
- } else if (!strcmp(name,"eps")) {
- cutilSafeCall(cudaMemcpyToSymbol(eps, dat, dim*size));
- } else if (!strcmp(name,"mach")) {
- cutilSafeCall(cudaMemcpyToSymbol(mach, dat, dim*size));
- } else if (!strcmp(name,"alpha")) {
- cutilSafeCall(cudaMemcpyToSymbol(alpha, dat, dim*size));
- } else if (!strcmp(name,"qinf")) {
- cutilSafeCall(cudaMemcpyToSymbol(qinf, dat, dim*size));
- } else {
- printf("error: unknown const name\n"); exit(1);
- }
-}
-
-// user kernel files
-
-#include "save_soln_kernel.cu"
-#include "adt_calc_kernel.cu"
-#include "res_calc_kernel.cu"
-#include "bres_calc_kernel.cu"
-#include "update_kernel.cu"
285 apps/c/airfoil/airfoil_hdf5_simple/dp/airfoil_op.cpp
@@ -1,285 +0,0 @@
-//
-// auto-generated by op2.m on 16-Oct-2012 15:15:10
-//
-
-/*
- * Open source copyright declaration based on BSD open source template:
- * http://www.opensource.org/licenses/bsd-license.php
- *
- * This file is part of the OP2 distribution.
- *
- * Copyright (c) 2011, Mike Giles and others. Please see the AUTHORS file in
- * the main source directory for a full list of copyright holders.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * The name of Mike Giles may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY Mike Giles ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Mike Giles BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-//
-// Nonlinear airfoil lift calculation
-//
-// Written by Mike Giles, 2010-2011, based on FORTRAN code
-// by Devendra Ghate and Mike Giles, 2005
-//
-
-//
-// standard headers
-//
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-
-// global constants
-
-double gam, gm1, cfl, eps, mach, alpha, qinf[4];
-
-//
-// OP header file
-//
-
-#include "op_lib_cpp.h"
-int op2_stride = 1;
-#define OP2_STRIDE(arr, idx) arr[op2_stride*(idx)]
-
-//
-// op_par_loop declarations
-//
-
-void op_par_loop_save_soln(char const *, op_set,
- op_arg,
- op_arg );
-
-void op_par_loop_adt_calc(char const *, op_set,
- op_arg,
- op_arg,
- op_arg,
- op_arg,
- op_arg,
- op_arg );
-
-void op_par_loop_res_calc(char const *, op_set,
- op_arg,
- op_arg,
- op_arg,
- op_arg,
- op_arg,
- op_arg,
- op_arg,
- op_arg );
-
-void op_par_loop_bres_calc(char const *, op_set,
- op_arg,
- op_arg,
- op_arg,
- op_arg,
- op_arg,
- op_arg );
-
-void op_par_loop_update(char const *, op_set,
- op_arg,
- op_arg,
- op_arg,
- op_arg,
- op_arg );
-
-//
-// kernel routines for parallel loops
-//
-
-#include "save_soln.h"
-#include "adt_calc.h"
-#include "res_calc.h"
-#include "bres_calc.h"
-#include "update.h"
-
-// main program
-
-int main(int argc, char **argv)
-{
- // OP initialisation
- op_init(argc,argv,2);
-
- int niter;
- double rms;
-
- //timer
- double cpu_t1, cpu_t2, wall_t1, wall_t2;
-
- // set constants and initialise flow field and residual
- op_printf("initialising flow field \n");
-
- char file[] = "new_grid.h5";
-
- // declare sets, pointers, datasets and global constants
-
- op_set nodes = op_decl_set_hdf5(file, "nodes");
- op_set edges = op_decl_set_hdf5(file, "edges");
- op_set bedges = op_decl_set_hdf5(file, "bedges");
- op_set cells = op_decl_set_hdf5(file, "cells");
-
- op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge");
- op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell");
- op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge");
- op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell");
- op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell");
-
- op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound");
- op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x");
- op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q");
- op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold");
- op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt");
- op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res");
-
- op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5");
- op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5");
- op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5");
- op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5");
- op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5");
- op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5");
- op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5");
-
- op_decl_const2("gam",1,"double",&gam );
- op_decl_const2("gm1",1,"double",&gm1 );
- op_decl_const2("cfl",1,"double",&cfl );
- op_decl_const2("eps",1,"double",&eps );
- op_decl_const2("mach",1,"double",&mach );
- op_decl_const2("alpha",1,"double",&alpha);
- op_decl_const2("qinf",4,"double",qinf );
-
- op_diagnostic_output();
-
- //write back original data just to compare you read the file correctly
- //do an h5diff between new_grid_out.h5 and new_grid.h5 to
- //compare two hdf5 files
- op_write_hdf5("new_grid_out.h5");
-
- op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5");
- op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5");
- op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5");
- op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5");
- op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5");
- op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5");
- op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5");
-
- //trigger partitioning and halo creation routines
- op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x);
-
- int g_ncell = op_get_size(cells);
-
-
- //initialise timers for total execution wall time
- op_timers(&cpu_t1, &wall_t1);
-
- // main time-marching loop
-
- niter = 1000;
-
- for(int iter=1; iter<=niter; iter++) {
-
- // save old flow solution
-
- op_par_loop_save_soln("save_soln",cells,
- op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
- op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE));
-
- // predictor/corrector update loop
-
- for(int k=0; k<2; k++) {
-
- // calculate area/timstep
-
- op_par_loop_adt_calc("adt_calc",cells,
- op_arg_dat(p_x,0,pcell,2,"double",OP_READ),
- op_arg_dat(p_x,1,pcell,2,"double",OP_READ),
- op_arg_dat(p_x,2,pcell,2,"double",OP_READ),
- op_arg_dat(p_x,3,pcell,2,"double",OP_READ),
- op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ),
- op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE));
-
- // calculate flux residual
-
- op_par_loop_res_calc("res_calc",edges,
- op_arg_dat(p_x,0,pedge,2,"double",OP_READ),
- op_arg_dat(p_x,1,pedge,2,"double",OP_READ),
- op_arg_dat(p_q,0,pecell,4,"double",OP_READ),
- op_arg_dat(p_q,1,pecell,4,"double",OP_READ),
- op_arg_dat(p_adt,0,pecell,1,"double",OP_READ),
- op_arg_dat(p_adt,1,pecell,1,"double",OP_READ),
- op_arg_dat(p_res,0,pecell,4,"double",OP_INC),
- op_arg_dat(p_res,1,pecell,4,"double",OP_INC));
-
- op_par_loop_bres_calc("bres_calc",bedges,
- op_arg_dat(p_x,0,pbedge,2,"double",OP_READ),
- op_arg_dat(p_x,1,pbedge,2,"double",OP_READ),
- op_arg_dat(p_q,0,pbecell,4,"double",OP_READ),
- op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ),
- op_arg_dat(p_res,0,pbecell,4,"double",OP_INC),
- op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ));
-
- // update flow field
-
- rms = 0.0;
-
- op_par_loop_update("update",cells,
- op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ),
- op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE),
- op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW),
- op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ),
- op_arg_gbl(&rms,1,"double",OP_INC));
- }
-
- // print iteration history
-
- rms = sqrt(rms/(double)g_ncell);
-
- if (iter%100 == 0)
- op_printf(" %d %10.5e \n",iter,rms);
- }
-
- op_timers(&cpu_t2, &wall_t2);
-
- //allocate memory to hold the whole data array
- double* q = (double *)malloc(sizeof(double)*op_get_size(cells)*4);
- op_fetch_data_hdf5(p_q, q, 0, op_get_size(cells)-1);
-
- /*printf("number of cells %d\n",op_get_size(cells));
- for(int i = 0; i< op_get_size(cells)*4; i++)
- printf("(%d) %lf",i,(double)q[i]);
- */
- free(q);
-
- op_fetch_data_hdf5_file(p_q, "file_name.h5");
-
- printf("Root process = %d\n",op_is_root());
-
- //output the result dat array to files
- //op_write_hdf5("new_grid_out.h5");
-
- //compress using
- // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5
-
- op_timing_output();
- op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1);
- op_exit();
-}
33 apps/c/airfoil/airfoil_hdf5_simple/dp/bres_calc.h
@@ -1,33 +0,0 @@
-inline void bres_calc(double *x1, double *x2, double *q1,
- double *adt1,double *res1,int *bound) {
- double dx,dy,mu, ri, p1,vol1, p2,vol2, f;
-
- dx = x1[0] - x2[0];
- dy = x1[1] - x2[1];
-
- ri = 1.0f/q1[0];
- p1 = gm1*(q1[3]-0.5f*ri*(q1[1]*q1[1]+q1[2]*q1[2]));
-
- if (*bound==1) {
- res1[1] += + p1*dy;
- res1[2] += - p1*dx;
- }
- else {
- vol1 = ri*(q1[1]*dy - q1[2]*dx);
-
- ri = 1.0f/qinf[0];
- p2 = gm1*(qinf[3]-0.5f*ri*(qinf[1]*qinf[1]+qinf[2]*qinf[2]));
- vol2 = ri*(qinf[1]*dy - qinf[2]*dx);
-
- mu = (*adt1)*eps;
-
- f = 0.5f*(vol1* q1[0] + vol2* qinf[0] ) + mu*(q1[0]-qinf[0]);
- res1[0] += f;
- f = 0.5f*(vol1* q1[1] + p1*dy + vol2* qinf[1] + p2*dy) + mu*(q1[1]-qinf[1]);
- res1[1] += f;
- f = 0.5f*(vol1* q1[2] - p1*dx + vol2* qinf[2] - p2*dx) + mu*(q1[2]-qinf[2]);
- res1[2] += f;
- f = 0.5f*(vol1*(q1[3]+p1) + vol2*(qinf[3]+p2) ) + mu*(q1[3]-qinf[3]);
- res1[3] += f;
- }
-}
224 apps/c/airfoil/airfoil_hdf5_simple/dp/bres_calc_kernel.cpp
@@ -1,224 +0,0 @@
-//
-// auto-generated by op2.m on 16-Oct-2012 15:15:10
-//
-
-// user function
-
-#include "bres_calc.h"
-
-
-// x86 kernel function
-
-void op_x86_bres_calc(
- int blockIdx,
- double *ind_arg0,
- double *ind_arg1,
- double *ind_arg2,
- double *ind_arg3,
- int *ind_map,
- short *arg_map,
- int *arg5,
- int *ind_arg_sizes,
- int *ind_arg_offs,
- int block_offset,
- int *blkmap,
- int *offset,
- int *nelems,
- int *ncolors,
- int *colors,
- int set_size) {
-
- double arg4_l[4];
-
- int *ind_arg0_map, ind_arg0_size;
- int *ind_arg1_map, ind_arg1_size;
- int *ind_arg2_map, ind_arg2_size;
- int *ind_arg3_map, ind_arg3_size;
- double *ind_arg0_s;
- double *ind_arg1_s;
- double *ind_arg2_s;
- double *ind_arg3_s;
- int nelem, offset_b;
-
- char shared[128000];
-
- if (0==0) {
-
- // get sizes and shift pointers and direct-mapped data
-
- int blockId = blkmap[blockIdx + block_offset];
- nelem = nelems[blockId];
- offset_b = offset[blockId];
-
- ind_arg0_size = ind_arg_sizes[0+blockId*4];
- ind_arg1_size = ind_arg_sizes[1+blockId*4];
- ind_arg2_size = ind_arg_sizes[2+blockId*4];
- ind_arg3_size = ind_arg_sizes[3+blockId*4];
-
- ind_arg0_map = &ind_map[0*set_size] + ind_arg_offs[0+blockId*4];
- ind_arg1_map = &ind_map[2*set_size] + ind_arg_offs[1+blockId*4];
- ind_arg2_map = &ind_map[3*set_size] + ind_arg_offs[2+blockId*4];
- ind_arg3_map = &ind_map[4*set_size] + ind_arg_offs[3+blockId*4];
-
- // set shared memory pointers
-
- int nbytes = 0;
- ind_arg0_s = (double *) &shared[nbytes];
- nbytes += ROUND_UP(ind_arg0_size*sizeof(double)*2);
- ind_arg1_s = (double *) &shared[nbytes];
- nbytes += ROUND_UP(ind_arg1_size*sizeof(double)*4);
- ind_arg2_s = (double *) &shared[nbytes];
- nbytes += ROUND_UP(ind_arg2_size*sizeof(double)*1);
- ind_arg3_s = (double *) &shared[nbytes];
- }
-
- // copy indirect datasets into shared memory or zero increment
-
- for (int n=0; n<ind_arg0_size; n++)
- for (int d=0; d<2; d++)
- ind_arg0_s[d+n*2] = ind_arg0[d+ind_arg0_map[n]*2];
-
- for (int n=0; n<ind_arg1_size; n++)
- for (int d=0; d<4; d++)
- ind_arg1_s[d+n*4] = ind_arg1[d+ind_arg1_map[n]*4];
-
- for (int n=0; n<ind_arg2_size; n++)
- for (int d=0; d<1; d++)
- ind_arg2_s[d+n*1] = ind_arg2[d+ind_arg2_map[n]*1];
-
- for (int n=0; n<ind_arg3_size; n++)
- for (int d=0; d<4; d++)
- ind_arg3_s[d+n*4] = ZERO_double;
-
-
- // process set elements
-
- for (int n=0; n<nelem; n++) {
-
- // initialise local variables
-
- for (int d=0; d<4; d++)
- arg4_l[d] = ZERO_double;
-
-
- // user-supplied kernel call
-
-
- bres_calc( ind_arg0_s+arg_map[0*set_size+n+offset_b]*2,
- ind_arg0_s+arg_map[1*set_size+n+offset_b]*2,
- ind_arg1_s+arg_map[2*set_size+n+offset_b]*4,
- ind_arg2_s+arg_map[3*set_size+n+offset_b]*1,
- arg4_l,
- arg5+(n+offset_b)*1 );
-
- // store local variables
-
- int arg4_map = arg_map[4*set_size+n+offset_b];
-
- for (int d=0; d<4; d++)
- ind_arg3_s[d+arg4_map*4] += arg4_l[d];
- }
-
- // apply pointered write/increment
-
- for (int n=0; n<ind_arg3_size; n++)
- for (int d=0; d<4; d++)
- ind_arg3[d+ind_arg3_map[n]*4] += ind_arg3_s[d+n*4];
-
-}
-
-
-// host stub function
-
-void op_par_loop_bres_calc(char const *name, op_set set,
- op_arg arg0,
- op_arg arg1,
- op_arg arg2,
- op_arg arg3,
- op_arg arg4,
- op_arg arg5 ){
-
-
- int nargs = 6;
- op_arg args[6];
-
- args[0] = arg0;
- args[1] = arg1;
- args[2] = arg2;
- args[3] = arg3;
- args[4] = arg4;
- args[5] = arg5;
-
- int ninds = 4;
- int inds[6] = {0,0,1,2,3,-1};
-
- if (OP_diags>2) {
- printf(" kernel routine with indirection: bres_calc\n");
- }
-
- // get plan
-
- #ifdef OP_PART_SIZE_3
- int part_size = OP_PART_SIZE_3;
- #else
- int part_size = OP_part_size;
- #endif
-
- int set_size = op_mpi_halo_exchanges(set, nargs, args);
-
- // initialise timers
-
- double cpu_t1, cpu_t2, wall_t1=0, wall_t2=0;
- op_timing_realloc(3);
- OP_kernels[3].name = name;
- OP_kernels[3].count += 1;
-
- if (set->size >0) {
-
- op_plan *Plan = op_plan_get(name,set,part_size,nargs,args,ninds,inds);
-
- op_timers_core(&cpu_t1, &wall_t1);
-
- // execute plan
-
- int block_offset = 0;
-
- for (int col=0; col < Plan->ncolors; col++) {
- if (col==Plan->ncolors_core) op_mpi_wait_all(nargs, args);
-
- int nblocks = Plan->ncolblk[col];
-
-#pragma omp parallel for
- for (int blockIdx=0; blockIdx<nblocks; blockIdx++) {
- int blockId = Plan->blkmap[blockIdx + block_offset];
- int nelem = Plan->nelems[blockId];
- int offset_b = Plan->offset[blockId];
- for (int n=0; n<nelem; n++) {
- bres_calc( &((double *)arg0.data)[2*arg0.map_data[2*(n+offset_b)+0]],
- &((double *)arg0.data)[2*arg0.map_data[2*(n+offset_b)+1]],
- &((double *)arg2.data)[4*arg2.map_data[1*(n+offset_b)+0]],
- &((double *)arg3.data)[1*arg3.map_data[1*(n+offset_b)+0]],
- &((double *)arg4.data)[4*arg4.map_data[1*(n+offset_b)+0]],
- &((int *)arg5.data)[(n+offset_b)*1]);
- }
- }
-
- block_offset += nblocks;
- }
-
- op_timing_realloc(3);
- OP_kernels[3].transfer += Plan->transfer;
- OP_kernels[3].transfer2 += Plan->transfer2;
-
- }
-
-
- // combine reduction data
-
- op_mpi_set_dirtybit(nargs, args);
-
- // update kernel record
-
- op_timers_core(&cpu_t2, &wall_t2);
- OP_kernels[3].time += wall_t2 - wall_t1;
-}
260 apps/c/airfoil/airfoil_hdf5_simple/dp/bres_calc_kernel.cu
@@ -1,260 +0,0 @@
-//
-// auto-generated by op2.m on 19-Oct-2012 16:21:07
-//
-
-// user function
-
-__device__
-#include "bres_calc.h"
-
-
-// CUDA kernel function
-
-__global__ void op_cuda_bres_calc(
- double *ind_arg0,
- double *ind_arg1,
- double *ind_arg2,
- double *ind_arg3,
- int *ind_map,
- short *arg_map,
- int *arg5,
- int *ind_arg_sizes,
- int *ind_arg_offs,
- int block_offset,
- int *blkmap,
- int *offset,
- int *nelems,
- int *ncolors,
- int *colors,
- int nblocks,
- int set_size) {
-
- double arg4_l[4];
-
- __shared__ int *ind_arg0_map, ind_arg0_size;
- __shared__ int *ind_arg1_map, ind_arg1_size;
- __shared__ int *ind_arg2_map, ind_arg2_size;
- __shared__ int *ind_arg3_map, ind_arg3_size;
- __shared__ double *ind_arg0_s;
- __shared__ double *ind_arg1_s;
- __shared__ double *ind_arg2_s;
- __shared__ double *ind_arg3_s;
- __shared__ int nelems2, ncolor;
- __shared__ int nelem, offset_b;
-
- extern __shared__ char shared[];
-
- if (blockIdx.x+blockIdx.y*gridDim.x >= nblocks) return;
- if (threadIdx.x==0) {
-
- // get sizes and shift pointers and direct-mapped data
-
- int blockId = blkmap[blockIdx.x + blockIdx.y*gridDim.x + block_offset];
-
- nelem = nelems[blockId];
- offset_b = offset[blockId];
-
- nelems2 = blockDim.x*(1+(nelem-1)/blockDim.x);
- ncolor = ncolors[blockId];
-
- ind_arg0_size = ind_arg_sizes[0+blockId*4];
- ind_arg1_size = ind_arg_sizes[1+blockId*4];
- ind_arg2_size = ind_arg_sizes[2+blockId*4];
- ind_arg3_size = ind_arg_sizes[3+blockId*4];
-
- ind_arg0_map = &ind_map[0*set_size] + ind_arg_offs[0+blockId*4];
- ind_arg1_map = &ind_map[2*set_size] + ind_arg_offs[1+blockId*4];
- ind_arg2_map = &ind_map[3*set_size] + ind_arg_offs[2+blockId*4];
- ind_arg3_map = &ind_map[4*set_size] + ind_arg_offs[3+blockId*4];
-
- // set shared memory pointers
-
- int nbytes = 0;
- ind_arg0_s = (double *) &shared[nbytes];
- nbytes += ROUND_UP(ind_arg0_size*sizeof(double)*2);
- ind_arg1_s = (double *) &shared[nbytes];
- nbytes += ROUND_UP(ind_arg1_size*sizeof(double)*4);
- ind_arg2_s = (double *) &shared[nbytes];
- nbytes += ROUND_UP(ind_arg2_size*sizeof(double)*1);
- ind_arg3_s = (double *) &shared[nbytes];
- }
-
- __syncthreads(); // make sure all of above completed
-
- // copy indirect datasets into shared memory or zero increment
-
- for (int n=threadIdx.x; n<ind_arg0_size*2; n+=blockDim.x)
- ind_arg0_s[n] = ind_arg0[n%2+ind_arg0_map[n/2]*2];
-
- for (int n=threadIdx.x; n<ind_arg1_size*4; n+=blockDim.x)
- ind_arg1_s[n] = ind_arg1[n%4+ind_arg1_map[n/4]*4];
-
- for (int n=threadIdx.x; n<ind_arg2_size*1; n+=blockDim.x)
- ind_arg2_s[n] = ind_arg2[n%1+ind_arg2_map[n/1]*1];
-
- for (int n=threadIdx.x; n<ind_arg3_size*4; n+=blockDim.x)
- ind_arg3_s[n] = ZERO_double;
-
- __syncthreads();
-
- // process set elements
-
- for (int n=threadIdx.x; n<nelems2; n+=blockDim.x) {
- int col2 = -1;
-
- if (n<nelem) {
-
- // initialise local variables
-
- for (int d=0; d<4; d++)
- arg4_l[d] = ZERO_double;
-
-
- // user-supplied kernel call
-
-
- bres_calc( ind_arg0_s+arg_map[0*set_size+n+offset_b]*2,
- ind_arg0_s+arg_map[1*set_size+n+offset_b]*2,
- ind_arg1_s+arg_map[2*set_size+n+offset_b]*4,
- ind_arg2_s+arg_map[3*set_size+n+offset_b]*1,
- arg4_l,
- arg5+(n+offset_b)*1 );
-
- col2 = colors[n+offset_b];
- }
-
- // store local variables
-
- int arg4_map;
-
- if (col2>=0) {
- arg4_map = arg_map[4*set_size+n+offset_b];
- }
-
- for (int col=0; col<ncolor; col++) {
- if (col2==col) {
- for (int d=0; d<4; d++)
- ind_arg3_s[d+arg4_map*4] += arg4_l[d];
- }
- __syncthreads();
- }
-
- }
-
- // apply pointered write/increment
-
- for (int n=threadIdx.x; n<ind_arg3_size*4; n+=blockDim.x)
- ind_arg3[n%4+ind_arg3_map[n/4]*4] += ind_arg3_s[n];
-
-}
-
-
-// host stub function
-
-void op_par_loop_bres_calc(char const *name, op_set set,
- op_arg arg0,
- op_arg arg1,
- op_arg arg2,
- op_arg arg3,
- op_arg arg4,
- op_arg arg5 ){
-
-
- int nargs = 6;
- op_arg args[6];
-
- args[0] = arg0;
- args[1] = arg1;
- args[2] = arg2;
- args[3] = arg3;
- args[4] = arg4;
- args[5] = arg5;
-
- int ninds = 4;
- int inds[6] = {0,0,1,2,3,-1};
-
- if (OP_diags>2) {
- printf(" kernel routine with indirection: bres_calc\n");
- }
-
- // get plan
-
- #ifdef OP_PART_SIZE_3
- int part_size = OP_PART_SIZE_3;
- #else
- int part_size = OP_part_size;
- #endif
-
- int set_size = op_mpi_halo_exchanges_cuda(set, nargs, args);
-
- // initialise timers
-
- double cpu_t1, cpu_t2, wall_t1=0, wall_t2=0;
- op_timing_realloc(3);
- OP_kernels[3].name = name;
- OP_kernels[3].count += 1;
-
- if (set->size >0) {
-
- op_plan *Plan = op_plan_get(name,set,part_size,nargs,args,ninds,inds);
-
- op_timers_core(&cpu_t1, &wall_t1);
-
- // execute plan
-
- int block_offset = 0;
-
- for (int col=0; col < Plan->ncolors; col++) {
-
- if (col==Plan->ncolors_core) op_mpi_wait_all_cuda(nargs,args);
-
- #ifdef OP_BLOCK_SIZE_3
- int nthread = OP_BLOCK_SIZE_3;
- #else
- int nthread = OP_block_size;
- #endif
-
- dim3 nblocks = dim3(Plan->ncolblk[col] >= (1<<16) ? 65535 : Plan->ncolblk[col],
- Plan->ncolblk[col] >= (1<<16) ? (Plan->ncolblk[col]-1)/65535+1: 1, 1);
- if (Plan->ncolblk[col] > 0) {
- int nshared = Plan->nsharedCol[col];
- op_cuda_bres_calc<<<nblocks,nthread,nshared>>>(
- (double *)arg0.data_d,
- (double *)arg2.data_d,
- (double *)arg3.data_d,
- (double *)arg4.data_d,
- Plan->ind_map,
- Plan->loc_map,
- (int *)arg5.data_d,
- Plan->ind_sizes,
- Plan->ind_offs,
- block_offset,
- Plan->blkmap,
- Plan->offset,
- Plan->nelems,
- Plan->nthrcol,
- Plan->thrcol,
- Plan->ncolblk[col],
- set_size);
-
- cutilSafeCall(cudaDeviceSynchronize());
- cutilCheckMsg("op_cuda_bres_calc execution failed\n");
- }
-
- block_offset += Plan->ncolblk[col];
- }
-
- op_timing_realloc(3);
- OP_kernels[3].transfer += Plan->transfer;
- OP_kernels[3].transfer2 += Plan->transfer2;
-
- }
-
-
- op_mpi_set_dirtybit_cuda(nargs, args);
-
- // update kernel record
-
- op_timers_core(&cpu_t2, &wall_t2);
- OP_kernels[3].time += wall_t2 - wall_t1;
-}
343 apps/c/airfoil/airfoil_hdf5_simple/dp/convert_mesh.cpp
@@ -1,343 +0,0 @@
-/*
- * Open source copyright declaration based on BSD open source template:
- * http://www.opensource.org/licenses/bsd-license.php
- *
- * This file is part of the OP2 distribution.
- *
- * Copyright (c) 2011, Mike Giles and others. Please see the AUTHORS file in
- * the main source directory for a full list of copyright holders.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * The name of Mike Giles may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY Mike Giles ''AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Mike Giles BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-//
-// standard headers
-//
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-
-//
-// mpi header file - included by user for user level mpi
-//
-
-#include <mpi.h>
-
-// global constants
-
-double gam, gm1, cfl, eps, mach, alpha, qinf[4];
-
-//
-// OP header file
-//
-
-#include "op_lib_cpp.h"
-#include "op_lib_mpi.h"
-#include "op_util.h"
-
-//
-//hdf5 header
-//
-
-#include "hdf5.h"
-
-//
-// kernel routines for parallel loops
-//
-
-#include "save_soln.h"
-#include "adt_calc.h"
-#include "res_calc.h"
-#include "bres_calc.h"
-#include "update.h"
-
-//
-// op_par_loop declarations
-//
-
-#include "op_seq.h"
-
-static void scatter_double_array(double* g_array, double* l_array, int comm_size, int g_size,
- int l_size, int elem_size)
-{
- int* sendcnts = (int *) malloc(comm_size*sizeof(int));
- int* displs = (int *) malloc(comm_size*sizeof(int));
- int disp = 0;
-
- for(int i = 0; i<comm_size; i++)
- {
- sendcnts[i] = elem_size*compute_local_size (g_size, comm_size, i);
- }
- for(int i = 0; i<comm_size; i++)
- {
- displs[i] = disp;
- disp = disp + sendcnts[i];
- }
-
- MPI_Scatterv(g_array, sendcnts, displs, MPI_DOUBLE, l_array,
- l_size*elem_size, MPI_DOUBLE, MPI_ROOT, MPI_COMM_WORLD );
-
- free(sendcnts);
- free(displs);
-}
-
-static void scatter_int_array(int* g_array, int* l_array, int comm_size, int g_size,
- int l_size, int elem_size)
-{
- int* sendcnts = (int *) malloc(comm_size*sizeof(int));
- int* displs = (int *) malloc(comm_size*sizeof(int));
- int disp = 0;
-
- for(int i = 0; i<comm_size; i++)
- {
- sendcnts[i] = elem_size*compute_local_size (g_size, comm_size, i);
- }
- for(int i = 0; i<comm_size; i++)
- {
- displs[i] = disp;
- disp = disp + sendcnts[i];
- }
-
- MPI_Scatterv(g_array, sendcnts, displs, MPI_INT, l_array,
- l_size*elem_size, MPI_INT, MPI_ROOT, MPI_COMM_WORLD );
-
- free(sendcnts);
- free(displs);
-}
-
-static void check_scan(int items_received, int items_expected)
-{
- if(items_received != items_expected) {
- printf("error reading from new_grid.dat\n");
- exit(-1);
- }
-}
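
Both scatter helpers above size their send counts with compute_local_size (provided by the op_util header included earlier), so each rank receives a contiguous block of the global array. A minimal sketch of that partitioning, assuming the usual scheme in which the remainder is spread over the first few ranks:

  // sketch of the block partitioning assumed by the scatter helpers
  static int compute_local_size_sketch(int global_size, int comm_size, int rank)
  {
    int local = global_size / comm_size;   // equal share for every rank
    if (rank < global_size % comm_size)    // first 'remainder' ranks get
      local++;                             // one extra element each
    return local;
  }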
-
-
-//
-// main program
-//
-
-int main(int argc, char **argv)
-{
- // OP initialisation
- op_init(argc,argv,2);
-
- //MPI for user I/O
- int my_rank;
- int comm_size;
- MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
- MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
-
- int *becell, *ecell, *bound, *bedge, *edge, *cell;
- double *x, *q, *qold, *adt, *res;
-
- int nnode,ncell,nedge,nbedge;
-
- /**------------------------BEGIN I/O -------------------**/
-
- char file[] = "new_grid.dat";
- char file_out[] = "new_grid_out.h5";
-
- /* read in grid from disk on root processor */
- FILE *fp;
-
- if ( (fp = fopen(file,"r")) == NULL) {
- op_printf("can't open file %s\n",file); exit(-1);
- }
-
- int g_nnode,g_ncell,g_nedge,g_nbedge;
-
- check_scan(fscanf(fp,"%d %d %d %d \n",&g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);
-
- int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0;
- double *g_x = 0,*g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;
-
- // set constants
-
- op_printf("initialising flow field\n");
- gam = 1.4f;
- gm1 = gam - 1.0f;
- cfl = 0.9f;
- eps = 0.05f;
-
-  // assign the file-scope flow constants (a local double here would
-  // shadow them and leave the globals written to HDF5 uninitialised)
-  mach  = 0.4f;
-  alpha = 3.0f*atan(1.0f)/45.0f;
- double p = 1.0f;
- double r = 1.0f;
- double u = sqrt(gam*p/r)*mach;
- double e = p/(r*gm1) + 0.5f*u*u;
-
- qinf[0] = r;
- qinf[1] = r*u;
- qinf[2] = 0.0f;
- qinf[3] = r*e;
-
- op_printf("reading in grid \n");
- op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n"
- ,g_nnode,g_ncell,g_nedge,g_nbedge);
-
- if(my_rank == MPI_ROOT) {
- g_cell = (int *) malloc(4*g_ncell*sizeof(int));
- g_edge = (int *) malloc(2*g_nedge*sizeof(int));
- g_ecell = (int *) malloc(2*g_nedge*sizeof(int));
- g_bedge = (int *) malloc(2*g_nbedge*sizeof(int));
- g_becell = (int *) malloc( g_nbedge*sizeof(int));
- g_bound = (int *) malloc( g_nbedge*sizeof(int));
-
- g_x = (double *) malloc(2*g_nnode*sizeof(double));
- g_q = (double *) malloc(4*g_ncell*sizeof(double));
- g_qold = (double *) malloc(4*g_ncell*sizeof(double));
- g_res = (double *) malloc(4*g_ncell*sizeof(double));
- g_adt = (double *) malloc( g_ncell*sizeof(double));
-
- for (int n=0; n<g_nnode; n++){
- check_scan(fscanf(fp,"%lf %lf \n",&g_x[2*n], &g_x[2*n+1]), 2);
- }
-
- for (int n=0; n<g_ncell; n++) {
- check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n ], &g_cell[4*n+1],
- &g_cell[4*n+2], &g_cell[4*n+3]), 4);
- }
-
- for (int n=0; n<g_nedge; n++) {
- check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n],&g_edge[2*n+1],
- &g_ecell[2*n],&g_ecell[2*n+1]), 4);
- }
-
- for (int n=0; n<g_nbedge; n++) {
- check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1],
- &g_becell[n],&g_bound[n]), 4);
- }
-
- //initialise flow field and residual
-
- for (int n=0; n<g_ncell; n++) {
- for (int m=0; m<4; m++) {
- g_q[4*n+m] = qinf[m];
- g_res[4*n+m] = 0.0f;
- }
- }
- }
-
- fclose(fp);
-
- nnode = compute_local_size (g_nnode, comm_size, my_rank);
- ncell = compute_local_size (g_ncell, comm_size, my_rank);
- nedge = compute_local_size (g_nedge, comm_size, my_rank);
- nbedge = compute_local_size (g_nbedge, comm_size, my_rank);
-
- op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n"
- ,my_rank,nnode,ncell,nedge,nbedge);
-
- /*Allocate memory to hold local sets, mapping tables and data*/
- cell = (int *) malloc(4*ncell*sizeof(int));
- edge = (int *) malloc(2*nedge*sizeof(int));
- ecell = (int *) malloc(2*nedge*sizeof(int));
- bedge = (int *) malloc(2*nbedge*sizeof(int));
- becell = (int *) malloc( nbedge*sizeof(int));
- bound = (int *) malloc( nbedge*sizeof(int));
-
- x = (double *) malloc(2*nnode*sizeof(double));
- q = (double *) malloc(4*ncell*sizeof(double));
- qold = (double *) malloc(4*ncell*sizeof(double));
- res = (double *) malloc(4*ncell*sizeof(double));
- adt = (double *) malloc( ncell*sizeof(double));
-
- /* scatter sets, mappings and data on sets*/
- scatter_int_array(g_cell, cell, comm_size, g_ncell,ncell, 4);
- scatter_int_array(g_edge, edge, comm_size, g_nedge,nedge, 2);
- scatter_int_array(g_ecell, ecell, comm_size, g_nedge,nedge, 2);
- scatter_int_array(g_bedge, bedge, comm_size, g_nbedge,nbedge, 2);
- scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge, 1);
- scatter_int_array(g_bound, bound, comm_size, g_nbedge,nbedge, 1);
-
- scatter_double_array(g_x, x, comm_size, g_nnode,nnode, 2);
- scatter_double_array(g_q, q, comm_size, g_ncell,ncell, 4);
- scatter_double_array(g_qold, qold, comm_size, g_ncell,ncell, 4);
- scatter_double_array(g_res, res, comm_size, g_ncell,ncell, 4);
- scatter_double_array(g_adt, adt, comm_size, g_ncell,ncell, 1);
-
-  /* Free memory allocated to global arrays on rank 0
-     after scattering to all processes */
- if(my_rank == MPI_ROOT) {
- free(g_cell);
- free(g_edge);
- free(g_ecell);
- free(g_bedge);
- free(g_becell);
- free(g_bound);
- free(g_x );
- free(g_q);
- free(g_qold);
- free(g_adt);
- free(g_res);
- }
-
- /**------------------------END I/O -----------------------**/
-
-  /* FIXME: It's not clear to the compiler that something is going on behind
-     the scenes here. Hence these variables are reported as unused */
-
- op_set nodes = op_decl_set(nnode, "nodes");
- op_set edges = op_decl_set(nedge, "edges");
- op_set bedges = op_decl_set(nbedge, "bedges");
- op_set cells = op_decl_set(ncell, "cells");
-
- op_map pedge = op_decl_map(edges, nodes,2,edge, "pedge");
- op_map pecell = op_decl_map(edges, cells,2,ecell, "pecell");
- op_map pbedge = op_decl_map(bedges,nodes,2,bedge, "pbedge");
- op_map pbecell = op_decl_map(bedges,cells,1,becell,"pbecell");
- op_map pcell = op_decl_map(cells, nodes,4,cell, "pcell");
-
- op_dat p_bound = op_decl_dat(bedges,1,"int" ,bound,"p_bound");
- op_dat p_x = op_decl_dat(nodes ,2,"double",x ,"p_x");
- op_dat p_q = op_decl_dat(cells ,4,"double",q ,"p_q");
- op_dat p_qold = op_decl_dat(cells ,4,"double",qold ,"p_qold");
- op_dat p_adt = op_decl_dat(cells ,1,"double",adt ,"p_adt");
- op_dat p_res = op_decl_dat(cells ,4,"double",res ,"p_res");
-
- op_decl_const(1,"double",&gam );
- op_decl_const(1,"double",&gm1 );
- op_decl_const(1,"double",&cfl );
- op_decl_const(1,"double",&eps );
- op_decl_const(1,"double",&mach );
- op_decl_const(1,"double",&alpha);
- op_decl_const(4,"double",qinf );
-
- op_write_hdf5(file_out);
- op_write_const_hdf5("gam", 1,"double",(char *)&gam, "new_grid_out.h5");
- op_write_const_hdf5("gm1", 1,"double",(char *)&gm1, "new_grid_out.h5");
- op_write_const_hdf5("cfl", 1,"double",(char *)&cfl, "new_grid_out.h5");
- op_write_const_hdf5("eps", 1,"double",(char *)&eps, "new_grid_out.h5");
- op_write_const_hdf5("mach", 1,"double",(char *)&mach, "new_grid_out.h5");
- op_write_const_hdf5("alpha",1,"double",(char *)&alpha,"new_grid_out.h5");
- op_write_const_hdf5("qinf", 4,"double",(char *)qinf, "new_grid_out.h5");
-
- //create halos - for sanity check
- op_halo_create();
-
- op_exit();
-}
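
The HDF5 file written here is what the airfoil_hdf5 variants of the application consume: instead of re-parsing new_grid.dat, they declare the mesh straight from the file. A sketch of that read path, assuming OP2's HDF5 declaration routines and the set/map/dat names written above:

  // sketch: declaring the mesh from the converted file
  char file[] = "new_grid_out.h5";

  op_set nodes  = op_decl_set_hdf5(file, "nodes");
  op_set edges  = op_decl_set_hdf5(file, "edges");
  op_set bedges = op_decl_set_hdf5(file, "bedges");
  op_set cells  = op_decl_set_hdf5(file, "cells");

  op_map pedge   = op_decl_map_hdf5(edges,  nodes, 2, file, "pedge");
  op_map pbecell = op_decl_map_hdf5(bedges, cells, 1, file, "pbecell");

  op_dat p_bound = op_decl_dat_hdf5(bedges, 1, "int",    file, "p_bound");
  op_dat p_x     = op_decl_dat_hdf5(nodes,  2, "double", file, "p_x");
  op_dat p_q     = op_decl_dat_hdf5(cells,  4, "double", file, "p_q");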
30 apps/c/airfoil/airfoil_hdf5_simple/dp/res_calc.h
@@ -1,30 +0,0 @@
-inline void res_calc(double *x1, double *x2, double *q1, double *q2,
- double *adt1,double *adt2,double *res1,double *res2) {
- double dx,dy,mu, ri, p1,vol1, p2,vol2, f;
-
- dx = x1[0] - x2[0];
- dy = x1[1] - x2[1];
-
- ri = 1.0f/q1[0];
- p1 = gm1*(q1[3]-0.5f*ri*(q1[1]*q1[1]+q1[2]*q1[2]));
- vol1 = ri*(q1[1]*dy - q1[2]*dx);
-
- ri = 1.0f/q2[0];
- p2 = gm1*(q2[3]-0.5f*ri*(q2[1]*q2[1]+q2[2]*q2[2]));
- vol2 = ri*(q2[1]*dy - q2[2]*dx);
-
- mu = 0.5f*((*adt1)+(*adt2))*eps;
-
- f = 0.5f*(vol1* q1[0] + vol2* q2[0] ) + mu*(q1[0]-q2[0]);
- res1[0] += f;
- res2[0] -= f;
- f = 0.5f*(vol1* q1[1] + p1*dy + vol2* q2[1] + p2*dy) + mu*(q1[1]-q2[1]);
- res1[1] += f;
- res2[1] -= f;
- f = 0.5f*(vol1* q1[2] - p1*dx + vol2* q2[2] - p2*dx) + mu*(q1[2]-q2[2]);
- res1[2] += f;
- res2[2] -= f;
- f = 0.5f*(vol1*(q1[3]+p1) + vol2*(q2[3]+p2) ) + mu*(q1[3]-q2[3]);
- res1[3] += f;
- res2[3] -= f;
-}
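
Like bres_calc, this kernel is applied over the edges set by an op_par_loop: each edge reads its two endpoint coordinates and the state and timestep of its two adjacent cells, and increments both cells' residuals. A sketch of the corresponding call, assuming the airfoil mappings pedge (edge to nodes) and pecell (edge to cells):

  // sketch: one op_arg_dat per res_calc parameter, indices 0/1 selecting
  // the two nodes / two cells reached through each mapping
  op_par_loop_res_calc("res_calc", edges,
      op_arg_dat(p_x,   0, pedge,  2, "double", OP_READ),
      op_arg_dat(p_x,   1, pedge,  2, "double", OP_READ),
      op_arg_dat(p_q,   0, pecell, 4, "double", OP_READ),
      op_arg_dat(p_q,   1, pecell, 4, "double", OP_READ),
      op_arg_dat(p_adt, 0, pecell, 1, "double", OP_READ),
      op_arg_dat(p_adt, 1, pecell, 1, "double", OP_READ),
      op_arg_dat(p_res, 0, pecell, 4, "double", OP_INC),
      op_arg_dat(p_res, 1, pecell, 4, "double", OP_INC));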
239 apps/c/airfoil/airfoil_hdf5_simple/dp/res_calc_kernel.cpp
@@ -1,239 +0,0 @@
-//
-// auto-generated by op2.m on 16-Oct-2012 15:15:10
-//
-
-// user function
-
-#include "res_calc.h"
-
-
-// x86 kernel function
-
-void op_x86_res_calc(
- int blockIdx,
- double *ind_arg0,
- double *ind_arg1,
- double *ind_arg2,
- double *ind_arg3,
- int *ind_map,
- short *arg_map,
- int *ind_arg_sizes,
- int *ind_arg_offs,
- int block_offset,
- int *blkmap,
- int *offset,
- int *nelems,
- int *ncolors,
- int *colors,
- int set_size) {
-
- double arg6_l[4];
- double arg7_l[4];
-
- int *ind_arg0_map, ind_arg0_size;
- int *ind_arg1_map, ind_arg1_size;
- int *ind_arg2_map, ind_arg2_size;
- int *ind_arg3_map, ind_arg3_size;
- double *ind_arg0_s;
- double *ind_arg1_s;
- double *ind_arg2_s;
- double *ind_arg3_s;
- int nelem, offset_b;
-
- char shared[128000];
-
- if (0==0) {
-
- // get sizes and shift pointers and direct-mapped data
-
- int blockId = blkmap[blockIdx + block_offset];