Permalink
Browse files

shell script to make documentation

  • Loading branch information...
1 parent 62b2970 commit 39e1557f2b224057023742dac35c89a8d83273c9 @gihanmudalige gihanmudalige committed Dec 5, 2013
Showing with 251 additions and 78 deletions.
  1. +1 −0 .gitignore
  2. +4 −3 apps/c/airfoil/README.md
  3. +10 −4 doc/C++_Users_Guide.tex
  4. +67 −67 doc/airfoil-doc.tex
  5. +9 −0 doc/build_docs.sh
  6. +2 −3 doc/mpi-dev.tex
  7. +154 −0 doc/verbments.sty
  8. +4 −1 op2/c/Makefile
View
@@ -33,6 +33,7 @@
*.blg
*.bbl
*~
+*.pyg.*
#directories
./op2/c/lib/
View
@@ -34,15 +34,16 @@ compare_results -- small utility code to compare two files (txt or bin), used to
The various parallel versions of Airfoil should be compared against the single-threaded CPU version (also known as the
reference implementation) to ascertain the correctness of the results. The p_q array holds the final result and as such
-will be the data array to compare. One way to achieve this is to use :
+will be the data array to compare. One way to achieve this is to use the following OP2 calls to write the data array to
+text or binary files, for example after the end of the 1000 iterations in the airfoil code.
```
op_print_dat_to_txtfile(p_q, "out_grid_seq.dat"); //ASCII
op_print_dat_to_binfile(p_q, "out_grid_seq.bin"); //Binary
```
-For example after the end of the 1000 iterations in the airfoil code and then use the code in compare.cpp and
-comparebin.cpp to compare the text file or binary file with the reference implementation.
+Then the code in compare.cpp and comparebin.cpp can be used to compare the text file or binary file with the reference
+implementation.
Bitwise accuracy can be expected across systems for the double precision version to within the accuracy of machine
precision. For the single precision version, answers should be very close. A summary print of the rms value of the
View
@@ -3,6 +3,7 @@
\usepackage[footnotesize]{subfigure}
\usepackage{graphicx}
\usepackage{verbatim}
+\usepackage{verbments}
\setlength{\oddsidemargin}{-0.01in}
\setlength{\topmargin}{-0.4in}
\setlength{\textheight}{9.0in}
@@ -19,9 +20,9 @@
%
%
\newenvironment{routine}[2]
-{\vspace{.0in}{\noindent\bf\hspace{-5pt} #1}{\\ \noindent #2}
+{\vspace{.0in}{\noindent\bf\hspace{-5pt} #1}{\\ \noindent #2}
\begin{list}{}{
-\renewcommand{\makelabel}[1]{{\tt ##1} \hfil}
+\renewcommand{\makelabel}[1]{{\tt ##1 } \hfil}
\itemsep 0pt plus 1pt minus 1pt
\leftmargin 1.5in
\rightmargin 0.0in
@@ -32,6 +33,10 @@
}{\end{list}}
%
+
+%
+
+
\begin{document}
\title{OP2 C++ User's Manual}
@@ -286,9 +291,9 @@ \section{Overview}
\section{OP2 C++ API}
\subsection{Initialisation and termination routines}
-
+%
\subsubsection*{}\addcontentsline{toc}{subsubsection}{op\_init}
-\begin{routine} {void op\_init(int argc, char **argv, int diags\_level)}
+\begin{routine} { void op\_init(int argc, char **argv, int diags\_level)}
{This routine must be called before all other OP routines. Under MPI back-ends, this routine also calls
\texttt{MPI\_Init()} unless it has already been called}
\item[argc, argv] the usual command line arguments
@@ -302,6 +307,7 @@ \subsubsection*{}\addcontentsline{toc}{subsubsection}{op\_init}
\\7 -- report positive checks in op\_plan\_check;
\end{routine}
+
\subsubsection*{}\addcontentsline{toc}{subsubsection}{op\_exit}
\begin{routine} {void op\_exit()}
{This routine must be called last to cleanly terminate the OP computation. Under MPI back-ends, this routine also calls
View
@@ -4,6 +4,7 @@
\usepackage{graphicx}
\usepackage[footnotesize]{subfigure}
\usepackage{listings}
+\usepackage{verbments}
\topmargin 0.in \headheight 0pt \headsep 0pt \raggedbottom
\oddsidemargin 0.1in
@@ -82,23 +83,24 @@ \section{Airfoil - The Development CPU Version}
the edges of the mesh.
+
\begin{figure}\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=c]
// calculate flux residual
- op_par_loop(res_calc,"res_calc",edges,
- op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ),
- op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ),
- op_arg_dat(p_q, 0,pecell,4,"double",OP_READ),
- op_arg_dat(p_q, 1,pecell,4,"double",OP_READ),
- op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ),
- op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ),
- op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ),
- op_arg_dat(p_res, 1,pecell,4,"double",OP_INC ));
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+op_par_loop(res_calc,"res_calc",edges,
+ op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ),
+ op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ),
+ op_arg_dat(p_q, 0,pecell,4,"double",OP_READ),
+ op_arg_dat(p_q, 1,pecell,4,"double",OP_READ),
+ op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ),
+ op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ),
+ op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ),
+ op_arg_dat(p_res, 1,pecell,4,"double",OP_INC ));
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small res\_calc loop}
-\normalsize\vspace{-10pt}\label{fig:rescalc}
+\normalsize\vspace{-0pt}\label{fig:rescalc}
\end{figure}
\noindent The first argument specifies the name of the function (implemented in \texttt{res\_calc.h}) that contains the
@@ -109,23 +111,21 @@ \section{Airfoil - The Development CPU Version}
For Airfoil, \texttt{airfoil.cpp} includes the OP2 header files and the elemental kernel header files as follows.
Additionally global constants must be declared before the \texttt{main()} function.
-\begin{figure}[!h]\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\begin{figure}\small
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=c]
// OP2 header file
#include "op_seq.h"
-
// global constants
double gam, gm1, cfl, eps, mach, alpha, qinf[4];
-
// user kernel routines for parallel loops
#include "save_soln.h"
#include "adt_calc.h"
#include "res_calc.h"
#include "bres_calc.h"
#include "update.h"
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small Header and global constants }
\normalsize\vspace{-10pt}\label{fig:header}
\end{figure}
@@ -142,8 +142,8 @@ \section{Airfoil - The Development CPU Version}
\texttt{op\_set}s, \texttt{op\_map}s and \texttt{op\_dat}s as follows:
\begin{figure}[!h]\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=c]
op_set nodes = op_decl_set(nnode, "nodes");
op_set edges = op_decl_set(nedge, "edges");
op_set bedges = op_decl_set(nbedge, "bedges");
@@ -169,8 +169,8 @@ \section{Airfoil - The Development CPU Version}
op_decl_const(1,"double",&mach );
op_decl_const(1,"double",&alpha);
op_decl_const(4,"double",qinf );
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small OP2 set, map and dat declarations }
\normalsize\vspace{-10pt}\label{fig:decls}
\end{figure}
@@ -180,8 +180,8 @@ \section{Airfoil - The Development CPU Version}
\texttt{cells}. Any constants used by the program are also declared at this point using \texttt{op\_decl\_const()}.
The five parallel loops that make up the Airfoil application are detailed next within a time-marching loop.
\begin{figure}[!h]\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=c]
//main time-marching loop
for(int iter=1; iter<=niter; iter++) {
// save old flow solution
@@ -203,8 +203,8 @@ \section{Airfoil - The Development CPU Version}
}
...
}
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small Time marching loop }
\normalsize\vspace{-10pt}\label{fig:timemarching}
\end{figure}
@@ -222,20 +222,20 @@ \section{Airfoil - The Development CPU Version}
sequential library \texttt{libop2\_seq.a}, for example as follows:
\begin{figure}[!h]\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=make]
OP2_INC = -I$(OP2_INSTALL_PATH)/c/include
OP2_LIB = -L$(OP2_INSTALL_PATH)/c/lib
CPP = icpc
-CPPFLAGS = -O3 -xSSE4.2
+CPPFLAGS = -O3 -xSSE4.2
airfoil_seq: airfoil.cpp save_soln.h adt_calc.h res_calc.h \
- bres_calc.h update.h
+ bres_calc.h update.h
$(CPP) $(CPPFLAGS) airfoil.cpp \
$(OP2_INC) $(OP2_LIB) -lop2_seq -o airfoil_seq
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small Sequential developer version build }
-\normalsize\vspace{-10pt}\label{fig:seqbuild}
+\normalsize\vspace{-0pt}\label{fig:seqbuild}
\end{figure}
\noindent Once the application is debugged and tested on a single CPU, OP2's code generation capabilities can be used
@@ -285,7 +285,7 @@ \section{Generating Single Node OpenMP and CUDA Executables}\label{sec/cuda_open
\begin{figure}[!h]\small
\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\begin{pyglist}[language=make]
OP2_INC = -I$(OP2_INSTALL_PATH)/c/include
OP2_LIB = -L$(OP2_INSTALL_PATH)/c/lib
CPP = icpc
@@ -296,11 +296,11 @@ \section{Generating Single Node OpenMP and CUDA Executables}\label{sec/cuda_open
adt_calc_kernel.cpp adt_calc.h \
res_calc_kernel.cpp res_calc.h \
bres_calc_kernel.cpp bres_calc.h \
- update_kernel.cpp update.h \
+ update_kernel.cpp update.h
$(CPP) $(CPPFLAGS) $(OMPFLAGS) $(OP2_INC) $(OP2_LIB) \
- airfoil_op.cpp airfoil_kernels.cpp \
+ airfoil_op.cpp airfoil_kernels.cpp \
-lm -lop2_openmp -o airfoil_openmp
-\end{lstlisting}\vspace{-10pt}
+\end{pyglist}
\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small OpenMP version build }
\normalsize\vspace{-10pt}\label{fig:ompbuild}
@@ -315,8 +315,8 @@ \section{Generating Single Node OpenMP and CUDA Executables}\label{sec/cuda_open
conventional C++ compiler and linking with the CUDA back-end library, \texttt{libop2\_cuda.a}, for example as follows:
\begin{figure}[!h]\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=make]
OP2_INC = -I$(OP2_INSTALL_PATH)/c/include
OP2_LIB = -L$(OP2_INSTALL_PATH)/c/lib
CPP = icpc
@@ -335,12 +335,12 @@ \section{Generating Single Node OpenMP and CUDA Executables}\label{sec/cuda_open
adt_calc_kernel.cu adt_calc.h \
res_calc_kernel.cu res_calc.h \
bres_calc_kernel.cu bres_calc.h \
- update_kernel.cu update.h \
+ update_kernel.cu update.h
nvcc $(NVCCFLAGS) $(OP2_INC) \
-c -o airfoil_kernels_cu.o \
airfoil_kernels.cu
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small CUDA version build }
\normalsize\vspace{-10pt}\label{fig:cudabuild}
\end{figure}
@@ -364,8 +364,8 @@ \section{Building Airfoil for Distributed Memory (MPI) Execution}\label{sec/mpi}
with mpiCC and linking with the MPI back-end library, \texttt{libop2\_mpi.a}, for example as follows:
\begin{figure}[!h]\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=make]
MPICPP = mpiCC
MPIFLAGS = -O3 -xSSE4.2
@@ -383,10 +383,10 @@ \section{Building Airfoil for Distributed Memory (MPI) Execution}\label{sec/mpi}
$(PARMETIS_INC) $(PTSCOTCH_INC) \
$(OP2_LIB) airfoil_mpi.cpp -lop2_mpi \
$(PARMETIS_LIB) $(PTSCOTCH_LIB) -o airfoil_mpi
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small MPI version build }
-\normalsize\vspace{-10pt}\label{fig:mpibuild}
+\normalsize\vspace{-0pt}\label{fig:mpibuild}
\end{figure}
\noindent The unstructured mesh will be repartitioned by OP2, using parallel graph/mesh partitioning libraries
@@ -411,8 +411,8 @@ \section{Building Airfoil for Distributed Memory (MPI) Execution}\label{sec/mpi}
C++ compiler and linking with the back-end library, \texttt{libop2\_mpi.a}, for example as follows:
\begin{figure}[!h]\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=make]
airfoil_mpi_openmp: airfoil_mpi_op.cpp airfoil_kernels.cpp \
save_soln_kernel.cpp save_soln.h \
adt_calc_kernel.cpp adt_calc.h \
@@ -426,10 +426,10 @@ \section{Building Airfoil for Distributed Memory (MPI) Execution}\label{sec/mpi}
$(OP2_INC) $(PARMETIS_INC) $(PTSCOTCH_INC) \
$(OP2_LIB) -lop2_mpi \
$(PARMETIS_LIB) $(PTSCOTCH_LIB) -o airfoil_mpi_openmp
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small MPI+OpenMP version build }
-\normalsize\vspace{-10pt}\label{fig:mpi_openmpbuild}
+\normalsize\vspace{-0pt}\label{fig:mpi_openmpbuild}
\end{figure}
\noindent \texttt{airfoil\_mpi\_openmp} needs to be executed using mpirun and will utilise OMP\_NUM\_THREADS per MPI
@@ -440,8 +440,8 @@ \section{Building Airfoil for Distributed Memory (MPI) Execution}\label{sec/mpi}
CUDA compiler nvcc and linking with the back-end library, \texttt{libop2\_mpi\_cuda.a}, for example as follows:
\begin{figure}[h]\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=make]
airfoil_mpi_cuda: airfoil_mpi_op.cpp airfoil_kernels_mpi_cu.o Makefile
$(MPICPP) $(MPIFLAGS) airfoil_mpi_op.cpp \
airfoil_kernels_mpi_cu.o \
@@ -460,8 +460,8 @@ \section{Building Airfoil for Distributed Memory (MPI) Execution}\label{sec/mpi}
nvcc $(INC) $(NVCCFLAGS) $(OP2_INC) \
-I $(MPI_INSTALL_PATH)/include \
-c -o airfoil_kernels_mpi_cu.o airfoil_kernels.cu
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small MPI+CUDA version build }
\normalsize\vspace{-10pt}\label{fig:mpi_cudabuild}
\end{figure}
@@ -486,8 +486,8 @@ \section{Airfoil with HDF5 I/O}\label{hdf5}
\noindent The library \texttt{libop2\_hdf5.a} needs to be linked when building single node executables, for example:
\begin{figure}[!h]\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=make]
HDF5_INC = -I$(HDF5_INSTALL_PATH)/include
HDF5_LIB = -L$(HDF5_INSTALL_PATH)/lib -lhdf5 -lz
@@ -507,8 +507,8 @@ \section{Airfoil with HDF5 I/O}\label{hdf5}
nvcc $(INC) $(NVCCFLAGS) $(OP2_INC) $(HDF5_INC) \
-I /home/gihan/openmpi-intel/include \
-c -o airfoil_kernels_cu.o airfoil_kernels.cu
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small CUDA with HDF5 build }
\normalsize\vspace{-10pt}\label{fig:hdf5build}
\end{figure}
@@ -518,8 +518,8 @@ \section{Airfoil with HDF5 I/O}\label{hdf5}
implicitly. Thus linking should not be done with \texttt{libop2\_hdf5.a} in this case, for example:
\begin{figure}\small
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
-\begin{lstlisting}
+\vspace{-0pt}\noindent\line(1,0){8}\vspace{-20pt}
+\begin{pyglist}[language=make]
HDF5_INC = -I$(HDF5_INSTALL_PATH)/include
HDF5_LIB = -L$(HDF5_INSTALL_PATH)/lib -lhdf5 -lz
@@ -541,8 +541,8 @@ \section{Airfoil with HDF5 I/O}\label{hdf5}
nvcc $(INC) $(NVCCFLAGS) $(OP2_INC) \
-I $(MPI_INSTALL_PATH)/include \
-c -o airfoil_kernels_mpi_cu.o airfoil_kernels.cu
-\end{lstlisting}\vspace{-10pt}
-\vspace{-0pt}\noindent\line(1,0){8}\vspace{-10pt}
+\end{pyglist}
+\vspace{-10pt}\noindent\line(1,0){8}\vspace{-10pt}
\caption{\small MPI+CUDA with HDF5 build }
\normalsize\vspace{-0pt}\label{fig:mpicudahdf5build}
\end{figure}
Oops, something went wrong.

0 comments on commit 39e1557

Please sign in to comment.