diff --git a/CMakeLists.txt b/CMakeLists.txt index a7b3d7c3..d60cb450 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,11 @@ find_package(HYPRE REQUIRED) find_package(MFEM REQUIRED) find_package(MPI REQUIRED) +option(LAGHOS_USE_CALIPER "Use Caliper" OFF) +if (LAGHOS_USE_CALIPER) + find_package(caliper REQUIRED) +endif() + set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to use.") set(CMAKE_CXX_STANDARD_REQUIRED ON CACHE BOOL "Force the use of the chosen C++ standard.") @@ -60,7 +65,7 @@ endif() list(APPEND SOURCES - laghos_assembly.cpp laghos.cpp laghos_solver.cpp + laghos_assembly.cpp laghos.cpp laghos_solver.cpp sedov/sedov_sol.cpp ) if (MFEM_USE_CUDA) @@ -76,3 +81,24 @@ target_link_libraries(laghos PUBLIC MPI::MPI_CXX PUBLIC HYPRE::HYPRE ) +if(LAGHOS_USE_CALIPER) + target_link_libraries(laghos + PUBLIC caliper + ) + target_compile_definitions(laghos + PUBLIC LAGHOS_USE_CALIPER) +endif() + +if (MFEM_USE_CUDA) + set_source_files_properties(sedov/sedov_sol.cpp sedov/sedov.cpp PROPERTIES LANGUAGE CUDA) +endif() + +add_executable(sedov sedov/sedov_sol.cpp sedov/sedov.cpp) + +target_include_directories(sedov PUBLIC "${MFEM_DIR}/../../../include/mfem") + +target_link_libraries(sedov + PUBLIC mfem + PUBLIC MPI::MPI_CXX + PUBLIC HYPRE::HYPRE +) diff --git a/INSTALL_cmake.md b/INSTALL_cmake.md new file mode 100644 index 00000000..979c33b1 --- /dev/null +++ b/INSTALL_cmake.md @@ -0,0 +1,165 @@ +### CMake + +This installs built dependencies to `INSTALLDIR` using the `CC` C-compiler and `CXX` C++17-compiler. + +Build METIS (optional): +```sh +git clone https://github.com/KarypisLab/METIS.git +cd METIS +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=$CC -DCMAKE_INSTALL_PREFIX=$INSTALLDIR +make -j install +``` +For large runs (problem size above 2 billion unknowns), add `-DMETIS_USE_LONGINDEX=ON` option to the above `cmake` line. If building without METIS only Cartesian partitioning is supported. 
+ +Build Umpire (CUDA, optional): +```sh +git clone https://github.com/LLNL/Umpire.git +cd Umpire +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=native -DENABLE_CUDA=ON -DUMPIRE_ENABLE_C=ON -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_CUDA_COMPILER=$CUDACC +make -j install +``` + +Build Umpire (HIP, optional): +```sh +git clone https://github.com/LLNL/Umpire.git +cd Umpire +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_HIP_ARCHITECTURES=native -DENABLE_HIP=ON -DUMPIRE_ENABLE_C=ON -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_HIP_COMPILER=$HIPCC +make -j install +``` + +Build *hypre* (CPU-only): + +```sh +git clone https://github.com/hypre-space/hypre.git +cd hypre/build +cmake ../src -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX +make -j install +``` + +Build *hypre* (CUDA): + +```sh +git clone https://github.com/hypre-space/hypre.git +cd hypre/build +cmake ../src -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DHYPRE_ENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_CUDA_COMPILER=$CUDACC -DHYPRE_ENABLE_GPU_AWARE_MPI=ON -DHYPRE_ENABLE_UMPIRE=ON +make -j install +``` +``HYPRE_ENABLE_GPU_AWARE_MPI`` and ``HYPRE_ENABLE_UMPIRE`` may be optionally turned off. + +Build *hypre* (HIP): + +```sh +git clone https://github.com/hypre-space/hypre.git +cd hypre/build +cmake ../src -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DHYPRE_ENABLE_HIP=ON -DCMAKE_HIP_ARCHITECTURES=native -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_HIP_COMPILER=$HIPCC -DHYPRE_ENABLE_GPU_AWARE_MPI=ON -DHYPRE_ENABLE_UMPIRE=ON +make -j install +``` +``HYPRE_ENABLE_GPU_AWARE_MPI`` and ``HYPRE_ENABLE_UMPIRE`` may be optionally turned off. 
+For large runs (problem size above 2 billion unknowns), enable the `HYPRE_ENABLE_MIXEDINT` option. + +Build MFEM (CPU-only): +```sh +git clone https://github.com/mfem/mfem.git +cd mfem +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DHYPRE_DIR=$INSTALLDIR -DMETIS_DIR=$INSTALLDIR -DMFEM_USE_MPI=ON -DMFEM_USE_METIS=ON -DCMAKE_CXX_COMPILER=$CXX +make -j install +``` +`MFEM_USE_METIS` may be optionally disabled. +See the [MFEM building page](http://mfem.org/building/) for additional details. + +Build MFEM (CUDA): +```sh +git clone https://github.com/mfem/mfem.git +cd mfem +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DHYPRE_DIR=$INSTALLDIR -DMETIS_DIR=$INSTALLDIR -DMFEM_USE_MPI=ON -DMFEM_USE_METIS=ON -DMFEM_USE_CUDA=ON -DMFEM_USE_UMPIRE=ON -DCMAKE_CUDA_ARCHITECTURES=native -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_CUDA_COMPILER=$CUDACC -DUMPIRE_DIR=$INSTALLDIR +make -j install +``` +`MFEM_USE_METIS` and `MFEM_USE_UMPIRE` may be optionally disabled. +See the [MFEM building page](http://mfem.org/building/) for additional details. + +Build MFEM (HIP): +```sh +git clone https://github.com/mfem/mfem.git +cd mfem +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DHYPRE_DIR=$INSTALLDIR -DMETIS_DIR=$INSTALLDIR -DMFEM_USE_MPI=ON -DMFEM_USE_METIS=ON -DMFEM_USE_HIP=ON -DMFEM_USE_UMPIRE=ON -DCMAKE_HIP_ARCHITECTURES=native -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_HIP_COMPILER=$HIPCC -DUMPIRE_DIR=$INSTALLDIR +make -j install +``` +`MFEM_USE_METIS` and `MFEM_USE_UMPIRE` may be optionally disabled. +See the [MFEM building page](http://mfem.org/building/) for additional details. + +GLVis (optional): +```sh +git clone https://github.com/GLVis/glvis.git +cd glvis +mkdir build +cd build +cmake .. 
-DMFEM_DIR=$INSTALLDIR -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DCMAKE_CXX_COMPILER=$CXX +``` +The easiest way to visualize Laghos results is to have GLVis running in a +separate terminal. Then the `-vis` option in Laghos will stream results directly +to the GLVis socket. + +Caliper (Optional): +1. Clone and build Adiak: +```sh +git clone --recursive https://github.com/LLNL/Adiak.git +cd Adiak +mkdir build && cd build +cmake -DBUILD_SHARED_LIBS=On -DENABLE_MPI=On \ + -DCMAKE_INSTALL_PREFIX=$INSTALLDIR .. +make -j install +``` +2. Clone and build Caliper: +```sh +git clone https://github.com/LLNL/Caliper.git +cd Caliper +mkdir build && cd build +cmake -DWITH_MPI=True -DWITH_ADIAK=True -Dadiak_ROOT=$INSTALLDIR \ + -DCMAKE_INSTALL_PREFIX=$INSTALLDIR .. +make -j install +``` + +Laghos (CPU-only): +```sh +git clone https://github.com/CEED/Laghos.git +cd Laghos +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DCMAKE_CXX_COMPILER=$CXX -Dcaliper_ROOT=$INSTALLDIR -DLAGHOS_USE_CALIPER=ON +make -j +``` +`LAGHOS_USE_CALIPER` may be optionally disabled. + +Laghos (CUDA): +```sh +git clone https://github.com/CEED/Laghos.git +cd Laghos +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_CUDA_COMPILER=$CUDACC -DCMAKE_CUDA_ARCHITECTURES=native -Dcaliper_ROOT=$INSTALLDIR -DLAGHOS_USE_CALIPER=ON +make -j +``` +`LAGHOS_USE_CALIPER` may be optionally disabled. + +Laghos (HIP): +```sh +git clone https://github.com/CEED/Laghos.git +cd Laghos +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALLDIR -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_HIP_COMPILER=$HIPCC -DCMAKE_HIP_ARCHITECTURES=native -Dcaliper_ROOT=$INSTALLDIR -DLAGHOS_USE_CALIPER=ON +make -j +``` +`LAGHOS_USE_CALIPER` may be optionally disabled. 
diff --git a/INSTALL_makefile.md b/INSTALL_makefile.md new file mode 100644 index 00000000..72a01e7d --- /dev/null +++ b/INSTALL_makefile.md @@ -0,0 +1,86 @@ +### Makefile + +To build the miniapp, first download *hypre* and METIS from the links above +and put everything on the same level as the `Laghos` directory: +```sh +~> ls +Laghos/ v2.11.2.tar.gz metis-4.0.3.tar.gz +``` + +Build *hypre*: +```sh +~> tar -zxvf v2.11.2.tar.gz +~> cd hypre-2.11.2/src/ +~/hypre-2.11.2/src> ./configure --disable-fortran +~/hypre-2.11.2/src> make -j +~/hypre-2.11.2/src> cd ../.. +~> ln -s hypre-2.11.2 hypre +``` +For large runs (problem size above 2 billion unknowns), add the +`--enable-bigint` option to the above `configure` line. + +Build METIS: +```sh +~> tar -zxvf metis-4.0.3.tar.gz +~> cd metis-4.0.3 +~/metis-4.0.3> make +~/metis-4.0.3> cd .. +~> ln -s metis-4.0.3 metis-4.0 +``` +This build is optional, as MFEM can be build without METIS by specifying +`MFEM_USE_METIS = NO` below. + +Clone and build the parallel version of MFEM: +```sh +~> git clone https://github.com/mfem/mfem.git ./mfem +~> cd mfem/ +~/mfem> git checkout master +~/mfem> make parallel -j +~/mfem> cd .. +``` +The above uses the `master` branch of MFEM. +See the [MFEM building page](http://mfem.org/building/) for additional details. + +(Optional) Clone and build GLVis: +```sh +~> git clone https://github.com/GLVis/glvis.git ./glvis +~> cd glvis/ +~/glvis> make +~/glvis> cd .. +``` +The easiest way to visualize Laghos results is to have GLVis running in a +separate terminal. Then the `-vis` option in Laghos will stream results directly +to the GLVis socket. + +(Optional) Build Caliper +1. Clone and build Adiak: +```sh +~> git clone --recursive https://github.com/LLNL/Adiak.git +~> cd Adiak +~/Adiak> mkdir build && cd build +~/Adiak> cmake -DBUILD_SHARED_LIBS=On -DENABLE_MPI=On \ + -DCMAKE_INSTALL_PREFIX=../../adiak .. +~/Adiak> make && make install +~/Adiak> cd ../.. +``` +2. 
Clone and build Caliper: +```sh +~> git clone https://github.com/LLNL/Caliper.git +~> cd Caliper +~/Caliper> mkdir build && cd build +~/Caliper> cmake -DWITH_MPI=True -DWITH_ADIAK=True -Dadiak_ROOT=../../adiak/ \ + -DCMAKE_INSTALL_PREFIX=../../caliper .. +~/Caliper> make && make install +~/Caliper> cd ../.. +``` + +Build Laghos +```sh +~> cd Laghos/ +~/Laghos> make -j +``` +This can be followed by `make test` and `make install` to check and install the +build respectively. See `make help` for additional options. + +See also the `make setup` target that can be used to automate the +download and building of hypre, METIS and MFEM. diff --git a/README.md b/README.md index 68a12d07..24ef702c 100644 --- a/README.md +++ b/README.md @@ -128,99 +128,29 @@ Other computational motives in Laghos include the following: Laghos has the following external dependencies: -- *hypre*, used for parallel linear algebra, we recommend version 2.11.2
- https://github.com/hypre-space/hypre/releases/tag/v2.11.2 +- *hypre*, used for parallel linear algebra, we recommend version 2.31.0 or newer
+ https://github.com/hypre-space/hypre/releases/tag/v2.31.0 -- METIS, used for parallel domain decomposition (optional), we recommend [version 4.0.3](https://github.com/mfem/tpls/blob/gh-pages/metis-4.0.3.tar.gz)
- https://github.com/mfem/tpls +- METIS, used for parallel domain decomposition (optional) + https://github.com/KarypisLab/METIS.git - MFEM, used for (high-order) finite element discretization, its GitHub master branch
https://github.com/mfem/mfem -To build the miniapp, first download *hypre* and METIS from the links above -and put everything on the same level as the `Laghos` directory: -```sh -~> ls -Laghos/ v2.11.2.tar.gz metis-4.0.3.tar.gz -``` +- Umpire, used for device memory pools in hypre and MFEM. This is only recommended for GPU-accelerated builds. (optional)
+ https://github.com/LLNL/Umpire.git -Build *hypre*: -```sh -~> tar -zxvf v2.11.2.tar.gz -~> cd hypre-2.11.2/src/ -~/hypre-2.11.2/src> ./configure --disable-fortran -~/hypre-2.11.2/src> make -j -~/hypre-2.11.2/src> cd ../.. -~> ln -s hypre-2.11.2 hypre -``` -For large runs (problem size above 2 billion unknowns), add the -`--enable-bigint` option to the above `configure` line. +- CMake 3.24.0+ or GNU Make +- C and C++17 compiler +- MPI -Build METIS: -```sh -~> tar -zxvf metis-4.0.3.tar.gz -~> cd metis-4.0.3 -~/metis-4.0.3> make -~/metis-4.0.3> cd .. -~> ln -s metis-4.0.3 metis-4.0 -``` -This build is optional, as MFEM can be build without METIS by specifying -`MFEM_USE_METIS = NO` below. +### Makefile -Clone and build the parallel version of MFEM: -```sh -~> git clone https://github.com/mfem/mfem.git ./mfem -~> cd mfem/ -~/mfem> git checkout master -~/mfem> make parallel -j -~/mfem> cd .. -``` -The above uses the `master` branch of MFEM. -See the [MFEM building page](http://mfem.org/building/) for additional details. +See INSTALL_makefile.md -(Optional) Clone and build GLVis: -```sh -~> git clone https://github.com/GLVis/glvis.git ./glvis -~> cd glvis/ -~/glvis> make -~/glvis> cd .. -``` -The easiest way to visualize Laghos results is to have GLVis running in a -separate terminal. Then the `-vis` option in Laghos will stream results directly -to the GLVis socket. +### CMake -(Optional) Build Caliper -1. Clone and build Adiak: -```sh -~> git clone --recursive https://github.com/LLNL/Adiak.git -~> cd Adiak -~/Adiak> mkdir build && cd build -~/Adiak> cmake -DBUILD_SHARED_LIBS=On -DENABLE_MPI=On \ - -DCMAKE_INSTALL_PREFIX=../../adiak .. -~/Adiak> make && make install -~/Adiak> cd ../.. -``` -2. Clone and build Caliper: -```sh -~> git clone https://github.com/LLNL/Caliper.git -~> cd Caliper -~/Caliper> mkdir build && cd build -~/Caliper> cmake -DWITH_MPI=True -DWITH_ADIAK=True -Dadiak_ROOT=../../adiak/ \ - -DCMAKE_INSTALL_PREFIX=../../caliper .. 
-~/Caliper> make && make install -~/Caliper> cd ../.. -``` - -Build Laghos -```sh -~> cd Laghos/ -~/Laghos> make -j -``` -This can be followed by `make test` and `make install` to check and install the -build respectively. See `make help` for additional options. - -See also the `make setup` target that can be used to automated the -download and building of hypre, METIS and MFEM. +See INSTALL_cmake.md ## Running @@ -231,14 +161,16 @@ partial assembly option (`-pa`). Some sample runs in 2D and 3D respectively are: ```sh -mpirun -np 8 ./laghos -p 1 -m data/square01_quad.mesh -rs 3 -tf 0.8 -pa -mpirun -np 8 ./laghos -p 1 -m data/cube01_hex.mesh -rs 2 -tf 0.6 -pa -vis +mpirun -np 8 ./laghos -p 1 -m data/square01_quad.mesh -dim 2 -rs 3 -tf 0.8 -pa +mpirun -np 8 ./laghos -p 1 -m data/cube01_hex.mesh -dim 3 -E0 2 -rs 2 -tf 0.6 -pa -vis ``` The latter produces the following density plot (notice the `-vis` option) [![Sedov blast image](data/sedov.png)](https://glvis.org/live/?stream=../data/laghos.saved) +To compare against the analytical solution the `-err` option can be used to compute the density error $\left(\int_{\Omega} (\rho_{sim} - \rho_{exact})^2 \, dV\right)^{1/2}$ of the final solution. + #### Taylor-Green and Gresho vortices Laghos includes also smooth test problems that expose all the principal @@ -283,7 +215,7 @@ To make sure the results are correct, we tabulate reference final iterations 1. `mpirun -np 8 ./laghos -p 0 -m data/square01_quad.mesh -rs 3 -tf 0.75 -pa` 2. `mpirun -np 8 ./laghos -p 0 -m data/cube01_hex.mesh -rs 1 -tf 0.75 -pa` 3. `mpirun -np 8 ./laghos -p 1 -m data/square01_quad.mesh -rs 3 -tf 0.8 -pa` -4. `mpirun -np 8 ./laghos -p 1 -m data/cube01_hex.mesh -rs 2 -tf 0.6 -pa` +4. `mpirun -np 8 ./laghos -p 1 -m data/cube01_hex.mesh -E0 2 -rs 2 -tf 0.6 -pa` 5. `mpirun -np 8 ./laghos -p 2 -m data/segment01.mesh -rs 5 -tf 0.2 -fa` 6. `mpirun -np 8 ./laghos -p 3 -m data/rectangle01_quad.mesh -rs 2 -tf 3.0 -pa` 7. 
`mpirun -np 8 ./laghos -p 3 -m data/box01_hex.mesh -rs 1 -tf 5.0 -pa` @@ -302,17 +234,17 @@ To make sure the results are correct, we tabulate reference final iterations | 8. | 776 | 0.000045 | 4.0982431726e+02 | | 9. | 2462 | 0.000050 | 1.1792848680e+02 | -Similar GPU runs using the MFEM CUDA *device* can be run as follows: +Similar GPU runs using the MFEM GPU *device* can be run as follows: -1. `./laghos -p 0 -m data/square01_quad.mesh -rs 3 -tf 0.75 -pa -d cuda` -2. `./laghos -p 0 -m data/cube01_hex.mesh -rs 1 -tf 0.75 -pa -d cuda` -3. `./laghos -p 1 -m data/square01_quad.mesh -rs 3 -tf 0.80 -pa -d cuda` -4. `./laghos -p 1 -m data/cube01_hex.mesh -rs 2 -tf 0.60 -pa -d cuda` +1. `./laghos -p 0 -m data/square01_quad.mesh -rs 3 -tf 0.75 -pa -d gpu` +2. `./laghos -p 0 -m data/cube01_hex.mesh -rs 1 -tf 0.75 -pa -d gpu` +3. `./laghos -p 1 -m data/square01_quad.mesh -rs 3 -tf 0.80 -pa -d gpu` +4. `./laghos -p 1 -m data/cube01_hex.mesh -E0 2 -rs 2 -tf 0.60 -pa -d gpu` 5. -- this is a 1D test that is not supported on the device -- -6. `./laghos -p 3 -m data/rectangle01_quad.mesh -rs 2 -tf 3.0 -pa -d cuda` -7. `./laghos -p 3 -m data/box01_hex.mesh -rs 1 -tf 5.0 -pa -cgt 1e-12 -d cuda` -8. `./laghos -p 4 -m data/square_gresho.mesh -rs 3 -ok 3 -ot 2 -tf 0.62831853 -s 7 -pa -d cuda` -9. `./laghos -p 7 -m data/rt2D.mesh -tf 4 -rs 1 -ok 4 -ot 3 -pa -d cuda` +6. `./laghos -p 3 -m data/rectangle01_quad.mesh -rs 2 -tf 3.0 -pa -d gpu` +7. `./laghos -p 3 -m data/box01_hex.mesh -rs 1 -tf 5.0 -pa -cgt 1e-12 -d gpu` +8. `./laghos -p 4 -m data/square_gresho.mesh -rs 3 -ok 3 -ot 2 -tf 0.62831853 -s 7 -pa -d gpu` +9. `./laghos -p 7 -m data/rt2D.mesh -tf 4 -rs 1 -ok 4 -ot 3 -pa -d gpu` An implementation is considered valid if the final energy values are all within round-off distance from the above reference values. 
diff --git a/laghos.cpp b/laghos.cpp index b6f03ed9..a73d4b9c 100644 --- a/laghos.cpp +++ b/laghos.cpp @@ -55,7 +55,8 @@ #include "fem/qinterp/grad.hpp" #include "fem/integ/bilininteg_mass_kernels.hpp" -#ifdef USE_CALIPER +#include "sedov/sedov_sol.hpp" +#ifdef LAGHOS_USE_CALIPER #include #include #endif @@ -100,13 +101,12 @@ int main(int argc, char *argv[]) problem = 1; dim = 3; const char *mesh_file = "default"; - int elem_per_mpi = 1; + int elem_per_mpi = 0; int rs_levels = 2; int rp_levels = 0; int nx = 2; int ny = 2; int nz = 2; - Array cxyz; int order_v = 2; int order_e = 1; int order_q = -1; @@ -115,6 +115,7 @@ int main(int argc, char *argv[]) double cfl = 0.5; double cg_tol = 1e-8; double ftz_tol = 0.0; + double delta_tol = 1e-12; int cg_max_iter = 300; int max_tsteps = -1; bool p_assembly = true; @@ -124,37 +125,51 @@ int main(int argc, char *argv[]) bool visit = false; bool gfprint = false; const char *basename = "results/Laghos"; - int partition_type = 0; const char *device = "cpu"; bool check = false; + bool check_exact_sedov = false; bool mem_usage = false; bool fom = false; bool gpu_aware_mpi = false; int dev = 0; int dev_pool_size = 4; - double blast_energy = 0.25; - double blast_position[] = {0.0, 0.0, 0.0}; + + double blast_energy = 1; + real_t Sx = 1, Sy = 1, Sz = 1; bool enable_nc = true; - bool enable_rebalance = true; OptionsParser args(argc, argv); args.AddOption(&dim, "-dim", "--dimension", "Dimension of the problem."); args.AddOption(&mesh_file, "-m", "--mesh", "Mesh file to use."); - args.AddOption(&elem_per_mpi, "-epm", "--elem-per-mpi", - "Number of element per mpi task."); + args.AddOption( + &elem_per_mpi, "-epm", "--elem-per-mpi", + "Number of element per mpi task. Note: this is mutually-exclusive with " + "-nx, -ny, and -nz. Use -epm 0 to use -nx, -ny, and -nz."); args.AddOption(&nx, "-nx", "--xelems", - "Elements in x-dimension (do not specify mesh_file)"); + "Elements in x-dimension (do not specify mesh_file). 
Note: " + "this is mutually-exclusive with -epm. Use -epm " + "0 to use -nx, -ny, and -nz."); args.AddOption(&ny, "-ny", "--yelems", - "Elements in y-dimension (do not specify mesh_file)"); + "Elements in y-dimension (do not specify mesh_file). Note: " + "this is mutually-exclusive with -epm. Use -epm " + "0 to use -nx, -ny, and -nz."); args.AddOption(&nz, "-nz", "--zelems", - "Elements in z-dimension (do not specify mesh_file)"); + "Elements in z-dimension (do not specify mesh_file). Note: " + "this is mutually-exclusive with -epm. Use -epm " + "0 to use -nx, -ny, and -nz."); + args.AddOption(&blast_energy, "-E0", "--blast-energy", + "Sedov initial blast energy (for problem 1)"); + args.AddOption(&Sx, "-Sx", "--xwidth", + "Domain width in x-dimension (do not specify mesh_file)"); + args.AddOption(&Sy, "-Sy", "--ywidth", + "Domain width in y-dimension (do not specify mesh_file)"); + args.AddOption(&Sz, "-Sz", "--zwidth", + "Domain width in z-dimension (do not specify mesh_file)"); args.AddOption(&rs_levels, "-rs", "--refine-serial", "Number of times to refine the mesh uniformly in serial."); args.AddOption(&rp_levels, "-rp", "--refine-parallel", "Number of times to refine the mesh uniformly in parallel."); - args.AddOption(&cxyz, "-c", "--cartesian-partitioning", - "Use Cartesian partitioning."); args.AddOption(&problem, "-p", "--problem", "Problem setup to use."); args.AddOption(&order_v, "-ok", "--order-kinematic", "Order (degree) of the kinematic finite element space."); @@ -173,6 +188,8 @@ int main(int argc, char *argv[]) "Relative CG tolerance (velocity linear solve)."); args.AddOption(&ftz_tol, "-ftz", "--ftz-tol", "Absolute flush-to-zero tolerance."); + args.AddOption(&delta_tol, "-dtol", "--delta-tol", + "Tolerance for projecting Delta functions."); args.AddOption(&cg_max_iter, "-cgm", "--cg-max-steps", "Maximum number of CG iterations (velocity linear solve)."); args.AddOption(&max_tsteps, "-ms", "--max-steps", @@ -194,19 +211,14 @@ int main(int argc, char 
*argv[]) "Enable or disable result output (files in mfem format)."); args.AddOption(&basename, "-k", "--outputfilename", "Name of the visit dump files"); - args.AddOption(&partition_type, "-pt", "--partition", - "Customized x/y/z Cartesian MPI partitioning of the serial mesh.\n\t" - "Here x,y,z are relative task ratios in each direction.\n\t" - "Example: with 48 mpi tasks and -pt 321, one would get a Cartesian\n\t" - "partition of the serial mesh by (6,4,2) MPI tasks in (x,y,z).\n\t" - "NOTE: the serially refined mesh must have the appropriate number\n\t" - "of zones in each direction, e.g., the number of zones in direction x\n\t" - "must be divisible by the number of MPI tasks in direction x.\n\t" - "Available options: 11, 21, 111, 211, 221, 311, 321, 322, 432."); args.AddOption(&device, "-d", "--device", "Device configuration string, see Device::Configure()."); args.AddOption(&check, "-chk", "--checks", "-no-chk", "--no-checks", "Enable 2D checks."); + args.AddOption(&check_exact_sedov, "-err", "--exact-error", "-no-err", + "--no-exact-error", + "Enable comparing the Sedov problem (problem 1) against the " + "exact solution."); args.AddOption(&mem_usage, "-mb", "--mem", "-no-mem", "--no-mem", "Enable memory usage."); args.AddOption(&fom, "-f", "--fom", "-no-fom", "--no-fom", @@ -218,10 +230,6 @@ int main(int argc, char *argv[]) args.AddOption(&enable_nc, "-nc", "--nonconforming", "-no-nc", "--conforming", "Use non-conforming meshes. Requires a 2D or 3D mesh."); - args.AddOption(&enable_rebalance, "-b", "--balance", "-no-b", - "--no-rebalance", - "Perform a rebalance after parallel refinement. 
Only enabled \n\t" - "for non-conforming meshes with Metis partitioning."); args.AddOption(&dev, "-dev", "--dev", "GPU device to use."); args.Parse(); if (!args.Good()) @@ -231,7 +239,15 @@ int main(int argc, char *argv[]) } if (Mpi::Root()) { args.PrintOptions(cout); } -#ifdef USE_CALIPER + if (check_exact_sedov) + { + MFEM_VERIFY( + problem == 1, + "Can only compare problem 1 (Sedov) against the exact solution"); + MFEM_VERIFY(strncmp(mesh_file, "default", 7) == 0, "check: mesh_file"); + } + +#ifdef LAGHOS_USE_CALIPER cali_config_set("CALI_CALIPER_ATTRIBUTE_DEFAULT_SCOPE", "process"); CALI_CXX_MARK_FUNCTION; @@ -319,15 +335,62 @@ int main(int argc, char *argv[]) } else { - mesh = PartitionMPI(dim, Mpi::WorldSize(), elem_per_mpi, myid == 0, - rp_levels, mpi_partitioning); - if (dim == 1) + if (elem_per_mpi) { - mesh.GetBdrElement(0)->SetAttribute(1); - mesh.GetBdrElement(1)->SetAttribute(1); + mesh = PartitionMPI(dim, Mpi::WorldSize(), elem_per_mpi, myid == 0, + rp_levels, mpi_partitioning); + // scale mesh by Sx, Sy, Sz + switch (dim) + { + case 1: + mesh.Transform([=](const Vector &x, Vector &y) { y[0] = x[0] * Sx; }); + mesh.GetBdrElement(0)->SetAttribute(1); + mesh.GetBdrElement(1)->SetAttribute(1); + break; + case 2: + mesh.Transform([=](const Vector &x, Vector &y) + { + y[0] = x[0] * Sx; + y[1] = x[1] * Sy; + }); + AssignMeshBdrAttrs2D(mesh, 0_r, Sx); + break; + case 3: + mesh.Transform([=](const Vector &x, Vector &y) + { + y[0] = x[0] * Sx; + y[1] = x[1] * Sy; + y[2] = x[2] * Sz; + }); + AssignMeshBdrAttrs3D(mesh, 0_r, Sx, 0_r, Sy); + break; + } + } + else + { + if (dim == 1) + { + mesh = Mesh::MakeCartesian1D(nx, Sx); + mesh.GetBdrElement(0)->SetAttribute(1); + mesh.GetBdrElement(1)->SetAttribute(1); + } + if (dim == 2) + { + mesh = Mesh::MakeCartesian2D(nx, ny, Element::QUADRILATERAL, true, Sx, + Sy); + AssignMeshBdrAttrs2D(mesh, 0_r, Sx); + } + if (dim == 3) + { + mesh = Mesh::MakeCartesian3D(nx, ny, nz, Element::HEXAHEDRON, Sx, Sy, + Sz, true); + 
AssignMeshBdrAttrs3D(mesh, 0_r, Sx, 0_r, Sy); + } + for (int lev = 0; lev < rs_levels; lev++) + { + mesh.UniformRefinement(); + } } - if (dim == 2) { AssignMeshBdrAttrs2D(mesh, 0.0, 1.0); } - if (dim == 3) { AssignMeshBdrAttrs3D(mesh, 0.0, 1.0, 0.0, 1.0); } } dim = mesh.Dimension(); @@ -362,6 +425,12 @@ int main(int argc, char *argv[]) mesh.Clear(); for (int lev = 0; lev < rp_levels; lev++) { pmesh.UniformRefinement(); } + int NE = pmesh.GetNE(), ne_min, ne_max; + MPI_Reduce(&NE, &ne_min, 1, MPI_INT, MPI_MIN, 0, pmesh.GetComm()); + MPI_Reduce(&NE, &ne_max, 1, MPI_INT, MPI_MAX, 0, pmesh.GetComm()); + if (myid == 0) + { cout << "Zones min/max: " << ne_min << " " << ne_max << endl; } + // Define the parallel finite element spaces. We use: // - H1 (Gauss-Lobatto, continuous) for position and velocity. // - L2 (Bernstein, discontinuous) for specific internal energy. @@ -467,12 +536,26 @@ int main(int argc, char *argv[]) ParGridFunction l2_rho0_gf(&l2_fes), l2_e(&l2_fes); l2_rho0_gf.ProjectCoefficient(rho0_coeff); rho0_gf.ProjectGridFunction(l2_rho0_gf); + + double blast_position[] = {0.0, 0.0, 0.0}; if (problem == 1) { // For the Sedov test, we use a delta function at the origin. + // divide amount of blast energy by 2^d due to simulating only a portion + // of the symmetric blast. 
DeltaCoefficient e_coeff(blast_position[0], blast_position[1], - blast_position[2], blast_energy); + blast_position[2], blast_energy / pow(2, dim)); + e_coeff.SetTol(delta_tol); l2_e.ProjectCoefficient(e_coeff); + + int non_finite = l2_e.CheckFinite(); + MPI_Allreduce(MPI_IN_PLACE, &non_finite, 1, MPI_INT, MPI_SUM, pmesh.GetComm()); + if (non_finite > 0) + { + cout << "Delta function could not be initialized!\n"; + delete ode_solver; + return 1; + } } else { @@ -570,6 +653,7 @@ int main(int argc, char *argv[]) int steps = 0; BlockVector S_old(S); long mem=0, mmax=0, msum=0; + long dmem = 0, dmmax = 0, dmsum = 0; int checks = 0; // const double internal_energy = hydro.InternalEnergy(e_gf); // const double kinetic_energy = hydro.KineticEnergy(v_gf); @@ -594,13 +678,13 @@ int main(int argc, char *argv[]) // } // -#ifdef USE_CALIPER +#ifdef LAGHOS_USE_CALIPER CALI_CXX_MARK_LOOP_BEGIN(mainloop_annotation, "timestep loop"); #endif int ti = 1; for (; !last_step; ti++) { -#ifdef USE_CALIPER +#ifdef LAGHOS_USE_CALIPER CALI_CXX_MARK_LOOP_ITERATION(mainloop_annotation, static_cast(ti)); #endif if (t + dt >= t_final) @@ -655,6 +739,18 @@ int main(int argc, char *argv[]) if (mem_usage) { mem = GetMaxRssMB(); + size_t mfree, mtot; + if (Device::Allows(Backend::CUDA_MASK | Backend::HIP_MASK)) + { + Device::DeviceMem(&mfree, &mtot); + dmem = mtot - mfree; + MPI_Reduce(&dmem, &dmmax, 1, MPI_LONG, MPI_MAX, 0, + pmesh.GetComm()); + MPI_Reduce(&dmem, &dmsum, 1, MPI_LONG, MPI_SUM, 0, + pmesh.GetComm()); + dmmax /= 1024*1024; + dmsum /= 1024*1024; + } MPI_Reduce(&mem, &mmax, 1, MPI_LONG, MPI_MAX, 0, pmesh.GetComm()); MPI_Reduce(&mem, &msum, 1, MPI_LONG, MPI_SUM, 0, pmesh.GetComm()); } @@ -679,7 +775,8 @@ int main(int argc, char *argv[]) cout << std::fixed; if (mem_usage) { - cout << ", mem: " << mmax << "/" << msum << " MB"; + cout << ", mem: " << mmax << "/" << msum << " MB, " + << dmmax << "/" << dmsum << " MB"; } cout << endl; } @@ -764,8 +861,7 @@ int main(int argc, char *argv[]) 
Checks(ti, e_norm, checks); } } - -#ifdef USE_CALIPER +#ifdef LAGHOS_USE_CALIPER CALI_CXX_MARK_LOOP_END(mainloop_annotation); adiak::value("steps", ti); #endif @@ -785,6 +881,16 @@ int main(int argc, char *argv[]) if (mem_usage) { + if (Device::Allows(Backend::CUDA_MASK | Backend::HIP_MASK)) + { + size_t mfree, mtot; + Device::DeviceMem(&mfree, &mtot); + dmem = mtot - mfree; + MPI_Reduce(&dmem, &dmmax, 1, MPI_LONG, MPI_MAX, 0, pmesh.GetComm()); + MPI_Reduce(&dmem, &dmsum, 1, MPI_LONG, MPI_SUM, 0, pmesh.GetComm()); + dmmax /= 1024*1024; + dmsum /= 1024*1024; + } mem = GetMaxRssMB(); MPI_Reduce(&mem, &mmax, 1, MPI_LONG, MPI_MAX, 0, pmesh.GetComm()); MPI_Reduce(&mem, &msum, 1, MPI_LONG, MPI_SUM, 0, pmesh.GetComm()); @@ -799,8 +905,8 @@ int main(int argc, char *argv[]) << fabs(energy_init - energy_final) << endl; if (mem_usage) { - cout << "Maximum memory resident set size: " - << mmax << "/" << msum << " MB" << endl; + cout << "Maximum memory resident set size: " << mmax << "/" << msum + << " MB, " << dmmax << "/" << dmsum << " MB" << endl; } } @@ -825,10 +931,91 @@ int main(int argc, char *argv[]) vis_e.close(); } -#ifdef USE_CALIPER +#ifdef LAGHOS_USE_CALIPER adiak::fini(); #endif + if (check_exact_sedov) + { + // compare against the exact Sedov solution + double gamma = 1.4; + double rho0 = 1; + double omega = 0; + + SedovSol asol(dim, gamma, rho0, blast_energy, omega); + + asol.SetTime(t_final); + + if (strncmp(mesh_file, "default", 7) == 0) + { + real_t min_r = std::min(std::min(Sx, Sy), Sz); + MFEM_VERIFY( + asol.r2 <= min_r, + "Solution reflections off boundaries detected, cannot compare " + "against exact solution."); + } + + int err_order = std::max((std::max(order_v, order_e) + 1) * 2, order_q) * 2; + const IntegrationRule &irule = + IntRules.Get(pmesh.GetTypicalElementGeometry(), err_order); + + QuadratureSpace qspace(pmesh, irule); + // only compare density + QuadratureFunction sim_qfunc(qspace, 1); + QuadratureFunction err_qfunc(qspace, 1); + + 
hydro.ComputeDensity(rho_gf); + + rho_gf.HostReadWrite(); + + { + GridFunctionCoefficient ctmp(&rho_gf); + ctmp.Coefficient::Project(sim_qfunc); + } + + auto slambda = [&](const Vector &x, Vector &res) + { + real_t tmp[3]; + Vector dr(tmp, dim); + double r = 0; + + for (int i = 0; i < dim; ++i) + { + dr[i] = x[i] - blast_position[i]; + r += dr[i] * dr[i]; + } + r = sqrt(r); + if (r > 0) + { + for (int i = 0; i < dim; ++i) + { + dr[i] /= r; + } + } + else + { + dr = 0_r; + } + double rho, v, P; + asol.EvalSol(r, rho, v, P); + res[0] = rho; + }; + VectorFunctionCoefficient asol_coeff(1, slambda); + asol_coeff.Project(err_qfunc); + + sim_qfunc.HostRead(); + err_qfunc.HostReadWrite(); + for (int i = 0; i < err_qfunc.Size(); ++i) + { + err_qfunc[i] = pow(err_qfunc[i] - AsConst(sim_qfunc)[i], 2); + } + real_t lrho_err = err_qfunc.Integrate(); + if (Mpi::Root()) + { + cout << "Density L2 error: " << sqrt(lrho_err) << endl; + } + } + // Free the used memory. delete ode_solver; @@ -1079,7 +1266,7 @@ static void Checks(const int ti, const double nrm, int &chk) }, { {{5, 1.198510951452527e+03}, {188, 1.199384410059154e+03}}, - {{5, 1.339163718592566e+01}, { 28, 7.521073677397994e+00}}, + {{5, 6.695818592962833e+00}, { 20, 4.267902387082487e+00}}, {{5, 2.041491591302486e+01}, { 59, 3.443180411803796e+01}}, {{5, 1.600000000000000e+01}, { 16, 1.600000000000000e+01}}, {{5, 6.892649884704898e+01}, { 18, 6.893688067534482e+01}}, diff --git a/makefile b/makefile index 176229e6..6f333be2 100644 --- a/makefile +++ b/makefile @@ -81,7 +81,7 @@ ifeq ($(wildcard $(CALIPER_DIR)),) else CALIPER_INCLUDE := -I $(CALIPER_DIR)/include CALIPER_LIBS := -L $(CALIPER_DIR)/lib64 -lcaliper - CALIPER_FLAGS := -DUSE_CALIPER + CALIPER_FLAGS := -DLAGHOS_USE_CALIPER endif # Only configure Adiak if Caliper is enabled @@ -121,8 +121,8 @@ CCC = $(strip $(CXX) $(LAGHOS_FLAGS) $(if $(EXTRA_INC_DIR),-I$(EXTRA_INC_DIR))) LAGHOS_LIBS = $(MFEM_LIBS) $(MFEM_EXT_LIBS) $(CALIPER_LIBS) $(ADIAK_LIBS) LIBS = $(strip 
$(LAGHOS_LIBS) $(LDFLAGS)) -SOURCE_FILES = $(sort $(wildcard *.cpp)) -HEADER_FILES = $(sort $(wildcard *.hpp)) +SOURCE_FILES = $(sort $(wildcard *.cpp) sedov/sedov_sol.cpp) +HEADER_FILES = $(sort $(wildcard *.hpp) $(wildcard sedov/*.hpp)) OBJECT_FILES = $(SOURCE_FILES:.cpp=.o) # Targets @@ -247,7 +247,7 @@ tests: grep -E '^[[:space:]]*step[[:space:]]+[0-9]+' RUN.dat | tail -n 1 | \ awk '{ printf("step = %04d, dt = %s |e| = %.10e\n", $$2, $$8, $$11); }' >> RESULTS.dat $(MFEM_MPIEXEC) $(MFEM_MPIEXEC_NP) $(MFEM_MPI_NP) \ - ./laghos -p 1 -m data/cube01_hex.mesh -rs 2 -tf 0.6 -pa -vs 100 | tee RUN.dat + ./laghos -p 1 -m data/cube01_hex.mesh -E0 2 -rs 2 -tf 0.6 -pa -vs 100 | tee RUN.dat grep -E '^[[:space:]]*step[[:space:]]+[0-9]+' RUN.dat | tail -n 1 | \ awk '{ printf("step = %04d, dt = %s |e| = %.10e\n", $$2, $$8, $$11); }' >> RESULTS.dat $(MFEM_MPIEXEC) $(MFEM_MPIEXEC_NP) $(MFEM_MPI_NP) \ diff --git a/sedov/adaptive_quad.hpp b/sedov/adaptive_quad.hpp new file mode 100644 index 00000000..aae3c007 --- /dev/null +++ b/sedov/adaptive_quad.hpp @@ -0,0 +1,174 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +// reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. 
Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+#ifndef LAGHOS_ADAPTIVE_QUAD_HPP
+#define LAGHOS_ADAPTIVE_QUAD_HPP
+
+#include <algorithm>
+#include <cmath>
+
+///
+/// Implements the 21-point adaptive Gauss-Kronrod quadrature method
+///
+template <class Fun, class Err> struct gk21
+{
+   Fun fun;
+   Err err_fun;
+   using res_type = decltype(fun(0.0));
+   constexpr static size_t gl_points() { return 10; }
+   constexpr static size_t gk_points() { return 11; }
+
+private:
+   res_type integrate_recurse(double lower, double upper, size_t curr_depth,
+                              size_t max_depth = 20) const
+   {
+      static constexpr double data[] =
+      {
+         // gl_abscissa
+         -1.488743389816312108848260011297200e-01,
+         -4.333953941292471907992659431657842e-01,
+         -6.794095682990244062343273651148736e-01,
+         -8.650633666889845107320966884234930e-01,
+         -9.739065285171717200779640120844521e-01,
+         1.488743389816312108848260011297200e-01,
+         4.333953941292471907992659431657842e-01,
+         6.794095682990244062343273651148736e-01,
+         8.650633666889845107320966884234930e-01,
+         9.739065285171717200779640120844521e-01,
+         // gl_weights
+         2.955242247147528701738929946513383e-01,
+         2.692667193099963550912269215694694e-01,
+         2.190863625159820439955349342281632e-01,
+         1.494513491505805931457763396576973e-01,
+         6.667134430868813759356880989333179e-02,
+         2.955242247147528701738929946513383e-01,
+         2.692667193099963550912269215694694e-01,
+         2.190863625159820439955349342281632e-01,
+         1.494513491505805931457763396576973e-01,
+         6.667134430868813759356880989333179e-02,
+         // glk_weights
+         1.477391049013384913748415159720680e-01,
+         1.347092173114733259280540017717068e-01,
+         1.093871588022976418992105903258050e-01,
+         7.503967481091995276704314091619001e-02,
+         3.255816230796472747881897245938976e-02,
+         1.477391049013384913748415159720680e-01,
+         1.347092173114733259280540017717068e-01,
+         1.093871588022976418992105903258050e-01,
+         7.503967481091995276704314091619001e-02,
+         3.255816230796472747881897245938976e-02,
+         // gk_abscissa
+         0.000000000000000000000000000000000e00,
+         -2.943928627014601981311266031038656e-01,
+         -5.627571346686046833390000992726941e-01,
+         -7.808177265864168970637175783450424e-01,
+         -9.301574913557082260012071800595083e-01,
+         -9.956571630258080807355272806890028e-01,
+         2.943928627014601981311266031038656e-01,
+         5.627571346686046833390000992726941e-01,
+         7.808177265864168970637175783450424e-01,
+         9.301574913557082260012071800595083e-01,
+         9.956571630258080807355272806890028e-01,
+         // gk_weights
+         1.494455540029169056649364683898212e-01,
+         1.427759385770600807970942731387171e-01,
+         1.234919762620658510779581098310742e-01,
+         9.312545458369760553506546508336634e-02,
+         5.475589657435199603138130024458018e-02,
+         1.169463886737187427806439606219205e-02,
+         1.427759385770600807970942731387171e-01,
+         1.234919762620658510779581098310742e-01,
+         9.312545458369760553506546508336634e-02,
+         5.475589657435199603138130024458018e-02,
+         1.169463886737187427806439606219205e-02,
+      };
+      // TODO: where to copy gk21_base data to scratch memory?
+      res_type gl_sum = 0;
+      res_type gk_sum = 0;
+      double jac = (upper - lower) * 0.5;
+      for (int i = 0; i < gl_points(); ++i)
+      {
+         res_type f_eval = fun((data[i] + 1) * jac + lower);
+         gl_sum += f_eval * data[gl_points() + i];
+         gk_sum += f_eval * data[2 * gl_points() + i];
+      }
+      for (int i = 0; i < gk_points(); ++i)
+      {
+         res_type f_eval = fun((data[3 * gl_points() + i] + 1) * jac + lower);
+         gk_sum += f_eval * data[3 * gl_points() + gk_points() + i];
+      }
+      gk_sum *= jac;
+      gl_sum *= jac;
+      if (curr_depth < max_depth && !err_fun(gk_sum, gl_sum))
+      {
+         gk_sum = integrate_recurse(lower, lower + jac, curr_depth + 1, max_depth);
+         gk_sum +=
+            integrate_recurse(lower + jac, upper, curr_depth + 1, max_depth);
+      }
+
+      return gk_sum;
+   }
+
+public:
+   res_type integrate(double lower, double upper, size_t start_segs = 1,
+                      size_t max_depth = 20) const
+   {
+      double dx = (upper - lower) / start_segs;
+      res_type res = 0;
+      double curr = lower;
+      for (size_t i = 0; i < start_segs; ++i)
+      {
+         double next = lower + (i + 1) * dx;
+         res += integrate_recurse(curr, next, 1, max_depth);
+         curr = next;
+      }
+      return res;
+   }
+};
+
+template <class F, class E>
+auto gk21_integrate(F &&f, E &&e, double lower, double upper,
+                    size_t start_segs = 1, size_t max_depth = 20)
+{
+   gk21<F, E> integrator{f, e};
+   return integrator.integrate(lower, upper, start_segs, max_depth);
+}
+
+struct scalar_error_functor
+{
+   double eps_abs;
+   double eps_rel;
+   template <class T> bool operator()(const T &ho, const T &lo) const
+   {
+      if (!std::isfinite(ho))
+      {
+         return true;
+      }
+      double delta = std::fabs(ho - lo);
+      if (delta < eps_abs)
+      {
+         return true;
+      }
+      double denom = std::max(std::fabs(ho), std::fabs(lo));
+      if (delta < eps_rel * denom)
+      {
+         return true;
+      }
+      return false;
+   }
+};
+
+#endif
diff --git a/sedov/bisect.hpp b/sedov/bisect.hpp
new file mode 100644
index 00000000..7382164c
--- /dev/null
+++ b/sedov/bisect.hpp
@@ -0,0 +1,98 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC.
Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+#ifndef LAGHOS_BISECT_HPP
+#define LAGHOS_BISECT_HPP
+
+#include <cmath>
+#include <cstddef>
+
+#include <stdexcept>
+
+/// Bisection root finder
+template <class Fun> double bisection(Fun &&fun, double lower, double upper)
+{
+   double lv = fun(lower);
+   constexpr double tol = 1e-20;
+   if (std::fabs(lv) < tol)
+   {
+      return lower;
+   }
+   double rv = fun(upper);
+   if (std::fabs(rv) < tol)
+   {
+      return upper;
+   }
+   if (std::copysign(1., lv) * std::copysign(1., rv) > 0)
+   {
+      throw std::runtime_error("bisection: no sign change");
+   }
+   auto dx_init = upper - lower;
+   auto dx_last = dx_init;
+   while (true)
+   {
+      double mid = 0.5 * (lower + upper);
+      auto dx = mid - lower;
+      double mv = fun(mid);
+      if (dx < dx_init * 1e-16 || dx >= dx_last)
+      {
+         if (fabs(mv) < fabs(lv))
+         {
+            if (fabs(mv) < fabs(rv))
+            {
+               return mid;
+            }
+            else if (fabs(rv) < fabs(lv))
+            {
+               return upper;
+            }
+            else
+            {
+               return lower;
+            }
+         }
+         else if (fabs(rv) < fabs(lv))
+         {
+            return upper;
+         }
+         else
+         {
+            return lower;
+         }
+      }
+      if (std::fabs(mv) < tol)
+      {
+         return mid;
+      }
+      if (std::copysign(1., lv) != std::copysign(1., mv))
+      {
+         upper = mid;
+         rv = mv;
+      }
+      else if (std::copysign(1., rv) != std::copysign(1., mv))
+      {
+         lower = mid;
+         lv = mv;
+      }
+      else
+      {
+         throw std::runtime_error("bisection: no sign change");
+      }
+      dx_last = dx;
+   }
+}
+
+#endif
diff --git a/sedov/sedov.cpp b/sedov/sedov.cpp
new file mode 100644
index 00000000..7fe7e5f1
--- /dev/null
+++ b/sedov/sedov.cpp
@@ -0,0 +1,226 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+// Computes and writes out the Sedov shock solution
+//
+// See:
+// James R. Kamm, Evaluation of the Sedov-von Neumann-Taylor Blast Wave Solution
+// LA-UR-00-6055
+
+#include "sedov_sol.hpp"
+
+#include <mfem.hpp>
+
+#include <iomanip>
+#include <iostream>
+
+using namespace mfem;
+
+int main(int argc, char *argv[]) {
+  // Initialize MPI.
+  Mpi::Init();
+  int myid = Mpi::WorldRank();
+  Hypre::Init();
+
+  // Parse command-line options.
+ int dim = 3; + int rs_levels = 2; + int rp_levels = 0; + int nx = 2; + int ny = 2; + int nz = 2; + int order_q = 4; + double t_final = 0.6; + const char *basename = "results/Sedov"; + real_t Sx = 1, Sy = 1, Sz = 1; + + OptionsParser args(argc, argv); + args.AddOption(&dim, "-dim", "--dimension", "Dimension of the problem."); + args.AddOption(&nx, "-nx", "--xelems", "Elements in x-dimension"); + args.AddOption(&ny, "-ny", "--yelems", "Elements in y-dimension"); + args.AddOption(&nz, "-nz", "--zelems", "Elements in z-dimension"); + args.AddOption(&Sx, "-Sx", "--xwidth", "Domain width in x-dimension"); + args.AddOption(&Sy, "-Sy", "--ywidth", "Domain width in y-dimension"); + args.AddOption(&Sz, "-Sz", "--zwidth", "Domain width in z-dimension"); + args.AddOption(&rs_levels, "-rs", "--refine-serial", + "Number of times to refine the mesh uniformly in serial."); + args.AddOption(&rp_levels, "-rp", "--refine-parallel", + "Number of times to refine the mesh uniformly in parallel."); + args.AddOption(&order_q, "-oq", "--order-intrule", + "Order of the integration rule."); + args.AddOption(&t_final, "-tf", "--t-final", "Final time; start time is 0."); + args.AddOption(&basename, "-k", "--outputfilename", + "Name of the visit dump files"); + args.Parse(); + if (!args.Good()) { + if (Mpi::Root()) { + args.PrintUsage(std::cout); + } + return 1; + } + if (Mpi::Root()) { + args.PrintOptions(std::cout); + } + + Device::SetMemoryTypes(MemoryType::HOST, MemoryType::DEVICE); + + // On all processors, use the default builtin 1D/2D/3D mesh or read the + // serial one given on the command line. 
+  std::unique_ptr<Mesh> mesh;
+  switch (dim) {
+  case 1:
+    mesh.reset(new Mesh(Mesh::MakeCartesian1D(nx, Sx)));
+    break;
+  case 2:
+    mesh.reset(new Mesh(
+        Mesh::MakeCartesian2D(nx, ny, Element::QUADRILATERAL, true, Sx, Sy)));
+    break;
+  case 3:
+    mesh.reset(new Mesh(Mesh::MakeCartesian3D(nx, ny, nz, Element::HEXAHEDRON,
+                                              Sx, Sy, Sz, true)));
+    break;
+  default:
+    if (Mpi::Root()) {
+      std::cout << "Invalid number of dims" << std::endl;
+    }
+    return -1;
+  }
+
+  // Refine the mesh in serial to increase the resolution.
+  for (int lev = 0; lev < rs_levels; lev++) {
+    mesh->UniformRefinement();
+  }
+  const int mesh_NE = mesh->GetNE();
+  if (Mpi::Root()) {
+    std::cout << "Number of zones in the serial mesh: " << mesh_NE << std::endl;
+  }
+
+  // Parallel partitioning of the mesh.
+  std::unique_ptr<ParMesh> pmesh;
+  const int num_tasks = Mpi::WorldSize();
+
+  if (myid == 0) {
+    std::cout << "Non-Cartesian partitioning through METIS will be used.\n";
+#ifndef MFEM_USE_METIS
+    std::cout << "MFEM was built without METIS. "
+              << "Adjust the number of tasks to use a Cartesian split."
+              << std::endl;
+#endif
+  }
+#ifndef MFEM_USE_METIS
+  return 1;
+#endif
+  pmesh.reset(new ParMesh(MPI_COMM_WORLD, *mesh));
+
+  // Refine the mesh further in parallel to increase the resolution.
+ for (int lev = 0; lev < rp_levels; lev++) { + pmesh->UniformRefinement(); + } + + int NE = pmesh->GetNE(), ne_min, ne_max; + MPI_Reduce(&NE, &ne_min, 1, MPI_INT, MPI_MIN, 0, pmesh->GetComm()); + MPI_Reduce(&NE, &ne_max, 1, MPI_INT, MPI_MAX, 0, pmesh->GetComm()); + if (myid == 0) { + std::cout << "Zones min/max: " << ne_min << " " << ne_max << std::endl; + } + + const IntegrationRule &irule = + IntRules.Get(pmesh->GetTypicalElementGeometry(), order_q); + + QuadratureSpace qspace(*pmesh, irule); + QuadratureFunction qfunc(qspace, 2 + dim); + + double gamma = 1.4; + double blast_energy = 1; + double blast_position[] = {0.0, 0.0, 0.0}; + double rho0 = 1; + double omega = 0; + { + std::cout << std::setprecision(16); + SedovSol asol(dim, gamma, rho0, blast_energy, omega); + asol.SetTime(t_final); + if (myid == 0) { + std::cout << "a = " << asol.a << std::endl; + std::cout << "b = " << asol.b << std::endl; + std::cout << "c = " << asol.c << std::endl; + std::cout << "d = " << asol.d << std::endl; + std::cout << "e = " << asol.e << std::endl; + + std::cout << "alpha0 = " << asol.alpha0 << std::endl; + std::cout << "alpha1 = " << asol.alpha1 << std::endl; + std::cout << "alpha2 = " << asol.alpha2 << std::endl; + std::cout << "alpha3 = " << asol.alpha3 << std::endl; + std::cout << "alpha4 = " << asol.alpha4 << std::endl; + std::cout << "alpha5 = " << asol.alpha5 << std::endl; + + std::cout << "V0 = " << asol.V0 << std::endl; + std::cout << "Vv = " << asol.Vv << std::endl; + std::cout << "V2 = " << asol.V2 << std::endl; + std::cout << "Vs = " << asol.Vs << std::endl; + std::cout << "alpha = " << asol.alpha << std::endl; + + std::cout << "r2 (shock position) = " << asol.r2 << std::endl; + std::cout << "U (shock speed) = " << asol.U << std::endl; + std::cout << "rho1 (pre-shock density) = " << asol.rho1 << std::endl; + std::cout << "rho2 (post-shock density) = " << asol.rho2 << std::endl; + std::cout << "v2 (post-shock velocity) = " << asol.v2 << std::endl; + std::cout << "p2 
(post-shock pressure) = " << asol.p2 << std::endl; + } + auto slambda = [&](const Vector &x, Vector &res) { + real_t tmp[3]; + Vector dr(tmp, dim); + double r = 0; + + for (int i = 0; i < dim; ++i) { + dr[i] = x[i] - blast_position[i]; + r += dr[i] * dr[i]; + } + r = sqrt(r); + if (r) { + for (int i = 0; i < dim; ++i) { + dr[i] /= r; + } + } + else + { + dr = 0_r; + } + double rho, v, P; + asol.EvalSol(r, rho, v, P); + res[0] = rho; + for (int i = 0; i < dim; ++i) { + res[1 + i] = v * dr[i]; + } + // internal energy + res[1 + dim] = P / (gamma - 1); + }; + VectorFunctionCoefficient asol_coeff(2 + dim, slambda); + asol_coeff.Project(qfunc); + } + + { + std::stringstream fname; + fname << basename << "_mesh"; + pmesh->Save(fname.str().c_str()); + } + { + std::stringstream fname; + fname << basename << "_qfunc"; + std::ofstream out(fname.str()); + qfunc.Save(out); + } + return 0; +} diff --git a/sedov/sedov_sol.cpp b/sedov/sedov_sol.cpp new file mode 100644 index 00000000..7afd99bb --- /dev/null +++ b/sedov/sedov_sol.cpp @@ -0,0 +1,198 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +// reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. 
Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#include "sedov_sol.hpp" + +#include "adaptive_quad.hpp" +#include "bisect.hpp" + +#include +#include + +#include + +SedovSol::SedovSol(int dim_, double gamma_, double rho_0_, double blast_energy_, + double omega_) + : dim(dim_), gamma(gamma_), rho_0(rho_0_), omega(omega_), + blast_energy(blast_energy_) +{ + a = (dim + 2 - omega) * (gamma + 1) * 0.25; + b = (gamma + 1) / (gamma - 1); + c = (dim + 2 - omega) * gamma * 0.5; + d = ((dim + 2 - omega) * (gamma + 1) / + ((dim + 2 - omega) * (gamma + 1) - 2 * (2 + dim * (gamma - 1)))); + e = (2 + dim * (gamma - 1)) * 0.5; + + alpha0 = 2. / (dim + 2 - omega); + alpha2 = -(gamma - 1) / (2 * (gamma - 1) + dim - gamma * omega); + alpha1 = + ((dim + 2 - omega) * gamma / (2 + dim * (gamma - 1)) * + (2 * (dim * (2 - gamma) - omega) / (gamma * pow((dim + 2 - omega), 2)) - + alpha2)); + alpha3 = (dim - omega) / (2 * (gamma - 1) + dim - dim * omega); + alpha4 = + (dim + 2 - omega) * (dim - omega) * alpha1 / (dim * (2 - gamma) - omega); + alpha5 = (omega * (1 + gamma) - 2 * dim) / (dim * (2 - gamma) - omega); + + V0 = 2. / ((dim + 2 - omega) * gamma); + Vv = 2. / (dim + 2 - omega); + V2 = 4. / ((dim + 2 - omega) * (gamma + 1)); + Vs = 2. 
/ ((gamma - 1) * dim + 2); + + if (V2 == Vs) + { + // singular + alpha = (gamma + 1) / (gamma - 1) * pow(2, dim) / + pow(dim * ((gamma - 1) * dim + 2), 2); + if (dim > 1) + { + alpha *= M_PI; + } + } + else + { + // standard or vacuum + auto Vmin = std::min(V0, Vv); + auto J1_integrand = [dim = dim, gamma = gamma, alpha0 = alpha0, + alpha1 = alpha1, alpha2 = alpha2, alpha3 = alpha3, + alpha4 = alpha4, alpha5 = alpha5, a = a, b = b, c = c, + d = d, e = e, omega = omega](double V) + { + return -(gamma + 1) / (gamma - 1) * pow(V, 2) * + (alpha0 / V + alpha2 * c / (c * V - 1) - + alpha1 * e / (1 - e * V)) * + pow((pow((a * V), alpha0) * pow((b * (c * V - 1)), alpha2) * + pow((d * (1 - e * V)), alpha1)), + (-(dim + 2 - omega))) * + pow((b * (c * V - 1)), alpha3) * pow((d * (1 - e * V)), alpha4) * + pow((b * (1 - c * V / gamma)), alpha5); + }; + scalar_error_functor err_fun; + err_fun.eps_abs = 1.49e-15; + err_fun.eps_rel = 1.49e-15; + auto J1 = gk21_integrate(J1_integrand, err_fun, Vmin, V2, 20, 64); + + auto J2_integrand = [dim = dim, gamma = gamma, alpha0 = alpha0, + alpha1 = alpha1, alpha2 = alpha2, alpha3 = alpha3, + alpha4 = alpha4, alpha5 = alpha5, a = a, b = b, c = c, + d = d, e = e, omega = omega](double V) + { + double denom = 1 - c * V; + if (fabs(denom) <= 1e-15) + { + denom = std::copysign(1e-15, denom); + } + return -(gamma + 1) / (2 * gamma) * pow(V, 2) * (c * V - gamma) / denom * + (alpha0 / V + alpha2 * c / -denom - alpha1 * e / (1 - e * V)) * + pow(pow(a * V, alpha0) * pow(b * (c * V - 1), alpha2) * + pow(d * (1 - e * V), alpha1), + -(dim + 2 - omega)) * + pow((b * (c * V - 1)), alpha3) * pow((d * (1 - e * V)), alpha4) * + pow((b * (1 - c * V / gamma)), alpha5); + + }; + auto J2 = gk21_integrate(J2_integrand, err_fun, Vmin, V2, 20, 64); + double I1 = pow(2, dim - 2) * J1; + double I2 = pow(2, (dim - 1)) / (gamma - 1) * J2; + if (dim > 1) + { + I1 *= M_PI; + I2 *= M_PI; + } + alpha = I1 + I2; + } +} + +void SedovSol::SetTime(double t_) +{ + t = t_; + + 
r2 = pow((blast_energy / (alpha * rho_0)), (1. / (dim + 2 - omega))) * + pow(t, (2. / (dim + 2 - omega))); + U = (2 / (dim + 2 - omega)) * (r2 / t); + rho1 = rho_0 * pow(r2, -omega); + rho2 = ((gamma + 1) / (gamma - 1)) * rho1; + v2 = (2 / (gamma + 1)) * U; + p2 = (2 / (gamma + 1)) * rho1 * U * U; +} + +void SedovSol::EvalSol(double r, double &rho, double &v, double &P) const +{ + if (r >= r2) + { + // pre-shock state + rho = rho_0 * pow(r, -omega); + v = 0; + P = 0; + return; + } + // post-shock state + if (V2 == Vs) + { + // singular + rho = rho2 * pow((r / r2), (dim - 2)); + v = v2 * r / r2; + P = p2 * pow((r / r2), dim); + } + else + { + // find V(r) + auto x1 = [&](double V) { return a * V; }; + auto x2 = [&](double V) { return b * (c * V - 1); }; + auto x3 = [&](double V) { return d * (1 - e * V); }; + auto x4 = [&](double V) { return b * (1 - c * V / gamma); }; + auto lmbda = [&](double V) + { + return pow(x1(V), -alpha0) * pow(x2(V), -alpha2) * pow(x3(V), -alpha1); + }; + auto f = [&](double V) { return x1(V) * lmbda(V); }; + auto g = [&](double V) + { + return pow(x1(V), alpha0 * omega) * + pow(x2(V), (alpha3 + alpha2 * omega)) * + pow(x3(V), (alpha4 + alpha1 * omega)) * pow(x4(V), alpha5); + }; + auto h = [&](double V) + { + return pow(x1(V), (alpha0 * dim)) * + pow(x3(V), (alpha4 + alpha1 * (omega - 2))) * + pow(x4(V), (1 + alpha5)); + }; + double V; + if (V2 < Vs) + { + // standard + V = bisection([&](double V_) { return r2 * lmbda(V_) - r; }, V0, V2); + } + else + { + // vacuum + V = bisection([&](double V_) { return r2 * lmbda(V_) - r; }, Vv, V2); + double r_vacuum = r2 * lmbda(Vv); + if (r <= r_vacuum) + { + // vacuum part + rho = 0; + v = 0; + P = 0; + return; + } + } + rho = rho2 * g(V); + v = v2 * f(V); + P = p2 * h(V); + } +} diff --git a/sedov/sedov_sol.hpp b/sedov/sedov_sol.hpp new file mode 100644 index 00000000..2e558a2a --- /dev/null +++ b/sedov/sedov_sol.hpp @@ -0,0 +1,78 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. 
Produced at +// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +// reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#ifndef LAGHOS_SEDOV_SOL_HPP +#define LAGHOS_SEDOV_SOL_HPP + +/// Taylor-von Neumann-Sedov blast wave solution +struct SedovSol +{ + /// 1 for plane wave, 2 for cylinder, 3 for sphere + int dim; + /// time to compute the solution at + double t = 0; + /// ideal gas gamma + double gamma; + /// initial density = rho_0 * pow(r, -omega) + double rho_0; + double omega; + /// initial blast energy + double blast_energy; + + /// currently only supports uniform initial density + /// computed quantities used for computing the solution + /// these values don't depend on time + double a; + double b; + double c; + double d; + double e; + + double alpha0; + double alpha1; + double alpha2; + double alpha3; + double alpha4; + double alpha5; + + double V0; + double Vv; + double V2; + double Vs; + + double alpha; + + /// these values depend on time + /// shock position + double r2; + /// shock speed + double U; + /// pre-shock density + double rho1; + /// post-shock state + double rho2; + double v2; + double p2; + + void SetTime(double t); + + void EvalSol(double r, double 
&rho, double &v, double &P) const; + + SedovSol(int dim, double gamma, double rho_0, double blast_energy, + double omega = 0); +}; + +#endif