diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 8ca97b7cc5..a185c19a8e 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -47,6 +47,9 @@ steps: - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.precompile()'" - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.status()'" - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" + + # Set up the mpiexecjl command + - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using MPI; MPI.install_mpiexecjl()'" agents: queue: Oceananigans architecture: CPU @@ -163,7 +166,7 @@ steps: ##### HydrostaticFreeSurfaceModel ##### - - label: "💧 gpu hydrostatic free surface model tests" + - label: "🐡 gpu hydrostatic free surface model tests" env: JULIA_DEPOT_PATH: "$SVERDRUP_HOME/.julia-$BUILDKITE_BUILD_NUMBER" TEST_GROUP: "hydrostatic_free_surface" @@ -174,7 +177,7 @@ steps: architecture: GPU depends_on: "init_gpu" - - label: "💦 cpu hydrostatic free surface model tests" + - label: "🐠 cpu hydrostatic free surface model tests" env: JULIA_DEPOT_PATH: "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER" TEST_GROUP: "hydrostatic_free_surface" @@ -190,7 +193,7 @@ steps: ##### ShallowWaterModel ##### - - label: "💧 gpu shallow water model tests" + - label: "🦑 gpu shallow water model tests" env: JULIA_DEPOT_PATH: "$SVERDRUP_HOME/.julia-$BUILDKITE_BUILD_NUMBER" TEST_GROUP: "shallow_water" @@ -201,7 +204,7 @@ steps: architecture: GPU depends_on: "init_gpu" - - label: "💦 cpu shallow water model tests" + - label: "🦐 cpu shallow water model tests" env: JULIA_DEPOT_PATH: "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER" TEST_GROUP: "shallow_water" @@ -240,6 +243,23 @@ steps: architecture: CPU depends_on: "init_cpu" +##### +##### Distributed/MPI +##### + + - label: "🐉 cpu distributed tests" + env: + JULIA_DEPOT_PATH: "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER" + TEST_GROUP: "distributed" + CUDA_VISIBLE_DEVICES: "-1" + commands: + - "PATH=$PATH:$TARTARUS_HOME/julia-$JULIA_VERSION/bin" # Need julia binary in $PATH for mpiexecjl to work. 
+ - "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER/bin/mpiexecjl -np 4 $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" + agents: + queue: Oceananigans + architecture: CPU + depends_on: "init_cpu" + ##### ##### Regression ##### @@ -317,7 +337,7 @@ steps: ##### Clean up ##### - - label: "🧻 clean up gpu environment" + - label: "🧽 clean up gpu environment" command: "rm -rf $SVERDRUP_HOME/.julia-$BUILDKITE_BUILD_NUMBER" agents: queue: Oceananigans diff --git a/Manifest.toml b/Manifest.toml index 5b65c6b50b..d071cee7ef 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,9 +2,9 @@ [[AbstractFFTs]] deps = ["LinearAlgebra"] -git-tree-sha1 = "051c95d6836228d120f5f4b984dd5aba1624f716" +git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0" uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "0.5.0" +version = "1.0.1" [[Adapt]] deps = ["LinearAlgebra"] @@ -12,6 +12,12 @@ git-tree-sha1 = "ffcfa2d345aaee0ef3d8346a073d5dd03c983ebe" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" version = "3.2.0" +[[ArrayInterface]] +deps = ["IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"] +git-tree-sha1 = "e7edcc1ac140cce87b7442ff0fa88b5f19fb71fa" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "3.1.3" + [[Artifacts]] deps = ["Pkg"] git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744" @@ -106,6 +112,12 @@ uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" +[[DocStringExtensions]] +deps = ["LibGit2", "Markdown", "Pkg", "Test"] +git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.8.3" + [[ExprTools]] git-tree-sha1 = "10407a39b87f29d47ebaca8edbc75d7c302ff93e" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" @@ -113,9 +125,9 @@ version = "0.1.3" [[FFTW]] deps = ["AbstractFFTs", "FFTW_jll", "IntelOpenMP_jll", "Libdl", "LinearAlgebra", "MKL_jll", "Reexport"] -git-tree-sha1 = "8fda0934cb99db617171f7296dc361f4d6fa5424" +git-tree-sha1 = "1b48dbde42f307e48685fa9213d8b9f8c0d87594" uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" -version = "1.3.0" +version = "1.3.2" [[FFTW_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -146,6 +158,11 @@ git-tree-sha1 = "fd83fa0bde42e01952757f01149dd968c06c4dba" uuid = "0234f1f7-429e-5d53-9886-15a909be8d59" version = "1.12.0+1" +[[IfElse]] +git-tree-sha1 = "28e837ff3e7a6c3cdb252ce49fb412c8eb3caeef" +uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" +version = "0.1.0" + [[IntelOpenMP_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "d979e54b71da82f3a65b62553da4fc3d18c9004c" @@ -163,15 +180,21 @@ version = "1.0.0" [[JLD2]] deps = ["CodecZlib", "DataStructures", "MacroTools", "Mmap", "Pkg", "Printf", "Requires", "UUIDs"] -git-tree-sha1 = "bb9a457481adf060ab5898823a49d4f854ff4ddd" +git-tree-sha1 = "b8343a7f96591404ade118b3a7014e1a52062465" uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -version = "0.4.0" +version = "0.4.2" [[JLLWrappers]] git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" version = "1.2.0" +[[JSON3]] +deps = ["Dates", "Mmap", "Parsers", "StructTypes", "UUIDs"] +git-tree-sha1 = "62d4063c67d7c84d5788107878bb925ceaadd252" +uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" +version = "1.7.1" + [[KernelAbstractions]] deps = ["Adapt", "CUDA", "Cassette", "InteractiveUtils", "MacroTools", "SpecialFunctions", "StaticArrays", "UUIDs"] git-tree-sha1 = 
"ee7f03c23d874c8353813a44315daf82a1e82046" @@ -184,6 +207,12 @@ git-tree-sha1 = "b616937c31337576360cb9fb872ec7633af7b194" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" version = "3.6.0" +[[LazyArtifacts]] +deps = ["Pkg"] +git-tree-sha1 = "4bb5499a1fc437342ea9ab7e319ede5a457c0968" +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +version = "1.3.0" + [[LibCURL_jll]] deps = ["LibSSH2_jll", "Libdl", "MbedTLS_jll", "Pkg", "Zlib_jll", "nghttp2_jll"] git-tree-sha1 = "897d962c20031e6012bba7b3dcb7a667170dad17" @@ -211,10 +240,22 @@ uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[MKL_jll]] -deps = ["IntelOpenMP_jll", "Libdl", "Pkg"] -git-tree-sha1 = "eb540ede3aabb8284cb482aa41d00d6ca850b1f8" +deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] +git-tree-sha1 = "c253236b0ed414624b083e6b72bfe891fbd2c7af" uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" -version = "2020.2.254+0" +version = "2021.1.1+1" + +[[MPI]] +deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "Pkg", "Random", "Requires", "Serialization", "Sockets"] +git-tree-sha1 = "d3aae0fd4d9e1a09c3e2fc728fbe2522ec6d54bc" +uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195" +version = "0.16.1" + +[[MPICH_jll]] +deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] +git-tree-sha1 = "4d37f1e07b4e2a74462eebf9ee48c626d15ffdac" +uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4" +version = "3.3.2+10" [[MacroTools]] deps = ["Markdown", "Random"] @@ -232,6 +273,12 @@ git-tree-sha1 = "0eef589dd1c26a3ac9d753fe1a8bcad63f956fa6" uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" version = "2.16.8+1" +[[MicrosoftMPI_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "e5c90234b3967684c9c6f87b4a54549b4ce21836" +uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf" +version = "10.1.3+0" + [[Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" @@ -255,9 +302,15 @@ version = "400.701.400+0" [[OffsetArrays]] deps = ["Adapt"] -git-tree-sha1 = "76622f08645764e040b4d7e86d0ff471fd126ae4" +git-tree-sha1 = "b3dfef5f2be7d7eb0e782ba9146a5271ee426e90" uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.5.3" +version = "1.6.2" + +[[OpenMPI_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "41b983e26a7ab8c9bf05f7d70c274b817d541b46" +uuid = "fe0851c0-eecd-5654-98d4-656369965a5c" +version = "4.0.2+2" [[OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -272,9 +325,27 @@ uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" version = "0.5.3+4" [[OrderedCollections]] -git-tree-sha1 = "d45739abcfc03b51f6a42712894a593f74c80a23" +git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.3.3" +version = "1.4.0" + +[[Parsers]] +deps = ["Dates"] +git-tree-sha1 = "223a825cccef2228f3fdbf2ecc7ca93363059073" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "1.0.16" + +[[PencilArrays]] +deps = ["ArrayInterface", "JSON3", "Libdl", "LinearAlgebra", "MPI", "OffsetArrays", "Reexport", "Requires", "StaticArrays", "StaticPermutations", "TimerOutputs"] +git-tree-sha1 = "6921d07316f41e2be5befd8b815eee28d3fab9f8" +uuid = "0e08944d-e94e-41b1-9406-dcf66b6a9d2e" +version = "0.9.0" + +[[PencilFFTs]] +deps = ["AbstractFFTs", "FFTW", "LinearAlgebra", "MPI", "PencilArrays", "Reexport", "TimerOutputs"] +git-tree-sha1 = "a7665838a566accd7d9cf308bbb497126dc5edf4" +uuid = "4a48f351-57a6-4416-9ec4-c37015456aae" +version = "0.12.1" [[Pkg]] deps = ["Dates", "LibGit2", "Libdl", "Logging", 
"Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] @@ -340,9 +411,15 @@ uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[SpecialFunctions]] deps = ["ChainRulesCore", "OpenSpecFun_jll"] -git-tree-sha1 = "75394dbe2bd346beeed750fb02baa6445487b862" +git-tree-sha1 = "5919936c0e92cff40e57d0ddf0ceb667d42e5902" uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "1.2.1" +version = "1.3.0" + +[[Static]] +deps = ["IfElse"] +git-tree-sha1 = "98ace568bf638e89eac33c99337f3c8c6e2227b8" +uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" +version = "0.2.0" [[StaticArrays]] deps = ["LinearAlgebra", "Random", "Statistics"] @@ -350,6 +427,11 @@ git-tree-sha1 = "9da72ed50e94dbff92036da395275ed114e04d49" uuid = "90137ffa-7385-5640-81b9-e52037218182" version = "1.0.1" +[[StaticPermutations]] +git-tree-sha1 = "193c3daa18ff3e55c1dae66acb6a762c4a3bdb0b" +uuid = "15972242-4b8f-49a0-b8a1-9ac0e7a1a45d" +version = "0.3.0" + [[Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -360,6 +442,12 @@ git-tree-sha1 = "26ea43b4be7e919a2390c3c0f824e7eb4fc19a0a" uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" version = "0.5.0" +[[StructTypes]] +deps = ["Dates", "UUIDs"] +git-tree-sha1 = "d7f4287dbc1e590265f50ceda1b40ed2bb31bbbb" +uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" +version = "1.4.0" + [[TableTraits]] deps = ["IteratorInterfaceExtensions"] git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" @@ -378,9 +466,9 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[TimerOutputs]] deps = ["Printf"] -git-tree-sha1 = "3318281dd4121ecf9713ce1383b9ace7d7476fdd" +git-tree-sha1 = "32cdbe6cd2d214c25a0b88f985c9e0092877c236" uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.7" +version = "0.5.8" [[TranscodingStreams]] deps = ["Random", "Test"] diff --git a/Project.toml b/Project.toml index d67e8d9b83..d9e42a09d0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "Oceananigans" uuid = "9e8cae18-63c1-5223-a75c-80ca9d6e9a09" -version = "0.52.1" +version = "0.53.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" @@ -14,9 +14,11 @@ JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +PencilFFTs = "4a48f351-57a6-4416-9ec4-c37015456aae" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -33,9 +35,11 @@ FFTW = "^1" Glob = "1.3" JLD2 = "^0.2, ^0.3, 0.4" KernelAbstractions = "^0.3, 0.4, 0.5" +MPI = "0.16" NCDatasets = "^0.10, ^0.11" OffsetArrays = "^1.4" OrderedCollections = "^1.1" +PencilFFTs = "0.12" SafeTestsets = "0.0.1" SeawaterPolynomials = "^0.2" StructArrays = "0.4, 0.5" diff --git a/benchmark/Manifest.toml b/benchmark/Manifest.toml index 11edda056a..f40874ac4e 100644 --- a/benchmark/Manifest.toml +++ b/benchmark/Manifest.toml @@ -2,15 +2,14 @@ [[AbstractFFTs]] deps = ["LinearAlgebra"] -git-tree-sha1 = "8ed9de2f1b1a9b1dee48582ad477c6e67b83eb2c" +git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0" uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "1.0.0" +version = "1.0.1" [[AbstractTrees]] -deps = ["Markdown"] -git-tree-sha1 = "33e450545eaf7699da1a6e755f9ea65f14077a45" 
+git-tree-sha1 = "03e0550477d86222521d254b741d470ba17ea0b5" uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" -version = "0.3.3" +version = "0.3.4" [[Adapt]] deps = ["LinearAlgebra"] @@ -31,9 +30,9 @@ uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" version = "0.1.0" [[BSON]] -git-tree-sha1 = "2878972c4bc17d9c8d26d48d9ef00fcfe1899e7a" +git-tree-sha1 = "db18b5ea04686f73d269e10bdb241947c40d7d6f" uuid = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" -version = "0.3.0" +version = "0.3.2" [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" @@ -67,6 +66,12 @@ git-tree-sha1 = "de4f08843c332d355852721adb1592bce7924da3" uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" version = "0.9.29" +[[CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.0" + [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] git-tree-sha1 = "919c7f3151e79ff196add81d7f4e45d91bbf420b" @@ -178,6 +183,12 @@ git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = "82899510-4779-5014-852e-03e436cf321d" version = "1.0.0" +[[JLD2]] +deps = ["CodecZlib", "DataStructures", "MacroTools", "Mmap", "Pkg", "Printf", "Requires", "UUIDs"] +git-tree-sha1 = "b8343a7f96591404ade118b3a7014e1a52062465" +uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +version = "0.4.2" + [[JLLWrappers]] git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" @@ -248,9 +259,9 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[NNlib]] deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Pkg", "Requires", "Statistics"] -git-tree-sha1 = "df42d0816edfc24f5b82a728f46381613c4dff79" +git-tree-sha1 = "5ce2e4b2bfe3811811e7db4b6a148439806fd2f8" uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" -version = "0.7.14" +version = "0.7.16" [[OrderedCollections]] git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" @@ -259,9 +270,9 @@ version = "1.4.0" [[Parsers]] deps = ["Dates"] -git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714" +git-tree-sha1 = "223a825cccef2228f3fdbf2ecc7ca93363059073" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "1.0.15" +version = "1.0.16" [[Pkg]] deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] @@ -274,10 +285,10 @@ uuid = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d" version = "0.2.10" [[PooledArrays]] -deps = ["DataAPI"] -git-tree-sha1 = "0e8f5c428a41a81cd71f76d76f2fc3415fe5a676" +deps = ["DataAPI", "Future"] +git-tree-sha1 = "cde4ce9d6f33219465b55162811d8de8139c0414" uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" -version = "1.1.0" +version = "1.2.1" [[PrettyTables]] deps = ["Crayons", "Formatting", "Markdown", "Reexport", "Tables"] @@ -310,9 +321,9 @@ version = "1.0.0" [[Requires]] deps = ["UUIDs"] -git-tree-sha1 = "cfbac6c1ed70c002ec6361e7fd334f02820d6419" +git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621" uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.1.2" +version = "1.1.3" [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" @@ -367,9 +378,9 @@ version = "1.0.0" [[Tables]] deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] -git-tree-sha1 = "a716dde43d57fa537a19058d044b495301ba6565" +git-tree-sha1 
= "f03fc113290ee7726b173fc7ea661260d204b3f2" uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -version = "1.3.2" +version = "1.4.0" [[TerminalLoggers]] deps = ["LeftChildRightSiblingTrees", "Logging", "Markdown", "Printf", "ProgressLogging", "UUIDs"] @@ -383,9 +394,15 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[TimerOutputs]] deps = ["Printf"] -git-tree-sha1 = "3318281dd4121ecf9713ce1383b9ace7d7476fdd" +git-tree-sha1 = "32cdbe6cd2d214c25a0b88f985c9e0092877c236" uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.7" +version = "0.5.8" + +[[TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = "7c53c35547de1c5b9d46a4797cf6d8253807108c" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.5" [[UUIDs]] deps = ["Random", "SHA"] @@ -393,3 +410,9 @@ uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [[Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[Zlib_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "320228915c8debb12cb434c59057290f0834dbf6" +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.11+18" diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 4861b22f9e..86bb462f92 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -8,6 +8,7 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" diff --git a/benchmark/strong_scaling_incompressible_model.jl b/benchmark/strong_scaling_incompressible_model.jl new file mode 100644 index 0000000000..3b24cb272a --- /dev/null +++ b/benchmark/strong_scaling_incompressible_model.jl @@ -0,0 +1,39 @@ +using JLD2 +using BenchmarkTools +using Benchmarks + +# Benchmark parameters + +Nx = 256 +Ny = 256 +Nz = Nx + +ranks = (1, 2, 4, 8, 16) + +# Run and collect benchmarks + +print_system_info() + +for r in ranks + @info "Benchmarking distributed incompressible model strong scaling [N=($Nx, $Ny, $Nz), ranks=$r]..." 
+ julia = Base.julia_cmd() + run(`mpiexec -np $r $julia --project strong_scaling_incompressible_model_single.jl $Nx $Ny $Nz`) +end + +suite = BenchmarkGroup(["size", "ranks"]) +for r in ranks + jldopen("strong_scaling_incompressible_model_$r.jld2", "r") do file + suite[((Nx, Ny, Nz), r)] = file["trial"] + end +end + +# Summarize benchmarks + +df = benchmarks_dataframe(suite) +sort!(df, :ranks) +benchmarks_pretty_table(df, title="Incompressible model strong scaling benchmark") + +suite_Δ = speedups_suite(suite, base_case=((Nx, Ny, Nz), 1)) +df_Δ = speedups_dataframe(suite_Δ) +sort!(df_Δ, :ranks) +benchmarks_pretty_table(df_Δ, title="Incompressible model strong scaling speedup") diff --git a/benchmark/strong_scaling_incompressible_model_single.jl b/benchmark/strong_scaling_incompressible_model_single.jl new file mode 100644 index 0000000000..9e5d3aa4c5 --- /dev/null +++ b/benchmark/strong_scaling_incompressible_model_single.jl @@ -0,0 +1,45 @@ +using Logging +using MPI +using JLD2 +using BenchmarkTools + +using Oceananigans +using Oceananigans.Distributed +using Benchmarks + +Logging.global_logger(OceananigansLogger()) + +MPI.Init() +comm = MPI.COMM_WORLD + +local_rank = MPI.Comm_rank(comm) +R = MPI.Comm_size(comm) + +Nx = parse(Int, ARGS[1]) +Ny = parse(Int, ARGS[2]) +Nz = parse(Int, ARGS[3]) + +@info "Setting up distributed incompressible model with N=($Nx, $Ny, $Nz) grid points on $R rank(s)..." + +topo = (Periodic, Periodic, Periodic) +distributed_grid = RegularRectilinearGrid(topology=topo, size=(Nx, Ny, Nz), extent=(1, 1, 1)) +arch = MultiCPU(grid=distributed_grid, ranks=(1, R, 1)) +model = DistributedIncompressibleModel(architecture=arch, grid=distributed_grid) + +@info "Warming up distributed incompressible model..." + +time_step!(model, 1) # warmup + +@info "Benchmarking distributed incompressible model..." + +trial = @benchmark begin + @sync_gpu time_step!($model, 1) + MPI.Barrier(comm) +end samples=10 + +@info "Rank $local_rank is done benchmarking!" + +jldopen("strong_scaling_incompressible_model_$(R)_$local_rank.jld2", "w") do file + file["trial"] = trial +end + diff --git a/src/Architectures.jl b/src/Architectures.jl index fbe36ae084..b4b2293af1 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -2,7 +2,7 @@ module Architectures export @hascuda, - AbstractArchitecture, CPU, GPU, + AbstractArchitecture, AbstractCPUArchitecture, AbstractGPUArchitecture, CPU, GPU, device, architecture, array_type, arch_array using CUDA @@ -16,20 +16,35 @@ Abstract supertype for architectures supported by Oceananigans. """ abstract type AbstractArchitecture end + +""" + AbstractCPUArchitecture + +Abstract supertype for CPU architectures supported by Oceananigans. +""" +abstract type AbstractCPUArchitecture <: AbstractArchitecture end + +""" + AbstractGPUArchitecture + +Abstract supertype for GPU architectures supported by Oceananigans. +""" +abstract type AbstractGPUArchitecture <: AbstractArchitecture end + """ CPU <: AbstractArchitecture Run Oceananigans on one CPU node. Uses multiple threads if the environment variable `JULIA_NUM_THREADS` is set. """ -struct CPU <: AbstractArchitecture end +struct CPU <: AbstractCPUArchitecture end """ GPU <: AbstractArchitecture Run Oceananigans on a single NVIDIA CUDA GPU. """ -struct GPU <: AbstractArchitecture end +struct GPU <: AbstractGPUArchitecture end """ @hascuda expr @@ -41,8 +56,8 @@ macro hascuda(expr) return has_cuda() ? 
:($(esc(expr))) : :(nothing) end -device(::CPU) = KernelAbstractions.CPU() -device(::GPU) = KernelAbstractions.CUDADevice() +device(::AbstractCPUArchitecture) = KernelAbstractions.CPU() +device(::AbstractGPUArchitecture) = KernelAbstractions.CUDADevice() architecture(::Number) = nothing architecture(::Array) = CPU() @@ -51,9 +66,9 @@ architecture(::CuArray) = GPU() array_type(::CPU) = Array array_type(::GPU) = CuArray -arch_array(::CPU, A::Array) = A -arch_array(::CPU, A::CuArray) = Array(A) -arch_array(::GPU, A::Array) = CuArray(A) -arch_array(::GPU, A::CuArray) = A +arch_array(::AbstractCPUArchitecture, A::Array) = A +arch_array(::AbstractCPUArchitecture, A::CuArray) = Array(A) +arch_array(::AbstractGPUArchitecture, A::Array) = CuArray(A) +arch_array(::AbstractGPUArchitecture, A::CuArray) = A end diff --git a/src/Distributed/Distributed.jl b/src/Distributed/Distributed.jl new file mode 100644 index 0000000000..7c1973c807 --- /dev/null +++ b/src/Distributed/Distributed.jl @@ -0,0 +1,21 @@ +module Distributed + +export + MultiCPU, child_architecture, + HaloCommunication, HaloCommunicationBC, + inject_halo_communication_boundary_conditions, + DistributedFFTBasedPoissonSolver, + DistributedIncompressibleModel + +using Oceananigans.Utils + +include("distributed_utils.jl") +include("multi_architectures.jl") +include("halo_communication_bcs.jl") +include("halo_communication.jl") +include("distributed_fields.jl") +include("distributed_fft_based_poisson_solver.jl") +include("distributed_solve_for_pressure.jl") +include("distributed_incompressible_model.jl") + +end # module diff --git a/src/Distributed/distributed_fft_based_poisson_solver.jl b/src/Distributed/distributed_fft_based_poisson_solver.jl new file mode 100644 index 0000000000..039e1f2238 --- /dev/null +++ b/src/Distributed/distributed_fft_based_poisson_solver.jl @@ -0,0 +1,54 @@ +import PencilFFTs + +import Oceananigans.Solvers: poisson_eigenvalues, solve_poisson_equation! + +struct DistributedFFTBasedPoissonSolver{P, F, L, λ, S} + plan :: P + full_grid :: F + my_grid :: L + eigenvalues :: λ + storage :: S +end + +function DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) + topo = (TX, TY, TZ) = topology(full_grid) + + λx = poisson_eigenvalues(full_grid.Nx, full_grid.Lx, 1, TX()) + λy = poisson_eigenvalues(full_grid.Ny, full_grid.Ly, 2, TY()) + λz = poisson_eigenvalues(full_grid.Nz, full_grid.Lz, 3, TZ()) + + I, J, K = arch.local_index + λx = λx[(J-1)*local_grid.Ny+1:J*local_grid.Ny, :, :] + + eigenvalues = (; λx, λy, λz) + + transform = PencilFFTs.Transforms.FFT!() + proc_dims = (arch.ranks[2], arch.ranks[3]) + plan = PencilFFTs.PencilFFTPlan(size(full_grid), transform, proc_dims, MPI.COMM_WORLD) + storage = PencilFFTs.allocate_input(plan) + + return DistributedFFTBasedPoissonSolver(plan, full_grid, local_grid, eigenvalues, storage) +end + +function solve_poisson_equation!(solver::DistributedFFTBasedPoissonSolver) + λx, λy, λz = solver.eigenvalues + + # Apply forward transforms. + solver.plan * solver.storage + + # Solve the discrete Poisson equation. + RHS = ϕ = solver.storage[2] + @. ϕ = - RHS / (λx + λy + λz) + + # Setting DC component of the solution (the mean) to be zero. This is also + # necessary because the source term to the Poisson equation has zero mean + # and so the DC component comes out to be ∞. + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + ϕ[1, 1, 1] = 0 + end + + # Apply backward transforms. 
+ solver.plan \ solver.storage + + return nothing +end diff --git a/src/Distributed/distributed_fields.jl b/src/Distributed/distributed_fields.jl new file mode 100644 index 0000000000..3d9a345577 --- /dev/null +++ b/src/Distributed/distributed_fields.jl @@ -0,0 +1,10 @@ +import Oceananigans.Fields: Field + +function Field(X, Y, Z, arch::AbstractMultiArchitecture, grid, + bcs = FieldBoundaryConditions(grid, (X, Y, Z)), + data = new_data(eltype(grid), arch, grid, (X, Y, Z))) + + communicative_bcs = inject_halo_communication_boundary_conditions(bcs, arch.local_rank, arch.connectivity) + + return Field(X, Y, Z, child_architecture(arch), grid, communicative_bcs, data) +end diff --git a/src/Distributed/distributed_incompressible_model.jl b/src/Distributed/distributed_incompressible_model.jl new file mode 100644 index 0000000000..87d4c337ff --- /dev/null +++ b/src/Distributed/distributed_incompressible_model.jl @@ -0,0 +1,54 @@ +using MPI + +using Oceananigans +using Oceananigans.Grids +using Oceananigans.Grids: halo_size + +##### +##### Distributed incompressible model constructor +##### + +function DistributedIncompressibleModel(; architecture, grid, model_kwargs...) + i, j, k = architecture.local_index + Rx, Ry, Rz = architecture.ranks + my_connectivity = architecture.connectivity + + Nx, Ny, Nz = size(grid) + Lx, Ly, Lz = length(grid) + + # Pull out endpoints for full grid. + xL, xR = grid.xF[1], grid.xF[Nx+1] + yL, yR = grid.yF[1], grid.yF[Ny+1] + zL, zR = grid.zF[1], grid.zF[Nz+1] + + # Make sure we can put an integer number of grid points in each rank. + # Will generalize in the future. + # TODO: Check that we have enough grid points on each rank to fit the halos! + @assert isinteger(Nx / Rx) + @assert isinteger(Ny / Ry) + @assert isinteger(Nz / Rz) + + nx, ny, nz = Nx÷Rx, Ny÷Ry, Nz÷Rz + lx, ly, lz = Lx/Rx, Ly/Ry, Lz/Rz + + x₁, x₂ = xL + (i-1)*lx, xL + i*lx + y₁, y₂ = yL + (j-1)*ly, yL + j*ly + z₁, z₂ = zL + (k-1)*lz, zL + k*lz + + # FIXME? local grid might have different topology! + my_grid = RegularRectilinearGrid(topology=topology(grid), size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂), halo=halo_size(grid)) + + ## Construct local model + + pressure_solver = haskey(model_kwargs, :pressure_solver) ? Dict(model_kwargs)[:pressure_solver] : + DistributedFFTBasedPoissonSolver(architecture, grid, my_grid) + + my_model = IncompressibleModel(; + architecture = architecture, + grid = my_grid, + pressure_solver = pressure_solver, + model_kwargs... + ) + + return my_model +end diff --git a/src/Distributed/distributed_solve_for_pressure.jl b/src/Distributed/distributed_solve_for_pressure.jl new file mode 100644 index 0000000000..5e05e642b3 --- /dev/null +++ b/src/Distributed/distributed_solve_for_pressure.jl @@ -0,0 +1,9 @@ +using Oceananigans.Solvers: calculate_pressure_source_term_fft_based_solver! + +import Oceananigans.Solvers: solve_for_pressure!, source_term_storage, source_term_kernel, solution_storage + +source_term_storage(solver::DistributedFFTBasedPoissonSolver) = first(solver.storage) + +source_term_kernel(::DistributedFFTBasedPoissonSolver) = calculate_pressure_source_term_fft_based_solver! 
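For orientation, the eigenvalue-space solve in `solve_poisson_equation!` above is the distributed counterpart of the usual FFT-based Poisson solver. Below is a minimal serial sketch of the same algebra, assuming a triply periodic grid, plain FFTW transforms, and eigenvalue arrays shaped to broadcast along dimensions 1, 2, and 3; the distributed solver swaps the transforms for a PencilFFTs plan and slices the eigenvalues to each rank's local pencil.

```julia
using FFTW

# Serial sketch of the spectral Poisson solve: λx, λy, λz are the discrete
# eigenvalues from poisson_eigenvalues, reshaped so they broadcast along
# dimensions 1, 2, and 3 respectively.
function solve_poisson_serial(RHS, λx, λy, λz)
    ϕc = fft(RHS)                  # forward transform of the source term
    @. ϕc = -ϕc / (λx + λy + λz)   # solve mode by mode
    ϕc[1, 1, 1] = 0                # zero the mean (DC) mode, which is otherwise undefined
    return real.(ifft(ϕc))         # backward transform
end
```

The `ϕc[1, 1, 1] = 0` line plays the same role as the rank-0 branch in `solve_poisson_equation!` above.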
+ +solution_storage(solver::DistributedFFTBasedPoissonSolver) = first(solver.storage) diff --git a/src/Distributed/distributed_utils.jl b/src/Distributed/distributed_utils.jl new file mode 100644 index 0000000000..8ce5fe3827 --- /dev/null +++ b/src/Distributed/distributed_utils.jl @@ -0,0 +1,99 @@ +using Oceananigans.Fields: AbstractField +using Oceananigans.Grids: + interior_indices, + left_halo_indices, right_halo_indices, + underlying_left_halo_indices, underlying_right_halo_indices + +# TODO: Move to Grids/grid_utils.jl + +##### +##### Viewing halos +##### + +west_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? view(f.data, left_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) : + view(f.data, left_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), + interior_indices(LY, topology(f, 2), f.grid.Ny), + interior_indices(LZ, topology(f, 3), f.grid.Nz)) + +east_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? view(f.data, right_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) : + view(f.data, right_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), + interior_indices(LY, topology(f, 2), f.grid.Ny), + interior_indices(LZ, topology(f, 3), f.grid.Nz)) + +south_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? view(f.data, :, left_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), :) : + view(f.data, interior_indices(LX, topology(f, 1), f.grid.Nx), + left_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), + interior_indices(LZ, topology(f, 3), f.grid.Nz)) + +north_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? view(f.data, :, right_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), :) : + view(f.data, interior_indices(LX, topology(f, 1), f.grid.Nx), + right_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), + interior_indices(LZ, topology(f, 3), f.grid.Nz)) + +bottom_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? view(f.data, :, :, left_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz)) : + view(f.data, interior_indices(LX, topology(f, 1), f.grid.Nx), + interior_indices(LY, topology(f, 2), f.grid.Ny), + left_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz)) + +top_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? 
view(f.data, :, :, right_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz)) : + view(f.data, interior_indices(LX, topology(f, 1), f.grid.Nx), + interior_indices(LY, topology(f, 2), f.grid.Ny), + left_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz)) + +underlying_west_halo(f, grid, location) = + view(f.parent, underlying_left_halo_indices(location, topology(grid, 1), grid.Nx, grid.Hx), :, :) + +underlying_east_halo(f, grid, location) = + view(f.parent, underlying_right_halo_indices(location, topology(grid, 1), grid.Nx, grid.Hx), :, :) + +underlying_south_halo(f, grid, location) = + view(f.parent, :, underlying_left_halo_indices(location, topology(grid, 2), grid.Ny, grid.Hy), :) + +underlying_north_halo(f, grid, location) = + view(f.parent, :, underlying_right_halo_indices(location, topology(grid, 2), grid.Ny, grid.Hy), :) + +underlying_bottom_halo(f, grid, location) = + view(f.parent, :, :, underlying_left_halo_indices(location, topology(grid, 3), grid.Nz, grid.Hz)) + +underlying_top_halo(f, grid, location) = + view(f.parent, :, :, underlying_right_halo_indices(location, topology(grid, 3), grid.Nz, grid.Hz)) + +##### +##### Viewing boundary grid points (used to fill other halos) +##### + +left_boundary_indices(loc, topo, N, H) = 1:H +left_boundary_indices(::Type{Nothing}, topo, N, H) = 1:0 # empty + +right_boundary_indices(loc, topo, N, H) = N-H+1:N +right_boundary_indices(::Type{Nothing}, topo, N, H) = 1:0 # empty + +underlying_left_boundary_indices(loc, topo, N, H) = 1+H:2H +underlying_left_boundary_indices(::Type{Nothing}, topo, N, H) = 1:0 # empty + +underlying_right_boundary_indices(loc, topo, N, H) = N+1:N+H +underlying_right_boundary_indices(::Type{Nothing}, topo, N, H) = 1:0 # empty + +underlying_west_boundary(f, grid, location) = + view(f.parent, underlying_left_boundary_indices(location, topology(grid, 1), grid.Nx, grid.Hx), :, :) + +underlying_east_boundary(f, grid, location) = + view(f.parent, underlying_right_boundary_indices(location, topology(grid, 1), grid.Nx, grid.Hx), :, :) + +underlying_south_boundary(f, grid, location) = + view(f.parent, :, underlying_left_boundary_indices(location, topology(grid, 2), grid.Ny, grid.Hy), :) + +underlying_north_boundary(f, grid, location) = + view(f.parent, :, underlying_right_boundary_indices(location, topology(grid, 2), grid.Ny, grid.Hy), :) + +underlying_bottom_boundary(f, grid, location) = + view(f.parent, :, :, underlying_left_boundary_indices(location, topology(grid, 3), grid.Nz, grid.Hz)) + +underlying_top_boundary(f, grid, location) = + view(f.parent, :, :, underlying_right_boundary_indices(location, topology(grid, 3), grid.Nz, grid.Hz)) diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl new file mode 100644 index 0000000000..b14daa6d6d --- /dev/null +++ b/src/Distributed/halo_communication.jl @@ -0,0 +1,165 @@ +using KernelAbstractions: @kernel, @index, Event, MultiEvent + +import Oceananigans.BoundaryConditions: + fill_halo_regions!, + fill_west_halo!, fill_east_halo!, fill_south_halo!, + fill_north_halo!, fill_bottom_halo!, fill_top_halo! + +##### +##### MPI tags for halo communication BCs +##### + +sides = (:west, :east, :south, :north, :top, :bottom) +side_id = Dict(side => n for (n, side) in enumerate(sides)) + +opposite_side = Dict( + :west => :east, :east => :west, + :south => :north, :north => :south, + :bottom => :top, :top => :bottom +) + +# Define functions that return unique send and recv MPI tags for each side. 
+# It's an integer where +# digit 1: the side +# digits 2-4: the "from" rank +# digits 5-7: the "to" rank + +RANK_DIGITS = 3 + +for side in sides + side_str = string(side) + send_tag_fn_name = Symbol("$(side)_send_tag") + recv_tag_fn_name = Symbol("$(side)_recv_tag") + @eval begin + function $send_tag_fn_name(local_rank, rank_to_send_to) + from_digits = string(local_rank, pad=RANK_DIGITS) + to_digits = string(rank_to_send_to, pad=RANK_DIGITS) + side_digit = string(side_id[Symbol($side_str)]) + return parse(Int, from_digits * to_digits * side_digit) + end + + function $recv_tag_fn_name(local_rank, rank_to_recv_from) + from_digits = string(rank_to_recv_from, pad=RANK_DIGITS) + to_digits = string(local_rank, pad=RANK_DIGITS) + side_digit = string(side_id[opposite_side[Symbol($side_str)]]) + return parse(Int, from_digits * to_digits * side_digit) + end + end +end + +##### +##### Filling halos for halo communication boundary conditions +##### + +fill_halo_regions!(field::AbstractField{LX, LY, LZ}, arch::AbstractMultiArchitecture, args...) where {LX, LY, LZ} = + fill_halo_regions!(field.data, field.boundary_conditions, arch, field.grid, (LX, LY, LZ), args...) + +function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitecture, grid, c_location, args...) + + barrier = Event(device(child_architecture(arch))) + + west_event, east_event = fill_west_and_east_halos!(c, bcs.west, bcs.east, arch, barrier, grid, c_location, args...) + south_event, north_event = fill_south_and_north_halos!(c, bcs.south, bcs.north, arch, barrier, grid, c_location, args...) + bottom_event, top_event = fill_bottom_and_top_halos!(c, bcs.bottom, bcs.top, arch, barrier, grid, c_location, args...) + + events = [west_event, east_event, south_event, north_event, bottom_event, top_event] + events = filter(e -> e isa Event, events) + wait(device(child_architecture(arch)), MultiEvent(Tuple(events))) + + return nothing +end + +##### +##### fill_west_and_east_halos! } +##### fill_south_and_north_halos! } for non-communicating boundary conditions (fallback) +##### fill_bottom_and_top_halos! } +##### + +for (side, opposite_side) in zip([:west, :south, :bottom], [:east, :north, :top]) + fill_both_halos! = Symbol("fill_$(side)_and_$(opposite_side)_halos!") + fill_side_halo! = Symbol("fill_$(side)_halo!") + fill_opposite_side_halo! = Symbol("fill_$(opposite_side)_halo!") + + @eval begin + function $fill_both_halos!(c, bc_side, bc_opposite_side, arch, barrier, grid, args...) + event_side = $fill_side_halo!(c, bc_side, child_architecture(arch), barrier, grid, args...) + event_opposite_side = $fill_opposite_side_halo!(c, bc_opposite_side, child_architecture(arch), barrier, grid, args...) + return event_side, event_opposite_side + end + end +end + +##### +##### fill_west_and_east_halos! } +##### fill_south_and_north_halos! } for when both halos are communicative +##### fill_bottom_and_top_halos! } +##### + +for (side, opposite_side) in zip([:west, :south, :bottom], [:east, :north, :top]) + fill_both_halos! = Symbol("fill_$(side)_and_$(opposite_side)_halos!") + send_side_halo = Symbol("send_$(side)_halo") + send_opposite_side_halo = Symbol("send_$(opposite_side)_halo") + recv_and_fill_side_halo! = Symbol("recv_and_fill_$(side)_halo!") + recv_and_fill_opposite_side_halo! = Symbol("recv_and_fill_$(opposite_side)_halo!") + + @eval begin + function $fill_both_halos!(c, bc_side::HaloCommunicationBC, bc_opposite_side::HaloCommunicationBC, arch, barrier, grid, c_location, args...) 
+ @assert bc_side.condition.from == bc_opposite_side.condition.from # Extra protection in case of bugs + local_rank = bc_side.condition.from + + $send_side_halo(c, grid, c_location, local_rank, bc_side.condition.to) + $send_opposite_side_halo(c, grid, c_location, local_rank, bc_opposite_side.condition.to) + + $recv_and_fill_side_halo!(c, grid, c_location, local_rank, bc_side.condition.to) + $recv_and_fill_opposite_side_halo!(c, grid, c_location, local_rank, bc_opposite_side.condition.to) + + return nothing, nothing + end + end +end + +##### +##### Sending halos +##### + +for side in sides + side_str = string(side) + send_side_halo = Symbol("send_$(side)_halo") + underlying_side_boundary = Symbol("underlying_$(side)_boundary") + side_send_tag = Symbol("$(side)_send_tag") + + @eval begin + function $send_side_halo(c, grid, c_location, local_rank, rank_to_send_to) + send_buffer = $underlying_side_boundary(c, grid, c_location) + send_tag = $side_send_tag(local_rank, rank_to_send_to) + + @debug "Sending " * $side_str * " halo: local_rank=$local_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag" + status = MPI.Isend(send_buffer, rank_to_send_to, send_tag, MPI.COMM_WORLD) + + return status + end + end +end + +##### +##### Receiving and filling halos (buffer is a view so it gets filled upon receive) +##### + +for side in sides + side_str = string(side) + recv_and_fill_side_halo! = Symbol("recv_and_fill_$(side)_halo!") + underlying_side_halo = Symbol("underlying_$(side)_halo") + side_recv_tag = Symbol("$(side)_recv_tag") + + @eval begin + function $recv_and_fill_side_halo!(c, grid, c_location, local_rank, rank_to_recv_from) + recv_buffer = $underlying_side_halo(c, grid, c_location) + recv_tag = $side_recv_tag(local_rank, rank_to_recv_from) + + @debug "Receiving " * $side_str * " halo: local_rank=$local_rank, rank_to_recv_from=$rank_to_recv_from, recv_tag=$recv_tag" + MPI.Recv!(recv_buffer, rank_to_recv_from, recv_tag, MPI.COMM_WORLD) + + return nothing + end + end +end diff --git a/src/Distributed/halo_communication_bcs.jl b/src/Distributed/halo_communication_bcs.jl new file mode 100644 index 0000000000..56f2c511f7 --- /dev/null +++ b/src/Distributed/halo_communication_bcs.jl @@ -0,0 +1,55 @@ +using Oceananigans.BoundaryConditions +using Oceananigans.BoundaryConditions: BCType + +import Oceananigans.BoundaryConditions: bctype_str, print_condition + +struct HaloCommunication <: BCType end + +const HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} + +bctype_str(::HaloCommunicationBC) ="HaloCommunication" + +HaloCommunicationBoundaryCondition(val; kwargs...) = BoundaryCondition(HaloCommunication, val; kwargs...) 
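As a concrete illustration of how these pieces fit together (a sketch, assuming the constructors defined in this file and the tag helpers generated in `halo_communication.jl` above): on rank 1 of the 1×4×1 decomposition used in the tests, the northern neighbour is rank 2, so the field's north boundary condition is swapped for a halo-communication BC carrying the (from, to) rank pair, and the send/receive tags computed by the two ranks agree.

```julia
# Rank 1 sends its north halo to rank 2, tagging the message with the
# (from, to, side) digits; rank 2's matching receive computes the same tag.
comm_ranks = HaloCommunicationRanks(from=1, to=2)
north_bc   = HaloCommunicationBoundaryCondition(comm_ranks)

north_send_tag(1, 2)  # == 10024 ("001" * "002" * "4", since side_id[:north] == 4)
south_recv_tag(2, 1)  # == 10024, so the Isend/Recv! pair is uniquely matched
```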
+ +struct HaloCommunicationRanks{F, T} + from :: F + to :: T +end + +HaloCommunicationRanks(; from, to) = HaloCommunicationRanks(from, to) + +print_condition(hcr::HaloCommunicationRanks) = "(from rank $(hcr.from) to rank $(hcr.to))" + +function inject_halo_communication_boundary_conditions(field_bcs, local_rank, connectivity) + rank_east = connectivity.east + rank_west = connectivity.west + rank_north = connectivity.north + rank_south = connectivity.south + rank_top = connectivity.top + rank_bottom = connectivity.bottom + + east_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_east) + west_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_west) + north_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_north) + south_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_south) + top_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_top) + bottom_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_bottom) + + east_comm_bc = HaloCommunicationBoundaryCondition(east_comm_ranks) + west_comm_bc = HaloCommunicationBoundaryCondition(west_comm_ranks) + north_comm_bc = HaloCommunicationBoundaryCondition(north_comm_ranks) + south_comm_bc = HaloCommunicationBoundaryCondition(south_comm_ranks) + top_comm_bc = HaloCommunicationBoundaryCondition(top_comm_ranks) + bottom_comm_bc = HaloCommunicationBoundaryCondition(bottom_comm_ranks) + + x_bcs = CoordinateBoundaryConditions(isnothing(rank_west) ? field_bcs.west : west_comm_bc, + isnothing(rank_east) ? field_bcs.east : east_comm_bc) + + y_bcs = CoordinateBoundaryConditions(isnothing(rank_south) ? field_bcs.south : south_comm_bc, + isnothing(rank_north) ? field_bcs.north : north_comm_bc) + + z_bcs = CoordinateBoundaryConditions(isnothing(rank_bottom) ? field_bcs.bottom : bottom_comm_bc, + isnothing(rank_top) ? 
field_bcs.top : top_comm_bc) + + return FieldBoundaryConditions(x_bcs, y_bcs, z_bcs) +end diff --git a/src/Distributed/multi_architectures.jl b/src/Distributed/multi_architectures.jl new file mode 100644 index 0000000000..122711771a --- /dev/null +++ b/src/Distributed/multi_architectures.jl @@ -0,0 +1,151 @@ +using Oceananigans.Architectures + +using Oceananigans.Grids: topology, validate_tupled_argument + +struct MultiCPU{G, R, I, ρ, C, γ} <: AbstractCPUArchitecture + distributed_grid :: G + local_rank :: R + local_index :: I + ranks :: ρ + connectivity :: C + communicator :: γ +end + +struct MultiGPU{G, R, I, ρ, C, γ} <: AbstractGPUArchitecture + distributed_grid :: G + local_rank :: R + local_index :: I + ranks :: ρ + connectivity :: C + communicator :: γ +end + +const AbstractMultiArchitecture = Union{MultiCPU, MultiGPU} + +child_architecture(::MultiCPU) = CPU() +child_architecture(::CPU) = CPU() + +child_architecture(::MultiGPU) = GPU() +child_architecture(::GPU) = GPU() + +##### +##### Converting between index and MPI rank taking k as the fast index +##### + +index2rank(i, j, k, Rx, Ry, Rz) = (i-1)*Ry*Rz + (j-1)*Rz + (k-1) + +function rank2index(r, Rx, Ry, Rz) + i = div(r, Ry*Rz) + r -= i*Ry*Rz + j = div(r, Rz) + k = mod(r, Rz) + return i+1, j+1, k+1 # 1-based Julia +end + +##### +##### Rank connectivity graph +##### + +struct RankConnectivity{E, W, N, S, T, B} + east :: E + west :: W + north :: N + south :: S + top :: T + bottom :: B +end + +RankConnectivity(; east, west, north, south, top, bottom) = + RankConnectivity(east, west, north, south, top, bottom) + +function increment_index(i, R, topo) + R == 1 && return nothing + if i+1 > R + if topo == Periodic + return 1 + else + return nothing + end + else + return i+1 + end +end + +function decrement_index(i, R, topo) + R == 1 && return nothing + if i-1 < 1 + if topo == Periodic + return R + else + return nothing + end + else + return i-1 + end +end + +function RankConnectivity(model_index, ranks, topology) + i, j, k = model_index + Rx, Ry, Rz = ranks + TX, TY, TZ = topology + + i_east = increment_index(i, Rx, TX) + i_west = decrement_index(i, Rx, TX) + j_north = increment_index(j, Ry, TY) + j_south = decrement_index(j, Ry, TY) + k_top = increment_index(k, Rz, TZ) + k_bot = decrement_index(k, Rz, TZ) + + r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) + r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) + r_north = isnothing(j_north) ? nothing : index2rank(i, j_north, k, Rx, Ry, Rz) + r_south = isnothing(j_south) ? nothing : index2rank(i, j_south, k, Rx, Ry, Rz) + r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) + r_bot = isnothing(k_bot) ? 
nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) + + return RankConnectivity(east=r_east, west=r_west, north=r_north, + south=r_south, top=r_top, bottom=r_bot) +end + +##### +##### Constructors +##### + +function MultiCPU(; grid, ranks, communicator=MPI.COMM_WORLD) + MPI.Initialized() || error("Must call MPI.Init() before constructing a MultiCPU.") + + validate_tupled_argument(ranks, Int, "ranks") + + Rx, Ry, Rz = ranks + total_ranks = Rx*Ry*Rz + + mpi_ranks = MPI.Comm_size(communicator) + local_rank = MPI.Comm_rank(communicator) + + i, j, k = local_index = rank2index(local_rank, Rx, Ry, Rz) + + if total_ranks != mpi_ranks + throw(ArgumentError("ranks=($Rx, $Ry, $Rz) [$total_ranks total] inconsistent " * + "with number of MPI ranks: $mpi_ranks.")) + end + + local_connectivity = RankConnectivity(local_index, ranks, topology(grid)) + + return MultiCPU(grid, local_rank, local_index, ranks, local_connectivity, communicator) +end + +##### +##### Pretty printing +##### + +function Base.show(io::IO, arch::MultiCPU) + c = arch.connectivity + print(io, "MultiCPU architecture (rank $(arch.local_rank)/$(prod(arch.ranks))) [index $(arch.local_index) / $(arch.ranks)]\n", + "└── connectivity:", + isnothing(c.east) ? "" : " east=$(c.east)", + isnothing(c.west) ? "" : " west=$(c.west)", + isnothing(c.north) ? "" : " north=$(c.north)", + isnothing(c.south) ? "" : " south=$(c.south)", + isnothing(c.top) ? "" : " top=$(c.top)", + isnothing(c.bottom) ? "" : " bottom=$(c.bottom)") +end diff --git a/src/Fields/field.jl b/src/Fields/field.jl index 6389e7a665..c350423415 100644 --- a/src/Fields/field.jl +++ b/src/Fields/field.jl @@ -78,7 +78,7 @@ function CenterField(FT::DataType, arch, grid, bcs = TracerBoundaryConditions(grid), data = new_data(FT, arch, grid, (Center, Center, Center))) - return Field{Center, Center, Center}(data, grid, bcs) + return Field(Center, Center, Center, arch, grid, bcs, data) end """ @@ -93,7 +93,7 @@ function XFaceField(FT::DataType, arch, grid, bcs = UVelocityBoundaryConditions(grid), data = new_data(FT, arch, grid, (Face, Center, Center))) - return Field{Face, Center, Center}(data, grid, bcs) + return Field(Face, Center, Center, arch, grid, bcs, data) end """ @@ -108,7 +108,7 @@ function YFaceField(FT::DataType, arch, grid, bcs = VVelocityBoundaryConditions(grid), data = new_data(FT, arch, grid, (Center, Face, Center))) - return Field{Center, Face, Center}(data, grid, bcs) + return Field(Center, Face, Center, arch, grid, bcs, data) end """ @@ -123,13 +123,13 @@ function ZFaceField(FT::DataType, arch, grid, bcs = WVelocityBoundaryConditions(grid), data = new_data(FT, arch, grid, (Center, Center, Face))) - return Field{Center, Center, Face}(data, grid, bcs) + return Field(Center, Center, Face, arch, grid, bcs, data) end - CenterField(arch::AbstractArchitecture, grid, args...) = CenterField(eltype(grid), arch, grid, args...) -XFaceField(arch::AbstractArchitecture, grid, args...) = XFaceField(eltype(grid), arch, grid, args...) -YFaceField(arch::AbstractArchitecture, grid, args...) = YFaceField(eltype(grid), arch, grid, args...) -ZFaceField(arch::AbstractArchitecture, grid, args...) = ZFaceField(eltype(grid), arch, grid, args...) +CenterField(arch::AbstractArchitecture, grid, args...) = CenterField(eltype(grid), arch, grid, args...) + XFaceField(arch::AbstractArchitecture, grid, args...) = XFaceField(eltype(grid), arch, grid, args...) + YFaceField(arch::AbstractArchitecture, grid, args...) = YFaceField(eltype(grid), arch, grid, args...) 
+ ZFaceField(arch::AbstractArchitecture, grid, args...) = ZFaceField(eltype(grid), arch, grid, args...) @propagate_inbounds Base.setindex!(f::Field, v, inds...) = @inbounds setindex!(f.data, v, inds...) diff --git a/src/Fields/new_data.jl b/src/Fields/new_data.jl index 9297009819..d1da2bb5c2 100644 --- a/src/Fields/new_data.jl +++ b/src/Fields/new_data.jl @@ -45,7 +45,7 @@ Returns an `OffsetArray` of zeros of float type `FT`, with parent data in CPU memory and indices corresponding to a field on a `grid` of `size(grid)` and located at `loc`. """ -function new_data(FT, ::CPU, grid, loc) +function new_data(FT, ::AbstractCPUArchitecture, grid, loc) underlying_data = zeros(FT, total_length(loc[1], topology(grid, 1), grid.Nx, grid.Hx), total_length(loc[2], topology(grid, 2), grid.Ny, grid.Hy), total_length(loc[3], topology(grid, 3), grid.Nz, grid.Hz)) @@ -60,7 +60,7 @@ Returns an `OffsetArray` of zeros of float type `FT`, with parent data in GPU memory and indices corresponding to a field on a `grid` of `size(grid)` and located at `loc`. """ -function new_data(FT, ::GPU, grid, loc) +function new_data(FT, ::AbstractGPUArchitecture, grid, loc) underlying_data = CuArray{FT}(undef, total_length(loc[1], topology(grid, 1), grid.Nx, grid.Hx), total_length(loc[2], topology(grid, 2), grid.Ny, grid.Hy), total_length(loc[3], topology(grid, 3), grid.Nz, grid.Hz)) diff --git a/src/Models/IncompressibleModels/show_incompressible_model.jl b/src/Models/IncompressibleModels/show_incompressible_model.jl index 19e1229307..96fe62a996 100644 --- a/src/Models/IncompressibleModels/show_incompressible_model.jl +++ b/src/Models/IncompressibleModels/show_incompressible_model.jl @@ -1,9 +1,9 @@ -using Oceananigans.Utils: prettytime, ordered_dict_show using Oceananigans: short_show +using Oceananigans.Utils: prettytime, ordered_dict_show """Show the innards of a `Model` in the REPL.""" function Base.show(io::IO, model::IncompressibleModel{TS, C, A}) where {TS, C, A} - print(io, "IncompressibleModel{$A, $(eltype(model.grid))}", + print(io, "IncompressibleModel{$(Base.typename(A)), $(eltype(model.grid))}", "(time = $(prettytime(model.clock.time)), iteration = $(model.clock.iteration)) \n", "├── grid: $(short_show(model.grid))\n", "├── tracers: $(tracernames(model.tracers))\n", diff --git a/src/Oceananigans.jl b/src/Oceananigans.jl index 4ca575c376..2434cd06d5 100644 --- a/src/Oceananigans.jl +++ b/src/Oceananigans.jl @@ -165,6 +165,7 @@ include("Diagnostics/Diagnostics.jl") include("OutputWriters/OutputWriters.jl") include("Simulations/Simulations.jl") include("AbstractOperations/AbstractOperations.jl") +include("Distributed/Distributed.jl") ##### ##### Needed so we can export names from sub-modules at the top-level @@ -190,6 +191,7 @@ using .Diagnostics using .OutputWriters using .Simulations using .AbstractOperations +using .Distributed function __init__() threads = Threads.nthreads() diff --git a/src/Simulations/simulation.jl b/src/Simulations/simulation.jl index 2529cc54e1..a2dae097c2 100644 --- a/src/Simulations/simulation.jl +++ b/src/Simulations/simulation.jl @@ -83,7 +83,7 @@ function Simulation(model; Δt, end Base.show(io::IO, s::Simulation) = - print(io, "Simulation{$(typeof(s.model).name){$(typeof(s.model.architecture)), $(eltype(s.model.grid))}}\n", + print(io, "Simulation{$(typeof(s.model).name){$(Base.typename(typeof(s.model.architecture))), $(eltype(s.model.grid))}}\n", "├── Model clock: time = $(prettytime(s.model.clock.time)), iteration = $(s.model.clock.iteration) \n", "├── Next time step 
($(typeof(s.Δt))): $(prettytime(get_Δt(s.Δt))) \n", "├── Iteration interval: $(s.iteration_interval)\n", diff --git a/test/runtests.jl b/test/runtests.jl index a0f88cce96..814d4029f2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,6 +6,7 @@ using LinearAlgebra using Logging using CUDA +using MPI using JLD2 using FFTW using OffsetArrays @@ -28,6 +29,7 @@ using Oceananigans.Diagnostics using Oceananigans.OutputWriters using Oceananigans.TurbulenceClosures using Oceananigans.AbstractOperations +using Oceananigans.Distributed using Oceananigans.Logger using Oceananigans.Units using Oceananigans.Utils @@ -116,6 +118,14 @@ group = get(ENV, "TEST_GROUP", :all) |> Symbol end end + if group == :shallow_water || group == :all + include("test_shallow_water_models.jl") + end + + if group == :hydrostatic_free_surface || group == :all + include("test_hydrostatic_free_surface_models.jl") + end + if group == :simulation || group == :all @testset "Simulation tests" begin include("test_simulations.jl") @@ -126,6 +136,12 @@ group = get(ENV, "TEST_GROUP", :all) |> Symbol end end + if group == :distributed || group == :all + MPI.Initialized() || MPI.Init() + include("test_distributed_models.jl") + include("test_distributed_poisson_solvers.jl") + end + if group == :regression || group == :all include("test_regression.jl") end @@ -139,12 +155,4 @@ group = get(ENV, "TEST_GROUP", :all) |> Symbol if group == :convergence include("test_convergence.jl") end - - if group == :shallow_water || group == :all - include("test_shallow_water_models.jl") - end - - if group == :hydrostatic_free_surface || group == :all - include("test_hydrostatic_free_surface_models.jl") - end end diff --git a/test/test_distributed_models.jl b/test/test_distributed_models.jl new file mode 100644 index 0000000000..dded3c090a --- /dev/null +++ b/test/test_distributed_models.jl @@ -0,0 +1,494 @@ +using MPI + +using Oceananigans.BoundaryConditions: fill_halo_regions! +using Oceananigans.Distributed: index2rank, east_halo, west_halo, north_halo, south_halo, top_halo, bottom_halo + +# Right now just testing with 4 ranks! +comm = MPI.COMM_WORLD +mpi_ranks = MPI.Comm_size(comm) +@assert mpi_ranks == 4 + +##### +##### Multi architectures and rank connectivity +##### + +function test_triply_periodic_rank_connectivity_with_411_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) + + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test local_rank == index2rank(arch.local_index..., arch.ranks...) + + connectivity = arch.connectivity + + # No communication in y and z. 
+ @test isnothing(connectivity.south) + @test isnothing(connectivity.north) + @test isnothing(connectivity.top) + @test isnothing(connectivity.bottom) + + # +---+---+---+---+ + # | 0 | 1 | 2 | 3 | + # +---+---+---+---+ + + if local_rank == 0 + @test connectivity.east == 1 + @test connectivity.west == 3 + elseif local_rank == 1 + @test connectivity.east == 2 + @test connectivity.west == 0 + elseif local_rank == 2 + @test connectivity.east == 3 + @test connectivity.west == 1 + elseif local_rank == 3 + @test connectivity.east == 0 + @test connectivity.west == 2 + end + + return nothing +end + +function test_triply_periodic_rank_connectivity_with_141_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test local_rank == index2rank(arch.local_index..., arch.ranks...) + + connectivity = arch.connectivity + + # No communication in x and z. + @test isnothing(connectivity.east) + @test isnothing(connectivity.west) + @test isnothing(connectivity.top) + @test isnothing(connectivity.bottom) + + # +---+ + # | 3 | + # +---+ + # | 2 | + # +---+ + # | 1 | + # +---+ + # | 0 | + # +---+ + + if local_rank == 0 + @test connectivity.north == 1 + @test connectivity.south == 3 + elseif local_rank == 1 + @test connectivity.north == 2 + @test connectivity.south == 0 + elseif local_rank == 2 + @test connectivity.north == 3 + @test connectivity.south == 1 + elseif local_rank == 3 + @test connectivity.north == 0 + @test connectivity.south == 2 + end + + return nothing +end + +function test_triply_periodic_rank_connectivity_with_114_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) + + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test local_rank == index2rank(arch.local_index..., arch.ranks...) + + connectivity = arch.connectivity + + # No communication in x and y. + @test isnothing(connectivity.east) + @test isnothing(connectivity.west) + @test isnothing(connectivity.north) + @test isnothing(connectivity.south) + + # /---/ + # / 3 / + # /---/ + # /---/ + # / 2 / + # /---/ + # /---/ + # / 1 / + # /---/ + # /---/ + # / 0 / + # /---/ + + if local_rank == 0 + @test connectivity.top == 1 + @test connectivity.bottom == 3 + elseif local_rank == 1 + @test connectivity.top == 2 + @test connectivity.bottom == 0 + elseif local_rank == 2 + @test connectivity.top == 3 + @test connectivity.bottom == 1 + elseif local_rank == 3 + @test connectivity.top == 0 + @test connectivity.bottom == 2 + end + + return nothing +end + +function test_triply_periodic_rank_connectivity_with_221_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) + + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test local_rank == index2rank(arch.local_index..., arch.ranks...) + + connectivity = arch.connectivity + + # No communication in z. 
+ @test isnothing(connectivity.top) + @test isnothing(connectivity.bottom) + + # +---+---+ + # | 0 | 2 | + # +---+---+ + # | 1 | 3 | + # +---+---+ + + if local_rank == 0 + @test connectivity.east == 2 + @test connectivity.west == 2 + @test connectivity.north == 1 + @test connectivity.south == 1 + elseif local_rank == 1 + @test connectivity.east == 3 + @test connectivity.west == 3 + @test connectivity.north == 0 + @test connectivity.south == 0 + elseif local_rank == 2 + @test connectivity.east == 0 + @test connectivity.west == 0 + @test connectivity.north == 3 + @test connectivity.south == 3 + elseif local_rank == 3 + @test connectivity.east == 1 + @test connectivity.west == 1 + @test connectivity.north == 2 + @test connectivity.south == 2 + end + + return nothing +end + +##### +##### Local grids for distributed models +##### + +function test_triply_periodic_local_grid_with_411_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + local_grid = model.grid + nx, ny, nz = size(local_grid) + + @test local_grid.xF[1] == 0.25*local_rank + @test local_grid.xF[nx+1] == 0.25*(local_rank+1) + @test local_grid.yF[1] == 0 + @test local_grid.yF[ny+1] == 2 + @test local_grid.zF[1] == -3 + @test local_grid.zF[nz+1] == 0 + + return nothing +end + +function test_triply_periodic_local_grid_with_141_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + local_grid = model.grid + nx, ny, nz = size(local_grid) + + @test local_grid.xF[1] == 0 + @test local_grid.xF[nx+1] == 1 + @test local_grid.yF[1] == 0.5*local_rank + @test local_grid.yF[ny+1] == 0.5*(local_rank+1) + @test local_grid.zF[1] == -3 + @test local_grid.zF[nz+1] == 0 + + return nothing +end + +function test_triply_periodic_local_grid_with_114_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + local_grid = model.grid + nx, ny, nz = size(local_grid) + + @test local_grid.xF[1] == 0 + @test local_grid.xF[nx+1] == 1 + @test local_grid.yF[1] == 0 + @test local_grid.yF[ny+1] == 2 + @test local_grid.zF[1] == -3 + 0.75*local_rank + @test local_grid.zF[nz+1] == -3 + 0.75*(local_rank+1) + + return nothing +end + +function test_triply_periodic_local_grid_with_221_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + i, j, k = arch.local_index + local_grid = model.grid + nx, ny, nz = size(local_grid) + + @test local_grid.xF[1] == 0.5*(i-1) + @test local_grid.xF[nx+1] == 0.5*i + @test local_grid.yF[1] == j-1 + @test local_grid.yF[ny+1] == j + @test local_grid.zF[1] == -3 + @test local_grid.zF[nz+1] == 0 
+ + return nothing +end + +##### +##### Injection of halo communication BCs +##### + +function test_triply_periodic_bc_injection_with_411_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + for field in merge(fields(model), model.pressures) + fbcs = field.boundary_conditions + @test fbcs.east isa HaloCommunicationBC + @test fbcs.west isa HaloCommunicationBC + @test !isa(fbcs.north, HaloCommunicationBC) + @test !isa(fbcs.south, HaloCommunicationBC) + @test !isa(fbcs.top, HaloCommunicationBC) + @test !isa(fbcs.bottom, HaloCommunicationBC) + end +end + +function test_triply_periodic_bc_injection_with_141_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + for field in merge(fields(model), model.pressures) + fbcs = field.boundary_conditions + @test !isa(fbcs.east, HaloCommunicationBC) + @test !isa(fbcs.west, HaloCommunicationBC) + @test fbcs.north isa HaloCommunicationBC + @test fbcs.south isa HaloCommunicationBC + @test !isa(fbcs.top, HaloCommunicationBC) + @test !isa(fbcs.bottom, HaloCommunicationBC) + end +end + +function test_triply_periodic_bc_injection_with_114_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + for field in merge(fields(model), model.pressures) + fbcs = field.boundary_conditions + @test !isa(fbcs.east, HaloCommunicationBC) + @test !isa(fbcs.west, HaloCommunicationBC) + @test !isa(fbcs.north, HaloCommunicationBC) + @test !isa(fbcs.south, HaloCommunicationBC) + @test fbcs.top isa HaloCommunicationBC + @test fbcs.bottom isa HaloCommunicationBC + end +end + +function test_triply_periodic_bc_injection_with_221_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + for field in merge(fields(model), model.pressures) + fbcs = field.boundary_conditions + @test fbcs.east isa HaloCommunicationBC + @test fbcs.west isa HaloCommunicationBC + @test fbcs.north isa HaloCommunicationBC + @test fbcs.south isa HaloCommunicationBC + @test !isa(fbcs.top, HaloCommunicationBC) + @test !isa(fbcs.bottom, HaloCommunicationBC) + end +end + +##### +##### Halo communication +##### + +function test_triply_periodic_halo_communication_with_411_ranks(halo) + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(16, 6, 4), extent=(1, 2, 3), halo=halo) + arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + for field in merge(fields(model), model.pressures) + interior(field) .= arch.local_rank + fill_halo_regions!(field, arch) + + @test all(east_halo(field) .== arch.connectivity.east) + @test all(west_halo(field) .== 
arch.connectivity.west) + + @test all(interior(field) .== arch.local_rank) + @test all(north_halo(field, include_corners=false) .== arch.local_rank) + @test all(south_halo(field, include_corners=false) .== arch.local_rank) + @test all(top_halo(field, include_corners=false) .== arch.local_rank) + @test all(bottom_halo(field, include_corners=false) .== arch.local_rank) + end + + return nothing +end + +function test_triply_periodic_halo_communication_with_141_ranks(halo) + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(4, 16, 4), extent=(1, 2, 3), halo=halo) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + for field in merge(fields(model), model.pressures) + interior(field) .= arch.local_rank + fill_halo_regions!(field, arch) + + @test all(north_halo(field) .== arch.connectivity.north) + @test all(south_halo(field) .== arch.connectivity.south) + + @test all(interior(field) .== arch.local_rank) + @test all(east_halo(field, include_corners=false) .== arch.local_rank) + @test all(west_halo(field, include_corners=false) .== arch.local_rank) + @test all(top_halo(field, include_corners=false) .== arch.local_rank) + @test all(bottom_halo(field, include_corners=false) .== arch.local_rank) + end + + return nothing +end + +function test_triply_periodic_halo_communication_with_114_ranks(halo) + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(4, 4, 16), extent=(1, 2, 3), halo=halo) + arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + for field in merge(fields(model), model.pressures) + interior(field) .= arch.local_rank + fill_halo_regions!(field, arch) + + @test all(top_halo(field) .== arch.connectivity.top) + @test all(bottom_halo(field) .== arch.connectivity.bottom) + + @test all(interior(field) .== arch.local_rank) + @test all(east_halo(field, include_corners=false) .== arch.local_rank) + @test all(west_halo(field, include_corners=false) .== arch.local_rank) + @test all(north_halo(field, include_corners=false) .== arch.local_rank) + @test all(south_halo(field, include_corners=false) .== arch.local_rank) + end + + return nothing +end + +function test_triply_periodic_halo_communication_with_221_ranks(halo) + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 3), extent=(1, 2, 3), halo=halo) + arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + + for field in merge(fields(model), model.pressures) + interior(field) .= arch.local_rank + fill_halo_regions!(field, arch) + + @test all(east_halo(field, include_corners=false) .== arch.connectivity.east) + @test all(west_halo(field, include_corners=false) .== arch.connectivity.west) + @test all(north_halo(field, include_corners=false) .== arch.connectivity.north) + @test all(south_halo(field, include_corners=false) .== arch.connectivity.south) + + @test all(interior(field) .== arch.local_rank) + @test all(top_halo(field, include_corners=false) .== arch.local_rank) + @test all(bottom_halo(field, include_corners=false) .== arch.local_rank) + end + + return nothing +end + +##### +##### Run tests! +##### + +@testset "Distributed MPI Oceananigans" begin + @info "Testing distributed MPI Oceananigans..." 
+ + @testset "Multi architectures rank connectivity" begin + @info " Testing multi architecture rank connectivity..." + test_triply_periodic_rank_connectivity_with_411_ranks() + test_triply_periodic_rank_connectivity_with_141_ranks() + test_triply_periodic_rank_connectivity_with_114_ranks() + test_triply_periodic_rank_connectivity_with_221_ranks() + end + + @testset "Local grids for distributed models" begin + @info " Testing local grids for distributed models..." + test_triply_periodic_local_grid_with_411_ranks() + test_triply_periodic_local_grid_with_141_ranks() + test_triply_periodic_local_grid_with_114_ranks() + test_triply_periodic_local_grid_with_221_ranks() + end + + @testset "Injection of halo communication BCs" begin + @info " Testing injection of halo communication BCs..." + test_triply_periodic_bc_injection_with_411_ranks() + test_triply_periodic_bc_injection_with_141_ranks() + test_triply_periodic_bc_injection_with_114_ranks() + test_triply_periodic_bc_injection_with_221_ranks() + end + + @testset "Halo communication" begin + @info " Testing halo communication..." + for H in 1:3 + test_triply_periodic_halo_communication_with_411_ranks((H, H, H)) + test_triply_periodic_halo_communication_with_141_ranks((H, H, H)) + test_triply_periodic_halo_communication_with_114_ranks((H, H, H)) + test_triply_periodic_halo_communication_with_221_ranks((H, H, H)) + end + end + + @testset "Time stepping" begin + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid) + + time_step!(model, 1) + @test model isa IncompressibleModel + @test model.clock.time == 1 + + simulation = Simulation(model, Δt=1, stop_iteration=2) + run!(simulation) + @test model isa IncompressibleModel + @test model.clock.time == 2 + end +end diff --git a/test/test_distributed_poisson_solvers.jl b/test/test_distributed_poisson_solvers.jl new file mode 100644 index 0000000000..82e6d7c011 --- /dev/null +++ b/test/test_distributed_poisson_solvers.jl @@ -0,0 +1,68 @@ + +function random_divergent_source_term(FT, arch, grid) + # Generate right hand side from a random (divergent) velocity field. 
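+    # The divergence R = ∇⋅U of this random field becomes the Poisson right-hand side;
+    # the solver's solution ϕ should then satisfy ∇²ϕ ≈ R, which the test below verifies.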
+ Ru = CenterField(FT, arch, grid, UVelocityBoundaryConditions(grid)) + Rv = CenterField(FT, arch, grid, VVelocityBoundaryConditions(grid)) + Rw = CenterField(FT, arch, grid, WVelocityBoundaryConditions(grid)) + U = (u=Ru, v=Rv, w=Rw) + + Nx, Ny, Nz = size(grid) + set!(Ru, rand(Nx, Ny, Nz)) + set!(Rv, rand(Nx, Ny, Nz)) + set!(Rw, rand(Nx, Ny, Nz)) + + # Adding (nothing, nothing) in case we need to dispatch on ::NFBC + fill_halo_regions!(Ru, arch, nothing, nothing) + fill_halo_regions!(Rv, arch, nothing, nothing) + fill_halo_regions!(Rw, arch, nothing, nothing) + + # Compute the right hand side R = ∇⋅U + ArrayType = array_type(arch) + R = zeros(Nx, Ny, Nz) |> ArrayType + event = launch!(arch, grid, :xyz, divergence!, grid, U.u.data, U.v.data, U.w.data, R, + dependencies=Event(device(arch))) + wait(device(arch), event) + + return R +end + +function compute_∇²!(∇²ϕ, ϕ, arch, grid) + fill_halo_regions!(ϕ, arch) + child_arch = child_architecture(arch) + event = launch!(child_arch, grid, :xyz, ∇²!, grid, ϕ.data, ∇²ϕ.data, dependencies=Event(device(child_arch))) + wait(device(child_arch), event) + fill_halo_regions!(∇²ϕ, arch) + return nothing +end + +function divergence_free_poisson_solution_triply_periodic(grid_points, ranks) + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=grid_points, extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=ranks) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid) + + local_grid = model.grid + solver = DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) + + R = random_divergent_source_term(Float64, child_architecture(arch), local_grid) + first(solver.storage) .= R + + solve_poisson_equation!(solver) + + p_bcs = PressureBoundaryConditions(local_grid) + p_bcs = inject_halo_communication_boundary_conditions(p_bcs, arch.local_rank, arch.connectivity) + + ϕ = CenterField(Float64, child_architecture(arch), local_grid, p_bcs) # "pressure" + ∇²ϕ = CenterField(Float64, child_architecture(arch), local_grid, p_bcs) + + interior(ϕ) .= real(first(solver.storage)) + compute_∇²!(∇²ϕ, ϕ, arch, local_grid) + + return R ≈ interior(∇²ϕ) +end + +@testset "Distributed FFT-based Poisson solver" begin + @info " Testing distributed FFT-based Poisson solver..." + @test divergence_free_poisson_solution_triply_periodic((16, 16, 1), (1, 4, 1)) + @test divergence_free_poisson_solution_triply_periodic((44, 44, 1), (1, 4, 1)) +end
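For reference, here is a minimal sketch (not part of the diff) of one way to launch the new distributed test group locally on 4 ranks, matching the `@assert mpi_ranks == 4` requirement in test_distributed_models.jl and the `TEST_GROUP` environment variable read by test/runtests.jl. It assumes MPI.jl's `mpiexec` wrapper and that the command is issued from the package root; adapt paths and flags as needed.

using MPI

# Sketch only: run test/runtests.jl on 4 MPI ranks with the "distributed" test group selected.
MPI.mpiexec() do exe                      # `exe` is the mpiexec binary configured for MPI.jl
    withenv("TEST_GROUP" => "distributed") do
        run(`$exe -n 4 $(Base.julia_cmd()) --project=. test/runtests.jl`)
    end
end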