softmax has problem with dim parameter #599
Comments
Hi, I already fixed this bug in the dy/cudnn branch (#523) -- please check cudnn-related issues on that branch as well. There is a way to use cudnn softmax to compute the correct result for dims=2. Just give me a few more days to finish writing the tests before we can merge dy/cudnn.
Not sure how to run NNlib's softmax -- in my version it calls CUDNN if the input is a CuArray.
We can use the cudnn softmax to simulate nnlib softmax for dims 1, (;), and (partially) 2. There is no need for permutedims. Please check out my implementation at https://github.com/denizyuret/CUDA.jl/blob/dy/cudnn/lib/cudnn/nnlib.jl
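For context, the reshape idea can be sketched like this (a minimal sketch only, assuming cuDNN's channel-mode softmax and CUDA.jl's column-major WHCN layout; the helper name is illustrative, not the branch's actual code):

```julia
# Sketch: emulate NNlib's softmax(x; dims) with cuDNN's channel-mode softmax by
# reshaping so the reduced axis lands on the channel axis (the 3rd axis in
# Julia's column-major WHCN order). No permutedims is needed.
# `cudnn_softmax_shape` is an illustrative name, not the dy/cudnn branch's API.
function cudnn_softmax_shape(x::AbstractArray, dims)
    if dims == 1
        (1, 1, size(x, 1), length(x) ÷ size(x, 1))                          # reduce over dim 1
    elseif dims == 2
        (size(x, 1), 1, size(x, 2), length(x) ÷ (size(x, 1) * size(x, 2)))  # reduce over dim 2
    elseif dims isa Colon
        (1, 1, length(x), 1)                                                # reduce over everything
    else
        nothing                                                             # fall back to a generic kernel
    end
end

# e.g. a 1000×256 classifier output with dims=1 maps to a 1×1×1000×256 tensor,
# so CUDNN_SOFTMAX_MODE_CHANNEL reduces over the 1000 classes per batch item.
```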
Could you have a look at my PR #600 (comment)? Thanks.
Thanks for your reply. I created a few test cases, similar to yours. It's based on my branch https://github.com/norci/CUDA.jl/tree/fix_599 (benchmark_softmax.jl, using CUDA, NNlib, BenchmarkTools, and Test). Results: CUDNN is much faster. Run on an RTX 2080 Super with a tensor of size 1000 x 1000 x 128.
BTW, I don't think you used the correct parameters for cudnnSoftmaxForward, so your benchmark result should be wrong too. Note: cudnnSoftmaxForward being slower when dims > 1 is expected, but an optimized kernel from CUDNN should still be faster than a few function calls composed of linear algebra and memory allocation/copy.
@norci, the main difference seems to be the size of the tests we use. By far the most common use of softmax is at the end of a network for classification. I used the typical imagenet size of 1000x256 (1000 for num classes, 256 for batch size). Can you try with this setting? What did you have in mind for a use case with 1000x1000x128?
On Thu, Dec 17, 2020 at 2:16 PM norci wrote:
@denizyuret, thanks for your reply. I created a few test cases, similar to yours. benchmark_softmax.jl:
```julia
using CUDA, NNlib, BenchmarkTools, Test

function _softmax!(y::T, x::T; dims) where {T <: DenseCuArray}
    y .= exp.(x .- maximum(x; dims))
    y ./= sum(y; dims)
end

x = CUDA.rand(1000, 1000, 128);
out = similar(x)

# validate
@testset begin
    for i in 1:ndims(x)
        ref = softmax(Array(x), dims=i) |> cu
        @test softmax!(out, x, dims=i) ≈ ref
        @test _softmax!(out, x, dims=i) ≈ ref
    end
end

macro ctime(ex)
    quote
        GC.gc(true)
        println($(sprint(Base.show_unquoted, ex)))
        @btime (CUDA.@sync $ex; nothing)
        CUDA.@time $ex
    end |> esc
end

# benchmark
@ctime softmax!(out, x, dims=1);
@ctime _softmax!(out, x, dims=1);
@ctime softmax!(out, x, dims=2);
@ctime _softmax!(out, x, dims=2);
@ctime softmax!(out, x, dims=3);
@ctime _softmax!(out, x, dims=3);
```
Results (run on an RTX 2080 Super, with a tensor of size 1000 x 1000 x 128):
- dims=1, cudnn is 10x faster than _softmax!
- dims=2, cudnn is 3.4x faster than _softmax!
- dims=3, cudnn is 4.5x faster than _softmax!
Test log:
Test Summary: | Pass Total
test set | 6 6
softmax!(out, x, dims = 1)
2.585 ms (47 allocations: 1.34 KiB)
0.002601 seconds (56 CPU allocations: 1.688 KiB)
_softmax!(out, x, dims = 1)
25.846 ms (109 allocations: 3.00 KiB)
0.026963 seconds (118 CPU allocations: 3.344 KiB) (2 GPU allocations: 1000.000 KiB, 0.01% gc time)
softmax!(out, x, dims = 2)
8.188 ms (51 allocations: 1.41 KiB)
0.008253 seconds (60 CPU allocations: 1.750 KiB)
_softmax!(out, x, dims = 2)
28.178 ms (109 allocations: 3.00 KiB)
0.028923 seconds (118 CPU allocations: 3.344 KiB) (2 GPU allocations: 1000.000 KiB, 0.01% gc time)
softmax!(out, x, dims = 3)
6.247 ms (49 allocations: 1.41 KiB)
0.006886 seconds (58 CPU allocations: 1.750 KiB)
_softmax!(out, x, dims = 3)
28.057 ms (107 allocations: 2.97 KiB)
0.029762 seconds (134 CPU allocations: 3.688 KiB) (2 GPU allocations: 7.629 MiB, 2.56% gc time of which 98.37% spent allocating)
BTW, I don't think you used the correct parameters for cudnnSoftmaxForward, so your benchmark result should be wrong too.
Note: cudnnSoftmaxForward being slower when dims > 1 is expected. The reason is: when dims=1, the kernel reads and writes contiguous device global memory, so the processor can use the L1/L2 cache to speed up memory operations; when the stride is greater than 1, there is no such speedup. But an optimized kernel from CUDNN should still be faster than a few function calls composed of linear algebra and memory allocation/copy.
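As a small aside on the layout argument: Julia arrays are column-major, so only the first dimension is unit-stride. A quick CPU-side check shows the strides involved (assuming the CuArray uses the same dense column-major layout):

```julia
# Column-major layout: only dims=1 walks memory contiguously.
x = zeros(Float32, 1000, 1000, 128)
strides(x)  # (1, 1000, 1000000): dims=2 jumps 1000 elements, dims=3 jumps 1_000_000
```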
Do you have something more specific? What function call, what parameter?
:) I have been working with CUDNN for a while now and I do not share your optimism. The library is full of bugs and inefficiencies and has been since the beginning. Currently there are many cases where simple Knet/CUDA code is faster than the "optimized" cudnn code; for example, try:
Reasons I can think of for the differences in our test results, other than array size:
This comes up at the end of segmentation models.
@denizyuret, code / results:
- 2D Tensor (dims = 1, dims = 2): the ratio depends on the size of the 2D tensor.
- 3D Tensor: CUDNN is always faster than Julia.
Conclusion: we can create a function for the 2D tensor, dims = 2 case, in order to select the best algorithm. Should we file a bug for CUDNN?
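Reading that conclusion as code, a selector could look roughly like this (a sketch only; the threshold and the function name are placeholders, not measured values or branch code):

```julia
using CUDA, NNlib

# Hypothetical dispatcher for the 2D, dims=2 case: use cuDNN only where it wins,
# otherwise use `_softmax!`, the broadcast fallback from the benchmark script above.
# The cutoff below is a placeholder, not a benchmarked number.
const CUDNN_WINS_ABOVE = 1_000_000   # placeholder threshold on total element count

function softmax_dims2!(y::T, x::T) where {T <: DenseCuArray}
    if length(x) >= CUDNN_WINS_ABOVE
        softmax!(y, x; dims = 2)      # NNlib softmax! on CuArrays, i.e. the cuDNN path
    else
        _softmax!(y, x; dims = 2)     # plain-Julia fallback defined earlier in the thread
    end
end
```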
BTW, I'm not able to use Knet:

```
julia> using Knet
ERROR: InitError:
Please submit a bug report with steps to reproduce this fault, and any error messages that follow (in their entirety). Thanks.
Exception: EXCEPTION_ACCESS_VIOLATION at 0x7ffa334cd2d0 -- strlen at C:\Windows\System32\msvcrt.dll (unknown line)
```

and TOT CUDA is not usable either:

```
julia> using CUDA
[ Info: Precompiling CUDA [052768ef-5323-5732-b1bb-66c8b64840ba]
ERROR: LoadError: ArgumentError: Package GPUCompiler [61eb1bfa-7361-4325-ad38-22787b887f55] is required but does not seem to be installed:
 - Run `Pkg.instantiate()` to install all recorded dependencies.

(CUDA) pkg> up
  Updating registry at `C:\Users\zhexu\.julia\registries\General`
ERROR: expected package `LazyArtifacts [4af54fe1]` to be registered
```
CUDA#master requires Julia 1.6.
And the compatible Knet is Knet#dy/cudnn (with Julia 1.6).
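For reference, picking up those branches would look roughly like this (assuming a Julia 1.6 environment and the branch names mentioned above):

```
(@v1.6) pkg> add CUDA#master
(@v1.6) pkg> add Knet#dy/cudnn
```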
Thanks for the experiments, I will repeat them on my platform and integrate your changes into CUDA#dy/cudnn.
@norci I generalized your dims trick to multiple (contiguous) dimensions and added an exception to use the backup implementation in the region where cudnn is slow.
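A rough sketch of what that generalization could look like (illustrative only; the function name, the contiguity check, and the slow-region cutoff below are stand-ins for the actual PR code):

```julia
# Sketch: collapse the array around a contiguous block of softmax dims so the
# block lands on cuDNN's channel axis; return `nothing` to request the fallback.
# The "slow region" cutoff here is a hypothetical placeholder.
function softmax_shape(x::AbstractArray, dims)
    dims isa Colon && return (1, 1, length(x), 1)
    d = sort(collect(dims isa Integer ? (dims,) : dims))
    d == collect(first(d):last(d)) || return nothing     # only contiguous dims map cleanly
    pre  = prod(size(x)[1:first(d)-1]; init = 1)          # dims before the block
    mid  = prod(size(x)[first(d):last(d)])                 # the block being reduced
    post = prod(size(x)[last(d)+1:end]; init = 1)          # dims after the block
    if pre > 1 && mid < 64                                 # placeholder for "cudnn is slow here"
        return nothing                                     # use the backup implementation
    end
    return (pre, 1, mid, post)                             # cuDNN sees this as (W, H, C, N)
end
```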
I see. The design of …
But I have a concern about the algorithm selector: maybe it's not optimal. So I benchmarked it with more sizes and used a decision tree to get the optimal choice. Shall we use this tree to select the best algo?
Details:
- code
- data sizes
- decision tree as algo selector (see the last part of the code)
- features (FIXME: are these features enough?)
- labels
- results
FIXME: do we need to prune the tree?
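If the selector were driven by data rather than a hand-written rule, one way to sketch it is with DecisionTree.jl (all feature rows, labels, and the query below are placeholders, not the benchmark data from this thread):

```julia
using DecisionTree

# Placeholder training set: one row per benchmarked configuration
# (size along dims 1-3, softmax dims); the label says which implementation won.
features = Float64[1000 1000 128 1;
                   1000  256   1 1;
                   1000 1000 128 2;
                    100  100   1 2]
labels = ["cudnn", "cudnn", "cudnn", "fallback"]

tree = build_tree(labels, features)            # fit a classification tree
print_tree(tree, 5)                            # inspect the learned splits
apply_tree(tree, Float64[1000, 512, 1, 2])     # predict the algo for a new shape
```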
@norci thanks for the experiments. They show the current hack is definitely not optimal. However, I am not sure it is worth optimizing at the expense of complicating the code (I wasn't even sure adding the current hack was a good idea), for a couple of reasons:
All in all I favor waiting for nvidia to do something, but not very strongly. If you disagree and send me a modification of softmaxdims, I'll put it in the PR.
@denizyuret, I agree with you. I have reported this performance issue to Nvidia; I hope they will fix it.
Describe the bug
softmax returns a wrong result; it seems like it does not use the dims parameter.
To reproduce
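A minimal check along these lines (not the reporter's original script) exercises the reported behaviour by comparing the GPU result with the CPU reference for each dims value:

```julia
# Not the original reproducer: a minimal consistency check of softmax on CuArrays.
using CUDA, NNlib, Test

x = rand(Float32, 4, 5)
for d in 1:2
    @test Array(softmax(cu(x); dims = d)) ≈ softmax(x; dims = d)
end
```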
Manifest.toml
Version info
Details on Julia:
Details on CUDA:
Additional context
See FluxML/Flux.jl#1425 (comment)