From f8c490a72cfb8d41d64981c2b4107d01352c43d4 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Sat, 11 Sep 2021 08:44:13 -0400 Subject: [PATCH 1/2] Polyester -> PolyesterWeave; closes #113 --- Project.toml | 4 ++-- src/Octavian.jl | 2 +- src/matmul.jl | 20 ++++++++++---------- test/matmul_coverage.jl | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Project.toml b/Project.toml index 8fffc2d..8003766 100644 --- a/Project.toml +++ b/Project.toml @@ -8,7 +8,7 @@ ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" -Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588" +PolyesterWeave = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" ThreadingUtilities = "8290d209-cae3-49c0-8002-c8c24d57dab5" VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" @@ -18,7 +18,7 @@ ArrayInterface = "3.1.14" IfElse = "0.1" LoopVectorization = "0.12.34" ManualMemory = "0.1.1" -Polyester = "0.4" +PolyesterWeave = "0.1" Static = "0.2, 0.3" ThreadingUtilities = "0.4.6" VectorizationBase = "0.21.5" diff --git a/src/Octavian.jl b/src/Octavian.jl index 6bd66fd..3f1c925 100644 --- a/src/Octavian.jl +++ b/src/Octavian.jl @@ -8,7 +8,7 @@ using VectorizationBase: align, AbstractStridedPointer, zstridedpointer, vsub_ns using LoopVectorization: preserve_buffer, CloseOpen, UpperBoundedInteger using ArrayInterface: size, strides, offsets, indices, axes, StrideIndex using IfElse: ifelse -using Polyester +using PolyesterWeave using Static: StaticInt, Zero, One, StaticBool, True, False, gt, eq, StaticFloat64, roundtostaticint, floortostaticint using ManualMemory: MemoryBuffer, load, store! diff --git a/src/matmul.jl b/src/matmul.jl index 0c67c49..7573a08 100644 --- a/src/matmul.jl +++ b/src/matmul.jl @@ -308,20 +308,20 @@ function matmulsplitn!(C::AbstractStridedPointer{T}, A, B, α, β, ::StaticInt{M _nspawn = Mblocks * Nblocks Mbsize_Mrem, Mbsize_ = promote(Mbsize + W, Mbsize) Nbsize_Nrem, Nbsize_ = promote(Nbsize + One(), Nbsize) - (tnum, tuu) = Polyester.initial_state(threads) + (tnum, tuu) = PolyesterWeave.initial_state(threads) let _A = A, _B = B, _C = C, n = 0, Nrc = Nblocks - Nrem, Mrc = Mblocks - Mrem, __Mblocks = Mblocks - One() while true nsize = ifelse(Nblocks > Nrc, Nbsize_Nrem, Nbsize_); Nblocks -= 1 let _A = _A, _C = _C, __Mblocks = __Mblocks while __Mblocks != 0 msize = ifelse(__Mblocks ≥ Mrc, Mbsize_Mrem, Mbsize_); __Mblocks -= 1 - (tnum, tuu) = Polyester.iter(tnum, tuu) + (tnum, tuu) = PolyesterWeave.iter(tnum, tuu) launch_thread_mul!(_C, _A, _B, α, β, msize, K, nsize, tnum, Val{PACK}()) _A = gesp(_A, (msize, Zero())) _C = gesp(_C, (msize, Zero())) end if Nblocks != 0 - (tnum, tuu) = Polyester.iter(tnum, tuu) + (tnum, tuu) = PolyesterWeave.iter(tnum, tuu) launch_thread_mul!(_C, _A, _B, α, β, Mremfinal, K, nsize, tnum, Val{PACK}()) else call_loopmul!(_C, _A, _B, α, β, Mremfinal, K, nsize, Val{PACK}()) @@ -360,8 +360,8 @@ function __matmul!( clamp(div_fast(M * N, StaticInt{256}() * W), 0, _nthread-1) end # nkern = cld_fast(M * N, MᵣW * Nᵣ) - threads, torelease = Polyester.__request_threads(_nrequest % UInt32, Polyester.worker_pointer()) - # _threads, _torelease = Polyester.request_threads(Threads.threadid()%UInt32, _nrequest) + threads, torelease = PolyesterWeave.__request_threads(_nrequest % UInt32, PolyesterWeave.worker_pointer()) + # _threads, _torelease = PolyesterWeave.request_threads(Threads.threadid()%UInt32, _nrequest) nrequest = threads.i iszero(nrequest) && @goto SINGLETHREAD @@ -388,7 +388,7 @@ function __matmul!( else # TODO: Allow splitting along `N` for `matmul_pack_A_and_B!` matmul_pack_A_and_B!(C, A, B, α, β, M, K, N, threads, W₁Default(), W₂Default(), R₁Default(), R₂Default()) end - Polyester.free_threads!(torelease) + PolyesterWeave.free_threads!(torelease) nothing end @@ -398,9 +398,9 @@ function waitonmultasks(threads, nthread) # for (_,tid) ∈ threads # wait(tid) # end - (tnum, tuu) = Polyester.initial_state(threads) + (tnum, tuu) = PolyesterWeave.initial_state(threads) for _ ∈ CloseOpen(One(), nthread) - (tnum, tuu) = Polyester.iter(tnum, tuu) + (tnum, tuu) = PolyesterWeave.iter(tnum, tuu) wait(tnum) end end @@ -423,13 +423,13 @@ function matmul_pack_A_and_B!( end Mblock_Mrem, Mblock_ = promote(Mbsize + W, Mbsize) u_to_spawn = _to_spawn % UInt - (tnum, tuu) = Polyester.initial_state(threads) + (tnum, tuu) = PolyesterWeave.initial_state(threads) bc = _use_bcache() bc_ptr = Base.unsafe_convert(typeof(pointer(C)), pointer(bc)) last_id = _to_spawn - One() for m ∈ CloseOpen(last_id) # ...thus the fact that `CloseOpen()` iterates at least once is okay. Mblock = ifelse(m < Mrem, Mblock_Mrem, Mblock_) - (tnum, tuu) = Polyester.iter(tnum, tuu) + (tnum, tuu) = PolyesterWeave.iter(tnum, tuu) launch_thread_mul!(C, A, B, α, β, Mblock, K, N, p, bc_ptr, tnum, m % UInt, u_to_spawn, StaticFloat64{W₁}(),StaticFloat64{W₂}(),StaticFloat64{R₁}(),StaticFloat64{R₂}()) A = gesp(A, (Mblock, Zero())) C = gesp(C, (Mblock, Zero())) diff --git a/test/matmul_coverage.jl b/test/matmul_coverage.jl index dc0f912..9abdc8a 100644 --- a/test/matmul_coverage.jl +++ b/test/matmul_coverage.jl @@ -8,13 +8,13 @@ function matmul_pack_ab!(C, A, B) nspawn = min(Threads.nthreads(), Octavian.num_cores()) GC.@preserve C A B begin if nspawn > 1 - threads, torelease = Octavian.Polyester.__request_threads((nspawn-1)%UInt32, Octavian.Polyester.worker_pointer()) + threads, torelease = Octavian.PolyesterWeave.__request_threads((nspawn-1)%UInt32, Octavian.PolyesterWeave.worker_pointer()) @assert threads.i < Threads.nthreads() Octavian.matmul_pack_A_and_B!( zc, za, zb, Octavian.StaticInt{1}(), Octavian.StaticInt{0}(), M, K, N, threads, Octavian.W₁Default(), Octavian.W₂Default(), Octavian.R₁Default(), Octavian.R₂Default() ) - Octavian.Polyester.free_threads!(torelease) + Octavian.PolyesterWeave.free_threads!(torelease) else Octavian.matmul_st_pack_A_and_B!( zc, za, zb, Octavian.StaticInt{1}(), Octavian.StaticInt{0}(), M, K, N, From c6dd05400008ebd03fd167e6659df9eb40f2e370 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Sat, 11 Sep 2021 08:44:59 -0400 Subject: [PATCH 2/2] Bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8003766..32177fa 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Octavian" uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" authors = ["Mason Protter", "Chris Elrod", "Dilum Aluthge", "contributors"] -version = "0.3.2" +version = "0.3.3" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"