diff --git a/Project.toml b/Project.toml index d1b8f97..8fffc2d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Octavian" uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" authors = ["Mason Protter", "Chris Elrod", "Dilum Aluthge", "contributors"] -version = "0.3.1" +version = "0.3.2" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" @@ -18,10 +18,10 @@ ArrayInterface = "3.1.14" IfElse = "0.1" LoopVectorization = "0.12.34" ManualMemory = "0.1.1" -Polyester = "0.3.5" +Polyester = "0.4" Static = "0.2, 0.3" ThreadingUtilities = "0.4.6" -VectorizationBase = "0.20.16" +VectorizationBase = "0.21.5" julia = "1.6" [extras] diff --git a/src/Octavian.jl b/src/Octavian.jl index 933cb1c..6bd66fd 100644 --- a/src/Octavian.jl +++ b/src/Octavian.jl @@ -6,7 +6,7 @@ using VectorizationBase: align, AbstractStridedPointer, zstridedpointer, vsub_ns static_sizeof, StridedPointer, gesp, pause, pick_vector_width, has_feature, cache_size, num_cores, num_cores, cache_inclusive, cache_linesize using LoopVectorization: preserve_buffer, CloseOpen, UpperBoundedInteger -using ArrayInterface: size, strides, offsets, indices, axes +using ArrayInterface: size, strides, offsets, indices, axes, StrideIndex using IfElse: ifelse using Polyester using Static: StaticInt, Zero, One, StaticBool, True, False, gt, eq, StaticFloat64, diff --git a/src/matmul.jl b/src/matmul.jl index 52604b0..0c67c49 100644 --- a/src/matmul.jl +++ b/src/matmul.jl @@ -360,7 +360,9 @@ function __matmul!( clamp(div_fast(M * N, StaticInt{256}() * W), 0, _nthread-1) end # nkern = cld_fast(M * N, MᵣW * Nᵣ) - threads, torelease = Polyester.request_threads(Threads.threadid()%UInt32, _nrequest) + threads, torelease = Polyester.__request_threads(_nrequest % UInt32, Polyester.worker_pointer()) + # _threads, _torelease = Polyester.request_threads(Threads.threadid()%UInt32, _nrequest) + nrequest = threads.i iszero(nrequest) && @goto SINGLETHREAD nspawn = nrequest + 1 diff --git a/src/utils.jl b/src/utils.jl index 8875b16..18c4ed4 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -38,7 +38,8 @@ function default_stridedpointer_quote(::Type{T}, N, Ot) where {T} quote $(Expr(:meta,:inline)) st = $st - StridedPointer{$T,$N,$C,$B,$R}(ptr, $xt, $o) + si = StrideIndex{$N,$R,$C}($xt, $o) + stridedpointer(ptr, si, StaticInt{$B}()) end end @@ -66,9 +67,10 @@ end Bn = Core.ifelse(B > 1, B+1, B) quote $(Expr(:meta,:inline)) - x = $gf(sp, :strd) + x = strides(sp) x0 = $gf(x, 1, false) - StridedPointer{$T,$(N+1),$Cn,$Bn,$Rn}($gf(sp,:p), $xt, $ot) + si = StrideIndex{$(N+1),$Rn,$Cn}($xt, $ot) + stridedpointer($gf(sp,:p), si, StaticInt{$Bn}()) end end @generated function droplastdim(sp::StridedPointer{T,N,C,B,R}) where {T,N,C,B,R} @@ -85,9 +87,10 @@ end end quote $(Expr(:meta,:inline)) - x = $gf(sp, :strd) - o = $gf(sp, :offsets) - StridedPointer{$T,$(N-1),$Cn,$Bn,$rt}($gf(sp,:p), $xt, $ot) + x = strides(sp) + o = offsets(sp) + si = StrideIndex{$(N-1),$rt,$Cn}($xt, $ot) + stridedpointer($gf(sp,:p), si, StaticInt{$Bn}()) end end diff --git a/test/matmul_coverage.jl b/test/matmul_coverage.jl index 47f9595..dc0f912 100644 --- a/test/matmul_coverage.jl +++ b/test/matmul_coverage.jl @@ -8,7 +8,7 @@ function matmul_pack_ab!(C, A, B) nspawn = min(Threads.nthreads(), Octavian.num_cores()) GC.@preserve C A B begin if nspawn > 1 - threads, torelease = Octavian.Polyester.request_threads(Threads.threadid(), nspawn-1) + threads, torelease = Octavian.Polyester.__request_threads((nspawn-1)%UInt32, Octavian.Polyester.worker_pointer()) @assert threads.i < Threads.nthreads() Octavian.matmul_pack_A_and_B!( zc, za, zb, Octavian.StaticInt{1}(), Octavian.StaticInt{0}(), M, K, N, threads,