Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
name = "Octavian"
uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
authors = ["Mason Protter", "Chris Elrod", "Dilum Aluthge", "contributors"]
version = "0.3.2"
version = "0.3.3"

[deps]
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667"
Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588"
PolyesterWeave = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad"
Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
ThreadingUtilities = "8290d209-cae3-49c0-8002-c8c24d57dab5"
VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
Expand All @@ -18,7 +18,7 @@ ArrayInterface = "3.1.14"
IfElse = "0.1"
LoopVectorization = "0.12.34"
ManualMemory = "0.1.1"
Polyester = "0.4"
PolyesterWeave = "0.1"
Static = "0.2, 0.3"
ThreadingUtilities = "0.4.6"
VectorizationBase = "0.21.5"
Expand Down
2 changes: 1 addition & 1 deletion src/Octavian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ using VectorizationBase: align, AbstractStridedPointer, zstridedpointer, vsub_ns
using LoopVectorization: preserve_buffer, CloseOpen, UpperBoundedInteger
using ArrayInterface: size, strides, offsets, indices, axes, StrideIndex
using IfElse: ifelse
using Polyester
using PolyesterWeave
using Static: StaticInt, Zero, One, StaticBool, True, False, gt, eq, StaticFloat64,
roundtostaticint, floortostaticint
using ManualMemory: MemoryBuffer, load, store!
Expand Down
20 changes: 10 additions & 10 deletions src/matmul.jl
Original file line number Diff line number Diff line change
Expand Up @@ -308,20 +308,20 @@ function matmulsplitn!(C::AbstractStridedPointer{T}, A, B, α, β, ::StaticInt{M
_nspawn = Mblocks * Nblocks
Mbsize_Mrem, Mbsize_ = promote(Mbsize + W, Mbsize)
Nbsize_Nrem, Nbsize_ = promote(Nbsize + One(), Nbsize)
(tnum, tuu) = Polyester.initial_state(threads)
(tnum, tuu) = PolyesterWeave.initial_state(threads)
let _A = A, _B = B, _C = C, n = 0, Nrc = Nblocks - Nrem, Mrc = Mblocks - Mrem, __Mblocks = Mblocks - One()
while true
nsize = ifelse(Nblocks > Nrc, Nbsize_Nrem, Nbsize_); Nblocks -= 1
let _A = _A, _C = _C, __Mblocks = __Mblocks
while __Mblocks != 0
msize = ifelse(__Mblocks ≥ Mrc, Mbsize_Mrem, Mbsize_); __Mblocks -= 1
(tnum, tuu) = Polyester.iter(tnum, tuu)
(tnum, tuu) = PolyesterWeave.iter(tnum, tuu)
launch_thread_mul!(_C, _A, _B, α, β, msize, K, nsize, tnum, Val{PACK}())
_A = gesp(_A, (msize, Zero()))
_C = gesp(_C, (msize, Zero()))
end
if Nblocks != 0
(tnum, tuu) = Polyester.iter(tnum, tuu)
(tnum, tuu) = PolyesterWeave.iter(tnum, tuu)
launch_thread_mul!(_C, _A, _B, α, β, Mremfinal, K, nsize, tnum, Val{PACK}())
else
call_loopmul!(_C, _A, _B, α, β, Mremfinal, K, nsize, Val{PACK}())
Expand Down Expand Up @@ -360,8 +360,8 @@ function __matmul!(
clamp(div_fast(M * N, StaticInt{256}() * W), 0, _nthread-1)
end
# nkern = cld_fast(M * N, MᵣW * Nᵣ)
threads, torelease = Polyester.__request_threads(_nrequest % UInt32, Polyester.worker_pointer())
# _threads, _torelease = Polyester.request_threads(Threads.threadid()%UInt32, _nrequest)
threads, torelease = PolyesterWeave.__request_threads(_nrequest % UInt32, PolyesterWeave.worker_pointer())
# _threads, _torelease = PolyesterWeave.request_threads(Threads.threadid()%UInt32, _nrequest)

nrequest = threads.i
iszero(nrequest) && @goto SINGLETHREAD
Expand All @@ -388,7 +388,7 @@ function __matmul!(
else # TODO: Allow splitting along `N` for `matmul_pack_A_and_B!`
matmul_pack_A_and_B!(C, A, B, α, β, M, K, N, threads, W₁Default(), W₂Default(), R₁Default(), R₂Default())
end
Polyester.free_threads!(torelease)
PolyesterWeave.free_threads!(torelease)
nothing
end

Expand All @@ -398,9 +398,9 @@ function waitonmultasks(threads, nthread)
# for (_,tid) ∈ threads
# wait(tid)
# end
(tnum, tuu) = Polyester.initial_state(threads)
(tnum, tuu) = PolyesterWeave.initial_state(threads)
for _ ∈ CloseOpen(One(), nthread)
(tnum, tuu) = Polyester.iter(tnum, tuu)
(tnum, tuu) = PolyesterWeave.iter(tnum, tuu)
wait(tnum)
end
end
Expand All @@ -423,13 +423,13 @@ function matmul_pack_A_and_B!(
end
Mblock_Mrem, Mblock_ = promote(Mbsize + W, Mbsize)
u_to_spawn = _to_spawn % UInt
(tnum, tuu) = Polyester.initial_state(threads)
(tnum, tuu) = PolyesterWeave.initial_state(threads)
bc = _use_bcache()
bc_ptr = Base.unsafe_convert(typeof(pointer(C)), pointer(bc))
last_id = _to_spawn - One()
for m ∈ CloseOpen(last_id) # ...thus the fact that `CloseOpen()` iterates at least once is okay.
Mblock = ifelse(m < Mrem, Mblock_Mrem, Mblock_)
(tnum, tuu) = Polyester.iter(tnum, tuu)
(tnum, tuu) = PolyesterWeave.iter(tnum, tuu)
launch_thread_mul!(C, A, B, α, β, Mblock, K, N, p, bc_ptr, tnum, m % UInt, u_to_spawn, StaticFloat64{W₁}(),StaticFloat64{W₂}(),StaticFloat64{R₁}(),StaticFloat64{R₂}())
A = gesp(A, (Mblock, Zero()))
C = gesp(C, (Mblock, Zero()))
Expand Down
4 changes: 2 additions & 2 deletions test/matmul_coverage.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ function matmul_pack_ab!(C, A, B)
nspawn = min(Threads.nthreads(), Octavian.num_cores())
GC.@preserve C A B begin
if nspawn > 1
threads, torelease = Octavian.Polyester.__request_threads((nspawn-1)%UInt32, Octavian.Polyester.worker_pointer())
threads, torelease = Octavian.PolyesterWeave.__request_threads((nspawn-1)%UInt32, Octavian.PolyesterWeave.worker_pointer())
@assert threads.i < Threads.nthreads()
Octavian.matmul_pack_A_and_B!(
zc, za, zb, Octavian.StaticInt{1}(), Octavian.StaticInt{0}(), M, K, N, threads,
Octavian.W₁Default(), Octavian.W₂Default(), Octavian.R₁Default(), Octavian.R₂Default()
)
Octavian.Polyester.free_threads!(torelease)
Octavian.PolyesterWeave.free_threads!(torelease)
else
Octavian.matmul_st_pack_A_and_B!(
zc, za, zb, Octavian.StaticInt{1}(), Octavian.StaticInt{0}(), M, K, N,
Expand Down