Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Octavian"
uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
authors = ["Mason Protter", "Chris Elrod", "Dilum Aluthge", "contributors"]
version = "0.3.14"
version = "0.3.15"

[deps]
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
Expand All @@ -23,7 +23,7 @@ LoopVectorization = "0.12.86"
ManualMemory = "0.1.1"
PolyesterWeave = "0.1.1"
Requires = "1"
Static = "0.2, 0.3, 0.4, 0.6"
Static = "0.2, 0.3, 0.4, 0.6, 0.7"
ThreadingUtilities = "0.5"
VectorizationBase = "0.21.15"
julia = "1.6"
Expand Down
4 changes: 4 additions & 0 deletions src/Octavian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ using ManualMemory: MemoryBuffer, load, store!

using ThreadingUtilities: _atomic_add!, _atomic_load, _atomic_store!, launch, wait, SPIN

# When `StaticInt` is not a subtype of `Base.Integer`, define a module-local constant
# named `Integer` as the union of `Base.Integer` and `StaticInt`. Inside this module the
# name `Integer` then refers to that union, so signatures annotated `::Integer` here
# accept `StaticInt` values as well as ordinary integers.
# NOTE(review): presumably needed for newer Static.jl releases where `StaticInt` stopped
# subtyping `Integer` — confirm against the Static.jl changelog.
if !(StaticInt <: Base.Integer)
const Integer = Union{Base.Integer, StaticInt}
end

export StaticInt
export matmul!
export matmul
Expand Down
2 changes: 1 addition & 1 deletion src/funcptrs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ function (::LoopMulFunc{P,TC,TA,TB,Α,Β,Md,Kd,Nd})(p::Ptr{UInt}) where {P,TC,TA
end
# `Val{false}` method: forward every argument unchanged to `loopmul!`.
@inline function _call_loopmul!(C, A, B, α, β, M, K, N, ::Val{false})
    return loopmul!(C, A, B, α, β, M, K, N)
end
@inline function _call_loopmul!(C::StridedPointer{T}, A, B, α, β, M, K, N, ::Val{true}) where {T}
if M*K < first_cache_size(Val(T)) * R₂Default()
if M*K < ceil(Int,Float64(first_cache_size(Val(T)) * R₂Default()))
packaloopmul!(C, A, B, α, β, M, K, N)
return
else
Expand Down
4 changes: 1 addition & 3 deletions src/global_constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,5 @@ bcache_count() = VectorizationBase.num_cache(second_cache())
# Mutable slot holding an untyped pointer; it starts out as the null pointer.
# NOTE(review): presumably set to the allocated B-cache by `init_bcache` — confirm in init.jl.
const BCACHEPTR = Ref{Ptr{Cvoid}}(C_NULL)
# Atomic `UInt` initialised to zero.
# NOTE(review): presumably used to coordinate access to the B-cache — confirm at its use sites.
const BCACHE_LOCK = Threads.Atomic{UInt}(zero(UInt))

@static if Sys.WORD_SIZE == 32
const ACACHEPTR = Ref{Ptr{Cvoid}}(C_NULL)
end
const ACACHEPTR = Ref{Ptr{Cvoid}}(C_NULL)

12 changes: 4 additions & 8 deletions src/init.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,11 @@ function init_bcache()
nothing
end

@static if Sys.WORD_SIZE == 32
function init_acache()
if ACACHEPTR[] == C_NULL
ACACHEPTR[] = VectorizationBase.valloc(first_cache_size() * init_num_tasks(), Cvoid, ccall(:jl_getpagesize, Int, ()))
end
nothing
function init_acache()
if ACACHEPTR[] == C_NULL
ACACHEPTR[] = VectorizationBase.valloc(first_cache_size() * init_num_tasks(), Cvoid, ccall(:jl_getpagesize, Int, ()))
end
else
init_acache() = nothing
nothing
end

function init_num_tasks()
Expand Down
10 changes: 2 additions & 8 deletions src/memory_buffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,8 @@
# Convenience method: look up the size via `first_cache_size(Val(T))` and delegate to the
# two-argument `first_cache_buffer` method for element type `T`.
@inline first_cache_buffer(::Val{T}) where {T} =
    first_cache_buffer(Val{T}(), first_cache_size(Val(T)))
@static if Sys.WORD_SIZE == 32
@inline function first_cache_buffer(::Val{T}, N) where {T}
reinterpret(Ptr{T}, ACACHEPTR[] + ((Threads.threadid()-1) * N) * static_sizeof(T))
end
else
@inline function first_cache_buffer(::Val{T}, ::StaticInt{N}) where {T,N}
MemoryBuffer{N,T}(undef)
end
# Return a `Ptr{T}` into the memory addressed by `ACACHEPTR[]`, offset by
# `(Threads.threadid() - 1) * N * static_sizeof(T)`, so each thread id maps to its own
# slot (`static_sizeof(T)` is presumably the byte size of `T` — confirm).
# NOTE(review): the slot is picked from `Threads.threadid()` at call time; this presumably
# relies on the calling task staying on the same thread while the pointer is in use — confirm.
# NOTE(review): assumes `ACACHEPTR[]` has already been set to a non-null allocation
# (see `init_acache`) before this is called — confirm.
@inline function first_cache_buffer(::Val{T}, N) where {T}
reinterpret(Ptr{T}, ACACHEPTR[] + ((Threads.threadid()-1) * N) * static_sizeof(T))
end

# Build the `BCache` for index `i`: the pointer is `BCACHEPTR[]` advanced by
# `cld_fast(second_cache_size() * i, Threads.nthreads())`, and `i` is passed along
# converted to `UInt` with `%`.
function BCache(i::Integer)
    base = BCACHEPTR[]
    offset = cld_fast(second_cache_size() * i, Threads.nthreads())
    return BCache(base + offset, i % UInt)
end
Expand Down