diff --git a/Project.toml b/Project.toml index 2deaa33..eb6acff 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Octavian" uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" authors = ["Mason Protter", "Chris Elrod", "Dilum Aluthge", "contributors"] -version = "0.3.14" +version = "0.3.15" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" @@ -23,7 +23,7 @@ LoopVectorization = "0.12.86" ManualMemory = "0.1.1" PolyesterWeave = "0.1.1" Requires = "1" -Static = "0.2, 0.3, 0.4, 0.6" +Static = "0.2, 0.3, 0.4, 0.6, 0.7" ThreadingUtilities = "0.5" VectorizationBase = "0.21.15" julia = "1.6" diff --git a/src/Octavian.jl b/src/Octavian.jl index da53372..b3c1184 100644 --- a/src/Octavian.jl +++ b/src/Octavian.jl @@ -17,6 +17,10 @@ using ManualMemory: MemoryBuffer, load, store! using ThreadingUtilities: _atomic_add!, _atomic_load, _atomic_store!, launch, wait, SPIN +if !(StaticInt <: Base.Integer) +const Integer = Union{Base.Integer, StaticInt} +end + export StaticInt export matmul! export matmul diff --git a/src/funcptrs.jl b/src/funcptrs.jl index 26c0f89..bcc8d15 100644 --- a/src/funcptrs.jl +++ b/src/funcptrs.jl @@ -15,7 +15,7 @@ function (::LoopMulFunc{P,TC,TA,TB,Α,Β,Md,Kd,Nd})(p::Ptr{UInt}) where {P,TC,TA end @inline _call_loopmul!(C, A, B, α, β, M, K, N, ::Val{false}) = loopmul!(C, A, B, α, β, M, K, N) @inline function _call_loopmul!(C::StridedPointer{T}, A, B, α, β, M, K, N, ::Val{true}) where {T} - if M*K < first_cache_size(Val(T)) * R₂Default() + if M*K < ceil(Int,Float64(first_cache_size(Val(T)) * R₂Default())) packaloopmul!(C, A, B, α, β, M, K, N) return else diff --git a/src/global_constants.jl b/src/global_constants.jl index 00455c4..7fa2843 100644 --- a/src/global_constants.jl +++ b/src/global_constants.jl @@ -75,7 +75,5 @@ bcache_count() = VectorizationBase.num_cache(second_cache()) const BCACHEPTR = Ref{Ptr{Cvoid}}(C_NULL) const BCACHE_LOCK = Threads.Atomic{UInt}(zero(UInt)) -@static if Sys.WORD_SIZE == 32 - const ACACHEPTR = Ref{Ptr{Cvoid}}(C_NULL) -end +const ACACHEPTR = Ref{Ptr{Cvoid}}(C_NULL) diff --git a/src/init.jl b/src/init.jl index 7c9c5e4..99b5c6f 100644 --- a/src/init.jl +++ b/src/init.jl @@ -24,15 +24,11 @@ function init_bcache() nothing end -@static if Sys.WORD_SIZE == 32 - function init_acache() - if ACACHEPTR[] == C_NULL - ACACHEPTR[] = VectorizationBase.valloc(first_cache_size() * init_num_tasks(), Cvoid, ccall(:jl_getpagesize, Int, ())) - end - nothing +function init_acache() + if ACACHEPTR[] == C_NULL + ACACHEPTR[] = VectorizationBase.valloc(first_cache_size() * init_num_tasks(), Cvoid, ccall(:jl_getpagesize, Int, ())) end -else - init_acache() = nothing + nothing end function init_num_tasks() diff --git a/src/memory_buffer.jl b/src/memory_buffer.jl index aae81ad..aea1c99 100644 --- a/src/memory_buffer.jl +++ b/src/memory_buffer.jl @@ -2,14 +2,8 @@ @inline function first_cache_buffer(::Val{T}) where {T} first_cache_buffer(Val{T}(), first_cache_size(Val(T))) end -@static if Sys.WORD_SIZE == 32 - @inline function first_cache_buffer(::Val{T}, N) where {T} - reinterpret(Ptr{T}, ACACHEPTR[] + ((Threads.threadid()-1) * N) * static_sizeof(T)) - end -else - @inline function first_cache_buffer(::Val{T}, ::StaticInt{N}) where {T,N} - MemoryBuffer{N,T}(undef) - end +@inline function first_cache_buffer(::Val{T}, N) where {T} + reinterpret(Ptr{T}, ACACHEPTR[] + ((Threads.threadid()-1) * N) * static_sizeof(T)) end BCache(i::Integer) = BCache(BCACHEPTR[]+cld_fast(second_cache_size()*i, Threads.nthreads()), i % UInt)