diff --git a/Project.toml b/Project.toml index 38b01e8..293a79d 100644 --- a/Project.toml +++ b/Project.toml @@ -17,7 +17,7 @@ LoopVectorization = "0.12" Static = "0.2" StrideArraysCore = "0.1.5" ThreadingUtilities = "0.4" -VectorizationBase = "0.20.5" +VectorizationBase = "0.20.9" julia = "1.6" [extras] diff --git a/README.md b/README.md index 59d3c85..ade1b25 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Environment: JULIA_NUM_THREADS = 36 ``` Resulted in the following: -![octavian10980xebench](https://raw.githubusercontent.com/JuliaLinearAlgebra/Octavian.jl/master/docs/src/assets/bench10980xe.png) +![octavian10980xebench](https://raw.githubusercontent.com/JuliaLinearAlgebra/Octavian.jl/master/docs/src/assets/bench10980xe.svg) ## Related Packages diff --git a/src/macrokernels.jl b/src/macrokernels.jl index 8a290bc..405ad88 100644 --- a/src/macrokernels.jl +++ b/src/macrokernels.jl @@ -243,9 +243,9 @@ end ) where {T} Ãₚ, buffer = alloc_a_pack(A, M, T) GC.@preserve buffer begin - Mᵣ, Nᵣ = matmul_params() - packamul!(C, Ãₚ, A, B, α, β, M, K, Nᵣ) - loopmul!(gesp(C, (Zero(), Nᵣ)), Ãₚ, gesp(B, (Zero(), Nᵣ)), α, β, M, K, N - Nᵣ) + Mᵣ, Nᵣ = matmul_params() + packamul!(C, Ãₚ, A, B, α, β, M, K, Nᵣ) + loopmul!(gesp(C, (Zero(), Nᵣ)), Ãₚ, gesp(B, (Zero(), Nᵣ)), α, β, M, K, N - Nᵣ) end nothing end