Patch summary (free text before the first "diff --git" line is skipped by
git apply and patch).

Purpose: reorganise Octavian's sources and add unit tests.

  src/Octavian.jl       include list follows the macrokernel.jl -> macrokernels.jl
                        rename and adds memory_buffer.jl and pointer_matrix.jl.
  src/block_sizes.jl    the inline L3 if/else becomes the helper _calculate_L3.
                        The branch is now chosen by
                        VectorizationBase.CACHE_INCLUSIVITY[3] instead of the
                        comparison 2_L2 * L2c > _L3 * L3c, so the selected L3
                        value can differ from before on a given machine.
  src/types.jl          keeps only the PointerMatrix and MemoryBuffer type
                        definitions; their methods move, unchanged, to
                        src/pointer_matrix.jl and src/memory_buffer.jl.
  src/utils.jl          the StaticInt mismatch method of check_sizes throws
                        ErrorException instead of a bare String.
  test/                 new block_sizes, macrokernels, pointer_matrix and utils
                        test files; runtests.jl includes them and
                        matmul_coverage.jl unconditionally, includes matmul.jl
                        only when coverage is disabled, and the RUN_ALL_TESTS
                        switch is removed from the preamble.

NOTE(review): check_sizes still validates runtime sizes with @assert, and
test/utils.jl pins AssertionError for those paths; switching to an explicit
throw later would require updating those tests.

NOTE(review): this patch was restored from a whitespace-collapsed copy. Line
counts of every hunk were checked against the @@ headers, but indentation
(assumed 4 spaces) and the exact position of blank context lines are inferred.
Confirm against the repository, or apply with whitespace tolerance, before
relying on it.

diff --git a/src/Octavian.jl b/src/Octavian.jl
index 1866e95..67bcd54 100644
--- a/src/Octavian.jl
+++ b/src/Octavian.jl
@@ -12,8 +12,10 @@ include("macros.jl")
 
 include("types.jl")
 include("block_sizes.jl")
-include("macrokernel.jl")
+include("macrokernels.jl")
 include("matmul.jl")
+include("memory_buffer.jl")
+include("pointer_matrix.jl")
 include("utils.jl")
 
 const BCACHE = UInt8[]
diff --git a/src/block_sizes.jl b/src/block_sizes.jl
index e923af3..9f2a424 100644
--- a/src/block_sizes.jl
+++ b/src/block_sizes.jl
@@ -53,11 +53,7 @@ function block_sizes(::Type{T}) where {T}
     st = VectorizationBase.static_sizeof(T)
 
     L2 = (StaticInt{_L2}() - StaticInt{_L1}()) ÷ st
-    if 2_L2 * L2c > _L3 * L3c
-        L3 = StaticInt{_L3}() ÷ st
-    else
-        L3 = (StaticInt{_L3}() - StaticInt{_L2}()) ÷ st
-    end
+    L3 = _calculate_L3(StaticInt{_L2}(), StaticInt{_L3}(), st, VectorizationBase.CACHE_INCLUSIVITY[3])
 
     W = VectorizationBase.pick_vector_width_val(T)
     Mr = StaticInt{LoopVectorization.mᵣ}()
@@ -69,3 +65,11 @@ function block_sizes(::Type{T}) where {T}
 
     Mc, Kc, Nc
 end
+
+@inline function _calculate_L3(_L2, _L3, st, cache_inclusivity_3::Bool)
+    if cache_inclusivity_3
+        return (_L3 - _L2) ÷ st
+    else
+        return _L3 ÷ st
+    end
+end
diff --git a/src/macrokernel.jl b/src/macrokernels.jl
similarity index 100%
rename from src/macrokernel.jl
rename to src/macrokernels.jl
diff --git a/src/memory_buffer.jl b/src/memory_buffer.jl
new file mode 100644
index 0000000..b804c82
--- /dev/null
+++ b/src/memory_buffer.jl
@@ -0,0 +1,5 @@
+Base.unsafe_convert(::Type{Ptr{T}}, m::MemoryBuffer) where {T} = Base.unsafe_convert(Ptr{T}, Base.pointer_from_objref(m))
+@inline MemoryBuffer(::StaticInt{L}, ::Type{T}) where {L,T} = MemoryBuffer{L,T}(undef)
+@inline function L2Buffer(::Type{T}) where {T}
+    MemoryBuffer(StaticInt{VectorizationBase.CACHE_SIZE[2]}() ÷ VectorizationBase.static_sizeof(T), T)
+end
diff --git a/src/pointer_matrix.jl b/src/pointer_matrix.jl
new file mode 100644
index 0000000..fcc7c33
--- /dev/null
+++ b/src/pointer_matrix.jl
@@ -0,0 +1,33 @@
+PointerMatrix(p::P, s::S) where {T,P<:VectorizationBase.AbstractStridedPointer{T},S} = PointerMatrix{T,P,S}(p, s)
+Base.size(A::PointerMatrix) = map(Int, A.s)
+VectorizationBase.stridedpointer(A::PointerMatrix) = A.p
+Base.unsafe_convert(::Type{Ptr{T}}, A::PointerMatrix{T}) where {T} = pointer(A.p)
+@inline function Base.getindex(A::PointerMatrix, i::Integer, j::Integer)
+    @boundscheck checkbounds(A, i, j)
+    VectorizationBase.vload(VectorizationBase.stridedpointer(A), (i,j))
+end
+@inline function Base.getindex(A::PointerMatrix, i::Integer)
+    @boundscheck checkbounds(A, i)
+    VectorizationBase.vload(VectorizationBase.stridedpointer(A), (i-1,))
+end
+@inline function Base.setindex!(A::PointerMatrix{T}, v, i::Integer, j::Integer) where {T}
+    @boundscheck checkbounds(A, i, j)
+    VectorizationBase.vstore!(VectorizationBase.stridedpointer(A), convert(T, v), (i,j))
+    v
+end
+@inline function Base.setindex!(A::PointerMatrix{T}, v, i::Integer) where {T}
+    @boundscheck checkbounds(A, i)
+    VectorizationBase.vstore!(VectorizationBase.stridedpointer(A), convert(T, v), (i-1,))
+    v
+end
+
+function PointerMatrix(Bptr::Ptr{T}, (M,N), padcols::Bool = false) where {T}
+    st = VectorizationBase.static_sizeof(T)
+    _M = padcols ? VectorizationBase.align(M, T) : M
+    # Should maybe add a more convenient column major constructor
+    Bsptr = VectorizationBase.stridedpointer(
+        Bptr, VectorizationBase.ArrayInterface.Contiguous{1}(), VectorizationBase.ArrayInterface.ContiguousBatch{0}(),
+        VectorizationBase.ArrayInterface.StrideRank{(1,2)}(), (st, _M*st), (StaticInt{1}(),StaticInt{1}())
+    )
+    PointerMatrix(Bsptr, (M,N))
+end
diff --git a/src/types.jl b/src/types.jl
index c3b47d4..72c8ba4 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -3,48 +3,7 @@ struct PointerMatrix{T,P<:VectorizationBase.AbstractStridedPointer,S<:Tuple{Vara
     s::S
 end
 
-PointerMatrix(p::P, s::S) where {T,P<:VectorizationBase.AbstractStridedPointer{T},S} = PointerMatrix{T,P,S}(p, s)
-Base.size(A::PointerMatrix) = map(Int, A.s)
-VectorizationBase.stridedpointer(A::PointerMatrix) = A.p
-Base.unsafe_convert(::Type{Ptr{T}}, A::PointerMatrix{T}) where {T} = pointer(A.p)
-@inline function Base.getindex(A::PointerMatrix, i::Integer, j::Integer)
-    @boundscheck checkbounds(A, i, j)
-    VectorizationBase.vload(VectorizationBase.stridedpointer(A), (i,j))
-end
-@inline function Base.getindex(A::PointerMatrix, i::Integer)
-    @boundscheck checkbounds(A, i)
-    VectorizationBase.vload(VectorizationBase.stridedpointer(A), (i-1,))
-end
-@inline function Base.setindex!(A::PointerMatrix{T}, v, i::Integer, j::Integer) where {T}
-    @boundscheck checkbounds(A, i, j)
-    VectorizationBase.vstore!(VectorizationBase.stridedpointer(A), convert(T, v), (i,j))
-    v
-end
-@inline function Base.setindex!(A::PointerMatrix{T}, v, i::Integer) where {T}
-    @boundscheck checkbounds(A, i)
-    VectorizationBase.vstore!(VectorizationBase.stridedpointer(A), convert(T, v), (i-1,))
-    v
-end
-
-function PointerMatrix(Bptr::Ptr{T}, (M,N), padcols::Bool = false) where {T}
-    st = VectorizationBase.static_sizeof(T)
-    _M = padcols ? VectorizationBase.align(M, T) : M
-    # Should maybe add a more convenient column major constructor
-    Bsptr = VectorizationBase.stridedpointer(
-        Bptr, VectorizationBase.ArrayInterface.Contiguous{1}(), VectorizationBase.ArrayInterface.ContiguousBatch{0}(),
-        VectorizationBase.ArrayInterface.StrideRank{(1,2)}(), (st, _M*st), (StaticInt{1}(),StaticInt{1}())
-    )
-    PointerMatrix(Bsptr, (M,N))
-end
-
-
 mutable struct MemoryBuffer{L,T}
     data::NTuple{L,T}
     MemoryBuffer{L,T}(::UndefInitializer) where {L,T} = new{L,T}()
 end
-
-Base.unsafe_convert(::Type{Ptr{T}}, m::MemoryBuffer) where {T} = Base.unsafe_convert(Ptr{T}, Base.pointer_from_objref(m))
-@inline MemoryBuffer(::StaticInt{L}, ::Type{T}) where {L,T} = MemoryBuffer{L,T}(undef)
-@inline function L2Buffer(::Type{T}) where {T}
-    MemoryBuffer(StaticInt{VectorizationBase.CACHE_SIZE[2]}() ÷ VectorizationBase.static_sizeof(T), T)
-end
diff --git a/src/utils.jl b/src/utils.jl
index f1a0a62..a2842cc 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,5 +1,5 @@
 check_sizes(::StaticInt{M}, ::StaticInt{M}) where {M} = StaticInt{M}()
-check_sizes(::StaticInt{M}, ::StaticInt{N}) where {M,N} = throw("$M ≠ $N")
+check_sizes(::StaticInt{M}, ::StaticInt{N}) where {M,N} = throw(ErrorException("$M ≠ $N"))
 check_sizes(::StaticInt{M}, m) where {M} = (@assert M == m; StaticInt{M}())
 check_sizes(m, ::StaticInt{M}) where {M} = (@assert M == m; StaticInt{M}())
 check_sizes(m, n) = (@assert m == n; m)
diff --git a/test/block_sizes.jl b/test/block_sizes.jl
new file mode 100644
index 0000000..6a385ba
--- /dev/null
+++ b/test/block_sizes.jl
@@ -0,0 +1,4 @@
+@time @testset "block_sizes" begin
+    @test Octavian._calculate_L3(1, 1, 1, true) == 0
+    @test Octavian._calculate_L3(1, 1, 1, false) == 1
+end
diff --git a/test/macrokernels.jl b/test/macrokernels.jl
new file mode 100644
index 0000000..9bf9ee6
--- /dev/null
+++ b/test/macrokernels.jl
@@ -0,0 +1,16 @@
+@time @testset "Macrokernels" begin
+    m = 20
+    n = 30
+    k = 40
+    A1 = rand(Float64, m, k)
+    B1 = rand(Float64, k, n)
+    C1 = rand(Float64, m, n)
+    A2 = deepcopy(A1)
+    B2 = deepcopy(B1)
+    C2 = deepcopy(C1)
+    α = Float64(2.0)
+    β = Float64(2.0)
+    Octavian.macrokernel!(C1, A1, B1, α, β)
+    C2 = α*A2*B2 + β*C2
+    @test C1 ≈ C2
+end
diff --git a/test/matmul-coverage.jl b/test/matmul_coverage.jl
similarity index 100%
rename from test/matmul-coverage.jl
rename to test/matmul_coverage.jl
diff --git a/test/pointer_matrix.jl b/test/pointer_matrix.jl
new file mode 100644
index 0000000..ff00a1b
--- /dev/null
+++ b/test/pointer_matrix.jl
@@ -0,0 +1,12 @@
+@time Test.@testset "PointerMatrix" begin
+    mem = Octavian.L2Buffer(Float64);
+    ptr = Base.unsafe_convert(Ptr{Float64}, mem)
+    block = Octavian.PointerMatrix(ptr, (10, 20))
+    Test.@test Base.unsafe_convert(Ptr{Float64}, block) == pointer(block.p)
+    GC.@preserve mem begin
+        block[1] = 2.3
+        Test.@test block[1] == 2.3
+        block[4, 5] = 67.89
+        Test.@test block[4, 5] == 67.89
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 45cbecf..4f92efe 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,16 +5,17 @@ import InteractiveUtils
 import LinearAlgebra
 import Test
 
-using Test: @test, @testset
+using Test: @testset, @test, @test_throws
 
-include("test-suite-preamble.jl")
+include("test_suite_preamble.jl")
 
+include("block_sizes.jl")
+include("macrokernels.jl")
 include("macros.jl")
+include("matmul_coverage.jl")
+include("pointer_matrix.jl")
+include("utils.jl")
 
-if (run_all_tests) || (coverage)
-    include("matmul-coverage.jl")
-end
-
-if (run_all_tests) || (!coverage)
+if !coverage
     include("matmul.jl")
 end
diff --git a/test/test-suite-preamble.jl b/test/test_suite_preamble.jl
similarity index 67%
rename from test/test-suite-preamble.jl
rename to test/test_suite_preamble.jl
index 2465a35..1dca2e4 100644
--- a/test/test-suite-preamble.jl
+++ b/test/test_suite_preamble.jl
@@ -11,9 +11,3 @@ end
 
 const coverage = is_coverage()
 @info("Code coverage is $(coverage ? "enabled" : "disabled")")
-
-const run_all_tests = get(ENV, "RUN_ALL_TESTS", "false") == "true"
-
-if run_all_tests
-    @info("RUN_ALL_TESTS is $(run_all_tests)")
-end
diff --git a/test/utils.jl b/test/utils.jl
new file mode 100644
index 0000000..6062834
--- /dev/null
+++ b/test/utils.jl
@@ -0,0 +1,12 @@
+@time @testset "utils" begin
+    @testset "check_sizes" begin
+        a = Octavian.StaticInt{1}()
+        b = Octavian.StaticInt{2}()
+        @test Octavian.check_sizes(a, a) == a
+        @test_throws ErrorException Octavian.check_sizes(a, b)
+        @test Octavian.check_sizes(a, 1) == a
+        @test_throws AssertionError Octavian.check_sizes(a, 100)
+        @test Octavian.check_sizes(2, b) == b
+        @test_throws AssertionError Octavian.check_sizes(200, b)
+    end
+end