Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clean up and export crc32c function #22274

Merged
merged 14 commits into from Jun 13, 2017
2 changes: 2 additions & 0 deletions NEWS.md
Expand Up @@ -56,6 +56,8 @@ Library improvements
* `resize!` and `sizehint!` methods no longer over-reserve memory when the
requested array size is more than double of its current size ([#22038]).

* The `crc32c` function for CRC-32c checksums is now exported ([#22274]).

* The output of `versioninfo()` is now controlled with keyword arguments ([#21974]).

Compiler/Runtime improvements
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Expand Up @@ -1047,6 +1047,7 @@ export
atexit,
atreplinit,
clipboard,
crc32c,
exit,
ntuple,
quit,
Expand Down
13 changes: 3 additions & 10 deletions base/loading.jl
Expand Up @@ -683,10 +683,7 @@ function compilecache(name::String)
if success(create_expr_cache(path, cachefile, concrete_deps))
# append checksum to the end of the .ji file:
open(cachefile, "a+") do f
data = Mmap.mmap(f, Vector{UInt8}, filesize(f), 0)
checksum = crc32c(data)
finalize(data)
write(f, hton(checksum))
write(f, hton(crc32c(seekstart(f), filesize(f))))
end
else
error("Failed to precompile $name to $cachefile.")
Expand Down Expand Up @@ -809,12 +806,8 @@ function stale_cachefile(modpath::String, cachefile::String)
end

# finally, verify that the cache file has a valid checksum
data = Mmap.mmap(io, Vector{UInt8}, filesize(io), 0)
# checksum = UInt32 read in bigendian format from the last 4 bytes:
checksum = UInt32(data[end]) + UInt32(data[end-1])<<8 + UInt32(data[end-2])<<16 + UInt32(data[end-3])<<24
crc = crc32c(@view(data[1:end-4]))
finalize(data)
if checksum != crc
crc = crc32c(seekstart(io), filesize(io)-4)
if crc != ntoh(read(io, UInt32))
DEBUG_LOADING[] && info("JL_DEBUG_LOADING: Rejecting cache file $cachefile because it has an invalid checksum.")
return true
end
Expand Down
53 changes: 47 additions & 6 deletions base/util.jl
Expand Up @@ -765,10 +765,6 @@ if is_windows()

end

# compute sizeof correctly for strings, arrays, and subarrays of bytes
_sizeof(a) = sizeof(a)
_sizeof(a::FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N) = length(a)

"""
crc32c(data, crc::UInt32=0x00000000)

Expand All @@ -778,9 +774,54 @@ a starting `crc` integer to be mixed in with the checksum. The `crc` parameter
can be used to compute a checksum on data divided into chunks: performing
`crc32c(data2, crc32c(data1))` is equivalent to the checksum of `[data1; data2]`.
(Technically, a little-endian checksum is computed.)

There is also a method `crc32c(io, nb, crc)` to checksum `nb` bytes from
a stream `io`, or `crc32c(io, crc)` to checksum all the remaining bytes.
Hence you can do [`open(crc32c, filename)`](@ref) to checksum an entire file,
or `crc32c(seekstart(buf))` to checksum an [`IOBuffer`](@ref) without
calling [`take!`](@ref).

For a `String`, note that the result is specific to the UTF-8 encoding
(a different checksum would be obtained from a different Unicode encoding).
To checksum an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`,
but note that the result may be endian-dependent.
"""
function crc32c end

# Low-level entry point: forward `crc`, `a`, and `n` to the C routine `jl_crc32c`
# (as `UInt32`, `Ptr{UInt8}`, and `Csize_t` respectively) and return its `UInt32` result.
# No check is made here that `n` is consistent with the size of `a`.
function unsafe_crc32c(a, n, crc)
    return ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n)
end

# Checksum an `Array{UInt8}` (or a fast contiguous view of one), optionally mixing in
# a starting `crc`.
function crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000)
    # the elements are `UInt8`, so the element count is passed as the byte count
    nbytes = length(a)
    return unsafe_crc32c(a, nbytes, crc)
end

# Checksum the `sizeof(s)` bytes of a `String`, optionally mixing in a starting `crc`.
function crc32c(s::String, crc::UInt32=0x00000000)
    nbytes = sizeof(s)
    return unsafe_crc32c(s, nbytes, crc)
end

"""
crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N,String}, crc::UInt32=0x00000000) =
ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, _sizeof(a))
crc32c(io::IO, [nb::Integer,] crc::UInt32=0x00000000)

Read up to `nb` bytes from `io` and return the CRC-32c checksum, optionally
mixed with a starting `crc` integer. If `nb` is not supplied, then
`io` will be read until the end of the stream.
"""
function crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000)
nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0"))
buf = Array{UInt8}(min(nb, 16384))
while !eof(io) && nb > 16384
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be 8192 * 3? That's the LONG block size used in the SSE4.2 version (and also on ARM in one of my upcoming changes).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense. I tried 16384 and 32768 and the latter wasn't any faster on my machine, but 8192 * 3 is fine too.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'll be caught by the short version, so it won't matter too much, but in principle 8192 * 2 and 8192 * 4 are equally bad since neither of them makes full use of the LONG loop.

n = readbytes!(io, buf)
crc = unsafe_crc32c(buf, n, crc)
nb -= n
end
eof(io) && return crc
@assert 0 ≤ nb ≤ length(buf)
return unsafe_crc32c(buf, readbytes!(io, buf, nb), crc)
end
# Two-argument stream form: delegate to the byte-limited method with a limit of
# `typemax(Int64)`.
function crc32c(io::IO, crc::UInt32=0x00000000)
    return crc32c(io, typemax(Int64), crc)
end

# optimization for `open(crc32c, filename)` to use the size of the file
function open(::typeof(crc32c), filename::AbstractString)
    return open(filename, "r") do io
        # pass the file's size as the number of bytes to checksum
        crc32c(io, filesize(io))
    end
end


"""
@kwdef typedef
Expand Down
1 change: 1 addition & 0 deletions doc/src/stdlib/arrays.md
Expand Up @@ -131,6 +131,7 @@ Base.cumprod!
Base.cumsum
Base.cumsum!
Base.cumsum_kbn
Base.crc32c
Base.LinAlg.diff
Base.LinAlg.gradient
Base.rot180
Expand Down
1 change: 1 addition & 0 deletions doc/src/stdlib/io-network.md
Expand Up @@ -12,6 +12,7 @@ Base.take!(::Base.AbstractIOBuffer)
Base.fdio
Base.flush
Base.close
Base.crc32c(::IO, ::Integer, ::UInt32)
Base.write
Base.read
Base.read!
Expand Down
34 changes: 31 additions & 3 deletions test/misc.jl
Expand Up @@ -566,14 +566,42 @@ end
for force_software_crc in (1,0)
ccall(:jl_crc32c_init, Void, (Cint,), force_software_crc)
for (n,crc) in [(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)]
@test Base.crc32c(UInt8[1:n;]) == crc
@test crc32c(UInt8[1:n;]) == crc == crc32c(String(UInt8[1:n;]))
end
# test that crc parameter is equivalent to checksum of concatenated data,
# and test crc of subarrays:
a = UInt8[1:255;]
crc_256 = Base.crc32c(UInt8[1:255;])
crc_256 = crc32c(a)
@views for n = 1:255
@test Base.crc32c(a[n+1:end], Base.crc32c(a[1:n])) == crc_256
@test crc32c(a[n+1:end], crc32c(a[1:n])) == crc_256
end

@test crc32c(IOBuffer(a)) == crc_256
let buf = IOBuffer()
write(buf, a[1:3])
@test crc32c(seekstart(buf)) == crc32c(a[1:3])
@test crc32c(buf) == 0x00000000
@test crc32c(seek(buf, 1)) == crc32c(a[2:3])
@test crc32c(seek(buf, 0), 2) == crc32c(a[1:2])
@test crc32c(buf) == crc32c(a[3:3])
end

let f = tempname()
try
write(f, a)
@test open(crc32c, f) == crc_256
open(f, "r") do io
@test crc32c(io, 16) == crc32c(a[1:16])
@test crc32c(io, 16) == crc32c(a[17:32])
@test crc32c(io) == crc32c(a[33:end])
@test crc32c(io, 1000) == 0x00000000
end
a = rand(UInt8, 30000)
write(f, a)
@test open(crc32c, f) == crc32c(a) == open(io -> crc32c(io, 10^6), f)
finally
rm(f, force=true)
end
end
end

Expand Down