Skip to content

Commit

Permalink
Fix detect_compressed() (#318)
Browse files Browse the repository at this point in the history
* fix BZIP2 magic bytes

* add detect_compressor(): return compressor format
  • Loading branch information
alyst committed Mar 20, 2021
1 parent 3e72cd4 commit 94099c8
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 11 deletions.
24 changes: 13 additions & 11 deletions src/registry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,34 +22,36 @@ add_format(format"GZIP", [0x1f, 0x8b], ".gz", [:Libz => UUID("2ec943e9-cfe8-584d
add_format(format"BSON",(),".bson", [:BSON => UUID("fbb218c0-5317-5bc6-957e-2ee96dd4b1f0")])
add_format(format"JLSO", (), ".jlso", [:JLSO => UUID("9da8a3cd-07a3-59c0-a743-3fdc52c30d11")])

"""
    detect_compressor(io, len=getlength(io); formats=["GZIP", "BZIP2", "XZ", "LZ4"])

Inspect the leading bytes of `io` and return the name of the compression
format whose magic number they match, or `nothing` when none matches.

# Arguments
- `io`: a stream supporting `seekstart(io)` and `read(io, UInt8)`. It is rewound
  to the start before reading and is left positioned just after the last byte
  that had to be examined.
- `len`: number of bytes that may be read from `io`. No byte beyond `len` is
  read; if `len` is too short for a format's signature, that format (and every
  longer one) is not considered.

# Keywords
- `formats`: collection of format names to test for. Only `"GZIP"`, `"BZIP2"`,
  `"LZ4"` and `"XZ"` are recognised; any other entry is ignored.

# Returns
One of `"GZIP"`, `"BZIP2"`, `"LZ4"`, `"XZ"`, or `nothing`.
"""
function detect_compressor(io, len=getlength(io); formats=["GZIP", "BZIP2", "XZ", "LZ4"])
    seekstart(io)

    # GZIP: 2-byte signature 1f 8b
    len < 2 && return nothing
    b1 = read(io, UInt8)
    b2 = read(io, UInt8)
    if "GZIP" in formats
        b1 == 0x1f && b2 == 0x8b && return "GZIP"
    end

    # BZIP2: 3-byte signature 42 5a 68 (ASCII "BZh"); note 0x68, not decimal 68
    len < 3 && return nothing
    b3 = read(io, UInt8)
    if "BZIP2" in formats
        b1 == 0x42 && b2 == 0x5A && b3 == 0x68 && return "BZIP2"
    end

    # LZ4: 4-byte signature 04 22 4d 18
    len < 4 && return nothing
    b4 = read(io, UInt8)
    if "LZ4" in formats
        b1 == 0x04 && b2 == 0x22 && b3 == 0x4D && b4 == 0x18 && return "LZ4"
    end

    # XZ: 6-byte signature fd 37 7a 58 5a 00
    len < 6 && return nothing
    b5 = read(io, UInt8)
    b6 = read(io, UInt8)
    if "XZ" in formats
        b1 == 0xFD && b2 == 0x37 && b3 == 0x7A && b4 == 0x58 &&
            b5 == 0x5A && b6 == 0x00 && return "XZ"
    end

    return nothing
end

"""
    detect_compressed(io, len=getlength(io); kwargs...)

Return `true` when `detect_compressor(io, len; kwargs...)` identifies a
compression format and `false` when it returns `nothing`. All keyword
arguments are forwarded unchanged to `detect_compressor`.
"""
function detect_compressed(io, len=getlength(io); kwargs...)
    compressor = detect_compressor(io, len; kwargs...)
    return compressor !== nothing
end

# test for RD?n magic sequence at the beginning of R data input stream
function detect_rdata(io)
seekstart(io)
Expand Down
1 change: 1 addition & 0 deletions test/files/dummy.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is a dummy file for FileIO.detect_compressor() test
Binary file added test/files/dummy.txt.bz2
Binary file not shown.
Binary file added test/files/dummy.txt.gz
Binary file not shown.
Binary file added test/files/dummy.txt.lz4
Binary file not shown.
Binary file added test/files/dummy.txt.xz
Binary file not shown.
20 changes: 20 additions & 0 deletions test/query.jl
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,26 @@ end
end
end

# Run compression detection against the plain sample file and each of its
# compressed variants in test/files. `ext === nothing` selects the uncompressed
# file, for which no compressor should be reported.
@testset "detect $(format !== nothing ? format : "no") compression" for (ext, format) in [
    (nothing, nothing),
    (".gz", "GZIP"),
    (".bz2", "BZIP2"),
    (".lz4", "LZ4"),
    (".xz", "XZ"),
]
    plain = joinpath(@__DIR__, "files", "dummy.txt")
    fname = ext === nothing ? plain : plain * ext

    # Only the expected format is enabled.
    only_expected = open(io -> FileIO.detect_compressor(io, formats=[format]), fname)
    @test only_expected == format

    # No formats enabled: nothing can ever be detected.
    none_enabled = open(io -> FileIO.detect_compressor(io, formats=[]), fname)
    @test none_enabled === nothing

    # Default set of formats.
    all_enabled = open(io -> FileIO.detect_compressor(io), fname)
    @test all_enabled == format

    # Boolean wrapper agrees with whether a format is expected.
    is_compressed = open(io -> FileIO.detect_compressed(io), fname)
    @test is_compressed == (format !== nothing)
end

let file_dir = joinpath(@__DIR__, "files"), file_path = Path(file_dir)
@testset "Querying with $(typeof(fp))" for fp in (file_dir, file_path)
@testset "bedGraph" begin
Expand Down

0 comments on commit 94099c8

Please sign in to comment.