Skip to content

Commit

Permalink
clarifies stream seeking for querying and detection functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ssfrr committed Mar 20, 2018
1 parent 50c4ef3 commit c451aa1
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 18 deletions.
17 changes: 9 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ using FileIO
obj = load(filename)
```
to read data from a formatted file. Likewise, saving might be as simple as
```
```jl
save(filename, obj)
```

Expand Down Expand Up @@ -53,9 +53,11 @@ add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png")
# have one of two possible file extensions
add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"])

# A format whose magic bytes might not be at the beginning of the file,
# necessitating a custom function `detecthdf5` to find them
add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"])
# A format whose magic bytes are more complicated, necessitating a custom function
# `detectwav` to find them. The function should assume that the stream is
# positioned at the beginning of the file being detected, and the query
# infrastructure will handle seeking to the correct position afterwards.
add_format(format"WAV", detectwav, ".wav")

# A fictitious format that, unfortunately, provides no magic
# bytes. Here we have to place our faith in the file extension.
Expand Down Expand Up @@ -103,7 +105,6 @@ using FileIO
# See important note about scope below
function load(f::File{format"PNG"})
open(f) do s
skipmagic(s) # skip over the magic bytes
# You can just call the method below...
ret = load(s)
# ...or implement everything here instead
Expand All @@ -112,7 +113,7 @@ end

# You can support streams and add keywords:
function load(s::Stream{format"PNG"}; keywords...)
# s is already positioned after the magic bytes
skipmagic(s) # skip over the magic bytes
# Do the stuff to read a PNG file
chunklength = read(s, UInt32)
...
Expand All @@ -130,8 +131,8 @@ end
Note that these are `load` and `save`, **not** `FileIO.load` and `FileIO.save`.
Because a given format might have multiple packages that are capable of reading it,
FileIO will dispatch to these using module-scoping, e.g., `SomePkg.load(args...)`.
Consequently, **packages should define "private" `load` and `save` methods, and
not extend (import) FileIO's**.
Consequently, **packages should define "private" `load` and `save` methods, and
not extend (import) FileIO's**.

`load(::File)` and `save(::File)` should close any streams
they open. (If you use the `do` syntax, this happens for you
Expand Down
1 change: 1 addition & 0 deletions src/query.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ For example:
add_format(format"TIFF", (UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00]), [".tiff", ".tif"])
add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png")
add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"])
add_format(format"WAV", detectwav, [".wav", ".WAV"])
Note that extensions, magic numbers, and format-identifiers are case-sensitive.
"""
Expand Down
13 changes: 6 additions & 7 deletions src/registry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ add_format(format"GZIP", [0x1f, 0x8b], ".gz", [:Libz])

# test for RD?2 magic sequence at the beginning of R data input stream
function detect_rdata(io)
seekstart(io)
read(io, UInt8) == UInt8('R') &&
read(io, UInt8) == UInt8('D') &&
read(io, UInt8) in (UInt8('A'), UInt8('B'), UInt8('X')) &&
Expand All @@ -19,10 +18,8 @@ end
add_format(format"RData", detect_rdata, [".rda", ".RData", ".rdata"], [:RData, LOAD])

# Detect a single-object R data stream: the first byte must be 'A', 'B', or 'X'
# and must be followed by a line ending ("\n" or "\r\n").
# NOTE(review): this text is a rendered diff without +/- markers; the
# `seekstart(io)` calls below look like the lines this commit removed -- confirm
# against the actual src/registry.jl before relying on them.
function detect_rdata_single(io)
seekstart(io)
res = read(io, UInt8) in (UInt8('A'), UInt8('B'), UInt8('X')) &&
(c = read(io, UInt8); c == UInt8('\n') || (c == UInt8('\r') && read(io, UInt8) == UInt8('\n')))
seekstart(io)
return res
end

Expand Down Expand Up @@ -145,10 +142,9 @@ add_format(format"GSLIB", (), [".gslib",".sgems"], [:GslibIO])

### Audio formats
function detectwav(io)
seekstart(io)
magic = read!(io, Vector{UInt8}(4))
magic == b"RIFF" || return false
seek(io, 8)
skip(io, 4)
submagic = read!(io, Vector{UInt8}(4))

submagic == b"WAVE"
Expand Down Expand Up @@ -200,10 +196,9 @@ skipmagic(io, ::typeof(detect_noometiff)) = seek(io, 4)

# AVI is a subtype of RIFF, as is WAV
function detectavi(io)
seekstart(io)
magic = read!(io, Vector{UInt8}(4))
magic == b"RIFF" || return false
seek(io, 8)
skip(io, 4)
submagic = read!(io, Vector{UInt8}(4))

submagic == b"AVI "
Expand All @@ -212,6 +207,8 @@ add_format(format"AVI", detectavi, ".avi", [:ImageMagick])

# HDF5: the complication is that the magic bytes may start at
# 0, 512, 1024, 2048, and so on, doubling each time
# this detection function assumes that the stream start and end match the
# file start and end, which is true if it's just a file on disk
h5magic = (0x89,0x48,0x44,0x46,0x0d,0x0a,0x1a,0x0a)
function detecthdf5(io)
position(io) == 0 || return false
Expand All @@ -234,6 +231,8 @@ function detecthdf5(io)
end
add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"], [:HDF5])

# the STL detection functions assume that the stream start and end match the
# file start and end, which is true if it's just a file on disk
function detect_stlascii(io)
try
position(io) != 0 && (seekstart(io); return false)
Expand Down
25 changes: 22 additions & 3 deletions test/query.jl
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,32 @@ end
q = query(joinpath(file_dir, "minimal_ascii.rds"))
@test typeof(q) == File{format"RDataSingle"}
open(q) do io
@test position(io) == 0
@test FileIO.detect_rdata_single(io)
# need to seek to beginning of file where data structure starts
@test position(io) == 0
end
end
@testset "Format with function for magic bytes" begin
add_format(format"FUNCTION_FOR_MAGIC_BYTES", x -> 0x00, ".wav", [:WAV])
del_format(format"FUNCTION_FOR_MAGIC_BYTES")
end

# Detection function used by the tests below: reports whether the next three
# bytes read from `io` are the ASCII magic "DET". Reading advances the stream;
# this function does not seek.
function detect_position_test(io)
    magic = read(io, 3)
    return magic == b"DET"
end

# Checks that `query` on a stream identifies the format from the stream's
# current position and leaves the position where it was before the call.
@testset "Detection function called with properly-positioned stream" begin
# Register a temporary format whose detector is `detect_position_test`.
add_format(format"DET", detect_position_test, ".det")
# we need extra junk to work around issue #176
junk = rand(UInt8, 35)
io = IOBuffer()
write(io, "DET")
write(io, junk)
# Case 1: the magic bytes sit at offset 0 and the stream is positioned there.
seek(io, 0)
@test query(io) isa Formatted{format"DET"}
@test position(io) == 0
# Case 2: write again from the current position (0, if the check above held)
# so that four bytes of "junk" precede the magic, then position the stream at
# offset 4, where "DET" begins.
write(io, "junkDET")
write(io, junk)
seek(io, 4)
@test query(io) isa Formatted{format"DET"}
@test position(io) == 4
# Remove the temporary format registration.
del_format(format"DET")
end

0 comments on commit c451aa1

Please sign in to comment.