Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clarifies stream seeking for querying and detection functions #177

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ using FileIO
obj = load(filename)
```
to read data from a formatted file. Likewise, saving might be as simple as
```
```jl
save(filename, obj)
```

Expand Down Expand Up @@ -91,9 +91,11 @@ add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png")
# have one of two possible file extensions
add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"])

# A format whose magic bytes might not be at the beginning of the file,
# necessitating a custom function `detecthdf5` to find them
add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"])
# A format whose magic bytes are more complicated, necessitating a custom function
# `detectwav` to find them. The function should assume that the stream is
# positioned at the beginning of the file being detected, and the query
# infrastructure will handle seeking to the correct position afterwards.
add_format(format"WAV", detectwav, ".wav")

# A fictitious format that, unfortunately, provides no magic
# bytes. Here we have to place our faith in the file extension.
Expand Down Expand Up @@ -141,7 +143,6 @@ using FileIO
# See important note about scope below
function load(f::File{format"PNG"})
open(f) do s
skipmagic(s) # skip over the magic bytes
# You can just call the method below...
ret = load(s)
# ...or implement everything here instead
Expand All @@ -150,7 +151,7 @@ end

# You can support streams and add keywords:
function load(s::Stream{format"PNG"}; keywords...)
# s is already positioned after the magic bytes
skipmagic(s) # skip over the magic bytes
# Do the stuff to read a PNG file
chunklength = read(s, UInt32)
...
Expand All @@ -174,7 +175,7 @@ Consequently, **packages should define "private" `load` and `save` methods (also

If you run into a naming conflict with the `load` and `save` functions
(for example, you already have another function in your package that has
one of these names), you can instead name your loaders `fileio_load`,
one of these names), you can instead name your loaders `fileio_load`,
`fileio_save` etc. Note that you cannot mix and match these styles: either
all your loaders have to be named `load`, or all of them should be called
`fileio_load`, but you cannot use both conventions in one module.
Expand Down
1 change: 1 addition & 0 deletions src/query.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ For example:
add_format(format"TIFF", (UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00]), [".tiff", ".tif"])
add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png")
add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"])
add_format(format"WAV", detectwav, [".wav", ".WAV"])

Note that extensions, magic numbers, and format-identifiers are case-sensitive.
"""
Expand Down
15 changes: 7 additions & 8 deletions src/registry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ add_format(format"GZIP", [0x1f, 0x8b], ".gz", [:Libz])

# test for RD?n magic sequence at the beginning of R data input stream
function detect_rdata(io)
seekstart(io)
read(io, UInt8) == UInt8('R') &&
read(io, UInt8) == UInt8('D') &&
read(io, UInt8) in (UInt8('A'), UInt8('B'), UInt8('X')) &&
Expand All @@ -19,10 +18,8 @@ end
add_format(format"RData", detect_rdata, [".rda", ".RData", ".rdata"], [:RData, LOAD])

# Detection function for the single-object R data format.
# Returns `true` when the first byte read from `io` is 'A', 'B' or 'X' and it is
# immediately followed by a line ending ("\n" or "\r\n"); otherwise `false`.
# NOTE(review): this span is taken from a diff view, so the two `seekstart(io)`
# calls below may be lines the change removes rather than current code — confirm
# against the actual file before relying on the rewinding behaviour.
function detect_rdata_single(io)
# rewind so the check always starts at the beginning of the stream
seekstart(io)
res = read(io, UInt8) in (UInt8('A'), UInt8('B'), UInt8('X')) &&
(c = read(io, UInt8); c == UInt8('\n') || (c == UInt8('\r') && read(io, UInt8) == UInt8('\n')))
# rewind again so the caller sees the stream at its start
seekstart(io)
return res
end

Expand Down Expand Up @@ -145,10 +142,9 @@ add_format(format"GSLIB", (), [".gslib",".sgems"], [:GslibIO])

### Audio formats
function detectwav(io)
seekstart(io)
magic = read!(io, Vector{UInt8}(undef, 4))
magic == b"RIFF" || return false
seek(io, 8)
skip(io, 4)
submagic = read!(io, Vector{UInt8}(undef, 4))

submagic == b"WAVE"
Expand Down Expand Up @@ -198,10 +194,9 @@ skipmagic(io, ::typeof(detect_noometiff)) = seek(io, 4)

# AVI is a subtype of RIFF, as is WAV
function detectavi(io)
seekstart(io)
magic = read!(io, Vector{UInt8}(undef, 4))
magic == b"RIFF" || return false
seek(io, 8)
skip(io, 4)
submagic = read!(io, Vector{UInt8}(undef, 4))

submagic == b"AVI "
Expand All @@ -210,6 +205,8 @@ add_format(format"AVI", detectavi, ".avi", [:ImageMagick])

# HDF5: the complication is that the magic bytes may start at
# 0, 512, 1024, 2048, or any successive doubling thereafter
# this detection function assumes that the stream start and end match the
# file start and end, which is true if it's just a file on disk
h5magic = (0x89,0x48,0x44,0x46,0x0d,0x0a,0x1a,0x0a)
function detecthdf5(io)
position(io) == 0 || return false
Expand All @@ -232,6 +229,8 @@ function detecthdf5(io)
end
add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"], [:HDF5])

# the STL detection functions assume that the stream start and end match the
# file start and end, which is true if it's just a file on disk
function detect_stlascii(io)
pos = position(io)
try
Expand Down Expand Up @@ -283,4 +282,4 @@ add_format(format"MetaImage", "ObjectType", ".mhd", [:MetaImageFormat])

add_format(format"vegalite", (), [".vegalite"], [:VegaLite])

add_format(format"FCS", "FCS", [".fcs"], [:FCSFiles])
add_format(format"FCS", "FCS", [".fcs"], [:FCSFiles])
25 changes: 22 additions & 3 deletions test/query.jl
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,32 @@ end
q = query(joinpath(file_dir, "minimal_ascii.rds"))
@test typeof(q) == File{format"RDataSingle"}
open(q) do io
@test position(io) == 0
@test FileIO.detect_rdata_single(io)
# need to seek to beginning of file where data structure starts
@test position(io) == 0
end
end
# Checks that a format can be registered with a function in the magic-bytes
# position; the format is deleted again right away so the registry is left as
# it was found.
@testset "Format with function for magic bytes" begin
add_format(format"FUNCTION_FOR_MAGIC_BYTES", x -> 0x00, ".wav", [:WAV])
del_format(format"FUNCTION_FOR_MAGIC_BYTES")
end

# Detection function for the test-only "DET" format: consume (up to) three bytes
# from the current position of `io` and report whether they spell "DET".
# It deliberately does not seek, so the result depends on where the caller has
# positioned the stream.
detect_position_test(io) = read(io, 3) == b"DET"

@testset "Detection function called with properly-positioned stream" begin
    add_format(format"DET", detect_position_test, ".det")

    # Random filler appended after the magic bytes; the extra length is needed
    # to work around issue #176.
    padding = rand(UInt8, 35)
    buf = IOBuffer()

    # Case 1: the magic bytes sit at the very start of the stream.
    write(buf, "DET")
    write(buf, padding)
    seek(buf, 0)
    @test query(buf) isa Formatted{format"DET"}
    @test position(buf) == 0

    # Case 2: the magic bytes sit at offset 4. Provided the check above held,
    # the stream is still at offset 0, so these writes overwrite the earlier
    # contents with "junkDET" followed by the filler.
    write(buf, "junkDET")
    write(buf, padding)
    seek(buf, 4)
    @test query(buf) isa Formatted{format"DET"}
    @test position(buf) == 4

    del_format(format"DET")
end