diff --git a/README.md b/README.md index 89d540d8..f5b51ed4 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ using FileIO obj = load(filename) ``` to read data from a formatted file. Likewise, saving might be as simple as -``` +```jl save(filename, obj) ``` @@ -53,9 +53,11 @@ add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png") # have one of two possible file extensions add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"]) -# A format whose magic bytes might not be at the beginning of the file, -# necessitating a custom function `detecthdf5` to find them -add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"]) +# A format whose magic bytes are more complicated, necessitating a custom function +# `detectwav` to find them. The function should assume that the stream is +# positioned at the beginning of the file being detected, and the query +# infrastructure will handle seeking to the correct position afterwards. +add_format(format"WAV", detectwav, ".wav") # A fictitious format that, unfortunately, provides no magic # bytes. Here we have to place our faith in the file extension. @@ -103,7 +105,6 @@ using FileIO # See important note about scope below function load(f::File{format"PNG"}) open(f) do s - skipmagic(s) # skip over the magic bytes # You can just call the method below... ret = load(s) # ...or implement everything here instead @@ -112,7 +113,7 @@ end # You can support streams and add keywords: function load(s::Stream{format"PNG"}; keywords...) - # s is already positioned after the magic bytes + skipmagic(s) # skip over the magic bytes # Do the stuff to read a PNG file chunklength = read(s, UInt32) ... @@ -130,8 +131,8 @@ end Note that these are `load` and `save`, **not** `FileIO.load` and `FileIO.save`. Because a given format might have multiple packages that are capable of reading it, FileIO will dispatch to these using module-scoping, e.g., `SomePkg.load(args...)`. 
-Consequently, **packages should define "private" `load` and `save` methods, and -not extend (import) FileIO's**. +Consequently, **packages should define "private" `load` and `save` methods, and +not extend (import) FileIO's**. `load(::File)` and `save(::File)` should close any streams they open. (If you use the `do` syntax, this happens for you diff --git a/src/query.jl b/src/query.jl index d877f0b9..06f5f54d 100644 --- a/src/query.jl +++ b/src/query.jl @@ -72,6 +72,7 @@ For example: add_format(format"PNG", (UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00]), [".tiff", ".tif"]) add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png") add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"]) + add_format(format"WAV", detectwav, [".wav", ".WAV"]) Note that extensions, magic numbers, and format-identifiers are case-sensitive. """ diff --git a/src/registry.jl b/src/registry.jl index 3ac133db..023fb41b 100644 --- a/src/registry.jl +++ b/src/registry.jl @@ -8,7 +8,6 @@ add_format(format"GZIP", [0x1f, 0x8b], ".gz", [:Libz]) # test for RD?2 magic sequence at the beginning of R data input stream function detect_rdata(io) - seekstart(io) read(io, UInt8) == UInt8('R') && read(io, UInt8) == UInt8('D') && read(io, UInt8) in (UInt8('A'), UInt8('B'), UInt8('X')) && @@ -19,10 +18,8 @@ end add_format(format"RData", detect_rdata, [".rda", ".RData", ".rdata"], [:RData, LOAD]) function detect_rdata_single(io) - seekstart(io) res = read(io, UInt8) in (UInt8('A'), UInt8('B'), UInt8('X')) && (c = read(io, UInt8); c == UInt8('\n') || (c == UInt8('\r') && read(io, UInt8) == UInt8('\n'))) - seekstart(io) return res end @@ -145,10 +142,9 @@ add_format(format"GSLIB", (), [".gslib",".sgems"], [:GslibIO]) ### Audio formats function detectwav(io) - seekstart(io) magic = read!(io, Vector{UInt8}(4)) magic == b"RIFF" || return false - seek(io, 8) + skip(io, 4) submagic = read!(io, Vector{UInt8}(4)) submagic == b"WAVE" @@ -200,10 +196,9 @@ skipmagic(io, ::typeof(detect_noometiff)) 
= seek(io, 4) # AVI is a subtype of RIFF, as is WAV function detectavi(io) - seekstart(io) magic = read!(io, Vector{UInt8}(4)) magic == b"RIFF" || return false - seek(io, 8) + skip(io, 4) submagic = read!(io, Vector{UInt8}(4)) submagic == b"AVI " @@ -212,6 +207,8 @@ add_format(format"AVI", detectavi, ".avi", [:ImageMagick]) # HDF5: the complication is that the magic bytes may start at # 0, 512, 1024, 2048, or any multiple of 2 thereafter +# this detection function assumes that the stream start and end match the +# file start and end, which is true if it's just a file on disk h5magic = (0x89,0x48,0x44,0x46,0x0d,0x0a,0x1a,0x0a) function detecthdf5(io) position(io) == 0 || return false @@ -234,6 +231,8 @@ function detecthdf5(io) end add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"], [:HDF5]) +# the STL detection functions assume that the stream start and end match the +# file start and end, which is true if it's just a file on disk function detect_stlascii(io) try position(io) != 0 && (seekstart(io); return false) diff --git a/test/query.jl b/test/query.jl index cc0b89a6..754becab 100644 --- a/test/query.jl +++ b/test/query.jl @@ -336,13 +336,32 @@ end q = query(joinpath(file_dir, "minimal_ascii.rds")) @test typeof(q) == File{format"RDataSingle"} open(q) do io - @test position(io) == 0 @test FileIO.detect_rdata_single(io) - # need to seek to beginning of file where data structure starts - @test position(io) == 0 end end @testset "Format with function for magic bytes" begin add_format(format"FUNCTION_FOR_MAGIC_BYTES", x -> 0x00, ".wav", [:WAV]) del_format(format"FUNCTION_FOR_MAGIC_BYTES") end + +function detect_position_test(io) + return read(io, 3) == b"DET" +end + +@testset "Detection function called with properly-positioned stream" begin + add_format(format"DET", detect_position_test, ".det") + # we need extra junk to work around issue #176 + junk = rand(UInt8, 35) + io = IOBuffer() + write(io, "DET") + write(io, junk) + seek(io, 0) + @test query(io) isa 
Formatted{format"DET"} + @test position(io) == 0 + write(io, "junkDET") + write(io, junk) + seek(io, 4) + @test query(io) isa Formatted{format"DET"} + @test position(io) == 4 + del_format(format"DET") +end