Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gzipped FITS file support and AstroImages.jl library #363

Merged
merged 1 commit into from
Jul 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
42 changes: 36 additions & 6 deletions src/registry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ add_format(format"JLD", (unsafe_wrap(Vector{UInt8}, "Julia data file (HDF5), ver
unsafe_wrap(Vector{UInt8}, "Julia data file (HDF5), version 0.1")), ".jld", [:JLD => UUID("4138dd39-2aa7-5051-a626-17a0bb65d9c8")])
add_format(format"JLD2", (unsafe_wrap(Vector{UInt8},"Julia data file (HDF5), version 0.2"),
unsafe_wrap(Vector{UInt8}, "HDF5-based Julia Data Format, version ")), ".jld2", [:JLD2 => UUID("033835bb-8acc-5ee8-8aae-3f567f8a3819")])
add_format(format"GZIP", [0x1f, 0x8b], ".gz", [:Libz => UUID("2ec943e9-cfe8-584d-b93d-64dcb6d567b7")])
add_format(format"BSON",(),".bson", [:BSON => UUID("fbb218c0-5317-5bc6-957e-2ee96dd4b1f0")])
add_format(format"JLSO", (), ".jlso", [:JLSO => UUID("9da8a3cd-07a3-59c0-a743-3fdc52c30d11")])
add_format(format"NPY", "\x93NUMPY", ".npy", [idNPZ])
Expand Down Expand Up @@ -60,6 +59,9 @@ end

# Return `true` when `detect_compressor` reports something other than `nothing`
# for `io`, and `false` otherwise. `len` and all keyword arguments are forwarded
# to `detect_compressor` unchanged.
function detect_compressed(io, len=getlength(io); kwargs...)
    compressor = detect_compressor(io, len; kwargs...)
    return compressor !== nothing
end

# Pattern for names that end in a gzip-compressed FITS extension such as `.fits.gz`.
# NOTE(review): the trailing `\>` appears to match a literal `>`, presumably because
# `IOStream` names have the form `<file path>` — confirm before changing it.
const compressed_fits_exten = r"\.(fit|fits|fts|FIT|FITS|FTS)\.(gz|GZ)\>"

# Return `true` only when `io` exposes a `name` property and that name ends with
# `compressed_fits_exten`. Objects without a `name` property yield `false`.
function name_matches_compressed_fits(io)
    hasproperty(io, :name) || return false
    return endswith(io.name, compressed_fits_exten)
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, what happens if propertynames doesn't contain :name? Will this break the functionality of detect_rdata?

How about

name_matches_compressed_fits(io::IOStream) = endswith(io.name, compressed_fits_exten)
name_matches_compressed_fits(io::IO) = false # does FITSIO and AstroImages support generic IO object (e.g., `IOBuffer`)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the logic of this again, I think it's okay.
If we detect that it's compressed, then if there's no name or the name does not match a compressed fits file, it falls back to the compressed R data path.

Ideally someone with compressed R data could remove the file extension and test this path out.


# test for RD?n magic sequence at the beginning of R data input stream
function detect_rdata(io)
seekstart(io)
Expand All @@ -81,7 +83,7 @@ function detect_rdata(io)
return true
end
checked_match(io) && return true
return detect_compressed(io; formats=["GZIP", "BZIP2", "XZ"])
return detect_compressed(io; formats=["GZIP", "BZIP2", "XZ"]) && !name_matches_compressed_fits(io)
end

add_format(format"RData", detect_rdata, [".rda", ".RData", ".rdata"], [idRData, LOAD])
Expand All @@ -102,7 +104,7 @@ function detect_rdata_single(io)

res = checked_match(io)
if !res
res = detect_compressed(io; formats=["GZIP", "BZIP2", "XZ"])
res = detect_compressed(io; formats=["GZIP", "BZIP2", "XZ"]) && !name_matches_compressed_fits(io)
end
seekstart(io)
return res
Expand Down Expand Up @@ -463,11 +465,39 @@ end
add_format(format"STL_ASCII", detect_stlascii, [".stl", ".STL"], [idMeshIO])
add_format(format"STL_BINARY", detect_stlbinary, [".stl", ".STL"], [idMeshIO])

# Gzip data begins with the two magic bytes [0x1f, 0x8b]. Detection uses a function
# instead of a plain magic sequence so that names like `.fits.gz` are rejected here
# and are not dispatched to Libz.
function detect_gzip(io)
    # Compressed FITS names are never reported as GZIP.
    name_matches_compressed_fits(io) && return false
    if getlength(io) >= 2
        header = Vector{UInt8}(undef, 2)
        read!(io, header)
        return header[1] == 0x1f && header[2] == 0x8b
    end
    return false
end
add_format(format"GZIP", detect_gzip, ".gz", [:Libz => UUID("2ec943e9-cfe8-584d-b93d-64dcb6d567b7")])


# Astro Data
# FITS files are often gzipped and given an extension like ".fits.gz". Those should be
# loaded directly rather than dispatched to Libz.
function detect_fits(io)
    # A name ending in a compressed-FITS extension is accepted without inspecting
    # the stream contents.
    name_matches_compressed_fits(io) && return true
    if getlength(io) >= 30
        # 30-byte FITS signature; the bytes spell "SIMPLE", padding, "=", padding, "T".
        signature = UInt8[0x53,0x49,0x4d,0x50,0x4c,0x45,0x20,0x20,0x3d,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x54]
        header = Vector{UInt8}(undef, 30)
        read!(io, header)
        return header == signature
    end
    return false
end
add_format(format"FITS",
# See https://www.loc.gov/preservation/digital/formats/fdd/fdd000317.shtml#sign
[0x53,0x49,0x4d,0x50,0x4c,0x45,0x20,0x20,0x3d,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x54],
[".fit", ".fits", ".fts", ".FIT", ".FITS", ".FTS"], [:FITSIO => UUID("525bcba6-941b-5504-bd06-fd0dc1a4d2eb")])
# See https://www.loc.gov/preservation/digital/formats/fdd/fdd000317.shtml#sign
# [0x53,0x49,0x4d,0x50,0x4c,0x45,0x20,0x20,0x3d,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x54],
detect_fits,
[".fit", ".fits", ".fts", ".FIT", ".FITS", ".FTS", ".fit",],
[:FITSIO => UUID("525bcba6-941b-5504-bd06-fd0dc1a4d2eb")],
[:AstroImages => UUID("fe3fc30c-9b16-11e9-1c73-17dabf39f4ad")])



function detect_gadget2(io)
pos = position(io)
Expand Down
2 changes: 1 addition & 1 deletion test/query.jl
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ let file_dir = joinpath(@__DIR__, "files"), file_path = Path(file_dir)

io = open(iris)
q = query(io)
@test typeof(q) <: Stream{format"GZIP"} # FIXME: should be RData
@test typeof(q) <: Stream{format"RData"}
@test FileIO.detect_rdata(io)

# issue #345: it errors here
Expand Down