Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tar.extract_file: extract content of single file in tarball #95

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "Tar"
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
authors = ["Stefan Karpinski <stefan@karpinski.org>"]
version = "1.9.0"
version = "1.10.0"

[deps]
ArgTools = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
Expand Down
18 changes: 18 additions & 0 deletions README.md
Expand Up @@ -98,6 +98,24 @@ will also not be copied and will instead be skipped. By default, `extract` will
detect whether symlinks can be created in `dir` or not and will automatically
copy symlinks if they cannot be created.

### `Tar.extract_file`

```jl
extract_file(predicate::Function, tarball, out) -> Vector{Header}
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this should be

Suggested change
extract_file(predicate::Function, tarball, out) -> Vector{Header}
extract_files(predicate::Function, tarball, out) -> Vector{Header}

?

extract_file(predicate::AbstractString, tarball, out) -> Header
```
* `predicate :: Union{Function, AbstractString}`
* `tarball :: Union{AbstractString, AbstractCmd, IO}`
* `out :: Union{AbstractString, AbstractCmd, IO}`

Read the file(s) matching the predicate from `tarball` and write their contents to `out`.
Return the [`Header`](@ref)s of the matching files.

If `predicate` is a `Function`, it should take a `Header` as its only input
argument and return `true`/`false`.
If `predicate` is a `String`, it is interpreted as a path relative to the
tarball root and must match exactly one entry.

### Tar.list

```jl
Expand Down
43 changes: 43 additions & 0 deletions src/extract.jl
Expand Up @@ -590,3 +590,46 @@ function read_data(
r < n && error("premature end of tar file")
return view(buf, 1:size)
end


"""
    extract_file(predicate::Function, tarball, out) -> Vector{Header}
    extract_file(predicate::AbstractString, tarball, out) -> Header

Accepted argument types:
* `predicate :: Union{AbstractString, Function}`
* `tarball :: Union{AbstractString, AbstractCmd, IO}`
* `out :: Union{AbstractString, AbstractCmd, IO}`

Read the file(s) matching `predicate` from `tarball` and write their contents
to `out`. Return the [`Header`](@ref)s of the matching files.

If `predicate` is a `Function`, it should take a `Header` as its only argument
and return `true`/`false`. Only entries of type `:file` are extracted; when
several files match, their contents are written to `out` back to back.

If `predicate` is an `AbstractString`, it is interpreted as a path relative to
the tarball root and must match exactly one file; otherwise an `ArgumentError`
is thrown.
"""
function extract_file(predicate::Function, tarball::ArgRead, out::ArgWrite)::Vector{Header}
    extracted = Header[]
    arg_read(tarball) do tar
        arg_write(out) do io
            read_tarball(predicate, tar) do hdr, _
                # Only regular files have their data copied. TODO: read symlinks??
                hdr.type == :file || return
                push!(extracted, hdr)
                read_data(tar, io, size=hdr.size)
            end
        end
    end
    return extracted
end
# String-predicate method: extract the single file whose path equals `predicate`.
# Paths are compared component-wise with "." components dropped, so "./a/b"
# and "a/b" select the same entry. Throws an `ArgumentError` unless exactly one
# file in the tarball matches.
function extract_file(predicate::AbstractString, tarball::ArgRead, out::ArgWrite)::Header
    # Split a path into its components, discarding any "." entries.
    components(path) = filter!(part -> part != ".", splitpath(path))
    wanted = components(predicate)
    found = extract_file(tarball, out) do hdr
        hdr.type == :file && wanted == components(hdr.path)
    end
    length(found) == 1 && return found[1]
    s = isempty(found) ? "no" : "multiple"
    throw(ArgumentError("$s files in the tarball matches the filename $predicate"))
end
69 changes: 69 additions & 0 deletions test/runtests.jl
Expand Up @@ -601,6 +601,75 @@ end
end
end

@testset "API: extract_file" begin
    mktempdir() do root
        # Fixture: one file at the tarball root and one inside a subdirectory.
        write(joinpath(root, "file.txt"), "file at the root")
        subdir = mkdir(joinpath(root, "directory"))
        write(joinpath(subdir, "file2.txt"), "file in directory")
        tarball = Tar.create(root)

        # (path in tarball, size in bytes, contents)
        expected = (
            ("file.txt", 16, "file at the root"),
            ("directory/file2.txt", 17, "file in directory"),
        )

        # Exercise both a path and a freshly opened stream as the tarball argument.
        for tar in (() -> tarball, () -> open(tarball))
            io = IOBuffer()

            ## predicate::String
            # Each path must be found both with and without a leading "./".
            for (path, nbytes, content) in expected, pred in (path, "./" * path)
                hdr = Tar.extract_file(pred, tar(), io)
                @test hdr.path == path
                @test hdr.size == nbytes
                @test String(take!(io)) == content
            end

            @test_throws ArgumentError("no files in the tarball matches the filename nope") Tar.extract_file("nope", tar(), io)

            # predicate::Function
            hdrs = Tar.extract_file(hdr -> hdr.path == "file.txt", tar(), io)
            @test length(hdrs) == 1
            @test hdrs[1].path == "file.txt"
            @test hdrs[1].size == 16
            @test String(take!(io)) == "file at the root"

            # Extracting everything is checked twice in a row.
            for _ in 1:2
                hdrs = Tar.extract_file(hdr -> true, tar(), io)
                @test length(hdrs) == 2
                str = String(take!(io))
                @test occursin("file at the root", str)
                @test occursin("file in directory", str)
                @test sum(h.size for h in hdrs) == sizeof(str)
            end

            hdrs = Tar.extract_file(hdr -> false, tar(), io)
            @test isempty(hdrs)
            @test isempty(take!(io))

            # Non-IO output
            mktempdir() do outdir
                dest = joinpath(outdir, "out.data")
                Tar.extract_file("file.txt", tar(), dest)
                @test read(dest, String) == "file at the root"
                Tar.extract_file(tar(), dest) do hdr
                    hdr.path == "file.txt"
                end
                @test read(dest, String) == "file at the root"
            end
        end
    end
end

@testset "API: rewrite" begin
# reference standard tarball
reference, hash₁ = make_test_tarball()
Expand Down