Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
handle extract of self hardlink (fix #127)
This is a weird construction: an entry for a file followed by a hardlink
that has that file as its target and the same path. The hardlink
copies the content of its target and creates a file with the "new" path
and mode, which happens in this case to be the same path as the original
file. The effect of this construction is to change the permissions of a
previous file entry. We could disallow this since it's odd, but based on
the bug report it is something `tar` can generate and the other tarball
reading methods like `tree_hash` and `rewrite` already just work because
the logic is simply to copy the contents of an existing node with a new
path and mode and for a sane tree data structure, you can just overwrite
an arbitrary node. The file system is wonkier and the change in logic
here is merely to ensure that the old file isn't deleted too early.
  • Loading branch information
StefanKarpinski committed Feb 10, 2022
commit 96affcfd12c4c518320e94d2f16088dcd9dd62c1
8 changes: 4 additions & 4 deletions src/extract.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,14 @@ function extract_tarball(
paths = read_tarball(predicate, tar; buf=buf, skeleton=skeleton) do hdr, parts
# get the file system version of the path
sys_path = reduce(joinpath, init=root, parts)
# ensure dirname(sys_path) is a directory
src_path = joinpath(root, hdr.link)
dir = dirname(sys_path)
st = stat(dir)
# ensure dirname(sys_path) is a directory
if !isdir(st)
ispath(st) && rm(dir, force=true, recursive=true)
mkpath(dir)
else
elseif hdr.type != :hardlink || src_path != sys_path
st = lstat(sys_path)
hdr.type == :directory && isdir(st) && return # from callback
ispath(st) && rm(sys_path, force=true, recursive=true)
Expand All @@ -75,8 +76,7 @@ function extract_tarball(
elseif hdr.type == :symlink
copy_symlinks || symlink(hdr.link, sys_path)
elseif hdr.type == :hardlink
src_path = joinpath(root, hdr.link)
cp(src_path, sys_path)
src_path != sys_path && cp(src_path, sys_path)
elseif hdr.type == :file
read_data(tar, sys_path, size=hdr.size, buf=buf)
else # should already be caught by check_header
Expand Down
25 changes: 25 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1050,3 +1050,28 @@ end
end
end
end

@testset "self hardlink" begin
    # Fixture: a random payload plus the zero bytes needed to pad it out to a
    # multiple of 512 bytes.
    len = 1234
    data = rand(UInt8, len)
    padding = zeros(UInt8, mod(-len, 512))

    # Reference tarball: a single "file" entry written with mode 0o755.
    reference, ref_io = mktemp()
    Tar.write_header(ref_io, Tar.Header("file", :file, 0o755, len, ""))
    write(ref_io, data, padding)
    close(ref_io)
    expected = tree_hash(Tar.extract(reference))

    # Tarball under test: the same payload written with mode 0o644, followed
    # by a hardlink entry whose path and link target are both "file" and
    # whose mode is 0o755.
    tarball, tar_io = mktemp()
    Tar.write_header(tar_io, Tar.Header("file", :file, 0o644, len, ""))
    write(tar_io, data, padding)
    Tar.write_header(tar_io, Tar.Header("file", :hardlink, 0o755, 0, "file"))
    close(tar_io)

    # Hashing, extracting and rewriting the self-hardlink tarball must all
    # agree with the reference tarball.
    @test expected == Tar.tree_hash(tarball)
    @test expected == tree_hash(Tar.extract(tarball))
    @test read(reference) == read(Tar.rewrite(tarball))
end