Skip to content

Commit 0087814

Browse files
icanhasmath and claude committed
Backport CVE-2025-8869: validate symlink targets in tar fallback extraction
Adds an `is_symlink_target_in_tar` helper that confirms each symlink member's target points to another member inside the same tar archive, and calls it from `untar_file` before extracting symlink members.

Without this check, a malicious sdist could ship a symlink whose target is an arbitrary path on the host filesystem; pip would then follow that symlink during subsequent file writes, allowing files to be created outside the install directory.

Python >=3.9.17, >=3.10.12, >=3.11.4 and >=3.12 are protected by the stdlib's PEP 706 data_filter, but Python 3.7 has no backport, so the fallback path is the only path on 3.7.

This is the consolidated equivalent of the upstream commit series:

- 2490eb2 Check symlink target in tar extraction fallback
- 7f2a979 normpath linkname
- 3390548 Handle different separators in tar member names
- eaee181 Fix the bug in the process logic
- dcd1ff5 Rename _check_link_target -> is_symlink_target_in_tar
- b154d06 Remove redundant backslash check

Pip 24.0 has not yet split untar_file into filter/no-filter paths, so the hardened helper is applied directly to the existing untar_file.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ef78c12 commit 0087814

2 files changed

Lines changed: 206 additions & 0 deletions

File tree

src/pip/_internal/utils/unpacking.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,20 @@ def is_within_directory(directory: str, target: str) -> bool:
8585
return prefix == abs_directory
8686

8787

88+
def is_symlink_target_in_tar(tar: tarfile.TarFile, tarinfo: tarfile.TarInfo) -> bool:
    """Tell whether the target of a symlink member is itself a member of *tar*.

    The link's ``linkname`` is resolved against the directory part of the
    link's own ``name``, normalised with ``os.path.normpath`` and rewritten
    to use forward slashes before being looked up with ``tar.getmember``.

    Returns ``True`` when the lookup finds a member and ``False`` when it
    raises ``KeyError``.
    """
    link_dir = os.path.dirname(tarinfo.name)
    target = os.path.normpath(os.path.join(link_dir, tarinfo.linkname))
    # normpath may produce backslashes; the lookup below is done with "/".
    target = target.replace("\\", "/")

    try:
        tar.getmember(target)
    except KeyError:
        return False
    return True
100+
101+
88102
def set_extracted_file_to_default_mode_plus_executable(path: str) -> None:
89103
"""
90104
Make file present at path have execute for user/group/world
@@ -187,6 +201,14 @@ def untar_file(filename: str, location: str) -> None:
187201
if member.isdir():
188202
ensure_dir(path)
189203
elif member.issym():
204+
if not is_symlink_target_in_tar(tar, member):
205+
message = (
206+
"The tar file ({}) has a file ({}) trying to install "
207+
"outside target directory ({})"
208+
)
209+
raise InstallationError(
210+
message.format(filename, member.name, member.linkname)
211+
)
190212
try:
191213
tar._extract_member(member, path)
192214
except Exception as exc:

tests/unit/test_utils_unpacking.py

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from typing import List, Tuple
1212

1313
import pytest
14+
from _pytest.monkeypatch import MonkeyPatch
1415

1516
from pip._internal.exceptions import InstallationError
1617
from pip._internal.utils.unpacking import is_within_directory, untar_file, unzip_file
@@ -171,6 +172,189 @@ def test_unpack_tar_success(self) -> None:
171172
test_tar = self.make_tar_file("test_tar.tar", files)
172173
untar_file(test_tar, self.tempdir)
173174

175+
@pytest.mark.parametrize(
    "input_prefix, unpack_prefix",
    [
        ("", ""),
        ("dir/", ""),  # pip ignores a common leading directory
        ("dir/sub/", "sub/"),  # pip ignores *one* common leading directory
    ],
)
def test_unpack_tar_links(self, input_prefix: str, unpack_prefix: str) -> None:
    """
    Unpack a *.tar holding a regular file plus a hard link and a symlink
    to it, then check that all three extracted names yield the same bytes.
    """
    payload = b"file content"
    archive_path = os.path.join(self.tempdir, "test_tar_links.tar")

    regular = tarfile.TarInfo(input_prefix + "regular_file.txt")
    regular.size = len(payload)

    # The hard link names its target by the full in-archive path.
    hardlink = tarfile.TarInfo(input_prefix + "hardlink.txt")
    hardlink.type = tarfile.LNKTYPE
    hardlink.linkname = input_prefix + "regular_file.txt"

    # The symlink names its target without the prefix.
    symlink = tarfile.TarInfo(input_prefix + "symlink.txt")
    symlink.type = tarfile.SYMTYPE
    symlink.linkname = "regular_file.txt"

    with tarfile.open(archive_path, "w") as archive:
        archive.addfile(regular, io.BytesIO(payload))
        archive.addfile(hardlink)
        archive.addfile(symlink)

    untar_file(archive_path, self.tempdir)

    unpack_dir = os.path.join(self.tempdir, unpack_prefix)
    for extracted_name in ("regular_file.txt", "hardlink.txt", "symlink.txt"):
        with open(os.path.join(unpack_dir, extracted_name), "rb") as f:
            assert f.read() == payload
215+
216+
def test_unpack_normal_tar_link1_no_data_filter(
    self, monkeypatch: MonkeyPatch
) -> None:
    """
    Without tarfile.data_filter, a symlink pointing at a sibling member of
    the same archive is extracted as a working symlink.
    """
    # Remove data_filter when this interpreter provides it.
    if hasattr(tarfile, "data_filter"):
        monkeypatch.delattr("tarfile.data_filter")

    payload = b"normal\n"
    archive_path = os.path.join(self.tempdir, "test_tar_links_no_data_filter.tar")
    dest_dir = os.path.join(self.tempdir, "extract_path")
    extracted_link = os.path.join(dest_dir, "normal_symlink")

    target_info = tarfile.TarInfo(name="normal_file")
    target_info.size = len(payload)

    link_info = tarfile.TarInfo("normal_symlink")
    link_info.type = tarfile.SYMTYPE
    link_info.linkpath = "normal_file"

    with tarfile.open(archive_path, "w") as tar:
        tar.addfile(target_info, fileobj=io.BytesIO(payload))
        tar.addfile(link_info)

    untar_file(archive_path, dest_dir)

    # The link exists, keeps its original target, and resolves to the file.
    assert os.path.islink(extracted_link)
    assert os.readlink(extracted_link) == "normal_file"
    with open(extracted_link, "rb") as f:
        assert f.read() == payload
250+
251+
def test_unpack_normal_tar_link2_no_data_filter(
    self, monkeypatch: MonkeyPatch
) -> None:
    """
    Without tarfile.data_filter, a symlink in a subdirectory that reaches a
    member of the same archive through ".." is extracted as a working
    symlink.
    """
    # Remove data_filter when this interpreter provides it.
    if hasattr(tarfile, "data_filter"):
        monkeypatch.delattr("tarfile.data_filter")

    payload = b"normal\n"
    relative_target = ".." + os.path.sep + "normal_file"
    archive_path = os.path.join(self.tempdir, "test_tar_links_no_data_filter.tar")
    dest_dir = os.path.join(self.tempdir, "extract_path")
    extracted_link = os.path.join(dest_dir, "sub", "normal_symlink")

    target_info = tarfile.TarInfo(name="normal_file")
    target_info.size = len(payload)

    link_info = tarfile.TarInfo("sub/normal_symlink")
    link_info.type = tarfile.SYMTYPE
    link_info.linkpath = relative_target

    with tarfile.open(archive_path, "w") as tar:
        tar.addfile(target_info, fileobj=io.BytesIO(payload))
        tar.addfile(link_info)

    untar_file(archive_path, dest_dir)

    # The link exists, keeps its original target, and resolves to the file.
    assert os.path.islink(extracted_link)
    assert os.readlink(extracted_link) == relative_target
    with open(extracted_link, "rb") as f:
        assert f.read() == payload
285+
286+
def test_unpack_evil_tar_link1_no_data_filter(
    self, monkeypatch: MonkeyPatch
) -> None:
    """
    Without tarfile.data_filter, a symlink whose target is the full path of
    a file that is not a member of the archive is rejected with
    InstallationError and is not created.
    """
    # Remove data_filter when this interpreter provides it.
    if hasattr(tarfile, "data_filter"):
        monkeypatch.delattr("tarfile.data_filter")

    archive_path = os.path.join(self.tempdir, "test_tar_links_no_data_filter.tar")
    dest_dir = os.path.join(self.tempdir, "extract_path")

    # An existing file next to the extraction directory, not inside it.
    outside_file = os.path.join(self.tempdir, "import_file")
    with open(outside_file, "w"):
        pass

    link_info = tarfile.TarInfo("evil_symlink")
    link_info.type = tarfile.SYMTYPE
    link_info.linkpath = outside_file

    with tarfile.open(archive_path, "w") as tar:
        tar.addfile(link_info)

    with pytest.raises(InstallationError) as excinfo:
        untar_file(archive_path, dest_dir)

    template = (
        "The tar file ({}) has a file ({}) trying to install outside "
        "target directory ({})"
    )
    expected = template.format(archive_path, "evil_symlink", outside_file)
    assert expected in str(excinfo.value)

    assert not os.path.exists(os.path.join(dest_dir, "evil_symlink"))
320+
321+
def test_unpack_evil_tar_link2_no_data_filter(
    self, monkeypatch: MonkeyPatch
) -> None:
    """
    Without tarfile.data_filter, a symlink whose relative target uses ".."
    to reach a file that is not a member of the archive is rejected with
    InstallationError and is not created.
    """
    # Remove data_filter when this interpreter provides it.
    if hasattr(tarfile, "data_filter"):
        monkeypatch.delattr("tarfile.data_filter")

    outside_name = "import_file"
    archive_path = os.path.join(self.tempdir, "test_tar_links_no_data_filter.tar")
    dest_dir = os.path.join(self.tempdir, "extract_path")

    # An existing file next to the extraction directory, not inside it.
    with open(os.path.join(self.tempdir, outside_name), "w"):
        pass

    escaping_target = ".." + os.sep + outside_name

    link_info = tarfile.TarInfo("evil_symlink")
    link_info.type = tarfile.SYMTYPE
    link_info.linkpath = escaping_target

    with tarfile.open(archive_path, "w") as tar:
        tar.addfile(link_info)

    with pytest.raises(InstallationError) as excinfo:
        untar_file(archive_path, dest_dir)

    template = (
        "The tar file ({}) has a file ({}) trying to install outside "
        "target directory ({})"
    )
    expected = template.format(archive_path, "evil_symlink", escaping_target)
    assert expected in str(excinfo.value)

    assert not os.path.exists(os.path.join(dest_dir, "evil_symlink"))
357+
174358

175359
def test_unpack_tar_unicode(tmpdir: Path) -> None:
176360
test_tar = tmpdir / "test.tar"

0 commit comments

Comments
 (0)