diff --git a/git/cmd.py b/git/cmd.py index b8256b266..1aa683026 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -35,7 +35,7 @@ is_win, ) from git.exc import CommandError, UnsafeOptionError, UnsafeProtocolError -from git.util import is_cygwin_git, cygpath, expand_path +from git.util import is_cygwin_git, cygpath, expand_path, patch_env from .exc import ( GitCommandError, @@ -132,7 +132,7 @@ def pump_stream(cmdline, name, stream, is_decode, handler): return finalizer(process) -def _safer_popen_windows(command, shell, env=None, **kwargs): +def _safer_popen_windows(command, shell=False, env=None, **kwargs): """Call :class:`subprocess.Popen` on Windows but don't include a CWD in the search. This avoids an untrusted search path condition where a file like ``git.exe`` in a malicious repository would be run when GitPython operates on the repository. The @@ -154,7 +154,6 @@ def _safer_popen_windows(command, shell, env=None, **kwargs): # https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal # https://docs.python.org/3/library/subprocess.html#subprocess.CREATE_NEW_PROCESS_GROUP creationflags = subprocess.CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP - # When using a shell, the shell is the direct subprocess, so the variable must be # set in its environment, to affect its search behavior. (The "1" can be any value.) if shell: @@ -162,7 +161,6 @@ def _safer_popen_windows(command, shell, env=None, **kwargs): safer_env["NoDefaultCurrentDirectoryInExePath"] = "1" else: safer_env = env - # When not using a shell, the current process does the search in a CreateProcessW # API call, so the variable must be set in our environment. With a shell, this is # unnecessary, in versions where https://github.com/python/cpython/issues/101283 is @@ -185,6 +183,12 @@ def _safer_popen_windows(command, shell, env=None, **kwargs): safer_popen = Popen +if os.name == "nt": + safer_popen = _safer_popen_windows +else: + safer_popen = Popen + + def dashify(string): return string.replace('_', '-') @@ -801,15 +805,11 @@ def execute(self, command, cmd_not_found_exception = OSError if kill_after_timeout: raise GitCommandError(command, '"kill_after_timeout" feature is not supported on Windows.') - - # Only search PATH, not CWD. This must be in the *caller* environment. The "1" can be any value. - patch_caller_env = unittest.mock.patch.dict(os.environ, {"NoDefaultCurrentDirectoryInExePath": "1"}) else: if sys.version_info[0] > 2: cmd_not_found_exception = FileNotFoundError # NOQA # exists, flake8 unknown @UndefinedVariable else: cmd_not_found_exception = OSError - patch_caller_env = nullcontext() # end handle stdout_sink = (PIPE @@ -829,7 +829,7 @@ def execute(self, command, stdin=istream, stderr=PIPE, stdout=stdout_sink, - shell=shell is not None and shell or self.USE_SHELL, + shell=shell, universal_newlines=universal_newlines, **subprocess_kwargs ) diff --git a/git/index/fun.py b/git/index/fun.py index e88cd862d..5289bd48a 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -74,11 +74,11 @@ def run_commit_hook(name, index, *args): env["GIT_EDITOR"] = ":" try: cmd = safer_popen([hp] + list(args), - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - cwd=index.repo.working_dir, - close_fds=is_posix) + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=index.repo.working_dir, + close_fds=is_posix) except Exception as ex: raise HookExecutionError(hp, ex) else: diff --git a/git/test/fixtures/polyglot b/git/test/fixtures/polyglot new file mode 100755 index 000000000..f1dd56b26 --- /dev/null +++ b/git/test/fixtures/polyglot @@ -0,0 +1,8 @@ +#!/usr/bin/env sh +# Valid script in both Bash and Python, but with different behavior. +""":" +echo 'Ran intended hook.' >output.txt +exit +" """ +from pathlib import Path +Path('payload.txt').write_text('Ran impostor hook!', encoding='utf-8') diff --git a/git/test/test_git.py b/git/test/test_git.py index 8126244ac..5f481d9d6 100644 --- a/git/test/test_git.py +++ b/git/test/test_git.py @@ -6,7 +6,8 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php from __future__ import print_function -import contextlib +import contextlib2 as contextlib +import ddt import os import shutil import subprocess @@ -31,10 +32,11 @@ assert_equal, assert_true, assert_match, - fixture_path + fixture_path, + with_rw_directory, ) -from git.test.lib import with_rw_directory -from git.util import finalize_process + +from git.util import finalize_process, cwd import os.path as osp @@ -51,6 +53,7 @@ from git.compat import is_win + @contextlib.contextmanager def _chdir(new_dir): """Context manager to temporarily change directory. Not reentrant.""" @@ -77,7 +80,7 @@ def _patch_out_env(name): os.environ[name] = old_value - +@ddt.ddt class TestGit(TestBase): @classmethod @@ -137,23 +140,46 @@ def test_it_transforms_kwargs_into_git_command_arguments(self): def test_it_executes_git_to_shell_and_returns_result(self): assert_match(r'^git version [\d\.]{2}.*$', self.git.execute(["git", "version"])) - def test_it_executes_git_not_from_cwd(self): - with TemporaryDirectory() as tmpdir: - if is_win: - # Copy an actual binary executable that is not git. - other_exe_path = os.path.join(os.getenv("WINDIR"), "system32", "hostname.exe") - impostor_path = os.path.join(tmpdir, "git.exe") - shutil.copy(other_exe_path, impostor_path) - else: - # Create a shell script that doesn't do anything. - impostor_path = os.path.join(tmpdir, "git") - with open(impostor_path, mode="w") as file: - print("#!/bin/sh", file=file) - os.chmod(impostor_path, 0o755) - - with _chdir(tmpdir): - # six.assertRegex(self.git.execute(["git", "version"]).encode("UTF-8"), r"^git version\b") - self.assertRegexpMatches(self.git.execute(["git", "version"]), r"^git version\b") + @ddt.data( + # chdir_to_repo, shell, command, use_shell_impostor + (False, False, ["git", "version"], False), + (False, True, "git version", False), + (False, True, "git version", True), + (True, False, ["git", "version"], False), + (True, True, "git version", False), + (True, True, "git version", True), + ) + def test_it_executes_git_not_from_cwd(self, case): + self.internal_it_executes_git_not_from_cwd(case) + + @with_rw_directory + def internal_it_executes_git_not_from_cwd(self, rw_dir, case): + chdir_to_repo, shell, command, use_shell_impostor = case + repo = Repo.init(rw_dir) + if os.name == "nt": + # Copy an actual binary executable that is not git. (On Windows, running + # "hostname" only displays the hostname, it never tries to change it.) + other_exe_path = Path(os.environ["SystemRoot"], "system32", "hostname.exe") + impostor_path = Path(rw_dir, "git.exe") + shutil.copy(other_exe_path, str(impostor_path)) + else: + # Create a shell script that doesn't do anything. + impostor_path = Path(rw_dir, "git") + impostor_path.write_text(u"#!/bin/sh\n", encoding="utf-8") + os.chmod(str(impostor_path), 0o755) + if use_shell_impostor: + shell_name = "cmd.exe" if os.name == "nt" else "sh" + shutil.copy(str(impostor_path), str(Path(rw_dir, shell_name))) + with contextlib.ExitStack() as stack: + if chdir_to_repo: + stack.enter_context(cwd(rw_dir)) + if use_shell_impostor: + stack.enter_context(_patch_out_env("ComSpec")) + # Run the command without raising an exception on failure, as the exception + # message is currently misleading when the command is a string rather than a + # sequence of strings (it really runs "git", but then wrongly reports "g"). + output = repo.git.execute(command, with_exceptions=False, shell=shell) + self.assertRegexpMatches(output, r"^git version\b") def test_it_accepts_stdin(self): filename = fixture_path("cat_file_blob") @@ -325,7 +351,7 @@ def test_environment(self, rw_dir): self.assertIn('FOO', str(err)) def test_handle_process_output(self): - from git.cmd import handle_process_output + from git.cmd import handle_process_output, safer_popen line_count = 5002 count = [None, 0, 0] @@ -337,13 +363,11 @@ def counter_stderr(line): count[2] += 1 cmdline = [sys.executable, fixture_path('cat_file.py'), str(fixture_path('issue-301_stderr'))] - proc = subprocess.Popen(cmdline, - stdin=None, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=False, - # creationflags=cmd.PROC_CREATIONFLAGS, - ) + proc = safer_popen(cmdline, + stdin=None, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=False) handle_process_output(proc, counter_stdout, counter_stderr, finalize_process) diff --git a/git/test/test_index.py b/git/test/test_index.py index 4738815f3..af8d35744 100644 --- a/git/test/test_index.py +++ b/git/test/test_index.py @@ -5,14 +5,21 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +import contextlib +# from dataclasses import dataclass from io import BytesIO import os +import shutil from stat import ( S_ISLNK, ST_MODE ) +import sys import tempfile from unittest import skipIf +import ddt + +from git.test.lib.helper import VirtualEnvironment from git import ( IndexFile, @@ -30,26 +37,32 @@ HookExecutionError, InvalidGitRepositoryError ) -from git.index.fun import hook_path +from git.index.fun import hook_path, run_commit_hook from git.index.typ import ( BaseIndexEntry, IndexEntry ) from git.objects import Blob from git.test.lib import ( - TestBase, fixture_path, fixture, - with_rw_repo + with_rw_directory, + with_rw_repo, + TestBase ) -from git.test.lib import with_rw_directory -from git.util import Actor, rmtree +from git.util import Actor, cwd, rmtree from git.util import HIDE_WINDOWS_KNOWN_ERRORS, hex_to_bin from gitdb.base import IStream import os.path as osp from git.cmd import Git +try: + from pathlib import Path +except ImportError: + from pathlib2 import Path + + HOOKS_SHEBANG = "#!/usr/bin/env sh\n" @@ -67,6 +80,141 @@ def _make_hook(git_dir, name, content, make_exec=True): return hp +@contextlib.contextmanager +def nullcontext(enter_result=None): + yield enter_result + + +def _get_windows_ansi_encoding(): + """Get the encoding specified by the Windows system-wide ANSI active code page.""" + # locale.getencoding may work but is only in Python 3.11+. Use the registry instead. + import winreg + + hklm_path = R"SYSTEM\CurrentControlSet\Control\Nls\CodePage" + with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, hklm_path) as key: + value, _ = winreg.QueryValueEx(key, "ACP") + return "cp" + str(value) + + +class WinBashStatus: + """Namespace of native-Windows bash.exe statuses. Affects what hook tests can pass. + + Call check() to check the status. (CheckError and WinError should not typically be + used to trigger skip or xfail, because they represent unexpected situations.) + """ + + # @dataclass + class Inapplicable: + """This system is not native Windows: either not Windows at all, or Cygwin.""" + + # @dataclass + class Absent: + """No command for bash.exe is found on the system.""" + + # @dataclass + class Native: + """Running bash.exe operates outside any WSL distribution (as with Git Bash).""" + + # @dataclass + class Wsl: + """Running bash.exe calls bash in a WSL distribution.""" + + # @dataclass + class WslNoDistro: + """Running bash.exe tries to run bash on a WSL distribution, but none exists.""" + + process = None + message = None + + # @dataclass + class CheckError: + """Running bash.exe fails in an unexpected error or gives unexpected output.""" + + process = None + message = None + + # @dataclass + class WinError: + """bash.exe may exist but can't run. CreateProcessW fails unexpectedly.""" + + exception = None + + @classmethod + def check(cls): + """Check the status of the bash.exe that run_commit_hook will try to use. + + This runs a command with bash.exe and checks the result. On Windows, shell and + non-shell executable search differ; shutil.which often finds the wrong bash.exe. + + run_commit_hook uses Popen, including to run bash.exe on Windows. It doesn't + pass shell=True (and shouldn't). On Windows, Popen calls CreateProcessW, which + checks some locations before using the PATH environment variable. It is expected + to try System32, even if another directory with the executable precedes it in + PATH. When WSL is present, even with no distributions, bash.exe usually exists + in System32; Popen finds it even if a shell would run another one, as on CI. + (Without WSL, System32 may still have bash.exe; users sometimes put it there.) + """ + if sys.platform != "win32": + return cls.Inapplicable() + + try: + # Output rather than forwarding the test command's exit status so that if a + # failure occurs before we even get to this point, we will detect it. For + # information on ways to check for WSL, see https://superuser.com/a/1749811. + script = 'test -e /proc/sys/fs/binfmt_misc/WSLInterop; echo "$?"' + command = ["bash.exe", "-c", script] + process = subprocess.run(command, capture_output=True) + except FileNotFoundError: + return cls.Absent() + except OSError as error: + return cls.WinError(error) + + text = cls._decode(process.stdout).rstrip() # stdout includes WSL's own errors. + + if process.returncode == 1 and re.search(r"\bhttps://aka.ms/wslstore\b", text): + return cls.WslNoDistro(process, text) + if process.returncode != 0: + _logger.error("Error running bash.exe to check WSL status: %s", text) + return cls.CheckError(process, text) + if text == "0": + return cls.Wsl() + if text == "1": + return cls.Native() + _logger.error("Strange output checking WSL status: %s", text) + return cls.CheckError(process, text) + + @staticmethod + def _decode(stdout): + """Decode bash.exe output as best we can.""" + # When bash.exe is the WSL wrapper but the output is from WSL itself rather than + # code running in a distribution, the output is often in UTF-16LE, which Windows + # uses internally. The UTF-16LE representation of a Windows-style line ending is + # rarely seen otherwise, so use it to detect this situation. + if b"\r\0\n\0" in stdout: + return stdout.decode("utf-16le") + + # At this point, the output is either blank or probably not UTF-16LE. It's often + # UTF-8 from inside a WSL distro or non-WSL bash shell. Our test command only + # uses the ASCII subset, so we can safely guess a wrong code page for it. Errors + # from such an environment can contain any text, but unlike WSL's own messages, + # they go to stderr, not stdout. So we can try the system ANSI code page first. + acp = _get_windows_ansi_encoding() + try: + return stdout.decode(acp) + except UnicodeDecodeError: + pass + except LookupError as error: + _logger.warning(str(error)) # Message already says "Unknown encoding:". + + # Assume UTF-8. If invalid, substitute Unicode replacement characters. + return stdout.decode("utf-8", errors="replace") + + +_win_bash_status = WinBashStatus.check() + + + +@ddt.ddt class TestIndex(TestBase): def __init__(self, *args): @@ -927,3 +1075,41 @@ def test_commit_msg_hook_fail(self, rw_repo): assert str(err) else: raise AssertionError("Should have cought a HookExecutionError") + + @ddt.data((False,), (True,)) + @with_rw_directory + def test_hook_uses_shell_not_from_cwd(self, rw_dir, case): + (chdir_to_repo,) = case + shell_name = "bash.exe" if os.name == "nt" else "sh" + maybe_chdir = cwd(rw_dir) if chdir_to_repo else nullcontext() + repo = Repo.init(rw_dir) + # We need an impostor shell that works on Windows and that the test can + # distinguish from the real bash.exe. But even if the real bash.exe is absent or + # unusable, we should verify the impostor is not run. So the impostor needs a + # clear side effect (unlike in TestGit.test_it_executes_git_not_from_cwd). Popen + # on Windows uses CreateProcessW, which disregards PATHEXT; the impostor may + # need to be a binary executable to ensure the vulnerability is found if + # present. No compiler need exist, shipping a binary in the test suite may + # target the wrong architecture, and generating one in a bespoke way may trigger + # false positive virus scans. So we use a Bash/Python polyglot for the hook and + # use the Python interpreter itself as the bash.exe impostor. But an interpreter + # from a venv may not run when copied outside of it, and a global interpreter + # won't run when copied to a different location if it was installed from the + # Microsoft Store. So we make a new venv in rw_dir and use its interpreter. + venv = VirtualEnvironment(rw_dir, with_pip=False) + shutil.copy(str(venv.python), str(Path(rw_dir, shell_name))) + shutil.copy(str(fixture_path("polyglot")), str(hook_path("polyglot", repo.git_dir))) + payload = Path(rw_dir, "payload.txt") + if type(_win_bash_status) in {WinBashStatus.Absent, WinBashStatus.WslNoDistro}: + # The real shell can't run, but the impostor should still not be used. + with self.assertRaises(HookExecutionError): + with maybe_chdir: + run_commit_hook("polyglot", repo.index) + self.assertFalse(payload.exists()) + else: + # The real shell should run, and not the impostor. + with maybe_chdir: + run_commit_hook("polyglot", repo.index) + self.assertFalse(payload.exists()) + output = Path(rw_dir, "output.txt").read_text(encoding="utf-8") + self.assertEqual(output, "Ran intended hook.\n") diff --git a/git/util.py b/git/util.py index 0e397045e..e9e39a4ff 100644 --- a/git/util.py +++ b/git/util.py @@ -84,6 +84,20 @@ def cwd(new_dir): os.chdir(old_dir) +@contextlib.contextmanager +def patch_env(name, value): + """Context manager to temporarily patch an environment variable.""" + old_value = os.getenv(name) + os.environ[name] = value + try: + yield + finally: + if old_value is None: + del os.environ[name] + else: + os.environ[name] = old_value + + def rmtree(path): """Remove the given recursively. @@ -202,7 +216,6 @@ def py_where(program, path=None): Windows differ considerably. """ - # From: http://stackoverflow.com/a/377028/548792 winprog_exts = _get_exe_extensions() diff --git a/test-requirements.txt b/test-requirements.txt index 47e7c99b5..b4dd8f2bc 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,12 +1,12 @@ -ddt>=1.1.1; python_version>'3.0' -ddt>=1.1.1,<=1.6.0; python_version =='2.7' +backports.tempfile coverage +ddt>=1.1.1,<=1.6.0; python_version =='2.7' +ddt>=1.1.1; python_version>'3.0' flake8 -nose -tox mock; python_version=='2.7' +nose pathlib2; python_version=='2.7' -backports.tempfile six +tox virtualenv - +contextlib2