Skip to content

Commit

Permalink
fix: Handle systemctl when dbus not ready (canonical#4842)
Browse files Browse the repository at this point in the history
See commit d29b744, but this commit generalizes the solution for all
calls in status.py

LP: #2046483
  • Loading branch information
TheRealFalcon committed Feb 2, 2024
1 parent e127ee6 commit 00cbb41
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 68 deletions.
89 changes: 46 additions & 43 deletions cloudinit/cmd/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,36 @@ class StatusDetails(NamedTuple):
{description}"""


def query_systemctl(
systemctl_args: List[str],
*,
wait: bool,
existing_status: Optional[UXAppStatus] = None,
) -> str:
"""Query systemd with retries and return output."""
while True:
try:
return subp.subp(["systemctl", *systemctl_args]).stdout.strip()
except subp.ProcessExecutionError as e:
if existing_status and existing_status in (
UXAppStatus.DEGRADED_RUNNING,
UXAppStatus.RUNNING,
):
return ""
last_exception = e
if wait:
sleep(0.25)
else:
break
print(
"Failed to get status from systemd. "
"Cloud-init status may be inaccurate. ",
f"Error from systemctl: {last_exception.stderr}",
file=sys.stderr,
)
return ""


def get_parser(parser=None):
"""Build or extend an arg parser for status utility.
Expand Down Expand Up @@ -229,7 +259,9 @@ def handle_status_args(name, args) -> int:
return 0


def get_bootstatus(disable_file, paths) -> Tuple[UXAppBootStatusCode, str]:
def get_bootstatus(
disable_file, paths, wait
) -> Tuple[UXAppBootStatusCode, str]:
"""Report whether cloud-init current boot status
@param disable_file: The path to the cloud-init disable file.
Expand All @@ -253,7 +285,7 @@ def get_bootstatus(disable_file, paths) -> Tuple[UXAppBootStatusCode, str]:
elif "cloud-init=disabled" in os.environ.get("KERNEL_CMDLINE", "") or (
uses_systemd()
and "cloud-init=disabled"
in subp.subp(["systemctl", "show-environment"]).stdout
in query_systemctl(["show-environment"], wait=wait)
):
bootstatus_code = UXAppBootStatusCode.DISABLED_BY_ENV_VARIABLE
reason = (
Expand All @@ -272,7 +304,9 @@ def get_bootstatus(disable_file, paths) -> Tuple[UXAppBootStatusCode, str]:
return (bootstatus_code, reason)


def _get_error_or_running_from_systemd() -> Optional[UXAppStatus]:
def _get_error_or_running_from_systemd(
existing_status: UXAppStatus, wait: bool
) -> Optional[UXAppStatus]:
"""Get if systemd is in error or running state.
Using systemd, we can get more fine-grained status of the
Expand All @@ -288,14 +322,18 @@ def _get_error_or_running_from_systemd() -> Optional[UXAppStatus]:
"cloud-init.service",
"cloud-init-local.service",
]:
stdout = subp.subp(
stdout = query_systemctl(
[
"systemctl",
"show",
"--property=ActiveState,UnitFileState,SubState,MainPID",
service,
],
).stdout
wait=wait,
existing_status=existing_status,
)
if not stdout:
# Systemd isn't ready
return None
states = dict(
[[x.strip() for x in r.split("=")] for r in stdout.splitlines()]
)
Expand Down Expand Up @@ -327,39 +365,6 @@ def _get_error_or_running_from_systemd() -> Optional[UXAppStatus]:
return None


def _get_error_or_running_from_systemd_with_retry(
existing_status: UXAppStatus, *, wait: bool
) -> Optional[UXAppStatus]:
"""Get systemd status and retry if dbus isn't ready.
If cloud-init has determined that we're still running, then we can
ignore the status from systemd. However, if cloud-init has detected error,
then we should retry on systemd status so we don't incorrectly report
error state while cloud-init is still running.
"""
while True:
try:
return _get_error_or_running_from_systemd()
except subp.ProcessExecutionError as e:
last_exception = e
if existing_status in (
UXAppStatus.DEGRADED_RUNNING,
UXAppStatus.RUNNING,
):
return None
if wait:
sleep(0.25)
else:
break
print(
"Failed to get status from systemd. "
"Cloud-init status may be inaccurate. ",
f"Error from systemctl: {last_exception.stderr}",
file=sys.stderr,
)
return None


def get_status_details(
paths: Optional[Paths] = None, wait: bool = False
) -> StatusDetails:
Expand All @@ -380,7 +385,7 @@ def get_status_details(
result_file = os.path.join(paths.run_dir, "result.json")

boot_status_code, description = get_bootstatus(
CLOUDINIT_DISABLED_FILE, paths
CLOUDINIT_DISABLED_FILE, paths, wait
)
if boot_status_code in DISABLED_BOOT_CODES:
status = UXAppStatus.DISABLED
Expand Down Expand Up @@ -433,9 +438,7 @@ def get_status_details(
UXAppStatus.NOT_RUN,
UXAppStatus.DISABLED,
):
systemd_status = _get_error_or_running_from_systemd_with_retry(
status, wait=wait
)
systemd_status = _get_error_or_running_from_systemd(status, wait=wait)
if systemd_status:
status = systemd_status

Expand Down
99 changes: 74 additions & 25 deletions tests/unittests/cmd/test_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from cloudinit.cmd.status import (
UXAppStatus,
_get_error_or_running_from_systemd,
_get_error_or_running_from_systemd_with_retry,
)
from cloudinit.subp import SubpResult
from cloudinit.util import ensure_file
Expand Down Expand Up @@ -65,7 +64,7 @@ class TestStatus:
),
)
@mock.patch(
f"{M_PATH}_get_error_or_running_from_systemd_with_retry",
f"{M_PATH}_get_error_or_running_from_systemd",
return_value=None,
)
def test_get_status_details_ds_none(
Expand Down Expand Up @@ -188,6 +187,7 @@ def test_get_bootstatus(
failure_msg: str,
expected_reason: Union[str, Callable],
config: Config,
mocker,
):
if ensured_file is not None:
ensure_file(ensured_file(config))
Expand All @@ -201,15 +201,10 @@ def test_get_bootstatus(
stderr=None,
),
):
code, reason = wrap_and_call(
M_NAME,
{
"uses_systemd": uses_systemd,
"get_cmdline": get_cmdline,
},
status.get_bootstatus,
config.disable_file,
config.paths,
mocker.patch(f"{M_PATH}uses_systemd", return_value=uses_systemd)
mocker.patch(f"{M_PATH}get_cmdline", return_value=get_cmdline)
code, reason = status.get_bootstatus(
config.disable_file, config.paths, False
)
assert code == expected_bootstatus, failure_msg
if isinstance(expected_reason, str):
Expand Down Expand Up @@ -713,7 +708,7 @@ def fakeexists(filepath):
)
@mock.patch(M_PATH + "read_cfg_paths")
@mock.patch(
f"{M_PATH}_get_error_or_running_from_systemd_with_retry",
f"{M_PATH}_get_error_or_running_from_systemd",
return_value=None,
)
def test_status_output(
Expand Down Expand Up @@ -757,7 +752,7 @@ def test_status_output(

@mock.patch(M_PATH + "read_cfg_paths")
@mock.patch(
f"{M_PATH}_get_error_or_running_from_systemd_with_retry",
f"{M_PATH}_get_error_or_running_from_systemd",
return_value=None,
)
def test_status_wait_blocks_until_done(
Expand Down Expand Up @@ -811,7 +806,7 @@ def fake_sleep(interval):

@mock.patch(M_PATH + "read_cfg_paths")
@mock.patch(
f"{M_PATH}_get_error_or_running_from_systemd_with_retry",
f"{M_PATH}_get_error_or_running_from_systemd",
return_value=None,
)
def test_status_wait_blocks_until_error(
Expand Down Expand Up @@ -867,7 +862,7 @@ def fake_sleep(interval):

@mock.patch(M_PATH + "read_cfg_paths")
@mock.patch(
f"{M_PATH}_get_error_or_running_from_systemd_with_retry",
f"{M_PATH}_get_error_or_running_from_systemd",
return_value=None,
)
def test_status_main(
Expand Down Expand Up @@ -948,7 +943,10 @@ def test_get_error_or_running_from_systemd(
stderr=None,
),
):
assert _get_error_or_running_from_systemd() == status
assert (
_get_error_or_running_from_systemd(UXAppStatus.RUNNING, False)
== status
)

def test_exception_while_running(self, mocker, capsys):
m_subp = mocker.patch(
Expand All @@ -959,9 +957,7 @@ def test_exception_while_running(self, mocker, capsys):
),
)
assert (
_get_error_or_running_from_systemd_with_retry(
UXAppStatus.RUNNING, wait=True
)
_get_error_or_running_from_systemd(UXAppStatus.RUNNING, wait=True)
is None
)
assert 1 == m_subp.call_count
Expand All @@ -987,9 +983,7 @@ def test_retry(self, mocker, capsys):
],
)
assert (
_get_error_or_running_from_systemd_with_retry(
UXAppStatus.ERROR, wait=True
)
_get_error_or_running_from_systemd(UXAppStatus.ERROR, wait=True)
is UXAppStatus.RUNNING
)
assert 3 == m_subp.call_count
Expand All @@ -1005,13 +999,68 @@ def test_retry_no_wait(self, mocker, capsys):
)
mocker.patch("time.time", side_effect=[1, 2, 50])
assert (
_get_error_or_running_from_systemd_with_retry(
UXAppStatus.ERROR, wait=False
)
_get_error_or_running_from_systemd(UXAppStatus.ERROR, wait=False)
is None
)
assert 1 == m_subp.call_count
assert (
"Failed to get status from systemd. "
"Cloud-init status may be inaccurate."
) in capsys.readouterr().err


class TestQuerySystemctl:
def test_query_systemctl(self, mocker):
m_subp = mocker.patch(
f"{M_PATH}subp.subp",
return_value=SubpResult(stdout="hello", stderr=None),
)
assert status.query_systemctl(["some", "args"], wait=False) == "hello"
m_subp.assert_called_once_with(["systemctl", "some", "args"])

def test_query_systemctl_with_exception(self, mocker, capsys):
m_subp = mocker.patch(
f"{M_PATH}subp.subp",
side_effect=subp.ProcessExecutionError(
"Message recipient disconnected", stderr="oh noes!"
),
)
assert status.query_systemctl(["some", "args"], wait=False) == ""
m_subp.assert_called_once_with(["systemctl", "some", "args"])
assert "Error from systemctl: oh noes!" in capsys.readouterr().err

def test_query_systemctl_wait_with_exception(self, mocker):
m_sleep = mocker.patch(f"{M_PATH}sleep")
m_subp = mocker.patch(
f"{M_PATH}subp.subp",
side_effect=[
subp.ProcessExecutionError("Message recipient disconnected"),
subp.ProcessExecutionError("Message recipient disconnected"),
subp.ProcessExecutionError("Message recipient disconnected"),
SubpResult(stdout="hello", stderr=None),
],
)

assert status.query_systemctl(["some", "args"], wait=True) == "hello"
assert m_subp.call_count == 4
assert m_sleep.call_count == 3

def test_query_systemctl_wait_with_exception_status(self, mocker):
m_sleep = mocker.patch(f"{M_PATH}sleep")
m_subp = mocker.patch(
f"{M_PATH}subp.subp",
side_effect=subp.ProcessExecutionError(
"Message recipient disconnected"
),
)

assert (
status.query_systemctl(
["some", "args"],
wait=True,
existing_status=UXAppStatus.RUNNING,
)
== ""
)
assert m_subp.call_count == 1
assert m_sleep.call_count == 0

0 comments on commit 00cbb41

Please sign in to comment.