Skip to content

Commit

Permalink
Merge pull request #2668 from Azure/release-2.8.0.0
Browse files Browse the repository at this point in the history
Merge release 2.8 to master
  • Loading branch information
narrieta committed Sep 20, 2022
2 parents e4cba48 + 3928dbd commit 04ded9f
Show file tree
Hide file tree
Showing 130 changed files with 4,879 additions and 2,736 deletions.
10 changes: 0 additions & 10 deletions README.md
Expand Up @@ -177,7 +177,6 @@ A sample configuration file is shown below:
```yml
Extensions.Enabled=y
Extensions.GoalStatePeriod=6
Extensions.GoalStateHistoryCleanupPeriod=1800
Provisioning.Agent=auto
Provisioning.DeleteRootPassword=n
Provisioning.RegenerateSshHostKeyPair=y
Expand Down Expand Up @@ -243,15 +242,6 @@ _Note_: setting up this parameter to more than a few minutes can make the state
of the VM be reported as unresponsive/unavailable on the Azure portal. Also, this
setting affects how fast the agent starts executing extensions.

#### __Extensions.GoalStateHistoryCleanupPeriod__

_Type: Integer_
_Default: 1800 (30 minutes)_

How often to clean up the history folder of the agent. The agent keeps past goal
states in this folder, each goal state represented by a set of small files. The
history is useful to debug issues in the agent or extensions.

#### __AutoUpdate.Enabled__

_Type: Boolean_
Expand Down
1 change: 1 addition & 0 deletions azurelinuxagent/common/agent_supported_feature.py
Expand Up @@ -22,6 +22,7 @@ class SupportedFeatureNames(object):
"""
MultiConfig = "MultipleExtensionsPerHandler"
ExtensionTelemetryPipeline = "ExtensionTelemetryPipeline"
FastTrack = "FastTrack"


class AgentSupportedFeature(object):
Expand Down
17 changes: 12 additions & 5 deletions azurelinuxagent/common/cgroup.py
Expand Up @@ -120,7 +120,9 @@ def is_active(self):

def get_tracked_metrics(self, **_):
"""
Retrieves the current value of the metrics tracked for this cgroup and returns them as an array
Retrieves the current value of the metrics tracked for this cgroup and returns them as an array.
Note: The agent does not track the metrics, and returns an empty array, if the current CPU ticks are less than the previous value.
"""
raise NotImplementedError()

Expand Down Expand Up @@ -241,11 +243,16 @@ def get_throttled_time(self):
return float(self._current_throttled_time - self._previous_throttled_time) / 1E9

def get_tracked_metrics(self, **kwargs):
tracked = [
MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.PROCESSOR_PERCENT_TIME, self.name, self.get_cpu_usage()),
]
tracked = []
cpu_usage = self.get_cpu_usage()
if cpu_usage >= float(0):
tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.PROCESSOR_PERCENT_TIME, self.name, cpu_usage))

if 'track_throttled_time' in kwargs and kwargs['track_throttled_time']:
tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, self.name, self.get_throttled_time()))
throttled_time = self.get_throttled_time()
if cpu_usage >= float(0) and throttled_time >= float(0):
tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, self.name, throttled_time))

return tracked


Expand Down
2 changes: 1 addition & 1 deletion azurelinuxagent/common/cgroupapi.py
Expand Up @@ -61,7 +61,7 @@ def cgroups_supported():
return False
return ((distro_name.lower() == 'ubuntu' and distro_version.major >= 16) or
(distro_name.lower() in ("centos", "redhat") and
((distro_version.major == 7 and distro_version.minor >= 8) or distro_version.major >= 8)))
((distro_version.major == 7 and distro_version.minor >= 4) or distro_version.major >= 8)))

@staticmethod
def track_cgroups(extension_cgroups):
Expand Down
28 changes: 23 additions & 5 deletions azurelinuxagent/common/cgroupconfigurator.py
Expand Up @@ -94,7 +94,6 @@
[Service]
CPUQuota={0}
"""
_AGENT_THROTTLED_TIME_THRESHOLD = 120 # 2 minutes


class DisableCgroups(object):
Expand Down Expand Up @@ -605,7 +604,8 @@ def _check_processes_in_agent_cgroup(self):
current = process
while current != 0 and current not in agent_commands:
current = self._get_parent(current)
if current == 0:
# Processes started by the agent carry a marker in their environment; check whether that marker is present.
if current == 0 and not self.__is_process_descendant_of_the_agent(process):
unexpected.append(self.__format_process(process))
if len(unexpected) >= 5: # collect just a small sample
break
Expand Down Expand Up @@ -640,11 +640,29 @@ def __format_process(pid):
pass
return "[PID: {0}] UNKNOWN".format(pid)

@staticmethod
def __is_process_descendant_of_the_agent(pid):
    """
    Returns True if the process is a descendant of the agent, otherwise False.

    The check looks in /proc/<pid>/environ for the marker
    "<shellutil.PARENT_PROCESS_NAME>=<shellutil.AZURE_GUEST_AGENT>" (the env flag
    AZURE_GUEST_AGENT_PARENT_PROCESS_NAME that we set when the process starts).

    This is a best-effort check: any error while reading the environment (for example,
    the file does not exist or cannot be opened) is treated as "not a descendant".
    """
    try:
        marker = "{0}={1}".format(shellutil.PARENT_PROCESS_NAME, shellutil.AZURE_GUEST_AGENT).encode("utf-8")
        # Read raw bytes: the environment of an arbitrary process is not guaranteed to be
        # decodable text, and a decoding error would otherwise be swallowed below and make
        # a genuine descendant look like an unexpected process.
        with open('/proc/{0}/environ'.format(pid), "rb") as env_file:
            return marker in env_file.read()
    except Exception:
        # Deliberately ignored; fall through and report "not a descendant".
        pass
    return False

@staticmethod
def _check_agent_throttled_time(cgroup_metrics):
for metric in cgroup_metrics:
if metric.instance == AGENT_NAME_TELEMETRY and metric.counter == MetricsCounter.THROTTLED_TIME:
if metric.value > _AGENT_THROTTLED_TIME_THRESHOLD:
if metric.value > conf.get_agent_cpu_throttled_time_threshold():
raise CGroupsException("The agent has been throttled for {0} seconds".format(metric.value))

@staticmethod
Expand Down Expand Up @@ -780,7 +798,7 @@ def set_extension_services_cpu_memory_quota(self, services_list):
if self.enabled() and services_list is not None:
for service in services_list:
service_name = service.get('name', None)
unit_file_path = service.get('path', None)
unit_file_path = systemd.get_unit_file_install_path()
if service_name is not None and unit_file_path is not None:
files_to_create = []
drop_in_path = os.path.join(unit_file_path, "{0}.d".format(service_name))
Expand All @@ -804,7 +822,7 @@ def remove_extension_services_drop_in_files(self, services_list):
if services_list is not None:
for service in services_list:
service_name = service.get('name', None)
unit_file_path = service.get('path', None)
unit_file_path = systemd.get_unit_file_install_path()
if service_name is not None and unit_file_path is not None:
files_to_cleanup = []
drop_in_path = os.path.join(unit_file_path, "{0}.d".format(service_name))
Expand Down
34 changes: 25 additions & 9 deletions azurelinuxagent/common/conf.py
Expand Up @@ -168,8 +168,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
__INTEGER_OPTIONS__ = {
"Extensions.GoalStatePeriod": 6,
"Extensions.InitialGoalStatePeriod": 6,
"Extensions.GoalStateHistoryCleanupPeriod": 1800,
"OS.EnableFirewallPeriod": 30,
"OS.EnableFirewallPeriod": 300,
"OS.RemovePersistentNetRulesPeriod": 30,
"OS.RootDeviceScsiTimeoutPeriod": 30,
"OS.MonitorDhcpClientRestartPeriod": 30,
Expand All @@ -186,9 +185,11 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
#
"Debug.CgroupCheckPeriod": 300,
"Debug.AgentCpuQuota": 75,
"Debug.AgentCpuThrottledTimeThreshold": 120,
"Debug.EtpCollectionPeriod": 300,
"Debug.AutoUpdateHotfixFrequency": 14400,
"Debug.AutoUpdateNormalFrequency": 86400
"Debug.AutoUpdateNormalFrequency": 86400,
"Debug.FirewallRulesLogPeriod": 86400
}


Expand Down Expand Up @@ -229,7 +230,7 @@ def enable_firewall(conf=__conf__):


def get_enable_firewall_period(conf=__conf__):
return conf.get_int("OS.EnableFirewallPeriod", 30)
return conf.get_int("OS.EnableFirewallPeriod", 300)


def get_remove_persistent_net_rules_period(conf=__conf__):
Expand Down Expand Up @@ -376,10 +377,6 @@ def get_initial_goal_state_period(conf=__conf__):
return conf.get_int("Extensions.InitialGoalStatePeriod", default_value=lambda: get_goal_state_period(conf=conf))


def get_goal_state_history_cleanup_period(conf=__conf__):
return conf.get_int("Extensions.GoalStateHistoryCleanupPeriod", 1800)


def get_allow_reset_sys_user(conf=__conf__):
return conf.get_switch("Provisioning.AllowResetSysUser", False)

Expand Down Expand Up @@ -548,7 +545,17 @@ def get_agent_cpu_quota(conf=__conf__):
"""
return conf.get_int("Debug.AgentCpuQuota", 75)

def get_cgroup_monitor_expiry_time (conf=__conf__):

def get_agent_cpu_throttled_time_threshold(conf=__conf__):
    """
    Returns the integer option Debug.AgentCpuThrottledTimeThreshold: the throttled time
    threshold for the agent's cpu, in seconds (default: 120).

    NOTE: This option is experimental and may be removed in later versions of the Agent.
    """
    option_name, default_seconds = "Debug.AgentCpuThrottledTimeThreshold", 120
    return conf.get_int(option_name, default_seconds)


def get_cgroup_monitor_expiry_time(conf=__conf__):
"""
cgroups monitoring for pilot extensions disabled after expiry time
Expand Down Expand Up @@ -605,3 +612,12 @@ def get_enable_ga_versioning(conf=__conf__):
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_switch("Debug.EnableGAVersioning", False)


def get_firewall_rules_log_period(conf=__conf__):
    """
    Returns the integer option Debug.FirewallRulesLogPeriod: the frequency of the periodic
    operation that logs the firewall rules (default: 86400).

    NOTE: This option is experimental and may be removed in later versions of the Agent.
    """
    option_name, default_period = "Debug.FirewallRulesLogPeriod", 86400
    return conf.get_int(option_name, default_period)
16 changes: 0 additions & 16 deletions azurelinuxagent/common/exception.py
Expand Up @@ -121,16 +121,6 @@ class ExtensionsConfigError(ExtensionsGoalStateError):
"""


class VmSettingsError(ExtensionsGoalStateError):
"""
Error raised when the VmSettings are malformed
"""
def __init__(self, message, etag, vm_settings_text, inner=None):
super(VmSettingsError, self).__init__(message, inner)
self.etag = etag
self.vm_settings_text = vm_settings_text


class MultiConfigExtensionEnableError(ExtensionError):
"""
Error raised when enable for a Multi-Config extension is failing.
Expand Down Expand Up @@ -188,12 +178,6 @@ class ProtocolNotFoundError(ProtocolError):
"""


class IncompleteGoalStateError(ProtocolError):
"""
Goal state is returned incomplete.
"""


class HttpError(AgentError):
"""
Http request failure
Expand Down
7 changes: 1 addition & 6 deletions azurelinuxagent/common/logcollector.py
Expand Up @@ -50,12 +50,7 @@

_MUST_COLLECT_FILES = [
_AGENT_LOG,
os.path.join(_AGENT_LIB_DIR, "GoalState.*.xml"),
os.path.join(_AGENT_LIB_DIR, "ExtensionsConfig.*.xml"),
os.path.join(_AGENT_LIB_DIR, "HostingEnvironmentConfig.*.xml"),
os.path.join(_AGENT_LIB_DIR, "SharedConfig.*.xml"),
os.path.join(_AGENT_LIB_DIR, "*manifest.xml"),
os.path.join(_AGENT_LIB_DIR, "waagent_status.*.json"),
os.path.join(_AGENT_LIB_DIR, "waagent_status.json"),
os.path.join(_AGENT_LIB_DIR, "history", "*.zip"),
os.path.join(_EXTENSION_LOG_DIR, "*", "*"),
os.path.join(_EXTENSION_LOG_DIR, "*", "*", "*"),
Expand Down
15 changes: 4 additions & 11 deletions azurelinuxagent/common/logcollector_manifests.py
Expand Up @@ -39,16 +39,13 @@
echo,
echo,### Gathering Extension Files ###
copy,$LIB_DIR/*.xml
copy,$LIB_DIR/VmSettings.*.json
copy,$LIB_DIR/waagent_status.*.json
copy,$LIB_DIR/ovf-env.xml
copy,$LIB_DIR/waagent_status.json
copy,$LIB_DIR/*/status/*.status
copy,$LIB_DIR/*/config/*.settings
copy,$LIB_DIR/*/config/HandlerState
copy,$LIB_DIR/*/config/HandlerStatus
copy,$LIB_DIR/*.agentsManifest
copy,$LIB_DIR/error.json
copy,$LIB_DIR/Incarnation
copy,$LIB_DIR/history/*.zip
echo,
"""
Expand Down Expand Up @@ -108,19 +105,15 @@
echo,
echo,### Gathering Extension Files ###
copy,$LIB_DIR/ExtensionsConfig.*.xml
copy,$LIB_DIR/ovf-env.xml
copy,$LIB_DIR/*/status/*.status
copy,$LIB_DIR/*/config/*.settings
copy,$LIB_DIR/*/config/HandlerState
copy,$LIB_DIR/*/config/HandlerStatus
copy,$LIB_DIR/GoalState.*.xml
copy,$LIB_DIR/HostingEnvironmentConfig.xml
copy,$LIB_DIR/*.manifest.xml
copy,$LIB_DIR/SharedConfig.xml
copy,$LIB_DIR/ManagedIdentity-*.json
copy,$LIB_DIR/*/error.json
copy,$LIB_DIR/Incarnation
copy,$LIB_DIR/waagent_status.*.json
copy,$LIB_DIR/waagent_status.json
copy,$LIB_DIR/history/*.zip
echo,
Expand Down
4 changes: 4 additions & 0 deletions azurelinuxagent/common/logger.py
Expand Up @@ -45,6 +45,7 @@ def __init__(self, logger=None, prefix=None):
self.logger = self if logger is None else logger
self.periodic_messages = {}
self.prefix = prefix
self.silent = False

def reset_periodic(self):
self.logger.periodic_messages = {}
Expand Down Expand Up @@ -124,6 +125,9 @@ def write_log(log_appender): # pylint: disable=W0612
finally:
log_appender.appender_lock = False

if self.silent:
return

# if msg_format is not unicode convert it to unicode
if type(msg_format) is not ustr:
msg_format = ustr(msg_format, errors="backslashreplace")
Expand Down

0 comments on commit 04ded9f

Please sign in to comment.