Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests around the Omnibus cache and make duration management safer #25478

Merged
merged 8 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,7 @@
/tasks/updater.py @DataDog/fleet
/tasks/libs/common/omnibus.py @DataDog/agent-build-and-releases
/tasks/omnibus.py @DataDog/agent-build-and-releases
/tasks/unit-tests/omnibus_tests.py @DataDog/agent-build-and-releases
/tasks/installer.py @DataDog/fleet
/test/ @DataDog/agent-developer-tools
/test/benchmarks/ @DataDog/agent-metrics-logs
Expand Down
29 changes: 14 additions & 15 deletions tasks/omnibus.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,9 @@ def build(
"""

flavor = AgentFlavor[flavor]
durations = {}
if not skip_deps:
with timed(quiet=True) as deps_elapsed:
with timed(quiet=True) as durations['Deps']:
deps(ctx)

# base dir (can be overridden through env vars, command line takes precedence)
Expand Down Expand Up @@ -219,8 +220,6 @@ def build(
elif agent_binaries:
target_project = "agent-binaries"

aws_cmd = "aws.cmd" if sys.platform == 'win32' else "aws"

# Get the python_mirror from the PIP_INDEX_URL environment variable if it is not passed in the args
python_mirror = python_mirror or os.environ.get("PIP_INDEX_URL")

Expand All @@ -231,11 +230,12 @@ def build(
with open(pip_config_file, 'w') as f:
f.write(pip_index_url)

with timed(quiet=True) as bundle_elapsed:
with timed(quiet=True) as durations['Bundle']:
bundle_install_omnibus(ctx, gem_path, env)

omnibus_cache_dir = os.environ.get('OMNIBUS_GIT_CACHE_DIR')
use_omnibus_git_cache = omnibus_cache_dir is not None and target_project == "agent" and host_distribution != "ociru"
aws_cmd = "aws.cmd" if sys.platform == 'win32' else "aws"
if use_omnibus_git_cache:
# The cache will be written in the provided cache dir (see omnibus.rb) but
# the git repository itself will be located in a subfolder that replicates
Expand Down Expand Up @@ -263,7 +263,7 @@ def build(
bundle_path = (
"/tmp/omnibus-git-cache-bundle" if sys.platform != 'win32' else "C:\\TEMP\\omnibus-git-cache-bundle"
)
with timed(quiet=True) as restore_cache:
with timed(quiet=True) as durations['Restoring omnibus cache']:
# Allow failure in case the cache was evicted
if ctx.run(f"{aws_cmd} s3 cp --only-show-errors {git_cache_url} {bundle_path}", warn=True):
print(f'Successfully retrieved cache {cache_key}')
Expand All @@ -279,7 +279,7 @@ def build(
ctx, os.environ.get('CI_PIPELINE_ID'), remote_cache_name, os.environ.get('CI_JOB_ID')
)

with timed(quiet=True) as omnibus_elapsed:
with timed(quiet=True) as durations['Omnibus']:
omnibus_run_task(
ctx=ctx,
task="build",
Expand All @@ -302,20 +302,19 @@ def build(
# in case they were included in the bundle in a previous build
for _, tag in enumerate(stale_tags.split(os.linesep)):
ctx.run(f'git -C {omnibus_cache_dir} tag -d {tag}')
with timed(quiet=True) as update_cache:
with timed(quiet=True) as durations['Updating omnibus cache']:
if use_remote_cache and ctx.run(f"git -C {omnibus_cache_dir} tag -l").stdout != cache_state:
ctx.run(f"git -C {omnibus_cache_dir} bundle create {bundle_path} --tags")
ctx.run(f"{aws_cmd} s3 cp --only-show-errors {bundle_path} {git_cache_url}")

# Output duration information for different steps
print("Build component timing:")
if not skip_deps:
print(f"Deps: {deps_elapsed.duration}")
print(f"Bundle: {bundle_elapsed.duration}")
print(f"Omnibus: {omnibus_elapsed.duration}")
if use_omnibus_git_cache and use_remote_cache:
print(f"Restoring omnibus cache: {restore_cache.duration}")
print(f"Updating omnibus cache: {update_cache.duration}")
send_build_metrics(ctx, omnibus_elapsed.duration)
durations_to_print = ["Deps", "Bundle", "Omnibus", "Restoring omnibus cache", "Updating omnibus cache"]
for name in durations_to_print:
if name in durations:
print(f"{name}: {durations[name].duration}")

send_build_metrics(ctx, durations['Omnibus'].duration)


@task
Expand Down
177 changes: 177 additions & 0 deletions tasks/unit-tests/omnibus_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import os
import re
import unittest
from unittest import mock

from invoke.context import MockContext
from invoke.exceptions import UnexpectedExit
from invoke.runners import Result

from tasks import omnibus


class MockContextRaising(MockContext):
    """`MockContext` variant whose `run` raises `UnexpectedExit` on failed commands.

    A falsy result is turned into an exception unless the caller passed a truthy `warn`.
    """

    def run(self, *args, **kwargs):
        """Run the mocked command and raise `UnexpectedExit` on a falsy, un-warned result."""
        outcome = super().run(*args, **kwargs)
        # Either the command succeeded, or the caller explicitly tolerates failures
        if outcome or kwargs.get("warn"):
            return outcome
        raise UnexpectedExit(outcome)


def _run_calls_to_string(mock_calls):
    """Render recorded `run` calls as a single string, one command per line.

    Each call contributes its first positional argument. Joining them with newlines
    lets a whole sequence of commands be checked with plain regular expressions.
    """
    return '\n'.join([recorded.args[0] for recorded in mock_calls])


@mock.patch('sys.platform', 'linux')
@mock.patch.dict(
    'os.environ',
    {
        'OMNIBUS_GIT_CACHE_DIR': 'omnibus-git-cache',
        'CI_JOB_NAME_SLUG': 'slug',
        'CI_COMMIT_REF_NAME': '',
        'CI_PROJECT_DIR': '',
        'CI_PIPELINE_ID': '',
        'RELEASE_VERSION_7': 'nightly',
        'S3_OMNIBUS_CACHE_BUCKET': 'omnibus-cache',
        'API_KEY_ORG2_SSM_NAME': 'api-key',
    },
    clear=True,
)
class TestOmnibusCache(unittest.TestCase):
    """Check the omnibus git cache handling of `omnibus.build` through a mocked context.

    The class decorators make each test method run with `sys.platform` set to 'linux'
    and with `os.environ` replaced by the fixed set of variables above. Commands are
    never executed: they are recorded by `self.mock_ctx` and inspected afterwards.
    """

    def setUp(self):
        """Create a fresh context with no canned results; each test registers its own."""
        self.mock_ctx = MockContextRaising(run={})

    def _set_up_default_command_mocks(self):
        """Register successful results for the broad command families used by the build."""
        # This should allow to postpone the setting up of these broadly catching patterns
        # after the ones specific for a test have been set up.
        patterns = [
            (r'bundle .*', Result()),
            (r'git describe --tags .*', Result('6.0.0-beta.0-1-g4f19118')),
            (r'git .*', Result()),
            (r'aws s3 .*', Result()),
            (r'go mod .*', Result()),
            (r'grep .*', Result()),
            (r'aws ssm .*', Result()),
        ]
        for pattern, result in patterns:
            self.mock_ctx.set_result_for('run', re.compile(pattern), result)

    def assertRunLines(self, line_patterns):
        """Assert the given line patterns match, in the given order, the commands run so far.

        The commands recorded on `self.mock_ctx` are joined one per line, and the patterns
        are joined with a "match anything, including newlines" separator, so any number of
        other commands may appear between two expected ones.
        """
        commands = _run_calls_to_string(self.mock_ctx.run.mock_calls)

        pattern = '(\n|.)*'.join(line_patterns)
        return self.assertIsNotNone(
            re.search(pattern, commands, re.MULTILINE),
            f'Failed to match pattern {line_patterns}.',
        )

    def test_successful_cache_hit(self):
        """The cache is downloaded and cloned before the build, and not re-uploaded after."""
        self.mock_ctx.set_result_for(
            'run',
            re.compile(r'git (.* )?tag -l'),
            Result('foo-1234'),
        )
        self._set_up_default_command_mocks()
        omnibus.build(self.mock_ctx)

        # Assert main actions were taken in the expected order
        self.assertRunLines(
            [
                # We copied the cache from remote cache
                r'aws s3 cp (\S* )?s3://omnibus-cache/builds/\w+/slug /tmp/omnibus-git-cache-bundle',
                # We cloned the repo
                r'git clone --mirror /tmp/omnibus-git-cache-bundle omnibus-git-cache/opt/datadog-agent',
                # We listed the tags to get current cache state
                r'git -C omnibus-git-cache/opt/datadog-agent tag -l',
                # We ran omnibus
                r'bundle exec omnibus build agent',
            ],
        )

        # By the way the mocks are set up, we expect the `cache state` to not have changed and thus the cache
        # shouldn't have been bundled and uploaded
        commands = _run_calls_to_string(self.mock_ctx.run.mock_calls)
        lines = [
            'git -C omnibus-git-cache/opt/datadog-agent bundle create /tmp/omnibus-git-cache-bundle --tags',
            r'aws s3 cp (\S* )?/tmp/omnibus-git-cache-bundle s3://omnibus-cache/builds/\w+/slug',
        ]
        for line in lines:
            self.assertIsNone(
                re.search(line, commands),
                f'Unexpected command matching {line!r} was run.',
            )

    def test_cache_miss(self):
        """A failed cache download skips the clone, posts a miss event and uploads a new cache."""
        self.mock_ctx.set_result_for(
            'run',
            re.compile(r'aws s3 cp (\S* )?s3://omnibus-cache/builds/\S* /tmp/omnibus-git-cache-bundle'),
            Result(exited=1),
        )
        self.mock_ctx.set_result_for(
            'run',
            re.compile(r'git (.* )?tag -l'),
            Result('foo-1234'),
        )
        self._set_up_default_command_mocks()
        with mock.patch('requests.post') as post_mock:
            omnibus.build(self.mock_ctx)

        commands = _run_calls_to_string(self.mock_ctx.run.mock_calls)
        commands_before_build = commands.split('bundle exec omnibus')[0]

        # Assert we did NOT clone nor list tags before the omnibus build
        lines = [
            r'git clone --mirror /tmp/omnibus-git-cache-bundle omnibus-git-cache/opt/datadog-agent',
            r'git -C omnibus-git-cache/opt/datadog-agent tag -l',
        ]
        for line in lines:
            self.assertIsNone(
                re.search(line, commands_before_build),
                f'Unexpected command matching {line!r} was run before the build.',
            )
        # Assert we sent a cache miss event.
        # Use a TestCase assertion rather than a bare `assert`, which is stripped under `python -O`.
        self.assertTrue(post_mock.mock_calls, 'Expected requests.post to have been called.')
        self.assertIn("events", post_mock.mock_calls[0].args[0])
        self.assertIn("omnibus cache miss", str(post_mock.mock_calls[0].kwargs['json']))
        # Assert we bundled and uploaded the cache (should always happen on cache misses)
        self.assertRunLines(
            [
                # We ran omnibus
                r'bundle exec omnibus build agent',
                # Listed tags for cache comparison
                r'git -C omnibus-git-cache/opt/datadog-agent tag -l',
                # And we created and uploaded the new cache
                r'git -C omnibus-git-cache/opt/datadog-agent bundle create /tmp/omnibus-git-cache-bundle --tags',
                r'aws s3 cp (\S* )?/tmp/omnibus-git-cache-bundle s3://omnibus-cache/builds/\w+/slug',
            ],
        )

    def test_cache_hit_with_corruption(self):
        """A bundle that fails to clone must not prevent the omnibus build from running."""
        # Case where we get a bundle from S3 but git finds it to be corrupted

        # Fail to clone
        self.mock_ctx.set_result_for(
            'run',
            re.compile(r'git clone (\S* )?/tmp/omnibus-git-cache-bundle.*'),
            Result('fatal: remote did not send all necessary objects', exited=1),
        )
        self._set_up_default_command_mocks()

        omnibus.build(self.mock_ctx)

        # We're satisfied if we ran the build despite that failure
        self.assertRunLines([r'bundle exec omnibus build agent'])

    def test_cache_is_disabled_by_unsetting_env_var(self):
        """Without `OMNIBUS_GIT_CACHE_DIR` the build runs and no cache command is issued."""
        # The class-level `mock.patch.dict` restores `os.environ` once the test ends,
        # so this deletion does not leak into the other tests.
        del os.environ['OMNIBUS_GIT_CACHE_DIR']
        self._set_up_default_command_mocks()

        omnibus.build(self.mock_ctx)

        # We ran the build but no command related to the cache
        self.assertRunLines(['bundle exec omnibus build agent'])
        commands = _run_calls_to_string(self.mock_ctx.run.mock_calls)
        self.assertNotIn('omnibus-git-cache', commands)