diff --git a/ci/input_files/build.yaml.tpl b/ci/input_files/build.yaml.tpl
index 420eea64..9d2f1a9f 100644
--- a/ci/input_files/build.yaml.tpl
+++ b/ci/input_files/build.yaml.tpl
@@ -7,13 +7,6 @@ stages:
   - publish
   - e2e
 
-.python-before-script: &python-before-script
-  - pip install virtualenv
-  - virtualenv venv
-  - source venv/bin/activate
-  - pip install .[dev]
-  - pip install poetry
-
 default:
   retry:
     max: 1
@@ -73,7 +66,8 @@ lint python:
   tags: ["arch:amd64"]
   image: registry.ddbuild.io/images/mirror/python:{{ $runtime.image }}
   cache: &{{ $runtime.name }}-{{ $runtime.arch }}-cache
-  before_script: *python-before-script
+  before_script:
+    - PYTHON_VERSION={{ $runtime.python_version }} ./scripts/setup_python_env.sh
   script:
     - source venv/bin/activate
     - ./scripts/check_format.sh
@@ -83,7 +77,8 @@ unit-test ({{ $runtime.name }}-{{ $runtime.arch }}):
   tags: ["arch:amd64"]
   image: registry.ddbuild.io/images/mirror/python:{{ $runtime.image }}
   cache: &{{ $runtime.name }}-{{ $runtime.arch }}-cache
-  before_script: *python-before-script
+  before_script:
+    - PYTHON_VERSION={{ $runtime.python_version }} ./scripts/setup_python_env.sh
   script:
     - source venv/bin/activate
     - pytest -vv
@@ -195,7 +190,8 @@ publish-pypi-package:
   stage: publish
   tags: ["arch:amd64"]
   image: registry.ddbuild.io/images/docker:20.10-py3
-  before_script: *python-before-script
+  before_script:
+    - ./scripts/setup_python_env.sh
   cache: []
   rules:
     - if: '$CI_COMMIT_TAG =~ /^v.*/'
diff --git a/scripts/_spec_ddtrace_dep.sh b/scripts/_spec_ddtrace_dep.sh
new file mode 100644
index 00000000..08ec4c70
--- /dev/null
+++ b/scripts/_spec_ddtrace_dep.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Unless explicitly stated otherwise all files in this repository are licensed
+# under the Apache License Version 2.0.
+# This product includes software developed at Datadog (https://www.datadoghq.com/).
+
+# Shared helpers for rewriting the ddtrace dependency in pyproject.toml.
+# Sourced by scripts/build_layers.sh and scripts/setup_python_env.sh, so the
+# layer build and the unit-test/lint/publish jobs use the same env-var
+# contract and resolve the dep in a single pip pass.
+#
+# Env-var contract (highest precedence first):
+#   DD_TRACE_COMMIT         Specific dd-trace-py commit SHA from GitHub.
+#   DD_TRACE_COMMIT_BRANCH  dd-trace-py branch name from GitHub.
+#   DD_TRACE_WHEEL          Path to a pre-built ddtrace .whl file.
+#   UPSTREAM_PIPELINE_ID    GitLab pipeline ID from dd-trace-py. Looks up the
+#                           matching wheel from S3, trying the smaller
+#                           serverless build first then falling back to the
+#                           standard manylinux2014 build.
+#
+# When none of these are set, spec_ddtrace_dep is a no-op.
+#
+# When UPSTREAM_PIPELINE_ID is set, also requires:
+#   PYTHON_VERSION  e.g. "3.12" (used to build the cpXY platform tag)
+#   ARCH            "amd64" (default) or "arm64"
+
+# Replace the ddtrace dependency block in pyproject.toml.
+# Usage: replace_ddtrace_dep "ddtrace = { ... }"
+replace_ddtrace_dep() {
+    echo "Replacing ddtrace dep with: $1"
+    perl -i -0777 -pe "s|ddtrace = \[[^\]]*\]|$1|gs" pyproject.toml
+}
+
+# Search S3 for a wheel matching basename + index, then rewrite the ddtrace
+# dep to point at the downloaded file. Globals required:
+#   S3_BASE, PY_TAG, PLATFORM
+# Returns 0 on success, 1 if no matching wheel was found at the index.
+_search_and_spec_s3_wheel() {
+    local basename=$1
+    local index=$2
+    local search_pattern="${basename}-[^\"]*${PY_TAG}[^\"]*${PLATFORM}[^\"]*\.whl"
+    local index_url="${S3_BASE}/index-${index}.html"
+    echo "Searching for wheel ${search_pattern} in ${index_url}"
+    local wheel_file
+    wheel_file=$(curl -sSfL "${index_url}" | grep -o "${search_pattern}" | head -n 1 || true)
+    if [ -z "$wheel_file" ]; then
+        return 1
+    fi
+    curl -sSfL "${S3_BASE}/${wheel_file}" -o "${wheel_file}"
+    echo "Using S3 wheel: ${wheel_file}"
+    replace_ddtrace_dep "${basename} = { file = \"${wheel_file}\" }"
+}
+
+# Rewrite pyproject.toml's ddtrace dep based on the env-var precedence above.
+# No-op if no override env var is set. Returns non-zero if UPSTREAM_PIPELINE_ID
+# is set but no matching S3 wheel is found.
+spec_ddtrace_dep() {
+    if [ -n "${DD_TRACE_COMMIT:-}" ]; then
+        replace_ddtrace_dep "ddtrace = { git = \"https://github.com/DataDog/dd-trace-py.git\", rev = \"${DD_TRACE_COMMIT}\" }"
+    elif [ -n "${DD_TRACE_COMMIT_BRANCH:-}" ]; then
+        replace_ddtrace_dep "ddtrace = { git = \"https://github.com/DataDog/dd-trace-py.git\", branch = \"${DD_TRACE_COMMIT_BRANCH}\" }"
+    elif [ -n "${DD_TRACE_WHEEL:-}" ]; then
+        local basename
+        basename="${DD_TRACE_WHEEL##*/}"; basename="${basename%%-*}"  # strip dirs BEFORE cutting at first dash (dashed dirs otherwise break this)
+        replace_ddtrace_dep "${basename} = { file = \"${DD_TRACE_WHEEL}\" }"
+    elif [ -n "${UPSTREAM_PIPELINE_ID:-}" ]; then
+        if [ -z "${PYTHON_VERSION:-}" ]; then
+            echo "ERROR: PYTHON_VERSION must be set when UPSTREAM_PIPELINE_ID is set" >&2
+            return 1
+        fi
+        S3_BASE="https://dd-trace-py-builds.s3.amazonaws.com/${UPSTREAM_PIPELINE_ID}"
+        PY_TAG="cp$(echo "$PYTHON_VERSION" | tr -d '.')"
+        if [ "${ARCH:-amd64}" = "amd64" ]; then
+            PLATFORM="manylinux2014_x86_64"
+        else
+            PLATFORM="manylinux2014_aarch64"
+        fi
+        _search_and_spec_s3_wheel "ddtrace_serverless" "serverless" \
+            || _search_and_spec_s3_wheel "ddtrace" "manylinux2014" \
+            || { echo "ERROR: No matching ddtrace wheel for ${PY_TAG} ${PLATFORM} in pipeline ${UPSTREAM_PIPELINE_ID}" >&2; return 1; }
+    fi
+}
diff --git a/scripts/build_layers.sh b/scripts/build_layers.sh
index 8cdfc153..7c3bd5fa 100755
--- a/scripts/build_layers.sh
+++ b/scripts/build_layers.sh
@@ -88,68 +88,16 @@ cleanup() {
 }
 trap cleanup EXIT
 
-# Helper: replace the multi-line ddtrace dependency in pyproject.toml.
-# Uses perl instead of sed -z for macOS/Linux portability.
-replace_ddtrace_dep() {
-    echo "Replacing dep with $1"
-    perl -i -0777 -pe "s|ddtrace = \[[^\]]*\]|$1|gs" pyproject.toml
-}
+# Source the shared ddtrace-dep specification logic. spec_ddtrace_dep reads
+# DD_TRACE_COMMIT / DD_TRACE_COMMIT_BRANCH / DD_TRACE_WHEEL / UPSTREAM_PIPELINE_ID
+# (PYTHON_VERSION + ARCH for the S3 path) and rewrites the ddtrace dep block
+# in pyproject.toml.
+source "$(dirname "$0")/_spec_ddtrace_dep.sh"
 
 function make_path_absolute {
     echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
 }
 
-function search_wheel {
-    # Args: [wheel base name] [index]
-
-    WHEEL_BASENAME=$1
-    INDEX=$2
-
-    SEARCH_PATTERN="${WHEEL_BASENAME}-[^\"]*${PY_TAG}[^\"]*${PLATFORM}[^\"]*\.whl"
-    INDEX_URL="${S3_BASE}/index-${INDEX}.html"
-    echo "Searching for wheel ${SEARCH_PATTERN}"
-    export WHEEL_FILE=$(curl -sSfL ${INDEX_URL} | grep -o "$SEARCH_PATTERN" | head -n 1)
-    if [ ! -z "${WHEEL_FILE}" ]; then
-        curl -sSfL "${S3_BASE}/${WHEEL_FILE}" -o "${WHEEL_FILE}"
-        echo "Using S3 wheel: ${WHEEL_FILE}"
-        replace_ddtrace_dep "${WHEEL_BASENAME} = { file = \"${WHEEL_FILE}\" }"
-    fi
-}
-
-function find_and_spec_wheel {
-    # Args: [python version] [wheel base name] [index]
-
-    arch=$2
-    wheel_basename=$3
-    index=$4
-
-    # Restore pyproject.toml to a clean state for each build iteration
-    cp pyproject.toml.bak pyproject.toml
-
-    # Replace ddtrace source if necessary
-    if [ -n "$DD_TRACE_COMMIT" ]; then
-        replace_ddtrace_dep "${wheel_basename} = { git = \"https://github.com/DataDog/dd-trace-py.git\", rev = \"$DD_TRACE_COMMIT\" }"
-    elif [ -n "$DD_TRACE_COMMIT_BRANCH" ]; then
-        replace_ddtrace_dep "${wheel_basename} = { git = \"https://github.com/DataDog/dd-trace-py.git\", branch = \"$DD_TRACE_COMMIT_BRANCH\" }"
-    elif [ -n "$DD_TRACE_WHEEL" ]; then
-        wheel_basename=$(sed 's/^.*\///' <<< ${DD_TRACE_WHEEL%%-*})
-        replace_ddtrace_dep "${wheel_basename} = { file = \"$DD_TRACE_WHEEL\" }"
-    elif [ -n "$UPSTREAM_PIPELINE_ID" ]; then
-        S3_BASE="https://dd-trace-py-builds.s3.amazonaws.com/${UPSTREAM_PIPELINE_ID}"
-        if [ "${arch}" = "amd64" ]; then
-            PLATFORM="manylinux2014_x86_64"
-        else
-            PLATFORM="manylinux2014_aarch64"
-        fi
-        PY_TAG="cp$(echo "$1" | tr -d '.')"
-        search_wheel ${wheel_basename} ${index}
-        if [ -z "${WHEEL_FILE}" ]; then
-            echo "No S3 wheel found for ${PY_TAG} ${PLATFORM}, using default pyproject.toml version"
-            return 1
-        fi
-    fi
-}
-
 function docker_build_zip {
     # Args: [python version] [zip destination]
@@ -180,14 +128,10 @@ do
     for architecture in "${ARCHS[@]}"
     do
         echo "Building layer for Python ${python_version} arch=${architecture}"
-        set +e
-        find_and_spec_wheel ${python_version} ${architecture} "ddtrace_serverless" "serverless"
-        FAILURE=$?
-        if [ $FAILURE != 0 ]; then
-            echo "Attempting layer build again with package ddtrace"
-            find_and_spec_wheel ${python_version} ${architecture} "ddtrace" "manylinux2014"
-        fi
-        set -e
+        # Restore pyproject.toml to a clean state before each iteration so the
+        # rewrite is deterministic regardless of what the previous loop did.
+        cp pyproject.toml.bak pyproject.toml
+        PYTHON_VERSION="${python_version}" ARCH="${architecture}" spec_ddtrace_dep
         docker_build_zip ${python_version} $LAYER_DIR/${LAYER_FILES_PREFIX}-${architecture}-${python_version}.zip ${architecture}
     done
 done
diff --git a/scripts/setup_python_env.sh b/scripts/setup_python_env.sh
new file mode 100755
index 00000000..cf877bb6
--- /dev/null
+++ b/scripts/setup_python_env.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Unless explicitly stated otherwise all files in this repository are licensed
+# under the Apache License Version 2.0.
+# This product includes software developed at Datadog (https://www.datadoghq.com/).
+
+# Sets up the Python environment for the lint, unit-test, and publish-pypi
+# CI jobs (and for local repro of those flows). Replaces the inline
+# .python-before-script anchor previously defined in
+# ci/input_files/build.yaml.tpl.
+#
+# Steps:
+#   1. (Optional) Rewrite pyproject.toml's ddtrace dep based on the
+#      env-var contract documented in scripts/_spec_ddtrace_dep.sh
+#      (DD_TRACE_COMMIT / DD_TRACE_COMMIT_BRANCH / DD_TRACE_WHEEL /
+#      UPSTREAM_PIPELINE_ID). When dd-trace-py's CI triggers this repo's
+#      pipeline it sets UPSTREAM_PIPELINE_ID, so the unit-test job
+#      exercises the PR's wheel rather than the released ddtrace.
+#   2. Create and activate a virtualenv ("venv/").
+#   3. Install lambda-python's runtime + dev dependencies. pip resolves the
+#      whole graph in one pass against the (possibly rewritten) pyproject.toml,
+#      so any version conflicts surface as install errors instead of
+#      runtime surprises.
+#   4. Install poetry.
+#
+# Same dep-resolution path as scripts/build_layers.sh — both source
+# scripts/_spec_ddtrace_dep.sh.
+#
+# DD_TRACE_COMMIT / DD_TRACE_COMMIT_BRANCH build ddtrace from source, which
+# requires cargo, cmake, and a C/C++ toolchain — not present in the slim
+# Python runner images. They are intended for local repro / git-bisect
+# workflows. The dd-trace-py CI trigger uses UPSTREAM_PIPELINE_ID.
+#
+# Venv contract: this script sources venv/bin/activate inside its own
+# subshell, so the activation does NOT persist into the calling job. Calling
+# jobs must `source venv/bin/activate` themselves before running their
+# command (matching the existing pattern in build.yaml.tpl).
+#
+# Environment variables:
+#   PYTHON_VERSION  Python minor version (e.g. 3.12 or just 12). Required
+#                   when the UPSTREAM_PIPELINE_ID branch is taken.
+
+set -e
+
+# Normalize Python version shorthand (e.g. 12 -> 3.12, 3.12 -> 3.12)
+if [ -n "${PYTHON_VERSION:-}" ]; then
+    if [[ "$PYTHON_VERSION" =~ ^[0-9]+$ ]]; then
+        PYTHON_VERSION="3.${PYTHON_VERSION}"
+    fi
+fi
+
+# Backup pyproject.toml so the rewrite doesn't persist across runs (matters
+# for local invocations; CI runners are ephemeral but cheap to be tidy).
+cp pyproject.toml pyproject.toml.bak
+cleanup() {
+    mv pyproject.toml.bak pyproject.toml 2>/dev/null || true
+}
+trap cleanup EXIT
+
+source "$(dirname "$0")/_spec_ddtrace_dep.sh"
+spec_ddtrace_dep
+
+pip install virtualenv
+virtualenv venv
+source venv/bin/activate
+pip install ".[dev]"
+pip install poetry
+
+python -c "import ddtrace; print('ddtrace version:', ddtrace.__version__)"