From 0e2a95e7c2c561e3f11e91385280de80d9b7ddc3 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 21 Oct 2019 15:20:59 -0400 Subject: [PATCH] Reduce docker image size & introduce per-python prefect images --- .circleci/config.yml | 86 +++++++++++++++---- Dockerfile | 22 +++-- src/prefect/environments/storage/docker.py | 60 +++++++++---- .../storage/test_docker_storage.py | 34 ++++++-- 4 files changed, 153 insertions(+), 49 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1e9eff3b65b3..f40341f04590 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,4 +1,4 @@ -version: 2 +version: 2.1 references: workspace_root: &workspace_root /tmp/workspace @@ -6,6 +6,7 @@ references: attach_workspace: at: *workspace_root + jobs: # ---------------------------------- # Check formatting @@ -147,7 +148,7 @@ jobs: # Run unit tests in Python 3.5-3.7 # ---------------------------------- - test-3.5: + test-35: docker: - image: python:3.5 @@ -176,7 +177,7 @@ jobs: paths: - coverage - test-3.6: + test-36: docker: - image: python:3.6 steps: @@ -204,7 +205,7 @@ jobs: paths: - coverage - test-3.7: + test-37: docker: - image: python:3.7 steps: @@ -266,41 +267,74 @@ jobs: name: Upload Coverage command: bash <(curl -s https://codecov.io/bash) -cF python -s "/tmp/workspace/coverage/" - build_image: + build-docker-image: docker: - image: docker + parameters: + python-version: + type: string + tag-latest: + type: boolean + default: false + environment: + PYTHON_VERSION: << parameters.python-version >> + PYTHON_TAG: python<< parameters.python-version >> steps: - - setup_remote_docker - checkout - - run: - name: Docker Build - command: docker build -t prefecthq/prefect . + - setup_remote_docker: + docker_layer_caching: true + - run: + name: Build image + command: >- + docker build + --build-arg GIT_SHA=$CIRCLE_SHA1 + --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') + --build-arg PREFECT_VERSION=$CIRCLE_TAG + -t prefecthq/prefect:${CIRCLE_TAG}-${PYTHON_TAG} + -t prefecthq/prefect:$PYTHON_TAG + . + - when: + condition: << parameters.tag-latest >> + steps: + - run: + name: Tag latest image + command: | + docker tag prefecthq/prefect:${CIRCLE_TAG}-${PYTHON_TAG} prefecthq/prefect:latest - run: name: Test image command: | docker run -dit prefecthq/prefect /bin/bash -c 'curl -fL0 https://raw.githubusercontent.com/PrefectHQ/prefect/master/examples/retries_with_mapping.py | python' - run: - name: Authenticate with Docker Hub and push + name: Push versioned tags command: | docker login --username $DOCKER_HUB_USER --password $DOCKER_HUB_PW - docker push prefecthq/prefect + docker push prefecthq/prefect:${CIRCLE_TAG}-${PYTHON_TAG} + docker push prefecthq/prefect:$PYTHON_TAG + - when: + condition: << parameters.tag-latest >> + steps: + - run: + name: Push latest tag + command: | + docker login --username $DOCKER_HUB_USER --password $DOCKER_HUB_PW + docker push prefecthq/prefect:latest workflows: version: 2 "Run tests": jobs: - - test-3.5 - - test-3.6 - - test-3.7 + - test-35 + - test-36 + - test-37 - test-lower-prefect - test-vanilla-prefect - test-py352-import-prefect - test-airflow - upload-coverage: requires: - - test-3.5 - - test-3.6 + - test-35 + - test-36 - test-vanilla-prefect - test-airflow @@ -312,7 +346,25 @@ workflows: "Build docker images": jobs: - - build_image: + - build-docker-image: + python-version: "3.5" + filters: + branches: + only: master + tags: + only: /^[0-9]+\.[0-9]+\.[0-9]+$/ + - build-docker-image: + python-version: "3.6" + filters: + branches: + only: master + tags: + only: /^[0-9]+\.[0-9]+\.[0-9]+$/ + - build-docker-image: + python-version: "3.7" + tag-latest: true filters: branches: only: master + tags: + only: /^[0-9]+\.[0-9]+\.[0-9]+$/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 72437519daa2..0261a66980d7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,20 @@ -ARG PYTHON_VERSION=3.6 +ARG PYTHON_VERSION=${PYTHON_VERSION:-3.6} +FROM python:${PYTHON_VERSION}-slim -FROM python:${PYTHON_VERSION} -LABEL maintainer="help@prefect.io" -ARG GIT_POINTER=master +RUN apt update && apt install -y gcc git && rm -rf /var/lib/apt/lists/* -RUN pip install git+https://github.com/PrefectHQ/prefect.git@${GIT_POINTER}#egg=prefect[kubernetes] +ARG PREFECT_VERSION +RUN pip install git+https://github.com/PrefectHQ/prefect.git@${PREFECT_VERSION}#egg=prefect[kubernetes] RUN mkdir /root/.prefect/ + +ARG GIT_SHA +ARG BUILD_DATE + +LABEL maintainer="help@prefect.io" +LABEL io.prefect.python-version=${PYTHON_VERSION} +LABEL org.label-schema.schema-version = "1.0" +LABEL org.label-schema.name="prefect" +LABEL org.label-schema.url="https://www.prefect.io/" +LABEL org.label-schema.version=${PREFECT_VERSION} +LABEL org.label-schema.vcs-ref=${GIT_SHA} +LABEL org.label-schema.build-date=${BUILD_DATE} \ No newline at end of file diff --git a/src/prefect/environments/storage/docker.py b/src/prefect/environments/storage/docker.py index 61ef7068b2bb..f0232fc74d39 100644 --- a/src/prefect/environments/storage/docker.py +++ b/src/prefect/environments/storage/docker.py @@ -2,6 +2,7 @@ import json import logging import os +import re import shutil import sys import tempfile @@ -60,17 +61,10 @@ def __init__( base_url: str = None, prefect_version: str = None, local_image: bool = False, + install_prefect: bool = False, ) -> None: self.registry_url = registry_url - if base_image is None: - python_version = "{}.{}".format( - sys.version_info.major, sys.version_info.minor - ) - self.base_image = "python:{}".format(python_version) - else: - self.base_image = base_image - if sys.platform == "win32": default_url = "npipe:////./pipe/docker_engine" else: @@ -79,12 +73,18 @@ def __init__( self.image_name = image_name self.image_tag = image_tag self.python_dependencies = python_dependencies or [] + self.python_dependencies.append("wheel") + self.env_vars = env_vars or {} + self.env_vars["PREFECT__USER_CONFIG_PATH"] = "/root/.prefect/config.toml" + self.files = files or {} self.flows = dict() # type: Dict[str, str] self._flows = dict() # type: Dict[str, "prefect.core.flow.Flow"] self.base_url = base_url or default_url self.local_image = local_image + self.install_prefect_package = install_prefect + self.initial_commands = [] version = prefect.__version__.split("+") if prefect_version is None: @@ -92,6 +92,28 @@ def __init__( else: self.prefect_version = prefect_version + if base_image is None: + python_version = "{}.{}".format( + sys.version_info.major, sys.version_info.minor + ) + if re.match("^[0-9]+\.[0-9]+\.[0-9]+$", self.prefect_version) != None: + # note: this does not necessarily mean that we have built/pushed all previous prefect versions to dockerhub + self.base_image = "prefecthq/prefect:{}-python{}".format( + self.prefect_version, python_version + ) + elif self.prefect_version == "master": + # use the latest image for the given python version + self.base_image = "prefecthq/prefect:python{}".format(python_version) + else: + # create an image from python:*-slim directly + self.base_image = "python:{}-slim".format(python_version) + self.install_prefect_package = True + self.initial_commands.append( + "apt update && apt install -y gcc git && rm -rf /var/lib/apt/lists/*" + ) + else: + self.base_image = base_image + not_absolute = [ file_path for file_path in self.files if not os.path.isabs(file_path) ] @@ -311,11 +333,11 @@ def create_dockerfile_object(self, directory: str = None) -> None: with open(os.path.join(directory, "Dockerfile"), "w+") as dockerfile: - # Generate RUN pip install commands for python dependencies - pip_installs = "" + # Generate single pip install command for python dependencies + pip_installs = "RUN pip install " if self.python_dependencies: for dependency in self.python_dependencies: - pip_installs += "RUN pip install {}\n".format(dependency) + pip_installs += "{} ".format(dependency) # Generate ENV variables to load into the image env_vars = "" @@ -355,6 +377,11 @@ def create_dockerfile_object(self, directory: str = None) -> None: source="{}.flow".format(clean_name), dest=flow_location ) + # Write all extra commands that should be run in the image + initial_commands = "" + for cmd in self.initial_commands: + initial_commands += "RUN {}".format(cmd) + # Write a healthcheck script into the image with open( os.path.join(os.path.dirname(__file__), "_healthcheck.py"), "r" @@ -368,29 +395,26 @@ def create_dockerfile_object(self, directory: str = None) -> None: """\ FROM {base_image} + {initial_commands} + RUN pip install pip --upgrade - RUN pip install wheel {pip_installs} - RUN mkdir /root/.prefect/ + RUN mkdir -p /root/.prefect/ {copy_flows} COPY healthcheck.py /root/.prefect/healthcheck.py {copy_files} - ENV PREFECT__USER_CONFIG_PATH="/root/.prefect/config.toml" {env_vars} - # update version if base image already has prefect installed - RUN pip install -U git+https://github.com/PrefectHQ/prefect.git@{version}#egg=prefect[kubernetes] - RUN python /root/.prefect/healthcheck.py '[{flow_file_paths}]' '{python_version}' """.format( + initial_commands=initial_commands, base_image=self.base_image, pip_installs=pip_installs, copy_flows=copy_flows, copy_files=copy_files, env_vars=env_vars, - version=self.prefect_version, flow_file_paths=", ".join( ['"{}"'.format(k) for k in self.flows.values()] ), diff --git a/tests/environments/storage/test_docker_storage.py b/tests/environments/storage/test_docker_storage.py index af2bcb48fd0b..40412fdff9a2 100644 --- a/tests/environments/storage/test_docker_storage.py +++ b/tests/environments/storage/test_docker_storage.py @@ -47,11 +47,13 @@ def test_empty_docker_storage(monkeypatch, platform, url): storage = Docker() assert not storage.registry_url - assert storage.base_image.startswith("python:") + assert storage.base_image.startswith("prefecthq/prefect:python") assert not storage.image_name assert not storage.image_tag - assert not storage.python_dependencies - assert not storage.env_vars + assert storage.python_dependencies == ["wheel"] + assert storage.env_vars == { + "PREFECT__USER_CONFIG_PATH": "/root/.prefect/config.toml" + } assert not storage.files assert storage.prefect_version assert storage.base_url == url @@ -63,7 +65,7 @@ def test_docker_init_responds_to_python_version(monkeypatch, version_info): version_mock = MagicMock(major=version_info[0], minor=version_info[1]) monkeypatch.setattr(sys, "version_info", version_mock) storage = Docker() - assert storage.base_image == "python:{}.{}".format(*version_info) + assert storage.base_image == "prefecthq/prefect:python{}.{}".format(*version_info) @pytest.mark.parametrize( @@ -97,8 +99,11 @@ def test_initialized_docker_storage(): assert storage.base_image == "test3" assert storage.image_name == "test4" assert storage.image_tag == "test5" - assert storage.python_dependencies == ["test"] - assert storage.env_vars == {"test": "1"} + assert storage.python_dependencies == ["test", "wheel"] + assert storage.env_vars == { + "test": "1", + "PREFECT__USER_CONFIG_PATH": "/root/.prefect/config.toml", + } assert storage.base_url == "test_url" assert storage.prefect_version == "my-branch" assert storage.local_image @@ -301,8 +306,19 @@ def test_create_dockerfile_from_base_image(): assert "FROM python:3.6" in output -def test_create_dockerfile_from_prefect_version(): - storage = Docker(prefect_version="master") +@pytest.mark.parametrize( + "prefect_version", + [ + ("0.5.3", "FROM prefecthq/prefect:0.5.3-python3.6"), + ("master", "FROM prefecthq/prefect:python3.6"), + ("0.5.2+999.gr34343.dirty", "FROM python:3.6-slim"), + ], +) +def test_create_dockerfile_from_prefect_version(monkeypatch, prefect_version): + version_mock = MagicMock(major=3, minor=6) + monkeypatch.setattr(sys, "version_info", version_mock) + + storage = Docker(prefect_version=prefect_version[0]) with tempfile.TemporaryDirectory() as tempdir: storage.create_dockerfile_object(directory=tempdir) @@ -310,7 +326,7 @@ def test_create_dockerfile_from_prefect_version(): with open(os.path.join(tempdir, "Dockerfile"), "r") as dockerfile: output = dockerfile.read() - assert "prefect.git@master" in output + assert prefect_version[1] in output def test_create_dockerfile_with_weird_flow_name():