Skip to content

Commit

Permalink
Reduce docker image size & introduce per-python prefect images
Browse files (browse the repository at this point in the history)
  • Loading branch information
Alex Goodman committed Oct 21, 2019
1 parent ac5ddd0 commit 0e2a95e
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 49 deletions.
86 changes: 69 additions & 17 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
version: 2
version: 2.1

references:
workspace_root: &workspace_root /tmp/workspace
attach_workspace: &attach_workspace
attach_workspace:
at: *workspace_root


jobs:
# ----------------------------------
# Check formatting
Expand Down Expand Up @@ -147,7 +148,7 @@ jobs:
# Run unit tests in Python 3.5-3.7
# ----------------------------------

test-3.5:
test-35:
docker:
- image: python:3.5

Expand Down Expand Up @@ -176,7 +177,7 @@ jobs:
paths:
- coverage

test-3.6:
test-36:
docker:
- image: python:3.6
steps:
Expand Down Expand Up @@ -204,7 +205,7 @@ jobs:
paths:
- coverage

test-3.7:
test-37:
docker:
- image: python:3.7
steps:
Expand Down Expand Up @@ -266,41 +267,74 @@ jobs:
name: Upload Coverage
command: bash <(curl -s https://codecov.io/bash) -cF python -s "/tmp/workspace/coverage/"

build_image:
build-docker-image:
docker:
- image: docker
parameters:
python-version:
type: string
tag-latest:
type: boolean
default: false
environment:
PYTHON_VERSION: << parameters.python-version >>
PYTHON_TAG: python<< parameters.python-version >>
steps:
- setup_remote_docker
- checkout
- run:
name: Docker Build
command: docker build -t prefecthq/prefect .
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Build image
command: >-
docker build
--build-arg GIT_SHA=$CIRCLE_SHA1
--build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
--build-arg PREFECT_VERSION=$CIRCLE_TAG
-t prefecthq/prefect:${CIRCLE_TAG}-${PYTHON_TAG}
-t prefecthq/prefect:$PYTHON_TAG
.
- when:
condition: << parameters.tag-latest >>
steps:
- run:
name: Tag latest image
command: |
docker tag prefecthq/prefect:${CIRCLE_TAG}-${PYTHON_TAG} prefecthq/prefect:latest
- run:
name: Test image
command: |
docker run -dit prefecthq/prefect /bin/bash -c 'curl -fL0 https://raw.githubusercontent.com/PrefectHQ/prefect/master/examples/retries_with_mapping.py | python'
- run:
name: Authenticate with Docker Hub and push
name: Push versioned tags
command: |
docker login --username $DOCKER_HUB_USER --password $DOCKER_HUB_PW
docker push prefecthq/prefect
docker push prefecthq/prefect:${CIRCLE_TAG}-${PYTHON_TAG}
docker push prefecthq/prefect:$PYTHON_TAG
- when:
condition: << parameters.tag-latest >>
steps:
- run:
name: Push latest tag
command: |
docker login --username $DOCKER_HUB_USER --password $DOCKER_HUB_PW
docker push prefecthq/prefect:latest
workflows:
version: 2

"Run tests":
jobs:
- test-3.5
- test-3.6
- test-3.7
- test-35
- test-36
- test-37
- test-lower-prefect
- test-vanilla-prefect
- test-py352-import-prefect
- test-airflow
- upload-coverage:
requires:
- test-3.5
- test-3.6
- test-35
- test-36
- test-vanilla-prefect
- test-airflow

Expand All @@ -312,7 +346,25 @@ workflows:

"Build docker images":
jobs:
- build_image:
- build-docker-image:
python-version: "3.5"
filters:
branches:
only: master
tags:
only: /^[0-9]+\.[0-9]+\.[0-9]+$/
- build-docker-image:
python-version: "3.6"
filters:
branches:
only: master
tags:
only: /^[0-9]+\.[0-9]+\.[0-9]+$/
- build-docker-image:
python-version: "3.7"
tag-latest: true
filters:
branches:
only: master
tags:
only: /^[0-9]+\.[0-9]+\.[0-9]+$/
22 changes: 17 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
ARG PYTHON_VERSION=3.6
ARG PYTHON_VERSION=${PYTHON_VERSION:-3.6}
FROM python:${PYTHON_VERSION}-slim

FROM python:${PYTHON_VERSION}
LABEL maintainer="help@prefect.io"
ARG GIT_POINTER=master
RUN apt update && apt install -y gcc git && rm -rf /var/lib/apt/lists/*

RUN pip install git+https://github.com/PrefectHQ/prefect.git@${GIT_POINTER}#egg=prefect[kubernetes]
ARG PREFECT_VERSION
RUN pip install git+https://github.com/PrefectHQ/prefect.git@${PREFECT_VERSION}#egg=prefect[kubernetes]
RUN mkdir /root/.prefect/

ARG GIT_SHA
ARG BUILD_DATE

LABEL maintainer="help@prefect.io"
LABEL io.prefect.python-version=${PYTHON_VERSION}
LABEL org.label-schema.schema-version = "1.0"
LABEL org.label-schema.name="prefect"
LABEL org.label-schema.url="https://www.prefect.io/"
LABEL org.label-schema.version=${PREFECT_VERSION}
LABEL org.label-schema.vcs-ref=${GIT_SHA}
LABEL org.label-schema.build-date=${BUILD_DATE}
60 changes: 42 additions & 18 deletions src/prefect/environments/storage/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import logging
import os
import re
import shutil
import sys
import tempfile
Expand Down Expand Up @@ -60,17 +61,10 @@ def __init__(
base_url: str = None,
prefect_version: str = None,
local_image: bool = False,
install_prefect: bool = False,
) -> None:
self.registry_url = registry_url

if base_image is None:
python_version = "{}.{}".format(
sys.version_info.major, sys.version_info.minor
)
self.base_image = "python:{}".format(python_version)
else:
self.base_image = base_image

if sys.platform == "win32":
default_url = "npipe:////./pipe/docker_engine"
else:
Expand All @@ -79,19 +73,47 @@ def __init__(
self.image_name = image_name
self.image_tag = image_tag
self.python_dependencies = python_dependencies or []
self.python_dependencies.append("wheel")

self.env_vars = env_vars or {}
self.env_vars["PREFECT__USER_CONFIG_PATH"] = "/root/.prefect/config.toml"

self.files = files or {}
self.flows = dict() # type: Dict[str, str]
self._flows = dict() # type: Dict[str, "prefect.core.flow.Flow"]
self.base_url = base_url or default_url
self.local_image = local_image
self.install_prefect_package = install_prefect
self.initial_commands = []

version = prefect.__version__.split("+")
if prefect_version is None:
self.prefect_version = "master" if len(version) > 1 else version[0]
else:
self.prefect_version = prefect_version

if base_image is None:
python_version = "{}.{}".format(
sys.version_info.major, sys.version_info.minor
)
if re.match("^[0-9]+\.[0-9]+\.[0-9]+$", self.prefect_version) != None:
# note: a release-style version string does not guarantee that a matching prefecthq/prefect image was ever built and pushed to Docker Hub
self.base_image = "prefecthq/prefect:{}-python{}".format(
self.prefect_version, python_version
)
elif self.prefect_version == "master":
# use the latest image for the given python version
self.base_image = "prefecthq/prefect:python{}".format(python_version)
else:
# create an image from python:*-slim directly
self.base_image = "python:{}-slim".format(python_version)
self.install_prefect_package = True
self.initial_commands.append(
"apt update && apt install -y gcc git && rm -rf /var/lib/apt/lists/*"
)
else:
self.base_image = base_image

not_absolute = [
file_path for file_path in self.files if not os.path.isabs(file_path)
]
Expand Down Expand Up @@ -311,11 +333,11 @@ def create_dockerfile_object(self, directory: str = None) -> None:

with open(os.path.join(directory, "Dockerfile"), "w+") as dockerfile:

# Generate RUN pip install commands for python dependencies
pip_installs = ""
# Generate single pip install command for python dependencies
pip_installs = "RUN pip install "
if self.python_dependencies:
for dependency in self.python_dependencies:
pip_installs += "RUN pip install {}\n".format(dependency)
pip_installs += "{} ".format(dependency)

# Generate ENV variables to load into the image
env_vars = ""
Expand Down Expand Up @@ -355,6 +377,11 @@ def create_dockerfile_object(self, directory: str = None) -> None:
source="{}.flow".format(clean_name), dest=flow_location
)

# Write all extra commands that should be run in the image
initial_commands = ""
for cmd in self.initial_commands:
initial_commands += "RUN {}".format(cmd)

# Write a healthcheck script into the image
with open(
os.path.join(os.path.dirname(__file__), "_healthcheck.py"), "r"
Expand All @@ -368,29 +395,26 @@ def create_dockerfile_object(self, directory: str = None) -> None:
"""\
FROM {base_image}
{initial_commands}
RUN pip install pip --upgrade
RUN pip install wheel
{pip_installs}
RUN mkdir /root/.prefect/
RUN mkdir -p /root/.prefect/
{copy_flows}
COPY healthcheck.py /root/.prefect/healthcheck.py
{copy_files}
ENV PREFECT__USER_CONFIG_PATH="/root/.prefect/config.toml"
{env_vars}
# update version if base image already has prefect installed
RUN pip install -U git+https://github.com/PrefectHQ/prefect.git@{version}#egg=prefect[kubernetes]
RUN python /root/.prefect/healthcheck.py '[{flow_file_paths}]' '{python_version}'
""".format(
initial_commands=initial_commands,
base_image=self.base_image,
pip_installs=pip_installs,
copy_flows=copy_flows,
copy_files=copy_files,
env_vars=env_vars,
version=self.prefect_version,
flow_file_paths=", ".join(
['"{}"'.format(k) for k in self.flows.values()]
),
Expand Down
34 changes: 25 additions & 9 deletions tests/environments/storage/test_docker_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,13 @@ def test_empty_docker_storage(monkeypatch, platform, url):
storage = Docker()

assert not storage.registry_url
assert storage.base_image.startswith("python:")
assert storage.base_image.startswith("prefecthq/prefect:python")
assert not storage.image_name
assert not storage.image_tag
assert not storage.python_dependencies
assert not storage.env_vars
assert storage.python_dependencies == ["wheel"]
assert storage.env_vars == {
"PREFECT__USER_CONFIG_PATH": "/root/.prefect/config.toml"
}
assert not storage.files
assert storage.prefect_version
assert storage.base_url == url
Expand All @@ -63,7 +65,7 @@ def test_docker_init_responds_to_python_version(monkeypatch, version_info):
version_mock = MagicMock(major=version_info[0], minor=version_info[1])
monkeypatch.setattr(sys, "version_info", version_mock)
storage = Docker()
assert storage.base_image == "python:{}.{}".format(*version_info)
assert storage.base_image == "prefecthq/prefect:python{}.{}".format(*version_info)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -97,8 +99,11 @@ def test_initialized_docker_storage():
assert storage.base_image == "test3"
assert storage.image_name == "test4"
assert storage.image_tag == "test5"
assert storage.python_dependencies == ["test"]
assert storage.env_vars == {"test": "1"}
assert storage.python_dependencies == ["test", "wheel"]
assert storage.env_vars == {
"test": "1",
"PREFECT__USER_CONFIG_PATH": "/root/.prefect/config.toml",
}
assert storage.base_url == "test_url"
assert storage.prefect_version == "my-branch"
assert storage.local_image
Expand Down Expand Up @@ -301,16 +306,27 @@ def test_create_dockerfile_from_base_image():
assert "FROM python:3.6" in output


def test_create_dockerfile_from_prefect_version():
storage = Docker(prefect_version="master")
@pytest.mark.parametrize(
"prefect_version",
[
("0.5.3", "FROM prefecthq/prefect:0.5.3-python3.6"),
("master", "FROM prefecthq/prefect:python3.6"),
("0.5.2+999.gr34343.dirty", "FROM python:3.6-slim"),
],
)
def test_create_dockerfile_from_prefect_version(monkeypatch, prefect_version):
version_mock = MagicMock(major=3, minor=6)
monkeypatch.setattr(sys, "version_info", version_mock)

storage = Docker(prefect_version=prefect_version[0])

with tempfile.TemporaryDirectory() as tempdir:
storage.create_dockerfile_object(directory=tempdir)

with open(os.path.join(tempdir, "Dockerfile"), "r") as dockerfile:
output = dockerfile.read()

assert "prefect.git@master" in output
assert prefect_version[1] in output


def test_create_dockerfile_with_weird_flow_name():
Expand Down

0 comments on commit 0e2a95e

Please sign in to comment.