From 972bf96e31ad98fd4052c05c0b2dfc8e4b23791d Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 16 Jul 2020 23:15:31 -0400 Subject: [PATCH] use multi-stage builds in Dockerfile to reduce final image size This commit reduces the uncompressed image size by about 1 GB. A major change is that a python virtual environment is used within the Docker image. This is done so that all installed dependencies are below one directory, and this directory can be copied into the final build stage. --- docker/Dockerfile | 56 ++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 456ab4d1..a3115113 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,31 +1,47 @@ -FROM python:3.6-slim +FROM python:3.6-slim as builder ARG DEBIAN_FRONTEND="noninteractive" WORKDIR /opt/pliers COPY . . -RUN chmod a+rX -R . -RUN apt-get update -qq \ - && tmp_pkgs="cmake gcc g++ libc6-dev libgraphviz-dev libmagic-dev make" \ +# Install dependencies into a virtual environment so they can be easily copied into +# the second stage. +ENV PATH="/opt/venv/bin:$PATH" +RUN chmod a+rX -R . \ + && apt-get update -qq \ + && apt-get install -yq --no-install-recommends \ + cmake \ + gcc \ + g++ \ + libc6-dev \ + libgraphviz-dev \ + libmagic-dev \ + make \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* +# Run this in different layers for faster rebuilds when debugging. The extra layers +# do not impose a size penalty, because we build the final image in a separate stage. +RUN python -m venv /opt/venv +RUN python -m pip install --no-cache-dir --requirement requirements.txt +RUN python -m pip install --no-cache-dir --requirement optional-dependencies.txt +RUN python -m pip install ipython notebook +RUN python -m pip install --no-cache-dir --editable . 
+RUN python -m spacy download en_core_web_sm + +FROM python:3.6-slim +RUN useradd --no-user-group --create-home --shell /bin/bash pliers \ + # Empty top level directories to facilitate use of the image in singularity + # on a box with kernel lacking overlay FS support + && mkdir -p /data /backup \ + && apt-get update -qq \ && apt-get install -yq --no-install-recommends \ ffmpeg \ graphviz \ libmagic1 \ tesseract-ocr \ - $tmp_pkgs \ - && pip install --no-cache-dir \ - --requirement requirements.txt \ - --requirement optional-dependencies.txt \ - ipython \ - notebook \ - && pip install --no-cache-dir --editable . \ - && python -m spacy download en_core_web_sm \ - && rm -rf ~/.cache/pip \ - && apt-get autoremove --purge -yq $tmp_pkgs \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - && useradd --no-user-group --create-home --shell /bin/bash pliers -# Empty top level directories to facilitate use of the image in singularity -# on a box with kernel lacking overlay FS support -RUN mkdir -p /data /backup + && rm -rf /var/lib/apt/lists/* +COPY --from=builder --chown=pliers /opt/venv /opt/venv +COPY --from=builder --chown=pliers /opt/pliers /opt/pliers +ENV PATH="/opt/venv/bin:$PATH" USER pliers RUN python -m pliers.support.download -WORKDIR /work \ No newline at end of file +WORKDIR /work