forked from ocrmypdf/OCRmyPDF
/
Dockerfile
54 lines (46 loc) · 1.55 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# OCRmyPDF
#
# VERSION 4.4.2
# Base image for the whole build.
# NOTE(review): Ubuntu 16.10 is past end-of-life, so its apt mirrors may no
# longer serve packages; consider moving to a supported release.
FROM ubuntu:16.10
# LABEL replaces the deprecated MAINTAINER instruction.
LABEL maintainer="James R. Barlow <jim@purplerock.ca>"
# Install the distro packages this image relies on: the PDF/OCR command-line
# tools, the Tesseract language data, and the Python pieces used to build the
# virtualenv. The index refresh, the install, and the removal of the downloaded
# package lists all happen in one layer so the lists never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ghostscript \
    libffi-dev \
    poppler-utils \
    python-software-properties \
    python3-reportlab \
    python3-venv \
    python3-wheel \
    qpdf \
    software-properties-common \
    tesseract-ocr \
    tesseract-ocr-deu \
    tesseract-ocr-eng \
    tesseract-ocr-fra \
    tesseract-ocr-spa \
    unpaper \
    && rm -rf /var/lib/apt/lists/*
# Create the virtualenv; --system-site-packages keeps the distro-installed
# Python packages visible inside it.
RUN python3 -m venv --system-site-packages /appenv
# Install the published ocrmypdf wheel instead of the code in the build
# context. A source install ("pip install .") fails, apparently because cffi
# needs a compiler (build-essential / gcc), which is not installed above.
# NOTE(review): the package version is not pinned, so rebuilds may pick up a
# newer release than the VERSION named in the file header.
# The steps are chained with && so a failed activation aborts the build rather
# than running pip outside the virtualenv; --no-cache-dir keeps pip's download
# cache out of the layer.
RUN . /appenv/bin/activate \
    && pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir ocrmypdf
# Copy only the test requirements first and install them, so this layer stays
# cached until test_requirements.txt itself changes.
COPY test_requirements.txt /application/
RUN . /appenv/bin/activate \
    && pip install --no-cache-dir -r /application/test_requirements.txt
# Now copy the rest of the application in, mainly to get the test suite and
# the wrapper script used as the entrypoint.
COPY . /application
# Remove scratch files and the source copy of the package (the application was
# already installed from its wheel), then let apt drop unneeded packages.
# The /root/* glob does not match dotfiles, so pip's cache directory
# /root/.cache is listed explicitly.
# Deleting here hides the files from the final filesystem but cannot shrink
# layers that were created earlier.
RUN rm -rf /tmp/* /var/tmp/* /root/* /root/.cache /application/ocrmypdf \
    && apt-get autoremove -y \
    && apt-get autoclean -y
# Add the unprivileged "docker" account, then create its home directory
# already owned by that user and group.
RUN useradd docker \
    && install --directory --owner=docker --group=docker /home/docker
# Run from the unprivileged user's home directory, as that user.
WORKDIR /home/docker
USER docker
# ENTRYPOINT must use the exec (JSON array) form: the shell form does not
# append the arguments passed to "docker run" to the command.
ENTRYPOINT ["/application/docker-wrapper.sh"]