-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
175 lines (154 loc) · 5.19 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
FROM alpine:3.9
# Install Python.
# ensure local python is preferred over distribution python
ENV PATH /usr/local/bin:$PATH
# http://bugs.python.org/issue19846
# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
ENV LANG C.UTF-8
# https://github.com/docker-library/python/issues/147
ENV PYTHONIOENCODING UTF-8
# install ca-certificates so that HTTPS works consistently
# other runtime dependencies for Python are installed later
RUN apk add --no-cache ca-certificates
ENV GPG_KEY C01E1CAD5EA2C4F0B8E3571504C367C218ADD4FF
ENV PYTHON_VERSION 2.7.15
RUN set -ex \
&& apk add --no-cache --virtual .fetch-deps \
gnupg \
tar \
xz \
\
&& wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \
&& wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \
&& export GNUPGHOME="$(mktemp -d)" \
&& gpg --batch --keyserver ha.pool.sks-keyservers.net --recv-keys "$GPG_KEY" \
&& gpg --batch --verify python.tar.xz.asc python.tar.xz \
&& { command -v gpgconf > /dev/null && gpgconf --kill all || :; } \
&& rm -rf "$GNUPGHOME" python.tar.xz.asc \
&& mkdir -p /usr/src/python \
&& tar -xJC /usr/src/python --strip-components=1 -f python.tar.xz \
&& rm python.tar.xz \
\
&& apk add --no-cache --virtual .build-deps \
bzip2-dev \
coreutils \
dpkg-dev dpkg \
expat-dev \
findutils \
gcc \
gdbm-dev \
libc-dev \
libffi-dev \
libnsl-dev \
libtirpc-dev \
linux-headers \
make \
ncurses-dev \
openssl-dev \
pax-utils \
readline-dev \
sqlite-dev \
tcl-dev \
tk \
tk-dev \
zlib-dev \
# add build deps before removing fetch deps in case there's overlap
&& apk del .fetch-deps \
\
&& cd /usr/src/python \
&& gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" \
&& ./configure \
--build="$gnuArch" \
--enable-shared \
--enable-unicode=ucs4 \
--with-system-expat \
--with-system-ffi \
&& make -j "$(nproc)" \
# set thread stack size to 1MB so we don't segfault before we hit sys.getrecursionlimit()
# https://github.com/alpinelinux/aports/commit/2026e1259422d4e0cf92391ca2d3844356c649d0
EXTRA_CFLAGS="-DTHREAD_STACK_SIZE=0x100000" \
&& make install \
\
&& find /usr/local -type f -executable -not \( -name '*tkinter*' \) -exec scanelf --needed --nobanner --format '%n#p' '{}' ';' \
| tr ',' '\n' \
| sort -u \
| awk 'system("[ -e /usr/local/lib/" $1 " ]") == 0 { next } { print "so:" $1 }' \
| xargs -rt apk add --no-cache --virtual .python-rundeps \
&& apk del .build-deps \
\
&& find /usr/local -depth \
\( \
\( -type d -a \( -name test -o -name tests \) \) \
-o \
\( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
\) -exec rm -rf '{}' + \
&& rm -rf /usr/src/python \
\
&& python2 --version
# if this is called "PIP_VERSION", pip explodes with "ValueError: invalid truth value '<VERSION>'"
ENV PYTHON_PIP_VERSION 19.0.2
RUN set -ex; \
\
wget -O get-pip.py 'https://bootstrap.pypa.io/get-pip.py'; \
\
python get-pip.py \
--disable-pip-version-check \
--no-cache-dir \
"pip==$PYTHON_PIP_VERSION" \
; \
pip --version; \
\
find /usr/local -depth \
\( \
\( -type d -a \( -name test -o -name tests \) \) \
-o \
\( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
\) -exec rm -rf '{}' +; \
rm -f get-pip.py
# add a simple script that can auto-detect the appropriate JAVA_HOME value
# based on whether the JDK or only the JRE is installed
RUN { \
echo '#!/bin/sh'; \
echo 'set -e'; \
echo; \
echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \
} > /usr/local/bin/docker-java-home \
&& chmod +x /usr/local/bin/docker-java-home
ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
ENV PATH $PATH:/usr/lib/jvm/java-1.8-openjdk/jre/bin:/usr/lib/jvm/java-1.8-openjdk/bin
ENV JAVA_VERSION 8u191
ENV JAVA_ALPINE_VERSION 8.191.12-r0
RUN set -x \
&& apk add --no-cache \
openjdk8="$JAVA_ALPINE_VERSION" \
&& [ "$JAVA_HOME" = "$(docker-java-home)" ]
# Add crontab file in the cron directory
ADD crontab /etc/cron.d/simple-cron
# Create the log file to be able to run tail
RUN touch /var/log/cron.log
# Give execution rights on the cron job
RUN chmod 0644 /etc/cron.d/simple-cron
WORKDIR /usr/src/app
COPY requirements.txt ./
# Install gcc
RUN apk add --no-cache --virtual .build-deps gcc musl-dev
RUN apk add linux-headers
RUN pip2 install setuptools
RUN pip2 install --no-cache-dir -r requirements.txt
# Remove gcc
RUN apk del .build-deps gcc musl-dev
RUN python -m nltk.downloader punkt
RUN python -m nltk.downloader stopwords
RUN python -m nltk.downloader averaged_perceptron_tagger
# Install bash
RUN apk add --no-cache bash
RUN wget http://nlp.stanford.edu/software/stanford-corenlp-full-2018-10-05.zip
RUN mkdir stanford-corenlp && unzip stanford-corenlp-full-2018-10-05.zip \
&& mv stanford-corenlp-full-2018-10-05/* stanford-corenlp/ \
&& rmdir stanford-corenlp-full-2018-10-05 \
&& rm stanford-corenlp-full-2018-10-05.zip
COPY . .
# Set Islandora API password
ENV ISLANDORA_PASSWORD SECRET
# Run the command on container startup
CMD chmod +x /usr/src/app/run.sh && echo ${ISLANDORA_PASSWORD} >> /usr/src/app/UploadContent/configuration.ini && crond -f