From 44dbcc3cc164fb6eabaf6b5baa4866de78e931e5 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Tue, 7 Aug 2018 13:52:52 -0400
Subject: [PATCH 01/15] Use standard library singleton mechanism

---
 app/faceanalysis/api.py                     | 17 ++++++++++-------
 app/faceanalysis/auth.py                    |  5 +++--
 app/faceanalysis/models/database_manager.py | 17 ++++++-----------
 app/faceanalysis/models/models.py           |  5 +++--
 app/faceanalysis/pipeline.py                |  4 ++--
 app/faceanalysis/tests/test_api.py          |  4 ++--
 app/worker.py                               |  4 ++--
 7 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/app/faceanalysis/api.py b/app/faceanalysis/api.py
index 7abfa34..6672498 100644
--- a/app/faceanalysis/api.py
+++ b/app/faceanalysis/api.py
@@ -8,7 +8,7 @@
 from flask_restful import Resource, Api, reqparse
 from flask import Flask, g
 from .models.models import Match, Image, User, ImageStatus
-from .models.database_manager import DatabaseManager
+from .models.database_manager import get_database_manager
 from .models.image_status_enum import ImageStatusEnum
 from .log import get_logger
 from .auth import auth
@@ -46,7 +46,7 @@ def post(self):
         args = parser.parse_args()
         username = args['username']
         password = args['password']
-        db = DatabaseManager()
+        db = get_database_manager()
         session = db.get_session()
         query = session.query(User).filter(User.username == username).first()
         session.close()
@@ -71,7 +71,7 @@ def post(self):
                             help="img_id missing in the post body")
         args = parser.parse_args()
         img_id = args['img_id']
-        db = DatabaseManager()
+        db = get_database_manager()
         session = db.get_session()
         img_status = session.query(ImageStatus).filter(
             ImageStatus.img_id == img_id).first()
@@ -99,7 +99,8 @@ def post(self):
 
     def get(self, img_id):
         logger.debug('checking if img has been processed')
-        session = DatabaseManager().get_session()
+        db = get_database_manager()
+        session = db.get_session()
         img_status = session.query(ImageStatus).filter(
             ImageStatus.img_id == img_id).first()
         session.close()
@@ -125,7 +126,7 @@ def post(self):
                             location='files')
         args = parser.parse_args()
         img = args['image']
-        db = DatabaseManager()
+        db = get_database_manager()
         if self._allowed_file(img.filename):
             filename = secure_filename(img.filename)
             img_id = filename[:filename.find('.')]
@@ -165,7 +166,8 @@ class ImgMatchList(Resource):
     # pylint: disable=assignment-from-no-return
     def get(self, img_id):
         logger.debug('getting img match list')
-        session = DatabaseManager().get_session()
+        db = get_database_manager()
+        session = db.get_session()
         query = session.query(Match).filter(Match.this_img_id == img_id)
         imgs = []
         distances = []
@@ -182,7 +184,8 @@ class ImgList(Resource):
 
     def get(self):
         logger.debug('getting img list')
-        session = DatabaseManager().get_session()
+        db = get_database_manager()
+        session = db.get_session()
         query = session.query(Image).all()
         imgs = [f.img_id for f in query]
         session.close()
diff --git a/app/faceanalysis/auth.py b/app/faceanalysis/auth.py
index ae0e85e..a701848 100644
--- a/app/faceanalysis/auth.py
+++ b/app/faceanalysis/auth.py
@@ -1,7 +1,7 @@
 from flask_httpauth import HTTPBasicAuth
 from flask import g
 from .models.models import User
-from .models.models import DatabaseManager
+from .models.database_manager import get_database_manager
 
 auth = HTTPBasicAuth()
 
@@ -9,7 +9,8 @@
 @auth.verify_password
 def verify_password(username_or_token, password):
     user = User.verify_auth_token(username_or_token)
-    session = DatabaseManager().get_session()
+    db = get_database_manager()
+    session = db.get_session()
     if not user:
         user = session.query(User).filter(
             User.username == username_or_token).first()
diff --git a/app/faceanalysis/models/database_manager.py b/app/faceanalysis/models/database_manager.py
index c135c4f..f8ceed8 100644
--- a/app/faceanalysis/models/database_manager.py
+++ b/app/faceanalysis/models/database_manager.py
@@ -1,20 +1,10 @@
+from functools import lru_cache
 import os
 from sqlalchemy.orm import sessionmaker
 from sqlalchemy import create_engine
 from ..log import get_logger
 
 
-def singleton(class_):
-    instances = {}
-
-    def getinstance(*args, **kwargs):
-        if class_ not in instances:
-            instances[class_] = class_(*args, **kwargs)
-        return instances[class_]
-    return getinstance
-
-
-@singleton
 class DatabaseManager:
     def __init__(self):
         mysql_user = os.environ['MYSQL_USER']
@@ -53,3 +43,8 @@ def safe_commit(self, session):
     def close_engine(self):
         self.logger.debug("engine closed")
         self.engine.dispose()
+
+
+@lru_cache(maxsize=1)
+def get_database_manager():
+    return DatabaseManager()
diff --git a/app/faceanalysis/models/models.py b/app/faceanalysis/models/models.py
index e776a95..580cd82 100644
--- a/app/faceanalysis/models/models.py
+++ b/app/faceanalysis/models/models.py
@@ -9,7 +9,7 @@
                         Integer, DateTime, UniqueConstraint)
 from itsdangerous import (TimedJSONWebSignatureSerializer
                           as Serializer, BadSignature, SignatureExpired)
-from .database_manager import DatabaseManager
+from .database_manager import get_database_manager
 
 Base = declarative_base()
 
@@ -43,7 +43,8 @@ def verify_auth_token(token):
             return None
         except BadSignature:
             return None
-        session = DatabaseManager().get_session()
+        db = get_database_manager()
+        session = db.get_session()
         user = session.query(User).filter(User.id == data['id']).first()
         session.close()
         return user
diff --git a/app/faceanalysis/pipeline.py b/app/faceanalysis/pipeline.py
index b79d432..4b30698 100644
--- a/app/faceanalysis/pipeline.py
+++ b/app/faceanalysis/pipeline.py
@@ -5,7 +5,7 @@
 import numpy as np
 import face_recognition as fr
 from .queue_poll import QueuePoll
-from .models.database_manager import DatabaseManager
+from .models.database_manager import get_database_manager
 from .models.models import Image, FeatureMapping, Match, ImageStatus
 from .models.image_status_enum import ImageStatusEnum
 from .log import get_logger
@@ -13,7 +13,7 @@
 
 class Pipeline:
     def __init__(self):
-        self.db = DatabaseManager()
+        self.db = get_database_manager()
         self.logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
         dirname = os.path.dirname(os.path.abspath(__file__))
         self.img_dir = os.path.join(dirname, 'images')
diff --git a/app/faceanalysis/tests/test_api.py b/app/faceanalysis/tests/test_api.py
index 7bf3fab..9224821 100644
--- a/app/faceanalysis/tests/test_api.py
+++ b/app/faceanalysis/tests/test_api.py
@@ -6,7 +6,7 @@
 from time import sleep
 from http import HTTPStatus
 from faceanalysis.api import app
-from faceanalysis.models.database_manager import DatabaseManager
+from faceanalysis.models.database_manager import get_database_manager
 from faceanalysis.models.image_status_enum import ImageStatusEnum
 from faceanalysis.models.models import init_models, delete_models
 
@@ -17,7 +17,7 @@ class ApiTestCase(unittest.TestCase):
     def setUp(self):
         app.testing = True
         self.app = app.test_client()
-        self.db = DatabaseManager()
+        self.db = get_database_manager()
         init_models(self.db.engine)
         username = 'username'
         password = 'password'
diff --git a/app/worker.py b/app/worker.py
index 022fd8c..94b9f32 100644
--- a/app/worker.py
+++ b/app/worker.py
@@ -1,9 +1,9 @@
-from faceanalysis.models.database_manager import DatabaseManager
+from faceanalysis.models.database_manager import get_database_manager
 from faceanalysis.models.models import init_models
 from faceanalysis.pipeline import Pipeline
 
 if __name__ == "__main__":
-    db = DatabaseManager()
+    db = get_database_manager()
     init_models(db.engine)
     pipeline = Pipeline()
     pipeline.begin_pipeline()

From 4f92f19d62aab41486f04dc1bd37e24ed5f22bfc Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Tue, 7 Aug 2018 15:20:15 -0400
Subject: [PATCH 02/15] Extract face vectorizer interface

---
 app/faceanalysis/face_vectorizer.py | 14 ++++++
 app/faceanalysis/pipeline.py        | 70 ++++++++++++++---------------
 2 files changed, 49 insertions(+), 35 deletions(-)
 create mode 100644 app/faceanalysis/face_vectorizer.py

diff --git a/app/faceanalysis/face_vectorizer.py b/app/faceanalysis/face_vectorizer.py
new file mode 100644
index 0000000..4753cfc
--- /dev/null
+++ b/app/faceanalysis/face_vectorizer.py
@@ -0,0 +1,14 @@
+import face_recognition as fr
+
+
+def get_face_vectors(img_path):
+    img = fr.load_image_file(img_path)
+    face_locations = fr.face_locations(img)
+    face_vectors = []
+    for top, right, bottom, left in face_locations:
+        cropped_img = img[top:bottom, left:right]
+        cropped_features = fr.face_encodings(cropped_img)
+        if cropped_features:
+            face_vector = cropped_features[0]
+            face_vectors.append(face_vector)
+    return face_vectors
diff --git a/app/faceanalysis/pipeline.py b/app/faceanalysis/pipeline.py
index 4b30698..7e268e0 100644
--- a/app/faceanalysis/pipeline.py
+++ b/app/faceanalysis/pipeline.py
@@ -3,7 +3,7 @@
 import os
 import base64
 import numpy as np
-import face_recognition as fr
+from .face_vectorizer import get_face_vectors
 from .queue_poll import QueuePoll
 from .models.database_manager import get_database_manager
 from .models.models import Image, FeatureMapping, Match, ImageStatus
@@ -27,25 +27,23 @@ def _add_entry_to_session(self, cls, session, **kwargs):
         session.add(row)
         return row
 
-    # pylint: disable=broad-except
-    def _process_img(self, img_id, session):
-        self.logger.debug('processing img')
-        img_has_been_uploaded = False
-        img = None
+    def _find_image(self, img_id, session):
+        self.logger.debug('finding image %s', img_id)
+
+        img_path = None
         for extension in self.allowed_file_extensions:
             img_name = "{}.{}".format(img_id, extension)
             fpath = os.path.join(self.img_dir, img_name)
-            try:
-                img = fr.load_image_file(fpath)
-                img_has_been_uploaded = True
+            if os.path.isfile(fpath):
+                img_path = fpath
                 break
-            except Exception:
-                continue
-        if img_has_been_uploaded:
+
+        if img_path:
             self._add_entry_to_session(Image,
                                        session,
                                        img_id=img_id)
-        return img
+
+        return img_path
 
     # pylint: disable=broad-except
     def _delete_img(self, img_id):
@@ -129,6 +127,14 @@ def _img_should_be_processed(self, img_id):
             return img_status.status == ImageStatusEnum.on_queue.name
         return False
 
+    # pylint: disable=len-as-condition
+    @classmethod
+    def _compute_distances(cls, face_encodings, face_to_compare):
+        if len(face_encodings) == 0:
+            return np.empty((0))
+
+        return np.linalg.norm(face_encodings - face_to_compare, axis=1)
+
     # pylint: disable=too-many-locals
     def _handle_message_from_queue(self, message):
         self.logger.debug("handling message from queue")
@@ -139,32 +145,26 @@ def _handle_message_from_queue(self, message):
             return
         self._update_img_status(
             curr_img_id, status=ImageStatusEnum.processing.name)
-        curr_img = self._process_img(curr_img_id, session)
-        if curr_img is not None:
+        curr_img_path = self._find_image(curr_img_id, session)
+        if curr_img_path is not None:
             prev_img_ids, prev_features = self._get_img_ids_and_features()
             curr_matches = []
-            face_locations = fr.face_locations(curr_img)
-            if not face_locations:
+            face_vectors = get_face_vectors(curr_img_path)
+            if not face_vectors:
                 error_msg = "No faces found in image"
                 self._update_img_status(curr_img_id, error_msg=error_msg)
-            for face_location in face_locations:
-                top, right, bottom, left = face_location
-                curr_cropped_img = curr_img[top:bottom, left:right]
-                curr_cropped_features = fr.face_encodings(
-                    curr_cropped_img)
-                if curr_cropped_features:
-                    self._process_feature_mapping(curr_cropped_features[0],
-                                                  curr_img_id,
-                                                  session)
-                    face_distances = fr.face_distance(prev_features,
-                                                      curr_cropped_features)
-                    for count, distance_score in enumerate(face_distances):
-                        distance_score = float(distance_score)
-                        that_img_id = prev_img_ids[count]
-                        if distance_score < 0.6 and curr_img_id != that_img_id:
-                            self._prepare_matches(curr_matches,
-                                                  that_img_id,
-                                                  distance_score)
+            for face_vector in face_vectors:
+                self._process_feature_mapping(
+                    face_vector, curr_img_id, session)
+                face_distances = self._compute_distances(
+                    prev_features, face_vector)
+                for count, distance_score in enumerate(face_distances):
+                    distance_score = float(distance_score)
+                    that_img_id = prev_img_ids[count]
+                    if distance_score < 0.6 and curr_img_id != that_img_id:
+                        self._prepare_matches(curr_matches,
+                                              that_img_id,
+                                              distance_score)
             for curr_match in curr_matches:
                 self._process_matches(curr_img_id,
                                       curr_match["that_img_id"],

From 9c081b005431cefa3e447d3eb80e7fcaa0df5297 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Tue, 7 Aug 2018 15:26:41 -0400
Subject: [PATCH 03/15] Reduce nesting in scoring loop

---
 app/faceanalysis/pipeline.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/app/faceanalysis/pipeline.py b/app/faceanalysis/pipeline.py
index 7e268e0..da474d0 100644
--- a/app/faceanalysis/pipeline.py
+++ b/app/faceanalysis/pipeline.py
@@ -10,6 +10,8 @@
 from .models.image_status_enum import ImageStatusEnum
 from .log import get_logger
 
+DISTANCE_SCORE_THRESHOLD = 0.6
+
 
 class Pipeline:
     def __init__(self):
@@ -158,18 +160,19 @@ def _handle_message_from_queue(self, message):
                     face_vector, curr_img_id, session)
                 face_distances = self._compute_distances(
                     prev_features, face_vector)
-                for count, distance_score in enumerate(face_distances):
+                for i, distance_score in enumerate(face_distances):
+                    that_img_id = prev_img_ids[i]
+                    if curr_img_id == that_img_id:
+                        continue
                     distance_score = float(distance_score)
-                    that_img_id = prev_img_ids[count]
-                    if distance_score < 0.6 and curr_img_id != that_img_id:
-                        self._prepare_matches(curr_matches,
-                                              that_img_id,
-                                              distance_score)
+                    if distance_score >= DISTANCE_SCORE_THRESHOLD:
+                        continue
+                    self._prepare_matches(
+                        curr_matches, that_img_id, distance_score)
             for curr_match in curr_matches:
-                self._process_matches(curr_img_id,
-                                      curr_match["that_img_id"],
-                                      curr_match["distance_score"],
-                                      session)
+                self._process_matches(
+                    curr_img_id, curr_match["that_img_id"],
+                    curr_match["distance_score"], session)
         else:
             error_msg = "Image processed before uploaded"
             self._update_img_status(curr_img_id, error_msg=error_msg)

From 8c4bca2a65a3564f30e645617a97de2e9be83912 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Tue, 7 Aug 2018 16:19:20 -0400
Subject: [PATCH 04/15] Split out face algorithm into docker container

---
 algorithms/face_recognition/Dockerfile       | 41 +++++++++++++++
 algorithms/face_recognition/requirements.txt |  1 +
 algorithms/face_recognition/vectorize.py     | 35 +++++++++++++
 app/Dockerfile                               | 33 +------------
 app/faceanalysis/face_vectorizer.py          | 52 +++++++++++++++-----
 app/faceanalysis/pipeline.py                 | 18 +++++--
 app/requirements.txt                         |  3 +-
 docker-compose.yml                           |  4 ++
 8 files changed, 135 insertions(+), 52 deletions(-)
 create mode 100644 algorithms/face_recognition/Dockerfile
 create mode 100644 algorithms/face_recognition/requirements.txt
 create mode 100644 algorithms/face_recognition/vectorize.py

diff --git a/algorithms/face_recognition/Dockerfile b/algorithms/face_recognition/Dockerfile
new file mode 100644
index 0000000..db685b0
--- /dev/null
+++ b/algorithms/face_recognition/Dockerfile
@@ -0,0 +1,41 @@
+FROM python:3.6-slim-stretch
+
+RUN apt-get -y update && \
+    apt-get install -y --fix-missing \
+        build-essential \
+        cmake \
+        gfortran \
+        git \
+        wget \
+        curl \
+        graphicsmagick \
+        libgraphicsmagick1-dev \
+        libatlas-dev \
+        libavcodec-dev \
+        libavformat-dev \
+        libgtk2.0-dev \
+        libjpeg-dev \
+        liblapack-dev \
+        libswscale-dev \
+        pkg-config \
+        python3-dev \
+        python3-numpy \
+        software-properties-common \
+        zip && \
+    apt-get clean && \
+    rm -rf /tmp/* /var/tmp/*
+
+RUN cd ~ && \
+    mkdir -p dlib && \
+    git clone -b 'v19.9' --single-branch https://github.com/davisking/dlib.git dlib/ && \
+    cd dlib/ && \
+    python3 setup.py install --yes USE_AVX_INSTRUCTIONS
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY vectorize.py .
+
+ENTRYPOINT ["python3", "vectorize.py"]
diff --git a/algorithms/face_recognition/requirements.txt b/algorithms/face_recognition/requirements.txt
new file mode 100644
index 0000000..6a0dfc0
--- /dev/null
+++ b/algorithms/face_recognition/requirements.txt
@@ -0,0 +1 @@
+face-recognition==1.0.0
diff --git a/algorithms/face_recognition/vectorize.py b/algorithms/face_recognition/vectorize.py
new file mode 100644
index 0000000..0415ccf
--- /dev/null
+++ b/algorithms/face_recognition/vectorize.py
@@ -0,0 +1,35 @@
+import json
+import face_recognition as fr
+
+
+def get_face_vectors(img_path):
+    img = fr.load_image_file(img_path)
+    face_locations = fr.face_locations(img)
+    face_vectors = []
+    for top, right, bottom, left in face_locations:
+        cropped_img = img[top:bottom, left:right]
+        cropped_features = fr.face_encodings(cropped_img)
+        if cropped_features:
+            face_vector = cropped_features[0]
+            face_vectors.append(face_vector.tolist())
+    return face_vectors
+
+
+def _cli():
+    from argparse import ArgumentParser
+    from argparse import FileType
+
+    parser = ArgumentParser(description=__doc__)
+    parser.add_argument('image', type=FileType('r'))
+
+    args = parser.parse_args()
+    image = args.image
+    image.close()
+    img_path = image.name
+
+    vectors = get_face_vectors(img_path)
+    print(json.dumps({'faceVectors': vectors}))
+
+
+if __name__ == '__main__':
+    _cli()
diff --git a/app/Dockerfile b/app/Dockerfile
index 3b3846c..a6da92d 100644
--- a/app/Dockerfile
+++ b/app/Dockerfile
@@ -3,40 +3,9 @@ FROM python:3.5-slim
 RUN apt-get -y update \
     && apt-get install -y \
         mysql-client \
-    && apt-get clean \
-    && rm -rf /tmp/* /var/tmp/*
-
-RUN apt-get -y update \
-    && apt-get install -y --fix-missing \
-        build-essential \
-        cmake \
-        gfortran \
-        git \
-        wget \
-        curl \
-        graphicsmagick \
-        libgraphicsmagick1-dev \
-        libatlas-dev \
-        libavcodec-dev \
-        libavformat-dev \
-        libboost-all-dev \
-        libgtk2.0-dev \
-        libjpeg-dev \
-        liblapack-dev \
-        libswscale-dev \
-        pkg-config \
-        python3-dev \
         python3-numpy \
-        software-properties-common \
-        zip \
-        mysql-client \
     && apt-get clean \
-    && rm -rf /tmp/* /var/tmp/* \
-    && cd ~ \
-    && mkdir -p dlib \
-    && git clone -b 'v19.7' --single-branch https://github.com/davisking/dlib.git dlib/ \
-    && cd dlib/ \
-    && python3 setup.py install --yes USE_AVX_INSTRUCTIONS
+    && rm -rf /tmp/* /var/tmp/*
 
 WORKDIR /app
 
diff --git a/app/faceanalysis/face_vectorizer.py b/app/faceanalysis/face_vectorizer.py
index 4753cfc..262962a 100644
--- a/app/faceanalysis/face_vectorizer.py
+++ b/app/faceanalysis/face_vectorizer.py
@@ -1,14 +1,40 @@
-import face_recognition as fr
-
-
-def get_face_vectors(img_path):
-    img = fr.load_image_file(img_path)
-    face_locations = fr.face_locations(img)
-    face_vectors = []
-    for top, right, bottom, left in face_locations:
-        cropped_img = img[top:bottom, left:right]
-        cropped_features = fr.face_encodings(cropped_img)
-        if cropped_features:
-            face_vector = cropped_features[0]
-            face_vectors.append(face_vector)
+import os
+import json
+import docker
+from .log import get_logger
+
+logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
+
+mounted_data_dir = os.environ['MOUNTED_DATA_DIR']
+host_data_dir = os.environ['HOST_DATA_DIR']
+
+
+def _format_mount_path(img_path):
+    return '/{}'.format(os.path.basename(img_path))
+
+
+def _format_host_path(img_path):
+    # volume mounts must be absolute
+    if not img_path.startswith('/'):
+        img_path = os.path.abspath(img_path)
+
+    # adjust the path if it itself is a mount and if we're spawning a
+    # sibling container
+    if mounted_data_dir and host_data_dir:
+        img_path = img_path.replace(mounted_data_dir, host_data_dir)
+
+    return img_path
+
+
+def get_face_vectors(img_path, algorithm):
+    img_mount = _format_mount_path(img_path)
+    img_host = _format_host_path(img_path)
+    volumes = {img_host: {'bind': img_mount, 'mode': 'ro'}}
+
+    logger.debug('Running container %s with image %s', algorithm, img_host)
+    client = docker.from_env()
+    stdout = client.containers.run(algorithm, img_mount,
+                                   volumes=volumes, auto_remove=True)
+
+    face_vectors = json.loads(stdout.decode('ascii')).get('faceVectors', [])
     return face_vectors
diff --git a/app/faceanalysis/pipeline.py b/app/faceanalysis/pipeline.py
index da474d0..fb44c06 100644
--- a/app/faceanalysis/pipeline.py
+++ b/app/faceanalysis/pipeline.py
@@ -1,7 +1,7 @@
 # pylint: disable=too-few-public-methods
 
 import os
-import base64
+import json
 import numpy as np
 from .face_vectorizer import get_face_vectors
 from .queue_poll import QueuePoll
@@ -10,7 +10,13 @@
 from .models.image_status_enum import ImageStatusEnum
 from .log import get_logger
 
-DISTANCE_SCORE_THRESHOLD = 0.6
+DISTANCE_SCORE_THRESHOLD = float(os.environ.get(
+    'DISTANCE_SCORE_THRESHOLD',
+    '0.6'))
+
+FACE_VECTORIZE_ALGORITHM = os.environ.get(
+    'FACE_VECTORIZE_ALGORITHM',
+    'cwolff/face_recognition')
 
 
 class Pipeline:
@@ -61,7 +67,7 @@ def _delete_img(self, img_id):
 
     def _process_feature_mapping(self, features, img_id, session):
         self.logger.debug('processing feature mapping')
-        feature_str = base64.b64encode(features.dumps())
+        feature_str = json.dumps(features)
         self._add_entry_to_session(FeatureMapping,
                                    session,
                                    img_id=img_id,
@@ -91,7 +97,7 @@ def _get_img_ids_and_features(self):
         img_ids = []
         for row in rows:
             img_ids.append(row.img_id)
-            current_features = np.loads(base64.b64decode(row.features))
+            current_features = np.array(json.loads(row.features))
             known_features.append(current_features)
         return img_ids, np.array(known_features)
 
@@ -135,6 +141,7 @@ def _compute_distances(cls, face_encodings, face_to_compare):
         if len(face_encodings) == 0:
             return np.empty((0))
 
+        face_to_compare = np.array(face_to_compare)
         return np.linalg.norm(face_encodings - face_to_compare, axis=1)
 
     # pylint: disable=too-many-locals
@@ -151,7 +158,8 @@ def _handle_message_from_queue(self, message):
         if curr_img_path is not None:
             prev_img_ids, prev_features = self._get_img_ids_and_features()
             curr_matches = []
-            face_vectors = get_face_vectors(curr_img_path)
+            face_vectors = get_face_vectors(
+                curr_img_path, FACE_VECTORIZE_ALGORITHM)
             if not face_vectors:
                 error_msg = "No faces found in image"
                 self._update_img_status(curr_img_id, error_msg=error_msg)
diff --git a/app/requirements.txt b/app/requirements.txt
index 2fc734d..b2f5a64 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -1,10 +1,9 @@
 Flask-HTTPAuth==3.2.3
 Flask-RESTful==0.3.6
 Flask==0.12.2
-Pillow==5.0.0
 SQLAlchemy==1.2.0
 azure-storage-queue==0.37.0
-face-recognition==1.0.0
+docker==3.4.1
 gunicorn==19.7.1
 mysql-connector-python==8.0.5
 numpy==1.14.0
diff --git a/docker-compose.yml b/docker-compose.yml
index de5371f..c299656 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -30,7 +30,11 @@ services:
         max-size: "50m"
         max-file: "5"
     volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
       - "${DATA_DIR}:/app/faceanalysis/images"
+    environment:
+      HOST_DATA_DIR: "${DATA_DIR}"
+      MOUNTED_DATA_DIR: /app/faceanalysis/images
     env_file:
       - ./environment_variables.env
 

From cbde6542c8ea67554c591d1d6b65298dda9dddc4 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Tue, 7 Aug 2018 16:19:20 -0400
Subject: [PATCH 05/15] Remove debug log line from face vectorizer

---
 app/faceanalysis/face_vectorizer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/app/faceanalysis/face_vectorizer.py b/app/faceanalysis/face_vectorizer.py
index 262962a..073770b 100644
--- a/app/faceanalysis/face_vectorizer.py
+++ b/app/faceanalysis/face_vectorizer.py
@@ -31,7 +31,6 @@ def get_face_vectors(img_path, algorithm):
     img_host = _format_host_path(img_path)
     volumes = {img_host: {'bind': img_mount, 'mode': 'ro'}}
 
-    logger.debug('Running container %s with image %s', algorithm, img_host)
     client = docker.from_env()
     stdout = client.containers.run(algorithm, img_mount,
                                    volumes=volumes, auto_remove=True)

From 92b3a4d8d52f2870168445120fb5358ac5ad4c35 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Tue, 7 Aug 2018 18:36:27 -0400
Subject: [PATCH 06/15] Factor out settings

---
 app/faceanalysis/api.py                     | 19 +++++++------
 app/faceanalysis/face_vectorizer.py         | 12 ++++-----
 app/faceanalysis/log.py                     |  3 ++-
 app/faceanalysis/models/database_manager.py | 17 +++++-------
 app/faceanalysis/models/models.py           | 10 +++----
 app/faceanalysis/pipeline.py                | 20 +++++---------
 app/faceanalysis/queue_poll.py              | 11 ++++----
 app/faceanalysis/settings.py                | 30 +++++++++++++++++++++
 app/faceanalysis/tests/test_api.py          |  7 +++--
 9 files changed, 72 insertions(+), 57 deletions(-)
 create mode 100644 app/faceanalysis/settings.py

diff --git a/app/faceanalysis/api.py b/app/faceanalysis/api.py
index 6672498..4a0c3e5 100644
--- a/app/faceanalysis/api.py
+++ b/app/faceanalysis/api.py
@@ -12,6 +12,8 @@
 from .models.image_status_enum import ImageStatusEnum
 from .log import get_logger
 from .auth import auth
+from .settings import (STORAGE_ACCOUNT_NAME, STORAGE_ACCOUNT_KEY,
+                       IMAGE_PROCESSOR_QUEUE, ALLOWED_EXTENSIONS)
 
 app = Flask(__name__)
 app.config['UPLOAD_FOLDER'] = os.path.join(
@@ -20,10 +22,10 @@
     'images')
 app.url_map.strict_slashes = False
 api = Api(app)
-queue_service = QueueService(account_name=os.environ['STORAGE_ACCOUNT_NAME'],
-                             account_key=os.environ['STORAGE_ACCOUNT_KEY'])
-queue_service.create_queue(os.environ['IMAGE_PROCESSOR_QUEUE'])
-logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
+queue_service = QueueService(account_name=STORAGE_ACCOUNT_NAME,
+                             account_key=STORAGE_ACCOUNT_KEY)
+queue_service.create_queue(IMAGE_PROCESSOR_QUEUE)
+logger = get_logger(__name__)
 
 
 class AuthenticationToken(Resource):
@@ -81,8 +83,7 @@ def post(self):
                 return ('Image previously placed on queue',
                         HTTPStatus.BAD_REQUEST.value)
             try:
-                queue_service.put_message(os.environ['IMAGE_PROCESSOR_QUEUE'],
-                                          img_id)
+                queue_service.put_message(IMAGE_PROCESSOR_QUEUE, img_id)
                 img_status.status = ImageStatusEnum.on_queue.name
                 db.safe_commit(session)
                 logger.info('img successfully put on queue')
@@ -112,8 +113,6 @@ def get(self, img_id):
 
 class ImgUpload(Resource):
     method_decorators = [auth.login_required]
-    env_extensions = os.environ['ALLOWED_IMAGE_FILE_EXTENSIONS']
-    allowed_extensions = env_extensions.lower().split('_')
 
     # pylint: disable=broad-except
     def post(self):
@@ -152,12 +151,12 @@ def post(self):
         else:
             error_msg = ('Image upload failed: please use one of the '
                          'following extensions --> {}'
-                         .format(self.allowed_extensions))
+                         .format(ALLOWED_EXTENSIONS))
             return error_msg, HTTPStatus.BAD_REQUEST.value
 
     def _allowed_file(self, filename):
         return ('.' in filename and
-                filename.rsplit('.', 1)[1].lower() in self.allowed_extensions)
+                filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS)
 
 
 class ImgMatchList(Resource):
diff --git a/app/faceanalysis/face_vectorizer.py b/app/faceanalysis/face_vectorizer.py
index 073770b..2628294 100644
--- a/app/faceanalysis/face_vectorizer.py
+++ b/app/faceanalysis/face_vectorizer.py
@@ -2,11 +2,10 @@
 import json
 import docker
 from .log import get_logger
+from .settings import MOUNTED_DATA_DIR
+from .settings import HOST_DATA_DIR
 
-logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
-
-mounted_data_dir = os.environ['MOUNTED_DATA_DIR']
-host_data_dir = os.environ['HOST_DATA_DIR']
+logger = get_logger(__name__)
 
 
 def _format_mount_path(img_path):
@@ -20,8 +19,8 @@ def _format_host_path(img_path):
 
     # adjust the path if it itself is a mount and if we're spawning a
     # sibling container
-    if mounted_data_dir and host_data_dir:
-        img_path = img_path.replace(mounted_data_dir, host_data_dir)
+    if MOUNTED_DATA_DIR and HOST_DATA_DIR:
+        img_path = img_path.replace(MOUNTED_DATA_DIR, HOST_DATA_DIR)
 
     return img_path
 
@@ -31,6 +30,7 @@ def get_face_vectors(img_path, algorithm):
     img_host = _format_host_path(img_path)
     volumes = {img_host: {'bind': img_mount, 'mode': 'ro'}}
 
+    logger.debug('Running container %s with image %s', algorithm, img_host)
     client = docker.from_env()
     stdout = client.containers.run(algorithm, img_mount,
                                    volumes=volumes, auto_remove=True)
diff --git a/app/faceanalysis/log.py b/app/faceanalysis/log.py
index 91d0264..4238a12 100644
--- a/app/faceanalysis/log.py
+++ b/app/faceanalysis/log.py
@@ -1,8 +1,9 @@
 import sys
 import logging
+from .settings import LOGGING_LEVEL
 
 
-def get_logger(module_name, logging_level):
+def get_logger(module_name, logging_level=LOGGING_LEVEL):
     logging_levels = {
         'DEBUG': logging.DEBUG,
         'INFO': logging.INFO,
diff --git a/app/faceanalysis/models/database_manager.py b/app/faceanalysis/models/database_manager.py
index f8ceed8..deb2ff3 100644
--- a/app/faceanalysis/models/database_manager.py
+++ b/app/faceanalysis/models/database_manager.py
@@ -1,29 +1,26 @@
 from functools import lru_cache
-import os
 from sqlalchemy.orm import sessionmaker
 from sqlalchemy import create_engine
 from ..log import get_logger
+from ..settings import (MYSQL_USER, MYSQL_PASSWORD, MYSQL_DATABASE,
+                        MYSQL_CONTAINER_NAME)
 
 
 class DatabaseManager:
     def __init__(self):
-        mysql_user = os.environ['MYSQL_USER']
-        mysql_password = os.environ['MYSQL_PASSWORD']
-        mysql_container_name = os.environ['MYSQL_CONTAINER_NAME']
-        mysql_database = os.environ['MYSQL_DATABASE']
         mysql_connector_str = 'mysql+mysqlconnector'
         mysql_port = '3306'
         engine_credential = "{}://{}:{}@{}:{}/{}".format(mysql_connector_str,
-                                                         mysql_user,
-                                                         mysql_password,
-                                                         mysql_container_name,
+                                                         MYSQL_USER,
+                                                         MYSQL_PASSWORD,
+                                                         MYSQL_CONTAINER_NAME,
                                                          mysql_port,
-                                                         mysql_database)
+                                                         MYSQL_DATABASE)
         self.engine = create_engine(engine_credential,
                                     pool_recycle=3600)
 
         self.session_factory = sessionmaker(bind=self.engine)
-        self.logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
+        self.logger = get_logger(__name__)
 
     def get_session(self):
         return self.session_factory()
diff --git a/app/faceanalysis/models/models.py b/app/faceanalysis/models/models.py
index 580cd82..5aef626 100644
--- a/app/faceanalysis/models/models.py
+++ b/app/faceanalysis/models/models.py
@@ -1,5 +1,4 @@
 # pylint: disable=too-few-public-methods
-import os
 from passlib.apps import custom_app_context as pwd_context
 from sqlalchemy.orm import relationship
 from sqlalchemy.sql import func
@@ -10,6 +9,7 @@
 from itsdangerous import (TimedJSONWebSignatureSerializer
                           as Serializer, BadSignature, SignatureExpired)
 from .database_manager import get_database_manager
+from ..settings import TOKEN_EXPIRATION, TOKEN_SECRET_KEY
 
 Base = declarative_base()
 
@@ -28,15 +28,13 @@ def verify_password(self, password):
         return pwd_context.verify(password, self.password_hash)
 
     def generate_auth_token(self, expiration=None):
-        token = os.environ['TOKEN_SECRET_KEY']
-        expiration = expiration or int(
-            os.environ['DEFAULT_TOKEN_EXPIRATION_SECS'])
-        serializer = Serializer(token, expires_in=expiration)
+        expiration = expiration or TOKEN_EXPIRATION
+        serializer = Serializer(TOKEN_SECRET_KEY, expires_in=expiration)
         return serializer.dumps({'id': self.id})
 
     @staticmethod
     def verify_auth_token(token):
-        serializer = Serializer(os.environ['TOKEN_SECRET_KEY'])
+        serializer = Serializer(TOKEN_SECRET_KEY)
         try:
             data = serializer.loads(token)
         except SignatureExpired:
diff --git a/app/faceanalysis/pipeline.py b/app/faceanalysis/pipeline.py
index fb44c06..4e75cdc 100644
--- a/app/faceanalysis/pipeline.py
+++ b/app/faceanalysis/pipeline.py
@@ -9,24 +9,16 @@
 from .models.models import Image, FeatureMapping, Match, ImageStatus
 from .models.image_status_enum import ImageStatusEnum
 from .log import get_logger
-
-DISTANCE_SCORE_THRESHOLD = float(os.environ.get(
-    'DISTANCE_SCORE_THRESHOLD',
-    '0.6'))
-
-FACE_VECTORIZE_ALGORITHM = os.environ.get(
-    'FACE_VECTORIZE_ALGORITHM',
-    'cwolff/face_recognition')
+from .settings import (IMAGE_PROCESSOR_QUEUE, ALLOWED_EXTENSIONS,
+                       DISTANCE_SCORE_THRESHOLD, FACE_VECTORIZE_ALGORITHM)
 
 
 class Pipeline:
     def __init__(self):
         self.db = get_database_manager()
-        self.logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
+        self.logger = get_logger(__name__)
         dirname = os.path.dirname(os.path.abspath(__file__))
         self.img_dir = os.path.join(dirname, 'images')
-        file_extensions = os.environ['ALLOWED_IMAGE_FILE_EXTENSIONS'].lower()
-        self.allowed_file_extensions = file_extensions.split('_')
         self.logger.debug('pipeline initialized')
 
     def _add_entry_to_session(self, cls, session, **kwargs):
@@ -39,7 +31,7 @@ def _find_image(self, img_id, session):
         self.logger.debug('finding image %s', img_id)
 
         img_path = None
-        for extension in self.allowed_file_extensions:
+        for extension in ALLOWED_EXTENSIONS:
             img_name = "{}.{}".format(img_id, extension)
             fpath = os.path.join(self.img_dir, img_name)
             if os.path.isfile(fpath):
@@ -56,7 +48,7 @@ def _find_image(self, img_id, session):
     # pylint: disable=broad-except
     def _delete_img(self, img_id):
         self.logger.debug('deleting img')
-        for extension in self.allowed_file_extensions:
+        for extension in ALLOWED_EXTENSIONS:
             img_name = "{}.{}".format(img_id, extension)
             fpath = os.path.join(self.img_dir, img_name)
             try:
@@ -191,7 +183,7 @@ def _handle_message_from_queue(self, message):
 
     def begin_pipeline(self):
         self.logger.debug('pipeline began')
-        qp = QueuePoll(os.environ['IMAGE_PROCESSOR_QUEUE'])
+        qp = QueuePoll(IMAGE_PROCESSOR_QUEUE)
         for message in qp.poll():
             self._handle_message_from_queue(message)
             self.logger.debug("polling next iteration")
diff --git a/app/faceanalysis/queue_poll.py b/app/faceanalysis/queue_poll.py
index 33988f1..e0ee9e2 100644
--- a/app/faceanalysis/queue_poll.py
+++ b/app/faceanalysis/queue_poll.py
@@ -1,20 +1,19 @@
 # pylint: disable=too-few-public-methods
 
-import os
 from time import sleep
 from azure.storage.queue import QueueService
 from .log import get_logger
+from .settings import STORAGE_ACCOUNT_KEY, STORAGE_ACCOUNT_NAME
 
 
 class QueuePoll:
     def __init__(self, queue_name):
-        env_acc_name = os.environ['STORAGE_ACCOUNT_NAME']
-        env_acc_key = os.environ['STORAGE_ACCOUNT_KEY']
-        self.queue_service = QueueService(account_name=env_acc_name,
-                                          account_key=env_acc_key)
+        self.queue_service = QueueService(
+            account_name=STORAGE_ACCOUNT_NAME,
+            account_key=STORAGE_ACCOUNT_KEY)
         self.queue_name = queue_name
         self.queue_service.create_queue(self.queue_name)
-        self.logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
+        self.logger = get_logger(__name__)
 
     # pylint: disable=broad-except
     def _get_messages_from_queue(self):
diff --git a/app/faceanalysis/settings.py b/app/faceanalysis/settings.py
new file mode 100644
index 0000000..398eb7c
--- /dev/null
+++ b/app/faceanalysis/settings.py
@@ -0,0 +1,30 @@
+from os import environ
+
+LOGGING_LEVEL = environ.get('LOGGING_LEVEL', 'WARNING')
+
+MOUNTED_DATA_DIR = environ.get('MOUNTED_DATA_DIR')
+HOST_DATA_DIR = environ.get('HOST_DATA_DIR')
+
+STORAGE_ACCOUNT_NAME = environ['STORAGE_ACCOUNT_NAME']
+STORAGE_ACCOUNT_KEY = environ['STORAGE_ACCOUNT_KEY']
+IMAGE_PROCESSOR_QUEUE = environ.get('IMAGE_PROCESSOR_QUEUE', 'faceanalysis')
+
+ALLOWED_EXTENSIONS = [ext for ext in environ.get(  # skip empty entries
+    'ALLOWED_IMAGE_FILE_EXTENSIONS', '').lower().split('_') if ext]
+
+DISTANCE_SCORE_THRESHOLD = float(environ.get(
+    'DISTANCE_SCORE_THRESHOLD',
+    '0.6'))
+FACE_VECTORIZE_ALGORITHM = environ.get(
+    'FACE_VECTORIZE_ALGORITHM',
+    'cwolff/face_recognition')
+
+TOKEN_SECRET_KEY = environ['TOKEN_SECRET_KEY']
+TOKEN_EXPIRATION = int(environ.get(
+    'DEFAULT_TOKEN_EXPIRATION_SECS',
+    '500'))
+
+MYSQL_USER = environ['MYSQL_USER']
+MYSQL_PASSWORD = environ['MYSQL_PASSWORD']
+MYSQL_CONTAINER_NAME = environ['MYSQL_CONTAINER_NAME']
+MYSQL_DATABASE = environ['MYSQL_DATABASE']
diff --git a/app/faceanalysis/tests/test_api.py b/app/faceanalysis/tests/test_api.py
index 9224821..0814574 100644
--- a/app/faceanalysis/tests/test_api.py
+++ b/app/faceanalysis/tests/test_api.py
@@ -9,6 +9,7 @@
 from faceanalysis.models.database_manager import get_database_manager
 from faceanalysis.models.image_status_enum import ImageStatusEnum
 from faceanalysis.models.models import init_models, delete_models
+from faceanalysis.settings import ALLOWED_EXTENSIONS
 
 
 class ApiTestCase(unittest.TestCase):
@@ -164,10 +165,8 @@ def test_upload_and_process_twice(self):
 
     def test_end_to_end_with_different_file_formats(self):
         # test jpg && png
-        file_extensions = os.environ['ALLOWED_IMAGE_FILE_EXTENSIONS'].lower()
-        allowed_file_extensions = file_extensions.split('_')
-        self.assertIn('jpg', allowed_file_extensions)
-        self.assertIn('png', allowed_file_extensions)
+        self.assertIn('jpg', ALLOWED_EXTENSIONS)
+        self.assertIn('png', ALLOWED_EXTENSIONS)
         fnames = {'11.jpg', '12.png'}
         self._test_end_to_end_with_matching_imgs(fnames)
 

From 50a648293e4dddb6072354e4ddc036a15241f512 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Wed, 8 Aug 2018 08:41:23 -0400
Subject: [PATCH 07/15] Make real environment variables available to CI

---
 .travis.yml                           | 4 ++++
 .travis/environment_variables.env.enc | 2 ++
 2 files changed, 6 insertions(+)
 create mode 100644 .travis/environment_variables.env.enc

diff --git a/.travis.yml b/.travis.yml
index 50aef48..ed83a70 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,5 +3,9 @@ language: generic
 services:
   - docker
 
+before_install:
+- openssl aes-256-cbc -K $encrypted_e08e3836e1cc_key -iv $encrypted_e08e3836e1cc_iv
+  -in ./.travis/environment_variables.env.enc -out ./environment_variables.env -d
+
 script:
   - ./run-test.sh
diff --git a/.travis/environment_variables.env.enc b/.travis/environment_variables.env.enc
new file mode 100644
index 0000000..36acd87
--- /dev/null
+++ b/.travis/environment_variables.env.enc
@@ -0,0 +1,2 @@
+�j�Y��6��KHo�8U��l\%�����'�W�?���ʁlEǜ�.�Y�q5�#iyg�7�k©s�~��O.��?��kA~{Q��R��� ��)�˄陧l��l����E��/��k���o��η���5�z#��l������L m��G-����`�GN�lm����
+T���y�q-Xp�<i١�F�9��_΀�L�aPi�l°;=�g�@��}$>�L{���	H��lSJ�=g5�^����%���T.�%N��T\���2�����k�yX�d�8�WQ���w5,���<��f��8��1 d�1UCۋ���^�4Q����@�p�s1SV�G�vR~�8FS�+Z�r�;�1�N�3qc%τ��jE��1��ev��_�L���X�d�\������?U�	�}�y�d����n�	��,7k�I
\ No newline at end of file

From f456fce5a7c80d4ae839d349c8e3905bfb7c5317 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Wed, 8 Aug 2018 08:41:51 -0400
Subject: [PATCH 08/15] Fix formatting

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index ed83a70..9cc46d3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,11 +1,11 @@
 language: generic
 
 services:
-  - docker
+- docker
 
 before_install:
 - openssl aes-256-cbc -K $encrypted_e08e3836e1cc_key -iv $encrypted_e08e3836e1cc_iv
   -in ./.travis/environment_variables.env.enc -out ./environment_variables.env -d
 
 script:
-  - ./run-test.sh
+- ./run-test.sh

From 5131e8a96cf7a4b9e012d91b9e590ebe11636333 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Wed, 8 Aug 2018 08:42:01 -0400
Subject: [PATCH 09/15] Ensure dependency containers are cleaned up

---
 run-test.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/run-test.sh b/run-test.sh
index 267c84d..d109ff6 100755
--- a/run-test.sh
+++ b/run-test.sh
@@ -5,11 +5,9 @@ set -e
 data_dir="$(mktemp -d)"
 db_dir="$(mktemp -d)"
 
-cleanup() { rm -rf "${data_dir}" "${db_dir}"; }
+cleanup() { rm -rf "${data_dir}" "${db_dir}"; docker-compose down; }
 trap cleanup EXIT
 
-docker-compose down
-
 DEVTOOLS="true" \
 docker-compose build
 

From 6108ab95c1c3213645232ed0b296192e31210349 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Wed, 8 Aug 2018 08:49:26 -0400
Subject: [PATCH 10/15] Fix cleanup crash on Travis

---
 run-test.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/run-test.sh b/run-test.sh
index d109ff6..6f3549b 100755
--- a/run-test.sh
+++ b/run-test.sh
@@ -5,7 +5,11 @@ set -e
 data_dir="$(mktemp -d)"
 db_dir="$(mktemp -d)"
 
-cleanup() { rm -rf "${data_dir}" "${db_dir}"; docker-compose down; }
+cleanup() {
+  set +e
+  rm -rf "${data_dir}" "${db_dir}"
+  docker-compose down
+}
 trap cleanup EXIT
 
 DEVTOOLS="true" \

From fc380a6a5eb00d5c7cc1d3176a1610c8941447b4 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Wed, 8 Aug 2018 09:03:00 -0400
Subject: [PATCH 11/15] Fail test if processing takes too long

---
 app/faceanalysis/tests/test_api.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/app/faceanalysis/tests/test_api.py b/app/faceanalysis/tests/test_api.py
index 0814574..22f4e4b 100644
--- a/app/faceanalysis/tests/test_api.py
+++ b/app/faceanalysis/tests/test_api.py
@@ -80,8 +80,13 @@ def _get_matches(self, img_id, expected_status_code=HTTPStatus.OK.value):
         return response
 
     def _wait_for_img_to_finish_processing(
-            self, img_id, expected_status_code=HTTPStatus.OK.value):
-        while True:
+            self, img_id, expected_status_code=HTTPStatus.OK.value,
+            max_wait_time_seconds=300):
+
+        total_wait_time_seconds = 0
+        polling_interval_seconds = 5
+
+        while total_wait_time_seconds < max_wait_time_seconds:
             rel_path = '/process_image/'
             response = self.app.get(self.BASE_PATH + rel_path + img_id,
                                     headers=self.headers)
@@ -91,7 +96,12 @@ def _wait_for_img_to_finish_processing(
             data = json.loads(response.get_data(as_text=True))
             if data['status'] == ImageStatusEnum.finished_processing.name:
                 return response
-            sleep(3)
+
+            sleep(polling_interval_seconds)
+            total_wait_time_seconds += polling_interval_seconds
+
+        self.fail('Waited for more than {} seconds for image {}'
+                  .format(max_wait_time_seconds, img_id))
 
     def _test_end_to_end_with_matching_imgs(self, fnames):
         img_ids = set()

From 532fe1169b91d8a474330e4b1560ab52da02b9e8 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Wed, 8 Aug 2018 09:03:15 -0400
Subject: [PATCH 12/15] Revert back to cleaning up before test run

This prevents potential unexpected behavior due to volumes sticking
around between runs.
---
 run-test.sh | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/run-test.sh b/run-test.sh
index 6f3549b..6e2f5fd 100755
--- a/run-test.sh
+++ b/run-test.sh
@@ -5,16 +5,14 @@ set -e
 data_dir="$(mktemp -d)"
 db_dir="$(mktemp -d)"
 
-cleanup() {
-  set +e
-  rm -rf "${data_dir}" "${db_dir}"
-  docker-compose down
-}
+cleanup() { set +e; rm -rf "${data_dir}" "${db_dir}"; }
 trap cleanup EXIT
 
 DEVTOOLS="true" \
 docker-compose build
 
+docker-compose down
+
 docker-compose run --rm --no-deps --entrypoint=python3 api -m pylint /app/faceanalysis
 docker-compose run --rm --no-deps --entrypoint=python3 api -m flake8 /app/faceanalysis
 

From 6a4258cd88f4818177c6a5bd328959897202ca21 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Wed, 8 Aug 2018 09:38:49 -0400
Subject: [PATCH 13/15] Make use of environment variables explicit

This change also makes it easier to override environment variables, e.g.
to configure a queue per test run.
---
 .env                                  |  26 +++++++++++++++++++++++-
 .travis.yml                           |   2 +-
 .travis/environment_variables.env.enc |   2 --
 .travis/secrets.env.enc               | Bin 0 -> 160 bytes
 README.md                             |   2 +-
 app/faceanalysis/queue_poll.py        |   3 ++-
 app/faceanalysis/settings.py          |   2 +-
 docker-compose.yml                    |  28 ++++++++++++++++++++++----
 environment_variables.env             |  14 -------------
 run-test.sh                           |   1 +
 secrets.env                           |   2 ++
 11 files changed, 57 insertions(+), 25 deletions(-)
 delete mode 100644 .travis/environment_variables.env.enc
 create mode 100644 .travis/secrets.env.enc
 delete mode 100644 environment_variables.env
 create mode 100644 secrets.env

diff --git a/.env b/.env
index 49b13b7..401d85e 100644
--- a/.env
+++ b/.env
@@ -1,5 +1,29 @@
+# these values configure the build of the docker images
 BUILD_TAG=latest
+DEVTOOLS=false
+
+# the port on which the api will be available
 APP_PORT=80
+
+# paths to the directories where data will be persisted on disk
 DATA_DIR=./persisted_data/prod/images
 DB_DIR=./persisted_data/prod/database
-DEVTOOLS=false
+
+# queue configuration
+IMAGE_PROCESSOR_QUEUE=faceanalysis
+
+# configuration values for mysql
+MYSQL_USER=faceanalysisrw
+MYSQL_PASSWORD=some-secure-string
+MYSQL_ROOT_PASSWORD=some-very-secure-string
+MYSQL_DATABASE=faceanalysis
+
+# allowed values are DEBUG, INFO, WARNING, ERROR and CRITICAL
+LOGGING_LEVEL=DEBUG
+
+# separate multiple extensions with underscores
+ALLOWED_IMAGE_FILE_EXTENSIONS=JPG_PNG
+
+# api access token configuration
+TOKEN_SECRET_KEY=some-long-random-string
+DEFAULT_TOKEN_EXPIRATION_SECS=500
diff --git a/.travis.yml b/.travis.yml
index 9cc46d3..9ce9663 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,7 +5,7 @@ services:
 
 before_install:
 - openssl aes-256-cbc -K $encrypted_e08e3836e1cc_key -iv $encrypted_e08e3836e1cc_iv
-  -in ./.travis/environment_variables.env.enc -out ./environment_variables.env -d
+  -in ./.travis/secrets.env.enc -out ./secrets.env -d
 
 script:
 - ./run-test.sh
diff --git a/.travis/environment_variables.env.enc b/.travis/environment_variables.env.enc
deleted file mode 100644
index 36acd87..0000000
--- a/.travis/environment_variables.env.enc
+++ /dev/null
@@ -1,2 +0,0 @@
-�j�Y��6��KHo�8U��l\%�����'�W�?���ʁlEǜ�.�Y�q5�#iyg�7�k©s�~��O.��?��kA~{Q��R��� ��)�˄陧l��l����E��/��k���o��η���5�z#��l������L m��G-����`�GN�lm����
-T���y�q-Xp�<i١�F�9��_΀�L�aPi�l°;=�g�@��}$>�L{���	H��lSJ�=g5�^����%���T.�%N��T\���2�����k�yX�d�8�WQ���w5,���<��f��8��1 d�1UCۋ���^�4Q����@�p�s1SV�G�vR~�8FS�+Z�r�;�1�N�3qc%τ��jE��1��ev��_�L���X�d�\������?U�	�}�y�d����n�	��,7k�I
\ No newline at end of file
diff --git a/.travis/secrets.env.enc b/.travis/secrets.env.enc
new file mode 100644
index 0000000000000000000000000000000000000000..0825ee7ec3d260adafbbc35ef7dc2b777f3c8399
GIT binary patch
literal 160
zcmV;R0AK&t5&omcUBZ~~-gB4$lDKwBsg4=@ZW!9y1q;YpM`PX?`G8)PJ4=d9hOr>6
zKg@vYqvWg?<h~Y)^PXvkF|amvTvkJ2`M`xzn`|amWI}rvS(yt(gcA6Om0A7CPTY#w
zXvFV)AlSTW=p^E50{5o)pdrIbqAw9BfoD}R#+&_zdoK6sciQ@1nAH6}eABK*`BkMX
Ogiz$H_@*H%O^*O92~&&!

literal 0
HcmV?d00001

diff --git a/README.md b/README.md
index 9c1a82c..cadb5cb 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 1. Create an Azure VM (preferably Ubuntu 16.04)
 2. Install Docker and Docker Compose
 3. Clone this repo
-4. Replace default environment variables in environment_variables.yml
+4. Replace default configuration values in `.env` and `secrets.env`
 5. To run tests type './run-test.sh' from within the top level directory
 6. To run in production type './run-prod.sh' from within the top level directory
 7. If you would like to clear the production database, run './delete-prod-data.sh'
diff --git a/app/faceanalysis/queue_poll.py b/app/faceanalysis/queue_poll.py
index e0ee9e2..346333f 100644
--- a/app/faceanalysis/queue_poll.py
+++ b/app/faceanalysis/queue_poll.py
@@ -8,12 +8,13 @@
 
 class QueuePoll:
     def __init__(self, queue_name):
+        self.logger = get_logger(__name__)
         self.queue_service = QueueService(
             account_name=STORAGE_ACCOUNT_NAME,
             account_key=STORAGE_ACCOUNT_KEY)
         self.queue_name = queue_name
+        self.logger.debug('Creating queue %s', queue_name)
         self.queue_service.create_queue(self.queue_name)
-        self.logger = get_logger(__name__)
 
     # pylint: disable=broad-except
     def _get_messages_from_queue(self):
diff --git a/app/faceanalysis/settings.py b/app/faceanalysis/settings.py
index 398eb7c..25fb372 100644
--- a/app/faceanalysis/settings.py
+++ b/app/faceanalysis/settings.py
@@ -19,7 +19,7 @@
     'FACE_VECTORIZE_ALGORITHM',
     'cwolff/face_recognition')
 
-TOKEN_SECRET_KEY = environ['TOKEN_SECRET_KEY']
+TOKEN_SECRET_KEY = environ.get('TOKEN_SECRET_KEY')
 TOKEN_EXPIRATION = int(environ.get(
     'DEFAULT_TOKEN_EXPIRATION_SECS',
     '500'))
diff --git a/docker-compose.yml b/docker-compose.yml
index c299656..0cf203d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -35,8 +35,15 @@ services:
     environment:
       HOST_DATA_DIR: "${DATA_DIR}"
       MOUNTED_DATA_DIR: /app/faceanalysis/images
+      LOGGING_LEVEL: ${LOGGING_LEVEL}
+      ALLOWED_IMAGE_FILE_EXTENSIONS: ${ALLOWED_IMAGE_FILE_EXTENSIONS}
+      IMAGE_PROCESSOR_QUEUE: ${IMAGE_PROCESSOR_QUEUE}
+      MYSQL_CONTAINER_NAME: mysql
+      MYSQL_USER: ${MYSQL_USER}
+      MYSQL_PASSWORD: ${MYSQL_PASSWORD}
+      MYSQL_DATABASE: ${MYSQL_DATABASE}
     env_file:
-      - ./environment_variables.env
+      - ./secrets.env
 
   api:
     restart: always
@@ -55,8 +62,18 @@ services:
         max-file: "5"
     volumes:
       - "${DATA_DIR}:/app/faceanalysis/images"
+    environment:
+      LOGGING_LEVEL: ${LOGGING_LEVEL}
+      ALLOWED_IMAGE_FILE_EXTENSIONS: ${ALLOWED_IMAGE_FILE_EXTENSIONS}
+      IMAGE_PROCESSOR_QUEUE: ${IMAGE_PROCESSOR_QUEUE}
+      MYSQL_CONTAINER_NAME: mysql
+      MYSQL_USER: ${MYSQL_USER}
+      MYSQL_PASSWORD: ${MYSQL_PASSWORD}
+      MYSQL_DATABASE: ${MYSQL_DATABASE}
+      TOKEN_SECRET_KEY: ${TOKEN_SECRET_KEY}
+      DEFAULT_TOKEN_EXPIRATION_SECS: ${DEFAULT_TOKEN_EXPIRATION_SECS}
     env_file:
-      - ./environment_variables.env
+      - ./secrets.env
 
   mysql:
     restart: always
@@ -69,5 +86,8 @@ services:
         max-file: "5"
     volumes:
       - "${DB_DIR}:/var/lib/mysql"
-    env_file:
-      - ./environment_variables.env
+    environment:
+      MYSQL_USER: ${MYSQL_USER}
+      MYSQL_PASSWORD: ${MYSQL_PASSWORD}
+      MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD}
+      MYSQL_DATABASE: ${MYSQL_DATABASE}
diff --git a/environment_variables.env b/environment_variables.env
deleted file mode 100644
index 6830d2e..0000000
--- a/environment_variables.env
+++ /dev/null
@@ -1,14 +0,0 @@
-STORAGE_ACCOUNT_NAME=YOUR_AZURE_QUEUE_STORAGE_ACCOUNT_NAME
-STORAGE_ACCOUNT_KEY=YOUR_AZURE_QUEUE_STORAGE_ACCOUNT_QUEUE
-IMAGE_PROCESSOR_QUEUE=YOUR_IMAGE_PROCESSOR_QUEUE
-
-MYSQL_USER=YOUR_MYSQL_USER
-MYSQL_PASSWORD=YOUR_MYSQL_PASSWORD
-MYSQL_ROOT_PASSWORD=YOUR_MYSQL_ROOT_PASSWORD
-MYSQL_DATABASE=YOUR_MYSQL_DATABASE
-MYSQL_CONTAINER_NAME=mysql
-
-LOGGING_LEVEL=DEBUG_OR_INFO_OR_WARNING_OR_ERROR_OR_CRITICAL
-TOKEN_SECRET_KEY=LONG_RANDOM_STRING
-ALLOWED_IMAGE_FILE_EXTENSIONS=JPG_PNG #extensions separated by _
-DEFAULT_TOKEN_EXPIRATION_SECS=NUM_SECS
diff --git a/run-test.sh b/run-test.sh
index 6e2f5fd..ab264ca 100755
--- a/run-test.sh
+++ b/run-test.sh
@@ -19,4 +19,5 @@ docker-compose run --rm --no-deps --entrypoint=python3 api -m flake8 /app/facean
 APP_PORT="8080" \
 DATA_DIR="${data_dir}" \
 DB_DIR="${db_dir}" \
+IMAGE_PROCESSOR_QUEUE="faceanalysis${RANDOM}" \
 docker-compose run --rm api nose2
diff --git a/secrets.env b/secrets.env
new file mode 100644
index 0000000..2b79226
--- /dev/null
+++ b/secrets.env
@@ -0,0 +1,2 @@
+STORAGE_ACCOUNT_NAME=YOUR_AZURE_QUEUE_STORAGE_ACCOUNT_NAME
+STORAGE_ACCOUNT_KEY=YOUR_AZURE_QUEUE_STORAGE_ACCOUNT_KEY

From b1532c7585184b0e333cbaef4eae443f541c2120 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Wed, 8 Aug 2018 09:47:06 -0400
Subject: [PATCH 14/15] Clean up test queue after test run

---
 app/faceanalysis/api.py            |  9 +++------
 app/faceanalysis/queue_poll.py     | 26 ++++++++++++++++----------
 app/faceanalysis/tests/test_api.py |  8 +++++++-
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/app/faceanalysis/api.py b/app/faceanalysis/api.py
index 4a0c3e5..48a882c 100644
--- a/app/faceanalysis/api.py
+++ b/app/faceanalysis/api.py
@@ -4,16 +4,15 @@
 from http import HTTPStatus
 import werkzeug
 from werkzeug.utils import secure_filename
-from azure.storage.queue import QueueService
 from flask_restful import Resource, Api, reqparse
 from flask import Flask, g
 from .models.models import Match, Image, User, ImageStatus
 from .models.database_manager import get_database_manager
 from .models.image_status_enum import ImageStatusEnum
 from .log import get_logger
+from .queue_poll import create_queue_service
 from .auth import auth
-from .settings import (STORAGE_ACCOUNT_NAME, STORAGE_ACCOUNT_KEY,
-                       IMAGE_PROCESSOR_QUEUE, ALLOWED_EXTENSIONS)
+from .settings import IMAGE_PROCESSOR_QUEUE, ALLOWED_EXTENSIONS
 
 app = Flask(__name__)
 app.config['UPLOAD_FOLDER'] = os.path.join(
@@ -22,9 +21,7 @@
     'images')
 app.url_map.strict_slashes = False
 api = Api(app)
-queue_service = QueueService(account_name=STORAGE_ACCOUNT_NAME,
-                             account_key=STORAGE_ACCOUNT_KEY)
-queue_service.create_queue(IMAGE_PROCESSOR_QUEUE)
+queue_service = create_queue_service(IMAGE_PROCESSOR_QUEUE)
 logger = get_logger(__name__)
 
 
diff --git a/app/faceanalysis/queue_poll.py b/app/faceanalysis/queue_poll.py
index 346333f..7bdc53f 100644
--- a/app/faceanalysis/queue_poll.py
+++ b/app/faceanalysis/queue_poll.py
@@ -5,16 +5,22 @@
 from .log import get_logger
 from .settings import STORAGE_ACCOUNT_KEY, STORAGE_ACCOUNT_NAME
 
+logger = get_logger(__name__)
+
+
+def create_queue_service(queue_name):
+    queue_service = QueueService(
+        account_name=STORAGE_ACCOUNT_NAME,
+        account_key=STORAGE_ACCOUNT_KEY)
+    logger.debug('Creating queue %s', queue_name)
+    queue_service.create_queue(queue_name)
+    return queue_service
+
 
 class QueuePoll:
     def __init__(self, queue_name):
-        self.logger = get_logger(__name__)
-        self.queue_service = QueueService(
-            account_name=STORAGE_ACCOUNT_NAME,
-            account_key=STORAGE_ACCOUNT_KEY)
+        self.queue_service = create_queue_service(queue_name)
         self.queue_name = queue_name
-        self.logger.debug('Creating queue %s', queue_name)
-        self.queue_service.create_queue(self.queue_name)
 
     # pylint: disable=broad-except
     def _get_messages_from_queue(self):
@@ -22,13 +28,13 @@ def _get_messages_from_queue(self):
         try:
             messages = self.queue_service.get_messages(self.queue_name)
             if messages:
-                self.logger.debug("Successfully received messages from queue")
-        except Exception as e:
-            self.logger.error(e, exc_info=True)
+                logger.debug('Got %d messages from queue', len(messages))
+        except Exception:
+            logger.exception('Unable to fetch messages from queue')
         return messages
 
     def poll(self):
-        self.logger.debug("Polling...")
+        logger.debug('Starting polling')
         while True:
             for message in self._get_messages_from_queue():
                 self.queue_service.delete_message(
diff --git a/app/faceanalysis/tests/test_api.py b/app/faceanalysis/tests/test_api.py
index 22f4e4b..7ec950a 100644
--- a/app/faceanalysis/tests/test_api.py
+++ b/app/faceanalysis/tests/test_api.py
@@ -9,7 +9,8 @@
 from faceanalysis.models.database_manager import get_database_manager
 from faceanalysis.models.image_status_enum import ImageStatusEnum
 from faceanalysis.models.models import init_models, delete_models
-from faceanalysis.settings import ALLOWED_EXTENSIONS
+from faceanalysis.queue_poll import create_queue_service
+from faceanalysis.settings import ALLOWED_EXTENSIONS, IMAGE_PROCESSOR_QUEUE
 
 
 class ApiTestCase(unittest.TestCase):
@@ -30,6 +31,11 @@ def setUp(self):
     def tearDown(self):
         delete_models(self.db.engine)
 
+    @classmethod
+    def tearDownClass(cls):
+        queue_service = create_queue_service(IMAGE_PROCESSOR_QUEUE)
+        queue_service.delete_queue(IMAGE_PROCESSOR_QUEUE)
+
     def _register_default_user(self,
                                username,
                                password,

From ad6dec879ead26692a68389169c3ec5436526e26 Mon Sep 17 00:00:00 2001
From: Clemens Wolff <clewolff@microsoft.com>
Date: Wed, 8 Aug 2018 09:54:30 -0400
Subject: [PATCH 15/15] Add face settings to dotenv

---
 .env               | 6 ++++++
 docker-compose.yml | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/.env b/.env
index 401d85e..3f7fc05 100644
--- a/.env
+++ b/.env
@@ -27,3 +27,9 @@ ALLOWED_IMAGE_FILE_EXTENSIONS=JPG_PNG
 # api access token configuration
 TOKEN_SECRET_KEY=some-long-random-string
 DEFAULT_TOKEN_EXPIRATION_SECS=500
+
+# maximum distance between two face vectors for them to be considered the same person
+DISTANCE_SCORE_THRESHOLD=0.6
+
+# docker image name of the algorithm to use for face vectorization
+FACE_VECTORIZE_ALGORITHM=cwolff/face_recognition
diff --git a/docker-compose.yml b/docker-compose.yml
index 0cf203d..d51d91f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -42,6 +42,8 @@ services:
       MYSQL_USER: ${MYSQL_USER}
       MYSQL_PASSWORD: ${MYSQL_PASSWORD}
       MYSQL_DATABASE: ${MYSQL_DATABASE}
+      DISTANCE_SCORE_THRESHOLD: ${DISTANCE_SCORE_THRESHOLD}
+      FACE_VECTORIZE_ALGORITHM: ${FACE_VECTORIZE_ALGORITHM}
     env_file:
       - ./secrets.env