diff --git a/.env b/.env
index 49b13b7..3f7fc05 100644
--- a/.env
+++ b/.env
@@ -1,5 +1,35 @@
+# these values configure the build of the docker images
 BUILD_TAG=latest
+DEVTOOLS=false
+
+# the port on which the api will be available
 APP_PORT=80
+
+# paths to the directories where data will be persisted on disk
 DATA_DIR=./persisted_data/prod/images
 DB_DIR=./persisted_data/prod/database
-DEVTOOLS=false
+
+# queue configuration
+IMAGE_PROCESSOR_QUEUE=faceanalysis
+
+# configuration values for mysql
+MYSQL_USER=faceanalysisrw
+MYSQL_PASSWORD=some-secure-string
+MYSQL_ROOT_PASSWORD=some-very-secure-string
+MYSQL_DATABASE=faceanalysis
+
+# allowed values are DEBUG, INFO, WARNING, ERROR and CRITICAL
+LOGGING_LEVEL=DEBUG
+
+# separate multiple extensions with underscores
+ALLOWED_IMAGE_FILE_EXTENSIONS=JPG_PNG
+
+# api access token configuration
+TOKEN_SECRET_KEY=some-long-random-string
+DEFAULT_TOKEN_EXPIRATION_SECS=500
+
+# maximum distance between two face vectors for them to be considered the same person
+DISTANCE_SCORE_THRESHOLD=0.6
+
+# docker image name of the algorithm to use for face vectorization
+FACE_VECTORIZE_ALGORITHM=cwolff/face_recognition
diff --git a/.travis.yml b/.travis.yml
index 50aef48..9ce9663 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,7 +1,11 @@
 language: generic
 
 services:
-  - docker
+- docker
+
+before_install:
+- openssl aes-256-cbc -K $encrypted_e08e3836e1cc_key -iv $encrypted_e08e3836e1cc_iv
+  -in ./.travis/secrets.env.enc -out ./secrets.env -d
 
 script:
-  - ./run-test.sh
+- ./run-test.sh
diff --git a/.travis/secrets.env.enc b/.travis/secrets.env.enc
new file mode 100644
index 0000000..0825ee7
Binary files /dev/null and b/.travis/secrets.env.enc differ
diff --git a/README.md b/README.md
index 9c1a82c..cadb5cb 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 1. Create an Azure VM (preferably Ubuntu 16.04)
 2. Install Docker and Docker Compose
 3. Clone this repo
-4. Replace default environment variables in environment_variables.yml
+4. Replace default configuration values in `.env` and `secrets.env`
 5. To run tests type './run-test.sh' from within the top level directory
 6. To run in production type './run-prod.sh' from within the top level directory
 7. If you would like to clear the production database, run './delete-prod-data.sh'
diff --git a/algorithms/face_recognition/Dockerfile b/algorithms/face_recognition/Dockerfile
new file mode 100644
index 0000000..db685b0
--- /dev/null
+++ b/algorithms/face_recognition/Dockerfile
@@ -0,0 +1,41 @@
+FROM python:3.6-slim-stretch
+
+RUN apt-get -y update && \
+    apt-get install -y --fix-missing \
+    build-essential \
+    cmake \
+    gfortran \
+    git \
+    wget \
+    curl \
+    graphicsmagick \
+    libgraphicsmagick1-dev \
+    libatlas-dev \
+    libavcodec-dev \
+    libavformat-dev \
+    libgtk2.0-dev \
+    libjpeg-dev \
+    liblapack-dev \
+    libswscale-dev \
+    pkg-config \
+    python3-dev \
+    python3-numpy \
+    software-properties-common \
+    zip && \
+    apt-get clean && \
+    rm -rf /tmp/* /var/tmp/*
+
+RUN cd ~ && \
+    mkdir -p dlib && \
+    git clone -b 'v19.9' --single-branch https://github.com/davisking/dlib.git dlib/ && \
+    cd dlib/ && \
+    python3 setup.py install --yes USE_AVX_INSTRUCTIONS
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY vectorize.py .
+
+ENTRYPOINT ["python3", "vectorize.py"]
diff --git a/algorithms/face_recognition/requirements.txt b/algorithms/face_recognition/requirements.txt
new file mode 100644
index 0000000..6a0dfc0
--- /dev/null
+++ b/algorithms/face_recognition/requirements.txt
@@ -0,0 +1 @@
+face-recognition==1.0.0
diff --git a/algorithms/face_recognition/vectorize.py b/algorithms/face_recognition/vectorize.py
new file mode 100644
index 0000000..0415ccf
--- /dev/null
+++ b/algorithms/face_recognition/vectorize.py
@@ -0,0 +1,35 @@
+import json
+import face_recognition as fr
+
+
+def get_face_vectors(img_path):
+    img = fr.load_image_file(img_path)
+    face_locations = fr.face_locations(img)
+    face_vectors = []
+    for top, right, bottom, left in face_locations:
+        cropped_img = img[top:bottom, left:right]
+        cropped_features = fr.face_encodings(cropped_img)
+        if cropped_features:
+            face_vector = cropped_features[0]
+            face_vectors.append(face_vector.tolist())
+    return face_vectors
+
+
+def _cli():
+    from argparse import ArgumentParser
+    from argparse import FileType
+
+    parser = ArgumentParser(description=__doc__)
+    parser.add_argument('image', type=FileType('r'))
+
+    args = parser.parse_args()
+    image = args.image
+    image.close()
+    img_path = image.name
+
+    vectors = get_face_vectors(img_path)
+    print(json.dumps({'faceVectors': vectors}))
+
+
+if __name__ == '__main__':
+    _cli()
diff --git a/app/Dockerfile b/app/Dockerfile
index 3b3846c..a6da92d 100644
--- a/app/Dockerfile
+++ b/app/Dockerfile
@@ -3,40 +3,9 @@ FROM python:3.5-slim
 RUN apt-get -y update \
     && apt-get install -y \
     mysql-client \
-    && apt-get clean \
-    && rm -rf /tmp/* /var/tmp/*
-
-RUN apt-get -y update \
-    && apt-get install -y --fix-missing \
-    build-essential \
-    cmake \
-    gfortran \
-    git \
-    wget \
-    curl \
-    graphicsmagick \
-    libgraphicsmagick1-dev \
-    libatlas-dev \
-    libavcodec-dev \
-    libavformat-dev \
-    libboost-all-dev \
-    libgtk2.0-dev \
-    libjpeg-dev \
-    liblapack-dev \
-    libswscale-dev \
-    pkg-config \
-    python3-dev \
     python3-numpy \
-    software-properties-common \
-    zip \
-    mysql-client \
     && apt-get clean \
-    && rm -rf /tmp/* /var/tmp/* \
-    && cd ~ \
-    && mkdir -p dlib \
-    && git clone -b 'v19.7' --single-branch https://github.com/davisking/dlib.git dlib/ \
-    && cd dlib/ \
-    && python3 setup.py install --yes USE_AVX_INSTRUCTIONS
+    && rm -rf /tmp/* /var/tmp/*
 
 WORKDIR /app
diff --git a/app/faceanalysis/api.py b/app/faceanalysis/api.py
index 7abfa34..48a882c 100644
--- a/app/faceanalysis/api.py
+++ b/app/faceanalysis/api.py
@@ -4,14 +4,15 @@
 from http import HTTPStatus
 import werkzeug
 from werkzeug.utils import secure_filename
-from azure.storage.queue import QueueService
 from flask_restful import Resource, Api, reqparse
 from flask import Flask, g
 from .models.models import Match, Image, User, ImageStatus
-from .models.database_manager import DatabaseManager
+from .models.database_manager import get_database_manager
 from .models.image_status_enum import ImageStatusEnum
 from .log import get_logger
+from .queue_poll import create_queue_service
 from .auth import auth
+from .settings import IMAGE_PROCESSOR_QUEUE, ALLOWED_EXTENSIONS
 
 app = Flask(__name__)
 app.config['UPLOAD_FOLDER'] = os.path.join(
@@ -20,10 +21,8 @@
     'images')
 app.url_map.strict_slashes = False
 api = Api(app)
-queue_service = QueueService(account_name=os.environ['STORAGE_ACCOUNT_NAME'],
-                             account_key=os.environ['STORAGE_ACCOUNT_KEY'])
-queue_service.create_queue(os.environ['IMAGE_PROCESSOR_QUEUE'])
-logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
+queue_service = create_queue_service(IMAGE_PROCESSOR_QUEUE)
+logger = get_logger(__name__)
 
 
 class AuthenticationToken(Resource):
@@ -46,7 +45,7 @@ def post(self):
         args = parser.parse_args()
         username = args['username']
         password = args['password']
-        db = DatabaseManager()
+        db = get_database_manager()
         session = db.get_session()
         query = session.query(User).filter(User.username == username).first()
         session.close()
@@ -71,7 +70,7 @@ def post(self):
                             help="img_id missing in the post body")
         args = parser.parse_args()
         img_id = args['img_id']
-        db = DatabaseManager()
+        db = get_database_manager()
         session = db.get_session()
         img_status = session.query(ImageStatus).filter(
             ImageStatus.img_id == img_id).first()
@@ -81,8 +80,7 @@ def post(self):
             return ('Image previously placed on queue',
                     HTTPStatus.BAD_REQUEST.value)
         try:
-            queue_service.put_message(os.environ['IMAGE_PROCESSOR_QUEUE'],
-                                      img_id)
+            queue_service.put_message(IMAGE_PROCESSOR_QUEUE, img_id)
             img_status.status = ImageStatusEnum.on_queue.name
             db.safe_commit(session)
             logger.info('img successfully put on queue')
@@ -99,7 +97,8 @@ def post(self):
 
     def get(self, img_id):
         logger.debug('checking if img has been processed')
-        session = DatabaseManager().get_session()
+        db = get_database_manager()
+        session = db.get_session()
         img_status = session.query(ImageStatus).filter(
             ImageStatus.img_id == img_id).first()
         session.close()
@@ -111,8 +110,6 @@
 
 class ImgUpload(Resource):
     method_decorators = [auth.login_required]
-    env_extensions = os.environ['ALLOWED_IMAGE_FILE_EXTENSIONS']
-    allowed_extensions = env_extensions.lower().split('_')
 
     # pylint: disable=broad-except
    def post(self):
@@ -125,7 +122,7 @@ def post(self):
                             location='files')
         args = parser.parse_args()
         img = args['image']
-        db = DatabaseManager()
+        db = get_database_manager()
         if self._allowed_file(img.filename):
             filename = secure_filename(img.filename)
             img_id = filename[:filename.find('.')]
@@ -151,12 +148,12 @@ def post(self):
         else:
             error_msg = ('Image upload failed: please use one of the '
                          'following extensions --> {}'
-                         .format(self.allowed_extensions))
+                         .format(ALLOWED_EXTENSIONS))
             return error_msg, HTTPStatus.BAD_REQUEST.value
 
     def _allowed_file(self, filename):
         return ('.' in filename and
-                filename.rsplit('.', 1)[1].lower() in self.allowed_extensions)
+                filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS)
 
 
 class ImgMatchList(Resource):
@@ -165,7 +162,8 @@ class ImgMatchList(Resource):
     # pylint: disable=assignment-from-no-return
     def get(self, img_id):
         logger.debug('getting img match list')
-        session = DatabaseManager().get_session()
+        db = get_database_manager()
+        session = db.get_session()
         query = session.query(Match).filter(Match.this_img_id == img_id)
         imgs = []
         distances = []
@@ -182,7 +180,8 @@ class ImgList(Resource):
 
     def get(self):
         logger.debug('getting img list')
-        session = DatabaseManager().get_session()
+        db = get_database_manager()
+        session = db.get_session()
         query = session.query(Image).all()
         imgs = [f.img_id for f in query]
         session.close()
diff --git a/app/faceanalysis/auth.py b/app/faceanalysis/auth.py
index ae0e85e..a701848 100644
--- a/app/faceanalysis/auth.py
+++ b/app/faceanalysis/auth.py
@@ -1,7 +1,7 @@
 from flask_httpauth import HTTPBasicAuth
 from flask import g
 from .models.models import User
-from .models.models import DatabaseManager
+from .models.database_manager import get_database_manager
 
 auth = HTTPBasicAuth()
 
@@ -9,7 +9,8 @@
 @auth.verify_password
 def verify_password(username_or_token, password):
     user = User.verify_auth_token(username_or_token)
-    session = DatabaseManager().get_session()
+    db = get_database_manager()
+    session = db.get_session()
     if not user:
         user = session.query(User).filter(
             User.username == username_or_token).first()
diff --git a/app/faceanalysis/face_vectorizer.py b/app/faceanalysis/face_vectorizer.py
new file mode 100644
index 0000000..2628294
--- /dev/null
+++ b/app/faceanalysis/face_vectorizer.py
@@ -0,0 +1,39 @@
+import os
+import json
+import docker
+from .log import get_logger
+from .settings import MOUNTED_DATA_DIR
+from .settings import HOST_DATA_DIR
+
+logger = get_logger(__name__)
+
+
+def _format_mount_path(img_path):
+    return '/{}'.format(os.path.basename(img_path))
+
+
+def _format_host_path(img_path):
+    # volume mounts must be absolute
+    if not img_path.startswith('/'):
+        img_path = os.path.abspath(img_path)
+
+    # adjust the path if it itself is a mount and if we're spawning a
+    # sibling container
+    if MOUNTED_DATA_DIR and HOST_DATA_DIR:
+        img_path = img_path.replace(MOUNTED_DATA_DIR, HOST_DATA_DIR)
+
+    return img_path
+
+
+def get_face_vectors(img_path, algorithm):
+    img_mount = _format_mount_path(img_path)
+    img_host = _format_host_path(img_path)
+    volumes = {img_host: {'bind': img_mount, 'mode': 'ro'}}
+
+    logger.debug('Running container %s with image %s', algorithm, img_host)
+    client = docker.from_env()
+    stdout = client.containers.run(algorithm, img_mount,
+                                   volumes=volumes, auto_remove=True)
+
+    face_vectors = json.loads(stdout.decode('ascii')).get('faceVectors', [])
+    return face_vectors
diff --git a/app/faceanalysis/log.py b/app/faceanalysis/log.py
index 91d0264..4238a12 100644
--- a/app/faceanalysis/log.py
+++ b/app/faceanalysis/log.py
@@ -1,8 +1,9 @@
 import sys
 import logging
+from .settings import LOGGING_LEVEL
 
 
-def get_logger(module_name, logging_level):
+def get_logger(module_name, logging_level=LOGGING_LEVEL):
     logging_levels = {
         'DEBUG': logging.DEBUG,
         'INFO': logging.INFO,
diff --git a/app/faceanalysis/models/database_manager.py b/app/faceanalysis/models/database_manager.py
index c135c4f..deb2ff3 100644
--- a/app/faceanalysis/models/database_manager.py
+++ b/app/faceanalysis/models/database_manager.py
@@ -1,39 +1,26 @@
-import os
+from functools import lru_cache
 from sqlalchemy.orm import sessionmaker
 from sqlalchemy import create_engine
 from ..log import get_logger
+from ..settings import (MYSQL_USER, MYSQL_PASSWORD, MYSQL_DATABASE,
+                        MYSQL_CONTAINER_NAME)
 
 
-def singleton(class_):
-    instances = {}
-
-    def getinstance(*args, **kwargs):
-        if class_ not in instances:
-            instances[class_] = class_(*args, **kwargs)
-        return instances[class_]
-    return getinstance
-
-
-@singleton
 class DatabaseManager:
     def __init__(self):
-        mysql_user = os.environ['MYSQL_USER']
-        mysql_password = os.environ['MYSQL_PASSWORD']
-        mysql_container_name = os.environ['MYSQL_CONTAINER_NAME']
-        mysql_database = os.environ['MYSQL_DATABASE']
         mysql_connector_str = 'mysql+mysqlconnector'
         mysql_port = '3306'
         engine_credential = "{}://{}:{}@{}:{}/{}".format(mysql_connector_str,
-                                                         mysql_user,
-                                                         mysql_password,
-                                                         mysql_container_name,
+                                                         MYSQL_USER,
+                                                         MYSQL_PASSWORD,
+                                                         MYSQL_CONTAINER_NAME,
                                                          mysql_port,
-                                                         mysql_database)
+                                                         MYSQL_DATABASE)
         self.engine = create_engine(engine_credential, pool_recycle=3600)
         self.session_factory = sessionmaker(bind=self.engine)
-        self.logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
+        self.logger = get_logger(__name__)
 
     def get_session(self):
         return self.session_factory()
@@ -53,3 +40,8 @@ def safe_commit(self, session):
     def close_engine(self):
         self.logger.debug("engine closed")
         self.engine.dispose()
+
+
+@lru_cache(maxsize=1)
+def get_database_manager():
+    return DatabaseManager()
diff --git a/app/faceanalysis/models/models.py b/app/faceanalysis/models/models.py
index e776a95..5aef626 100644
--- a/app/faceanalysis/models/models.py
+++ b/app/faceanalysis/models/models.py
@@ -1,5 +1,4 @@
 # pylint: disable=too-few-public-methods
-import os
 from passlib.apps import custom_app_context as pwd_context
 from sqlalchemy.orm import relationship
 from sqlalchemy.sql import func
@@ -9,7 +8,8 @@
                         Integer, DateTime, UniqueConstraint)
 from itsdangerous import (TimedJSONWebSignatureSerializer as Serializer,
                           BadSignature, SignatureExpired)
-from .database_manager import DatabaseManager
+from .database_manager import get_database_manager
+from ..settings import TOKEN_EXPIRATION, TOKEN_SECRET_KEY
 
 Base = declarative_base()
 
@@ -28,22 +28,21 @@ def verify_password(self, password):
         return pwd_context.verify(password, self.password_hash)
 
     def generate_auth_token(self, expiration=None):
-        token = os.environ['TOKEN_SECRET_KEY']
-        expiration = expiration or int(
-            os.environ['DEFAULT_TOKEN_EXPIRATION_SECS'])
-        serializer = Serializer(token, expires_in=expiration)
+        expiration = expiration or TOKEN_EXPIRATION
+        serializer = Serializer(TOKEN_SECRET_KEY, expires_in=expiration)
         return serializer.dumps({'id': self.id})
 
     @staticmethod
     def verify_auth_token(token):
-        serializer = Serializer(os.environ['TOKEN_SECRET_KEY'])
+        serializer = Serializer(TOKEN_SECRET_KEY)
         try:
             data = serializer.loads(token)
         except SignatureExpired:
             return None
         except BadSignature:
             return None
-        session = DatabaseManager().get_session()
+        db = get_database_manager()
+        session = db.get_session()
         user = session.query(User).filter(User.id == data['id']).first()
         session.close()
         return user
diff --git a/app/faceanalysis/pipeline.py b/app/faceanalysis/pipeline.py
index b79d432..4e75cdc 100644
--- a/app/faceanalysis/pipeline.py
+++ b/app/faceanalysis/pipeline.py
@@ -1,24 +1,24 @@
 # pylint: disable=too-few-public-methods
 import os
-import base64
+import json
 import numpy as np
-import face_recognition as fr
+from .face_vectorizer import get_face_vectors
 from .queue_poll import QueuePoll
-from .models.database_manager import DatabaseManager
+from .models.database_manager import get_database_manager
 from .models.models import Image, FeatureMapping, Match, ImageStatus
 from .models.image_status_enum import ImageStatusEnum
 from .log import get_logger
+from .settings import (IMAGE_PROCESSOR_QUEUE, ALLOWED_EXTENSIONS,
+                       DISTANCE_SCORE_THRESHOLD, FACE_VECTORIZE_ALGORITHM)
 
 
 class Pipeline:
     def __init__(self):
-        self.db = DatabaseManager()
-        self.logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
+        self.db = get_database_manager()
+        self.logger = get_logger(__name__)
         dirname = os.path.dirname(os.path.abspath(__file__))
         self.img_dir = os.path.join(dirname, 'images')
-        file_extensions = os.environ['ALLOWED_IMAGE_FILE_EXTENSIONS'].lower()
-        self.allowed_file_extensions = file_extensions.split('_')
         self.logger.debug('pipeline initialized')
 
     def _add_entry_to_session(self, cls, session, **kwargs):
@@ -27,30 +27,28 @@ def _add_entry_to_session(self, cls, session, **kwargs):
         row = cls(**kwargs)
         session.add(row)
         return row
 
-    # pylint: disable=broad-except
-    def _process_img(self, img_id, session):
-        self.logger.debug('processing img')
-        img_has_been_uploaded = False
-        img = None
-        for extension in self.allowed_file_extensions:
+    def _find_image(self, img_id, session):
+        self.logger.debug('finding image %s', img_id)
+
+        img_path = None
+        for extension in ALLOWED_EXTENSIONS:
             img_name = "{}.{}".format(img_id, extension)
             fpath = os.path.join(self.img_dir, img_name)
-            try:
-                img = fr.load_image_file(fpath)
-                img_has_been_uploaded = True
+            if os.path.isfile(fpath):
+                img_path = fpath
                 break
-            except Exception:
-                continue
-        if img_has_been_uploaded:
+
+        if img_path:
             self._add_entry_to_session(Image, session, img_id=img_id)
-            return img
+
+        return img_path
 
     # pylint: disable=broad-except
     def _delete_img(self, img_id):
         self.logger.debug('deleting img')
-        for extension in self.allowed_file_extensions:
+        for extension in ALLOWED_EXTENSIONS:
             img_name = "{}.{}".format(img_id, extension)
             fpath = os.path.join(self.img_dir, img_name)
             try:
@@ -61,7 +59,7 @@
 
     def _process_feature_mapping(self, features, img_id, session):
         self.logger.debug('processing feature mapping')
-        feature_str = base64.b64encode(features.dumps())
+        feature_str = json.dumps(features)
         self._add_entry_to_session(FeatureMapping,
                                    session,
                                    img_id=img_id,
@@ -91,7 +89,7 @@ def _get_img_ids_and_features(self):
         img_ids = []
         for row in rows:
             img_ids.append(row.img_id)
-            current_features = np.loads(base64.b64decode(row.features))
+            current_features = np.array(json.loads(row.features))
             known_features.append(current_features)
         return img_ids, np.array(known_features)
@@ -129,6 +127,15 @@ def _img_should_be_processed(self, img_id):
             return img_status.status == ImageStatusEnum.on_queue.name
         return False
 
+    # pylint: disable=len-as-condition
+    @classmethod
+    def _compute_distances(cls, face_encodings, face_to_compare):
+        if len(face_encodings) == 0:
+            return np.empty((0))
+
+        face_to_compare = np.array(face_to_compare)
+        return np.linalg.norm(face_encodings - face_to_compare, axis=1)
+
     # pylint: disable=too-many-locals
     def _handle_message_from_queue(self, message):
         self.logger.debug("handling message from queue")
@@ -139,37 +146,33 @@ def _handle_message_from_queue(self, message):
             return
         self._update_img_status(
             curr_img_id, status=ImageStatusEnum.processing.name)
-        curr_img = self._process_img(curr_img_id, session)
-        if curr_img is not None:
+        curr_img_path = self._find_image(curr_img_id, session)
+        if curr_img_path is not None:
             prev_img_ids, prev_features = self._get_img_ids_and_features()
             curr_matches = []
-            face_locations = fr.face_locations(curr_img)
-            if not face_locations:
+            face_vectors = get_face_vectors(
+                curr_img_path, FACE_VECTORIZE_ALGORITHM)
+            if not face_vectors:
                 error_msg = "No faces found in image"
                 self._update_img_status(curr_img_id, error_msg=error_msg)
-            for face_location in face_locations:
-                top, right, bottom, left = face_location
-                curr_cropped_img = curr_img[top:bottom, left:right]
-                curr_cropped_features = fr.face_encodings(
-                    curr_cropped_img)
-                if curr_cropped_features:
-                    self._process_feature_mapping(curr_cropped_features[0],
-                                                  curr_img_id,
-                                                  session)
-                    face_distances = fr.face_distance(prev_features,
-                                                      curr_cropped_features)
-                    for count, distance_score in enumerate(face_distances):
-                        distance_score = float(distance_score)
-                        that_img_id = prev_img_ids[count]
-                        if distance_score < 0.6 and curr_img_id != that_img_id:
-                            self._prepare_matches(curr_matches,
-                                                  that_img_id,
-                                                  distance_score)
+            for face_vector in face_vectors:
+                self._process_feature_mapping(
+                    face_vector, curr_img_id, session)
+                face_distances = self._compute_distances(
+                    prev_features, face_vector)
+                for i, distance_score in enumerate(face_distances):
+                    that_img_id = prev_img_ids[i]
+                    if curr_img_id == that_img_id:
+                        continue
+                    distance_score = float(distance_score)
+                    if distance_score >= DISTANCE_SCORE_THRESHOLD:
+                        continue
+                    self._prepare_matches(
+                        curr_matches, that_img_id, distance_score)
             for curr_match in curr_matches:
-                self._process_matches(curr_img_id,
-                                      curr_match["that_img_id"],
-                                      curr_match["distance_score"],
-                                      session)
+                self._process_matches(
+                    curr_img_id, curr_match["that_img_id"],
+                    curr_match["distance_score"], session)
         else:
             error_msg = "Image processed before uploaded"
             self._update_img_status(curr_img_id, error_msg=error_msg)
@@ -180,7 +183,7 @@
 
     def begin_pipeline(self):
         self.logger.debug('pipeline began')
-        qp = QueuePoll(os.environ['IMAGE_PROCESSOR_QUEUE'])
+        qp = QueuePoll(IMAGE_PROCESSOR_QUEUE)
         for message in qp.poll():
             self._handle_message_from_queue(message)
             self.logger.debug("polling next iteration")
diff --git a/app/faceanalysis/queue_poll.py b/app/faceanalysis/queue_poll.py
index 33988f1..7bdc53f 100644
--- a/app/faceanalysis/queue_poll.py
+++ b/app/faceanalysis/queue_poll.py
@@ -1,20 +1,26 @@
 # pylint: disable=too-few-public-methods
-import os
 from time import sleep
 from azure.storage.queue import QueueService
 from .log import get_logger
+from .settings import STORAGE_ACCOUNT_KEY, STORAGE_ACCOUNT_NAME
+
+logger = get_logger(__name__)
+
+
+def create_queue_service(queue_name):
+    queue_service = QueueService(
+        account_name=STORAGE_ACCOUNT_NAME,
+        account_key=STORAGE_ACCOUNT_KEY)
+    logger.debug('Creating queue %s', queue_name)
+    queue_service.create_queue(queue_name)
+    return queue_service
 
 
 class QueuePoll:
     def __init__(self, queue_name):
-        env_acc_name = os.environ['STORAGE_ACCOUNT_NAME']
-        env_acc_key = os.environ['STORAGE_ACCOUNT_KEY']
-        self.queue_service = QueueService(account_name=env_acc_name,
-                                          account_key=env_acc_key)
+        self.queue_service = create_queue_service(queue_name)
         self.queue_name = queue_name
-        self.queue_service.create_queue(self.queue_name)
-        self.logger = get_logger(__name__, os.environ['LOGGING_LEVEL'])
 
     # pylint: disable=broad-except
     def _get_messages_from_queue(self):
@@ -22,13 +28,13 @@ def _get_messages_from_queue(self):
         try:
             messages = self.queue_service.get_messages(self.queue_name)
             if messages:
-                self.logger.debug("Successfully received messages from queue")
-        except Exception as e:
-            self.logger.error(e, exc_info=True)
+                logger.debug('Got %d messages from queue', len(messages))
+        except Exception:
+            logger.exception('Unable to fetch messages from queue')
         return messages
 
     def poll(self):
-        self.logger.debug("Polling...")
+        logger.debug('Starting polling')
         while True:
             for message in self._get_messages_from_queue():
                 self.queue_service.delete_message(
diff --git a/app/faceanalysis/settings.py b/app/faceanalysis/settings.py
new file mode 100644
index 0000000..25fb372
--- /dev/null
+++ b/app/faceanalysis/settings.py
@@ -0,0 +1,30 @@
+from os import environ
+
+LOGGING_LEVEL = environ.get('LOGGING_LEVEL', 'WARNING')
+
+MOUNTED_DATA_DIR = environ.get('MOUNTED_DATA_DIR')
+HOST_DATA_DIR = environ.get('HOST_DATA_DIR')
+
+STORAGE_ACCOUNT_NAME = environ['STORAGE_ACCOUNT_NAME']
+STORAGE_ACCOUNT_KEY = environ['STORAGE_ACCOUNT_KEY']
+IMAGE_PROCESSOR_QUEUE = environ.get('IMAGE_PROCESSOR_QUEUE', 'faceanalysis')
+
+ALLOWED_EXTENSIONS = environ.get('ALLOWED_IMAGE_FILE_EXTENSIONS', '')\
+    .lower().split('_')
+
+DISTANCE_SCORE_THRESHOLD = float(environ.get(
+    'DISTANCE_SCORE_THRESHOLD',
+    '0.6'))
+FACE_VECTORIZE_ALGORITHM = environ.get(
+    'FACE_VECTORIZE_ALGORITHM',
+    'cwolff/face_recognition')
+
+TOKEN_SECRET_KEY = environ.get('TOKEN_SECRET_KEY')
+TOKEN_EXPIRATION = int(environ.get(
+    'DEFAULT_TOKEN_EXPIRATION_SECS',
+    '500'))
+
+MYSQL_USER = environ['MYSQL_USER']
+MYSQL_PASSWORD = environ['MYSQL_PASSWORD']
+MYSQL_CONTAINER_NAME = environ['MYSQL_CONTAINER_NAME']
+MYSQL_DATABASE = environ['MYSQL_DATABASE']
diff --git a/app/faceanalysis/tests/test_api.py b/app/faceanalysis/tests/test_api.py
index 7bf3fab..7ec950a 100644
--- a/app/faceanalysis/tests/test_api.py
+++ b/app/faceanalysis/tests/test_api.py
@@ -6,9 +6,11 @@
 from time import sleep
 from http import HTTPStatus
 from faceanalysis.api import app
-from faceanalysis.models.database_manager import DatabaseManager
+from faceanalysis.models.database_manager import get_database_manager
 from faceanalysis.models.image_status_enum import ImageStatusEnum
 from faceanalysis.models.models import init_models, delete_models
+from faceanalysis.queue_poll import create_queue_service
+from faceanalysis.settings import ALLOWED_EXTENSIONS, IMAGE_PROCESSOR_QUEUE
 
 
 class ApiTestCase(unittest.TestCase):
@@ -17,7 +19,7 @@ class ApiTestCase(unittest.TestCase):
     def setUp(self):
         app.testing = True
         self.app = app.test_client()
-        self.db = DatabaseManager()
+        self.db = get_database_manager()
         init_models(self.db.engine)
         username = 'username'
         password = 'password'
@@ -29,6 +31,11 @@ def setUp(self):
     def tearDown(self):
         delete_models(self.db.engine)
 
+    @classmethod
+    def tearDownClass(cls):
+        queue_service = create_queue_service(IMAGE_PROCESSOR_QUEUE)
+        queue_service.delete_queue(IMAGE_PROCESSOR_QUEUE)
+
     def _register_default_user(self,
                                username,
                                password,
@@ -79,8 +86,13 @@ def _get_matches(self, img_id, expected_status_code=HTTPStatus.OK.value):
         return response
 
     def _wait_for_img_to_finish_processing(
-            self, img_id, expected_status_code=HTTPStatus.OK.value):
-        while True:
+            self, img_id, expected_status_code=HTTPStatus.OK.value,
+            max_wait_time_seconds=300):
+
+        total_wait_time_seconds = 0
+        polling_interval_seconds = 5
+
+        while total_wait_time_seconds < max_wait_time_seconds:
             rel_path = '/process_image/'
             response = self.app.get(self.BASE_PATH + rel_path + img_id,
                                     headers=self.headers)
@@ -90,7 +102,12 @@ def _wait_for_img_to_finish_processing(
             data = json.loads(response.get_data(as_text=True))
             if data['status'] == ImageStatusEnum.finished_processing.name:
                 return response
-            sleep(3)
+
+            sleep(polling_interval_seconds)
+            total_wait_time_seconds += polling_interval_seconds
+
+        self.fail('Waited for more than {} seconds for image {}'
+                  .format(max_wait_time_seconds, img_id))
 
     def _test_end_to_end_with_matching_imgs(self, fnames):
         img_ids = set()
@@ -164,10 +181,8 @@ def test_upload_and_process_twice(self):
 
     def test_end_to_end_with_different_file_formats(self):
         # test jpg && png
-        file_extensions = os.environ['ALLOWED_IMAGE_FILE_EXTENSIONS'].lower()
-        allowed_file_extensions = file_extensions.split('_')
-        self.assertIn('jpg', allowed_file_extensions)
-        self.assertIn('png', allowed_file_extensions)
+        self.assertIn('jpg', ALLOWED_EXTENSIONS)
+        self.assertIn('png', ALLOWED_EXTENSIONS)
         fnames = {'11.jpg', '12.png'}
         self._test_end_to_end_with_matching_imgs(fnames)
diff --git a/app/requirements.txt b/app/requirements.txt
index 2fc734d..b2f5a64 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -1,10 +1,9 @@
 Flask-HTTPAuth==3.2.3
 Flask-RESTful==0.3.6
 Flask==0.12.2
-Pillow==5.0.0
 SQLAlchemy==1.2.0
 azure-storage-queue==0.37.0
-face-recognition==1.0.0
+docker==3.4.1
 gunicorn==19.7.1
 mysql-connector-python==8.0.5
 numpy==1.14.0
diff --git a/app/worker.py b/app/worker.py
index 022fd8c..94b9f32 100644
--- a/app/worker.py
+++ b/app/worker.py
@@ -1,9 +1,9 @@
-from faceanalysis.models.database_manager import DatabaseManager
+from faceanalysis.models.database_manager import get_database_manager
 from faceanalysis.models.models import init_models
 from faceanalysis.pipeline import Pipeline
 
 if __name__ == "__main__":
-    db = DatabaseManager()
+    db = get_database_manager()
     init_models(db.engine)
     pipeline = Pipeline()
     pipeline.begin_pipeline()
diff --git a/docker-compose.yml b/docker-compose.yml
index de5371f..d51d91f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -30,9 +30,22 @@ services:
         max-size: "50m"
         max-file: "5"
     volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
       - "${DATA_DIR}:/app/faceanalysis/images"
+    environment:
+      HOST_DATA_DIR: "${DATA_DIR}"
+      MOUNTED_DATA_DIR: /app/faceanalysis/images
+      LOGGING_LEVEL: ${LOGGING_LEVEL}
+      ALLOWED_IMAGE_FILE_EXTENSIONS: ${ALLOWED_IMAGE_FILE_EXTENSIONS}
+      IMAGE_PROCESSOR_QUEUE: ${IMAGE_PROCESSOR_QUEUE}
+      MYSQL_CONTAINER_NAME: mysql
+      MYSQL_USER: ${MYSQL_USER}
+      MYSQL_PASSWORD: ${MYSQL_PASSWORD}
+      MYSQL_DATABASE: ${MYSQL_DATABASE}
+      DISTANCE_SCORE_THRESHOLD: ${DISTANCE_SCORE_THRESHOLD}
+      FACE_VECTORIZE_ALGORITHM: ${FACE_VECTORIZE_ALGORITHM}
     env_file:
-      - ./environment_variables.env
+      - ./secrets.env
 
   api:
     restart: always
@@ -51,8 +64,18 @@
         max-file: "5"
     volumes:
       - "${DATA_DIR}:/app/faceanalysis/images"
+    environment:
+      LOGGING_LEVEL: ${LOGGING_LEVEL}
+      ALLOWED_IMAGE_FILE_EXTENSIONS: ${ALLOWED_IMAGE_FILE_EXTENSIONS}
+      IMAGE_PROCESSOR_QUEUE: ${IMAGE_PROCESSOR_QUEUE}
+      MYSQL_CONTAINER_NAME: mysql
+      MYSQL_USER: ${MYSQL_USER}
+      MYSQL_PASSWORD: ${MYSQL_PASSWORD}
+      MYSQL_DATABASE: ${MYSQL_DATABASE}
+      TOKEN_SECRET_KEY: ${TOKEN_SECRET_KEY}
+      DEFAULT_TOKEN_EXPIRATION_SECS: ${DEFAULT_TOKEN_EXPIRATION_SECS}
     env_file:
-      - ./environment_variables.env
+      - ./secrets.env
 
   mysql:
     restart: always
@@ -65,5 +88,8 @@
         max-file: "5"
     volumes:
       - "${DB_DIR}:/var/lib/mysql"
-    env_file:
-      - ./environment_variables.env
+    environment:
+      MYSQL_USER: ${MYSQL_USER}
+      MYSQL_PASSWORD: ${MYSQL_PASSWORD}
+      MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD}
+      MYSQL_DATABASE: ${MYSQL_DATABASE}
diff --git a/environment_variables.env b/environment_variables.env
deleted file mode 100644
index 6830d2e..0000000
--- a/environment_variables.env
+++ /dev/null
@@ -1,14 +0,0 @@
-STORAGE_ACCOUNT_NAME=YOUR_AZURE_QUEUE_STORAGE_ACCOUNT_NAME
-STORAGE_ACCOUNT_KEY=YOUR_AZURE_QUEUE_STORAGE_ACCOUNT_QUEUE
-IMAGE_PROCESSOR_QUEUE=YOUR_IMAGE_PROCESSOR_QUEUE
-
-MYSQL_USER=YOUR_MYSQL_USER
-MYSQL_PASSWORD=YOUR_MYSQL_PASSWORD
-MYSQL_ROOT_PASSWORD=YOUR_MYSQL_ROOT_PASSWORD
-MYSQL_DATABASE=YOUR_MYSQL_DATABASE
-MYSQL_CONTAINER_NAME=mysql
-
-LOGGING_LEVEL=DEBUG_OR_INFO_OR_WARNING_OR_ERROR_OR_CRITICAL
-TOKEN_SECRET_KEY=LONG_RANDOM_STRING
-ALLOWED_IMAGE_FILE_EXTENSIONS=JPG_PNG #extensions separated by _
-DEFAULT_TOKEN_EXPIRATION_SECS=NUM_SECS
diff --git a/run-test.sh b/run-test.sh
index 267c84d..ab264ca 100755
--- a/run-test.sh
+++ b/run-test.sh
@@ -5,18 +5,19 @@ set -e
 data_dir="$(mktemp -d)"
 db_dir="$(mktemp -d)"
 
-cleanup() { rm -rf "${data_dir}" "${db_dir}"; }
+cleanup() { set +e; rm -rf "${data_dir}" "${db_dir}"; }
 trap cleanup EXIT
 
-docker-compose down
-
 DEVTOOLS="true" \
 docker-compose build
 
+docker-compose down
+
 docker-compose run --rm --no-deps --entrypoint=python3 api -m pylint /app/faceanalysis
 docker-compose run --rm --no-deps --entrypoint=python3 api -m flake8 /app/faceanalysis
 
 APP_PORT="8080" \
 DATA_DIR="${data_dir}" \
 DB_DIR="${db_dir}" \
+IMAGE_PROCESSOR_QUEUE="faceanalysis${RANDOM}" \
 docker-compose run --rm api nose2
diff --git a/secrets.env b/secrets.env
new file mode 100644
index 0000000..2b79226
--- /dev/null
+++ b/secrets.env
@@ -0,0 +1,2 @@
+STORAGE_ACCOUNT_NAME=YOUR_AZURE_QUEUE_STORAGE_ACCOUNT_NAME
+STORAGE_ACCOUNT_KEY=YOUR_AZURE_QUEUE_STORAGE_ACCOUNT_QUEUE