Skip to content

Commit

Permalink
Merge cb4c353 into 09c1b09
Browse files Browse the repository at this point in the history
  • Loading branch information
rstorey committed Nov 16, 2018
2 parents 09c1b09 + cb4c353 commit 7fea313
Show file tree
Hide file tree
Showing 7 changed files with 162 additions and 24 deletions.
31 changes: 31 additions & 0 deletions build_indexer.sh
@@ -0,0 +1,31 @@
#!/bin/bash
#
# Build the concordia/indexer Docker image and push it to ECR under two tags:
# the package version reported by setup.py and the value of $TAG.
#
# Required environment variables:
#   BUILD_NUMBER  suffix for the per-build virtualenv directory
#                 (presumably supplied by the CI server -- confirm)
#   TAG           additional image tag to push alongside the version number
#
# Requires python3, docker and the aws CLI on PATH.

set -euox pipefail

# TAG is only consumed by the final docker commands; check it up front so a
# missing value aborts the build before any image is built or tagged.
: "${TAG:?TAG must be set to the additional image tag to push}"

ECR_REGION="us-east-1"

# Use a venv folder unique to this build, located *inside* the workspace
VENV=".venv-$BUILD_NUMBER"

# Initialize the new venv
python3 -m venv "$VENV"

# Activate the venv so the pip3/pipenv calls below install into it
source "$VENV/bin/activate"

pip3 install pipenv
pipenv install --dev --deploy

AWS_ACCOUNT_ID="$(aws sts get-caller-identity --output=text --query "Account")"
ECR_REPOSITORY="${AWS_ACCOUNT_ID}.dkr.ecr.${ECR_REGION}.amazonaws.com/concordia/indexer"

FULL_VERSION_NUMBER="$(python3 setup.py --version)"

# Keep only the part of the version before the first '+'
VERSION_NUMBER="${FULL_VERSION_NUMBER%%+*}"

# NOTE(review): "aws ecr get-login" exists only in AWS CLI v1; confirm the CLI
# version on the build host before upgrading it.
eval "$(aws ecr get-login --no-include-email --region "$ECR_REGION")"

python3 setup.py build

docker build -t concordia/indexer --file indexer/Dockerfile .
docker tag concordia/indexer:latest "${ECR_REPOSITORY}:${VERSION_NUMBER}"
docker tag concordia/indexer:latest "${ECR_REPOSITORY}:${TAG}"
docker push "${ECR_REPOSITORY}:${VERSION_NUMBER}"
docker push "${ECR_REPOSITORY}:${TAG}"
103 changes: 83 additions & 20 deletions concordia/documents.py
@@ -1,29 +1,92 @@
# documents.py

from django.contrib.auth.models import User
from django_elasticsearch_dsl import DocType, Index
from django_elasticsearch_dsl import fields, DocType, Index

from .models import Transcription, UserAssetTagCollection


# Settings applied to every index defined in this module. See the
# Elasticsearch Indices API reference for the available settings.
_INDEX_SETTINGS = {"number_of_shards": 1, "number_of_replicas": 0}

# Each Index is created with the name of its Elasticsearch index.
user = Index("users")
tag_collection = Index("tags")
transcription = Index("transcriptions")

user.settings(**_INDEX_SETTINGS)
tag_collection.settings(**_INDEX_SETTINGS)
transcription.settings(**_INDEX_SETTINGS)


@user.doc_type
class UserDocument(DocType):
    """Search document for ``User`` records, registered on the ``user`` index."""

    class Meta:
        # NOTE(review): ``model`` and ``fields`` are each assigned twice in
        # this span. It looks like a diff rendering in which the earlier
        # assignments (and the commented-out options between them) are the
        # removed lines and the later ones their replacements -- confirm
        # against the actual file. As written, the later assignments win.
        model = User # The model associated with this DocType

        # The fields of the model you want to be indexed in Elasticsearch
        fields = ["last_login", "date_joined"]

        # Ignore auto updating of Elasticsearch when a model is saved
        # or deleted:
        # ignore_signals = True
        # Don't perform an index refresh after every update
        # (overrides global setting):
        # auto_refresh = False
        # Paginate the django queryset used to populate the index with
        # the specified size
        # (by default there is no pagination)
        # queryset_pagination = 5000
        model = User

        fields = ["last_login", "date_joined", "username"]


@tag_collection.doc_type
class TagCollectionDocument(DocType):
    """Search document for ``UserAssetTagCollection`` records.

    Registered on the ``tag_collection`` index. Besides the model fields
    listed in ``Meta.fields``, it declares a ``tags`` text field and nested
    ``asset`` and ``user`` objects.
    """

    # Populated from the model attribute named "tags_to_string".
    tags = fields.TextField(attr="tags_to_string")
    # Nested mapping: asset -> item -> project -> campaign.
    asset = fields.ObjectField(
        properties={
            "title": fields.TextField(),
            "slug": fields.TextField(),
            "transcription_status": fields.TextField(),
            "item": fields.ObjectField(
                properties={
                    "item_id": fields.TextField(),
                    "project": fields.ObjectField(
                        properties={
                            "slug": fields.TextField(),
                            "campaign": fields.ObjectField(
                                properties={"slug": fields.TextField()}
                            ),
                        }
                    ),
                }
            ),
        }
    )
    # Only the username of the related user is declared.
    user = fields.ObjectField(properties={"username": fields.TextField()})

    class Meta:
        model = UserAssetTagCollection
        # Inside Meta, ``fields`` is the list of model field names to index;
        # it does not rebind the ``fields`` module used in the class body.
        fields = ["created_on", "updated_on"]


@transcription.doc_type
class TranscriptionDocument(DocType):
    """Search document for ``Transcription`` records.

    Registered on the ``transcription`` index. Besides the model fields
    listed in ``Meta.fields``, it declares nested ``asset``, ``user``,
    ``reviewed_by`` and ``supersedes`` objects.
    """

    # Nested mapping: asset -> item -> project -> campaign.
    asset = fields.ObjectField(
        properties={
            "title": fields.TextField(),
            "slug": fields.TextField(),
            "transcription_status": fields.TextField(),
            "item": fields.ObjectField(
                properties={
                    "item_id": fields.TextField(),
                    "project": fields.ObjectField(
                        properties={
                            "slug": fields.TextField(),
                            "campaign": fields.ObjectField(
                                properties={"slug": fields.TextField()}
                            ),
                        }
                    ),
                }
            ),
        }
    )
    # Only the username is declared for the two user relations.
    user = fields.ObjectField(properties={"username": fields.TextField()})
    reviewed_by = fields.ObjectField(properties={"username": fields.TextField()})
    # Only the id of the superseded record is declared.
    supersedes = fields.ObjectField(properties={"id": fields.IntegerField()})

    class Meta:
        model = Transcription

        # Inside Meta, ``fields`` is the list of model field names to index;
        # it does not rebind the ``fields`` module used in the class body.
        fields = [
            "id",
            "created_on",
            "updated_on",
            "text",
            "accepted",
            "rejected",
            "submitted",
        ]
3 changes: 3 additions & 0 deletions concordia/models.py
Expand Up @@ -242,6 +242,9 @@ class UserAssetTagCollection(
def __str__(self):
    """Return a label of the form "<asset> - <user>"."""
    label_parts = (self.asset, self.user)
    return "{} - {}".format(*label_parts)

def tags_to_string(self):
    """Return the ``value`` of every tag in ``self.tags.all()``, joined by ", "."""
    tag_values = [tag.value for tag in self.tags.all()]
    return ", ".join(tag_values)


class Transcription(MetricsModelMixin("transcription"), models.Model):
asset = models.ForeignKey(Asset, on_delete=models.CASCADE)
Expand Down
2 changes: 1 addition & 1 deletion concordia/settings_prod.py
Expand Up @@ -61,7 +61,7 @@
else:
MEDIA_URL = "https://%s.s3.amazonaws.com/" % S3_BUCKET_NAME

ELASTICSEARCH_DSL_AUTOSYNC = False
# os.getenv() returns a string, so values such as "False" or "0" would be
# truthy if used directly. Parse the common boolean spellings explicitly;
# autosync stays disabled when the variable is unset or unrecognised.
ELASTICSEARCH_DSL_AUTOSYNC = os.getenv(
    "ELASTICSEARCH_DSL_AUTOSYNC", "False"
).strip().lower() in ("1", "true", "yes", "on")

INSTALLED_APPS += ["django_elasticsearch_dsl"]

Expand Down
3 changes: 0 additions & 3 deletions entrypoint.sh
Expand Up @@ -20,8 +20,5 @@ echo "Testing Sentry configuration"
echo Running collectstatic
./manage.py collectstatic --clear --noinput -v0

# echo Running indexing
# ./manage.py search_index --rebuild -f

echo Running Django dev server
gunicorn --log-level=warn --bind 0.0.0.0:80 --workers=4 concordia.wsgi
32 changes: 32 additions & 0 deletions indexer/Dockerfile
@@ -0,0 +1,32 @@
# Image that runs indexer/entrypoint.sh against the Concordia code base.
FROM ubuntu:18.04

ENV DEBIAN_FRONTEND="noninteractive"

# Remove the apt package lists in the same layer that created them so they
# are not kept in the image.
RUN apt-get update -qy && apt-get install -o Dpkg::Options::='--force-confnew' -qy \
    git curl \
    libmemcached-dev \
    python3 python3-dev python3-pip \
    libz-dev libfreetype6-dev \
    libtiff-dev libjpeg-dev libopenjp2-7-dev libwebp-dev zlib1g-dev \
    graphviz \
    locales \
    && rm -rf /var/lib/apt/lists/*

RUN locale-gen en_US.UTF-8
ENV LC_ALL=en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8

ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/app
ENV DJANGO_SETTINGS_MODULE=concordia.settings_prod

# Read by concordia.settings_prod via os.getenv().
ENV ELASTICSEARCH_DSL_AUTOSYNC=True

RUN pip3 install pipenv

WORKDIR /app
COPY . /app

# NOTE(review): --dev also installs development dependencies into this
# image; confirm that is intended.
RUN pipenv install --system --dev --deploy

# Exec form runs bash directly instead of through an extra "/bin/sh -c"
# wrapper; the relative script path resolves against WORKDIR (/app).
CMD ["/bin/bash", "indexer/entrypoint.sh"]
12 changes: 12 additions & 0 deletions indexer/entrypoint.sh
@@ -0,0 +1,12 @@
#!/bin/bash
#
# Indexer entrypoint: make sure the log files exist, then run the
# search_index management command.

# Abort on any unhandled error (-e) or use of an undefined variable (-u)
set -eu

LOG_DIR=/app/logs

mkdir -p "$LOG_DIR"
for log_name in concordia.log concordia-celery.log; do
    touch "$LOG_DIR/$log_name"
done

echo "Running indexing"
./manage.py search_index --rebuild -f

0 comments on commit 7fea313

Please sign in to comment.