Skip to content

Commit

Permalink
Merge pull request #698 from Ilhasoft/feature/refactor_elasticsearch
Browse files Browse the repository at this point in the history
Feature/refactor elasticsearch
  • Loading branch information
helllllllder committed Mar 4, 2022
2 parents 5653c3f + c48267b commit aaf4b4d
Show file tree
Hide file tree
Showing 27 changed files with 503 additions and 94 deletions.
8 changes: 5 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ before_install:
- sudo -E sed -i -e '/local.*peer/s/postgres/all/' -e 's/peer\|md5/trust/g' /etc/postgresql/*/main/pg_hba.conf
- sudo -E sed -i 's/port = 5433/port = 5432/' /etc/postgresql/*/main/postgresql.conf

# give PG and ES some time to finish setup
# give PG some time to finish setup
- sleep 10

# stop any running postgres versions
Expand Down Expand Up @@ -45,13 +45,15 @@ env:
- SECRET_KEY=SK
- SUPPORTED_LANGUAGES="en|pt"
- DEFAULT_DATABASE="postgres://bothub:bothub@localhost:5432/bothub"
- ELASTICSEARCH_DSL="localhost:9200"
- ELASTICSEARCH_DSL="http://localhost:9200"
- ELASTICSEARCH_SIGNAL_PROCESSOR="realtime"
- ELASTICSEARCH_NUMBER_OF_SHARDS=1
- ELASTICSEARCH_NUMBER_OF_REPLICAS=0
- USE_ELASTICSEARCH="false"
script:
- python manage.py migrate
- python manage.py search_index --rebuild -f
- python manage.py search_index --create -f
- python manage.py search_index --populate -f
- python manage.py collectstatic
- flake8
- travis_wait coverage run manage.py test
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,12 @@ You can set environment variables in your OS, write on ```.env``` file or pass v
| ELASTICSEARCH_REPOSITORYQANLPLOG_INDEX | ```string``` | ```ai_repositoryqanlplog``` | Specify the index title for the RepositoryQANLPLog document.
| ELASTICSEARCH_REPOSITORYBASICEXAMPLE_INDEX | ```string``` | ```ai_repositorybasicexample``` | Specify the index title for the RepositoryBasicExample document.
| ELASTICSEARCH_SIGNAL_PROCESSOR | ```string``` | ```celery``` | Specify the signal processor responsible for updating the Elasticsearch data.
| GUNICORN_WORKERS | ``` int ``` | ``` multiprocessing.cpu_count() * 2 + 1 ``` | Gunicorn number of workers
| ELASTICSEARCH_DELETE_ILM_NAME | ```string``` | ```delete_nlp_logs``` | Specify the name of the ILM responsible for deleting the logs.
| ELASTICSEARCH_TIMESTAMP_PIPELINE_NAME | ```string``` | ```set_timestamp``` | Specify the name of the pipeline responsible for creating the @timestamp field.
| ES_TIMESTAMP_PIPELINE_FIELD | ```string``` | ```created_at``` | Specify the field that will be used to populate the @timestamp field.
| ELASTICSEARCH_LOGS_ROLLOVER_AGE | ```string``` | ```1d``` | Specify the ILM rollover age, after which a new index is created.
| ELASTICSEARCH_LOGS_DELETE_AGE | ```string``` | ```90d``` | Specify the ILM delete age, after which the index is deleted.
| GUNICORN_WORKERS | ``` int ``` | ``` multiprocessing.cpu_count() * 2 + 1 ``` | Gunicorn number of workers.
| USE_ELASTICSEARCH | ```boolean``` | ```true``` | Change the logic in requirements_to_train to use either elasticsearch or postgres.


Expand Down
4 changes: 3 additions & 1 deletion bothub/api/v2/knowledge_base/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ class Meta:
]
read_only_fields = ["created_at", "last_update"]

text = serializers.CharField(required=False, validators=[ExampleTextHasLettersValidator()])
text = serializers.CharField(
required=False, validators=[ExampleTextHasLettersValidator()]
)
knowledge_base = serializers.PrimaryKeyRelatedField(
queryset=QAKnowledgeBase.objects
)
Expand Down
26 changes: 13 additions & 13 deletions bothub/api/v2/nlp/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,26 +63,26 @@ class RepositoryAuthorizationTrainViewSet(

def retrieve(self, request, *args, **kwargs):
check_auth(request)
repository_authorization = self.get_object()
repository_auth = self.get_object()

if not repository_authorization.can_contribute:
if not repository_auth.can_contribute:
raise PermissionDenied()

repository_version = request.query_params.get("repository_version")
if repository_version:
current_version = repository_authorization.repository.get_specific_version_id(
current_version = repository_auth.repository.get_specific_version_id(
repository_version, str(request.query_params.get("language"))
)
else:
current_version = repository_authorization.repository.current_version(
current_version = repository_auth.repository.current_version(
str(request.query_params.get("language"))
)

return Response(
{
"ready_for_train": current_version.ready_for_train,
"current_version_id": current_version.id,
"repository_authorization_user_id": repository_authorization.user.id,
"repository_authorization_user_id": repository_auth.user.id,
"language": current_version.language,
"algorithm": current_version.repository_version.repository.algorithm,
"use_name_entities": current_version.repository_version.repository.use_name_entities,
Expand Down Expand Up @@ -588,7 +588,9 @@ def retrieve(self, request, *args, **kwargs):
)

try:
repository.validate_if_can_run_automatic_evaluate(language=language, repository_version_id=repository_version)
repository.validate_if_can_run_automatic_evaluate(
language=language, repository_version_id=repository_version
)
can_run_automatic_evaluate = True
except ValidationError:
can_run_automatic_evaluate = False
Expand Down Expand Up @@ -752,20 +754,18 @@ class RepositoryAuthorizationExamplesViewSet(mixins.RetrieveModelMixin, GenericV

def retrieve(self, request, *args, **kwargs):
check_auth(request)
repository_authorization = self.get_object()
repo_authorization = self.get_object()

if not repository_authorization.can_contribute:
if not repo_authorization.can_contribute:
raise PermissionDenied()

repository_version = request.query_params.get("repository_version")
if repository_version:
current_version = (
repository_authorization.repository.get_specific_version_id(
repository_version, str(request.query_params.get("language"))
)
current_version = repo_authorization.repository.get_specific_version_id(
repository_version, str(request.query_params.get("language"))
)
else:
current_version = repository_authorization.repository.current_version(
current_version = repo_authorization.repository.current_version(
str(request.query_params.get("language"))
)

Expand Down
9 changes: 8 additions & 1 deletion bothub/api/v2/repository/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,7 +1251,14 @@ class Meta:
ref_name = None

id = serializers.PrimaryKeyRelatedField(read_only=True, style={"show": False})
text = EntityText(style={"entities_field": "entities"}, required=False, validators=[ExampleTextHasLettersValidator(), ExampleTextHasLimitedWordsValidator()])
text = EntityText(
style={"entities_field": "entities"},
required=False,
validators=[
ExampleTextHasLettersValidator(),
ExampleTextHasLimitedWordsValidator(),
],
)
repository = serializers.PrimaryKeyRelatedField(
queryset=Repository.objects,
validators=[CanContributeInRepositoryValidator()],
Expand Down
6 changes: 5 additions & 1 deletion bothub/api/v2/repository/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
from django.shortcuts import get_object_or_404
from django.utils.decorators import method_decorator
from django.utils.translation import ugettext_lazy as _
from django_elasticsearch_dsl_drf.constants import LOOKUP_FILTER_RANGE, LOOKUP_QUERY_GTE, LOOKUP_QUERY_LTE
from django_elasticsearch_dsl_drf.constants import (
LOOKUP_FILTER_RANGE,
LOOKUP_QUERY_GTE,
LOOKUP_QUERY_LTE,
)
from django_elasticsearch_dsl_drf.filter_backends import (
CompoundSearchFilterBackend,
FilteringFilterBackend,
Expand Down
12 changes: 5 additions & 7 deletions bothub/api/v2/tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,7 @@ def test_filter_entity(self):

class CreateExamplesAPITestCase(DefaultExamplesAPITestCase):
def request(self, data, token):
authorization_header = (
{"HTTP_AUTHORIZATION": "Token {}".format(token.key)}
)
authorization_header = {"HTTP_AUTHORIZATION": "Token {}".format(token.key)}
request = self.factory.post(
"/v2/repository/example/",
json.dumps(data),
Expand All @@ -284,7 +282,7 @@ def test_ok(self):
}
],
"intent": str(self.example_intent_1.pk),
"is_corrected": False
"is_corrected": False,
}

response, content_data = self.request(
Expand All @@ -307,7 +305,7 @@ def test_text_without_letters(self):
}
],
"intent": str(self.example_intent_1.pk),
"is_corrected": False
"is_corrected": False,
}
response, content_data = self.request(
data,
Expand All @@ -318,7 +316,7 @@ def test_text_without_letters(self):

def test_text_words_limit(self):
limit = settings.REPOSITORY_EXAMPLE_TEXT_WORDS_LIMIT + 1
text = " ".join(['teste' for x in range(limit)])
text = " ".join(["teste" for _ in range(limit)])
data = {
"repository": str(self.repository.uuid),
"repository_version": self.repository.current_version().repository_version.pk,
Expand All @@ -332,7 +330,7 @@ def test_text_words_limit(self):
}
],
"intent": str(self.example_intent_1.pk),
"is_corrected": False
"is_corrected": False,
}
response, content_data = self.request(
data,
Expand Down
5 changes: 4 additions & 1 deletion bothub/api/v2/tests/test_knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ def test_okay(self):
response, content_data = self.request(self.repository, self.owner_token)
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(content_data.get("title"), self.knowledge_base_1.title)
self.assertEqual(content_data.get("description"), self.context_1.text[:settings.REPOSITORY_KNOWLEDGE_BASE_DESCRIPTION_LIMIT])
self.assertEqual(
content_data.get("description"),
self.context_1.text[: settings.REPOSITORY_KNOWLEDGE_BASE_DESCRIPTION_LIMIT],
)


class ListQAtextAPITestCase(DefaultSetUpKnowledgeBaseMixin, TestCase):
Expand Down
8 changes: 7 additions & 1 deletion bothub/api/v2/tests/test_logs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import requests

from django.conf import settings
from django.test import RequestFactory
from django.test import tag
from django.test import TestCase
Expand All @@ -18,6 +20,7 @@
RepositoryIntent,
)
from bothub.common.models import RepositoryExample
from bothub.common.documents.repositorynlplog import REPOSITORYNLPLOG_INDEX_NAME


class RepositoryNLPLogTestCase(TestCase):
Expand Down Expand Up @@ -100,7 +103,6 @@ def test_okay(self):
class ListRepositoryNLPLogTestCase(TestCase):
def setUp(self):
self.factory = RequestFactory()

self.owner, self.owner_token = create_user_and_token("owneres")

self.repository = Repository.objects.create(
Expand All @@ -120,6 +122,7 @@ def setUp(self):
)

nlp_log = RepositoryNLPLog.objects.create(
id=2,
text="testes",
user_agent="python-requests/2.20.1",
from_backend=True,
Expand Down Expand Up @@ -171,6 +174,9 @@ def setUp(self):
is_default=False,
repository_nlp_log=nlp_log,
)
requests.delete(
f"{settings.ELASTICSEARCH_DSL['default']['hosts']}/_data_stream/{REPOSITORYNLPLOG_INDEX_NAME}"
)
registry.update(nlp_log)

def request(self, data, token=None):
Expand Down
21 changes: 14 additions & 7 deletions bothub/api/v2/tests/test_qa_logs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import requests

from django.conf import settings
from django.test import TestCase
from django.test import tag
from django_elasticsearch_dsl.registries import registry
Expand All @@ -9,6 +11,7 @@
from bothub.common.models import QALogs
from bothub.api.v2.repository.views import RepositoryQANLPLogViewSet
from bothub.api.v2.nlp.views import RepositoryQANLPLogsViewSet
from bothub.common.documents.repositoryqanlplog import REPOSITORYQANLPLOG_INDEX_NAME


class QALogsTestCase(DefaultSetUpKnowledgeBaseMixin, TestCase):
Expand Down Expand Up @@ -60,26 +63,30 @@ class ListQALogTestCase(DefaultSetUpKnowledgeBaseMixin, TestCase):
def setUp(self):
super().setUp()
self.log = QALogs.objects.create(
id=2,
knowledge_base=self.knowledge_base_1,
question="test",
answer="test",
question="t2est",
answer="te123st",
language=self.context_1.language,
confidence=0.0505176697224809,
user_agent="python-requests/2.20.1",
from_backend=True,
nlp_log=json.dumps(
{
"answers": [
{"text": "biases", "confidence": 0.9994810819625854},
{"text": "doubtes", "confidence": 0.039212167263031006},
{"text": "negativees", "confidence": 0.0},
{"text": "affirmativees", "confidence": 0.0},
{"text": "bias123es", "confidence": 0.9994810819625854},
{"text": "dou1123btes", "confidence": 0.039212167263031006},
{"text": "negat123ivees", "confidence": 0.0},
{"text": "affir132mativees", "confidence": 0.0},
],
"id": 0,
"id": 1,
}
),
user=self.owner,
)
requests.delete(
f"{settings.ELASTICSEARCH_DSL['default']['hosts']}/_data_stream/{REPOSITORYQANLPLOG_INDEX_NAME}"
)
registry.update(self.log)

def request(self, data, token=None):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
class Migration(migrations.Migration):

dependencies = [
('authentication', '0009_auto_20210506_1453'),
("authentication", "0009_auto_20210506_1453"),
]

operations = [
migrations.AlterField(
model_name='repositoryowner',
name='name',
field=models.CharField(help_text="User's name.", max_length=150, verbose_name='name'),
model_name="repositoryowner",
name="name",
field=models.CharField(
help_text="User's name.", max_length=150, verbose_name="name"
),
),
]
18 changes: 18 additions & 0 deletions bothub/authentication/migrations/0011_alter_user_language.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.8 on 2022-03-03 13:51

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter the ``language`` field of the ``user`` model.

    The field becomes an optional (``blank=True``, ``null=True``) ``CharField``
    of at most 5 characters, restricted to the three language choices below.
    """

    # Must run after the previous migration of the ``authentication`` app.
    dependencies = [
        ("authentication", "0010_alter_repositoryowner_name"),
    ]

    operations = [
        migrations.AlterField(
            model_name="user",
            name="language",
            field=models.CharField(
                blank=True,
                choices=[
                    ("en-us", "English"),
                    ("pt-br", "Brazilian Portuguese"),
                    ("es", "Spanish"),
                ],
                max_length=5,
                null=True,
            ),
        ),
    ]
2 changes: 1 addition & 1 deletion bothub/common/documents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
__all__ = (
"RepositoryNLPLogDocument",
"RepositoryQANLPLogDocument",
"RepositoryExampleDocument"
"RepositoryExampleDocument",
)
2 changes: 2 additions & 0 deletions bothub/common/documents/repositorybasicexample.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

@REPOSITORYBASICEXAMPLE_INDEX.doc_type
class RepositoryExampleDocument(Document):
time_based = False

repository_version_language = fields.ObjectField(
properties={
"pk": fields.IntegerField(),
Expand Down
14 changes: 8 additions & 6 deletions bothub/common/documents/repositorynlplog.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import json

from django.conf import settings
from django_elasticsearch_dsl import Document, Index, fields

from django_elasticsearch_dsl import Index, fields
from bothub.common.models import RepositoryNLPLog
from bothub.utils import TimeBasedDocument

REPOSITORYNLPLOG_INDEX = Index(settings.ELASTICSEARCH_INDEX_NAMES[__name__])
REPOSITORYNLPLOG_INDEX_NAME = settings.ELASTICSEARCH_INDEX_NAMES[__name__]
REPOSITORYNLPLOG_INDEX = Index(REPOSITORYNLPLOG_INDEX_NAME)


@REPOSITORYNLPLOG_INDEX.doc_type
class RepositoryNLPLogDocument(Document):
user = fields.IntegerField(attr="user.id")
@REPOSITORYNLPLOG_INDEX.document
class RepositoryNLPLogDocument(TimeBasedDocument):
time_based = True

user = fields.IntegerField(attr="user.id")
log_intent = fields.NestedField(
attr="log_intent_field_indexing",
properties={
Expand Down
Loading

0 comments on commit aaf4b4d

Please sign in to comment.