Skip to content

Commit

Permalink
Merge pull request #207 from Ilhasoft/rasa-validate
Browse files Browse the repository at this point in the history
Add validation to minimal train
  • Loading branch information
Douglas Paz committed Sep 26, 2018
2 parents 312b18f + 11f8ba1 commit 0d6a7ea
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 29 deletions.
2 changes: 2 additions & 0 deletions bothub/api/serializers/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ class Meta:
'available_request_authorization',
'request_authorization',
'ready_for_train',
'requirements_to_train',
'languages_ready_for_train',
'votes_sum',
'created_at',
]
Expand Down
20 changes: 20 additions & 0 deletions bothub/common/migrations/0021_auto_20180921_1259.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 2.0.6 on 2018-09-21 12:59

import django.core.validators
from django.db import migrations, models
import re


class Migration(migrations.Migration):
    """Alter ``RepositoryExample.intent``.

    The field becomes a 64-character ``CharField`` that defaults to
    ``'no_intent'`` and is validated against a pattern of lowercase
    letters, digits, underscores and hyphens.
    """

    dependencies = [
        ('common', '0020_auto_20180813_1320'),
    ]

    operations = [
        migrations.AlterField(
            model_name='repositoryexample',
            name='intent',
            field=models.CharField(
                verbose_name='intent',
                max_length=64,
                default='no_intent',
                help_text='Example intent reference',
                validators=[
                    django.core.validators.RegexValidator(
                        regex=re.compile('^[-a-z0-9_]+\\Z'),
                        message='Enter a valid value consisting of lowercase letters, numbers, underscores or hyphens.',  # noqa: E501
                        code='invalid',
                    ),
                ],
            ),
        ),
    ]
105 changes: 81 additions & 24 deletions bothub/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import requests

from functools import reduce
from django.db import models
from django.utils.translation import gettext as _
from django.utils import timezone
Expand Down Expand Up @@ -168,19 +169,25 @@ def languages_status(self):
))

@property
def requirements_to_train(self):
    """Map each language to the requirements still blocking its training.

    Only updates whose training has not started
    (``training_started_at`` is null) are inspected, and languages whose
    update reports no pending requirement are left out.

    Returns:
        dict: ``{language: requirements}`` taken from each update's own
        ``requirements_to_train``.
    """
    pending_updates = self.updates.filter(training_started_at__isnull=True)
    # Read the per-update property once; it runs its own validation work.
    per_language = (
        (update.language, update.requirements_to_train)
        for update in pending_updates)
    return {
        language: requirements
        for language, requirements in per_language
        if requirements}

@property
def languages_ready_for_train(self):
    """Map each not-yet-started update's language to its readiness flag.

    Returns:
        dict: ``{language: update.ready_for_train}``.
    """
    pending_updates = self.updates.filter(training_started_at__isnull=True)
    return {
        update.language: update.ready_for_train
        for update in pending_updates}

@property
def ready_for_train(self):
    """Tell whether at least one not-yet-started update is ready to train.

    Returns:
        bool: ``False`` when there is no such update or none is ready.
    """
    pending_updates = self.updates.filter(training_started_at__isnull=True)
    # any() stops at the first ready update instead of evaluating them all.
    return any(update.ready_for_train for update in pending_updates)

@property
def votes_sum(self):
Expand Down Expand Up @@ -311,6 +318,9 @@ class Meta:
verbose_name_plural = _('repository updates')
ordering = ['-created_at']

MIN_EXAMPLES_PER_INTENT = 2
MIN_EXAMPLES_PER_ENTITY = 2

repository = models.ForeignKey(
Repository,
models.CASCADE,
Expand Down Expand Up @@ -361,26 +371,73 @@ def examples(self):
examples = examples.exclude(deleted_in__isnull=False)
return examples

@property
def requirements_to_train(self):
    """List the reasons why this update cannot be trained yet.

    Checks, in order: whether training was already done or started,
    whether anything changed in this update, whether any example has an
    empty intent, and whether every intent and entity reaches its
    ``MIN_EXAMPLES_PER_*`` threshold.

    Returns:
        list: translated messages, one per unmet requirement; an empty
        list when none of these checks fails.
    """
    try:
        self.validate_init_train()
    except RepositoryUpdateAlreadyTrained:
        return [_('This bot version has already been trained.')]
    except RepositoryUpdateAlreadyStartedTraining:
        return [_('This bot version is being trained.')]

    requirements = []

    has_changes = (
        self.added.exists() or
        self.translated_added.exists() or
        self.deleted.exists())
    if not has_changes:
        # Adjacent literals (no "+") so the whole message is one constant
        # string inside _(); NOTE(review): presumably needed for
        # makemessages to extract the full msgid - confirm.
        requirements.append(_(
            'There was no change in this bot version. No '
            'examples or translations for {} have been added or '
            'removed.').format(
                languages.VERBOSE_LANGUAGES.get(self.language)))

    examples = self.examples

    # Let the database answer the membership question instead of loading
    # every intent value to test `'' in ...`.
    if examples.filter(intent='').exists():
        requirements.append(_('All examples need have a intent.'))

    weak_intents = examples.values('intent').annotate(
        intent_count=models.Count('id')).order_by().exclude(
            intent_count__gte=self.MIN_EXAMPLES_PER_INTENT)
    # Iterating an empty queryset is a no-op, so no exists() pre-check.
    for weak_intent in weak_intents:
        requirements.append(_(
            'Intent "{}" has only {} examples. '
            'Minimum is {}.').format(
                weak_intent.get('intent'),
                weak_intent.get('intent_count'),
                self.MIN_EXAMPLES_PER_INTENT))

    weak_entities = examples.annotate(
        es_count=models.Count('entities')).filter(
            es_count__gte=1).values(
                'entities__entity__value').annotate(
                    entities_count=models.Count('id')).order_by().exclude(
                        entities_count__gte=self.MIN_EXAMPLES_PER_ENTITY)
    for weak_entity in weak_entities:
        requirements.append(_(
            'Entity "{}" has only {} examples. '
            'Minimum is {}.').format(
                weak_entity.get('entities__entity__value'),
                weak_entity.get('entities_count'),
                self.MIN_EXAMPLES_PER_ENTITY))

    return requirements

@property
def ready_for_train(self):
    """Tell whether this update has no pending training requirement.

    Returns:
        bool: ``True`` when ``requirements_to_train`` is empty.
    """
    # Emptiness test instead of `len(...) is 0`: `is` compares identity,
    # which is not a reliable way to compare with an integer literal.
    return not self.requirements_to_train

def start_training(self, by):
def validate_init_train(self, by=None):
if self.trained_at:
raise RepositoryUpdateAlreadyTrained()
if self.training_started_at:
raise RepositoryUpdateAlreadyStartedTraining()
if by:
authorization = self.repository.get_user_authorization(by)
if not authorization.can_write:
raise TrainingNotAllowed()

authorization = self.repository.get_user_authorization(by)
if not authorization.can_write:
raise TrainingNotAllowed()

def start_training(self, by):
self.validate_init_train(by)
self.by = by
self.training_started_at = timezone.now()
self.save(
Expand Down Expand Up @@ -435,7 +492,7 @@ class Meta:
intent = models.CharField(
_('intent'),
max_length=64,
blank=True,
default='no_intent',
help_text=_('Example intent reference'),
validators=[validate_item_key])
created_at = models.DateTimeField(
Expand Down
101 changes: 96 additions & 5 deletions bothub/common/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,10 +705,26 @@ def setUp(self):
name='Test',
slug='test',
language=languages.LANGUAGE_EN)
self.example = RepositoryExample.objects.create(
self.example_1 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
self.example_2 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hello',
intent='greet')
self.example_3 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='bye!',
intent='bye')
self.example_4 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='good bye',
intent='bye')
self.example_5 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hellow',
intent='greet')

def test_be_true(self):
self.assertTrue(self.repository.ready_for_train)
Expand All @@ -720,15 +736,20 @@ def test_be_false(self):
def test_be_true_when_new_translate(self):
self.repository.current_update().start_training(self.owner)
RepositoryTranslatedExample.objects.create(
original_example=self.example,
original_example=self.example_1,
language=languages.LANGUAGE_PT,
text='oi')
RepositoryTranslatedExample.objects.create(
original_example=self.example_2,
language=languages.LANGUAGE_PT,
text='olá')
self.repository.current_update()
self.assertTrue(self.repository.ready_for_train)

def test_be_true_when_deleted_example(self):
self.repository.current_update()
self.repository.current_update().start_training(self.owner)
self.example.delete()
self.example_1.delete()
self.assertTrue(self.repository.ready_for_train)


Expand All @@ -743,6 +764,10 @@ def setUp(self):
language=languages.LANGUAGE_EN)

def test_be_true(self):
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
Expand All @@ -753,15 +778,23 @@ def test_be_false(self):
self.assertFalse(self.repository.current_update().ready_for_train)

def test_new_translate(self):
example = RepositoryExample.objects.create(
example_1 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
example_2 = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hello',
intent='greet')
self.repository.current_update().start_training(self.owner)
RepositoryTranslatedExample.objects.create(
original_example=example,
original_example=example_1,
language=languages.LANGUAGE_PT,
text='oi')
RepositoryTranslatedExample.objects.create(
original_example=example_2,
language=languages.LANGUAGE_PT,
text='olá')
self.assertTrue(self.repository.current_update(
languages.LANGUAGE_PT).ready_for_train)

Expand All @@ -770,10 +803,68 @@ def test_when_deleted(self):
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hello',
intent='greet')
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hellow',
intent='greet')
self.repository.current_update().start_training(self.owner)
example.delete()
self.assertTrue(self.repository.current_update().ready_for_train)

def test_empty_intent(self):
# Examples saved with an empty intent must keep the current update from
# being ready for training.
# Two examples share the empty intent and the 'name' entity is annotated
# twice on the first one.
# NOTE(review): presumably this leaves the empty intent as the only unmet
# requirement - confirm against RepositoryUpdate.requirements_to_train.
example = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='douglas',
intent='')
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='douglas',
intent='')
RepositoryExampleEntity.objects.create(
repository_example=example,
start=0,
end=7,
entity='name')
RepositoryExampleEntity.objects.create(
repository_example=example,
start=0,
end=7,
entity='name')
self.assertFalse(self.repository.current_update().ready_for_train)

def test_intent_dont_have_min_examples(self):
# A single 'greet' example is created, after which the update must
# report that it is not ready for training.
# NOTE(review): presumably tied to MIN_EXAMPLES_PER_INTENT (2 in this
# commit's models.py) - confirm.
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
self.assertFalse(self.repository.current_update().ready_for_train)

def test_entity_dont_have_min_examples(self):
# Two 'greet' examples are created so that only the entity count can
# differ between the two assertions below.
example = RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hi',
intent='greet')
RepositoryExample.objects.create(
repository_update=self.repository.current_update(),
text='hello',
intent='greet')
# With one 'hi' entity annotation the update must not be ready.
RepositoryExampleEntity.objects.create(
repository_example=example,
start=0,
end=2,
entity='hi')
self.assertFalse(self.repository.current_update().ready_for_train)
# A second 'hi' annotation, on the same example, must make it ready.
# NOTE(review): presumably this reaches MIN_EXAMPLES_PER_ENTITY (2) -
# confirm how annotations on a single example are counted.
RepositoryExampleEntity.objects.create(
repository_example=example,
start=1,
end=2,
entity='hi')
self.assertTrue(self.repository.current_update().ready_for_train)


class RequestRepositoryAuthorizationTestCase(TestCase):
def setUp(self):
Expand Down

0 comments on commit 0d6a7ea

Please sign in to comment.