Skip to content

Commit

Permalink
refactor/padacioso (#183)
Browse files Browse the repository at this point in the history
* feat/padacioso

Because fann2 is copyleft, it was made optional in ovos-core, and padaos was used instead of padatious.

However, padaos is very rigid and made intents virtually unusable. This replaces it with padacioso, which is already a dependency pulled in by OCP. It should be a little more usable and is a drop-in replacement.

This is a temporary measure until #100 is merged in version 0.0.5.

* add a couple of tests for intent registration bus messages

* padacioso~=0.1.2

allow more than exact matches via the new fuzz flag

authored-by: jarbasai <jarbasai@mailfence.com>
  • Loading branch information
NeonJarbas committed Aug 6, 2022
1 parent 4a8f6eb commit 033502e
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 48 deletions.
4 changes: 3 additions & 1 deletion mycroft/configuration/mycroft.conf
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,9 @@
"intent_cache": "~/.local/share/mycroft/intent_cache",
"train_delay": 4,
"single_thread": false,
"padaos_only": false
// fallback settings for padacioso (pure regex)
"regex_only": false,
"fuzz": true
},

"Audio": {
Expand Down
2 changes: 1 addition & 1 deletion mycroft/skills/intent_service_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def register_padatious_intent(self, intent_name, filename, lang):
if not isinstance(filename, str):
raise ValueError('Filename path must be a string')
if not exists(filename):
raise FileNotFoundError('Unable to find "{}"'.format(filename))
raise FileNotFoundError(f'Unable to find "{filename}"')

data = {'file_name': filename,
'name': intent_name,
Expand Down
117 changes: 73 additions & 44 deletions mycroft/skills/intent_services/padatious_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.
#
"""Intent service wrapping padatious."""
from functools import lru_cache
from subprocess import call
from threading import Event
from time import time as get_time, sleep
Expand All @@ -26,7 +25,43 @@
from mycroft.util.log import LOG
from mycroft.skills.intent_services.base import IntentMatch

from padaos import IntentContainer as PadaosIntentContainer
from padacioso import IntentContainer as FallbackIntentContainer

try:
import padatious as _pd
from padatious.match_data import MatchData as PadatiousIntent
except ImportError:
_pd = None

# padatious is optional, this class is just for compat
class PadatiousIntent:
    """Compatibility stand-in used when padatious cannot be imported.

    Holds the data describing how a query fits into an intent and exposes
    the extracted entities through a small mapping-like interface.

    Attributes:
        name (str): Name of the matched intent
        sent (str): The query after entity extraction
        conf (float): Confidence (from 0.0 to 1.0)
        matches (dict of str -> str): Maps each entity name to the part of
            the sentence that was extracted for it
    """

    def __init__(self, name, sent, matches=None, conf=0.0):
        # Assignment order is name, sent, matches, conf; __repr__ prints
        # the instance dict, so the order is visible in its output.
        self.name, self.sent = name, sent
        self.matches = matches if matches else {}
        self.conf = conf

    def __repr__(self):
        return repr(vars(self))

    def __contains__(self, item):
        """Return True if an entity called ``item`` was extracted."""
        return item in self.matches

    def __getitem__(self, item):
        """Return the extracted value for entity ``item``."""
        return self.matches[item]

    def get(self, key, default=None):
        """Return the extracted value for ``key``, or ``default`` if absent."""
        return self.matches.get(key, default)


class PadatiousMatcher:
Expand All @@ -53,17 +88,6 @@ def _match_level(self, utterances, limit, lang=None):
for utt in utterances:
for variant in utt:
intent = self.service.calc_intent(variant, lang)
if self.service._padaos:
if not intent.get("name"):
continue
# exact matches only
return IntentMatch(
'Padaos',
intent["name"],
intent["entities"],
intent["name"].split(':')[0]
)

if intent:
best = padatious_intent.conf if padatious_intent else 0.0
if best < intent.conf:
Expand Down Expand Up @@ -114,34 +138,29 @@ def __init__(self, bus, config):
self.padatious_config = config
self.bus = bus
intent_cache = expanduser(self.padatious_config['intent_cache'])
self._padaos = self.padatious_config.get("padaos_only", False)

core_config = Configuration()
self.lang = core_config.get("lang", "en-us")
langs = core_config.get('secondary_langs') or []
if self.lang not in langs:
langs.append(self.lang)

try:
if not self._padaos:
from padatious import IntentContainer
self.containers = {
lang: IntentContainer(path.join(intent_cache, lang))
for lang in langs}
except ImportError:
LOG.error('Padatious not installed. Falling back to Padaos, pure regex alternative')
try:
call(['notify-send', 'Padatious not installed',
'Falling back to Padaos, pure regex alternative'])
except OSError:
pass
self._padaos = True

if self._padaos:
LOG.warning('using padaos instead of padatious. Some intents may '
'be hard to trigger')
self.containers = {lang: PadaosIntentContainer()
if self.is_regex_only:
if not _pd:
LOG.error('Padatious not installed. Falling back to pure regex alternative')
try:
call(['notify-send', 'Padatious not installed',
'Falling back to pure regex alternative'])
except OSError:
pass
LOG.warning('using pure regex intent parser. '
'Some intents may be hard to trigger')
self.containers = {lang: FallbackIntentContainer(self.padatious_config.get("fuzz"))
for lang in langs}
else:
self.containers = {
lang: _pd.IntentContainer(path.join(intent_cache, lang))
for lang in langs}

self.bus.on('padatious:register_intent', self.register_intent)
self.bus.on('padatious:register_entity', self.register_entity)
Expand All @@ -158,25 +177,30 @@ def __init__(self, bus, config):
self.registered_intents = []
self.registered_entities = []

@property
def is_regex_only(self):
    """Tell whether the pure regex intent container is in use.

    Returns:
        True when padatious could not be imported (module-level ``_pd`` is
        falsy); otherwise the "regex_only" value from the padatious config,
        with a missing or falsy value reported as False.
    """
    if _pd:
        # padatious is available, so the choice is left to the config
        return self.padatious_config.get("regex_only") or False
    return True

def train(self, message=None):
"""Perform padatious training.
Args:
message (Message): optional triggering message
"""
self.finished_training_event.clear()
if not self._padaos:
if not self.is_regex_only:
padatious_single_thread = self.padatious_config['single_thread']
if message is None:
single_thread = padatious_single_thread
else:
single_thread = message.data.get('single_thread',
padatious_single_thread)
LOG.info('Training... (single_thread={})'.format(single_thread))
for lang in self.containers:
self.containers[lang].train(single_thread=single_thread)
LOG.info('Training complete.')

LOG.info('Training complete.')
self.finished_training_event.set()
if not self.finished_initial_train:
self.bus.emit(Message('mycroft.skills.trained'))
Expand Down Expand Up @@ -241,7 +265,7 @@ def _register_object(self, message, object_name, register_func):
LOG.warning('Could not find file ' + file_name)
return

if self._padaos:
if self.is_regex_only:
# the regex fallback container takes sample strings, not a file path like padatious
with open(file_name) as f:
samples = [l.strip() for l in f.readlines()]
Expand All @@ -258,9 +282,10 @@ def register_intent(self, message):
message (Message): message triggering action
"""
lang = message.data.get('lang', self.lang)
lang = lang.lower()
if lang in self.containers:
self.registered_intents.append(message.data['name'])
if self._padaos:
if self.is_regex_only:
self._register_object(
message, 'intent', self.containers[lang].add_intent)
else:
Expand All @@ -274,9 +299,10 @@ def register_entity(self, message):
message (Message): message triggering action
"""
lang = message.data.get('lang', self.lang)
lang = lang.lower()
if lang in self.containers:
self.registered_entities.append(message.data)
if self._padaos:
if self.is_regex_only:
self._register_object(
message, 'intent', self.containers[lang].add_entity)
else:
Expand All @@ -289,13 +315,16 @@ def calc_intent(self, utt, lang=None):
This improves speed when called multiple times for different confidence
levels.
NOTE: This cache will keep a reference to this class
(PadatiousService), but we can live with that since it is used as a
singleton.
Args:
utt (str): utterance to calculate best intent for
"""
lang = lang or self.lang
lang = lang.lower()
if lang in self.containers:
return self.containers[lang].calc_intent(utt)
intent = self.containers[lang].calc_intent(utt)
if isinstance(intent, dict):
if "entities" in intent:
intent["matches"] = intent.pop("entities")
intent["sent"] = utt
intent = PadatiousIntent(**intent)
return intent
2 changes: 1 addition & 1 deletion requirements/extra-skills-lgpl.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
adapt-parser~=0.5
padaos~=0.1
padacioso~=0.1.2
ovos-lingua-franca~=0.4, >=0.4.2
PyYAML~=5.4
ovos_workshop~=0.0, >=0.0.7a9
Expand Down
2 changes: 1 addition & 1 deletion requirements/extra-skills.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
adapt-parser~=0.5
padaos~=0.1
padacioso~=0.1.2
ovos-lingua-franca~=0.4, >=0.4.2
PyYAML~=5.4
ovos_workshop~=0.0, >=0.0.7a9
59 changes: 59 additions & 0 deletions test/unittests/skills/test_intent_service_interface.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest

from adapt.intent import IntentBuilder
from mycroft.skills.intent_service_interface import IntentServiceInterface


Expand Down Expand Up @@ -84,3 +85,61 @@ def test_register_regex(self):
intent_service = IntentServiceInterface(self.emitter)
intent_service.register_adapt_regex('.*', lang="en-us")
self.check_emitter([{'regex': '.*', 'lang': 'en-us'}])


class KeywordIntentRegistrationTest(unittest.TestCase):
    """Checks the bus message emitted when an adapt intent is registered."""

    def setUp(self):
        self.emitter = MockEmitter()

    def check_emitter(self, expected_message_data):
        """Verify that the registration messages match the expected ones."""
        emitted_types = self.emitter.get_types()
        for emitted_type in emitted_types:
            self.assertEqual(emitted_type, 'register_intent')

        def by_items(data):
            # dicts are not orderable, so order them by their sorted items
            return sorted(data.items())

        actual = sorted(self.emitter.get_results(), key=by_items)
        expected = sorted(expected_message_data, key=by_items)
        self.assertEqual(actual, expected)
        self.emitter.reset()

    def test_register_intent(self):
        service = IntentServiceInterface(self.emitter)
        for keyword in ('testA', 'testB'):
            service.register_adapt_keyword(keyword, keyword, lang='en-US')
        # drop the keyword registration messages, only the intent matters
        self.emitter.reset()

        builder = IntentBuilder("test")
        builder = builder.require("testA")
        builder = builder.optionally("testB")
        service.register_adapt_intent("test", builder)

        expected_data = {
            'at_least_one': [],
            'name': 'test',
            'optional': [('testB', 'testB')],
            'requires': [('testA', 'testA')],
        }
        self.check_emitter([expected_data])



class UtteranceIntentRegistrationTest(unittest.TestCase):
    """Checks the bus message emitted when a padatious intent is registered."""

    def check_emitter(self, expected_message_data):
        """Verify that the registration messages match the expected ones.

        Args:
            expected_message_data (list of dict): message data expected to
                have been emitted, in any order
        """
        for msg_type in self.emitter.get_types():
            self.assertEqual(msg_type, 'padatious:register_intent')

        self.assertEqual(
            sorted(self.emitter.get_results(),
                   key=lambda d: sorted(d.items())),
            sorted(expected_message_data, key=lambda d: sorted(d.items())))
        self.emitter.reset()

    def setUp(self):
        self.emitter = MockEmitter()

    def test_register_intent(self):
        """Registering an intent file emits its path, name and lang."""
        # Local imports keep this block self-contained.
        import os
        import tempfile

        intent_service = IntentServiceInterface(self.emitter)
        # A private temporary directory avoids the hard-coded /tmp path:
        # it works on any platform, cannot collide with parallel test runs
        # and is removed once the block exits.
        with tempfile.TemporaryDirectory() as tmp_dir:
            filename = os.path.join(tmp_dir, "test.intent")
            with open(filename, "w") as f:
                f.write("this is a test\ntest the intent")

            # Registration must happen while the file still exists.
            intent_service.register_padatious_intent('test', filename,
                                                     lang='en-US')

        expected_data = {'file_name': filename, 'lang': 'en-US',
                         'name': 'test'}
        self.check_emitter([expected_data])

0 comments on commit 033502e

Please sign in to comment.