## Runtime extraction of PAS tuples from a given extraction rule


Sentence = `Full-mouth debridement is not payable on the same date of service as other prophylactic or preventive procedures.`

```
{'rule_id': -1930383187189869905,
 'semgrex_pattern': [{'PATTERN': {'DEP': 'ROOT'},
                      'SPEC': {'NODE_NAME': 'node1'}},
                     {'PATTERN': {'DEP': 'nsubj'},
                      'SPEC': {'NBOR_NAME': 'node1',
                               'NBOR_RELOP': '>',
                               'NODE_NAME': 'node0'}},
                     {'PATTERN': {'DEP': 'pobj'},
                      'SPEC': {'NBOR_NAME': 'node0',
                               'NBOR_RELOP': '$--',
                               'NODE_NAME': 'node2'}},
                     {'PATTERN': {'DEP': 'prep'},
                      'SPEC': {'NBOR_NAME': 'node2',
                               'NBOR_RELOP': '>',
                               'NODE_NAME': 'node3'}}],
 'sentence': 'Full-mouth debridement is not payable on the same date of '
             'service as other prophylactic or preventive procedures.',
 'slotting_rule': {'comp': [],
                   'cord': [],
                   'obj': {'dep': 'pobj', 'pos': 'NOUN', 'tag': 'NN'},
                   'pred': {'dep': 'ROOT', 'pos': 'AUX', 'tag': 'VBZ'},
                   'prep': [],
                   'prep_adv': [],
                   'subj': {'dep': 'nsubj', 'pos': 'ADJ', 'tag': 'JJ'}},
 'tuple': {'comp': 'NA',
           'cord': 'NA',
           'obj': 'NA',
           'pred': 4,
           'prep': 'NA',
           'prep_adv': 'NA',
           'subj': 0}}
```

In [1]:
import spacy
from spacy.matcher import Matcher
from spacy import displacy
from spacy_pattern_builder import build_dependency_pattern
from spacy_pattern_builder import util
from spacy.matcher import DependencyMatcher
from pprint import pprint
from pprint import PrettyPrinter

In [2]:
# Domain-specific retokenizer
sentence = "Full-mouth debridement is not payable on the same date of service as other prophylactic or preventive procedures."
nlp = spacy.load('en_core_web_sm')
doc = nlp(sentence)

# For this sample the merge boundaries are hard-coded token index ranges.
# In the real application a domain-specific retokenization module supplies them.
# The ranges are listed in the order in which they are handed to the retokenizer.
merge_bounds = (
    (0, 4),    # Full-mouth debridement
    (8, 11),   # the same date
    (17, 19),  # preventive procedures
    (4, 8),    # is not payable on
)
with doc.retokenize() as retokenizer:
    for first, past_last in merge_bounds:
        retokenizer.merge(doc[first:past_last])


In [3]:
"""
This method is used to 
1. Extract tokens using linguistic semgrex patterns
2. Apply syntactic labels to extracted tokens using slotting rules
"""
def _dep_matches(token_dep, expected_dep):
    """Tell whether a token's dependency label satisfies a slotting rule's ``dep`` entry.

    A string entry is compared for exact equality, so ``nsubj`` does not satisfy a
    rule asking for ``nsubjpass`` (a plain ``in`` test on two strings is a substring
    test). Any other container (list, tuple, set) is treated as a collection of
    accepted labels.
    """
    if isinstance(expected_dep, str):
        return token_dep == expected_dep
    return token_dep in expected_dep


def extract_pas_tuples(doc, extraction_rule):
    """Extract the subject, predicate and object tokens selected by an extraction rule.

    The rule's ``semgrex_pattern`` is run over ``doc`` with a ``DependencyMatcher``.
    Every token index returned by the matcher is then checked against the ``dep``
    label of the ``subj``, ``pred`` and ``obj`` entries of the rule's
    ``slotting_rule``.

    Args:
        doc: Parsed document. It must expose ``vocab`` and integer indexing that
            yields tokens carrying a ``dep_`` attribute.
        extraction_rule: Mapping with a ``semgrex_pattern`` entry (passed to the
            matcher unchanged) and a ``slotting_rule`` entry whose ``subj``,
            ``pred`` and ``obj`` items each provide a ``dep`` label, or a
            collection of labels.

    Returns:
        dict: Keys ``subj``, ``pred`` and ``obj``. Each value is the last matched
        token whose dependency label fits that slot, or ``None`` when no matched
        token fits.
    """
    matcher = DependencyMatcher(doc.vocab)
    # Call form kept as in the original cell: (key, on_match callback, pattern).
    matcher.add('pattern', None, extraction_rule['semgrex_pattern'])
    matches = matcher(doc)

    pas_elements = {'subj': None, 'pred': None, 'obj': None}

    # Each match is consumed as (match id, list of token-index lists); flatten to a
    # single sequence of token indices while keeping the matcher's order.
    matched_token_ids = [
        token_id
        for _, trees in matches
        for tree in trees
        for token_id in tree
    ]
    if not matched_token_ids:
        return pas_elements

    slotting_rule = extraction_rule['slotting_rule']
    slot_deps = {slot: slotting_rule[slot]['dep'] for slot in pas_elements}

    for token_id in matched_token_ids:
        token = doc[token_id]
        for slot, expected_dep in slot_deps.items():
            # Later matches overwrite earlier ones, so the last fitting token wins.
            if _dep_matches(token.dep_, expected_dep):
                pas_elements[slot] = token
    return pas_elements

### Extract tuple using extraction rule

In [4]:
# Hard-coded here for the purpose of this sample; normally the rule set is
# retrieved from a database.
extraction_rule = {
    "sentence": "Full-mouth debridement is not payable on the same date of service as other prophylactic or preventive procedures.",
    "tuple": {
        "subj": 0,
        "pred": 4,
        "obj": "NA",
        "comp": "NA",
        "prep_adv": "NA",
        "cord": "NA",
        "prep": "NA",
    },
    "rule_id": -1930383187189869905,
    "semgrex_pattern": [
        {
            "SPEC": {"NODE_NAME": "node1"},
            "PATTERN": {"DEP": "ROOT"},
        },
        {
            "SPEC": {"NODE_NAME": "node0", "NBOR_NAME": "node1", "NBOR_RELOP": ">"},
            "PATTERN": {"DEP": "nsubj"},
        },
        {
            "SPEC": {"NODE_NAME": "node2", "NBOR_NAME": "node0", "NBOR_RELOP": "$--"},
            "PATTERN": {"DEP": "pobj"},
        },
        {
            "SPEC": {"NODE_NAME": "node3", "NBOR_NAME": "node2", "NBOR_RELOP": ">"},
            "PATTERN": {"DEP": "prep"},
        },
    ],
    "slotting_rule": {
        "pred": {"dep": "ROOT", "pos": "AUX", "tag": "VBZ"},
        "subj": {"dep": "nsubj", "pos": "ADJ", "tag": "JJ"},
        "obj": {"dep": "pobj", "pos": "NOUN", "tag": "NN"},
        "prep": [],
        "comp": [],
        "cord": [],
        "prep_adv": [],
    },
}
extracted_pas_tuple = extract_pas_tuples(doc, extraction_rule)

In [5]:
# Show which token was assigned to each of the subj / pred / obj slots.
print("Extracted PAS tuples are: ")
result_printer = PrettyPrinter()
result_printer.pprint(extracted_pas_tuple)

Extracted PAS tuples are: 
{'obj': the same date,
 'pred': is not payable on,
 'subj': Full-mouth debridement}
