In [20]:
import spacy
from glob import glob
import os, copy

In [3]:
nlp = spacy.en.English()

In [4]:
def add_pattern_to_matcher(veridicality_element_class, tokens):
    matcher.add_pattern(veridicality_element_class, 
                        [{spacy.matcher.attrs.LOWER: token} for token in tokens])

In [5]:
from kanren import Relation, facts
from kanren import run, eq, membero, var, conde, vars

In [31]:
class VeriticalityElements(object):
    def __init__(self):
        unsorted_patterns = []
        for file_name in self._list_veridicality_element_files():
            for line in open(file_name).readlines():
                ve_class_name = os.path.basename(file_name)
                ve_tokens = line.strip().lower().split()
                unsorted_patterns.append([-len(ve_tokens), ve_tokens, ve_class_name])
        self._patterns = [(tokens, ve_class) 
                          for _, tokens, ve_class 
                          in sorted(unsorted_patterns)]
    def _list_veridicality_element_files(self):
        return glob('lexicon/*')
    def get_patterns(self):
        return self._patterns


In [32]:
class Sentence(object):
    def __init__(self, doc, toks, follows_facts, headof_facts, label_facts):
        self.doc = doc
        self.toks = toks
        self.follows_facts = follows_facts
        self.headof_facts = headof_facts
        self.label_facts = label_facts
        self.tok_index = {tok:i for i,tok in self.toks}
    
    def print_labeled(self):
        for i,tok in self.toks:
            x=var()
            print(i, tok, [label for label_i,label in self.label_facts if i == label_i])

    def print_orig(self):
        print(doc)
    
    def search_and_merge(self, search, label):
        try:
            start_idx = self.tok_index[search[0]]
        except KeyError:
            return self
        if start_idx + len(search) > len(self.toks):
            return self
        toks_to_merge = [i for i,tok in self.toks[start_idx:start_idx+len(search)]]
        return self._merge_and_label_elements(toks_to_merge, label)
            
    
    def search_and_merge_old(self, search, label):
        search_vars = {tok: var() for tok in search}
        clauses = [self.nameof(search_vars[tok], tok) for tok in search]
        if len(search) > 1:
            clauses += [self.follows(search_vars[left], search_vars[right]) 
               for left,right in zip(search[:-1], search[1:])]
        matches = run(1, search_vars[search[0]], *clauses)
        if len(matches) > 0:
            match = matches[0]
            elements_to_merge = [match]
            cur_element = match
            for i in range(len(search) - 1):
                x = var()
                cur_element = run(1, x, follows(cur_element, x))[0]
                elements_to_merge.append(cur_element)
            return self._merge_and_label_elements(elements_to_merge, label)
        else:
            return self
        
    def _merge_and_label_elements(self, elements_to_merge, label):
        self.label_facts.append((elements_to_merge[0], label))
        #facts(self.labels, (elements_to_merge[0], label))
        if len(elements_to_merge) == 1:
            return self
        else:
            new_toks = []
            new_follows_facts = []
            new_headof_facts = []
            last_tok = None
            headof_dict = {}
            for tok_i, head_i in self.headof_facts:
                headof_dict.setdefault(tok_i, []).append(head_i)
            
            for i, tok in self.toks:
                cur_tok = None
                if i not in elements_to_merge:
                    cur_tok = (i, tok)
                    new_toks.append(cur_tok)
                    for head_i, rel in headof_dict[i]:
                        if head_i in elements_to_merge:
                            new_headof_facts.append((i, (elements_to_merge[0], rel)))
                        else: 
                            new_headof_facts.append((i, (head_i, rel)))
                elif i == elements_to_merge[0]:
                    new_string = ' '.join([tok for i, tok in self.toks 
                                           if i in elements_to_merge])
                    new_i = elements_to_merge[0];
                    cur_tok = (new_i, new_string)
                    new_toks.append(cur_tok)
                    for el_to_merge_i in elements_to_merge:
                        for head_i, rel in headof_dict[el_to_merge_i]:
                            if head_i not in elements_to_merge:
                                new_headof_facts.append((new_i, (head_i, rel)))
                if cur_tok is not None and last_tok is not None:
                    new_follows_facts.append((last_tok[0], cur_tok[0]))
                if cur_tok is not None:
                    last_tok = cur_tok
            return Sentence(doc, new_toks, new_follows_facts, new_headof_facts, copy.copy(self.label_facts))
        
def make_sentence_from_doc(doc):
    toks = [(tok.i, tok.lower_) for tok in doc]
    tokidx = [i for i,tok in toks]
    follows_facts = list(zip(tokidx, (tokidx[1:] + [None])))
    headof_facts = [(tok.i, (-1 if tok.head == tok else tok.head.i, tok.dep_)) for tok in doc]
    return Sentence(doc, toks, follows_facts, headof_facts, [])
    

In [38]:
# start of vertidicality transformation code
# !!! to do
class SentenceFinder(object):
    def find(self, query):
        pass
        
        

SyntaxError: unexpected EOF while parsing (<ipython-input-38-ff81518adf44>, line 8)

In [35]:
doc = nlp('If however you think abortion is murder, then this story will make you sick.')


In [37]:
sent = make_sentence_from_doc(doc)

sent = sent.search_and_merge('abortion is murder'.split(), 'proposition')
veridicality_elements = VeriticalityElements()
for pattern, label in veridicality_elements.get_patterns():
    sent = sent.search_and_merge(pattern, label)
sent.print_labeled()


0 if ['conditionals']
1 however ['causals']
2 you []
3 think ['positive_verbs']
4 abortion is murder ['proposition']
7 , []
8 then []
9 this []
10 story []
11 will []
12 make ['positive_nouns']
13 you []
14 sick . ['counter_factive_verbs']


TypeError: goal() missing 1 required positional argument: 'substitution'

In [242]:
#sent = 'If however you think abortion is murder, then this story will make you sick.'
raw = 'You should stop your make believe that abortion is murder.'
sent = Sentence(nlp(raw))
ves = VeriticalityElements()
sent.match(ves)

You should stop your make believe that abortion is murder.
[~make, ~believe]
[<function Relation.__call__.<locals>.goal at 0x17643f7b8>, <function Relation.__call__.<locals>.goal at 0x1762ee730>, <function Relation.__call__.<locals>.goal at 0x1762ee840>]
['make', 'believe'] ()
You should stop your make believe that abortion is murder.
[~belie]
[<function Relation.__call__.<locals>.goal at 0x17643f7b8>]
['belie'] ()
You should stop your make believe that abortion is murder.
[~believe]
[<function Relation.__call__.<locals>.goal at 0x17643f7b8>]
['believe'] ()
You should stop your make believe that abortion is murder.
[~er]
[<function Relation.__call__.<locals>.goal at 0x17643f7b8>]
['er'] ()
You should stop your make believe that abortion is murder.
[~lie]
[<function Relation.__call__.<locals>.goal at 0x17643f7b8>]
['lie'] ()
