This is still under development.  I lost the code to the original ICWSM paper, and this is my effort to reconstruct it using spaCy and Python.

In [16]:
import spacy
from glob import glob
from pprint import pprint
import os, copy
from rdflib import ConjunctiveGraph, URIRef, Namespace, Literal
from collections import defaultdict

In [3]:
# Instantiate the spaCy English pipeline once; later cells call it as nlp(text).
# NOTE(review): `spacy.en` looks like the spaCy 1.x module layout — confirm the
# installed spaCy version before upgrading.
nlp = spacy.en.English()

In [4]:
class VeriticalityElements(object):
    """Veridicality-element patterns loaded from the lexicon files.

    Every file returned by ``_list_veridicality_element_files`` contributes
    one pattern per non-blank line.  A pattern is the lower-cased,
    whitespace-split line; its class is the base name of the file it came
    from.  Patterns are kept longest-first (ties broken by token list, then
    class name) so callers that apply them in order try multi-word patterns
    before their shorter prefixes.
    """

    def __init__(self):
        unsorted_patterns = []
        for file_name in self._list_veridicality_element_files():
            ve_class_name = os.path.basename(file_name)
            # Context manager so the file handle is closed deterministically.
            with open(file_name) as lexicon_file:
                for line in lexicon_file:
                    ve_tokens = line.strip().lower().split()
                    if not ve_tokens:
                        # A blank line would yield an empty pattern, which
                        # has no first token to search for.
                        continue
                    unsorted_patterns.append(
                        (-len(ve_tokens), ve_tokens, ve_class_name))
        # Negative length as the leading sort key puts longer patterns first.
        self._patterns = [(tokens, ve_class)
                          for _, tokens, ve_class
                          in sorted(unsorted_patterns)]

    def _list_veridicality_element_files(self):
        """Return the paths matching ``lexicon/*`` in the working directory."""
        return glob('lexicon/*')

    def get_patterns(self):
        """Return the list of ``(tokens, class_name)`` pairs, longest first."""
        return self._patterns


In [5]:
# Distinct lexicon class names (the second element of every pattern pair).
{ve_class for _, ve_class in VeriticalityElements().get_patterns()}

{'causals',
 'conditionals',
 'counter_factive_verbs',
 'factive_verbs',
 'negative_adjectives',
 'negative_nouns',
 'negative_sources',
 'negative_verbs',
 'neutral_verbs',
 'positive_adjectives',
 'positive_nouns',
 'positive_verbs'}

In [11]:
class Sentence(object):
    """Tokens, dependency arcs and labels of one sentence, with lookup indexes.

    Attributes:
        doc: The object the sentence was built from.  It is stored as given
            and only used by ``print_orig``.
        toks: List of ``(index, text)`` pairs in sentence order.  Indices are
            not necessarily contiguous: merging tokens keeps the index of the
            first merged token and drops the others.
        follows_facts: List of ``(left_index, right_index)`` pairs.
        headof_facts: List of ``(daut_index, (head_index, dep))`` entries.
        label_facts: List of ``(token_index, label)`` pairs.  The list is
            appended to in place by ``search_and_merge`` and is shared with
            any sentence produced by a merge.
    """

    def __init__(self, doc, toks, follows_facts, headof_facts, label_facts):
        self.doc = doc
        self.toks = toks
        self.follows_facts = follows_facts
        self.headof_facts = headof_facts
        self.label_facts = label_facts
        self._setup()

    def _setup(self):
        """(Re)build every lookup index from the fact lists and return self."""
        # For a repeated token text the last occurrence wins in _tok_index.
        self._tok_index = {tok: i for i, tok in self.toks}
        self._index_tok = {i: tok for i, tok in self.toks}
        self._dep_index = defaultdict(set)   # dep  -> {(daut, head)}
        self._dauts = defaultdict(set)       # head -> {(daut, dep)}
        self._heads = defaultdict(set)       # daut -> {(head, dep)}
        for daut, (head, dep) in self.headof_facts:
            self._dep_index[dep].add((daut, head))
            self._dauts[head].add((daut, dep))
            self._heads[daut].add((head, dep))
        self._labels = defaultdict(set)       # token index -> {label}
        self._label_index = defaultdict(set)  # label -> {token index}
        for tok_i, label in self.label_facts:
            self._label_index[label].add(tok_i)
            self._labels[tok_i].add(label)
        return self

    def print_labeled(self):
        """Print one line per token: its index, its text and its labels."""
        for i, tok in self.toks:
            print(i, tok, [label for label_i, label in self.label_facts
                           if i == label_i])

    def print_orig(self):
        """Print the object this sentence was built from."""
        # Use the stored doc, not whatever global named `doc` happens to exist.
        print(self.doc)

    def search_and_merge(self, search, label):
        """Label the tokens matching ``search`` and merge them into one token.

        Args:
            search: Sequence of token texts that must appear consecutively
                in the sentence.
            label: Label to attach to the (merged) token.

        Returns:
            ``self`` when nothing matches (or ``search`` is empty) and when a
            single token was labelled; a new ``Sentence`` when several tokens
            were merged.

        The match is positional over ``self.toks``, so it keeps working after
        earlier merges have left gaps in the token indices.  When the pattern
        occurs more than once, the right-most occurrence is used.
        """
        search = list(search)
        width = len(search)
        if width == 0:
            return self
        texts = [tok for _, tok in self.toks]
        # Scan from the right so the right-most full match is the one merged.
        for start in range(len(texts) - width, -1, -1):
            if texts[start:start + width] == search:
                elements_to_merge = [i for i, _
                                     in self.toks[start:start + width]]
                return self._merge_and_label_elements(elements_to_merge, label)
        return self

    @staticmethod
    def _as_set(value, scalar_type):
        """Return ``value`` as a set, wrapping a bare scalar of ``scalar_type``."""
        if isinstance(value, scalar_type):
            return {value}
        return set(value)

    def query(self,
              deps,
              daut_labels=(),
              head_labels=(),
              valid_dauts=(),
              valid_heads=()):
        """Return the ``(daut, head)`` pairs linked by one of ``deps``.

        Args:
            deps: A dependency name or an iterable of dependency names.
            daut_labels: Label or iterable of labels; the daughter token must
                carry at least one of them.
            head_labels: Same constraint for the head token.
            valid_dauts: Token index or iterable of indices the daughter must
                belong to.
            valid_heads: Same constraint for the head token.

        An empty collection means "no constraint" for that argument.  Note
        that this also applies to an empty set passed explicitly.

        Returns:
            A list of ``(daut_index, head_index)`` tuples.
        """
        if isinstance(deps, str):
            deps = [deps]
        daut_labels = self._as_set(daut_labels, str)
        head_labels = self._as_set(head_labels, str)
        valid_dauts = self._as_set(valid_dauts, int)
        valid_heads = self._as_set(valid_heads, int)
        no_labels = frozenset()
        pairs = []
        for dep in deps:
            for daut, head in self._dep_index.get(dep, ()):
                # .get() avoids inserting empty entries into the defaultdict.
                if daut_labels and not (
                        self._labels.get(daut, no_labels) & daut_labels):
                    continue
                if head_labels and not (
                        self._labels.get(head, no_labels) & head_labels):
                    continue
                if valid_dauts and daut not in valid_dauts:
                    continue
                if valid_heads and head not in valid_heads:
                    continue
                pairs.append((daut, head))
        return pairs

    def _merge_and_label_elements(self, elements_to_merge, label):
        """Attach ``label`` to the first element and merge the rest into it.

        Args:
            elements_to_merge: Non-empty list of token indices in sentence
                order.
            label: Label recorded for ``elements_to_merge[0]``.

        Returns:
            ``self`` (re-indexed) when there is a single element; otherwise a
            new ``Sentence`` in which the elements are replaced by one token
            whose text is their texts joined by spaces and whose index is
            ``elements_to_merge[0]``.
        """
        new_i = elements_to_merge[0]
        self.label_facts.append((new_i, label))
        if len(elements_to_merge) == 1:
            return self._setup()

        merged = set(elements_to_merge)
        headof_dict = {}
        for tok_i, head_fact in self.headof_facts:
            headof_dict.setdefault(tok_i, []).append(head_fact)
        new_string = ' '.join(tok for i, tok in self.toks if i in merged)

        new_toks = []
        new_follows_facts = []
        new_headof_facts = []
        last_i = None
        for i, tok in self.toks:
            if i in merged and i != new_i:
                # Absorbed into the merged token; contributes no token itself.
                continue
            if i == new_i:
                new_toks.append((new_i, new_string))
                # Keep only the arcs that leave the merged span.
                for member in elements_to_merge:
                    for head_i, rel in headof_dict.get(member, ()):
                        if head_i not in merged:
                            new_headof_facts.append((new_i, (head_i, rel)))
            else:
                new_toks.append((i, tok))
                # Arcs pointing into the merged span are redirected to it.
                for head_i, rel in headof_dict.get(i, ()):
                    target = new_i if head_i in merged else head_i
                    new_headof_facts.append((i, (target, rel)))
            if last_i is not None:
                new_follows_facts.append((last_i, i))
            last_i = i
        # The constructor already builds the indexes of the new sentence.
        return Sentence(self.doc, new_toks, new_follows_facts,
                        new_headof_facts, self.label_facts)
        
def make_sentence_from_doc(doc):
    """Build a Sentence from a parsed document.

    Each token contributes an ``(index, lower-cased text)`` pair and one
    head fact ``(index, (head_index, dep))``; a token that is its own head
    gets ``-1`` as head index.  Every token index is paired with the index
    of the next token, the last one being paired with ``None``.
    """
    toks = []
    headof_facts = []
    for token in doc:
        toks.append((token.i, token.lower_))
        head_index = -1 if token.head == token else token.head.i
        headof_facts.append((token.i, (head_index, token.dep_)))
    indices = [index for index, _ in toks]
    successors = indices[1:] + [None]
    follows_facts = list(zip(indices, successors))
    return Sentence(doc, toks, follows_facts, headof_facts, [])
    

In [19]:

# Parse the example sentence, mark the proposition span, then apply every
# lexicon pattern in the order returned by get_patterns().  search_and_merge
# may return a new Sentence, hence the rebinding of `sent` on every call.
doc = nlp("Sam argues in defense of the idiot's assertion that abortion is murder.")
sent = make_sentence_from_doc(doc)
sent = sent.search_and_merge('abortion is murder'.split(), 'proposition')
veridicality_elements = VeriticalityElements()
for pattern, label in veridicality_elements.get_patterns():
    sent = sent.search_and_merge(pattern, label)
sent.print_labeled();
# Dump the daut -> {(head, dep)} index built by Sentence._setup().
pprint(sent._heads)
#print(sent._labels[1])
#print(sent.label_facts)

def heads(pairs):
    """Collect the second element (the head) of every (daut, head) pair."""
    return {head_index for _, head_index in pairs}
def dauts(pairs):
    """Return the set of daughter indices found in ``(daut, head)`` pairs.

    Args:
        pairs: Iterable of ``(daut, head)`` tuples, e.g. the result of
            ``Sentence.query``.

    Returns:
        A set with the first element of every pair.
    """
    # The previous version iterated over the misspelled name `paris`, which
    # raised NameError on every call.
    return {daut for daut, _ in pairs}

#def chain(sent, query_params):
    

# 'prep' arcs whose head is also the head of a ccomp/xcomp arc coming from a
# token labelled 'proposition'.
# NOTE(review): Sentence.query treats an empty valid_heads as "no constraint",
# so if the inner query matches nothing every 'prep' arc is returned — confirm
# that this is acceptable here.
pairs = sent.query(['prep'], 
                   valid_heads = heads(sent.query(['ccomp', 'xcomp'], 
                                                  daut_labels='proposition')))
print("PAIRS")
pprint(pairs)

0 sam []
1 argues []
2 in []
3 defense ['positive_nouns']
4 of []
5 the []
6 idiot ['negative_sources']
7 's []
8 assertion ['positive_nouns']
9 that []
10 abortion is murder ['proposition']
13 . []
defaultdict(<class 'set'>,
            {0: {(1, 'nsubj')},
             1: {(-1, 'ROOT')},
             2: {(1, 'prep')},
             3: {(2, 'pobj')},
             4: {(3, 'prep')},
             5: {(6, 'det')},
             6: {(8, 'poss')},
             7: {(6, 'case')},
             8: {(4, 'pobj')},
             9: {(10, 'mark')},
             10: {(1, 'ccomp')},
             13: {(1, 'punct')}})
PAIRS
[(2, 1)]


In [None]:

class QueryableSentence(object):
    """RDF graph built from a Sentence so that it can be queried with SPARQL.

    All terms are created from ``self.NS`` (``Namespace('#')``).  The graph
    ``self.g`` holds the following triples:

        tok_<i>        text     "<token text>"
        tok_<c>        head     head_<c>_<h>
        head_<c>_<h>   daut     tok_<h>
        head_<c>_<h>   dep      "<dependency name>"
        tok_<i>        label    "<label>"
        tok_<l>        follows  tok_<r>

    where ``<c>``/``<h>`` are the first and second index of a head fact and
    ``<l>``/``<r>`` the first and second index of a follows fact.
    """

    def __init__(self, sent: Sentence):
        """Populate the graph from the fact lists of ``sent``."""
        self.NS = Namespace('#')
        self.g = ConjunctiveGraph()
        for tok_id, text in sent.toks:
            self.g.add((self._get_tok_id(tok_id),
                        self.NS['text'],
                        Literal(text)))
        for child_id, (head_id, dep) in sent.headof_facts:
            # One intermediate node per arc so the dependency name can be
            # attached to the arc itself.
            rel_id = self._get_rel_id(child_id, head_id)
            self.g.add((self._get_tok_id(child_id),
                        self.NS['head'],
                        rel_id))
            self.g.add((rel_id,
                        self.NS['daut'],
                        self._get_tok_id(head_id)))
            self.g.add((rel_id,
                        self.NS['dep'],
                        Literal(dep)))
        for tok_id, label_text in sent.label_facts:
            self.g.add((self._get_tok_id(tok_id),
                        self.NS['label'],
                        Literal(label_text)))
        for left_tok_id, right_tok_id in sent.follows_facts:
            if left_tok_id is None or right_tok_id is None:
                # make_sentence_from_doc pairs the last token with None;
                # emitting it would create a bogus `tok_None` node.
                continue
            self.g.add((self._get_tok_id(left_tok_id),
                        self.NS['follows'],
                        self._get_tok_id(right_tok_id)))

    def _get_tok_id(self, tok_id):
        """Return the graph term ``tok_<tok_id>``."""
        return self.NS['tok_%s' % (tok_id)]

    def _get_rel_id(self, child_id, head_id):
        """Return the graph term ``head_<child_id>_<head_id>`` for one arc."""
        return self.NS['head_%s_%s' % (child_id, head_id)]

    @staticmethod
    def _get_node(nodes, id, prefix):
        """Return the cached ``ns:<prefix>_<id>`` URIRef from ``nodes``.

        The node is created and stored in ``nodes`` on first use.  This
        helper is not called by the class itself.
        """
        return nodes.setdefault(id, URIRef('ns:%s_%s' % (prefix, id)))

            

In [None]:
# Build the RDF view of the current sentence and run a hand-written SPARQL
# query that walks a prep/pobj/prep/pobj/poss chain towards a token labelled
# "negative_sources".
# NOTE(review): the query declares PREFIX ns: <#s> while QueryableSentence
# creates its terms from Namespace('#') — confirm the prefix is intended.
# NOTE(review): `?_1X` is not referenced anywhere else in the query;
# presumably `?_1` was meant — confirm.
# NOTE(review): the query reads ns:dep from token variables, whereas
# QueryableSentence attaches ns:dep to the intermediate head_<c>_<h> nodes —
# confirm against the graph layout.
#from pprint import pprint
qs = QueryableSentence(sent) # .query('select ?s, ?r where { ?r follows ?s .}')
#pprint(list(qs.g.objects(subject=qs.NS['tok_1'])))
print(list(qs.g.query('''
PREFIX ns: <#s>
SELECT *
WHERE { 
 ?P ns:label "proposition" . 
 ?P ns:dep ?d .
 ?X ns:head ?P .
 ?X ns:head ?_1X .
 ?_1 ns:dep ?_1d .
 ?_1 ns:head ?_2 .
 ?_2 ns:dep ?_2d .
 ?_2 ns:head ?_3 .
 ?_3 ns:dep ?_3d .
 ?_3 ns:head ?_4 .
 ?_4 ns:dep ?_4d .
 ?_4 ns:head ?VE .
 ?VE ns:dep ?_VEd .
 ?VE ns:label ?VE_label . 
FILTER 
((?d = "ccomp" || ?d = "xcomp") 
 && (?_1d = "prep") 
 && (?_2d = "pobj") 
 && (?_3d = "prep") 
 && (?_4d = "pobj") 
 && (?_VEd = "poss") 
 && (?VE_label = "negative_sources")
) .
} 
''')))

In [None]:

def query_clause(tok1, tok2, rel=(), tok1_label=(), tok2_label=(), tok1_text=()):
    """Build the SPARQL fragments linking ``tok1`` to ``tok2`` through one arc.

    Args:
        tok1: Variable name (without ``?``) of the token owning the arc.
        tok2: Variable name (without ``?``) of the token the arc points to.
        rel: Allowed dependency names for the arc.
        tok1_label: Allowed labels for ``tok1``.
        tok2_label: Allowed labels for ``tok2``.
        tok1_text: Allowed token texts for ``tok1``.

    Every constraint accepts an iterable of values or a single string; an
    empty iterable (the default) or ``None`` adds no constraint.

    Returns:
        A ``(where, filter)`` pair of strings.  ``where`` is the ' . '-joined
        triple patterns followed by ' .'; ``filter`` is the ' && '-joined
        parenthesised alternatives, or ``''`` when nothing is constrained.
    """
    rel_node = 'rel_%s_%s' % (tok1, tok2)
    where_clauses = ['?%s ns:head ?%s' % (tok1, rel_node),
                     '?%s ns:daut ?%s' % (rel_node, tok2)]
    filter_clauses = []

    def escape(value):
        # Keep the value inside the double-quoted SPARQL string literal.
        return str(value).replace('\\', '\\\\').replace('"', '\\"')

    def perform_filter(relation, node_id, possible_values):
        if possible_values is None:
            return
        if isinstance(possible_values, str):
            # A bare string is one value, not a sequence of characters.
            possible_values = [possible_values]
        possible_values = list(possible_values)
        if not possible_values:
            return
        variable = '?%s_%s' % (relation, node_id)
        where_clauses.append('?%s ns:%s %s' % (node_id, relation, variable))
        alternatives = ['%s = "%s"' % (variable, escape(value))
                        for value in possible_values]
        filter_clauses.append('(%s)' % ' || '.join(alternatives))

    perform_filter('dep', rel_node, rel)
    perform_filter('label', tok1, tok1_label)
    perform_filter('label', tok2, tok2_label)
    # tok1_text used to be accepted and silently ignored.
    perform_filter('text', tok1, tok1_text)
    return ' . '.join(where_clauses) + ' .', ' && '.join(filter_clauses)


In [None]:
# Sanity checks for query_clause: element [0] of the result is the WHERE
# fragment, element [1] the FILTER expression.
assert query_clause('A', 'B') == ('?A ns:head ?rel_A_B . ?rel_A_B ns:daut ?B .', '') 
assert query_clause('C', 'D') == ('?C ns:head ?rel_C_D . ?rel_C_D ns:daut ?D .', '') 
assert query_clause('C', 'D', rel=['xcomp'])[1] == '(?dep_rel_C_D = "xcomp")'
assert query_clause('C', 'D', rel=['xcomp'])[0] \
    == '?C ns:head ?rel_C_D . ?rel_C_D ns:daut ?D . ?rel_C_D ns:dep ?dep_rel_C_D .'
assert query_clause('C', 'D', rel=['xcomp', 'ccomp'])[1] == '(?dep_rel_C_D = "xcomp" || ?dep_rel_C_D = "ccomp")'
assert query_clause('C', 'D', tok1_label=['negative_sources'])[1] == '(?label_C = "negative_sources")'
assert query_clause('C', 'D', tok1_label=['dfs', 'blah'])[1] == '(?label_C = "dfs" || ?label_C = "blah")'
assert query_clause('C', 'D', tok1_label=['dfs', 'blah'])[0] \
    == '?C ns:head ?rel_C_D . ?rel_C_D ns:daut ?D . ?C ns:label ?label_C .'
assert query_clause('C', 'D', tok2_label=['dfs', 'blah'])[1] == '(?label_D = "dfs" || ?label_D = "blah")'
assert query_clause('C', 'D', tok2_label=['dfs', 'blah'])[0] \
    == '?C ns:head ?rel_C_D . ?rel_C_D ns:daut ?D . ?D ns:label ?label_D .'
assert query_clause('C', 'D', tok1_label=['A'], tok2_label=['B'])[0] \
    == '?C ns:head ?rel_C_D . ?rel_C_D ns:daut ?D . ?C ns:label ?label_C . ?D ns:label ?label_D .'
assert query_clause('C', 'D', tok1_label=['A'], tok2_label=['B'])[1] \
    == '(?label_C = "A") && (?label_D = "B")'


In [None]:
def query_builder(variables, clauses):
    """Assemble a SPARQL SELECT query string.

    ``variables`` are the names to select (a leading ``?`` is added to each);
    ``clauses`` is an iterable of ``(where, filter)`` string pairs.  All
    where-parts are joined with a space, and all non-empty filter-parts are
    AND-ed into a single FILTER expression, which is omitted when there is
    nothing to filter on.
    """
    clause_pairs = list(clauses)
    where_text = ' '.join(where_part for where_part, _ in clause_pairs)
    active_filters = [filter_part for _, filter_part in clause_pairs
                      if filter_part != '']
    if active_filters:
        filter_text = 'FILTER ( %s )' % ' && '.join(active_filters)
    else:
        filter_text = ''
    select_list = ' '.join('?' + name for name in variables)
    template = '''
PREFIX ns: <#>
SELECT %(variables)s
WHERE { 
 %(where_clauses)s
 %(filter_portion)s
}'''
    return template % {'variables': select_list,
                       'where_clauses': where_text,
                       'filter_portion': filter_text}

In [None]:
#pprint(list(zip(qs.g.subjects(), qs.g.predicates(), qs.g.objects())))

In [None]:
# Rebuild the labelled example sentence, then express the
# ccomp|xcomp / prep / pobj / prep / pobj / poss chain with query_clause and
# run the generated SPARQL against the sentence graph.
doc = nlp('Sam argues in defense of the idiot’s assertion that abortion is murder.')
sent = make_sentence_from_doc(doc)
sent = sent.search_and_merge('abortion is murder'.split(), 'proposition')
veridicality_elements = VeriticalityElements()
for pattern, label in veridicality_elements.get_patterns():
    sent = sent.search_and_merge(pattern, label)
sent.print_labeled()
pprint(sent.headof_facts)

# Clauses that share a variable name (X, A, B, C, D) are joined on it.
query = query_builder(['P', 'VE'], 
                      [query_clause('X', 'P', rel=['ccomp', 'xcomp']),
                       query_clause('X', 'A', rel=['prep']),
                       query_clause('A', 'B', rel=['pobj']),
                       query_clause('B', 'C', rel=['prep']),
                       query_clause('C', 'D', rel=['pobj']),
                       query_clause('D', 'VE', rel=['poss'])])
qs = QueryableSentence(sent)
print(list(qs.g.query(query)))


In [None]:

# Minimal query: every (A, B) pair connected through one head_<c>_<h> node.
# The prefix must be <#> to match the Namespace('#') that QueryableSentence
# uses for its terms; with <#s>, ns:head expanded to '#shead' and never
# matched the '#head' predicate in the graph.
mq = '''
PREFIX ns: <#>
SELECT ?A ?B
WHERE { 
 ?A ns:head ?rel_A_B .
?rel_A_B ns:daut ?B .
 
}
'''
print(list(qs.g.query(mq)))


In [8]:


# Hand-written query that starts from the token labelled "proposition" and
# follows arcs through the intermediate relation nodes.
# NOTE(review): `?_tok3` and `?tok_3` are two different variables —
# presumably one spelling was intended; confirm.
# NOTE(review): `?dep2_3` is bound by both ?rel_tok2_3 and ?rel_tok3_4, which
# forces the two arcs to carry the same dependency name — confirm intent.
qs = QueryableSentence(sent) # .query('select ?s, ?r where { ?r follows ?s .}')
print(list(qs.g.query('''
PREFIX ns: <#>
SELECT ?P
WHERE { 
 ?P ns:label "proposition" . 
 ?P ns:head ?rel_tok1_2 .
 ?rel_tok1_2 ns:daut ?_tok2 . 
 ?rel_tok1_2 ns:dep ?dep1_2 .

 ?rel_tok2_3 ns:daut ?_tok2 . 
 ?rel_tok2_3 ns:dep ?dep2_3 .
 ?tok_3 ns:head ?rel_tok2_3 .
 
 ?rel_tok3_4 ns:daut ?_tok3 . 
 ?rel_tok3_4 ns:dep ?dep2_3 .
 ?tok_3 ns:head ?rel_tok3_4 .



FILTER  (
  (?dep1_2 = "ccomp" || ?dep1_2 = "xcomp")
  && ?dep2_3 = "prep"
 ) 
}
''')))


NameError: name 'QueryableSentence' is not defined

In [9]:
#from pprint import pprint
# End-to-end run: parse and label the example sentence, build its graph and
# execute the hand-written chain query.
# NOTE(review): the query declares PREFIX ns: <#s> while QueryableSentence
# creates its terms from Namespace('#') — confirm the prefix is intended.
# NOTE(review): `?_1X` is not referenced anywhere else in the query;
# presumably `?_1` was meant — confirm.
doc = nlp('Sam argues in defense of the idiot’s assertion that abortion is murder.')
sent = make_sentence_from_doc(doc)
sent = sent.search_and_merge('abortion is murder'.split(), 'proposition')
veridicality_elements = VeriticalityElements()
for pattern, label in veridicality_elements.get_patterns():
    sent = sent.search_and_merge(pattern, label)
sent.print_labeled()

qs = QueryableSentence(sent) # .query('select ?s, ?r where { ?r follows ?s .}')
#pprint(list(qs.g.objects(subject=qs.NS['tok_1'])))
print(list(qs.g.query('''
PREFIX ns: <#s>
SELECT *
WHERE { 
 ?P ns:label "proposition" . 
 ?P ns:dep ?d .
 ?X ns:head ?P .
 ?X ns:head ?_1X .
 ?_1 ns:dep ?_1d .
 ?_1 ns:head ?_2 .
 ?_2 ns:dep ?_2d .
 ?_2 ns:head ?_3 .
 ?_3 ns:dep ?_3d .
 ?_3 ns:head ?_4 .
 ?_4 ns:dep ?_4d .
 ?_4 ns:head ?VE .
 ?VE ns:dep ?_VEd .
 ?VE ns:label ?VE_label . 
 FILTER 
  ((?d = "ccomp" || ?d = "xcomp") 
   && (?_1d = "prep") 
   && (?_2d = "pobj") 
   && (?_3d = "prep") 
   && (?_4d = "pobj") 
   && (?_VEd = "poss") 
   && (?VE_label = "negative_sources")
  ) .
} 
''')))

NameError: name 'var' is not defined