# Resources

http://allennlp.org/tutorials  
https://github.com/allenai/allennlp  
https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2017.09.05.tar.gz  
https://spacy.io/usage/processing-pipelines  

In [1]:
import pprint

from nltk.corpus import verbnet, wordnet
from nltk.classify.maxent import MaxentClassifier, accuracy

from allennlp.service.predictors import SemanticRoleLabelerPredictor
from allennlp.models.archival import load_archive

from SPARQLWrapper import SPARQLWrapper, JSON

import spacy
from spacy.tokens import Doc, Span, Token

# Shared spaCy pipeline for the whole notebook, loaded via the 'en' name.
# Later cells append the custom SRL component to this object and use it
# to parse the example question.
nlp = spacy.load('en')

  from ._conv import register_converters as _register_converters


# Semantic role labeling

In [3]:
class SRLTagger(object):
    """spaCy pipeline component that copies semantic-role tags onto tokens.

    The component sends the document text to an SRL predictor and stores
    the BIO tag of the *first* predicted verb frame on each token as the
    custom attribute ``token._.srl_tag`` (default ``'O'``).
    """
    name = 'SRLTagger'

    def __init__(self, vocab, srl_predictor):
        """Store the predictor and register the ``srl_tag`` token extension.

        Args:
            vocab: Stored on the instance as ``self.vocab``; not otherwise
                used by this class.
            srl_predictor: Object exposing ``predict_json({'sentence': str})``
                that returns a dict with ``'tokens'`` and ``'verbs'`` keys.
        """
        self.vocab = vocab
        self.srl_predictor = srl_predictor

        # Registering an extension that already exists can raise in spaCy,
        # so guard the call to keep this notebook cell safely re-runnable.
        if not Token.has_extension('srl_tag'):
            Token.set_extension('srl_tag', default='O')

    def __call__(self, doc):
        """Annotate ``doc`` in place with SRL tags and return it.

        Tokens are paired positionally with the predictor's own tokens; a
        tag is written only where both token texts agree. If the two
        tokenizations differ in length, the surplus tokens keep the
        default tag instead of raising ``IndexError``.
        """
        predictions = self.srl_predictor.predict_json({'sentence': doc.text})

        verbs = predictions['verbs']
        if not verbs:
            # No verb frame predicted: leave every token at the default.
            return doc

        # Only the first verb frame is used.
        first_frame_tags = verbs[0]['tags']
        # zip() stops at the shortest sequence, so mismatched tokenizations
        # cannot index past the end of the predictor's output.
        for token, srl_token, srl_tag in zip(
                doc, predictions['tokens'], first_frame_tags):
            if token.text == srl_token:
                token._.set('srl_tag', srl_tag)
        return doc

In [4]:
# Load the SRL model archive from a path relative to the notebook and
# build a predictor from it.
archive = load_archive('../models/srl-model-2017.09.05.tar.gz')
predictor = SemanticRoleLabelerPredictor.from_archive(archive, "semantic-role-labeling")

# Append the SRL component as the last step of the spaCy pipeline.
# NOTE(review): SRLTagger names its first parameter `vocab`, but the whole
# `nlp` object is passed here; the class only stores it.
srl_tagger = SRLTagger(nlp, predictor)
nlp.add_pipe(srl_tagger, last=True)

Did not use initialization regex that was passed: stacked_encoder\\\\._module.layer_.*bias
Did not use initialization regex that was passed: stacked_encoder\\\\._module.layer_0\\\\.input_linearity\\\\.weight
Did not use initialization regex that was passed: stacked_encoder\\\\._module\\\\.layer_0\\\\.state_linearity\\\\.weight


In [5]:
# Example question used by every later cell; running it through `nlp`
# also applies the SRL component added to the pipeline above.
# Alternative example question (disabled):
#doc = nlp('What cartel trades guns illegally?')
doc = nlp('Who wrote The Little Prince?')

In [6]:
# Show each token next to the SRL tag stored in its `srl_tag` extension.
for token in doc:
    print(token, token._.srl_tag, sep='\t')

Who	B-ARG0
wrote	B-V
The	B-ARG1
Little	I-ARG1
Prince	I-ARG1
?	O


In [7]:
# Show the named entities spaCy found in the question with their labels.
for ent in doc.ents:
    print(ent.text, ent.label_, sep='\t')

The Little Prince	WORK_OF_ART


# WordNet and VerbNet Corpora

In [10]:
# Inspect the first synset WordNet returns for the surface form 'wrote':
# its usage examples, part of speech, neighbours in the hypernym/hyponym
# hierarchy, canonical name and member lemmas.
wn = wordnet.synsets('wrote')[0]
print('Examples:', wn.examples())
print('POS:', wn.pos())
print('Hypernyms:', wn.hypernyms())
print('Hyponyms:')
pprint.pprint(wn.hyponyms())
print('WordNet name:', wn.name())
print('Lemmas:')
pprint.pprint(wn.lemmas())

Examples: ['She composed a poem', 'He wrote four novels']
POS: v
Hypernyms: [Synset('create_verbally.v.01')]
Hyponyms:
[Synset('annotate.v.01'),
 Synset('author.v.01'),
 Synset('dash_off.v.01'),
 Synset('draft.v.01'),
 Synset('dramatize.v.01'),
 Synset('draw.v.18'),
 Synset('lyric.v.01'),
 Synset('paragraph.v.02'),
 Synset('paragraph.v.03'),
 Synset('profile.v.01'),
 Synset('reference.v.01'),
 Synset('rewrite.v.02'),
 Synset('script.v.01'),
 Synset('verse.v.01'),
 Synset('write_copy.v.01'),
 Synset('write_off.v.02'),
 Synset('write_on.v.01'),
 Synset('write_out.v.01')]
WordNet name: write.v.01
Lemmas:
[Lemma('write.v.01.write'),
 Lemma('write.v.01.compose'),
 Lemma('write.v.01.pen'),
 Lemma('write.v.01.indite')]


In [11]:
# Look up the first VerbNet class id listed for the lemma 'write'.
vnclass = verbnet.vnclass(verbnet.classids('write')[0])
# verbnet.pprint() returns the formatted description as a string rather
# than printing it; print it so the newlines render instead of showing a
# single-line repr full of '\n' escapes.
print(verbnet.pprint(vnclass))

'lecture-37.11-1\n  Subclasses: lecture-37.11-1-1\n  Members: lecture moralize preach remark speak talk theorize write\n  Thematic roles:\n\n  Frames:\n    Basic Intransitive\n      Example: She lectured.\n      Syntax: NP[Agent] VERB\n      Semantics:\n        * transfer_info(during(E), Agent, ?Recipient, ?Topic)\n        * cause(Agent, E)\n    PP (about-PP)\n      Example: She lectured about her travels.\n      Syntax: NP[Agent] VERB PREP[about] NP[Topic -sentential]\n      Semantics:\n        * transfer_info(during(E), Agent, ?Recipient, Topic)\n        * cause(Agent, E)'

# Answer Type Prediction

In [12]:
import pickle

# Load the pickled answer-type classifier from a path relative to the
# notebook.
# SECURITY: pickle.load can execute arbitrary code while deserialising;
# only load model files from a trusted source.
with open('../models/maxent_classifier.pkl', 'rb') as f:
    maxent_classifier = pickle.load(f)

In [14]:
# Lazy feature creation - spacy
def create_features(doc):
    """Build the feature dict used for answer-type classification.

    Args:
        doc: A parsed document whose tokens expose ``tag_`` and ``lemma_``
            and whose ``ents`` expose ``label_``.

    Returns:
        dict with keys ``person``, ``work_of_art``, ``proper_noun``
        (booleans), ``length`` (token count) and ``lemma_1`` (lemma of the
        first token, or ``''`` for an empty document).
    """
    # Penn Treebank tags such as NNP/NNPS live in the fine-grained `tag_`
    # attribute; the coarse `pos_` attribute never contains them.
    fine_tags = {token.tag_ for token in doc}
    ner_tags = {ent.label_ for ent in doc.ents}

    features = {}
    features['person'] = 'PERSON' in ner_tags
    features['work_of_art'] = 'WORK_OF_ART' in ner_tags
    # True when either proper-noun tag is present. The previous
    # `('NNP' or 'NNPS') in ...` expression only ever tested 'NNP'.
    # NOTE(review): confirm the pickled classifier was trained with a
    # proper-noun feature computed the same way.
    features['proper_noun'] = not fine_tags.isdisjoint(('NNP', 'NNPS'))
    features['length'] = len(doc)
    # Guard against an empty document, where doc[0] would raise IndexError.
    features['lemma_1'] = doc[0].lemma_ if len(doc) else ''

    return features

In [18]:
# Classify the single question. classify() returns one label, whereas
# classify_many() returns a list that could never compare equal to a label
# string such as 'HUM:ind' in the query builder.
answer_type = maxent_classifier.classify(create_features(doc))

In [20]:
# Hard-coded answer type for this walkthrough; it replaces whatever value
# `answer_type` held before this cell ran.
answer_type = 'HUM:ind'

# SPARQL Query

In [29]:
def create_sparql_query(doc, answer_type):
    """Build the body of a SPARQL SELECT query for a parsed question.

    Only "who" questions with answer type ``'HUM:ind'`` produce triple
    patterns; any other input yields an empty ``WHERE`` block. Namespace
    prefixes are not included and must be prepended by the caller.

    Args:
        doc: Parsed question whose tokens expose ``lemma_`` and
            ``_.srl_tag`` and whose ``ents`` expose ``text`` / ``label_``.
        answer_type: Predicted answer type label (e.g. ``'HUM:ind'``).

    Returns:
        The query text, from ``SELECT`` through the closing brace.
    """
    # Lemma of the first token tagged as the verb (B-V). Defaults to None
    # so a question without a tagged verb cannot hit an unbound name below.
    predicate = next(
        (token.lemma_ for token in doc if token._.srl_tag == 'B-V'), None)

    query = 'SELECT DISTINCT ?x2 WHERE {\n'

    if ('who' in [token.lemma_ for token in doc] and
            answer_type == 'HUM:ind'):

        query += ('\t?x0 rdf:type foaf:Person.\n' +
                  '\t?x0 foaf:name ?x2.\n')

        if predicate == 'write':
            query += ('\t?x1 rdf:type dbpedia-owl:Book.\n' +
                      '\t?x1 dbpedia-owl:author ?x0.\n')

        # Constrain on the first WORK_OF_ART entity only.
        for ent in doc.ents:
            if ent.label_ == 'WORK_OF_ART':
                # Escape characters that would terminate or break the
                # quoted SPARQL literal (backslash must go first).
                label = (ent.text
                         .replace('\\', '\\\\')
                         .replace('"', '\\"')
                         .replace('\n', '\\n')
                         .replace('\r', '\\r'))
                query += '\t?x1 rdfs:label "' + label + '"@en.\n'
                break

    query += '}'
    return query

In [22]:
def print_sparql_answer(answer):
    """Print the English ``?x2`` values from a SPARQL JSON result.

    Args:
        answer: Result dict in SPARQL JSON form, i.e. with a list of
            bindings under ``answer['results']['bindings']``.

    Prints one line per binding whose ``x2`` value is tagged ``en``, or
    ``'No results returned'`` when the binding list is empty.
    """
    bindings = answer['results']['bindings']
    if not bindings:
        print('No results returned')
        return

    for binding in bindings:
        value = binding['x2']
        # Not every binding carries a language tag; .get() skips untagged
        # values instead of raising KeyError.
        if value.get('xml:lang') == 'en':
            print(value['value'])

In [27]:
# Client for the public DBpedia SPARQL endpoint; needs network access.
sparql = SPARQLWrapper('http://dbpedia.org/sparql')

In [23]:
# Namespace declarations for the prefixed names used in SPARQL queries.
# NOTE: `dbpedia:` and `dbpedia-owl:` are bound to the same ontology IRI.
# Prefix left out of the active set, kept for reference:
# PREFIX dbres: <http://dbpedia.org/resource/>

prefix = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX quepy: <http://www.machinalis.com/quepy#>
PREFIX dbpedia: <http://dbpedia.org/ontology/>
PREFIX dbpprop: <http://dbpedia.org/property/>
PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>

"""

In [30]:
# Build the query body for the example question, then prepend the
# namespace prefixes and show the full text that will be sent.
query = create_sparql_query(doc, answer_type)
query = prefix + query
print(query)


PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX quepy: <http://www.machinalis.com/quepy#>
PREFIX dbpedia: <http://dbpedia.org/ontology/>
PREFIX dbpprop: <http://dbpedia.org/property/>
PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>

SELECT DISTINCT ?x2 WHERE {
	?x0 rdf:type foaf:Person.
	?x0 foaf:name ?x2.
	?x1 rdf:type dbpedia-owl:Book.
	?x1 dbpedia-owl:author ?x0.
	?x1 rdfs:label "The Little Prince"@en.
}


In [31]:
# Send the query to the endpoint, request JSON, and convert the response
# into Python objects.
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
answer = sparql.query().convert()

# Print the English answer values (or a no-results message).
print_sparql_answer(answer)

Antoine de Saint-Exupéry
