# Dependencies

<ul>
<li>Currently uses the Stanford dependency parser provided by nltk.</li>
<li>loadW2V is a program that loads word2Vec model stored in disk. (The models are provide by gensim)</li>
<li>OxfordDictionary calls the Oxford Dictionary API to retrieve synonyms for certain words.</li>
</ul>

In [241]:
from nltk.parse.stanford import StanfordDependencyParser
from SPARQLWrapper import SPARQLWrapper, JSON
from urlparse import urlparse
import loadW2V
import numpy as np
import OxfordDictionary as od
import requests
import urllib
import ast
import json

If you want to see details of the extraction of answers, set the following to 'True':

In [172]:
log_details = False

In [163]:
bigJson = []

# Functions

### redirect(URI) !!!!![Unnecessary]!!!!!

The function <b>"redirect"</b> returns the proper DBpedia URI by fetching the property <b>wikiPageRidirects</b>.
<br/><br/>
For example: <br/>
If the provided URI is http://dbpedia.org/page/JFK, it returns http://dbpedia.org/page/John_F._Kennedy

In [3]:
def redirect(URI):
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT ?wikiPageRedirects
        WHERE { <""" + URI + """>
        dbo:wikiPageRedirects ?wikiPageRedirects }
    """
    if log_details:
        print query
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # print results
    new_URI = [r["wikiPageRedirects"]["value"] for r in results["results"]["bindings"]]
    if log_details:
        print "\nREDIRECT:"
    if len(new_URI) > 0:
        if log_details:
            print new_URI[0]
        return new_URI[0]
    else:
        if log_details:
            print URI
        return URI

In [158]:
def get_Label(URI):
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    query = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            SELECT ?label
            WHERE { <""" + URI + """>
            rdfs:label ?label 
            FILTER (lang(?label) = 'en') }            
        """
    
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return results['results']['bindings'][0]['label']['value']

In [159]:
get_Label('http://dbpedia.org/ontology/birthDate')

u'birth date'

### get_closest_word(word, URI)

The function <b>"get_closest_word"</b> returns the property of the URI which is closest to the provided word.
<br/><br/>
For example: <br/>
If the provided word is <b>wife</b> for http://dbpedia.org/page/John_F._Kennedy, it returns the property <b>spouse</b>.

In [249]:
def get_closest_keyword(word, URI, json_unit):
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    query = """
        select distinct ?prop where {
             <"""+ URI +""">
             ?prop ?ent }
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    properties = []
    if log_details:
        print "\nPROPERTIES:"
    for result in results["results"]["bindings"]:
        if log_details:
            print result
        arr = result["prop"]["value"].split('/')
        # properties.append(arr[len(arr) - 1])
        if arr[len(arr) - 1] != 'abstract':
            properties.append([arr[len(arr) - 1], arr[len(arr) - 2]])

    stack_details = {}
    stack_details["word"] = word
    stack_details["properties"] = properties
    
    properties = np.array(properties)
    
    if log_details:
        print "To be matched with: ", word
    # print properties
    for p in properties:
        if p[0] == word:
            json_unit["stackDetails"].append(stack_details)
            return p

    synonyms = od.getSynonyms(word)
    stack_details["synonyms"] = synonyms
    
    log = []
    if synonyms == False:
        synonyms = []

    synonyms.append((word, word))
    if log_details:
        print "\nSYNONYMS:"
        print synonyms
        
    for synonym in synonyms:
        for p in properties:
            if p[0] == synonym[0]:
                json_unit["stackDetails"].append(stack_details)
                return p

    
#     prop_syn_avg = []
#     for p in properties:
#         avg = 0
#         for synonym in synonyms:
#             try:
#                 s = loadW2V.b.similarity(p[0], synonym[0])
#                 # print '(', p[0], ',' , synonym[0], ') ', s
#                 avg += s
#             except KeyError:
#                 log.append(KeyError.message)

#         prop_syn_avg.append([p[0], avg/len(synonyms)])

#     sortd = np.argsort(np.array(prop_syn_avg), 0)[:,1]

    text_properties = ''
    for p in properties:
        text_properties = text_properties + '+' + p[0]
    
    text_synonyms = ''
    for s in synonyms:
        text_synonyms = text_synonyms + '+' + s[0]
    
#     print text_properties
#     print text_synonyms
    
    url_w2v_server = 'http://172.16.1.5:5000/' + urllib.quote_plus(text_synonyms) + '/'+urllib.quote_plus(text_properties)
    if log_details:
        print "\nURL OF WORD2VEC SERVER:"
        print url_w2v_server
        
    response = requests.get(url_w2v_server)
    prop_syn_avg = ast.literal_eval(response.text)
    
    stack_details["propertySynonymAvg"] = prop_syn_avg
    
    prop_syn_avg = np.array(prop_syn_avg)
    
    if log_details:
        print "\nRESPONSE FROM W2V SERVER:"
        print prop_syn_avg
    
#     for p in prop_syn_avg:
#         print p
    
#     if log_details:
#         print "\nSYNONYM SIMILARITIES:"
#         for s in sortd:
#             print prop_syn_avg[s]
#     keyword = prop_syn_avg[np.argmax(np.array(prop_syn_avg), 0)[1]][0]
    
    keyword = prop_syn_avg[np.argmax(prop_syn_avg[:,1])][0]
    
    stack_details["keyword"] = keyword
    json_unit["stackDetails"].append(stack_details)
    print keyword
    for p in properties:
        if p[0] == keyword:
            if log_details:
                print "\nKEYWORD:"
                print p
            return p

### fetch_compound(word, model)

The function <b>"fetch_compound"</b> returns compound words for words from the given model after dependency parsing.
<br/><br/>
For example: <br/>
If the provided word is <b>date</b>, whereas we are actually looking for <b>birth date</b> in the question, it will return so by looking up the model

In [5]:
def fetch_compound(word, model):
    compound_word = []
    for m in model:
        if m[1] == u'compound' and m[0][0] == word:
            compound_word.append(m[2][0])

    compound_word.append(word)
    return compound_word

### call_sparql(keyword, URI, pType)

The function <b>"call_sparql"</b> returns values of properties provided as keywords in the given URI.
<br/><br/>
For example: <br/>
If we are looking for <b>spouse</b> of http://dbpedia.org/page/John_F._Kennedy it will return http://dbpedia.org/page/Jacqueline_Kennedy_Onassis

In [6]:
def call_sparql(keyword, URI, pType):
    # print "\nCALL SPARQL:"
    pTypes = np.array([[u'ontology', 'dbo'],[u'property', 'dbp']])
    
    for p in pTypes:
        if p[0] == pType:
            pType = p[1]

    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT ?"""+ keyword +"""
        WHERE { <""" + URI + """>
        """+pType+""":"""+keyword+""" ?"""+keyword+""" }
    """
    if log_details:
        print query
        
    sparql.setQuery(query)

    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    
    return [r[keyword]["value"] for r in results["results"]["bindings"]]

In [194]:
def getResources(text):
    cookies = {
        'JSESSIONID': 'BC9CD43D9E1AE3E7CF51E00D3A3A7702',
    }

    headers = {
        'Origin': 'http://agdistis.aksw.org',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-GB,en-US;q=0.8,en;q=0.6',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.68 Safari/537.36',
        'Content-Type': 'application/json;charset=UTF-8',
        'Accept': 'application/json, text/plain, */*',
        'Referer': 'http://agdistis.aksw.org/demo/',
        'Connection': 'keep-alive',
    }

#     data = '{"text":"[Barack Obama] arrives in [Washington, D.C.]."}'
    data = '{"text": "'+text+'"}'
    response = requests.post('http://agdistis.aksw.org/demo/agdistis', headers=headers, cookies=cookies, data=data)

#     print(response)
#     print(response.text)
    return response.json()

### answer(question)

The function <b>"answer"</b> returns an answer for a question using the functions above.
<br/><br/>
For example: <br/>
If we are looking for an answer for <b>"What was the religion of the wife of JFK?"</b>, it should return the <b>abstract for catholic church</b>.

In [229]:
def answer(question):
    print "..."
    path_to_jar = 'stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0.jar'
    path_to_models_jar = 'stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0-models.jar'

    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar,
                                                 path_to_models_jar=path_to_models_jar)


    json_unit = {}
    json_unit["question"] = question
    if log_details:
        print "\nQUESTION:"
        print question

    result = dependency_parser.raw_parse(question)
    dep = result.next()

    model = list(dep.triples())

    json_unit["model"] = model
    if log_details:
        print '\nMODEL:'
        print model


    URI = "http://dbpedia.org/resource/"
    keyword = None
    handle = None
    trail = None
    stack = []

    # print models
    for i in range(len(model)):
        if model[i][1] == u'nsubj':
            stack.append(model[i][2][0])

    for i in range(len(model)):
        # print model[i][0][0], stack[len(stack) - 1]
        if model[i][0][0] == stack[len(stack) - 1] and model[i][1] == u'nmod':
            stack.append(model[i][2][0])

    json_unit["stack"] = list(stack)
    if log_details:
        print '\nSTACK:'
        print stack
    handle = stack.pop()
    handle = fetch_compound(handle, model)
    
    compound = ' '.join(handle)
    markers = ["[","]"]
    # markers = ["<entity>", "<entity>"] 
    text = question[:question.index(compound)] \
                + markers[0] + question[question.index(compound):question.index(compound)+len(compound)] + markers[1] \
                + question[(question.index(compound)+len(compound)):]
    
#     print text
    resjson = getResources(text)
    URI = resjson['namedEntities'][0]['disambiguatedURL']
    json_unit["primaryURL"] = {}
    json_unit["primaryURL"]["src"] = "agdistis"
    json_unit["primaryURL"]["value"] = URI
#     u'': [{u'':
#     URI = URI + '_'.join(handle)
#     URI = redirect(URI)
#     print URI
    results = []
    if len(stack) == 0:
        results.append(URI)
        
    i = len(stack)
    json_unit["stackDetails"] = []
    
    while i > 0:
        #if len(stack) == 0:
        #    keyword = "label"
        #else:
        
        trail = stack.pop()
        keyword = fetch_compound(trail, model)

        # print URI
        keyword = ' '.join(keyword)
        keyword = ''.join(x for x in keyword.title() if not x.isspace())
        keyword = list(keyword)
        keyword[0] = keyword[0].lower()
        keyword = ''.join(keyword)
        pair = get_closest_keyword(keyword, URI, json_unit)
        if log_details:
            print pair
        keyword = pair[0]
        pType = pair[1]

        if log_details:
            print keyword, ", ", URI
        results = call_sparql(keyword, URI, pType)
        if log_details:
            print results

        for res in results:
            parsed_url = urlparse(res)

            if parsed_url.scheme == u'http' or parsed_url.scheme == u'https':
                # res = call_sparql(keyword, res)
                URI = res
            else:
                break

        i -= 1
    
    json_unit["answer"] = results
    bigJson.append(json_unit)
    return results


### Set of Questions

In [130]:
questions = [
    'Where is the birth place of Bal Gangadhar Tilak?',
    'What is the capital of Germany?',
    'Who is the president of United States?',
    'When is the birth date of Tom Cruise?',
    'What is the color of the flag of Germany?',
    'Who is Donald Trump?',
    'Where is birth place of wife of Mahatma Gandhi?',
    'Who is the vice president of John Kennedy?',
    'What is the birth place of wife of John Kennedy?',
    'What is the municipality of Roberto Clemente Bridge',
    'What is the nationality of the prime minister of Thanong Bidaya?',
    #'which are the films of Richard Gere and Julia Roberts?',
    #'What is the party of Mumbai North?',
    'Who is the founder of Facebook?'
]

### Write your question here

In [199]:
log_details = False

In [244]:
question = 'When is the birth date of JFK?'

In [238]:
results = answer(question)

for res in results:
    print res

...
1917-05-29
1917-5-29


In [250]:
bigJson = []

In [251]:
for q in questions:
    print "\nQUESTION:", q
    print "\nANSWER:"
    results = answer(q)

    for res in results:
        print res
    
    print "_____________________________"


QUESTION: Where is the birth place of Bal Gangadhar Tilak?

ANSWER:
...
http://dbpedia.org/resource/India
http://dbpedia.org/resource/Ratnagiri
http://dbpedia.org/resource/Bombay_State
http://dbpedia.org/resource/British_India
_____________________________

QUESTION: What is the capital of Germany?

ANSWER:
...
http://dbpedia.org/resource/Berlin
_____________________________

QUESTION: Who is the president of United States?

ANSWER:
...
legislature
http://dbpedia.org/resource/United_States_Congress
_____________________________

QUESTION: When is the birth date of Tom Cruise?

ANSWER:
...
1962-07-03
1962-7-3
_____________________________

QUESTION: What is the color of the flag of Germany?

ANSWER:
...
leader
http://dbpedia.org/resource/Angela_Merkel
http://dbpedia.org/resource/Stanislaw_Tillich
http://dbpedia.org/resource/Joachim_Gauck
http://dbpedia.org/resource/Andreas_Voßkuhle
http://dbpedia.org/resource/Norbert_Lammert
_____________________________

QUESTION: Who is Donald Trump?

In [252]:
print json.dumps(bigJson)

[{"question": "Where is the birth place of Bal Gangadhar Tilak?", "stackDetails": [{"word": "birthPlace", "properties": [["22-rdf-syntax-ns#type", "02"], ["rdf-schema#label", "01"], ["rdf-schema#comment", "01"], ["rdf-schema#seeAlso", "01"], ["owl#sameAs", "07"], ["deathPlace", "ontology"], ["deathDate", "ontology"], ["birthPlace", "ontology"], ["birthDate", "ontology"], ["subject", "terms"], ["wikiPageID", "ontology"], ["wikiPageRevisionID", "ontology"], ["wikiPageWikiLink", "ontology"], ["wikiPageExternalLink", "ontology"], ["name", "0.1"], ["depiction", "0.1"], ["description", "terms"], ["gender", "0.1"], ["isPrimaryTopicOf", "0.1"], ["thumbnail", "ontology"], ["prov#wasDerivedFrom", "ns"], ["birthYear", "ontology"], ["deathYear", "ontology"], ["ethnicity", "ontology"], ["nationality", "ontology"], ["religion", "ontology"], ["stateOfOrigin", "ontology"], ["alt", "property"], ["caption", "property"], ["movement", "property"], ["organization", "property"], ["vrank#hasRank", "voc"], ["