# Process the text

In [90]:
import os

import nltk
import numpy as np
import pandas as pd

In [91]:
# Load the task file. Every line is split on tabs into up to three fields
# (sentence, answer, fact); fields missing on a line come back as NaN and are
# replaced with '' so statement rows and question rows can be separated below.
data = pd.read_csv(
    'src/main/resources/qa1_single-supporting-fact_train.txt',
    delimiter='\t',
    names=['sentence', 'answer', 'fact'],
).fillna('')

# Tokenize each sentence and drop its first token (presumably the leading line
# number of the bAbI format -- confirm against the data file).
# Series.map is used instead of a row-wise DataFrame.apply: it only touches the
# one column that is needed and always yields a Series, whereas apply(axis=1)
# infers its result shape from whatever the function returns.
data['sentence'] = data['sentence'].map(lambda text: nltk.word_tokenize(text)[1:])

# Rows with an empty answer are statements; only the tokens are kept for them.
statements = (
    data[data['answer'] == '']
    .reset_index(drop=True)
    .drop(['answer', 'fact'], axis=1)
)

# Rows with a non-empty answer are questions; all three columns are kept.
questions = data[data['answer'] != ''].reset_index(drop=True)

In [92]:
# Preview the first five statements.
statements.head()

Unnamed: 0,sentence
0,"[Mary, moved, to, the, bathroom, .]"
1,"[John, went, to, the, hallway, .]"
2,"[Daniel, went, back, to, the, hallway, .]"
3,"[Sandra, moved, to, the, garden, .]"
4,"[John, moved, to, the, office, .]"


In [93]:
# Preview the first five questions.
questions.head()

Unnamed: 0,sentence,answer,fact
0,"[Where, is, Mary, ?]",bathroom,1
1,"[Where, is, Daniel, ?]",hallway,4
2,"[Where, is, Daniel, ?]",hallway,4
3,"[Where, is, Daniel, ?]",office,11
4,"[Where, is, Sandra, ?]",bathroom,8


In [94]:
# Part-of-speech tag every tokenized statement; each cell of 'tag' holds the
# value returned by nltk.pos_tag for that row's token list.
# Series.map replaces the row-wise DataFrame.apply: only the 'sentence' column
# is needed, and map always returns a Series regardless of what the function
# returns.
statements['tag'] = statements['sentence'].map(nltk.pos_tag)

In [95]:
def extract_triple(tags):
    """Build a (subject, relation, object) triple from POS-tagged tokens.

    Parameters
    ----------
    tags : iterable of (word, tag) pairs

    Returns
    -------
    tuple of three str
        The word tagged 'NNP' as subject, the word tagged 'VBD' as relation
        and the word tagged 'NN' as object. When a tag occurs several times
        the last occurrence wins; a slot whose tag never occurs stays ''.
    """
    # Position inside the triple that each recognised tag fills.
    slot_for_tag = {'NNP': 0, 'VBD': 1, 'NN': 2}
    triple = ['', '', '']
    for token, pos in tags:
        slot = slot_for_tag.get(pos)
        if slot is not None:
            triple[slot] = token
    return tuple(triple)

In [96]:
# Reduce each tagged statement to a (subject, relation, object) triple.
# Series.map replaces the row-wise DataFrame.apply: only the 'tag' column is
# needed, and map keeps each returned tuple as a single cell value.
statements['triple'] = statements['tag'].map(extract_triple)

In [220]:
# Preview the first five statements together with their tags and triples.
statements.head()

Unnamed: 0,sentence,tag,triple
0,"[Mary, moved, to, the, bathroom, .]","[(Mary, NNP), (moved, VBD), (to, TO), (the, DT...","(Mary, moved, bathroom)"
1,"[John, went, to, the, hallway, .]","[(John, NNP), (went, VBD), (to, TO), (the, DT)...","(John, went, hallway)"
2,"[Daniel, went, back, to, the, hallway, .]","[(Daniel, NNP), (went, VBD), (back, RB), (to, ...","(Daniel, went, hallway)"
3,"[Sandra, moved, to, the, garden, .]","[(Sandra, NNP), (moved, VBD), (to, TO), (the, ...","(Sandra, moved, garden)"
4,"[John, moved, to, the, office, .]","[(John, NNP), (moved, VBD), (to, TO), (the, DT...","(John, moved, office)"


### Debug Functions

In [222]:
# Debug view: the first five statements whose extracted subject is 'Mary'.
statements.loc[
    statements['triple'].map(lambda triple: triple[0] == 'Mary')
].head()

Unnamed: 0,sentence,tag,triple
0,"[Mary, moved, to, the, bathroom, .]","[(Mary, NNP), (moved, VBD), (to, TO), (the, DT...","(Mary, moved, bathroom)"
6,"[Mary, moved, to, the, hallway, .]","[(Mary, NNP), (moved, VBD), (to, TO), (the, DT...","(Mary, moved, hallway)"
12,"[Mary, went, to, the, bedroom, .]","[(Mary, NNP), (went, VBD), (to, TO), (the, DT)...","(Mary, went, bedroom)"
20,"[Mary, went, to, the, bedroom, .]","[(Mary, NNP), (went, VBD), (to, TO), (the, DT)...","(Mary, went, bedroom)"
29,"[Mary, moved, to, the, office, .]","[(Mary, NNP), (moved, VBD), (to, TO), (the, DT...","(Mary, moved, office)"


# Run Queries

In [205]:
from neo4j.v1 import GraphDatabase, basic_auth

In [206]:
# Create the neo4j driver; the helpers below open their sessions from it.
# The connection settings can be overridden through the NEO4J_URI, NEO4J_USER
# and NEO4J_PASSWORD environment variables so real credentials never have to be
# written into the notebook. When they are unset, the previous hard-coded
# values are used unchanged.
driver = GraphDatabase.driver(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=basic_auth(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "neo4j"),
    ),
)

In [207]:
# WARNING: This will clear the database!
def reset_db():
    """Delete every node, and the relationships attached to it, from the graph.

    Opens a session from the module-level ``driver`` and runs
    ``MATCH (n) DETACH DELETE n``. Returns None.
    """
    # The context manager closes the session on exit; previously the session
    # was opened and never closed, so each call left one behind.
    with driver.session() as session:
        # consume() drains the result so that a failure of the statement is
        # raised here rather than going unnoticed.
        session.run("MATCH (n) DETACH DELETE n").consume()

In [208]:
# Create the graph based on each triple
def create(query):
    """Run ``query`` once for every extracted triple, in row order.

    Parameters
    ----------
    query : str
        Cypher statement. It is executed with the parameters ``subject``,
        ``relation`` and ``obj`` bound to the three elements of the triple.

    Reads the module-level ``driver`` and ``statements['triple']``.
    Returns None.
    """
    # The context manager closes the session on exit; previously the session
    # was opened and never closed, so each call left one behind.
    with driver.session() as session:
        for subject, relation, obj in statements['triple']:
            # consume() drains each result before the next statement is sent,
            # so a failing statement is reported for the triple that caused it.
            session.run(query, {
                'subject': subject,
                'relation': relation,
                'obj': obj,
            }).consume()

### V1: Direct relationships

In [209]:
# Empty the graph so the V1 schema below is built from scratch.
reset_db()

In [210]:
# V1 schema: a SUBJECT node per subject name, an OBJECT node per object name,
# and a RELATION relationship (carrying the verb as its name) going directly
# from subject to object. Everything is MERGEd, so a triple that repeats an
# existing subject/verb/object combination adds nothing new.
# NOTE(review): `{subject}` is the legacy Cypher parameter syntax; newer Neo4j
# releases expect `$subject` -- confirm the server version before upgrading.
create("""
    MERGE (s:SUBJECT {name: {subject}}) 
    MERGE (o:OBJECT  {name: {obj}}) 
    MERGE (s)-[r:RELATION {name: {relation}}]->(o)
""")

### V2: Nodes for relationships

In [211]:
# Empty the graph so the V2 schema below is built from scratch.
reset_db()

In [212]:
# V2 schema: the verb becomes a node instead of a relationship. SUBJECT and
# OBJECT nodes are still MERGEd by name, but a fresh RELATION node is CREATEd
# for every triple and wired in as subject -[:R0]-> relation -[:R1]-> object.
# Because the RELATION node is always new, the final MERGE always creates the
# two relationships.
# NOTE(review): `{subject}` is the legacy Cypher parameter syntax; newer Neo4j
# releases expect `$subject` -- confirm the server version before upgrading.
create("""
    MERGE (s:SUBJECT {name: {subject}})
    CREATE (r:RELATION {name: {relation}})
    MERGE (o:OBJECT  {name: {obj}})

    MERGE (s)-[:R0]->(r)-[:R1]->(o)
""")

### V3: Linked list of relationships

In [213]:
# Empty the graph so the V3 schema below is built from scratch.
reset_db()

In [214]:
# V3 schema: the same subject -[:R0]-> relation -[:R1]-> object shape as V2,
# plus a NEXT chain through each subject's RELATION nodes.
# After creating the new RELATION node `r`, the query looks for another
# RELATION node `r2` of the same subject that has no outgoing NEXT yet and
# links r2 -[:NEXT]-> r. For a subject's first triple the MATCH finds nothing,
# so no NEXT relationship is created.
# NOTE(review): `{subject}` is the legacy Cypher parameter syntax; newer Neo4j
# releases expect `$subject` -- confirm the server version before upgrading.
create("""
    MERGE (s:SUBJECT {name: {subject}})
    MERGE (o:OBJECT  {name: {obj}})

    CREATE (s)-[:R0]->(r:RELATION {name: {relation}})-[:R1]->(o)

    WITH s,r,o

    MATCH (s)-[:R0]->(r2:RELATION)
    WHERE r2 <> r AND NOT (r2)-[:NEXT]->() 
    CREATE (r2)-[:NEXT]->(r)
""")