# [Workshop] Knowledge Graph (KG) Reasoner

In [1]:
# setup neo4j 
# https://www.digitalocean.com/community/tutorials/how-to-install-and-configure-neo4j-on-ubuntu-20-04#:~:text=Run%20the%20following%20command%20to,%7C%20sudo%20apt%2Dkey%20add%20%2D&text=Next%2C%20add%20the%20Neo4j%204.1,.neo4j.com%20stable%204.1%22

In [2]:
# Follow ReadMe.txt

In [3]:
# !pip install neo4j # Graph Database
# !pip install opennre # Open-source toolkit for relation extraction

In [4]:
# !pip install spacy==2.3.5
# !python -m spacy download en
# !python -m spacy download en_core_web_lg

# 1. Import Library

In [1]:
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable
import logging
import spacy
import opennre
import warnings
warnings.filterwarnings("ignore")

# 2. Initialize working environment

### Connect Neo4j

In [2]:
# Initialize the graph DB and delete all the nodes and relationships
graph = GraphDatabase.driver(
    "neo4j://localhost:7687",
#     auth=("neo4j", "7895123k")
    auth=("neo4j", "ai-user")
)

In [3]:
query = (
        "MATCH (all_nodes)"
        "OPTIONAL MATCH (all_nodes)-[all_rels]->()"
        "DELETE all_nodes, all_rels"
    )
with graph.session() as session:
    result = session.run(query)

### Load OpenNRE

In [4]:
# https://github.com/thunlp/OpenNRE

# https://github.com/thunlp/OpenNRE/issues/312
# !pip install transformers==3.4.0

# For Linux - If face any OSError, delete hidden folder .opennre in home
import transformers

# model = opennre.get_model('wiki80_cnn_softmax')
# model = opennre.get_model('wiki80_bertentity_softmax')
# model = opennre.get_model('tacred_bert_softmax')
# model = opennre.get_model('tacred_bertentity_softmax')

model = opennre.get_model('wiki80_bert_softmax')

2021-07-25 17:59:10,011 - root - INFO - Loading BERT pre-trained checkpoint.


# 3. Create Knowledge Graph

## 3.1 Build KG by extracting name-entities (vertices/nodes) & relations (edges/links/predicates) from textual corpus.

In [9]:
sentences = []

with open("./corpus/Family.txt", "r") as f:
    sentence = f.readline()
    while sentence:
        sentences.append(sentence)
        sentence = f.readline()

sentences[0:5]

['Happy Village is located in Florida, USA.\n',
 'Mr. Wilson is a well-known local silversmith in Happy Village.\n',
 'He is very very old.\n',
 'When Mr. Wilson was young, he came to Happy Village to seek refuge from a war.\n',
 'Mr. Wilson found his wife Mary here and raised three children.\n']

In [5]:
def create_graph(sentences):
    nlp = spacy.load('en_core_web_lg') # !python -m spacy download en_core_web_lg

    exist_ent = {}
    exist_relationship = []

    for sentence in sentences:
        doc = nlp(sentence)
        entities = []
        names_of_entities = []

        ######################## Extract Entities in Python ##################################
        for ent in doc.ents:
            if ent.label_ in ['PERSON', 'GPE', 'ORG'] and ent.text not in names_of_entities:
    #        if ent not in entities:
                names_of_entities.append(ent.text)
                entities.append(ent)

        ################## Create nodes in Neo4j using extracted entities ######################
        for ent in entities:
            if exist_ent.get(ent.text) is None:
                exist_ent[ent.text] = ent.text
                query = (
                    "MERGE (node: "+ent.label_+" {name: $name})"
                    "RETURN node"
                )
                with graph.session() as session:
                    result = session.run(query, name=ent.text)
                print("create new node with label as {0} and name as {1}".format(ent.label_, ent.text))

        for i in range(len(entities)):

            ######################## Extract Relationships in Python ###########################

            for j in range(i + 1, len(entities)):
                text_i = entities[i].text
                text_j = entities[j].text
                loc_h = sentence.find(text_i)
                loc_t = sentence.find(text_j)
                result = model.infer({'text': sentence, 'h': {'pos': (loc_h, loc_h + len(text_i))},
                                   't': {'pos': (loc_t, loc_t + len(text_j))}})
                (rel, confidence) = result[0].replace(' ', '_'), result[1]

                record = (text_i, text_j, rel, confidence)

                result_rev = model.infer({'text': sentence, 'h': {'pos': (loc_t, loc_t + len(text_j))},
                                   't': {'pos': (loc_h, loc_h + len(text_i))}})
                (rel_rev, confidence) = result_rev[0].replace(' ', '_'), result_rev[1]

                record_rev = (text_j, text_i, rel_rev, confidence)

                ############## Create Relationships in Neo4j using extracted Relationships ###########

                if record not in exist_relationship:
                    exist_relationship.append(record)
                    if record[3] > 0.8:
                        query = (
                            "MATCH (n1 {name: $name1})"
                            "MATCH (n2 {name: $name2})"
                            "MERGE (n1) - [r:"+record[2]+"] -> (n2)"
                            "RETURN n1, n2, r"
                        )
                        with graph.session() as session:
                            result = session.run(query, name1=exist_ent[text_i], name2=exist_ent[text_j])
                        print("create new relationship {0} - {1} -> {2} with confidence of {3}".format(record[0], record[2], record[1], record[3]))

                if record_rev not in exist_relationship:
                    exist_relationship.append(record_rev)
                    if record_rev[3] > 0.8:
                        query = (
                            "MATCH (n1 {name: $name1})"
                            "MATCH (n2 {name: $name2})"
                            "MERGE (n1) - [r:"+record_rev[2]+"] -> (n2)"   
                            "RETURN n1, n2, r"
                        )

                        with graph.session() as session:
                            result = session.run(query, name1=exist_ent[text_j], name2=exist_ent[text_i])
                        print("create new relationship {0} - {1} -> {2} with confidence of {3}".format(record_rev[0], record_rev[2], record_rev[1], record_rev[3]))


### Visualize created knowledge graph at: http://localhost:7474/browser/

* Username: neo4j

* Password: ai-user

## 3.2 Extend/Expand KG by inferecing (auto-reasoning) new relations/entites using univeral rules (static knowledge / common sense).

Automatic Reasoner

### Practise graph modification:

In [12]:
# Modify KG, e.g. Delete all link(s) from 'Happy Village' --to--> 'Wilson'
query = (
        "MATCH (n1:GPE {name:'Happy Village'})"
        "MATCH (n2:PERSON {name:'Wilson'})"
        "MATCH (n1)-[r]->(n2)"
        "DELETE r"
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)

In [13]:
# Your exercise: Modify KG, e.g. Delete all link(s) from 'Florida' --to--> 'Happy Village'
query = (
        "MATCH (n1:GPE {name:'Florida'})"
        "MATCH (n2:GPE {name:'Happy Village'})"
        "MATCH (n1)-[r]->(n2)"
        "DELETE r"
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)

### Add rule set:

In [14]:
# Rule 1: Siblings relationship/link


query = (
        "MATCH (n1:PERSON)-[r:father]->(n2:PERSON)-[r2:child]->(n3:PERSON)"
        "WHERE (n1) <> (n3)"
        "MERGE (n1)-[r3:sibling]->(n3)"
        "MERGE (n3)-[r4:sibling]->(n1)"
        "RETURN n1, n3"
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)

In [15]:
# Rule 2: Mother relationship/link

query = (
        "MATCH (n1:PERSON)-[r:father]->(n2:PERSON)-[r2:spouse]->(n3:PERSON)"
        "MERGE (n1)-[r3:mother]->(n3)"
        "MERGE (n3)-[r4:child]->(n1)"
        "RETURN n1, n3"
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)

In [16]:
# Rule 3: Grandfather relationship/link

query = (
        "MATCH (n1:PERSON)-[r:father]->(n2:PERSON)-[r2:father]->(n3:PERSON)"
        "MERGE (n1)-[r3:grandfather]->(n3)"
        "MERGE (n3)-[r4:grandchild]->(n1)"
        "RETURN n1, n3"
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)

## 3.3 Retrieve informatino by querying / reasoning over KG.

### Our query: 'Deep Thought, what's the relationship between Wilson and William?'

In [17]:
query = (
        "MATCH (n1 {name:'Wilson'})-[r]->(n2 {name:'William'})"
        "RETURN n1, r, n2"
)
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print("Deep Thought: {0} has {1} {2}.".format(result['n1']['name'], 
                                                           result['r'].type, 
                                                           result['n2']['name']))

Deep Thought: Wilson has grandchild William.


### Our query: 'Deep Thought, what's the relationship between James and Marry?'

In [18]:
query = (
        "MATCH (n1 {name:'James'})-[r]->(n2 {name:'Mary'})"
        "RETURN n1, r, n2"
)
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print("Deep Thought: {0} has {1} {2}.".format(result['n1']['name'], 
                                                           result['r'].type, 
                                                           result['n2']['name']))

Deep Thought: James has mother Mary.


# 4. Knowledge Graph [Workshop]

### Ground Work:

Read novel "Animal Farm" By author: George Orwell (pseudonym of Eric Blair) (1903-1950) http://gutenberg.net.au/ebooks01/0100011.txt


<img src="https://upload.wikimedia.org/wikipedia/commons/f/fb/Animal_Farm_-_1st_edition.jpg" width=300>

### Task:
Build knowledge graph for "Animal Farm". Then query various entities and relationships 

In [31]:
query = (
        "MATCH (all_nodes)"
        "OPTIONAL MATCH (all_nodes)-[all_rels]->()"
        "DELETE all_nodes, all_rels"
    )
with graph.session() as session:
    result = session.run(query)

In [32]:
sentences = []

# with open("./corpus/Family.txt", "r") as f:
with open("./corpus/Animal Farm by George Orwell.txt", "r") as f:
    sentence = f.readline()
    while sentence:
        sentences.append(sentence)
        sentence = f.readline()

sentences[0:5]

['Title: Animal Farm \n',
 'Author: George Orwell (pseudonym of Eric Blair) (1903-1950)\n',
 'Chapter I\n',
 'Mr Jones, of the Manor Farm, had locked the hen-houses for the night, but was too drunk to remember to shut the pop-holes.\n',
 'With the ring of light from his lantern dancing from side to side, he lurched across the yard, kicked off his boots at the back door, drew himself a last glass of beer from the barrel in the scullery, and made his way up to bed, where Mrs Jones was already snoring.\n']

In [33]:
# Construct knowledge graph

create_graph(sentences)

create new node with label as ORG and name as Animal Farm
create new node with label as PERSON and name as George Orwell
create new node with label as PERSON and name as Eric Blair
create new relationship George Orwell - said_to_be_the_same_as -> Eric Blair with confidence of 0.9982545971870422
create new relationship Eric Blair - said_to_be_the_same_as -> George Orwell with confidence of 0.9979104399681091
create new node with label as PERSON and name as Mr Jones
create new node with label as ORG and name as the Manor Farm
create new relationship Mr Jones - residence -> the Manor Farm with confidence of 0.8815862536430359
create new node with label as PERSON and name as Mrs Jones
create new node with label as PERSON and name as Major
create new node with label as PERSON and name as Jones
create new node with label as PERSON and name as Willingdon Beauty
create new node with label as ORG and name as Bluebell, Jessie
create new node with label as ORG and name as Pincher
create new relat

create new relationship Napoleon - sibling -> Snowball with confidence of 0.8651211261749268
create new node with label as ORG and name as the Meeting on the following
create new relationship Napoleon - military_rank -> Major with confidence of 0.8343890309333801
create new node with label as ORG and name as Minimus
create new relationship Squealer - sibling -> Minimus with confidence of 0.9815104007720947
create new relationship Minimus - sibling -> Squealer with confidence of 0.9768145680427551
create new relationship Snowball - characters -> Napoleon with confidence of 0.9917336702346802
create new relationship Napoleon - characters -> Snowball with confidence of 0.993183434009552
create new relationship Squealer - characters -> Napoleon with confidence of 0.9187507629394531
create new relationship Napoleon - characters -> Squealer with confidence of 0.8692359924316406
create new relationship Muriel - sibling -> Benjamin with confidence of 0.9966681599617004
create new relationship 

create new relationship Snowball - characters -> Napoleon with confidence of 0.9986172914505005
create new relationship Napoleon - characters -> Snowball with confidence of 0.99822598695755
create new relationship Napoleon - spouse -> Pilkington with confidence of 0.9059435725212097
create new relationship Napoleon - owned_by -> Animal Farm with confidence of 0.9893791079521179
create new relationship Animal Farm - owned_by -> Napoleon with confidence of 0.9967494010925293
create new relationship Napoleon - owned_by -> Foxwood with confidence of 0.9954119324684143
create new relationship Foxwood - owned_by -> Napoleon with confidence of 0.997226893901825
create new relationship Pilkington - owned_by -> Animal Farm with confidence of 0.8273995518684387
create new relationship Animal Farm - owned_by -> Pilkington with confidence of 0.9819629192352295
create new relationship Pilkington - owned_by -> Foxwood with confidence of 0.9708229303359985
create new relationship Foxwood - owned_by -

create new relationship Willingdon - manufacturer -> Glue Boiler with confidence of 0.9843706488609314
create new node with label as ORG and name as Kennels Supplied
create new relationship Boxer - characters -> Napoleon with confidence of 0.8605455756187439
create new relationship Napoleon - characters -> Boxer with confidence of 0.8998172879219055
create new relationship Rebellion - participant -> Benjamin with confidence of 0.9977999329566956
create new relationship Benjamin - participant -> Rebellion with confidence of 0.9968400001525879
create new relationship Rebellion - participant -> Moses with confidence of 0.9966009259223938
create new relationship Moses - participant -> Rebellion with confidence of 0.9945344924926758
create new relationship Rebellion - participant -> raven with confidence of 0.997183620929718
create new relationship raven - participant -> Rebellion with confidence of 0.9964698553085327
create new relationship Benjamin - sibling -> Moses with confidence of 0.

In [34]:
#  Sample and observe the generated relationships, are they reasonable? 
# Can you optimize/ modify some relationshps, e.g. many suspicious ‘spouse’ relationships?

################## Delete relationships #####################

for rel in ['spouse','father','characters','religion','sibling','said_to_be_the_same_as','followed_by','residence']:
    query = (
            "MATCH (n) - [r:"+rel+"] -> ()"
            "DELETE r"
    )

    with graph.session() as session:
        results = session.run(query)
    
    
#################### Delete specific relationships ###############

# Rebellion-participant->
query = (
        "MATCH (n {name:'Rebellion'})-[r:participant]->()"
        "DELETE r"
)

with graph.session() as session:
    results = session.run(query)

# the Manor Farm-participant->
query = (
        "MATCH (n {name:'the Manor Farm'})-[r:participant]->()"
        "DELETE r"
)

with graph.session() as session:
    results = session.run(query)

   
    
############################# Delete nodes #######################
for node in ['Jones','Animal Farm']:
    query = (
            "MATCH (n {name:'"+node+"'})-[r]-()"
            "DELETE n,r"
    )

    with graph.session() as session:
        results = session.run(query)
    
    
    
########################## Add relationships ######################

# spouse link b/w Mr. Jones and Mrs. Jones

query = (
        "MATCH (n1 {name:'Mr Jones'})"
        "MATCH (n2 {name:'Mrs Jones'})"
        "MERGE (n1)-[r1:spouse]->(n2)"
        "MERGE (n2)-[r2:spouse]->(n1)"
        "RETURN n1, n2,r1,r2"
)

with graph.session() as session:
    results = session.run(query)
    
# common_goal link b/w pig ‘Snowball’ and horse ‘Boxer’

query = (
        "MATCH (n1 {name:'Snowball'})"
        "MATCH (n2 {name:'Boxer'})"
        "MERGE (n1)-[r1:common_goal]->(n2)"
        "MERGE (n2)-[r2:common_goal]->(n1)"
        "RETURN n1, n2,r1,r2"
)

with graph.session() as session:
    results = session.run(query)
    
# Animals and people -lives_in-> Manor Farm
# Note: Some people/nodes like 'Eric Blair' amd 'George Orwell' are also taken into account using the query below. I haven't separated/deleted them for now.
# So kindly assume nodes like these are not considered in the relationship created below.
# Also note that some animals are extracted as 'ORG' by OPENNRE. So they won't be taken into account as well.

query = (
        "MATCH (n1:PERSON)"
        "MATCH (n2 {name:'the Manor Farm'})"
        "MERGE (n1)-[r1:lives_in]->(n2)"
        "RETURN n1,n2,r1"
)

with graph.session() as session:
    results = session.run(query)
    
# Boxer -lives_in-> Manor Farm
query = (
        "MATCH (n1 {name:'Boxer'})"
        "MATCH (n2 {name:'the Manor Farm'})"
        "MERGE (n1)-[r1:lives_in]->(n2)"
        "RETURN n1,n2,r1"
)

with graph.session() as session:
    results = session.run(query)
    
# Manor Farm -> is_located_in -> England

query = (
        "MATCH (n1 {name:'the Manor Farm'})"
        "MATCH (n2 {name:'England'})"
        "MERGE (n1)-[r1:is_located_in]->(n2)"
        "RETURN n1,n2,r1"
)

with graph.session() as session:
    results = session.run(query)
    
# Benjamin and Boxer talked to each other

query = (
        "MATCH (n1 {name:'Benjamin'})"
        "MATCH (n2 {name:'Boxer'})"
        "MERGE (n1)-[r1:talked]->(n2)"
        "MERGE (n2)-[r2:talked]->(n1)"
        "RETURN n1,n2,r1,r2"
)

with graph.session() as session:
    results = session.run(query)

In [35]:
# two reasonable inference rules to extend/modify the Animal Farm KG


# Rule 1 - Mr. Jones -lives_in-> Manor Farm -> is_located_in -> England = Mr. Jones -belongs_to-> England
# Note: 'the Manor Farm' was extracted as a 'ORG' by OPENNRE. For now I haven't changed the label to 'GPE' and kept it same.
 
query = (
        "MATCH (n1:PERSON)-[r:lives_in]->(n2:ORG)-[r2:is_located_in]->(n3:GPE)"
        "MERGE (n1)-[r3:belongs_to]->(n3)"
        "RETURN n1, n3"
)

with graph.session() as session:
    results = session.run(query)
    
# If anyone lived in Manor Farm and talked -> they are friends (Boxer and Benjamin)
# Note: Boxer was extracted as a 'ORG' by OPENNRE. For now I haven't changed the label to 'PERSON' and kept it same.

query = (
        "MATCH (n1:PERSON)-[r1:talked]-(n2:ORG)"
    "MATCH (n2)-[r5:talked]->(n1)"
    
        "MATCH (n1)-[r2:lives_in]->(n3:ORG)"
        "MATCH (n2)-[r3:lives_in]->(n3)"
    
        "MERGE (n1)-[r4:friends]-(n2)"
     "MERGE (n2)-[r6:friends]->(n1)"
    
        "RETURN n1, n2"
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)

In [36]:
# what is the relationship between pig ‘Snowball’ and horse ‘Boxer’


query = (
        "MATCH (n1 {name:'Snowball'})-[r]->(n2 {name:'Boxer'})"
        "RETURN n1, r, n2"
)
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print("Deep Thought: {0} is {1} {2}.".format(result['n1']['name'], 
                                                           result['r'].type, 
                                                           result['n2']['name']))

Deep Thought: Snowball is common_goal Boxer.


---

In [None]:
query = (
        "MATCH (n1 {name:'Napoleon'})-[r]->(n2 {name:'Animal Farm'})"
        "RETURN n1, r, n2"
)
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print("Deep Thought: {0} is {1} {2}.".format(result['n1']['name'], 
                                                           result['r'].type, 
                                                           result['n2']['name']))

In [None]:
query = (
        "MATCH (n1 {name:'Napoleon'})-[r]->(n2 {name:'Snowball'})"
        "RETURN n1, r, n2"
)
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print("Deep Thought: {0} has {1} {2}.".format(result['n1']['name'], 
                                                           result['r'].type, 
                                                           result['n2']['name']))

In [None]:
query = (
        "MATCH (n1 {name:'Rebellion'})-[r]->(n2)"
        "RETURN n1, r, n2"
)
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print("Deep Thought: {0} has {1} {2}.".format(result['n1']['name'], 
                                                           result['r'].type, 
                                                           result['n2']['name']))

# References

* SpaCy: Named Entity Recognition:https://spacy.io/api/annotation#named-entities
* OpenNRE: Relation Extraction: https://github.com/thunlp/OpenNRE#what-is-relation-extraction

---