Relationship Extraction

In [None]:
!pip install --upgrade spacy



In [None]:
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl (400.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.7/400.7 MB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Load the en_core_web_lg pipeline once at module level; extract_relations and
# extract_events both call this shared `nlp` object.
import spacy
nlp = spacy.load("en_core_web_lg")

In [None]:
def extract_relations(text, directed=False):
  """Pair up named entities and use the tokens between them as the relation.

  The text is parsed with the module-level ``nlp`` pipeline. For every pair of
  distinct entities, the tokens lying strictly between the two entity spans
  are joined with single spaces and reported as the relation. Pairs with no
  tokens between them are skipped.

  Args:
    text: Raw input text to analyse.
    directed: When False (the default), every pair is reported in both
      orders, i.e. (A, rel, B) and (B, rel, A) share the same relation text.
      When True, only pairs where ``entity1`` starts before ``entity2`` in
      the text are reported, which avoids reversed triples such as
      "Steve Jobs was founded by Apple".

  Returns:
    A list of dicts with the keys ``"entity1"``, ``"relation"`` and
    ``"entity2"`` (all strings), ordered by the nested iteration over
    ``doc.ents``.
  """
  doc = nlp(text)
  entities = list(doc.ents)
  relations = []
  for ent1 in entities:
    for ent2 in entities:
      if ent1.start == ent2.start:
        # Same entity: nothing to relate.
        continue
      if directed and ent1.start > ent2.start:
        # Directed mode keeps only pairs in reading order.
        continue
      # The gap runs from the end of the earlier entity to the start of the
      # later one, regardless of which of the two is ent1.
      gap_start = min(ent1.end, ent2.end)
      gap_end = max(ent1.start, ent2.start)
      relation_tokens = [token.text for token in doc[gap_start:gap_end]]
      if relation_tokens:
        relations.append({
          "entity1": ent1.text,
          "relation": " ".join(relation_tokens),
          "entity2": ent2.text,
        })
  return relations

In [None]:
# Demo: run extract_relations on one sample sentence and print each relation
# dict on its own line.
text = "Apple was founded by Steve Jobs in Cupertino."
relations = extract_relations(text)
for relation in relations:
  print(relation)

{'entity1': 'Apple', 'relation': 'was founded by', 'entity2': 'Steve Jobs'}
{'entity1': 'Apple', 'relation': 'was founded by Steve Jobs in', 'entity2': 'Cupertino'}
{'entity1': 'Steve Jobs', 'relation': 'was founded by', 'entity2': 'Apple'}
{'entity1': 'Steve Jobs', 'relation': 'in', 'entity2': 'Cupertino'}
{'entity1': 'Cupertino', 'relation': 'was founded by Steve Jobs in', 'entity2': 'Apple'}
{'entity1': 'Cupertino', 'relation': 'in', 'entity2': 'Steve Jobs'}


Event Extraction

In [None]:
def extract_events(text):
  """Extract a simple verb/subject/object event for every ROOT token.

  The text is parsed with the module-level ``nlp`` pipeline. For each token
  whose dependency label is ``ROOT``, its direct children are scanned for a
  subject and an object, and the head word of each is recorded.

  Object selection: a direct object (``dobj``, or ``obj`` in Universal
  Dependencies label sets) always takes precedence. An indirect object is
  used only as a fallback. spaCy's English pipelines label indirect objects
  ``dative`` rather than ``iobj``, so both labels are accepted.

  Args:
    text: Raw input text to analyse.

  Returns:
    A list of dicts with the keys ``"verb"``, ``"subject"`` and ``"object"``.
    ``"subject"`` and ``"object"`` are empty strings when no matching child
    is found.
  """
  subject_deps = ("nsubj", "nsubjpass")
  direct_object_deps = ("dobj", "obj")
  indirect_object_deps = ("iobj", "dative")

  doc = nlp(text)
  events = []
  for token in doc:
    if token.dep_ != "ROOT":
      continue
    subject = ""
    direct_object = ""
    indirect_object = ""
    for child in token.children:
      if child.dep_ in subject_deps:
        subject = child.text
      elif child.dep_ in direct_object_deps:
        direct_object = child.text
      elif child.dep_ in indirect_object_deps:
        indirect_object = child.text
    events.append({
      "verb": token.text,
      "subject": subject,
      # Prefer the direct object; fall back to the indirect one so that it
      # can never overwrite a direct object that appeared earlier.
      "object": direct_object or indirect_object,
    })
  return events

In [None]:
# Demo: run extract_events on one sample sentence and print the resulting
# list of event dicts.
text = "Apple acquired a UK startup for $1 billion."
events = extract_events(text)
print(events)

[{'verb': 'acquired', 'subject': 'Apple', 'object': ''}]
