In [None]:
# 1. Link with GoogleDrive for easy file import
#     The following two steps are how to link colab(fixed)
from google.colab import drive
drive.mount('/content/drive/')

# Optional, one gives the path directly and then just imports it
#     Another, %cd to that path and then import by filename

dir_path = '/content/drive/MyDrive/2023NLPCourse/Assignment2/'

# The second one goes path
%cd /content/drive/MyDrive/2023NLPCourse/Assignment2/

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive/2023NLPCourse/Assignment2


In [None]:
# 2. install package and import
!pip install pyvis==0.3.1
!pip install wikipedia
from pyvis import network as net
import networkx as nx
import pickle
import wikipedia

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# 3. Define the functions
class KB:
    """In-memory knowledge base of entities, relations and source articles.

    Entities are keyed by the title returned by ``get_wikipedia_data``. A
    title ending in ``"*"`` marks an entity whose Wikipedia lookup failed.
    """

    def __init__(self):
        # { entity_title: {...} }
        self.entities = {}
        # [ { head: entity_title, type: ..., tail: entity_title,
        #     meta: { article_url: { spans: [...] } } } ]
        self.relations = []
        # { article_url: {...} }
        self.sources = {}

    def merge_with_kb(self, kb2):
        """Add every relation of ``kb2`` (with its source data) to this KB.

        ``kb2`` is left untouched: each relation is deep-copied before being
        handed to ``add_relation``, which renames head/tail in place and may
        store the dict it receives.
        """
        import copy  # local import keeps this class cell self-contained

        for r in kb2.relations:
            article_url = list(r["meta"])[0]
            source_data = kb2.sources[article_url]
            self.add_relation(copy.deepcopy(r),
                              source_data["article_title"],
                              source_data["article_publish_date"])

    def are_relations_equal(self, r1, r2):
        """Return True when both relations share head, type and tail."""
        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])

    def exists_relation(self, r1):
        """Return True when an equal relation is already stored."""
        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)

    def merge_relations(self, r2):
        """Fold the metadata of ``r2`` into the stored relation equal to it.

        Only the first article URL in ``r2["meta"]`` is considered. Raises
        ``IndexError`` when no equal relation is stored.
        """
        r1 = [r for r in self.relations
              if self.are_relations_equal(r2, r)][0]

        article_url = list(r2["meta"])[0]
        if article_url not in r1["meta"]:
            # relation seen in a new article: adopt its metadata as is
            r1["meta"][article_url] = r2["meta"][article_url]
        else:
            # same article: append only the spans not recorded yet
            spans_to_add = [span for span in r2["meta"][article_url]["spans"]
                            if span not in r1["meta"][article_url]["spans"]]
            r1["meta"][article_url]["spans"] += spans_to_add

    def get_wikipedia_data(self, candidate_entity):
        """Resolve ``candidate_entity`` to a dict with title, url and summary.

        The page is looked up with ``wikipedia.page(..., auto_suggest=False)``.
        If the lookup (or reading the page attributes) raises, a placeholder
        is returned instead: the candidate name marked with a trailing ``"*"``
        and empty url/summary.
        """
        try:
            page = wikipedia.page(candidate_entity, auto_suggest=False)
            return {
                "title": page.title,
                "url": page.url,
                "summary": page.summary
            }
        except Exception:
            # ``Exception`` (not a bare except) so Ctrl-C / kernel interrupt
            # still stops a long extraction run.
            # Keep the marker idempotent: a name that already carries "*"
            # (e.g. when re-adding relations from another KB) must not
            # become "Name**", which would create a duplicate entity.
            if candidate_entity.endswith("*"):
                title = candidate_entity
            else:
                title = candidate_entity + "*"
            return {
                "title": title,
                "url": "",
                "summary": ""
            }

    def add_entity(self, e):
        """Store entity ``e`` under its title, replacing any previous entry."""
        self.entities[e["title"]] = {k: v for k, v in e.items() if k != "title"}

    def add_relation(self, r, article_title, article_publish_date):
        """Add relation ``r`` to the KB, registering its entities and source.

        ``r`` is modified in place: its head and tail are replaced by the
        titles returned by ``get_wikipedia_data``. If an equal relation is
        already stored the metadata is merged, otherwise ``r`` itself is
        appended to ``self.relations``.
        """
        # check on wikipedia
        candidate_entities = [r["head"], r["tail"]]
        entities = [self.get_wikipedia_data(ent) for ent in candidate_entities]

        # if one entity could not be resolved at all, stop
        if any(ent is None for ent in entities):
            return

        # manage new entities
        for e in entities:
            self.add_entity(e)

        # rename relation entities with their resolved titles
        r["head"] = entities[0]["title"]
        r["tail"] = entities[1]["title"]

        # add source if not in kb
        article_url = list(r["meta"])[0]
        if article_url not in self.sources:
            self.sources[article_url] = {
                "article_title": article_title,
                "article_publish_date": article_publish_date
            }

        # manage new relation
        if not self.exists_relation(r):
            self.relations.append(r)
        else:
            self.merge_relations(r)

    def print(self):
        """Print all entities, relations and sources, one per line."""
        print("Entities:")
        for e in self.entities.items():
            print(f"  {e}")
        print("Relations:")
        for r in self.relations:
            print(f"  {r}")
        print("Sources:")
        for s in self.sources.items():
            print(f"  {s}")

In [None]:
# 4. load kb (presumably a pickled KB instance saved by an earlier step)
# NOTE: unpickling can execute arbitrary code, so only load .kb files that
# you created yourself or otherwise trust.
# The with-block closes the file handle once the object has been read.
with open("Data/Rebel.kb", "rb") as kb_file:
    kb = pickle.load(kb_file)

In [None]:
# 5 construct graph from the saved Named Entities and Relations (in kb file)
# Network is imported under its own name: the original `net = net.Network(...)`
# replaced the module alias `net` with the instance, so running this cell a
# second time failed. The graph is still bound to `net` for the cells below.
from pyvis.network import Network

net = Network(
    directed=True,
    width="1200px",
    height="1000px",
    bgcolor="#FFFFFF",
    notebook=True,
    )

# nodes: one per entity title in the knowledge base
color_entity = "#00FF00"
for e in kb.entities:
    # styled alternative: net.add_node(e, shape="circle", color=color_entity)
    net.add_node(e)
    print("add note",e)

# edges: one per relation, labelled (and tool-tipped) with the relation type
for r in kb.relations:
    net.add_edge(r["head"], r["tail"], title=r["type"], label=r["type"])
    print("add relation",r["head"]," ",r["tail"])

# layout physics; other knobs left at their defaults:
# central_gravity, spring_length, spring_strength
net.repulsion(
    node_distance=230,
    damping=0.01
    )

Local cdn resources have problems on chrome/safari when used in jupyter-notebook. 
add note John McCarthy*
add note Computer scientist
add note Turing Award
add note United States National Medal of Science*
add note Kyoto Prize
add note September 4, 1927*
add note Stanford University
add note ALGOL
add note Cromane
add note County Kerry
add note Ireland
add note Republican*
add note Alan Turing
add note Princeton University
add note Marvin Minsky
add note Allen Newell
add note Herbert A. Simon
add note Donald C. Spencer
add note Nathaniel Rochester
add note Artificial intelligence
add note Claude Shannon
add note ALGOL 60
add note August 1959
add note Compatible Time-Sharing System
add note 1961
add note time-sharing systems*
add note BBN Time-Sharing System
add note Dartmouth Time Sharing System
add note Space fountain
add note 1982
add note The Robot and the Baby*
add note 2001
add note Short story
add note Social network
add note Internet culture
add note Carolyn Talcott
add note SR

In [None]:
# Use the 'dynamic' smoothing type for the edges.
net.set_edge_smooth('dynamic')

# Write the graph to an HTML file and display it. The path is relative to the
# current working directory, not necessarily to the notebook's own location.
graph_html_path = 'Data/Graph_html/Rebel.html'
net.show(graph_html_path)