# Import the relevant libraries

In [6]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import math
import torch
import wikipedia
from newspaper import Article, ArticleException
from GoogleNews import GoogleNews
import IPython
from pyvis.network import Network

In [7]:
import pickle

def save_kb(kb, filename):
    """Pickle ``kb`` into the file at ``filename``, replacing any existing file."""
    with open(filename, "wb") as out_file:
        pickle.Pickler(out_file).dump(kb)

def load_kb(filename):
    """Return the object unpickled from the file at ``filename``.

    Unpickling can execute arbitrary code, so only load files you trust.
    """
    with open(filename, "rb") as in_file:
        return pickle.Unpickler(in_file).load()

In [8]:
import glob
from tqdm.notebook import tqdm, trange

# Load the REBEL model

In [9]:
# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("Babelscape/rebel-large")
model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/rebel-large")

In [10]:
def extract_relations_from_model_output(text):
    """Parse a decoded model output string into a list of relation dicts.

    The string is scanned token by token (whitespace-separated). The markers
    ``<triplet>``, ``<subj>`` and ``<obj>`` switch which field the following
    tokens are appended to: after ``<triplet>`` the head, after ``<subj>`` the
    tail, after ``<obj>`` the relation type. ``<s>``, ``<pad>`` and ``</s>``
    are removed before scanning.

    Returns:
        A list of ``{'head': ..., 'type': ..., 'tail': ...}`` dicts.
    """
    found = []
    # Accumulators keyed by scanning mode: 't' -> head, 's' -> tail, 'o' -> type.
    parts = {'t': '', 's': '', 'o': ''}

    def snapshot():
        # Freeze the current accumulators into a relation dict.
        return {
            'head': parts['t'].strip(),
            'type': parts['o'].strip(),
            'tail': parts['s'].strip()
        }

    cleaned = text.strip()
    for marker in ("<s>", "<pad>", "</s>"):
        cleaned = cleaned.replace(marker, "")

    mode = 'x'  # no marker seen yet: plain tokens are ignored
    for token in cleaned.split():
        if token == "<triplet>":
            mode = 't'
            # A pending relation type means the previous triplet is complete.
            if parts['o'] != '':
                found.append(snapshot())
                parts['o'] = ''
            parts['t'] = ''
        elif token == "<subj>":
            mode = 's'
            # Same head, new tail: flush the relation collected so far.
            if parts['o'] != '':
                found.append(snapshot())
            parts['s'] = ''
        elif token == "<obj>":
            mode = 'o'
            parts['o'] = ''
        elif mode in parts:
            parts[mode] += ' ' + token

    # Flush the trailing triplet only if all three fields were filled.
    if all(piece != '' for piece in parts.values()):
        found.append(snapshot())
    return found

# Split spans: from long text to KB

In [11]:
def from_text_to_kb(text, kb, span_length=128, verbose=False):
    """Extract relations from ``text`` and add them to ``kb``.

    The text is tokenized with the module-level ``tokenizer``, cut into
    equally sized, overlapping spans of ``span_length`` tokens, and each span
    is passed to the module-level ``model``. Every relation parsed from the
    generated sequences is tagged with the span it came from and handed to
    ``kb.add_relation``.

    Args:
        text: The input text.
        kb: Knowledge base object exposing ``add_relation(relation)``.
        span_length: Number of tokens per span.
        verbose: When true, print token/span counts and span boundaries.

    Returns:
        The same ``kb`` object that was passed in.
    """
    # tokenize whole text
    encoded = tokenizer([text], return_tensors="pt")
    token_ids = encoded["input_ids"][0]
    token_mask = encoded["attention_mask"][0]

    # compute span boundaries
    num_tokens = len(token_ids)
    if verbose:
        print(f"Input has {num_tokens} tokens")
    num_spans = math.ceil(num_tokens / span_length)
    if verbose:
        print(f"Input has {num_spans} spans")
    overlap = math.ceil((num_spans * span_length - num_tokens) /
                        max(num_spans - 1, 1))
    # Each span starts (span_length - overlap) tokens after the previous one.
    stride = span_length - overlap
    spans_boundaries = [[stride * k, stride * k + span_length]
                        for k in range(num_spans)]
    if verbose:
        print(f"Span boundaries are {spans_boundaries}")

    # transform input with spans
    batch = {
        "input_ids": torch.stack(
            [token_ids[lo:hi] for lo, hi in spans_boundaries]),
        "attention_mask": torch.stack(
            [token_mask[lo:hi] for lo, hi in spans_boundaries])
    }

    # generate relations
    num_return_sequences = 3
    generated_tokens = model.generate(
        **batch,
        max_length=256,
        length_penalty=0,
        num_beams=3,
        num_return_sequences=num_return_sequences,
    )

    # decode relations
    decoded_preds = tokenizer.batch_decode(generated_tokens,
                                           skip_special_tokens=False)

    # fill kb: num_return_sequences consecutive predictions belong to one span
    for pred_index, sentence_pred in enumerate(decoded_preds):
        span = spans_boundaries[pred_index // num_return_sequences]
        for relation in extract_relations_from_model_output(sentence_pred):
            relation["meta"] = {
                "spans": [span]
            }
            kb.add_relation(relation)

    return kb

# Filter and normalize entities with Wikipedia

- remove all entities that don't have a page on Wikipedia
- merge entities if they have the same Wikipedia page

In [12]:
class KB():
    """In-memory knowledge base of Wikipedia-backed entities and relations.

    ``entities`` maps a Wikipedia page title to ``{"url": ..., "summary": ...}``.
    ``relations`` is a list of ``{"head", "type", "tail", "meta"}`` dicts, where
    ``meta["spans"]`` lists the token spans the relation was extracted from.
    """

    def __init__(self):
        self.entities = {}
        self.relations = []

    # To Do: Check if we need to load extracted knowledge here or not!
    # add load code here

    def are_relations_equal(self, r1, r2):
        """Return True when both relations share head, type and tail."""
        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])

    def exists_relation(self, r1):
        """Return True when a relation equal to ``r1`` is already stored."""
        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)

    def merge_relations(self, r1):
        """Add the spans of ``r1`` that are new to the stored equal relation.

        Raises IndexError if no equal relation is stored.
        """
        r2 = [r for r in self.relations
              if self.are_relations_equal(r1, r)][0]
        spans_to_add = [span for span in r1["meta"]["spans"]
                        if span not in r2["meta"]["spans"]]
        r2["meta"]["spans"] += spans_to_add

    def get_wikipedia_data(self, candidate_entity):
        """Resolve ``candidate_entity`` to Wikipedia page data.

        Returns:
            ``{"title", "url", "summary"}`` for the page, or None when the
            lookup fails for any reason.
        """
        # A name that is already a stored page title resolves to itself, so
        # reuse the stored data instead of querying Wikipedia again.
        known = self.entities.get(candidate_entity)
        if known is not None:
            return {"title": candidate_entity, **known}
        try:
            page = wikipedia.page(candidate_entity, auto_suggest=False)
            entity_data = {
                "title": page.title,
                "url": page.url,
                "summary": page.summary
            }
            return entity_data
        except Exception:
            # Best effort: a failed lookup means "no such entity". Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # stop a long-running extraction loop.
            return None

    def add_entity(self, e):
        """Store entity ``e`` under its title, keeping all other fields."""
        self.entities[e["title"]] = {k:v for k,v in e.items() if k != "title"}

    def add_relation(self, r):
        """Add relation ``r`` if both its head and tail resolve on Wikipedia.

        ``r`` is modified in place: head and tail are replaced by the resolved
        page titles. If an equal relation is already stored, only its spans
        are extended.
        """
        # check on wikipedia (stored titles are answered without a query)
        candidate_entities = [r["head"], r["tail"]]
        entities = [self.get_wikipedia_data(ent) for ent in candidate_entities]

        # if one entity does not exist, stop
        if any(ent is None for ent in entities):
            return

        # manage new entities
        for e in entities:
            self.add_entity(e)

        # rename relation entities with their wikipedia titles
        r["head"] = entities[0]["title"]
        r["tail"] = entities[1]["title"]

        # manage new relation
        if not self.exists_relation(r):
            self.relations.append(r)
        else:
            self.merge_relations(r)

    def print(self):
        """Print all entities and relations, one per line."""
        print("Entities:")
        for e in self.entities.items():
            print(f"  {e}")
        print("Relations:")
        for r in self.relations:
            print(f"  {r}")

# Extract KB from all text files

In [13]:
# Shared knowledge base that the extraction loops in this notebook add to.
kb = KB()

In [14]:
def divide_chunks(l, n):
    """Yield consecutive slices of ``l``, each holding at most ``n`` items."""
    yield from (l[start:start + n] for start in range(0, len(l), n))

In [15]:
! ls data/wiki_texts/ped_pt | wc -l

13615


## Wiki pedestrian

In [16]:
# Collect every .txt file of the wiki text folder and report how many there are.
file_names_wi_p = list(glob.glob("data/wiki_texts/ped_pt/*.txt"))
print(len(file_names_wi_p))

13615


In [17]:
def get_length(fn):
    """Return the number of lines in the text file ``fn``."""
    with open(fn, "r") as handle:
        return sum(1 for _ in handle)

In [18]:
! ls data/britanica_texts/ped_pt | wc -l

1003


In [19]:
# Keep only the files for which get_length (a line count) is below 30000.
new_br = [f for f in glob.glob("data/britanica_texts/ped_pt/*.txt") if get_length(f) < 30000]
len(new_br)

992

In [20]:
# Names of files already processed; used to skip them when a loop is re-run.
extracted_files = []

In [21]:
# Split the wiki file list into chunks of at most 1000 files each.
x = list(divide_chunks(file_names_wi_p, 1000))
print(len(x))
print(len(x[0]))

14
1000


In [44]:
# Files of chunk x[6] that are not yet listed in extracted_files.
search_files = list(set(x[6]) - set(extracted_files))
len(search_files)

0

In [None]:
# Extract relations from every not-yet-processed file of chunk x[6].
search_files = list(set(x[6]) - set(extracted_files))
for fn in tqdm(search_files):
    with open(fn, "r") as rf:
        c_txt = rf.read()
        # from_text_to_kb adds to the kb it is given and returns that same object
        kb = from_text_to_kb(c_txt, kb)
    extracted_files.append(fn)
    # Append the file name to an on-disk list right after it has been processed.
    with open("data/processed_list_1.txt", 'a') as file1:
        file1.write(f"{fn}\n")
#     print(f"remaining file count: {len(list(set(file_names_wi_p) - set(extracted_files)))}")
    

  0%|          | 0/609 [00:00<?, ?it/s]

In [45]:
# Persist the knowledge base built so far.
save_kb(kb, 'data/kb/ped_kb_1.pk')

## Britannica

In [1]:
# Filtered Britannica files that are not yet listed in extracted_files.
search_files = list(set(new_br) - set(extracted_files))
len(search_files)

NameError: name 'new_br' is not defined

In [None]:
# Extract relations from every not-yet-processed Britannica file into kb.
search_files = list(set(new_br) - set(extracted_files))
for fn in tqdm(search_files):
    with open(fn, "r") as rf:
        c_txt = rf.read()
        # kb is updated in place; the return value is not needed here
        from_text_to_kb(c_txt, kb)
    extracted_files.append(fn)

  0%|          | 0/540 [00:00<?, ?it/s]

In [29]:
# Persist the knowledge base to the current working directory.
save_kb(kb, 'ped_kb.pk')

In [None]:
! ls

In [None]:
# for fn in tqdm(search_files):
#     with open(fn, "r") as rf:
#         c_txt = rf.read()
#         from_text_to_kb(c_txt, kb, verbose=True)
    

In [32]:
# Dump all entities and relations of the knowledge base.
kb.print()

Entities:
  ('Geoffrey Chaucer', {'url': 'https://en.wikipedia.org/wiki/Geoffrey_Chaucer', 'summary': 'Geoffrey Chaucer (; c.\u20091340s – 25 October 1400) was an English poet, author, and civil servant best known for The Canterbury Tales. He has been called the "father of English literature", or, alternatively, the "father of English poetry".  He was the first writer to be buried in what has since come to be called Poets\' Corner, in Westminster Abbey. Chaucer also gained fame as a philosopher and astronomer, composing the scientific A Treatise on the Astrolabe for his 10-year-old son Lewis. He maintained a career in the civil service as a bureaucrat, courtier, diplomat, and member of parliament.\nAmong Chaucer\'s many other works are The Book of the Duchess, The House of Fame, The Legend of Good Women, and Troilus and Criseyde. He is seen as crucial in legitimising the literary use of Middle English when the dominant literary languages in England were still Anglo-Norman French and La

# Extract KB from web article

In [13]:
def from_text_to_kb(text, article_url, span_length=128, article_title=None,
                    article_publish_date=None, verbose=False):
    """Build a new KB from ``text``, recording ``article_url`` as its source.

    The text is tokenized with the module-level ``tokenizer``, cut into
    equally sized, overlapping spans of ``span_length`` tokens, and each span
    is passed to the module-level ``model``. Every relation parsed from the
    generated sequences is tagged with ``article_url`` and the span it came
    from, then added to a fresh ``KB``.

    Args:
        text: The article text.
        article_url: Key under which the spans are stored in ``meta``.
        span_length: Number of tokens per span.
        article_title: Forwarded to ``KB.add_relation``.
        article_publish_date: Forwarded to ``KB.add_relation``.
        verbose: When true, print token/span counts and span boundaries.

    Returns:
        The newly created ``KB``.
    """
    # tokenize whole text
    encoded = tokenizer([text], return_tensors="pt")
    token_ids = encoded["input_ids"][0]
    token_mask = encoded["attention_mask"][0]

    # compute span boundaries
    num_tokens = len(token_ids)
    if verbose:
        print(f"Input has {num_tokens} tokens")
    num_spans = math.ceil(num_tokens / span_length)
    if verbose:
        print(f"Input has {num_spans} spans")
    overlap = math.ceil((num_spans * span_length - num_tokens) /
                        max(num_spans - 1, 1))
    # Each span starts (span_length - overlap) tokens after the previous one.
    stride = span_length - overlap
    spans_boundaries = [[stride * k, stride * k + span_length]
                        for k in range(num_spans)]
    if verbose:
        print(f"Span boundaries are {spans_boundaries}")

    # transform input with spans
    batch = {
        "input_ids": torch.stack(
            [token_ids[lo:hi] for lo, hi in spans_boundaries]),
        "attention_mask": torch.stack(
            [token_mask[lo:hi] for lo, hi in spans_boundaries])
    }

    # generate relations
    num_return_sequences = 3
    generated_tokens = model.generate(
        **batch,
        max_length=256,
        length_penalty=0,
        num_beams=3,
        num_return_sequences=num_return_sequences,
    )

    # decode relations
    decoded_preds = tokenizer.batch_decode(generated_tokens,
                                           skip_special_tokens=False)

    # create kb: num_return_sequences consecutive predictions belong to one span
    kb = KB()
    for pred_index, sentence_pred in enumerate(decoded_preds):
        span = spans_boundaries[pred_index // num_return_sequences]
        for relation in extract_relations_from_model_output(sentence_pred):
            relation["meta"] = {
                article_url: {
                    "spans": [span]
                }
            }
            kb.add_relation(relation, article_title, article_publish_date)

    return kb

In [14]:
class KB():
    """Knowledge base of Wikipedia-backed entities, relations and their sources.

    Relations remember, per source article URL, the token spans they were
    extracted from; ``sources`` keeps title and publish date per article URL.
    """

    def __init__(self):
        self.entities = {} # { entity_title: {...} }
        self.relations = [] # [ head: entity_title, type: ..., tail: entity_title,
          # meta: { article_url: { spans: [...] } } ]
        self.sources = {} # { article_url: {...} }

    def merge_with_kb(self, kb2):
        """Add every relation of ``kb2`` (with its entities and source) to this KB.

        Relation dicts are shared with ``kb2``, not copied. Only the first
        article URL found in a relation's ``meta`` is considered.
        """
        for r in kb2.relations:
            # kb2 already resolved these entities; copy them so add_relation
            # does not have to query Wikipedia again for each relation.
            for title in (r["head"], r["tail"]):
                if title in kb2.entities and title not in self.entities:
                    self.add_entity({"title": title, **kb2.entities[title]})
            article_url = list(r["meta"].keys())[0]
            source_data = kb2.sources[article_url]
            self.add_relation(r, source_data["article_title"],
                              source_data["article_publish_date"])

    def are_relations_equal(self, r1, r2):
        """Return True when both relations share head, type and tail."""
        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])

    def exists_relation(self, r1):
        """Return True when a relation equal to ``r1`` is already stored."""
        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)

    def merge_relations(self, r2):
        """Merge the source metadata of ``r2`` into the stored equal relation.

        Raises IndexError if no equal relation is stored.
        """
        r1 = [r for r in self.relations
              if self.are_relations_equal(r2, r)][0]

        # if different article
        article_url = list(r2["meta"].keys())[0]
        if article_url not in r1["meta"]:
            r1["meta"][article_url] = r2["meta"][article_url]

        # if existing article
        else:
            spans_to_add = [span for span in r2["meta"][article_url]["spans"]
                            if span not in r1["meta"][article_url]["spans"]]
            r1["meta"][article_url]["spans"] += spans_to_add

    def get_wikipedia_data(self, candidate_entity):
        """Resolve ``candidate_entity`` to Wikipedia page data.

        Returns:
            ``{"title", "url", "summary"}`` for the page, or None when the
            lookup fails for any reason.
        """
        # A name that is already a stored page title resolves to itself, so
        # reuse the stored data instead of querying Wikipedia again.
        known = self.entities.get(candidate_entity)
        if known is not None:
            return {"title": candidate_entity, **known}
        try:
            page = wikipedia.page(candidate_entity, auto_suggest=False)
            entity_data = {
                "title": page.title,
                "url": page.url,
                "summary": page.summary
            }
            return entity_data
        except Exception:
            # Best effort: a failed lookup means "no such entity". Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # stop a long-running crawl.
            return None

    def add_entity(self, e):
        """Store entity ``e`` under its title, keeping all other fields."""
        self.entities[e["title"]] = {k:v for k,v in e.items() if k != "title"}

    def add_relation(self, r, article_title, article_publish_date):
        """Add relation ``r`` if both its head and tail resolve on Wikipedia.

        ``r`` is modified in place: head and tail are replaced by the resolved
        page titles. The first article URL in ``r["meta"]`` is registered in
        ``sources`` with the given title and publish date if it is new. If an
        equal relation is already stored, its metadata is merged instead.
        """
        # check on wikipedia (stored titles are answered without a query)
        candidate_entities = [r["head"], r["tail"]]
        entities = [self.get_wikipedia_data(ent) for ent in candidate_entities]

        # if one entity does not exist, stop
        if any(ent is None for ent in entities):
            return

        # manage new entities
        for e in entities:
            self.add_entity(e)

        # rename relation entities with their wikipedia titles
        r["head"] = entities[0]["title"]
        r["tail"] = entities[1]["title"]

        # add source if not in kb
        article_url = list(r["meta"].keys())[0]
        if article_url not in self.sources:
            self.sources[article_url] = {
                "article_title": article_title,
                "article_publish_date": article_publish_date
            }

        # manage new relation
        if not self.exists_relation(r):
            self.relations.append(r)
        else:
            self.merge_relations(r)

    def print(self):
        """Print all entities, relations and sources, one per line."""
        print("Entities:")
        for e in self.entities.items():
            print(f"  {e}")
        print("Relations:")
        for r in self.relations:
            print(f"  {r}")
        print("Sources:")
        for s in self.sources.items():
            print(f"  {s}")

In [15]:
def get_article(url):
    """Download and parse the article at ``url``; return the ``Article`` object."""
    fetched = Article(url)
    fetched.download()
    fetched.parse()
    return fetched

def from_url_to_kb(url):
    """Fetch the article at ``url`` and return a KB extracted from its text.

    The article's own ``url``, ``title`` and ``publish_date`` attributes are
    passed on as source metadata.
    """
    article = get_article(url)
    return from_text_to_kb(
        article.text,
        article.url,
        article_title=article.title,
        article_publish_date=article.publish_date,
    )

In [16]:
# Build and print a KB from a single news article.
url = "https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html"
kb = from_url_to_kb(url)
kb.print()
# Entities:
#   ('MicroStrategy', {'url': 'https://en.wikipedia.org/wiki/MicroStrategy',
#     'summary': "MicroStrategy Incorporated is an American company that ..."})
#   ('Michael J. Saylor', {'url': 'https://en.wikipedia.org/wiki/Michael_J._Saylor',
#     'summary': 'Michael J. Saylor (born February 4, 1965) is an American ..."})
#   ...
# Relations:
#   {'head': 'MicroStrategy', 'type': 'founded by', 'tail': 'Michael J. Saylor',
#    'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html': 
#      {'spans': [[0, 128]]}}}
#   {'head': 'Michael J. Saylor', 'type': 'employer', 'tail': 'MicroStrategy',
#    'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html':
#      {'spans': [[0, 128]]}}}
#   ...
# Sources:
#   ('https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html',
#     {'article_title': "Microstrategy chief: 'Bitcoin is going to go into the millions'",
#      'article_publish_date': None})

Entities:
  ('MicroStrategy', {'url': 'https://en.wikipedia.org/wiki/MicroStrategy', 'summary': "MicroStrategy Incorporated is an American company that provides business intelligence (BI), mobile software, and cloud-based services. Founded in 1989 by Michael J. Saylor, Sanju Bansal, and Thomas Spahr, the firm develops software to analyze internal and external data in order to make business decisions and to develop mobile apps. It is a public company headquartered in Tysons Corner, Virginia, in the Washington metropolitan area. Its primary business analytics competitors include SAP AG Business Objects, IBM Cognos, and Oracle Corporation's BI Platform. Saylor is the Executive Chairman and, from 1989 to 2022, was the CEO."})
  ('Michael J. Saylor', {'url': 'https://en.wikipedia.org/wiki/Michael_J._Saylor', 'summary': "Michael J. Saylor (born February 4, 1965) is an American entrepreneur and business executive. He is the executive chairman and a co-founder of MicroStrategy, a company that 

# Google News: extract KB from multiple articles

In [17]:
def get_news_links(query, lang="en", region="US", pages=1, max_links=100000):
    """Return up to ``max_links`` distinct Google News result links for ``query``.

    Args:
        query: Search string.
        lang: Language passed to ``GoogleNews``.
        region: Region passed to ``GoogleNews``.
        pages: How many times ``get_page`` is called (with 0 .. pages-1).
        max_links: Upper bound on the number of links returned.

    Returns:
        A list of unique links in first-seen order.
    """
    googlenews = GoogleNews(lang=lang, region=region)
    googlenews.search(query)
    all_urls = []
    for page in range(pages):
        googlenews.get_page(page)
        all_urls += googlenews.get_links()
    # De-duplicate while keeping first-seen order, so that truncating to
    # max_links picks the same links on every run (a set has arbitrary order).
    return list(dict.fromkeys(all_urls))[:max_links]

def from_urls_to_kb(urls, verbose=False):
    """Build one KB by merging the KBs extracted from each URL in ``urls``.

    URLs whose processing raises ``ArticleException`` are skipped; with
    ``verbose`` set, progress and skipped URLs are printed.
    """
    merged = KB()
    if verbose:
        print(f"{len(urls)} links to visit")
    for url in urls:
        if verbose:
            print(f"Visiting {url}...")
        try:
            merged.merge_with_kb(from_url_to_kb(url))
        except ArticleException:
            if verbose:
                print(f"  Couldn't download article at url {url}")
    return merged

In [8]:
import pickle

def save_kb(kb, filename):
    """Pickle ``kb`` into the file at ``filename``, replacing any existing file."""
    with open(filename, "wb") as out_file:
        pickle.Pickler(out_file).dump(kb)

def load_kb(filename):
    """Return the object unpickled from the file at ``filename``.

    Unpickling can execute arbitrary code, so only load files you trust.
    """
    with open(filename, "rb") as in_file:
        return pickle.Unpickler(in_file).load()

In [19]:
# Fetch at most 3 news links for "Google", merge their KBs and print the result.
news_links = get_news_links("Google", pages=1, max_links=3)
kb = from_urls_to_kb(news_links, verbose=True)
kb.print()
# 3 links to visit
# Visiting https://www.hindustantimes.com/india-news/google-doodle-celebrates-india-s-gama-pehlwan-the-undefeated-wrestling-champion-101653180853982.html...
# Visiting https://tech.hindustantimes.com/tech/news/google-doodle-today-celebrates-gama-pehlwan-s-144th-birth-anniversary-know-who-he-is-71653191916538.html...
# Visiting https://www.moneycontrol.com/news/trends/current-affairs-trends/google-doodle-celebrates-gama-pehlwan-the-amritsar-born-wrestling-champ-who-inspired-bruce-lee-8552171.html...
# Entities:
#   ('Google', {'url': 'https://en.wikipedia.org/wiki/Google',
#     'summary': 'Google LLC is an American ...'})
#   ...
# Relations:
#   {'head': 'Google', 'type': 'owner of', 'tail': 'Google Doodle',
#     'meta': {'https://tech.hindustantimes.com/tech/news/google-doodle-today-celebrates-gama-pehlwan-s-144th-birth-anniversary-know-who-he-is-71653191916538.html':
#       {'spans': [[0, 128]]}}}
#   ...
# Sources:
#   ('https://www.hindustantimes.com/india-news/google-doodle-celebrates-india-s-gama-pehlwan-the-undefeated-wrestling-champion-101653180853982.html',
#     {'article_title': "Google Doodle celebrates India's Gama Pehlwan, the undefeated wrestling champion",
#     'article_publish_date': datetime.datetime(2022, 5, 22, 6, 59, 56, tzinfo=tzoffset(None, 19800))})
#   ('https://tech.hindustantimes.com/tech/news/google-doodle-today-celebrates-gama-pehlwan-s-144th-birth-anniversary-know-who-he-is-71653191916538.html',
#     {'article_title': "Google Doodle today celebrates Gama Pehlwan's 144th birth anniversary; know who he is",
#     'article_publish_date': datetime.datetime(2022, 5, 22, 9, 32, 38, tzinfo=tzoffset(None, 19800))})
#   ('https://www.moneycontrol.com/news/trends/current-affairs-trends/google-doodle-celebrates-gama-pehlwan-the-amritsar-born-wrestling-champ-who-inspired-bruce-lee-8552171.html',
#     {'article_title': 'Google Doodle celebrates Gama Pehlwan, the Amritsar-born wrestling champ who inspired Bruce Lee',
#     'article_publish_date': None})

0 links to visit
Entities:
Relations:
Sources:


# Visualize KB

In [20]:
def save_network_html(kb, filename="network.html"):
    """Render ``kb`` as a directed pyvis network and show it via ``filename``.

    Each entity becomes a node and each relation an edge from head to tail,
    labelled with the relation type.
    """
    graph = Network(directed=True, width="700px", height="700px", bgcolor="#eeeeee")

    # one node per entity title
    for entity_title in kb.entities:
        graph.add_node(entity_title, shape="circle", color="#00FF00")

    # one edge per relation, using the type as both tooltip and label
    for rel in kb.relations:
        rel_type = rel["type"]
        graph.add_edge(rel["head"], rel["tail"],
                       title=rel_type, label=rel_type)

    # layout settings, then write/show the html
    graph.repulsion(
        node_distance=200,
        central_gravity=0.2,
        spring_length=200,
        spring_strength=0.05,
        damping=0.09
    )
    graph.set_edge_smooth('dynamic')
    graph.show(filename)

In [None]:
# Build a KB from up to 20 news links for "Google" and display it as a network.
news_links = get_news_links("Google", pages=5, max_links=20)
kb = from_urls_to_kb(news_links, verbose=True)
filename = "network_3_google.html"
save_network_html(kb, filename=filename)
IPython.display.HTML(filename=filename)

0 links to visit


In [None]:
# Same as the previous cell, but also pickle the KB next to the html file.
news_links = get_news_links("Google", pages=5, max_links=20)
kb = from_urls_to_kb(news_links, verbose=True)
filename = "network_3_google.html"
save_network_html(kb, filename=filename)
# "network_3_google.html" -> "network_3_google.p"
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
# News KB for the query "Amazon": build, render, pickle and display.
news_links = get_news_links("Amazon", pages=5, max_links=20)
kb = from_urls_to_kb(news_links, verbose=True)
filename = "network_3_amazon.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
# News KB for the query "Apple": build, render, pickle and display.
news_links = get_news_links("Apple", pages=5, max_links=20)
kb = from_urls_to_kb(news_links, verbose=True)
filename = "network_3_apple.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
# News KB for the query "Elon Musk": build, render, pickle and display.
news_links = get_news_links("Elon Musk", pages=5, max_links=20)
kb = from_urls_to_kb(news_links, verbose=True)
filename = "network_3_musk.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
# News KB for the query "Kobe Bryant": build, render, pickle and display.
news_links = get_news_links("Kobe Bryant", pages=5, max_links=20)
kb = from_urls_to_kb(news_links, verbose=True)
filename = "network_3_bryant.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
# KB from a short inline text; the empty string is passed as the article URL.
text = "Napoleon Bonaparte (born Napoleone di Buonaparte; 15 August 1769 – 5 May 1821), and later known by his regnal name Napoleon I, was a French military and political leader who rose to prominence during the French Revolution and led several successful campaigns during the Revolutionary Wars. He was the de facto leader of the French Republic as First Consul from 1799 to 1804. As Napoleon I, he was Emperor of the French from 1804 until 1814 and again in 1815. Napoleon's political and cultural legacy has endured, and he has been one of the most celebrated and controversial leaders in world history."
kb = from_text_to_kb(text, "", verbose=True)
filename = "network_1_napoleon.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
# KB from a short inline text; the empty string is passed as the article URL.
text = "Kobe Bean Bryant (August 23, 1978 – January 26, 2020) was an American professional basketball player. A shooting guard, he spent his entire 20-year career with the Los Angeles Lakers in the National Basketball Association (NBA). Widely regarded as one of the greatest basketball players of all time, Bryant won five NBA championships, was an 18-time All-Star, a 15-time member of the All-NBA Team, a 12-time member of the All-Defensive Team, the 2008 NBA Most Valuable Player (MVP), and a two-time NBA Finals MVP. Bryant also led the NBA in scoring twice, and ranks fourth in league all-time regular season and postseason scoring. He was posthumously voted into the Naismith Memorial Basketball Hall of Fame in 2020 and named to the NBA 75th Anniversary Team in 2021."
kb = from_text_to_kb(text, "", verbose=True)
filename = "network_1_bryant.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
# KB from a short inline text; the empty string is passed as the article URL.
text = "Originally known as BackRub. Google is a search engine that started development in 1996 by Sergey Brin and Larry Page as a research project at Stanford University to find files on the Internet. Larry and Sergey later decided the name of their search engine needed to change and chose Google, which is inspired from the term googol. The company is headquartered in Mountain View, California."
kb = from_text_to_kb(text, "", verbose=True)
filename = "network_1_google.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
# KB from a single web page: build, render, pickle and display.
url = "https://www.investopedia.com/terms/c/cryptocurrency.asp"
kb = from_url_to_kb(url)
filename = "network_2_crypto.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
# KB from a single web page: build, render, pickle and display.
url = "https://www.britannica.com/biography/Johnny-Depp"
kb = from_url_to_kb(url)
filename = "network_2_depp.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
# KB from a single web page: build, render, pickle and display.
url = "https://www.timeout.com/rome/things-to-do/best-things-to-do-in-rome"
kb = from_url_to_kb(url)
filename = "network_2_rome.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)