# Install the relevant libraries

In [1]:
!pip install transformers wikipedia newspaper3k GoogleNews pyvis

Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/7.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/7.2 MB[0m [31m8.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/7.2 MB[0m [31m27.5 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━[0m [32m6.2/7.2 MB[0m [31m57.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m7.2/7.2 MB[0m [31m61.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m42.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting newspaper3

In [2]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import math
import torch
import wikipedia
from newspaper import Article, ArticleException
from GoogleNews import GoogleNews
from IPython.display import HTML
import IPython
from pyvis.network import Network

# Load the REBEL model

In [3]:
# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("Babelscape/rebel-large")
model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/rebel-large")

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/344 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

# From short text to KB
 takes a text with special tokens indicating relations, subjects, and objects, and extracts the relations along with their corresponding subjects and objects.

In [4]:
def extract_relations_from_model_output(text):
    relations = []
    relation, subject, relation, object_ = '', '', '', ''
    text = text.strip()
    current = 'x'
    text_replaced = text.replace("<s>", "").replace("<pad>", "").replace("</s>", "")
    for token in text_replaced.split():
        if token == "<triplet>":
            current = 't'
            if relation != '':
                relations.append({
                    'head': subject.strip(),
                    'type': relation.strip(),
                    'tail': object_.strip()
                })
                relation = ''
            subject = ''
        elif token == "<subj>":
            current = 's'
            if relation != '':
                relations.append({
                    'head': subject.strip(),
                    'type': relation.strip(),
                    'tail': object_.strip()
                })
            object_ = ''
        elif token == "<obj>":
            current = 'o'
            relation = ''
        else:
            if current == 't':
                subject += ' ' + token
            elif current == 's':
                object_ += ' ' + token
            elif current == 'o':
                relation += ' ' + token
    if subject != '' and relation != '' and object_ != '':
        relations.append({
            'head': subject.strip(),
            'type': relation.strip(),
            'tail': object_.strip()
        })
    return relations

The KB class provides functionality to manage a collection of relations in a knowledge base. It allows checking for the existence of relations, adding new relations, and printing the stored relations.

In [5]:
class KB():
    def __init__(self):
        self.relations = []

    def are_relations_equal(self, r1, r2):
        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])

    def exists_relation(self, r1):
        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)

    def add_relation(self, r):
        if not self.exists_relation(r):
            self.relations.append(r)

    def print(self):
        print("Relations:")
        for r in self.relations:
            print(f"  {r}")

tokenization----> generate button-----> extract realtion------> populate KB

The function combines text generation and relation extraction to construct a knowledge base from the input text. The knowledge base is populated with relations extracted from the generated predictions.






In [6]:
def from_small_text_to_kb(text, verbose=False):
    kb = KB()

    # Tokenizer text
    model_inputs = tokenizer(text, max_length=512, padding=True, truncation=True,
                            return_tensors='pt')
    if verbose:
        print(f"Num tokens: {len(model_inputs['input_ids'][0])}")

    # Generate
    gen_kwargs = {
        "max_length": 216,
        "length_penalty": 0,
        "num_beams": 3,
        "num_return_sequences": 3
    }
    generated_tokens = model.generate(
        **model_inputs,
        **gen_kwargs,
    )
    decoded_preds = tokenizer.batch_decode(generated_tokens, skip_special_tokens=False)

    # create kb
    for sentence_pred in decoded_preds:
        relations = extract_relations_from_model_output(sentence_pred)
        for r in relations:
            kb.add_relation(r)

    return kb

In [7]:
text = "Napoleon Bonaparte (born Napoleone di Buonaparte; 15 August 1769 – 5 " \
"May 1821), and later known by his regnal name Napoleon I, was a French military " \
"and political leader who rose to prominence during the French Revolution and led " \
"several successful campaigns during the Revolutionary Wars. He was the de facto " \
"leader of the French Republic as First Consul from 1799 to 1804. As Napoleon I, " \
"he was Emperor of the French from 1804 until 1814 and again in 1815. Napoleon's " \
"political and cultural legacy has endured, and he has been one of the most " \
"celebrated and controversial leaders in world history."

kb = from_small_text_to_kb(text, verbose=True)
kb.print()
# Num tokens: 133
# Relations:
#   {'head': 'Napoleon Bonaparte', 'type': 'date of birth', 'tail': '15 August 1769'}
#   {'head': 'Napoleon Bonaparte', 'type': 'date of death', 'tail': '5 May 1821'}
#   {'head': 'Napoleon Bonaparte', 'type': 'participant in', 'tail': 'French Revolution'}
#   {'head': 'Napoleon Bonaparte', 'type': 'conflict', 'tail': 'Revolutionary Wars'}
#   {'head': 'Revolutionary Wars', 'type': 'part of', 'tail': 'French Revolution'}
#   {'head': 'French Revolution', 'type': 'participant', 'tail': 'Napoleon Bonaparte'}
#   {'head': 'Revolutionary Wars', 'type': 'participant', 'tail': 'Napoleon Bonaparte'}

Num tokens: 133
Relations:
  {'head': 'Napoleon Bonaparte', 'type': 'date of birth', 'tail': '15 August 1769'}
  {'head': 'Napoleon Bonaparte', 'type': 'date of death', 'tail': '5 May 1821'}
  {'head': 'Napoleon Bonaparte', 'type': 'participant in', 'tail': 'French Revolution'}
  {'head': 'Napoleon Bonaparte', 'type': 'conflict', 'tail': 'Revolutionary Wars'}
  {'head': 'Revolutionary Wars', 'type': 'part of', 'tail': 'French Revolution'}
  {'head': 'French Revolution', 'type': 'participant', 'tail': 'Napoleon Bonaparte'}
  {'head': 'Revolutionary Wars', 'type': 'participant', 'tail': 'Napoleon Bonaparte'}


# Split spans: from long text to KB

In [8]:
class KB():
    def __init__(self):
        self.relations = []

    def are_relations_equal(self, r1, r2):
        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])

    def exists_relation(self, r1):
        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)

    def merge_relations(self, r1):
        r2 = [r for r in self.relations
              if self.are_relations_equal(r1, r)][0]
        spans_to_add = [span for span in r1["meta"]["spans"]
                        if span not in r2["meta"]["spans"]]
        r2["meta"]["spans"] += spans_to_add

    def add_relation(self, r):
        if not self.exists_relation(r):
            self.relations.append(r)
        else:
            self.merge_relations(r)

    def print(self):
        print("Relations:")
        for r in self.relations:
            print(f"  {r}")

This function enables the extraction of relations from a given text and organizes them in a KB, facilitating further analysis and processing.

In [9]:
def from_text_to_kb(text, span_length=128, verbose=False):
    # tokenize whole text
    inputs = tokenizer([text], return_tensors="pt")

    # compute span boundaries
    num_tokens = len(inputs["input_ids"][0])
    if verbose:
        print(f"Input has {num_tokens} tokens")
    num_spans = math.ceil(num_tokens / span_length)
    if verbose:
        print(f"Input has {num_spans} spans")
    overlap = math.ceil((num_spans * span_length - num_tokens) /
                        max(num_spans - 1, 1))
    spans_boundaries = []
    start = 0
    for i in range(num_spans):
        spans_boundaries.append([start + span_length * i,
                                 start + span_length * (i + 1)])
        start -= overlap
    if verbose:
        print(f"Span boundaries are {spans_boundaries}")

    # transform input with spans
    tensor_ids = [inputs["input_ids"][0][boundary[0]:boundary[1]]
                  for boundary in spans_boundaries]
    tensor_masks = [inputs["attention_mask"][0][boundary[0]:boundary[1]]
                    for boundary in spans_boundaries]
    inputs = {
        "input_ids": torch.stack(tensor_ids),
        "attention_mask": torch.stack(tensor_masks)
    }

    # generate relations
    num_return_sequences = 3
    gen_kwargs = {
        "max_length": 256,
        "length_penalty": 0,
        "num_beams": 3,
        "num_return_sequences": num_return_sequences
    }
    generated_tokens = model.generate(
        **inputs,
        **gen_kwargs,
    )

    # decode relations
    decoded_preds = tokenizer.batch_decode(generated_tokens,
                                           skip_special_tokens=False)

    # create kb
    kb = KB()
    i = 0
    for sentence_pred in decoded_preds:
        current_span_index = i // num_return_sequences
        relations = extract_relations_from_model_output(sentence_pred)
        for relation in relations:
            relation["meta"] = {
                "spans": [spans_boundaries[current_span_index]]
            }
            kb.add_relation(relation)
        i += 1

    return kb

In [10]:
text = """
Napoleon Bonaparte (born Napoleone di Buonaparte; 15 August 1769 – 5 May 1821), and later known by his regnal name Napoleon I, was a French military and political leader who rose to prominence during the French Revolution and led several successful campaigns during the Revolutionary Wars. He was the de facto leader of the French Republic as First Consul from 1799 to 1804. As Napoleon I, he was Emperor of the French from 1804 until 1814 and again in 1815. Napoleon's political and cultural legacy has endured, and he has been one of the most celebrated and controversial leaders in world history. Napoleon was born on the island of Corsica not long after its annexation by the Kingdom of France.[5] He supported the French Revolution in 1789 while serving in the French army, and tried to spread its ideals to his native Corsica. He rose rapidly in the Army after he saved the governing French Directory by firing on royalist insurgents. In 1796, he began a military campaign against the Austrians and their Italian allies, scoring decisive victories and becoming a national hero. Two years later, he led a military expedition to Egypt that served as a springboard to political power. He engineered a coup in November 1799 and became First Consul of the Republic. Differences with the British meant that the French faced the War of the Third Coalition by 1805. Napoleon shattered this coalition with victories in the Ulm Campaign, and at the Battle of Austerlitz, which led to the dissolving of the Holy Roman Empire. In 1806, the Fourth Coalition took up arms against him because Prussia became worried about growing French influence on the continent. Napoleon knocked out Prussia at the battles of Jena and Auerstedt, marched the Grande Armée into Eastern Europe, annihilating the Russians in June 1807 at Friedland, and forcing the defeated nations of the Fourth Coalition to accept the Treaties of Tilsit. Two years later, the Austrians challenged the French again during the War of the Fifth Coalition, but Napoleon solidified his grip over Europe after triumphing at the Battle of Wagram. Hoping to extend the Continental System, his embargo against Britain, Napoleon invaded the Iberian Peninsula and declared his brother Joseph King of Spain in 1808. The Spanish and the Portuguese revolted in the Peninsular War, culminating in defeat for Napoleon's marshals. Napoleon launched an invasion of Russia in the summer of 1812. The resulting campaign witnessed the catastrophic retreat of Napoleon's Grande Armée. In 1813, Prussia and Austria joined Russian forces in a Sixth Coalition against France. A chaotic military campaign resulted in a large coalition army defeating Napoleon at the Battle of Leipzig in October 1813. The coalition invaded France and captured Paris, forcing Napoleon to abdicate in April 1814. He was exiled to the island of Elba, between Corsica and Italy. In France, the Bourbons were restored to power. However, Napoleon escaped Elba in February 1815 and took control of France.[6][7] The Allies responded by forming a Seventh Coalition, which defeated Napoleon at the Battle of Waterloo in June 1815. The British exiled him to the remote island of Saint Helena in the Atlantic, where he died in 1821 at the age of 51. Napoleon had an extensive impact on the modern world, bringing liberal reforms to the many countries he conquered, especially the Low Countries, Switzerland, and parts of modern Italy and Germany. He implemented liberal policies in France and Western Europe.
"""

kb = from_text_to_kb(text, verbose=True)
kb.print()
# Input has 726 tokens
# Input has 6 spans
# Span boundaries are [[0, 128], [119, 247], [238, 366], [357, 485], [476, 604], [595, 723]]
# Relations:
#   {'head': 'Napoleon Bonaparte', 'type': 'date of birth',
#    'tail': '15 August 1769', 'meta': {'spans': [[0, 128]]}}
#   ...
#   {'head': 'Napoleon', 'type': 'place of birth',
#    'tail': 'Corsica', 'meta': {'spans': [[119, 247]]}}
#   ...
#   {'head': 'Fourth Coalition', 'type': 'start time',
#    'tail': '1806', 'meta': {'spans': [[238, 366]]}}
#   ...

Input has 726 tokens
Input has 6 spans
Span boundaries are [[0, 128], [119, 247], [238, 366], [357, 485], [476, 604], [595, 723]]
Relations:
  {'head': 'Napoleon Bonaparte', 'type': 'date of birth', 'tail': '15 August 1769', 'meta': {'spans': [[0, 128]]}}
  {'head': 'Napoleon Bonaparte', 'type': 'date of death', 'tail': '5 May 1821', 'meta': {'spans': [[0, 128]]}}
  {'head': 'Napoleon Bonaparte', 'type': 'participant in', 'tail': 'French Revolution', 'meta': {'spans': [[0, 128]]}}
  {'head': 'Napoleon Bonaparte', 'type': 'conflict', 'tail': 'Revolutionary Wars', 'meta': {'spans': [[0, 128]]}}
  {'head': 'Revolutionary Wars', 'type': 'part of', 'tail': 'French Revolution', 'meta': {'spans': [[0, 128]]}}
  {'head': 'French Revolution', 'type': 'participant', 'tail': 'Napoleon Bonaparte', 'meta': {'spans': [[0, 128]]}}
  {'head': 'Revolutionary Wars', 'type': 'participant', 'tail': 'Napoleon Bonaparte', 'meta': {'spans': [[0, 128]]}}
  {'head': 'French Revolution', 'type': 'country', 'tai

# Filter and normalize entities with Wikipedia
The KB class represents a knowledge base that contains entities and relations. It provides methods to add entities and relations, check for existing relations, merge duplicate relations, retrieve data from Wikipedia for candidate entities, and print the entities and relations in a formatted manner.
- remove all entities that doesn't have a page on Wikipedia
- merge entities if they have the same wikipedia page

In [11]:
class KB():
    def __init__(self):
        self.entities = {}
        self.relations = []

    def are_relations_equal(self, r1, r2):
        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])

    def exists_relation(self, r1):
        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)

    def merge_relations(self, r1):
        r2 = [r for r in self.relations
              if self.are_relations_equal(r1, r)][0]
        spans_to_add = [span for span in r1["meta"]["spans"]
                        if span not in r2["meta"]["spans"]]
        r2["meta"]["spans"] += spans_to_add

    def get_wikipedia_data(self, candidate_entity):
        try:
            page = wikipedia.page(candidate_entity, auto_suggest=False)
            entity_data = {
                "title": page.title,
                "url": page.url,
                "summary": page.summary
            }
            return entity_data
        except:
            return None

    def add_entity(self, e):
        self.entities[e["title"]] = {k:v for k,v in e.items() if k != "title"}

    def add_relation(self, r):
        # check on wikipedia
        candidate_entities = [r["head"], r["tail"]]
        entities = [self.get_wikipedia_data(ent) for ent in candidate_entities]

        # if one entity does not exist, stop
        if any(ent is None for ent in entities):
            return

        # manage new entities
        for e in entities:
            self.add_entity(e)

        # rename relation entities with their wikipedia titles
        r["head"] = entities[0]["title"]
        r["tail"] = entities[1]["title"]

        # manage new relation
        if not self.exists_relation(r):
            self.relations.append(r)
        else:
            self.merge_relations(r)

    def print(self):
        print("Entities:")
        for e in self.entities.items():
            print(f"  {e}")
        print("Relations:")
        for r in self.relations:
            print(f"  {r}")

In [12]:
text = """
Napoleon Bonaparte (born Napoleone di Buonaparte; 15 August 1769 – 5 May 1821), and later known by his regnal name Napoleon I, was a French military and political leader who rose to prominence during the French Revolution and led several successful campaigns during the Revolutionary Wars. He was the de facto leader of the French Republic as First Consul from 1799 to 1804. As Napoleon I, he was Emperor of the French from 1804 until 1814 and again in 1815. Napoleon's political and cultural legacy has endured, and he has been one of the most celebrated and controversial leaders in world history. Napoleon was born on the island of Corsica not long after its annexation by the Kingdom of France.[5] He supported the French Revolution in 1789 while serving in the French army, and tried to spread its ideals to his native Corsica. He rose rapidly in the Army after he saved the governing French Directory by firing on royalist insurgents. In 1796, he began a military campaign against the Austrians and their Italian allies, scoring decisive victories and becoming a national hero. Two years later, he led a military expedition to Egypt that served as a springboard to political power. He engineered a coup in November 1799 and became First Consul of the Republic. Differences with the British meant that the French faced the War of the Third Coalition by 1805. Napoleon shattered this coalition with victories in the Ulm Campaign, and at the Battle of Austerlitz, which led to the dissolving of the Holy Roman Empire. In 1806, the Fourth Coalition took up arms against him because Prussia became worried about growing French influence on the continent. Napoleon knocked out Prussia at the battles of Jena and Auerstedt, marched the Grande Armée into Eastern Europe, annihilating the Russians in June 1807 at Friedland, and forcing the defeated nations of the Fourth Coalition to accept the Treaties of Tilsit. Two years later, the Austrians challenged the French again during the War of the Fifth Coalition, but Napoleon solidified his grip over Europe after triumphing at the Battle of Wagram. Hoping to extend the Continental System, his embargo against Britain, Napoleon invaded the Iberian Peninsula and declared his brother Joseph King of Spain in 1808. The Spanish and the Portuguese revolted in the Peninsular War, culminating in defeat for Napoleon's marshals. Napoleon launched an invasion of Russia in the summer of 1812. The resulting campaign witnessed the catastrophic retreat of Napoleon's Grande Armée. In 1813, Prussia and Austria joined Russian forces in a Sixth Coalition against France. A chaotic military campaign resulted in a large coalition army defeating Napoleon at the Battle of Leipzig in October 1813. The coalition invaded France and captured Paris, forcing Napoleon to abdicate in April 1814. He was exiled to the island of Elba, between Corsica and Italy. In France, the Bourbons were restored to power. However, Napoleon escaped Elba in February 1815 and took control of France.[6][7] The Allies responded by forming a Seventh Coalition, which defeated Napoleon at the Battle of Waterloo in June 1815. The British exiled him to the remote island of Saint Helena in the Atlantic, where he died in 1821 at the age of 51. Napoleon had an extensive impact on the modern world, bringing liberal reforms to the many countries he conquered, especially the Low Countries, Switzerland, and parts of modern Italy and Germany. He implemented liberal policies in France and Western Europe.
"""

kb = from_text_to_kb(text)
kb.print()
# Entities:
#  ('Napoleon', {'url': 'https://en.wikipedia.org/wiki/Napoleon',
#   'summary': "Napoleon Bonaparte (born Napoleone di Buonaparte; 15 August ..."})
#  ('French Revolution', {'url': 'https://en.wikipedia.org/wiki/French_Revolution',
#   'summary': 'The French Revolution (French: Révolution française..."})
#  ...
# Relations:
#  {'head': 'Napoleon', 'type': 'participant in', 'tail': 'French Revolution',
#   'meta': {'spans': [[0, 128], [119, 247]]}}
#  {'head': 'French Revolution', 'type': 'participant', 'tail': 'Napoleon',
#   'meta': {'spans': [[0, 128]]}}
#  ...



  lis = BeautifulSoup(html).find_all('li')


Entities:
  ('Napoleon', {'url': 'https://en.wikipedia.org/wiki/Napoleon', 'summary': "Napoleon Bonaparte (born Napoleone Buonaparte; 15 August 1769 – 5 May 1821), later known by his regnal name Napoleon I, was a Corsican-born French military commander and political leader who rose to prominence during the French Revolution and led successful campaigns during the Revolutionary Wars. He was the de facto leader of the French Republic as First Consul from 1799 to 1804, then Emperor of the French from 1804 until 1814 and again in 1815. Napoleon's political and cultural legacy endures to this day, as a highly celebrated and controversial leader. He initiated many liberal reforms that have persisted in society, and is considered one of the greatest military commanders in history. His campaigns are still studied at military academies worldwide. Between three and six million civilians and soldiers died in what became known as the Napoleonic Wars.Napoleon was born on the island of Corsica, not 

# Extract KB from web article
The from_text_to_kb function takes a text input, along with optional parameters such as the URL, title, and publish date of an article. It tokenizes the text, computes the boundaries for dividing the text into spans, transforms the input with spans, generates relations using the model, decodes the predictions, and creates a knowledge base (KB) object.

In [13]:
def from_text_to_kb(text, article_url, span_length=128, article_title=None,
                    article_publish_date=None, verbose=False):
    # tokenize whole text
    inputs = tokenizer([text], return_tensors="pt")

    # compute span boundaries
    num_tokens = len(inputs["input_ids"][0])
    if verbose:
        print(f"Input has {num_tokens} tokens")
    num_spans = math.ceil(num_tokens / span_length)
    if verbose:
        print(f"Input has {num_spans} spans")
    overlap = math.ceil((num_spans * span_length - num_tokens) /
                        max(num_spans - 1, 1))
    spans_boundaries = []
    start = 0
    for i in range(num_spans):
        spans_boundaries.append([start + span_length * i,
                                 start + span_length * (i + 1)])
        start -= overlap
    if verbose:
        print(f"Span boundaries are {spans_boundaries}")

    # transform input with spans
    tensor_ids = [inputs["input_ids"][0][boundary[0]:boundary[1]]
                  for boundary in spans_boundaries]
    tensor_masks = [inputs["attention_mask"][0][boundary[0]:boundary[1]]
                    for boundary in spans_boundaries]
    inputs = {
        "input_ids": torch.stack(tensor_ids),
        "attention_mask": torch.stack(tensor_masks)
    }

    # generate relations
    num_return_sequences = 3
    gen_kwargs = {
        "max_length": 256,
        "length_penalty": 0,
        "num_beams": 3,
        "num_return_sequences": num_return_sequences
    }
    generated_tokens = model.generate(
        **inputs,
        **gen_kwargs,
    )

    # decode relations
    decoded_preds = tokenizer.batch_decode(generated_tokens,
                                           skip_special_tokens=False)

    # create kb
    kb = KB()
    i = 0
    for sentence_pred in decoded_preds:
        current_span_index = i // num_return_sequences
        relations = extract_relations_from_model_output(sentence_pred)
        for relation in relations:
            relation["meta"] = {
                article_url: {
                    "spans": [spans_boundaries[current_span_index]]
                }
            }
            kb.add_relation(relation, article_title, article_publish_date)
        i += 1

    return kb

These updates allow the KB class to store information about entities, relations, and their associated sources. The merge_with_kb method enables merging knowledge bases, and the print method provides a way to visualize the stored data.

In [14]:
class KB():
    def __init__(self):
        self.entities = {} # { entity_title: {...} }
        self.relations = [] # [ head: entity_title, type: ..., tail: entity_title,
          # meta: { article_url: { spans: [...] } } ]
        self.sources = {} # { article_url: {...} }

    def merge_with_kb(self, kb2):
        for r in kb2.relations:
            article_url = list(r["meta"].keys())[0]
            source_data = kb2.sources[article_url]
            self.add_relation(r, source_data["article_title"],
                              source_data["article_publish_date"])

    def are_relations_equal(self, r1, r2):
        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])

    def exists_relation(self, r1):
        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)

    def merge_relations(self, r2):
        r1 = [r for r in self.relations
              if self.are_relations_equal(r2, r)][0]

        # if different article
        article_url = list(r2["meta"].keys())[0]
        if article_url not in r1["meta"]:
            r1["meta"][article_url] = r2["meta"][article_url]

        # if existing article
        else:
            spans_to_add = [span for span in r2["meta"][article_url]["spans"]
                            if span not in r1["meta"][article_url]["spans"]]
            r1["meta"][article_url]["spans"] += spans_to_add

    def get_wikipedia_data(self, candidate_entity):
        try:
            page = wikipedia.page(candidate_entity, auto_suggest=False)
            entity_data = {
                "title": page.title,
                "url": page.url,
                "summary": page.summary
            }
            return entity_data
        except:
            return None

    def add_entity(self, e):
        self.entities[e["title"]] = {k:v for k,v in e.items() if k != "title"}

    def add_relation(self, r, article_title, article_publish_date):
        # check on wikipedia
        candidate_entities = [r["head"], r["tail"]]
        entities = [self.get_wikipedia_data(ent) for ent in candidate_entities]

        # if one entity does not exist, stop
        if any(ent is None for ent in entities):
            return

        # manage new entities
        for e in entities:
            self.add_entity(e)

        # rename relation entities with their wikipedia titles
        r["head"] = entities[0]["title"]
        r["tail"] = entities[1]["title"]

        # add source if not in kb
        article_url = list(r["meta"].keys())[0]
        if article_url not in self.sources:
            self.sources[article_url] = {
                "article_title": article_title,
                "article_publish_date": article_publish_date
            }

        # manage new relation
        if not self.exists_relation(r):
            self.relations.append(r)
        else:
            self.merge_relations(r)

    def print(self):
        print("Entities:")
        for e in self.entities.items():
            print(f"  {e}")
        print("Relations:")
        for r in self.relations:
            print(f"  {r}")
        print("Sources:")
        for s in self.sources.items():
            print(f"  {s}")

In [15]:
def get_article(url):
    article = Article(url)
    article.download()
    article.parse()
    return article

def from_url_to_kb(url):
    article = get_article(url)
    config = {
        "article_title": article.title,
        "article_publish_date": article.publish_date
    }
    kb = from_text_to_kb(article.text, article.url, **config)
    return kb

In [None]:
url = "https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html"
kb = from_url_to_kb(url)
kb.print()
# Entities:
#   ('MicroStrategy', {'url': 'https://en.wikipedia.org/wiki/MicroStrategy',
#     'summary': "MicroStrategy Incorporated is an American company that ..."})
#   ('Michael J. Saylor', {'url': 'https://en.wikipedia.org/wiki/Michael_J._Saylor',
#     'summary': 'Michael J. Saylor (born February 4, 1965) is an American ..."})
#   ...
# Relations:
#   {'head': 'MicroStrategy', 'type': 'founded by', 'tail': 'Michael J. Saylor',
#    'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html':
#      {'spans': [[0, 128]]}}}
#   {'head': 'Michael J. Saylor', 'type': 'employer', 'tail': 'MicroStrategy',
#    'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html':
#      {'spans': [[0, 128]]}}}
#   ...
# Sources:
#   ('https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html',
#     {'article_title': "Microstrategy chief: 'Bitcoin is going to go into the millions'",
#      'article_publish_date': None})

Entities:
  ('MicroStrategy', {'url': 'https://en.wikipedia.org/wiki/MicroStrategy', 'summary': "MicroStrategy Incorporated is an American company that provides business intelligence (BI), mobile software, and cloud-based services. Founded in 1989 by Michael J. Saylor, Sanju Bansal, and Thomas Spahr, the firm develops software to analyze internal and external data in order to make business decisions and to develop mobile apps. It is a public company headquartered in Tysons Corner, Virginia, in the Washington metropolitan area. Its primary business analytics competitors include SAP AG Business Objects, IBM Cognos, and Oracle Corporation's BI Platform. Saylor is the Executive Chairman and, from 1989 to 2022, was the CEO.\n\n"})
  ('Michael J. Saylor', {'url': 'https://en.wikipedia.org/wiki/Michael_J._Saylor', 'summary': "Michael J. Saylor (born February 4, 1965) is an American entrepreneur and business executive. He is the executive chairman and a co-founder of MicroStrategy, a company t

# Google News: extract KB from multiple articles
The function iterates through each URL, visits the article, and attempts to extract knowledge from it using the from_url_to_kb function. The resulting knowledge base is then merged with the main kb using the merge_with_kb method. If an ArticleException occurs during the process (e.g., the article cannot be downloaded), a message will be displayed.

In [16]:
def get_news_links(query, lang="en", region="US", pages=1, max_links=100000):
    googlenews = GoogleNews(lang=lang, region=region)
    googlenews.search(query)
    all_urls = []
    for page in range(pages):
        googlenews.get_page(page)
        all_urls += googlenews.get_links()
    return list(set(all_urls))[:max_links]

def from_urls_to_kb(urls, verbose=False):
    kb = KB()
    if verbose:
        print(f"{len(urls)} links to visit")
    for url in urls:
        if verbose:
            print(f"Visiting {url}...")
        try:
            kb_url = from_url_to_kb(url)
            kb.merge_with_kb(kb_url)
        except ArticleException:
            if verbose:
                print(f"  Couldn't download article at url {url}")
    return kb

The function opens the specified file in binary read mode, uses pickle.load to deserialize and load the knowledge base object from the file, and returns the loaded knowledge base.

In [17]:
import pickle

def save_kb(kb, filename):
    with open(filename, "wb") as f:
        pickle.dump(kb, f)

def load_kb(filename):
    res = None
    with open(filename, "rb") as f:
        res = pickle.load(f)
    return res

In [None]:
news_links = get_news_links("Google", pages=1, max_links=10)
kb = from_urls_to_kb(news_links, verbose=True)
kb.print()
# 3 links to visit
# Visiting https://www.hindustantimes.com/india-news/google-doodle-celebrates-india-s-gama-pehlwan-the-undefeated-wrestling-champion-101653180853982.html...
# Visiting https://tech.hindustantimes.com/tech/news/google-doodle-today-celebrates-gama-pehlwan-s-144th-birth-anniversary-know-who-he-is-71653191916538.html...
# Visiting https://www.moneycontrol.com/news/trends/current-affairs-trends/google-doodle-celebrates-gama-pehlwan-the-amritsar-born-wrestling-champ-who-inspired-bruce-lee-8552171.html...
# Entities:
#   ('Google', {'url': 'https://en.wikipedia.org/wiki/Google',
#     'summary': 'Google LLC is an American ...'})
#   ...
# Relations:
#   {'head': 'Google', 'type': 'owner of', 'tail': 'Google Doodle',
#     'meta': {'https://tech.hindustantimes.com/tech/news/google-doodle-today-celebrates-gama-pehlwan-s-144th-birth-anniversary-know-who-he-is-71653191916538.html':
#       {'spans': [[0, 128]]}}}
#   ...
# Sources:
#   ('https://www.hindustantimes.com/india-news/google-doodle-celebrates-india-s-gama-pehlwan-the-undefeated-wrestling-champion-101653180853982.html',
#     {'article_title': "Google Doodle celebrates India's Gama Pehlwan, the undefeated wrestling champion",
#     'article_publish_date': datetime.datetime(2022, 5, 22, 6, 59, 56, tzinfo=tzoffset(None, 19800))})
#   ('https://tech.hindustantimes.com/tech/news/google-doodle-today-celebrates-gama-pehlwan-s-144th-birth-anniversary-know-who-he-is-71653191916538.html',
#     {'article_title': "Google Doodle today celebrates Gama Pehlwan's 144th birth anniversary; know who he is",
#     'article_publish_date': datetime.datetime(2022, 5, 22, 9, 32, 38, tzinfo=tzoffset(None, 19800))})
#   ('https://www.moneycontrol.com/news/trends/current-affairs-trends/google-doodle-celebrates-gama-pehlwan-the-amritsar-born-wrestling-champ-who-inspired-bruce-lee-8552171.html',
#     {'article_title': 'Google Doodle celebrates Gama Pehlwan, the Amritsar-born wrestling champ who inspired Bruce Lee',
#     'article_publish_date': None})

10 links to visit
Visiting https://www.theinformation.com/articles/google-invests-in-ai-startup-runway-to-wrest-cloud-business-from-aws...
Visiting https://www.searchenginejournal.com/google-ads-tutorial-using-automatically-created-assets-for-ad-creation/488099/...
Visiting https://www.provokemedia.com/latest/article/google%27s-corey-dubrowa-named-bcw%27s-next-ceo...
Visiting https://www.youtube.com/watch?v=XXw48llH7OU...
Visiting https://www.sfchronicle.com/tech/article/googled-downsizing-bay-area-office-space-1-4m-18128281.php...


Token indices sequence length is longer than the specified maximum sequence length for this model (1840 > 1024). Running this sequence through the model will result in indexing errors


Visiting https://venturebeat.com/data-infrastructure/microsofts-data-and-analytics-platform-fabric-announces-unified-pricing-pressuring-google-and-amazon/...
Visiting https://www.gsmarena.com/our_google_pixel_7a_video_review_is_up-news-58735.php...
Visiting https://www.axios.com/2023/05/31/corey-dubrowa-departs-google-for-bcw...
Visiting https://www.theverge.com/2023/5/31/23744374/google-nest-hub-assistant-anylist-anydo-keep-third-party-notes-lists-support...
Visiting https://www.wral.com/story/beware-irs-text-scams-on-the-rise/20888723/...
Entities:
  ('Google', {'url': 'https://en.wikipedia.org/wiki/Google', 'summary': 'Google LLC ( (listen)) is an American multinational technology company focusing on artificial intelligence, online advertising, search engine technology, cloud computing, computer software, quantum computing, e-commerce, and consumer electronics. It has been referred to as "the most powerful company in the world" and one of the world\'s most valuable brands due to its

# Visualize KB
By using this function, you can generate an interactive visualization of the KB's entities and relations, making it easier to explore and understand the structure of the knowledge base.

In [18]:
def save_network_html(kb, filename):
    # create network
    net = Network(directed=True, width="2000px", height="2000px", bgcolor="#eeeeee")

    # nodes
    color_entity ="#00FF00"
    for e in kb.entities:
        net.add_node(e, shape="circle", color=color_entity)

    # edges
    for r in kb.relations:
        net.add_edge(r["head"], r["tail"],
                    title=r["type"], label=r["type"])

    # save network
    net.repulsion(
        node_distance=200,
        central_gravity=0.2,
        spring_length=200,
        spring_strength=0.05,
        damping=0.09
    )
    net.set_edge_smooth('dynamic')
    net.write_html(filename)

In [None]:
query =  "OpenAI"
news_links = get_news_links(query=query, pages=5, max_links=50)
kb = from_urls_to_kb(news_links, verbose=True)
kb.print()
filename = "network_3_OpenAI.html"
save_network_html(kb, filename=filename)
#IPython.display.HTML(filename=filename)

#HTML(filename="network_3_google.html")


38 links to visit
Visiting https://www.zdnet.com/article/openai-microsoft-and-alphabet-ceos-to-discuss-ai-with-biden-administration-today/...




  lis = BeautifulSoup(html).find_all('li')


Visiting https://www.theinformation.com/briefings/openai-rival-cohere-raises-270-million-including-from-oracle-and-nvidia...
Visiting https://www.thehindu.com/sci-tech/technology/todays-cache-meta-slapped-with-13-billion-privacy-fine-microsoft-scores-against-anti-acquisition-gamers-openai-takes-inspiration-from-wikipedia/article66883695.ece...
Visiting https://www.datacenterdynamics.com/en/news/microsoft-and-oracle-considered-sharing-ai-servers-amid-gpu-shortages/...
Visiting https://interestingengineering.com/innovation/athena-microsoft-secret-ai-chips...
Visiting https://www.reuters.com/technology/microsoft-developing-its-own-ai-chip-information-2023-04-18/...
Visiting https://www.youtube.com/watch?v=Rk3nTUfRZmo&vl=en...
Visiting https://www.tomshardware.com/news/microsoft-athena-ai-chip-tsmc...
Visiting https://coincodex.com/article/26277/chat-gpt-stock/...


Token indices sequence length is longer than the specified maximum sequence length for this model (1367 > 1024). Running this sequence through the model will result in indexing errors


Visiting https://www.datacenterdynamics.com/en/news/microsoft-is-developing-an-athena-ai-chip-for-large-language-models/...
Visiting https://www.theregister.com/2023/05/25/microsoft_azure_ai_cloud/...
Visiting https://wallethacks.com/how-to-buy-openai-stock/...
Visiting https://www.windowscentral.com/software-apps/microsoft-reportedly-tests-chips-in-stealth-mode-to-boost-ai-performance...
Visiting https://siliconangle.com/2023/04/18/microsoft-reportedly-developing-ai-chip-named-athena/...
  Couldn't download article at url https://siliconangle.com/2023/04/18/microsoft-reportedly-developing-ai-chip-named-athena/
Visiting https://winbuzzer.com/2023/05/04/microsoft-teams-up-with-amd-to-challenge-nvidia-in-ai-chips-xcxwbn/...
Visiting https://www.techtarget.com/searchenterpriseai/news/366538454/Microsoft-pushes-to-dominate-in-AI-race...
Visiting https://analyticsindiamag.com/microsofts-struggle-is-nvidias-strength/...
Visiting https://www.theinformation.com/articles/microsoft-readies-ai-ch

In [None]:
from google.colab import files
files.download('network_3_OpenAI.html')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
news_links = get_news_links("Google", pages=5, max_links=50)
kb = from_urls_to_kb(news_links, verbose=True)
filename = "network_3_google.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)


40 links to visit
Visiting https://www.cnet.com/tech/mobile/googles-next-pixel-watch-could-have-a-better-battery-life/...
Visiting https://www.provokemedia.com/latest/article/google%27s-corey-dubrowa-named-bcw%27s-next-ceo...
Visiting https://www.gsmarena.com/our_google_pixel_7a_video_review_is_up-news-58735.php...
Visiting https://finance.yahoo.com/news/nvidia-vulnerable-house-ai-chip-155612054.html...
Visiting https://adage.com/article/digital-marketing-ad-tech-news/how-google-expanded-its-data-clean-room-tools-post-cookie-ad-targeting/2497121...
Visiting https://www.theverge.com/2023/5/31/23743515/google-chromecast-support-ending-2013...
Visiting https://www.androidpolice.com/android-12l-guide/...
  Couldn't download article at url https://www.androidpolice.com/android-12l-guide/
Visiting https://www.theverge.com/2023/5/31/23744374/google-nest-hub-assistant-anylist-anydo-keep-third-party-notes-lists-support...
Visiting https://www.cnet.com/tech/services-and-software/google-ends-supp

In [None]:
from google.colab import files
files.download('network_3_google.html')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# New Section

In [None]:
news_links = get_news_links("Amazon", pages=5, max_links=30)
kb = from_urls_to_kb(news_links, verbose=True)
kb.print()
filename = "network_3_amazon.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
#IPython.display.HTML(filename=filename)

30 links to visit
Visiting https://www.hollywoodreporter.com/tv/tv-reviews/dead-ringers-review-tv-rachel-weisz-amazon-1235391718/...
Visiting https://247wallst.com/investing/2023/04/19/wednesdays-upgrades-and-downgrades-amazon-com-bath-bodyworks-danaher-enterprise-products-partners-marathon-petroleum-mgm-resorts-microsoft-mt-bank-nextier-oilfield-solutions-nike-okta/...
Visiting https://people.com/home/outdoor-rug-sale-amazon-april-2023/...
Visiting https://finance.yahoo.com/news/amazon-must-face-drivers-spying-183554450.html...
Visiting https://www.investors.com/research/options/amazon-stock-today-how-this-diagonal-put-spread-trade-could-earn-as-much-as-185-by-april-28/...
Visiting https://www.washingtonpost.com/dc-md-va/2023/04/19/amazon-headquarters-arlington-virginia-hybrid/...
Visiting https://www.engadget.com/apples-14-inch-m2-macbook-pro-is-up-to-250-off-at-amazon-133159632.html...
Visiting https://www.rockpapershotgun.com/samsungs-512gb-evo-select-micro-sd-card-is-down-to-3999-

In [None]:
from google.colab import files
files.download('network_3_amazon.html')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
news_links = get_news_links("Apple", pages=5, max_links=20)
kb = from_urls_to_kb(news_links, verbose=True)
kb.print()
filename = "network_3_apple.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
#IPython.display.HTML(filename=filename)

20 links to visit
Visiting https://seekingalpha.com/article/4595047-apple-current-valuations-unsupported-by-profit-drivers...
Visiting https://www.youtube.com/watch?v=euZQ0Tq1W7E...
Visiting https://lifehacker.com/you-can-run-windows-11-on-your-apple-silicon-mac-for-fr-1850347186...
Visiting https://www.reuters.com/technology/apple-ceo-meets-india-pm-modi-commits-growth-investment-2023-04-19/...
Visiting https://www.apple.com/newsroom/2023/04/apple-announces-major-progress-toward-climate-goals-ahead-of-earth-day/...
Visiting https://www.vulture.com/article/best-apple-tv-plus-tv-shows.html...
Visiting https://nypost.com/2023/04/19/apple-2020-13-256gb-macbook-air-is-200-off-on-amazon/...
Visiting https://www.theverge.com/2023/4/19/23688176/ipad-air-case-mate-fuel-usb-c-charger-jbl-flip-6-sale...
Visiting https://www.macrumors.com/2023/04/19/apple-shares-2023-environmental-progress-report/...
Visiting https://9to5mac.com/2023/04/19/apple-touts-environmental-progress-in-annual-report/...
V

In [None]:
from google.colab import files
files.download('network_3_apple.html')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
news_links = get_news_links("Doordash", pages=10, max_links=40)
kb = from_urls_to_kb(news_links, verbose=True)
filename = "network_3_doordash.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

'NoneType' object has no attribute 'group'
40 links to visit
Visiting https://www.investors.com/news/technology/dash-stock-doordash-stock-jumps-on-earnings/...
Visiting https://www.marketwatch.com/story/doordash-shakes-up-c-suite-with-coo-cfo-appointments-b7b6d040...
Visiting https://yourvalley.net/stories/doordash-sees-record-orders-and-users-but-losses-grow,369195...
  Couldn't download article at url https://yourvalley.net/stories/doordash-sees-record-orders-and-users-but-losses-grow,369195
Visiting https://www.portada-online.com/businesswire/doordash-releases-fourth-quarter-2022-financial-results/...
Visiting https://www.nasdaq.com/articles/is-the-pop-in-doordash-dash-stock-sustainable...
Visiting https://www.wsj.com/articles/doordash-dash-q4-earnings-report-2022-7bfd19ec...
  Couldn't download article at url https://www.wsj.com/articles/doordash-dash-q4-earnings-report-2022-7bfd19ec
Visiting https://www.investors.com/market-trend/stock-market-today/dow-jones-falls-on-hot-inflation

In [None]:
from google.colab import files
files.download('network_3_doordash.html')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
news_links = get_news_links("Kobe Bryant", pages=5, max_links=20)
kb = from_urls_to_kb(news_links, verbose=True)
filename = "network_3_bryant.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
text = "Napoleon Bonaparte (born Napoleone di Buonaparte; 15 August 1769 – 5 May 1821), and later known by his regnal name Napoleon I, was a French military and political leader who rose to prominence during the French Revolution and led several successful campaigns during the Revolutionary Wars. He was the de facto leader of the French Republic as First Consul from 1799 to 1804. As Napoleon I, he was Emperor of the French from 1804 until 1814 and again in 1815. Napoleon's political and cultural legacy has endured, and he has been one of the most celebrated and controversial leaders in world history."
kb = from_text_to_kb(text, "", verbose=True)
filename = "network_1_napoleon.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
text = "Kobe Bean Bryant (August 23, 1978 – January 26, 2020) was an American professional basketball player. A shooting guard, he spent his entire 20-year career with the Los Angeles Lakers in the National Basketball Association (NBA). Widely regarded as one of the greatest basketball players of all time, Bryant won five NBA championships, was an 18-time All-Star, a 15-time member of the All-NBA Team, a 12-time member of the All-Defensive Team, the 2008 NBA Most Valuable Player (MVP), and a two-time NBA Finals MVP. Bryant also led the NBA in scoring twice, and ranks fourth in league all-time regular season and postseason scoring. He was posthumously voted into the Naismith Memorial Basketball Hall of Fame in 2020 and named to the NBA 75th Anniversary Team in 2021."
kb = from_text_to_kb(text, "", verbose=True)
filename = "network_1_bryant.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
text = "Originally known as BackRub. Google is a search engine that started development in 1996 by Sergey Brin and Larry Page as a research project at Stanford University to find files on the Internet. Larry and Sergey later decided the name of their search engine needed to change and chose Google, which is inspired from the term googol. The company is headquartered in Mountain View, California."
kb = from_text_to_kb(text, "", verbose=True)
filename = "network_1_google.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
url = "https://www.investopedia.com/terms/c/cryptocurrency.asp"
kb = from_url_to_kb(url)
filename = "network_2_crypto.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
url = "https://www.britannica.com/biography/Johnny-Depp"
kb = from_url_to_kb(url)
filename = "network_2_depp.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)

In [None]:
url = "https://www.timeout.com/rome/things-to-do/best-things-to-do-in-rome"
kb = from_url_to_kb(url)
filename = "network_2_rome.html"
save_network_html(kb, filename=filename)
save_kb(kb, filename.split(".")[0] + ".p")
IPython.display.HTML(filename=filename)