# Goal of the notebook
(to be completed)

In [None]:
import os
import pathlib

import pandas as pd
import requests
import sqlalchemy
import ipywidgets

import bbsearch as bbs
from bbsearch.remote_searcher import RemoteSearcher
from bbsearch.widgets import ArticleSaver, SearchWidget, MiningWidget, SchemaRequest

# Set a Project
The user chooses / creates a project to host a KG.

In [None]:
# Use the Nexus Web application to get a token. Once a token is obtained, paste it below.
import getpass
TOKEN = getpass.getpass()

In [None]:
from kgforge.core import KnowledgeGraphForge

In [None]:
# Configure a 'forge' to manage (create, access and deploy) the knowledge graph within a given Blue Brain Nexus Project.
# The path of the forge configuration file is read from the FORGE_CONFIG_FILE environment variable.
FORGE_CONFIG_FILE = os.getenv("FORGE_CONFIG_FILE")
if FORGE_CONFIG_FILE is None:
    # Explicit error instead of a bare assert: asserts carry no message here and are stripped under `python -O`.
    raise RuntimeError(
        "The FORGE_CONFIG_FILE environment variable must point to the forge configuration file."
    )
forge = KnowledgeGraphForge(FORGE_CONFIG_FILE, token=TOKEN)

# Set topic
The user defines a topic.

In [None]:
# Registry of the known topics, keyed by topic name. It is seeded with one
# blank 'COVID-19' entry holding four empty question slots.
topics = {
    'COVID-19': {
        'description': '',
        'tags': '',
        'questions': [''] * 4,
    },
}
# Last topic resource built by save_topic(); None until a topic has been saved.
topic_resource = None
def save_topic(b):
    """Store the topic currently typed in the form and register it with the forge.

    The form fields are read by position from ``widget.children``: index 2 is
    the name, 3 the description, 4 the tags and 6 to 9 the four questions.
    The topic is added to the ``topics`` registry, converted to a resource
    through ``forge.from_json``, registered, and the dropdown options are
    refreshed.

    ``b`` is the argument passed by the button click handler; it is not used.
    """
    global topic_resource

    fields = widget.children
    name = fields[2].value
    description = fields[3].value
    tags = fields[4].value

    topics[name] = {
        'description': description,
        'tags': tags,
        'questions': [fields[position].value for position in range(6, 10)],
    }

    topic_to_save = {
        "id": str(name).replace(' ', '_'),
        "type": "Topic",
        "name": name,
        'description': description,
        'keywords': tags,
        # A separate list on purpose: the payload must not alias the registry entry.
        'question': [fields[position].value for position in range(6, 10)],
    }

    topic_resource = forge.from_json(topic_to_save)
    forge.register(topic_resource)
    w0.options = list(topics.keys())
def topics_change(change):
    """Fill the form with the topic that was just selected in the dropdown.

    ``change`` is the change dictionary handed over by the dropdown observer;
    only its ``'new'`` entry (the selected topic name) is read.
    """
    w1.value = change['new']
    w2.value = topics[w1.value]['description']
    w3.value = topics[w1.value]['tags']
    # The four question inputs mirror the four question slots of the topic.
    for position, question_field in enumerate((w4, w5, w6, w7)):
        question_field.value = topics[w1.value]['questions'][position]
# Dropdown listing every topic stored in the registry.
w0 = ipywidgets.Dropdown(
    description='Topics:',
    options=list(topics.keys()),
    disabled=False,
)
# Free-text fields describing the topic being added.
w1 = ipywidgets.Text(
    description='Name:',
    placeholder='COVID-19',
    disabled=False,
)
w2 = ipywidgets.Textarea(
    description='Description:',
    placeholder='Add a description of your topic',
    disabled=False,
)
w3 = ipywidgets.Textarea(
    description='Tags:',
    placeholder='Coronavirus; COVID-19; SARS; risk factor; glycosylation; sugar; carbohydrates',
    disabled=False,
)
# Four identically configured inputs, one per research question.
w4, w5, w6, w7 = (
    ipywidgets.Text(
        placeholder='Add a question about your research topic',
        disabled=False,
    )
    for _ in range(4)
)
w8 = ipywidgets.Button(
    description='Add',
    tooltip='Add topic',
    button_style='',
    disabled=False,
)

# NOTE: save_topic() reads the fields by their position in this list, so the
# order of the children must not change.
widget = ipywidgets.VBox(
    [
        w0,
        ipywidgets.HTML('<b>Add a new topic to the topics list:</b>'),
        w1,
        w2,
        w3,
        ipywidgets.Label('Please express your research topic in a few questions:'),
        w4,
        w5,
        w6,
        w7,
        w8,
    ],
    layout=ipywidgets.Layout(display='flex', align_items='flex-end', width='40%'),
)

# Wire the callbacks, then render the form.
w8.on_click(save_topic)
w0.observe(topics_change, names='value')
display(widget)

# Data Import
The user loads data from a data source (CORD-19).
The loaded data forms the corpus.
The user searches the CORPUS in Blue Brain Search.

URLs

In [None]:
# Address of the search server; the default can be overridden through the environment.
SEARCH_ENGINE_URL = os.getenv("SEARCH_ENGINE_URL", "http://dgx1.bbp.epfl.ch:8850")
assert SEARCH_ENGINE_URL is not None

# Ask the server to identify itself on its /help endpoint before going further.
response = requests.post(f"{SEARCH_ENGINE_URL}/help")
assert response.ok and response.json()['name'] == 'SearchServer', "The server is not accessible"

Paths

In [None]:
# Root directory of the data; the default can be overridden through the environment.
BBS_DATA_PATH = pathlib.Path(os.getenv("BBS_DATA_PATH", "/raid/bbs_data/"))

CORD19_VERSION = 'v7'

# Locations derived from the root directory and the CORD-19 version.
cord_path = BBS_DATA_PATH.joinpath(f'cord19_{CORD19_VERSION}')
db_path = cord_path.joinpath('databases', 'cord19.db')
trained_models_path = BBS_DATA_PATH.joinpath('trained_models')

In [None]:
# Client for the search server reachable at SEARCH_ENGINE_URL.
searcher = RemoteSearcher(SEARCH_ENGINE_URL)
# SQLAlchemy engine over the SQLite database file located at db_path.
engine = sqlalchemy.create_engine(f"sqlite:///{db_path}")
# The article saver uses that same engine as its connection.
article_saver = ArticleSaver(connection=engine)

In [None]:
# Build the search widget from the searcher, database engine and article saver
# created in the previous cell; results_per_page is set to 3.
search_widget = SearchWidget(
    searcher=searcher,
    connection=engine,
    article_saver=article_saver,
    results_per_page=3)
# A bare expression on the last line of a cell makes the notebook render the widget.
search_widget

Status of the Article Saver

In [None]:
article_saver.summary_table()

# Set schemas
The user defines the KG schema.

In [None]:
schema_request = SchemaRequest()

In [None]:
# Columns of the schema request table. Only 'entity_type' and 'ontology_source'
# are provided below; the remaining columns are left unset in the DataFrame.
columns = ['entity_type', 'property', 'property_type', 'property_value_type', 'ontology_source']

# (entity type, ontology source) pairs; None means no ontology source is given.
etypes_sources = [
    ('CELL_TYPE', None),
    ('CHEMICAL', 'NCIT'),
    ('CONDITION', None),
    ('DISEASE', 'NCIT'),
    ('ORGAN', 'NCIT'),
    ('ORGANISM', 'NCIT'),
    ('PATHWAY', 'Reactome'),
    ('PROTEIN', 'NCIT'),
]
schema_request_data = [
    dict(entity_type=etype, ontology_source=source)
    for etype, source in etypes_sources
]

schema_request.schema = pd.DataFrame(schema_request_data, columns=columns)
display(schema_request.schema)

# Create a knowledge graph according to schemas
The user extracts data from the text of a set of papers using selected Named Entity Recognizers and Relation Extractors from Blue Brain Search.
The user can preview the extracted data.
The user curates extracted data.
The user links the extracted entities and relations to ontologies.
The user saves data into Knowledge Graph.

- **input**: raw text
- **output**: csv table of extracted entities/relations

In [None]:
DEFAULT_TEXT = """Autophagy maintains tumour growth through circulating
arginine. Autophagy captures intracellular components and delivers them to
lysosomes, where they are degraded and recycled to sustain metabolism and to
enable survival during starvation. Acute, whole-body deletion of the essential 
autophagy gene Atg7 in adult mice causes a systemic metabolic defect that 
manifests as starvation intolerance and gradual loss of white adipose tissue, 
liver glycogen and muscle mass.  Cancer cells also benefit from autophagy. 
Deletion of essential autophagy genes impairs the metabolism, proliferation, 
survival and malignancy of spontaneous tumours in models of autochthonous 
cancer. Acute, systemic deletion of Atg7 or acute, systemic expression of a 
dominant-negative ATG4b in mice induces greater regression of KRAS-driven 
cancers than does tumour-specific autophagy deletion, which suggests that host 
autophagy promotes tumour growth.
""".replace('\n', ' ').replace('  ', ' ')

In [None]:
# Address of the text mining server; the default can be overridden through the environment.
TEXT_MINING_URL = os.getenv("TEXT_MINING_URL", "http://dgx1.bbp.epfl.ch:8852")
# Ask the server to identify itself on its /help endpoint before going further.
response = requests.post(f"{TEXT_MINING_URL}/help")
assert response.ok and response.json()['name'] == 'MiningServer'

In [None]:
# Build the mining widget against the mining server checked above. It receives
# the schema request, the article saver, and DEFAULT_TEXT as its default text.
mining_widget = MiningWidget(
    mining_server_url=TEXT_MINING_URL,
    schema_request=schema_request,
    article_saver=article_saver,
    default_text=DEFAULT_TEXT)

In [None]:
mining_widget

- **input**: csv table of extracted entities/relations
- **output**: knowledge graph

In [None]:
table_extractions = mining_widget.get_extracted_table()

In [None]:
print(f'The table has {table_extractions.shape[0]} rows.')

In [None]:
import jupyter_server_proxy
import jupyter_dash
import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table
from jupyter_dash import JupyterDash
import plotly.express as px

In [None]:
from jupyter_dash.comms import _send_jupyter_config_comm_request
_send_jupyter_config_comm_request()

In [None]:
JupyterDash.infer_jupyter_proxy_config()

In [None]:
curated_table_extractions = None
data= table_extractions.to_dict('records')

In [None]:
# Dash application used to review and curate the extracted entities.
app = JupyterDash('Extracted Entities Curation App')

server = app.server

# Define UI layout
# The page is made of a client-side store, an interactive table and two chart tabs.
app.layout = html.Div(
    [
        # Store initialised with the extraction records computed in a previous cell.
        dcc.Store(id='memory', data=data),

        # No `data` / `columns` are given here: the update_output callback
        # provides them (together with `editable` and `row_deletable`).
        dash_table.DataTable(
                id='datatable-upload-container',
                style_cell={
                    'whiteSpace': 'normal'
                },

                # Initial striping of odd rows; the update_styles callback
                # writes to this same property when columns are selected.
                style_data_conditional=[
                    {
                        'if': {'row_index': 'odd'},
                        'backgroundColor': 'rgb(248, 248, 248)'
                    }
                ],
                style_header={
                    'backgroundColor': 'rgb(230, 230, 230)',
                    'fontWeight': 'bold'
                },

                # NOTE(review): the selector has no leading '.' and the height
                # has no unit -- confirm that this rule has any effect.
                css=[
                    {
                        'selector': 'dash-fixed-content',
                        'rule': 'height: 800;'
                    }
                ],
                virtualization=True,
                sort_action="native",
                sort_mode="multi",
                column_selectable="multi",
                filter_action="native",
                selected_columns=[],
                page_action="native",
                export_format='csv',
                export_headers='display',
                merge_duplicate_headers=True,
                selected_rows=[],
                page_current=0,
                page_size=30,

            ),

            html.Br(),
            # The two figures are filled in by the display_graph callback.
            dcc.Tabs(id='tabs', 
                     children=[
                        dcc.Tab(label='View extracted entities in a Bar diagram', children=[dcc.Graph(id='datatable-upload-graph')]),
                        dcc.Tab(label='View extracted entities in a Scatter diagram', children=[dcc.Graph(id='datatable-upload-Scatter')])
                     ]
            )
    
    ]
)

# Define callbacks
@app.callback(
    Output('datatable-upload-container', 'style_data_conditional'),
    [Input('datatable-upload-container', 'selected_columns')]
)
def update_styles(selected_columns):
    """Return one conditional style per selected column to highlight it.

    The returned list becomes the table's ``style_data_conditional``.
    """
    highlighted = []
    for column_id in selected_columns:
        highlighted.append({
            'if': {'column_id': column_id},
            'background_color': '#D2F3FF'
        })
    return highlighted



@app.callback([Output('memory', 'data'),
               Output('datatable-upload-container', 'data'),
               Output('datatable-upload-container', 'columns'),
               Output('datatable-upload-container', 'editable'),
               Output('datatable-upload-container', 'row_deletable')],

              [Input('datatable-upload-container', 'page_size'),
               Input('datatable-upload-container', 'page_current'),
               Input('datatable-upload-container', 'data_timestamp'),
               Input('datatable-upload-container', 'data_previous')],

              [State("datatable-upload-container", "data"),
               State("datatable-upload-container", "columns"),
               State("memory", "data")])
def update_output(page_size, page_current, ts, data_previous, data, columns, memory_data):
    """Synchronise the curated table with the edits made in the data table.

    While the table has no data timestamp yet (``ts is None``), rows and
    column definitions are taken from ``table_extractions``. Afterwards the
    current table rows are used. When a row present in ``data_previous`` is
    missing from the current rows, every row carrying the same ``entity``
    value is dropped from the curated table.

    The result is stored in the global ``curated_table_extractions`` and
    returned as records for both the store and the table, followed by the
    column definitions and ``True`` for ``editable`` and ``row_deletable``.

    ``page_size``, ``page_current`` and ``memory_data`` are received from the
    callback wiring but are not used.
    """
    global curated_table_extractions

    if ts is None:
        # The table has not been populated or edited yet: start from the raw extractions.
        data = table_extractions.to_dict('records')
        columns = [
            {"name": i, "id": i, "clearable": True, "selectable": True,
             "renamable": True, "hideable": True, "deletable": False}
            for i in table_extractions.columns
        ]

    # `data` can be None or empty (e.g. once every row has been deleted).
    current_rows = list(data or [])
    curated = pd.DataFrame(current_rows)

    # An empty frame has no 'entity' column; filtering on it would raise.
    if data_previous and "entity" in curated.columns:
        for row in data_previous:
            if row not in current_rows:
                curated = curated[curated["entity"] != row.get("entity")]

    curated_table_extractions = curated
    records = curated.to_dict('records')
    return records, records, columns, True, True

@app.callback([Output('datatable-upload-graph', 'figure'),
               Output('datatable-upload-Scatter', 'figure')],
              [Input('datatable-upload-container', 'data_timestamp'), Input('datatable-upload-container', 'data')],)
def display_graph(dts, rows):
    """Build the bar and scatter figures showing entity frequencies.

    The frequency of an (entity, entity type) pair is the number of distinct
    papers mentioning it, where the paper is the part of ``paper_id`` before
    the first ``':'``.

    Empty placeholder figures are returned when the table is empty or lacks
    one of the ``paper_id``, ``entity_type`` or ``entity`` columns, so the
    callback never fails on unbound figures.

    ``dts`` (the table data timestamp) only triggers the callback; it is not used.
    """
    df = pd.DataFrame(rows)

    required_columns = {"paper_id", "entity_type", "entity"}
    if df.empty or not required_columns.issubset(df.columns):
        bar = {
            'data': [{
                'x': [],
                'y': [],
                'type': 'bar'
            }]
        }
        scatter = {
            'data': [{
                'x': [],
                'y': []
            }]
        }
        return [bar, scatter]

    # Keep only the paper part of the identifier.
    df["paper_id"] = df["paper_id"].map(lambda value: str(value).split(":")[0])
    df_grouped = (
        df[["paper_id", "entity_type", "entity"]]
        .groupby(["entity", "entity_type"])["paper_id"]
        .nunique()
        .reset_index()
        .rename(columns={"paper_id": "Frequency"})
    )
    bar = px.bar(df_grouped, x=df_grouped.entity, y=df_grouped.Frequency, color="entity_type")
    scatter = px.scatter(df_grouped, x=df_grouped.entity, y=df_grouped.Frequency, color="entity_type")
    return [bar, scatter]


app.width = "100%"
app.height = "3000px"
app.run_server(mode="jupyterlab")

In [None]:
from typing import Iterator, Dict
import pandas as pd
import numpy as np
from pygments import highlight
from pygments.lexers import JsonLdLexer, TurtleLexer
from pygments.formatters import TerminalFormatter, TerminalTrueColorFormatter
import json
from tqdm.notebook import tqdm

def pretty_print(a_json):
    """Print ``a_json`` as indented JSON, highlighted with the JSON-LD lexer for terminal output."""
    serialized = json.dumps(a_json, indent=2)
    print(highlight(serialized, JsonLdLexer(), TerminalFormatter()))
    
def represent_as_annotations(df: pd.DataFrame) -> Iterator[Dict]:
    """Lazily turn each row of ``df`` into a JSON-LD annotation dictionary.

    Each row is read through ``itertuples`` and must expose ``entity``,
    ``entity_type``, ``property``, ``property_value``, ``paper_id``,
    ``start_char`` and ``end_char``.

    When the row has no property, the annotation value describes the entity
    and its type, and the selected text is the entity. Otherwise the value
    links the entity to the property value, and the selected text is the
    property.

    Returns a generator wrapped in a progress bar.
    """
    def _annotation_for(row):
        entity_id = row.entity.replace(' ', '_')
        if pd.isnull(row.property):
            # Plain entity mention.
            mention = row.entity
            value_triple = {
                "@id": entity_id,
                "label": row.entity,
                "@type": row.entity_type
            }
        else:
            # Relation: the entity points to the property value.
            mention = row.property
            value_triple = {
                "@id": entity_id,
                "label": row.entity,
                row.property: {
                    "@id": row.property_value.replace(' ', '_')
                }
            }

        # Built afresh for every row so that annotations never share state.
        context = {
            "@vocab": "https://bbp.epfl.ch/nexus/v1/resources/covid19-kg/vocab/",
            "oa": "http://www.w3.org/ns/anno.jsonld",
            "value": "rdf:value",
            "source": "oa:source",
            "target": "oa:target",
            "selector": "oa:selector",
            "start": "oa:start",
            "end": "oa:end",
            "exact": "oa:exact",
            "body": "oa:body",
            "label": "rdfs:label",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "@base": "https://bbp.epfl.ch/nexus/v1/resources/covid19-kg/data/entity/"
        }
        selector = {
            '@type': 'TextPositionSelector',
            'start': row.start_char,
            'end': row.end_char,
            'exact': mention,
            'value': value_triple
        }
        return {
            '@context': [context],
            '@id': f'https://bbp.epfl.ch/covid19/{row.Index}',
            '@type': 'Annotation',
            'target': {
                # Only the part of the identifier before the first ':' names the source.
                'source': str(row.paper_id).split(":")[0],
                'selector': selector,
            }
        }

    return (_annotation_for(row) for row in tqdm(df.itertuples()))

annotations = list(represent_as_annotations(curated_table_extractions))

In [None]:
print(f'{len(annotations)} annotations created.')

In [None]:
### Entity Linking ###

import pickle

import faiss
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

class Candidate:
    """Ontology concept proposed for a mention.

    Holds the distance to the mention, the alias that matched, the concept
    identifier, the concept label and its definition.
    """

    def __init__(self, distance, alias, uid, concept, definition):
        # Assignment order matters: __repr__ lists attributes in insertion order.
        self.distance, self.alias, self.uid = distance, alias, uid
        self.concept, self.definition = concept, definition

    def __repr__(self):
        rendered = ", ".join(f"{name}={value!r}" for name, value in vars(self).items())
        return f"Candidate({rendered})"

class EntityLinker:
    """Link textual mentions to ontology concepts with a TF-IDF nearest-neighbour search.

    Aliases of the ontology concepts are embedded with a ``TfidfVectorizer``
    and indexed either with a faiss flat L2 index (``bulk=True``) or with a
    scikit-learn ``NearestNeighbors`` model (``bulk=False``).

    Attributes set by ``train`` or ``from_pretrained``:
      - ``ontology``: concept identifier -> (concept label, definition)
      - ``aliases``: list of (alias, concept identifier), aligned with the index rows
      - ``model``: the fitted vectorizer
      - ``index``: the nearest-neighbour index
    """

    def __init__(self, bulk):
        self.bulk = bulk
        self.ontology = None
        self.aliases = None
        self.model = None
        self.index = None

    def link(self, mentions, threshold=0.8):
        """Return, for each mention, the chosen ``Candidate`` or ``None``.

        Three candidates are retrieved per mention and passed to
        ``disambiguate`` with the given distance ``threshold``.
        """
        print('WARN   Entity Linking   '
              'Low performances because component not part of the NLP pipeline.')
        selections = self.candidates(mentions, 3)
        return [self.disambiguate(cs, m, None, threshold) for m, cs in selections]

    def disambiguate(self, candidates, mention, context, threshold):
        """Pick one candidate among ``candidates``, or ``None``.

        Candidates at distance 0 win, the one with the shortest concept label
        first. Otherwise the closest candidate is returned if its distance
        does not exceed ``threshold``. ``mention`` and ``context`` are
        currently unused.
        """
        # TODO Disambiguation requires the component to be part of the NLP pipeline.
        if not candidates:
            # Nothing to choose from: report "no link" instead of failing.
            return None
        zeros = [x for x in candidates if x.distance == 0]
        if zeros:
            return min(zeros, key=lambda x: len(x.concept))
        chosen = min(candidates, key=lambda x: x.distance)
        return chosen if chosen.distance <= threshold else None

    def candidates(self, mentions, limit):
        """Return ``(mention, [Candidate, ...])`` pairs with ``limit`` candidates per mention."""
        def _(d, i):
            # Index rows are aligned with self.aliases.
            alias, uid = self.aliases[int(i)]
            return Candidate(d, alias, uid, *self.ontology[uid])
        embeddings = self.model.transform(mentions)
        if self.bulk:
            distances, indexes = self.index.search(embeddings.toarray(), limit)
        else:
            distances, indexes = self.index.kneighbors(embeddings, limit)
        # Pair every distance with its row index: one (distance, index) couple per candidate.
        results = np.stack((distances, indexes), axis=2)
        return [(m, [_(d, i) for d, i in rs]) for m, rs in zip(mentions, results)]

    def train(self, ontology, model_params, index_params):
        """Fit the vectorizer and build the index from ``ontology``.

        ``ontology`` maps a concept identifier to a sequence whose item 0 is
        the concept label, item 1 an iterable of aliases and item 2 the
        definition. ``model_params`` are passed to ``TfidfVectorizer`` and
        ``index_params`` to ``NearestNeighbors`` (unused when ``bulk`` is set).
        """
        self.ontology = {k: (v[0], v[2]) for k, v in ontology.items()}
        self.model = TfidfVectorizer(**model_params)
        aliases = [(x, k) for k, v in ontology.items() for x in [v[0], *v[1]]]
        embeddings = self.model.fit_transform(x for x, _ in aliases)
        # Drop the aliases whose embedding is all zeros.
        flags = np.array(embeddings.sum(axis=1) != 0).reshape(-1)
        filtered_embeddings = embeddings[flags]
        self.aliases = [t for t, f in zip(aliases, flags) if f]
        if self.bulk:
            self.index = faiss.IndexFlatL2(filtered_embeddings.shape[1])
            self.index.add(filtered_embeddings.toarray())
        else:
            self.index = NearestNeighbors(**index_params)
            self.index.fit(filtered_embeddings)
        self._stats()

    def save_pretrained(self, dirpath):
        """Persist this linker under ``dirpath``.

        The ontology, aliases and model (and the index when not ``bulk``) are
        pickled, in that order, into ``{dirpath}/model``. In ``bulk`` mode the
        faiss index is written to ``{dirpath}/index``.
        """
        # Serialise this instance's own state, not whatever global happens to be named `linker`.
        with open(f'{dirpath}/model', 'wb') as f:
            pickle.dump(self.ontology, f)
            pickle.dump(self.aliases, f)
            pickle.dump(self.model, f)
            if not self.bulk:
                pickle.dump(self.index, f)
        if self.bulk:
            faiss.write_index(self.index, f'{dirpath}/index')

    @staticmethod
    def from_pretrained(dirpath, bulk):
        """Load a linker saved with ``save_pretrained`` from ``dirpath``.

        ``bulk`` must match the mode the linker was saved with.
        """
        linker = EntityLinker(bulk)
        # SECURITY: unpickling executes arbitrary code; only load files from a trusted source.
        with open(f'{dirpath}/model', 'rb') as f:
            linker.ontology = pickle.load(f)
            linker.aliases = pickle.load(f)
            linker.model = pickle.load(f)
            if not bulk:
                linker.index = pickle.load(f)
        if bulk:
            linker.index = faiss.read_index(f'{dirpath}/index')
        linker._stats()
        return linker

    def _stats(self):
        """Print the number of concepts and aliases known to the linker."""
        ccount = len(self.ontology)
        tcount = len(self.aliases)
        print(f'INFO   EntityLinker   Links to {ccount} concepts ({tcount} aliases).')

In [None]:
linker = EntityLinker.from_pretrained('shared-data/entity-linking/', bulk=False)

In [None]:
from typing import Iterable, Dict, Iterator
from copy import deepcopy

def enrich_annotations(annotations: Iterable[Dict], linker: "EntityLinker") -> Iterator[Dict]:
    """Yield copies of ``annotations`` enriched with the concept linked to their mention.

    The mention of an annotation is its ``target.selector.exact`` value. All
    mentions are sent to ``linker.link`` in one call. When a concept is
    returned for a mention, the copy receives a ``body`` holding the concept
    identifier (``@id``) and label; otherwise the copy is left unchanged.
    The input annotations are never modified.

    Linking happens when the function is called; the enriched copies are
    produced lazily.
    """
    def _(ann, can):
        new = deepcopy(ann)
        if can:
            new['body'] = {
                '@id': can.uid,
                'label': can.concept,
            }
        return new

    # The annotations are traversed twice below; materialise them so that
    # one-shot iterables (e.g. generators) are not exhausted by the first pass.
    annotations = list(annotations)
    mentions = [x['target']['selector']['exact'] for x in annotations]
    linked_mentions = linker.link(mentions)
    return (_(ann, can) for ann, can in zip(annotations, linked_mentions))

In [None]:
enriched_annotations = list(enrich_annotations(annotations, linker))

In [None]:
import json
from typing import Iterable, Dict
from rdflib import Graph

def load_knowledge_graph(jsonlds: Iterable[Dict]) -> Graph:
    """Parse every JSON-LD document of ``jsonlds`` into one new RDF graph and return it.

    Documents are serialised to JSON text before parsing; a progress bar
    follows the iteration.
    """
    graph = Graph()
    for document in tqdm(jsonlds):
        serialized = json.dumps(document)
        graph.parse(data=serialized, format='json-ld')
    return graph

In [None]:
%%time
# Build knowledge graph from enriched annotations
knowledge_graph = load_knowledge_graph(enriched_annotations)
# Note: Takes around 8 secs on a BBP issued MacBook Pro.

In [None]:
%%time
# Build knowledge graph from original annotations
knowledge_graph = load_knowledge_graph(annotations)
# Note: Takes around 8 secs on a BBP issued MacBook Pro.

In [None]:
print(f'The knowledge graph has {len(knowledge_graph)} triples.')

In [None]:
# Only `Graph` is imported from rdflib elsewhere in this notebook; the module
# itself must be bound before `rdflib.term` can be used.
import rdflib

# Collect into `content_graph` every triple whose subject is the object of an
# rdf:value statement of the knowledge graph.
RDF_VALUE = rdflib.term.URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#value")

content_graph = Graph()
# Let the graph select the rdf:value statements instead of scanning every triple.
for s, p, o in knowledge_graph.triples((None, RDF_VALUE, None)):
    for ss, pp, oo in knowledge_graph.triples((rdflib.term.URIRef(o), None, None)):
        content_graph.add((ss, pp, oo))

# Validate the knowledge graph
The user reviews the content of the Knowledge Graph.

# Correct knowledge graph
The user corrects the Knowledge Graph if errors occur.

# Access the knowledge graph
The user can search, visualize, and export the knowledge graph.

In [None]:
!pip install networkx

In [None]:
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
import networkx as nx

In [None]:
def _freequency(colunm, df, distinct_papers=True, debug=False):
    
    if debug:
        display(df.head(100))
    if distinct_papers:
        
        colunm_stats = df[[colunm, "paper_id"]].groupby(colunm).str(paper_id).split(":")[0].nunique()
    else:
        colunm_stats = df[[colunm, "paper_id"]].groupby(colunm).paper_id.count()
    if debug:
        display(colunm_stats)

    
    return colunm_stats
        
entity_stats = _freequency(colunm="entity",df=curated_table_extractions,distinct_papers=False, debug=False)
relation_stats = _freequency(colunm="property",df=curated_table_extractions,distinct_papers=False)

#display(entity_stats)
#display(relation_stats)

In [None]:
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
import base64
import io
import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import pandas as pd
import dash_cytoscape as cyto
def visualize_the_knowledgegraph_cytoscape(graph):
    """Build a Dash Cytoscape application displaying ``graph``.

    The RDF graph is converted to a networkx directed graph. Every node whose
    identifier starts with ``http`` becomes a Cytoscape node, sized from the
    global ``entity_stats`` when its short name appears there. Every edge
    whose predicate short name is not ``label`` becomes a Cytoscape edge,
    whose thickness comes from the global ``relation_stats``.

    Returns:
        A tuple ``(app, elements, G)``: the Dash application (not started),
        the list of Cytoscape elements and the networkx graph.
    """
    # Will contain the elements to send to Cytoscape.
    # Don't use it directly, use the functions addNode() and addEdge() instead.
    elements = []

    def addNode(node_id, label=None, label_size=10, label_color="black", radius=30, node_color='grey'):
        """
        Adds a node to the list of objects to display in Cytoscape.
        Must have an id, the rest is optional.
        """
        if label is not None:
            actual_label = label.lower()
        else:
            actual_label = str(node_id).lower().split("/")[-1].split("#")[-1]

        if radius == 0:
            radius = 30

        elements.append({
            "data": {
                "id": str(node_id).lower(),
            },
            "style": {
                "label": actual_label,
                "width": radius,
                "height": radius,
                "background-color": node_color,
                "font-size": f"{label_size}px",
                "color": label_color
            }
        })

    def addEdge(edge_id, from_id, to_id, label=None, label_size=10, label_color="black", thickness=2, edge_color="grey", edge_style="solid"):
        """
        Adds an edge to the list of objects to display in Cytoscape.
        Must have an id, the id of the node the link comes from (from_id) and the id of the node it is going towards (to_id).
        """
        if label is not None:
            actual_label = label.lower()
        else:
            actual_label = str(edge_id).lower().split("/")[-1].split("#")[-1]

        if thickness == 0:
            thickness = 2

        elements.append({
            "data": {
                "id": str(edge_id),
                "source": str(from_id).lower(),
                "target": str(to_id).lower(),
            },
            "style": {
                "label": actual_label,
                "font-size": f"{label_size}px",
                "width": thickness,
                "color": label_color,
                "line-color": edge_color,
                "line-style": edge_style
            }
        })

    G = rdflib_to_networkx_digraph(graph)

    # Series.iteritems() was removed in pandas 2.0; items() exists in every version.
    # Building the lookups once also avoids rescanning the stats for every node and edge.
    entity_counts = dict(entity_stats.items())
    relation_counts = dict(relation_stats.items())

    for node in G.nodes():
        if not str(node).startswith("http"):
            continue
        node_label = str(node).split("/")[-1].split("#")[-1]
        node_radius = 5
        if node_label in entity_counts:
            node_radius = int(entity_counts[node_label]) * node_radius
        addNode(str(node), node_color="pink", label_color='#ed87e0', radius=node_radius)

    for source, target, edge_attrs in G.edges(data=True):
        # These attributes are written on the graph itself, which is returned to the caller.
        if 'value' not in edge_attrs and 'width' not in edge_attrs and 'weight' in edge_attrs:
            edge_attrs['value'] = edge_attrs['weight']
        if 'triples' in edge_attrs:
            # The predicate of the first triple gives the edge its title.
            edge_attrs['title'] = edge_attrs['triples'][0][1]
        edge_id = str(source).lower().replace(" ", "_") + "_" + str(target).lower()
        edge_label = str(edge_attrs['title']).split("/")[-1].split("#")[-1]

        if edge_label == "label":
            continue
        thickness = 2
        if edge_label in relation_counts:
            thickness = int(relation_counts[edge_label]) * thickness
        addEdge(edge_id, from_id=str(source), to_id=str(target), label=edge_label,
                label_size=6, thickness=thickness, edge_color="#DDDDDD")

    app = JupyterDash("YourAppExample")

    app.layout = html.Div([
        cyto.Cytoscape(
            id="cytoscape",
            elements=elements,
            layout={"name": "cose"},  # "cose" is nice because it rearranges the nodes spatially in a smart way
            style={"height": "800px", "width": "100%"},  # to have a larger cell for the graph
            stylesheet=[
                {
                    'selector': 'edge',
                    'style': {
                        # The default curve style does not work with certain arrows
                        'curve-style': 'bezier'
                    }
                }, {
                    'selector': 'edge',
                    'style': {
                        'source-arrow-color': 'black',
                        'target-arrow-shape': 'triangle',
                        'line-color': '#DDDDDD'
                    }
                }
            ]
        )
    ])

    app.width = "800px"
    app.height = "800px"
    return app, elements, G


app_vis, cyto_elements, G = visualize_the_knowledgegraph_cytoscape(content_graph)
app_vis.run_server(mode="inline")

In [None]:
import os
import json
from IPython.display import IFrame
from IPython.core.display import display

import random
import string

def randomString(stringLength=8):
    """Return ``stringLength`` lowercase ASCII letters drawn with ``random.choice``."""
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(stringLength):
        picked.append(random.choice(alphabet))
    return ''.join(picked)


def show_graph(G):
    """Render the networkx graph ``G`` with the radial graph web application.

    Every node whose identifier starts with ``http`` and every edge whose
    predicate short name is not ``label`` is turned into an element, sized
    from the global ``entity_stats`` and ``relation_stats``. The elements and
    display options are injected into the ``radialgraph`` HTML template. The
    page is written to a randomly named file under ``tmp`` and displayed in
    an IFrame.
    """
    # Will contain the elements to display.
    # Don't use it directly, use the functions addNode() and addEdge() instead.
    elements = []

    def addNode(node_id, label=None, label_size=10, label_color="black", radius=30, node_color='grey'):
        """
        Adds a node to the list of objects to display.
        Must have an id, the rest is optional.
        """
        if label is not None:
            actual_label = label
        else:
            actual_label = str(node_id).split("/")[-1].split("#")[-1]

        if radius == 0:
            radius = 5

        elements.append({
            "data": {
                "id": str(node_id),
            },
            "style": {
                "label": actual_label,
                "width": radius,
                "height": radius,
                # "background-color": node_color,
                "font-size": f"{label_size}px",
                "color": label_color
            }
        })

    def addEdge(edge_id, from_id, to_id, label=None, label_size=10, label_color="black", thickness=2, edge_color="grey", edge_style="solid"):
        """
        Adds an edge to the list of objects to display.
        Must have an id, the id of the node the link comes from (from_id) and the id of the node it is going towards (to_id).
        """
        if label is not None:
            actual_label = label
        else:
            actual_label = str(edge_id).split("/")[-1].split("#")[-1]

        if thickness == 0:
            thickness = 2

        elements.append({
            "data": {
                "id": str(edge_id),
                "source": str(from_id),
                "target": str(to_id),
            },
            "style": {
                "label": actual_label,
                "font-size": f"{label_size}px",
                "width": thickness,
                "color": label_color,
                "line-color": edge_color,
                "line-style": edge_style,
                "curve-style": "bezier",
                "target-arrow-shape": "triangle",
                "target-arrow-color": edge_color,
                "arrow-scale": thickness * 0.5,
            }
        })

    # Series.iteritems() was removed in pandas 2.0; items() exists in every version.
    # Building the lookups once also avoids rescanning the stats for every node and edge.
    entity_counts = dict(entity_stats.items())
    relation_counts = dict(relation_stats.items())

    for node in G.nodes():
        if not str(node).startswith("http"):
            continue
        node_label = str(node).split("/")[-1].split("#")[-1]
        node_radius = 5
        if node_label in entity_counts:
            node_radius = int(entity_counts[node_label]) * node_radius
        addNode(str(node), label_size=2, radius=node_radius)

    for source, target, edge_attrs in G.edges(data=True):
        # These attributes are written on the edges of G itself.
        if 'value' not in edge_attrs and 'width' not in edge_attrs and 'weight' in edge_attrs:
            edge_attrs['value'] = edge_attrs['weight']
        if 'triples' in edge_attrs:
            # The predicate of the first triple gives the edge its title.
            edge_attrs['title'] = edge_attrs['triples'][0][1]
        edge_id = str(source).lower().replace(" ", "_") + "_" + str(target).lower()
        edge_label = str(edge_attrs['title']).split("/")[-1].split("#")[-1]

        if edge_label == "label":
            continue
        thickness = 2
        if edge_label in relation_counts:
            thickness = int(relation_counts[edge_label]) * thickness
        addEdge(edge_id, from_id=str(source), to_id=str(target), label=edge_label,
                label_size=6, thickness=thickness, edge_color="#DDDDDD")

    options = {
        "labelProperty": "style.label",      # Name of the property that contains the label. If not provided, the full id will be used for display
        "nodeRadiusProperty": "style.width", # if provided, this prevails on "minNodeRadius" and "maxNodeRadius"
        "minNodeRadius": 14,                 # default: 10 : When computed automatically based on number of links (unused if nodeRadiusProperty is provided)
        "maxNodeRadius": 20,                 # default: 50 : When computed automatically based on number of links (unused if nodeRadiusProperty is provided)
        "minEdgeThickness": 1,               # default: 1.5 : When computed automatically based on number of links
        "maxEdgeThickness": 20,              # default: 100 : When computed automatically based on number of links
        "edgeThicknessProperty": "style.width", # if provided, it forces each edge to be of a given thickness. This prevails on "minEdgeThickness" and "maxEdgeThickness"
        #"nodePadding": 10,                  # default: (minNodeRadius + maxNodeRadius) : two nodes cannot get closer than this distance
        "graphPadding": 100,                 # default: minNodeRadius : blank space left around the graph. Make it bigger if labels are stepping outside
        "mode": "mono",                      # default: 'mono' (aggregate inbound and outbound links), can be 'directional' (differentiate inbound and outbound links)
        "defaultNodeColor": "orange",        # default: "grey" : color of nodes per default
        #"nodeColorProperty": "style.background-color", # if provided, this prevails on "defaultNodeColor". Convenient to give each node a different color
        "linkColor": "#00bef2",              # default: "grey" : color of links per default
        #"inboundLinkColor": "blue",         # default: "#076dd9" : applies only with graphMode = "mono", color of inbound links when hovered (will have the color of the option "linkColor" when not hovered)
        #"outboundLinkColor": "red",         # default: "#db2612" : applies only with graphMode = "mono", color of outbound links when hovered (will have the color of the option "linkColor" when not hovered)
        "centerGravity": 0.3,                # default: 0.3 : Makes links being more attracted by the center of the graph when close to 1.
        "smoothTransition": False,           # default: true : if True, the elements will transition-fade when hovered. If too many data, better put False for performance
    }

    def _read_text(path):
        """Return the content of the text file at ``path``, closing the file afterwards."""
        with open(path, encoding="utf-8") as source_file:
            return source_file.read()

    # Creating the webapp to embed
    core_js = _read_text('radialgraph/core.js')
    panzoom_js = _read_text('radialgraph/panzoom.js')
    page = _read_text('radialgraph/master.html')
    # Inject the elements built above from G, not a global built by another cell.
    page = page.replace("SHARED_DATASET", json.dumps(elements))
    page = page.replace("SHARED_OPTIONS", json.dumps(options))
    page = page.replace("CORE_JS", core_js)
    page = page.replace("PANZOOM", panzoom_js)

    # Make sure the output directory exists before writing the page.
    os.makedirs("tmp", exist_ok=True)
    index_copy_filename = os.path.join("tmp", f"{randomString()}.html")
    with open(index_copy_filename, "w", encoding="utf-8") as local_html_copy:
        local_html_copy.write(page)

    # Display the webapp
    display(IFrame(src=index_copy_filename, width='1000px', height='3000px'))
    

show_graph(G)

# Version the knowledge graph
The user can save a knowledge graph with a version.