# Goal of the notebook
End-to-end pipeline for searching articles of interest, extracting entities of interest, and building, accessing and deploying a knowledge graph and a co-mention graph.

In [None]:
import getpass
import os
import pathlib

import pandas as pd
import requests
import sqlalchemy
import ipywidgets

from bbsearch.widgets import ArticleSaver, SearchWidget, MiningWidget, SchemaRequest

# Set a Project

The user chooses / creates a project to host a KG.

* Use the [Nexus web application](https://bbp.epfl.ch/nexus/web) to get a token.
* Once a token is obtained then proceed to paste it below.

In [None]:
import getpass
# Prompt for the Nexus token; getpass reads it without echoing the input.
TOKEN = getpass.getpass()

In [None]:
from kgforge.core import KnowledgeGraphForge

In [None]:
# Configure a 'forge' to manage (create, access and deploy) the knowledge graph within a given Blue Brain Nexus Project.
# The path of the forge configuration file is read from the FORGE_CONFIG_FILE environment variable.
FORGE_CONFIG_FILE = os.getenv("FORGE_CONFIG_FILE")
if FORGE_CONFIG_FILE is None:
    # Explicit check rather than `assert`: assertions are skipped under `python -O`
    # and give no hint about what is missing.
    raise EnvironmentError(
        "The FORGE_CONFIG_FILE environment variable must be set to the path of a forge configuration file."
    )
forge = KnowledgeGraphForge(FORGE_CONFIG_FILE, token=TOKEN, debug=True)

# Set topic
The user defines a topic.

In [None]:
import jwt
from IPython.display import display, HTML

In [None]:
# Module-level state shared by the widget callbacks of this cell.
topic_resource = None
kg_resource = None

# The token is decoded only to read the user name, so no verification is requested.
# The `options` form is used because the `verify=False` keyword is not honoured by
# PyJWT 2.x; every check is switched off explicitly so that PyJWT 1.x behaves the
# same way as the former `verify=False` call.
_JWT_NO_VERIFICATION = {
    "verify_signature": False,
    "verify_exp": False,
    "verify_nbf": False,
    "verify_iat": False,
    "verify_aud": False,
}
agent_username = jwt.decode(TOKEN, options=_JWT_NO_VERIFICATION)['preferred_username']

def save_topic(b):
    """Register a new Topic resource from the 'Create topic' form.

    Click handler of the 'Create' button (``w9``). The form fields ``w1``..``w8``
    are the children of the second tab of ``widget``. On success the created
    resource is stored in the global ``topic_resource`` and the form is cleared.

    Args:
        b: the clicked button (unused).
    """
    global topic_resource
    output.clear_output()
    output2.clear_output()
    output3.clear_output()

    topic_name = w1.value
    # Validate before touching the store: the check used to run only after
    # forge.register(), so an unnamed topic was registered and then rejected in the UI.
    if topic_name == "":
        with output2:
            print("Please provide a topic name")
        return

    question_fields = (w5, w6, w7, w8)
    topic_to_save = {
        'id': str(topic_name).replace(' ', '_'),
        'type': 'Topic',
        'name': topic_name,
        'field': w2.value,
        'description': w3.value,
        'keywords': w4.value,
        'question': [field.value for field in question_fields],
    }
    topic_resource = forge.from_json(topic_to_save)
    forge.register(topic_resource)
    with output2:
        print("Topic saved!")
        # Empty the form so that another topic can be entered.
        for field in (w1, w2, w3, w4) + question_fields:
            field.value = ""

def get_topics(b):
    """List the topics created by the current user in the topic dropdown.

    Click handler of the 'List all your topics' button. Runs a SPARQL query for
    non-deprecated Topic resources created by ``agent_username``, stores the
    result in the global ``topics_df`` and shows the dropdown ``w0`` together
    with the topic detail form ``s12``.

    Args:
        b: the clicked button (unused).
    """
    global topics_df
    for area in (output, output2, output3):
        area.clear_output()

    sparql_query = f"""
    SELECT ?id ?name ?description ?keywords ?field ?question ?createdAt
    WHERE {{
        ?id a Topic ;
            name ?name ;
            description ?description ;
            keywords ?keywords ;
            field ?field ;
            question ?question ;
            <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
            <https://bluebrain.github.io/nexus/vocabulary/createdAt> ?createdAt ;
            <https://bluebrain.github.io/nexus/vocabulary/createdBy> <{forge._store.endpoint}/realms/bbp/users/{agent_username}> .
    }}
    """
    found = forge.sparql(sparql_query, limit=100)

    if len(found) < 1:
        with output:
            print("No topics found!")
        return

    topics_df = forge.as_dataframe(found)
    output.clear_output()
    with output:
        # One entry per distinct topic name, preceded by an empty choice.
        w0.options = [""] + sorted(set(topics_df.name))
        w0.value = ""
        w0.placeholder = "Select topic"
        w0.observe(topics_change, names='value')
        display(w0)
        display(s12)

def topics_change(change):
    """Load the topic selected in the dropdown into the detail form.

    Observer of ``w0.value``. Clears the detail fields, retrieves the selected
    topic into the global ``topic_resource`` and fills the form with its field,
    description, keywords and questions.

    Args:
        change: the change dictionary passed by the observer; ``change['new']``
            is the newly selected topic name ("" when nothing is selected).
    """
    global topic_resource
    output3.clear_output()
    with output:
        # Keep only the first displayed item of the output area before re-displaying the form.
        if len(output.outputs) >= 1:
            output.outputs = (output.outputs[0],)
        for field in (s5, s6, s7, s8, s9, s10, s11):
            field.value = ""
        if change['new'] != "":
            topic_id = list(set(topics_df[topics_df.name == change['new']].id))[0]
            topic_resource = forge.retrieve(topic_id)
            s5.value = topic_resource.field
            s6.value = topic_resource.description
            s7.value = topic_resource.keywords
            question = topic_resource.question
            if isinstance(question, str):
                question = [question]
            if isinstance(question, list):
                # zip() stops at the shorter sequence, so a topic holding more
                # questions than there are text boxes no longer raises IndexError.
                for question_field, text in zip(sq.children, question):
                    question_field.value = text
        display(s12)

def update_topic(b):
    """Write the detail form back to the selected topic and update it in the store.

    Click handler of the 'Update topic' button. Does nothing unless a topic is
    selected in ``w0``.

    Args:
        b: the clicked button (unused).
    """
    output2.clear_output()
    if w0.value == "":
        return

    expanded = forge.as_jsonld(topic_resource, form="expanded")
    topic_resource.id = expanded['@id']
    topic_resource.field = s5.value
    topic_resource.description = s6.value
    topic_resource.keywords = s7.value
    # sq holds the four question text boxes.
    topic_resource.question = [box.value for box in sq.children[:4]]
    forge.update(topic_resource)
    with output:
        print("topic updated!")
        
def get_datasets(b):
    """List the datasets of the current user that are about the selected topic.

    Click handler of the 'Show datasets for selected topic' button. The query
    result is stored in the global ``kg_resources``; when datasets exist they
    are offered in the radio buttons ``s2`` next to the reuse button ``s3``.

    Args:
        b: the clicked button (unused).
    """
    global kg_resources
    output3.clear_output()
    if w0.value == "":
        return

    topic_resource_id = forge.as_jsonld(topic_resource, form="expanded")['@id']
    sparql_query = f"""
            SELECT ?id ?name ?description ?keywords ?field ?question ?createdAt
            WHERE {{
                ?id a Dataset ;
                    name ?name ;
                    about <{topic_resource_id}> ;
                    <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
                    <https://bluebrain.github.io/nexus/vocabulary/createdAt> ?createdAt ;
                    <https://bluebrain.github.io/nexus/vocabulary/createdBy> <{forge._store.endpoint}/realms/bbp/users/{agent_username}> .
            }}
            """
    kg_resources = forge.sparql(sparql_query, limit=100, debug=True)
    print(len(kg_resources))

    with output3:
        if len(kg_resources) < 1:
            print("No datasets found!")
        else:
            display(s2)
            s2.options = [dataset.name for dataset in kg_resources]
            display(s3)
        
def download_dataset(b):
    """Download the selected dataset and load its curated table.

    Click handler of the 'Reuse selected dataset' button. The dataset chosen in
    ``s2`` is retrieved into the global ``kg_resource`` and its distributions
    are downloaded to ``/tmp/``. Every distribution whose name contains
    "curated" is read as CSV into the global ``table_extractions``. When there
    is no such distribution, ``table_extractions`` becomes an empty DataFrame
    and the user is told that nothing was downloaded.

    Args:
        b: the clicked button (unused).
    """
    global kg_resource
    global table_extractions
    resource_id = [r.id for r in kg_resources if r.name == s2.value][0]
    kg_resource = forge.retrieve(resource_id)
    forge.download(kg_resource, "distribution.contentUrl", "/tmp/", overwrite=True)

    curated_found = False
    for r in kg_resource.distribution:
        if "curated" not in r.name:
            continue
        curated_found = True
        table_extractions = pd.read_csv(f"/tmp/{r.name}")
        message = f"Dataset '{r.name}' with {len(table_extractions)} entities ready to be reused. Its content has been assigned to the variable 'table_extractions'. Please continue with the interactive UI section to visualise this dataset."
        with output3:
            print(message)

    # pd.read_csv never returns None, so the former "nothing downloaded" branch
    # could not run; the fallback is now taken when no curated file exists.
    if not curated_found:
        table_extractions = pd.DataFrame()
        with output3:
            print("No dataset has been downloaded")

def _action_button(description):
    """Build one of the wide (300px x 30px) action buttons of the 'Select topic' tab."""
    return ipywidgets.Button(
        description=description,
        button_style='',
        layout=ipywidgets.Layout(width='300px', height='30px'),
        disabled=False,
    )


def _text_box(**kwargs):
    """Build an enabled single-line text box."""
    return ipywidgets.Text(disabled=False, **kwargs)


# --- 'Select topic' tab: actions, dataset choice and topic detail form ---
s0 = _action_button('🔬 List all your topics')
s1 = _action_button("📃 Show datasets for selected topic")
s2 = ipywidgets.RadioButtons(description='Select:', disabled=False)
s3 = _action_button('📈 Reuse selected dataset')
s4 = _action_button('✏️ Update topic')
s5 = _text_box(description='Field:')
s6 = ipywidgets.Textarea(description='Description:', disabled=False)
s7 = ipywidgets.Textarea(description='Keywords:', disabled=False)
# Four unlabeled boxes holding the questions of the selected topic.
s8, s9, s10, s11 = (_text_box() for _ in range(4))

sq = ipywidgets.VBox(children=[s8, s9, s10, s11])

s12 = ipywidgets.VBox(children=[s5, s6, s7, ipywidgets.Label('Questions:'), sq, s4])

# --- 'Create topic' tab: topic creation form ---
w0 = ipywidgets.Dropdown(description='Select topic:', disabled=False)
w1 = _text_box(placeholder='e.g. COVID-19', description='Topic name:')
w2 = _text_box(placeholder='e.g. Neuroscience', description='Field:')
w3 = ipywidgets.Textarea(
    placeholder='Add a description of your topic',
    description='Description:',
    disabled=False,
)
w4 = ipywidgets.Textarea(
    placeholder='e.g. Coronavirus; COVID-19; SARS; risk factor; glycosylation; sugar; carbohydrates',
    description='Keywords:',
    disabled=False,
)
w5, w6, w7, w8 = (
    _text_box(placeholder='Add a question about your research topic') for _ in range(4)
)
w9 = ipywidgets.Button(
    description='Create',
    button_style='',
    tooltip='Create new topic',
    disabled=False,
)

# Output areas the callbacks print into.
output = ipywidgets.Output()
output2 = ipywidgets.Output()
output3 = ipywidgets.Output()

# --- Assemble the two tabs ---
buttons = ipywidgets.HBox(children=[s0, s1])
outputs = ipywidgets.HBox(children=[output, output3])
tab1 = ipywidgets.VBox(children=[buttons, outputs])
tab2 = ipywidgets.VBox(children=[
    w1, w2, w3, w4,
    ipywidgets.Label('Please express your research topic in a few questions:'),
    w5, w6, w7, w8, w9, output2,
])
widget = ipywidgets.Tab(children=[tab1, tab2])
for _tab_index, _tab_title in enumerate(('Select topic', 'Create topic')):
    widget.set_title(_tab_index, _tab_title)

# --- Wire each button to its click handler ---
for _button, _handler in (
    (w9, save_topic),
    (s0, get_topics),
    (s1, get_datasets),
    (s3, download_dataset),
    (s4, update_topic),
):
    _button.on_click(_handler)

display(widget)

# Data Import
The user loads data from a data source (CORD-19). The loaded data forms the corpus. The user searches the CORPUS in Blue Brain Search.

Search server URL

In [None]:
# URL of the search server; the SEARCH_ENGINE_URL environment variable overrides the default.
SEARCH_ENGINE_URL = os.getenv("SEARCH_ENGINE_URL", "http://dgx1.bbp.epfl.ch:8850")
assert SEARCH_ENGINE_URL is not None

# Call the /help endpoint to check that the server answers and identifies itself as a search server.
response = requests.post(f"{SEARCH_ENGINE_URL}/help")
assert response.ok and response.json()['name'] == 'SearchServer', "The server is not accessible"
print(f"This server is using the database: {response.json()['database']}")

MySQL URL and engine

In [None]:
# Host and port of the MySQL server; the MYSQL_DB_URI environment variable overrides the default.
MYSQL_DB_URI = os.getenv("MYSQL_DB_URI", "dgx1.bbp.epfl.ch:8853")
# SQLAlchemy engine for the 'cord19_v47' database, using the pymysql driver and the guest account.
bbs_mysql_engine = sqlalchemy.create_engine(f'mysql+pymysql://guest:guest@{MYSQL_DB_URI}/cord19_v47')

Article saver

In [None]:
# The article saver is given the MySQL engine as its connection.
article_saver = ArticleSaver(connection=bbs_mysql_engine)

Search widget

In [None]:
# Build the search widget from the search server URL, the MySQL engine and the
# article saver; results_per_page=3 is passed to the widget.
search_widget = SearchWidget(
    bbs_search_url=SEARCH_ENGINE_URL,
    bbs_mysql_engine=bbs_mysql_engine,
    article_saver=article_saver,
    results_per_page=3)
# Last expression of the cell, so the notebook renders the widget.
search_widget

Status of the Article Saver

In [None]:
# Show the article saver's summary table (presumably the articles saved so far — confirm against ArticleSaver).
article_saver.summary_table()

# Set schemas
The user defines the KG schema.

In [None]:
# Schema request object; its `schema` attribute is assigned in the next cell.
schema_request = SchemaRequest()

In [None]:
# Columns of the schema table; only 'entity_type' and 'ontology_source' are filled in below.
columns = ['entity_type', 'property', 'property_type', 'property_value_type', 'ontology_source']

# (entity type, ontology source) pairs; None means that no ontology source is given.
etypes_sources = [
    ('CELL_COMPARTMENT', None),
    ('CELL_TYPE', None),
    ('CHEMICAL', 'NCIT'),
    ('CONDITION', None),
    ('DISEASE', 'NCIT'),
    ('DRUG', None),
    ('ORGAN', 'NCIT'),
    ('ORGANISM', 'NCIT'),
    ('PATHWAY', 'Reactome'),
    ('PROTEIN', 'NCIT'),
]
schema_request_data = [
    dict(entity_type=etype, ontology_source=source)
    for etype, source in etypes_sources
]

schema_request.schema = pd.DataFrame(schema_request_data, columns=columns)
display(schema_request.schema)

# Create a knowledge graph according to schemas
The user extracts data from the text of a set of papers using selected Named Entity Recognizers and Relation Extractors from Blue Brain Search.
The user can preview the extracted data.
The user curates extracted data.
The user links the extracted entities and relations to ontologies.
The user saves data into Knowledge Graph.

- **input**: raw text
- **output**: csv table of extracted entities/relations

In [None]:
# Sample abstract, written over several lines for readability.
_RAW_DEFAULT_TEXT = """Autophagy maintains tumour growth through circulating
arginine. Autophagy captures intracellular components and delivers them to
lysosomes, where they are degraded and recycled to sustain metabolism and to
enable survival during starvation. Acute, whole-body deletion of the essential 
autophagy gene Atg7 in adult mice causes a systemic metabolic defect that 
manifests as starvation intolerance and gradual loss of white adipose tissue, 
liver glycogen and muscle mass.  Cancer cells also benefit from autophagy. 
Deletion of essential autophagy genes impairs the metabolism, proliferation, 
survival and malignancy of spontaneous tumours in models of autochthonous 
cancer. Acute, systemic deletion of Atg7 or acute, systemic expression of a 
dominant-negative ATG4b in mice induces greater regression of KRAS-driven 
cancers than does tumour-specific autophagy deletion, which suggests that host 
autophagy promotes tumour growth.
"""
# Flatten to a single line: newlines become spaces, then double spaces are collapsed.
DEFAULT_TEXT = _RAW_DEFAULT_TEXT.replace('\n', ' ')
DEFAULT_TEXT = DEFAULT_TEXT.replace('  ', ' ')

In [None]:
# URL of the text mining server; the TEXT_MINING_URL environment variable overrides the default.
TEXT_MINING_URL = os.getenv("TEXT_MINING_URL", "http://dgx1.bbp.epfl.ch:8852")
# Call the /help endpoint to check that the server answers and identifies itself as a mining server.
response = requests.post(f"{TEXT_MINING_URL}/help")
assert response.ok and response.json()['name'] == 'MiningServer'
print(f"This server is using the database: {response.json()['database']}")

In [None]:
# Build the mining widget from the mining server URL, the schema request and the
# article saver; DEFAULT_TEXT is passed as the widget's default text.
mining_widget = MiningWidget(
    mining_server_url=TEXT_MINING_URL,
    schema_request=schema_request,
    article_saver=article_saver,
    default_text=DEFAULT_TEXT)
# Last expression of the cell, so the notebook renders the widget.
mining_widget

- **input**: csv table of extracted entities/relations
- **output**: knowledge graph

In [None]:
# Get DataFrame of extractions
table_extractions = mining_widget.get_extracted_table()

# Rows that agree on every column except 'entity_type' are duplicates: keep the first
# of each group, then discard the rows that have no entity.
columns_duplicates = list(table_extractions.columns)
columns_duplicates.remove('entity_type')
table_extractions = (
    table_extractions
    .drop_duplicates(subset=columns_duplicates, keep='first', ignore_index=True)
    .dropna(subset=["entity"])
)

In [None]:
# Report the number of rows currently held by `table_extractions`.
print(f'The table has {table_extractions.shape[0]} rows.')

In [None]:
import jupyter_server_proxy
import jupyter_dash
import dash
import dash_daq as daq
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table
from jupyter_dash import JupyterDash
import dash_bootstrap_components as dbc
import plotly.express as px
import operator
from typing import Iterator, Dict
import pandas as pd
import numpy as np
from pygments import highlight
from pygments.lexers import JsonLdLexer, TurtleLexer
from pygments.formatters import TerminalFormatter, TerminalTrueColorFormatter
import json
import uuid
from tqdm.notebook import tqdm
from kganalytics_utils import prepare_data


def pretty_print(a_json):
    """Print ``a_json`` as indented JSON, highlighted with the JSON-LD lexer for terminal output."""
    serialized = json.dumps(a_json, indent=2)
    highlighted = highlight(serialized, JsonLdLexer(), TerminalFormatter())
    print(highlighted)

In [None]:
# Private jupyter_dash helper; presumably asks the notebook front end for its
# configuration over a comm channel — TODO confirm against jupyter_dash.
from jupyter_dash.comms import _send_jupyter_config_comm_request
_send_jupyter_config_comm_request()


In [None]:
# Let JupyterDash infer the Jupyter proxy configuration before the app is created.
JupyterDash.infer_jupyter_proxy_config()

In [None]:
%%time

print("Setting default term filters: the user can remove them later on in the UI if need be ...")

filtered_table_extractions = table_extractions.copy()

# Terms kept by default, whatever their frequency. The list is split on ';' and each
# term is stripped: splitting on "; " fused "ventilation;SARS" into a single term that
# could never match an entity.
default_term_filters = [
    term.strip()
    for term in 'Glucose; Covid-19; SARS-CoV-2; Diabetes; IL-1; ACE2; glycosylation; hyperglycemia; shock; fatigue; CVD; vasoconstriction; lactate; insulin; SP-D; HbA1c; LDH; glycolysis; GLUT; macrophage; lymphocytes; ventilation;SARS; ARDS; Cytokine Storm; pneumonia; multi-organs failure; thrombosis; inflammation; IL-6; CRP; D-Dimer; Ferritin; Lung Disease; Hypertension; Aging; COPD; angiotensin 2 (or angiotensin II or AngII); Obesity; ICU (intensive care unit); ventilation; ketogenic diet'.split(";")
    if term.strip()
]

# Lower-cased entity column, computed once instead of once per term.
lowercase_entities = filtered_table_extractions["entity"].str.lower()

# For every default term, collect the spellings actually present in the extractions
# (case-insensitive match). Each match is stored as a tuple of spellings.
default_found_term_filters = set()
for term_filter in default_term_filters:
    matching_entities = filtered_table_extractions.loc[
        lowercase_entities.eq(term_filter.lower()), "entity"
    ].unique()
    if len(matching_entities) > 0:
        default_found_term_filters.add(tuple(matching_entities))
# The first spelling of each match is the one preselected in the UI.
term_filter_options = [term_filter[0] for term_filter in default_found_term_filters]

print("Done.")

print("Computing entity frequencies ...")



def _frequency(group_by, retrieve_key, df, distinct_papers=True, debug=False):
    
    if debug:
        display(df.head(100))
    if distinct_papers:
        colunm_stats = df[[group_by, retrieve_key]].groupby(group_by)[retrieve_key].unique()
    else:
        colunm_stats = df[[group_by, retrieve_key]].groupby(group_by)[retrieve_key].count()
    if debug:
        display(colunm_stats)
    
    return colunm_stats


# Run the project helper on the extractions (presumably one row per entity with
# aggregated paper/paragraph/section/entity_type cells — confirm against prepare_data).
filtered_table_extractions, counts = prepare_data(filtered_table_extractions)
filtered_table_extractions = filtered_table_extractions.reset_index()

# NOTE(review): this counts the items of each "paper" cell; the part before ":" is
# extracted but not de-duplicated — confirm whether distinct papers were intended.
filtered_table_extractions["paper_frequency"] = filtered_table_extractions["paper"].transform(
    lambda papers: len([str(paper).split(":")[0] for paper in papers])
)
# Turn the aggregated cells into plain lists.
for _aggregated_column in ("paper", "paragraph", "section", "entity_type"):
    filtered_table_extractions[_aggregated_column] = filtered_table_extractions[_aggregated_column].transform(
        lambda cell: list(cell)
    )

# Default (and minimum) value of the entity frequency filter in the UI.
entity_frequency = 1

curated_table_extractions = filtered_table_extractions.copy()

# Pre-computed ontology linking table; its "mention" column is exposed as "entity".
linked_mention_df_unique = pd.read_pickle(
    "/gpfs/bbp.cscs.ch/project/proj116/network_analytics/data/cord_47_linking.pkl"
).rename(columns={"mention": "entity"})
# Pre-computed entity type table, indexed by entity.
entity_to_types = pd.read_csv(
    "/gpfs/bbp.cscs.ch/project/proj116/network_analytics/data/cord_47_entity_types.csv"
).set_index('entity')

print("Done.")

In [None]:

from dash.exceptions import PreventUpdate

# Create the Dash application (JupyterDash variant) that hosts the curation UI.
app = JupyterDash('Extracted Entities Curation App')

# Keep a handle on the application's underlying server object.
server = app.server

from operator import ge, gt, lt, le, eq, ne


# Filter operators understood by the table, grouped by meaning; the first spelling of
# each group (stripped) is the canonical name returned by split_filter_part.
operators = [['ge ', '>='],
             ['le ', '<='],
             ['lt ', '<'],
             ['gt ', '>'],
             ['ne ', '!='],
             ['eq ', '='],
             ['contains '],
             ['datestartswith ']]

# Choices of the entity frequency operator dropdown.
dropdown_freq_filter_list = [{"label":">","value":"gt"},
                             {"label":">=","value":"ge"},
                             {"label":"<","value":"lt"},
                             {"label":"<=","value":"le"},
                             {"label":"=","value":"eq"},
                             {"label":"!=","value":"ne"}]

def split_filter_part(filter_part):
    """Split one filter expression such as ``{column} ge 5`` into its parts.

    The operator is the first one found after the closing brace of the column
    name (the longest spelling wins when two start at the same place, e.g. ``>=``
    over ``>``). Looking for it anywhere in the string used to misread values
    that contain operator text: ``{entity} contains "large cell"`` was parsed as
    a ``ge`` filter.

    Args:
        filter_part: a single expression of the table's filter query.

    Returns:
        ``(column name, canonical operator name, value)``. The value is a float
        when it parses as one, the unquoted text when it is quoted, and the raw
        text otherwise ("" when no value was typed). ``[None, None, None]`` is
        returned when no operator is found.
    """
    # Without braces the whole string is searched.
    search_from = filter_part.find('}') + 1

    best = None  # ((position, -len(symbol)), symbol, operator_type)
    for operator_type in operators:
        for symbol in operator_type:
            position = filter_part.find(symbol, search_from)
            if position == -1:
                continue
            rank = (position, -len(symbol))
            if best is None or rank < best[0]:
                best = (rank, symbol, operator_type)

    if best is None:
        return [None] * 3

    (position, _), symbol, operator_type = best
    name_part = filter_part[:position]
    name = name_part[name_part.find('{') + 1: name_part.rfind('}')]

    value_part = filter_part[position + len(symbol):].strip()
    if not value_part:
        # Nothing typed after the operator yet; indexing value_part[0] would fail.
        value = value_part
    elif value_part[0] == value_part[-1] and value_part[0] in ("'", '"', '`'):
        quote = value_part[0]
        value = value_part[1: -1].replace('\\' + quote, quote)
    else:
        try:
            value = float(value_part)
        except ValueError:
            value = value_part

    return name, operator_type[0].strip(), value

# Define UI layout

# --- CSV upload control ---
_load_button = dbc.Button("Load a CSV File", color="primary", className="mr-1", id="load_file")
_load_tooltip = dbc.Tooltip(
    "Load extracted entities in CSV format",
    target="load_file",
    placement="bottom",
)
_upload = dcc.Upload(
    id='datatable-upload',
    children=html.Div([_load_button, _load_tooltip]),
    className="mr-1",
)
button_group = dbc.ButtonGroup([_upload], className="mr-1")

buttons = dbc.FormGroup([button_group])

# --- Entity frequency filter: comparison operator and threshold ---
dropdown = dbc.FormGroup(
    [
        dbc.InputGroupAddon(
            dbc.Button("Entity Frequency", color="primary", id="entity_frequency"),
            addon_type="prepend",
            className="mr-1",
        ),
        dbc.Tooltip(
            "Select an operator and a frequency threshold",
            target="entity_frequency",
            placement="bottom",
        ),
        dcc.Dropdown(
            id='dropdown-freq-filter',
            value="ge",
            clearable=False,
            options=dropdown_freq_filter_list,
            className="mr-1",
        ),
        daq.NumericInput(
            id="entityfreqslider",
            min=entity_frequency,
            max=1000,
            value=entity_frequency,
            className="mr-1",
        ),
    ],
    className="mr-1",
)

# --- Entities to keep, preselected with the default terms found in the data ---
term_filters = dbc.InputGroup(
    [
        dbc.InputGroupAddon("Keep", addon_type="prepend"),
        dcc.Dropdown(
            id="term_filters",
            multi=True,
            value=term_filter_options,
            style={"width": "80%"},
            placeholder="Search for entities to keep",
        ),
    ],
    className="mb-1",
)

# --- Reset button ---
reset = dbc.FormGroup(
    [
        dbc.Button("Reset", color="primary", className="mr-1", id='table-reset'),
        dbc.Tooltip(
            "Reset table and graph to original extracted entities and default filters",
            target="table-reset",
            placement="bottom",
        ),
    ]
)

# --- Ontology linking button ---
link_ontology = dbc.FormGroup(
    [
        dbc.Button("Link to NCIT ontology", color="primary", className="mr-1", id='link_ontology'),
        dbc.Tooltip(
            "Click to apply ontology linking",
            target="link_ontology",
            placement="bottom",
        ),
    ]
)

# Inline form gathering all the controls above.
form_table = dbc.Form([buttons, dropdown, reset, link_ontology, term_filters], inline=True)

# Table columns: every column of the prepared extractions except the per-mention
# location columns.
_columns_not_shown = ["paper", "paragraph", "section"]
columns = [
    {
        "name": column_name,
        "id": column_name,
        "clearable": True,
        "selectable": True,
        "renamable": False,
        "hideable": True,
        "deletable": False,
    }
    for column_name in filtered_table_extractions.columns
    if column_name not in _columns_not_shown
]

# Table of extracted entities. Sorting, filtering and paging are all "custom",
# i.e. handled by the callbacks rather than by the table itself.
_entities_table = dash_table.DataTable(
    id='datatable-upload-container',
    columns=columns,
    style_cell={'whiteSpace': 'normal'},
    style_data_conditional=[
        {'if': {'row_index': 'odd'}, 'backgroundColor': 'rgb(248, 248, 248)'},
    ],
    style_header={'backgroundColor': 'rgb(230, 230, 230)', 'fontWeight': 'bold'},
    css=[{'selector': 'dash-fixed-content', 'rule': 'height: 100%;'}],
    sort_action="custom",  # native
    sort_mode="multi",
    column_selectable="multi",
    filter_action="custom",
    filter_query='',
    selected_columns=[],
    page_action="custom",  # native
    export_format='csv',
    export_headers='display',
    merge_duplicate_headers=True,
    selected_rows=[],
    page_current=0,
    page_size=10,
    sort_by=[],
)

# Page layout: the controls, the table and the scatter plot, one per row.
app.layout = html.Div(
    [
        dbc.Row(dbc.Col(form_table)),
        dbc.Row(dbc.Col(_entities_table)),
        dbc.Row(dbc.Col(dcc.Graph(id='datatable-upload-Scatter'))),
    ]
)

# Define callbacks

def parse_contents(contents, filename):
    """Decode the contents of an uploaded file into a DataFrame.

    Args:
        contents: the upload as a ``<content type>,<base64 payload>`` string.
        filename: name of the uploaded file; only names containing "csv" are parsed.

    Returns:
        The parsed DataFrame, or None when the file name does not contain "csv".
    """
    # Imported here because neither module is imported at the top of the notebook;
    # without them every upload failed with a NameError.
    import base64
    import io

    # Split on the first comma only: everything after it is the payload.
    content_type, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)
    if 'csv' in filename:
        return pd.read_csv(io.StringIO(decoded.decode('utf-8')))
    return None

@app.callback(
    Output('datatable-upload-container', 'style_data_conditional'),
    [Input('datatable-upload-container', 'selected_columns')]
)
def update_styles(selected_columns):
    """Return the conditional styles that give each selected column a light blue background."""
    highlighted = []
    for column_id in selected_columns:
        highlighted.append({
            'if': {'column_id': column_id},
            'background_color': '#D2F3FF',
        })
    return highlighted



@app.callback(
    Output("term_filters", "options"),
    [Input("term_filters", "search_value"),Input('link_ontology', 'n_clicks')],
    [State("term_filters", "value"),
    State('datatable-upload-container', 'data')],
)
def update_filter(search_value, click_link_ontology, values,data):
    """Compute the options offered by the "Keep" entity dropdown.

    The options are the currently selected values (replaced by their linked
    concept when the callback is triggered by the ontology linking button)
    followed by the non-deleted entities that contain the search text.

    Args:
        search_value: text typed in the dropdown.
        click_link_ontology: click count of the ontology linking button (unused).
        values: entities currently selected in the dropdown.
        data: rows currently shown in the table (unused).

    Returns:
        A list of ``{"label": ..., "value": ...}`` options without duplicates.

    Raises:
        PreventUpdate: when there is neither a search text nor a selection.
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        button_id = 'No clicks yet'
    else:
        button_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if not search_value and values is None:
        raise PreventUpdate

    labels = []

    if values is not None:
        if button_id == "link_ontology":
            for value in values:
                try:
                    # NOTE(review): this looks the entity up in the *index* of
                    # linked_mention_df_unique, while link_ontology() treats
                    # "entity" as a column — confirm that the lookup can succeed.
                    linked = linked_mention_df_unique.loc[str(value).lower()]
                    labels.append(linked.concept.lower())
                except Exception:
                    # Best effort: keep the original value when it cannot be linked.
                    labels.append(value)
        else:
            labels.extend(values)

    if search_value is not None:
        # Literal match: the text typed by the user is not a regular expression
        # (typing "(" used to raise), and rows without an entity never match.
        matches = non_deleted_table_extractions["entity"].str.contains(
            str(search_value), regex=False, na=False
        )
        labels.extend(non_deleted_table_extractions.loc[matches, "entity"].unique())

    # A selected entity can also match the search text; emit each option once,
    # keeping the first occurrence.
    return [{"label": label, "value": label} for label in dict.fromkeys(labels)]
    


@app.callback([Output('entityfreqslider', 'value'),
               Output('dropdown-freq-filter', 'value')],
              [ Input('table-reset', 'n_clicks')],
             [State('entityfreqslider', 'value'),
              State('dropdown-freq-filter', 'value')])
def reset(reset, entityfreq,freqoperator):
    """Restore the original extractions and the default frequency filter.

    Runs on the initial call and when the Reset button is clicked: both working
    tables are pointed back at ``filtered_table_extractions``.

    Args:
        reset: click count of the Reset button (unused).
        entityfreq: current frequency threshold (unused).
        freqoperator: current frequency operator (unused).

    Returns:
        ``[entity_frequency, "ge"]``, the default threshold and operator.
    """
    global curated_table_extractions
    global non_deleted_table_extractions

    ctx = dash.callback_context
    button_id = ctx.triggered[0]['prop_id'].split('.')[0] if ctx.triggered else 'No clicks yet'

    if button_id in ("table-reset", "No clicks yet"):
        curated_table_extractions = filtered_table_extractions
        non_deleted_table_extractions = filtered_table_extractions
        return [entity_frequency, "ge"]
    
import traceback    

# Table the frequency filter is applied to; it starts as an alias of
# filtered_table_extractions (no copy is made) and update_output rebinds it when rows are deleted.
non_deleted_table_extractions = filtered_table_extractions


def get_freq(row, operator, filter_value, term_filters):
    """Tell whether a row passes the entity frequency filter.

    A row is kept when its ``paper_frequency`` compares favourably with the
    threshold, or when its entity is one of the terms to keep.

    Args:
        row: a table row exposing ``paper_frequency`` and ``entity``.
        operator: name of the comparison: "ge", "gt", "lt", "le", "eq" or "ne".
        filter_value: the threshold; converted with ``int()``.
        term_filters: lower-cased entities that are always kept.

    Returns:
        A truthy value when the row must be kept.

    Raises:
        ValueError: when ``operator`` is not one of the supported names.
    """
    # The `operator` parameter shadows the module name, hence the alias.
    import operator as operator_module

    # `operator` is the dropdown value sent by the browser: it is checked against a
    # whitelist instead of being passed to eval(), which would execute arbitrary code.
    if operator not in ("ge", "gt", "lt", "le", "eq", "ne"):
        raise ValueError(f"Unsupported comparison operator: {operator!r}")
    compare = getattr(operator_module, operator)
    return compare(row.paper_frequency, int(filter_value)) or str(row['entity']).lower() in term_filters
import math
def link_ontology(df):
    """Replace the entities of ``df`` by their linked ontology concept.

    ``df`` is joined with ``linked_mention_df_unique`` on the entity; each row's
    entity becomes its concept when one was found, and its entity type is looked
    up in ``entity_to_types``.

    Args:
        df: table of extracted entities with at least the "entity" and "paper" columns.

    Returns:
        The joined table with "entity" and "entity_type" rewritten.
    """
    
    # Left join on the entity: rows without a link get a null "concept".
    filtered_table_extractions_linked = df.set_index('entity').join(linked_mention_df_unique.set_index('entity'), on='entity')
    filtered_table_extractions_linked=filtered_table_extractions_linked.reset_index()

    filtered_table_extractions_linked.dropna(subset=['paper'], inplace=True)
    # Use the concept as entity when there is one, otherwise keep the extracted text.
    filtered_table_extractions_linked["entity"] = filtered_table_extractions_linked.apply(lambda row: row.concept if pd.notnull(row.concept) else row.entity, axis = 1)
    # NOTE(review): the lookup below runs for every row, including those whose concept is
    # null (which the previous line explicitly allows for) — this looks like it would raise
    # KeyError for unlinked entities; confirm and add a fallback if so.
    filtered_table_extractions_linked["entity_type"] = filtered_table_extractions_linked.apply(lambda row: entity_to_types.loc[row.concept], axis = 1)
    
    
    return filtered_table_extractions_linked
    
@app.callback([
               Output('datatable-upload-container', 'data'),
               Output('datatable-upload-container', 'columns'),
               Output('datatable-upload-container', 'editable'),
               Output('datatable-upload-container', 'row_deletable'),
               Output('datatable-upload-container', 'page_count')],
              [Input('datatable-upload-container', 'page_size'),
               Input('datatable-upload-container', 'page_current'),
               Input('datatable-upload-container','data_timestamp'),
               Input('datatable-upload', 'contents'),
               Input('entityfreqslider', 'value'),
               Input('dropdown-freq-filter', 'value'),
              Input('datatable-upload-container', 'sort_by'),
              Input('datatable-upload-container', 'filter_query'),
              Input('link_ontology', 'n_clicks')],
              [State("datatable-upload-container", "data"),
               State("datatable-upload-container", "columns"),
              State('datatable-upload', 'filename'),
              State('datatable-upload-container', 'derived_viewport_data'),
                State("term_filters", "value")
              ])
def update_output(page_size, page_current,ts,upload,entityfreq,
                  freqoperator,sort_by,filter_query,click_link_ontology,data,
                  columns, filename,derived_viewport_data, 
                  term_filters):
    """Recompute the page of rows shown in the entity table.

    Depending on what triggered the callback, the global
    ``curated_table_extractions`` is replaced by an uploaded CSV, stripped of the
    rows deleted in the UI, linked to the ontology, or re-filtered by entity
    frequency. The result is then filtered, sorted and paginated.

    Returns:
        ``(rows of the current page, columns, True, True, page_count)``; the two
        booleans make the table editable and its rows deletable.

    Raises:
        PreventUpdate: when an error occurs (its traceback is printed), so that
            the table keeps its previous state.
    """
    global curated_table_extractions
    global non_deleted_table_extractions
    try:
        ctx = dash.callback_context
        if not ctx.triggered:
            button_id = 'No clicks yet'
        else:
            button_id = ctx.triggered[0]['prop_id'].split('.')[0]

        if term_filters is not None:
            term_filters = [str(term_filter_value).lower() for term_filter_value in term_filters]
        else:
            term_filters = []

        # `upload` stays set once a file has been uploaded. The file is parsed only
        # when the upload itself triggered the callback; otherwise every later
        # callback would re-parse it and discard the rows deleted since.
        if upload is not None and button_id == "datatable-upload":
            curated_table_extractions = parse_contents(upload, filename).copy()
        elif button_id == "table-reset":
            curated_table_extractions = filtered_table_extractions
        elif derived_viewport_data:
            # Rows that were on the page but are no longer in the table data were
            # deleted by the user; entities to keep are protected from deletion.
            removed = [row for row in derived_viewport_data if row not in data and str(row["entity"]).lower() not in term_filters]
            for row in removed:
                removed_entity = str(row["entity"]).lower()
                curated_table_extractions = curated_table_extractions[curated_table_extractions.entity.str.lower() != removed_entity]
                non_deleted_table_extractions = non_deleted_table_extractions[non_deleted_table_extractions.entity.str.lower() != removed_entity]

        if button_id == "link_ontology":
            curated_table_extractions = link_ontology(curated_table_extractions)
        result = curated_table_extractions

        if button_id in ("entityfreqslider", "dropdown-freq-filter") and 'paper' in curated_table_extractions:
            curated_table_extractions = non_deleted_table_extractions[non_deleted_table_extractions.apply(lambda row: get_freq(row,freqoperator,entityfreq,term_filters), axis=1)]
            result = curated_table_extractions

        # Filter by properties
        dff = result
        if filter_query:
            filtering_expressions = filter_query.split(' && ')
            for filter_part in filtering_expressions:
                col_name, operator, filter_value = split_filter_part(filter_part)

                if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
                    dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
                elif operator == 'contains':
                    dff = dff.loc[dff[col_name].str.contains(filter_value)]
                elif operator == 'datestartswith':
                    dff = dff.loc[dff[col_name].str.startswith(filter_value)]

        # Sorting by properties
        if sort_by:
            result_sorted = dff.sort_values(
                [col['column_id'] for col in sort_by],
                ascending=[col['direction'] == 'asc' for col in sort_by],
                inplace=False
            )
        else:
            result_sorted = dff

        result_paginated = result_sorted.iloc[
            page_current*page_size:(page_current + 1)*page_size
        ]

        # Round up: floor division dropped the last, partially filled page.
        page_count = (len(result_sorted) + page_size - 1) // page_size

        return result_paginated.to_dict('records'), columns, True, True, page_count
    except Exception:
        traceback.print_exc()
        # Returning None is not a valid value for the five outputs; keep the table as it is.
        raise PreventUpdate





@app.callback([Output('datatable-upload-Scatter', 'figure')],
              [Input('datatable-upload-container', 'data_timestamp'),
               Input('datatable-upload-container', 'data')],)
def display_graph(dts, rows):
    """Build the scatter figure for the curated extractions table.

    The callback inputs (``dts``, ``rows``) only act as triggers; the figure is
    always computed from a copy of the module-level
    ``curated_table_extractions`` frame.

    Returns:
        A one-element list (the callback declares a list of outputs) holding
        either the figure produced by ``px.scatter`` or an empty placeholder
        figure.
    """
    df = curated_table_extractions.copy()

    # Start from an empty figure so that every path returns something valid.
    # Previously a non-empty frame without a "paper" column left `scatter`
    # unassigned and the callback failed with UnboundLocalError.
    scatter = {
        'data': [{
            'x': [],
            'y': []
        }]
    }

    has_data = not df.empty and len(df.columns) >= 1
    if has_data and "paper" in df:
        # Entity types may be lists; join them so they can be used as a colour key.
        entity_type_labels = df.entity_type.apply(
            lambda x: ",".join(x) if isinstance(x, list) else x)
        scatter = px.scatter(df, x=df.entity, y=df.paper_frequency, color=entity_type_labels)

    return [scatter]


# Display size used when embedding the Dash application.
width = "100%"
height = "100%"  # NOTE(review): not passed to run_server below - confirm whether it is used elsewhere
# Start the app in "jupyterlab" mode on port 8075.
app.run_server(mode="jupyterlab",width=width,port=8075)

# Validate the knowledge graph
The content of the knowledge graph is validated. In this version, syntactic validation (e.g. checking that identifiers are well formed) is performed while building the knowledge graph: if the knowledge graph is built successfully, the validation passes. In case of a warning (e.g. because of an unexpected character such as `+` in an extracted entity), the user can go back to the curation step and further curate the extracted entities.

In [None]:
%%time
# Build knowledge graph from enriched annotations
import json
from typing import Iterable, Dict
from rdflib import Graph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
import networkx as nx
from rdflib.namespace import RDF, RDFS, SKOS
from networkx.readwrite.json_graph.cytoscape import cytoscape_data




from kganalytics.kganalytics_utils import generate_analysis, generate_comention_analysis, prepare_data, generate_comention_analysis1
# Generate a paper-based co-mention network from the curated mentions data frame:
# - min_occurrences=5 and n_most_frequent=300 are passed to the analysis below
#   (the previous comment said "100 most frequent", which did not match the call)
# - only the "paper" factor is analysed here


def _union_of_lists(list_values):
    """Return the set of all items contained in an iterable of lists.

    Single pass over the data; the former ``set(sum(x, []))`` re-copied the
    accumulated list for every row of the group (quadratic).
    Assumes every value is a list - TODO confirm against the curation step.
    """
    return {item for items in list_values for item in items}


# The "concept" column becomes "entity"; the original "entity" column is kept as "entity_raw".
curated_table_extractions_grouped = curated_table_extractions.rename(
    columns={"entity": "entity_raw", "concept": "entity"})

# One row per entity: first entity type seen, plus the distinct papers, sections and paragraphs.
curated_table_extractions_grouped = curated_table_extractions_grouped.groupby("entity").agg({
    "entity_type": lambda x: list(x)[0],
    "paper": _union_of_lists,
    "section": _union_of_lists,
    "paragraph": _union_of_lists,
})


graphs, trees = generate_comention_analysis1(
    curated_table_extractions_grouped, counts, min_occurrences=5, n_most_frequent=300, factors=["paper"], cores=10)



# Elements that carried no "paper" attribute; filled by build_cytoscape_data.
no_papers = []


def build_cytoscape_data(factor,graph):
    """Convert ``graph[factor]`` into Cytoscape elements.

    Node types from ``entity_to_types`` are attached to the graph first. Every
    edge gets an id of the form ``<source>_<target>`` (spaces replaced by
    underscores); nodes keep the id assigned by ``cytoscape_data``. The
    ``paper`` attribute, when present, is moved to a ``papers`` list.

    Args:
        factor: key selecting the graph inside ``graph``.
        graph: mapping from factor to a networkx graph.

    Returns:
        ``(elements, elements_dict)`` where ``elements`` is the list of nodes
        followed by edges and ``elements_dict`` maps element id to element.
    """
    nx.set_node_attributes(graph[factor], entity_to_types.to_dict()["type"], "type")
    cyto_json = cytoscape_data(graph[factor])["elements"]
    elements = cyto_json['nodes'] + cyto_json['edges']
    for element in elements:
        data = element["data"]
        if "source" in data:
            data["id"] = str(data["source"] + '_' + data["target"]).replace(" ", "_")
        if 'paper' in data:
            data["papers"] = list(data.pop("paper"))
        else:
            # Previously `papers` from an earlier element (or an undefined name)
            # was reused here; an element without papers now gets an empty list.
            no_papers.append(element)
            data["papers"] = []
    elements_dict = {element["data"]["id"]: element for element in elements}
    return elements, elements_dict

# Cytoscape elements (list + id lookup) for the paper-level tree and graph.
elements_tree, elements_tree_dict = build_cytoscape_data("paper", trees)
elements_graph, elements_graph_dict = build_cytoscape_data("paper", graphs)

# NOTE: from here on the name `cytoscape_data` refers to this dictionary and no
# longer to the networkx function imported at the top of the cell.
cytoscape_data = {
    "paper": {
        "tree": (elements_tree, elements_tree_dict),
        "graph": (elements_graph, elements_graph_dict),
    },
}

# Correct knowledge graph
Correction involves going back to the extraction and/or curation steps.

# Access the knowledge graph
The user can search, visualize, and export the knowledge graph.

In [None]:
%%time
import pickle
print("Loading precomputed co-mention graphs for 3000 extracted entities...")

factors = ["paper", "section", "paragraph"]
weights = ["npmi", "ppmi"]

# Per-factor containers filled by the loop below.
trees = {}
precomputed_nodes_df = {}
precomputed_edges_df = {}
graphs = {}

# Open the graphs that were already generated.
# NOTE: the pickles are read from a project directory; pickle must only be used on trusted files.
for factor in tqdm(factors):
    edge_list_path = "/gpfs/bbp.cscs.ch/project/proj116/network_analytics/data/graphs/cord_47/full_{}_3000_edge_list.pkl".format(factor)
    node_list_path = "/gpfs/bbp.cscs.ch/project/proj116/network_analytics/data/graphs/cord_47/full_{}_3000_node_list.pkl".format(factor)

    # Full co-mention graph: edges with their weights, then node attributes.
    with open(edge_list_path, "rb") as f:
        edges = pickle.load(f)
    precomputed_edges_df[factor] = edges

    graph = nx.from_pandas_edgelist(
        edges,
        edge_attr=["frequency", "ppmi", "npmi", "distance_ppmi", "distance_npmi"],
    )

    with open(node_list_path, "rb") as f:
        nodes = pickle.load(f)
    nx.set_node_attributes(graph, nodes.to_dict("index"))
    precomputed_nodes_df[factor] = nodes
    graphs[factor] = graph

    # NOTE(review): the tree is built from the same edge-list file as the full
    # graph above - confirm whether a dedicated tree file was intended.
    trees[factor] = {}
    with open(edge_list_path, "rb") as f:
        tree_edges = pickle.load(f)
        tree_edges = tree_edges.rename(columns={"Source": "source", "Target": "target"})
        tree = nx.from_pandas_edgelist(tree_edges)
        trees[factor] = tree
print("Done")

In [None]:
%%time
def _read_json_file(path):
    """Parse and return the JSON document stored at `path`."""
    with open(path, "r") as handle:
        return json.load(handle)


def _index_by_uid(cyto_elements):
    """Map ``data['uid']`` to its element, skipping elements without a uid."""
    return {elt['data']['uid']: elt for elt in cyto_elements if "uid" in elt['data']}


# Style sheets: the 'style' entry of the first item in each styles file.
paper_spanning_tree_styles = _read_json_file(
    "/gpfs/bbp.cscs.ch/project/proj116/cytoscape_3000/paper_spanning_tree_3000_styles.json")[0]['style']
paper_spanning_clusters_styles = _read_json_file(
    "/gpfs/bbp.cscs.ch/project/proj116/cytoscape_3000/paper_clusters_3000_styles.json")[0]['style']

# Cluster view: flat element list (nodes then edges) and uid lookup.
paper_spanning_clusters = _read_json_file(
    "/gpfs/bbp.cscs.ch/project/proj116/cytoscape_3000/paper_clusters_3000.cyjs")
paper_spanning_clusters_list = (
    paper_spanning_clusters["elements"]["nodes"] + paper_spanning_clusters["elements"]["edges"])
paper_spanning_clusters_dict = _index_by_uid(paper_spanning_clusters_list)

# Spanning-tree view: same structure as the cluster view.
paper_spanning_tree = _read_json_file(
    "/gpfs/bbp.cscs.ch/project/proj116/cytoscape_3000/paper_spanning_tree_3000.cyjs")
paper_spanning_tree_list = (
    paper_spanning_tree["elements"]["nodes"] + paper_spanning_tree["elements"]["edges"])
paper_spanning_tree_dict = _index_by_uid(paper_spanning_tree_list)


In [None]:
def create_edge(id, from_id, to_id, label=None, label_size=10, label_color="black", thickness=2, edge_color="grey", edge_style="solid",frequency=1,papers=None):
    """Build a Cytoscape edge element.

    Args:
        id: edge identifier, stored as a string.
        from_id: source node id, stored lower-cased.
        to_id: target node id, stored lower-cased.
        label: edge label; an empty string is used when missing or empty.
        label_size, label_color, edge_color, edge_style: accepted for
            interface compatibility; not used in the returned element.
        thickness: edge width; a value of 0 falls back to 2.
        frequency: stored as-is in the element data.
        papers: list of papers for this edge; a fresh empty list when omitted
            (the previous ``[]`` default was shared between all edges).

    Returns:
        A dict with ``data`` and ``style`` entries.
    """
    if thickness == 0:
        thickness = 2
    return {
        "data": {
            "id": str(id),
            "source": str(from_id).lower(),
            "target": str(to_id).lower(),
            "frequency": frequency,
            "papers": [] if papers is None else papers,
        },
        "style": {
            "label": label if label else '',
            "width": thickness,
        },
    }

def create_node(id, node_type=None,label=None, label_size=10, label_color="black", radius=30, node_color='grey',frequency=None, definition="",papers=None):
    """Build a Cytoscape node element.

    Args:
        id: node identifier, stored lower-cased as a string.
        node_type: stored under ``data['type']``.
        label: display label (lower-cased). When omitted, the label is the
            last ``/``- and ``#``-separated fragment of the lower-cased id.
        label_size, label_color, radius, node_color: accepted for interface
            compatibility; not used in the returned element.
        frequency: mapping that may hold ``frequency``, ``degree_frequency``
            and ``pagerank_frequency``. A missing ``frequency`` defaults to 1
            and the other two default to the raw frequency.
        definition: stored as-is in the element data.
        papers: list of papers for this node; a fresh empty list when omitted
            (the previous ``[]`` / ``{}`` defaults were shared between calls).

    Returns:
        A dict with ``data`` and ``style`` entries.
    """
    if frequency is None:
        frequency = {}
    if papers is None:
        papers = []

    if label is not None:
        actualLabel = label.lower()
    else:
        actualLabel = str(id).lower().split("/")[-1].split("#")[-1]

    frequency_raw = frequency['frequency'] if 'frequency' in frequency else 1
    return {
        "data": {
            "id": str(id).lower(),
            "frequency": frequency_raw,
            "degree_frequency": frequency['degree_frequency'] if 'degree_frequency' in frequency else frequency_raw,
            "pagerank_frequency": frequency['pagerank_frequency'] if 'pagerank_frequency' in frequency else frequency_raw,
            "definition": definition,
            "papers": papers,
            "type": node_type,
        },
        "style": {
            "label": actualLabel,
        },
    }

In [None]:
# ################################# Graph LAYOUT Definition ################################

# Node shapes offered in the "Node Shape" dropdown.
node_shape_option_list = [
    'ellipse', 'triangle', 'rectangle', 'diamond', 'pentagon',
    'hexagon', 'heptagon', 'octagon', 'star', 'polygon',
]

# Image formats offered for download.
dropdown_download_option_list = ['jpg', 'png', 'svg']

# Layout names offered in the "Layout" dropdown.
graph_layout_option_list = [
    'preset', 'random', 'grid', 'circle', 'concentric', 'breadthfirst',
    'cose', 'cose-bilkent', 'dagre', 'cola', 'klay', 'spread', 'euler',
]

# Labels of the node frequency measures.
node_frequency_type = ["Frequency", "Degree Frequency", "PageRank Frequency"]

# Graph views selectable through the "Display" radio buttons.
graph_type_option_list = [
    'Knowledge Graph',
    'Co-mention Graph Spanning Tree',
    'Co-mention Graph Cluster',
    '3000-cluster',
    '3000-spanning',
]


# Action buttons: reset the display, remove the selected node, download an image.
button_group = dbc.InputGroup(
    [
        dbc.Button("Reset", color="primary", className="mr-1", id='bt-reset'),
        dbc.Tooltip(
            # Fixed typo in the user-facing text ("valuess").
            "Reset the display to default values",
            target="bt-reset",
            placement="bottom",
        ),
        dbc.Button("Remove Selected Node", color="primary", className="mr-1", id='remove-button'),
        dbc.DropdownMenu(
            [
                dbc.DropdownMenuItem("png", id="png-menu"),
                dbc.DropdownMenuItem(divider=True),
                dbc.DropdownMenuItem("jpg", id="jpg-menu"),
                dbc.DropdownMenuItem(divider=True),
                dbc.DropdownMenuItem("svg", id="svg-menu"),
            ],
            label="Download",
            id='dropdown-download',
            color="primary",
            group=True,
            className="mr-1",
        ),
    ]
)

# The same button group wrapped in a form group.
buttons = dbc.FormGroup([button_group], className="mr-1")

def _inline_radio_group(label, component_id, choices, default):
    """Return a form row made of a label and inline radio items.

    Each choice is used verbatim as the option value and passed through
    ``str.capitalize`` for its displayed label.
    """
    radio_items = dbc.RadioItems(
        id=component_id,
        value=default,
        options=[{'label': val.capitalize(), 'value': val} for val in choices],
        inline=True,
    )
    return dbc.FormGroup(
        [
            dbc.Label(label, html_for=component_id, width=3),
            dbc.Col(radio_items, width=9),
        ],
        row=True,
    )


# Which graph to display.
graph_type_radio = _inline_radio_group(
    "Display", "showgraph", graph_type_option_list, 'Co-mention Graph Spanning Tree')

# Scope selection.
scope_option_list = ['Paper', 'Section', "Paragraph"]
scope_radio = _inline_radio_group("Scope", "graphscope", scope_option_list, 'Paper')

# Grouping criterion.
group_option_list = ['None','Type', 'Mutual Information']
group_radio = _inline_radio_group("Group By", "graphgroup", group_option_list, 'None')


# Search box rendered as an input group with a "Search" prefix.
# NOTE(review): this component and `search` below both use the id
# "searchdropdown" - confirm they are never placed in the same layout.
input_group = dbc.InputGroup(
    [
        dbc.InputGroupAddon("Search", addon_type="prepend"),
        dcc.Dropdown(id="searchdropdown", multi=True, style={"width": "80%"}),
    ],
    className="mb-3",
)


# Search box rendered as a labelled form row (multi-select dropdown).
search = dbc.FormGroup(
    [
        dbc.Label("Search", html_for="searchdropdown", width=3),
        dbc.Col(dcc.Dropdown(id="searchdropdown", multi=True), width=9),
    ],
    row=True,
)

# One menu item per frequency measure; ids are "dropdown-menu-freq-<measure>",
# where <measure> is the lower-cased label with spaces replaced by underscores.
dropdown_menu_items = [
    dbc.DropdownMenuItem(
        freq_label,
        id="dropdown-menu-freq-" + freq_label.lower().replace(" ", "_"),
    )
    for freq_label in node_frequency_type
]


# Selector for the frequency measure the node filter applies to.
freq_input_group = dbc.InputGroup(
    [
        dbc.InputGroupAddon("Node Frequency", addon_type="prepend"),
        dcc.Dropdown(
            id="node_freq_type",
            value="degree_frequency",
            options=[{'label': val, 'value': val.lower().replace(" ","_")} for val in node_frequency_type],
            style={"width": "80%"},
        ),
    ],
    className="mb-1",
)


# Frequency filter: measure selector, comparison operator and threshold.
node_slider = dbc.InputGroup(
    [
        freq_input_group,
        dcc.Dropdown(
            id='node-freq-filter',
            value="ge",
            clearable=False,
            options=dropdown_freq_filter_list,
            className="mr-1",
        ),
        daq.NumericInput(
            id="nodefreqslider",
            min=1,
            max=10000,
            value=1,
            className="mr-1",
        ),
    ],
    className="mb-3",
)





# Placeholder container with id "modal".
item_details = dbc.FormGroup([html.Div(id="modal")])

# Card with empty title, subtitle, text and button placeholders.
item_details_card = dbc.Card(
    dbc.CardBody(
        [
            html.H5("", className="card-title"),
            html.H6("", className="card-subtitle"),
            html.P("", className="card-text"),
            dbc.Button("", color="primary", id="see-more-card"),
        ],
        id="item-card-body",
    )
)


# Form assembled from the action buttons, graph selection, search box,
# frequency filter and details card.
form = dbc.Form([
    button_group,
    graph_type_radio,
    search,
    node_slider,
    item_details_card,
])


def _labelled_dropdown_row(label, dropdown_id):
    """Return a form row holding a label and an (initially empty) dropdown."""
    return dbc.FormGroup(
        [
            dbc.Label(label, html_for=dropdown_id, width=3),
            dbc.Col(dcc.Dropdown(id=dropdown_id), width=9),
        ],
        row=True,
    )


# Start and end entities of the searched paths.
path_from = _labelled_dropdown_row("From", "searchpathfrom")
path_to = _labelled_dropdown_row("To", "searchpathto")

# Switch enabled by default (value=[1]).
_overlap_row = dbc.FormGroup(
    [
        dbc.Label("Allow Overlap", html_for="searchpathoverlap", width=3),
        dbc.Col(
            dbc.Checklist(
                options=[{"value": 1}],
                value=[1],
                id="searchpathoverlap",
                switch=True,
            ),
            width=9,
        ),
    ],
    row=True,
)

# Number of paths to return, between 10 and 20.
_top_n_row = dbc.FormGroup(
    [
        dbc.Label("Top N", html_for="searchpathlimit", width=3),
        dbc.Col(
            daq.NumericInput(
                id="searchpathlimit",
                min=10,
                max=20,
                value=10,
                className="mr-1",
            ),
            width=9,
        ),
    ],
    row=True,
)

# Optional constraints applied to the path search.
path_condition = dbc.FormGroup(
    [
        dbc.Label("Constraints"),
        _labelled_dropdown_row("Traverse", "searchnodetotraverse"),
        _overlap_row,
        _top_n_row,
    ]
)

# Button that launches the path search.
search_path = dbc.InputGroup(
    [
        dbc.Button("Find Paths", color="primary", className="mr-1", id='bt-path'),
        dbc.Tooltip(
            "Find paths between selected entities",
            target="bt-path",
            placement="bottom",
        ),
    ]
)


form_path_finder = dbc.Form([path_from, path_to, path_condition, search_path])

# Layout selector.
graph_layout = dbc.FormGroup(
    [
        # The label now targets the layout dropdown; it previously pointed at
        # the unrelated "searchdropdown" component.
        dbc.Label("Layout", html_for="dropdown-layout", width=3),
        dbc.Col(dcc.Dropdown(
            id='dropdown-layout',
            options=[{'label': val.capitalize(), 'value': val} for val in graph_layout_option_list],
            value='cola',
            clearable=False
        ), width=9)
    ],
    row=True
)

# Node shape selector.
node_shape = dbc.FormGroup(
    [
        dbc.Label("Node Shape", html_for="dropdown-node-shape", width=3),
        dbc.Col(dcc.Dropdown(
            id='dropdown-node-shape',
            value='ellipse',
            clearable=False,
            options=[{'label': val.capitalize(), 'value': val} for val in node_shape_option_list]
        ), width=9)
    ],
    row=True
)

# Edge colour picker.
link_color_picker = dbc.FormGroup(
    [
        dbc.Col(daq.ColorPicker(
            id='input-follower-color',
            value=dict(hex='#a0b3dc'),
            label="Edge Color"
        ))
    ],
    row=True
)


# Form assembled from the layout, node shape and edge colour controls.
conf_form = dbc.Form([graph_layout, node_shape, link_color_picker])


In [None]:
import json
import os

import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
from kganalytics.paths import top_n_paths, top_n_tripaths
from collections import OrderedDict
from networkx.readwrite.json_graph.cytoscape import cytoscape_data

import dash_cytoscape as cyto

from dash.exceptions import PreventUpdate

def load_json(st):
    """Load a JSON document from an HTTP(S) URL or from a local file.

    Args:
        st: a URL starting with ``http://`` or ``https://`` (fetched with
            ``requests``), or a path to a local JSON file.

    Returns:
        The decoded JSON content.
    """
    # Decide on the scheme prefix rather than on "http" appearing anywhere, so
    # that local paths such as "data/http_cache.json" are read from disk.
    location = str(st).lstrip().lower()
    if location.startswith(("http://", "https://")):
        return requests.get(st).json()
    with open(st, 'rb') as f:
        return json.load(f)
    
# Load extra layouts
cyto.load_extra_layouts()
# Dash application (named "allvis") that hosts the graph visualisation.
app_tab =  JupyterDash("allvis")

# NOTE(review): these are plain attribute assignments made after construction,
# and `external_stylesheets` receives a single value rather than a list -
# confirm that they actually affect how the app is rendered.
app_tab.add_bootstrap_links = True
app_tab.external_stylesheets=dbc.themes.CYBORG

# Server object exposed by the Dash app.
server = app_tab.server


# CSS applied to the container of the graph: fixed position, full width.
CONTENT_STYLE = {
    "width": "100%",
    "top": "0px",
    "left": "0px",
    "bottom": "0px",
    "position": "fixed",
}

# Background colour per entity type.
colors = {
    "CHEMICAL": "green",
    "PROTEIN": "#469d8c",
    "DISEASE": "#dceef1",
    "CELL_TYPE": "#f1d2b5",
}


def _community_color_rule(community, rgb):
    """Stylesheet rule colouring nodes whose ``community_npmi`` equals `community`."""
    return {
        "selector": "node[community_npmi = {}]".format(community),
        "css": {"background-color": rgb},
    }


def _type_color_rule(entity_type):
    """Stylesheet rule colouring nodes of `entity_type` with its entry in ``colors``."""
    return {
        "selector": 'node[type = "{}"]'.format(entity_type),
        "style": {"background-color": colors[entity_type]},
    }


# Rules are kept in their original order.
cystoscape_STYLE_stylesheet = [
    _community_color_rule("0.0", "rgb(137,208,245)"),
    _community_color_rule("1.0", "rgb(255,102,51)"),
    _community_color_rule("2.0", "rgb(0,102,0)"),
    {
        "selector": 'cytoscape',
        "style": {"width": "100%", "height": "100%"},
    },
    _type_color_rule("CHEMICAL"),
    {
        "selector": 'node',
        'style': {
            "font-size": 70,
            "text-valign": "center",
            "text-halign": "center",
            "label": "data(name)",
            "color": "black",
            "overlay-padding": "6px",
            "z-index": "10",
        },
    },
    {
        "selector": 'edge',
        "style": {'curve-style': 'bezier', 'line-color': '#D5DAE6'},
    },
    _type_color_rule("PROTEIN"),
    _type_color_rule("DISEASE"),
    _type_color_rule("CELL_TYPE"),
]

def _card_tab(label, body, label_style):
    """Return a tab whose only child is a card wrapping `body`."""
    return dbc.Tab(
        label=label,
        label_style=label_style,
        children=[dbc.Card(dbc.CardBody([body]))],
    )


# Graph view, initially showing the spanning-tree elements.
_graph_view = cyto.Cytoscape(
    id='cytoscape',
    elements=elements_tree,
    stylesheet=cystoscape_STYLE_stylesheet,
    style={"width": "100%", "height": "100%"},
)

# Left column: the graph inside a loading indicator.
_graph_column = dbc.Col(
    dcc.Loading(
        id="loading-graph",
        children=[html.Div(style=CONTENT_STYLE, children=[_graph_view])],
        type="circle",
    ),
    width=8,
)

# Right column: a "Controls" button toggling the collapsible tab panel.
_controls_tabs = dbc.Tabs(id='tabs', children=[
    _card_tab('Details', form, {"color": "#00AEF9", "border-radius": "4px"}),
    _card_tab('Graph Layout and Shape', conf_form, {"color": "#00AEF9"}),
    _card_tab('Path Finder', form_path_finder, {"color": "#00AEF9"}),
])

_controls_column = dbc.Col(
    html.Div(children=[
        dbc.Button("Controls", id="collapse-button", color="primary"),
        dbc.Collapse(_controls_tabs, id="collapse"),
    ]),
    width=4,
)

# Page layout: a client-side store plus one row with the two columns.
app_tab.layout = html.Div([
    dcc.Store(id='memory', data={"removed": []}),
    dbc.Row([_graph_column, _controls_column]),
])
    


# ############################## CALLBACKS ####################################

@app_tab.callback(
    Output("collapse", "is_open"),
    [Input("collapse-button", "n_clicks")],
    [State("collapse", "is_open")],
)
def toggle_collapse(n, is_open):
    """Invert the panel's open state once the button has a click count; otherwise keep it."""
    return (not is_open) if n else is_open


@app_tab.callback(
    Output("modal-body-scroll", "is_open"),
    [
        Input("open-body-scroll", "n_clicks"),
        Input("close-body-scroll", "n_clicks"),
    ],
    [State("modal-body-scroll", "is_open")],
)
def toggle_modal(n1, n2, is_open):
    """Invert the modal's open state when either button has a click count; otherwise keep it."""
    clicked = n1 or n2
    return (not is_open) if clicked else is_open


@app_tab.callback(
    Output("searchdropdown", "options"),
    [Input("searchdropdown", "search_value")],
    [State("searchdropdown", "value"),
    State('cytoscape', 'elements')],
)
def update_multi_options(search_value, value,elements):
    """Return the search dropdown options matching the typed text.

    An element with a ``name`` is offered when the text is contained in its
    name, its name is contained in the text, or its id is already part of the
    current selection. Raises ``PreventUpdate`` while the text is empty.
    """
    if not search_value:
        raise PreventUpdate

    already_selected = value or []
    return [
        {"label": data['name'], "value": data['id']}
        for data in (ele_data['data'] for ele_data in elements)
        if 'name' in data
        and (
            (search_value in data['name'])
            or (data['name'] in search_value)
            or data['id'] in already_selected
        )
    ]

def get_cytoscape_data(factor,graph):
    """Convert ``graph[factor]`` into Cytoscape elements with space-free ids.

    Edge ids become ``<source>_<target>``; node ids keep their value. In both
    cases spaces are replaced by underscores.

    Returns:
        ``(elements, elements_dict)``: nodes followed by edges, and a mapping
        from element id to element.
    """
    cyto_json = cytoscape_data(graph[factor])["elements"]
    elements = cyto_json['nodes'] + cyto_json['edges']
    for element in elements:
        data = element["data"]
        if "source" in data:
            raw_id = data["source"] + '_' + data["target"]
        else:
            raw_id = data["id"]
        data["id"] = str(raw_id).replace(" ", "_")
    elements_dict = {element["data"]["id"]: element for element in elements}
    return elements, elements_dict
    
    
def search(search_value,value, showgraph, diffs=None):
    """Return dropdown options for the nodes of the displayed co-mention graph.

    Args:
        search_value: text typed by the user.
        value: current selection of the dropdown (may be None).
        showgraph: selected graph view. Only 'Co-mention Graph Spanning Tree'
            and 'Co-mention Graph Cluster' are searchable.
        diffs: ids to exclude from the result.

    Returns:
        A list of ``{"label": name, "value": id}`` dicts. Empty for any other
        graph view (this previously failed with UnboundLocalError).
    """
    if showgraph == 'Co-mention Graph Spanning Tree':
        elements = elements_tree
    elif showgraph == 'Co-mention Graph Cluster':
        elements = elements_graph
    else:
        return []

    excluded = diffs or ()
    selected = value or []
    res = []
    for ele_data in elements:
        data = ele_data['data']
        if 'name' not in data:
            continue
        label = data['name']
        is_match = (search_value in label) or (label in search_value) or data['id'] in selected
        if is_match and data['id'] not in excluded:
            res.append({"label": label, "value": data['id']})
    return res

@app_tab.callback(
    Output("searchpathto", "options"),
    [Input("searchpathto", "search_value")],
    [State("searchpathto", "value"),
     State('searchpathfrom', 'value'),
    State('showgraph', 'value')],
)
def searchpathto (search_value, value,_from,showgraph):
    """Options for the "To" dropdown, excluding the node chosen as "From"."""
    if search_value:
        return search(search_value, value, showgraph, [_from])
    raise PreventUpdate

@app_tab.callback(
    Output("searchnodetotraverse", "options"),
    [Input("searchnodetotraverse", "search_value")],
    [State("searchnodetotraverse", "value"),
    State('searchpathfrom', 'value'),
    State('searchpathto', 'value'),
    State('showgraph', 'value')],
)
def searchpathtraverse (search_value, value,_from,to, showgraph):
    """Options for the "Traverse" dropdown, excluding the "From" and "To" nodes."""
    if search_value:
        return search(search_value, value, showgraph, [_from, to])
    raise PreventUpdate

@app_tab.callback(
    Output("searchpathfrom", "options"),
    [Input("searchpathfrom", "search_value")],
    [State("searchpathfrom", "value"),
    State('showgraph', 'value')],
)
def searchpathfrom(search_value, value,showgraph ):
    """Options for the "From" dropdown of the path finder."""
    if search_value:
        return search(search_value, value, showgraph)
    raise PreventUpdate



@app_tab.callback(Output('nodefreqslider', 'value'),
              [Input('bt-reset', 'n_clicks')],[State('nodefreqslider', 'value')])
def display_freq_node(resetbt, nodefreqslider):
    """Reset the node frequency threshold to 1 when "Reset" is clicked.

    Args:
        resetbt: click count of the reset button (trigger only).
        nodefreqslider: current value of the numeric input.

    Returns:
        1 after a reset click, otherwise the current value unchanged.
    """
    ctx = dash.callback_context
    if ctx.triggered and ctx.triggered[0]['prop_id'].split('.')[0] == 'bt-reset':
        return 1
    # Not a reset click: keep the current value. The function used to fall
    # through and return None here, which blanked the input.
    return nodefreqslider

@app_tab.callback(
    [
        Output('cytoscape', 'generateImage')
    ],
    [
        Input('jpg-menu', 'n_clicks'),
        Input('svg-menu', 'n_clicks'),
        Input('png-menu', 'n_clicks')
    ]
)
def download_image(jpg_menu,svg_menu,png_menu):
    """Request a download of the graph image in the clicked format.

    The click counts are only used as triggers; the format is derived from the
    id of the menu item that fired the callback.

    Returns:
        A one-element list with the ``generateImage`` request.

    Raises:
        PreventUpdate: when no download menu item triggered the callback.
            Previously a request with ``type`` None was sent in that case.
    """
    ctx = dash.callback_context
    if not ctx.triggered:
        raise PreventUpdate

    button_id = ctx.triggered[0]['prop_id'].split('.')[0]
    ftype = {
        "png-menu": "png",
        "jpg-menu": "jpg",
        "svg-menu": "svg",
    }.get(button_id)
    if ftype is None:
        raise PreventUpdate

    return [{
        'type': ftype,
        'action': "download"
    }]

# Module-level set declared `global` by the reset_layout callback below.
removed = set()
from sqlalchemy.sql import select
from sqlalchemy.sql import and_, or_, not_

def list_papers (papers):
    """Fetch the metadata of the given articles from the ``articles`` table.

    Args:
        papers: iterable of article ids.

    Returns:
        A list of result rows (title, authors, abstract, doi, url, journal,
        pmcid, pubmed_id, publish_time); empty when ``papers`` is empty.
    """
    paper_ids = list(papers)
    if not paper_ids:
        # Without ids the former `or_()` had no clauses at all, so the query
        # was not restricted to any article. Nothing to look up here.
        return []

    # NOTE(review): the whole schema is reflected on every call - consider
    # reflecting once and reusing the table object.
    META_DATA = sqlalchemy.MetaData(bind=bbs_mysql_engine, reflect=True)
    articles = META_DATA.tables["articles"]
    s = select([
        articles.c.title,
        articles.c.authors,
        articles.c.abstract,
        articles.c.doi,
        articles.c.url,
        articles.c.journal,
        articles.c.pmcid,
        articles.c.pubmed_id,
        articles.c.publish_time,
    ]).where(articles.c.article_id.in_(paper_ids))
    result = bbs_mysql_engine.execute(s)
    return list(result)




def _select_graph_elements(val):
    """Return the ``(elements, elements_dict)`` pair for the graph named ``val``, or ``None`` if unknown."""
    if val == 'Knowledge Graph':
        return knowledge_graph_cyto_elements, knowledge_graph_cyto_elements_dict
    if val == 'Co-mention Graph Spanning Tree':
        return elements_tree, elements_tree_dict
    if val == 'Co-mention Graph Cluster':
        return elements_graph, elements_graph_dict
    if val == '3000-cluster':
        return paper_spanning_clusters_list, paper_spanning_clusters_dict
    if val == '3000-spanning':
        return paper_spanning_tree_list, paper_spanning_tree_dict
    return None


@app_tab.callback(
    [
        Output('cytoscape', 'zoom'),
        Output('cytoscape', 'elements')
    ],
    [
        Input('bt-reset', 'n_clicks'),
        Input('remove-button', 'n_clicks'),
        Input('showgraph', 'value'),
        Input('nodefreqslider', 'value'),
        Input('node-freq-filter', 'value'),
        Input("searchdropdown", "value"),
        Input('bt-path', 'n_clicks')
     ],
     [
        State('cytoscape', 'elements'),
        State('cytoscape', 'selectedNodeData'),
        State('cytoscape', 'selectedEdgeData'),
        State('cytoscape', 'tapNodeData'),
        State('cytoscape', 'zoom'),
        State('nodefreqslider', 'value'),
        State('searchpathfrom', 'value'),
        State('searchpathto', 'value'),
        State('searchnodetotraverse', 'value'),
        State('searchpathlimit', 'value'),
        State('searchpathoverlap', 'value')

      ]
)
def reset_layout(resetbt, removebt, val, nodefreqslider, node_freq_operator,searchvalues,pathbt,cytoelements, data, edge,tappednode,zoom,nodefreqsliderstate,
                 searchpathfrom, searchpathto,searchnodetotraverse,searchpathlimit,searchpathoverlap):
    """Recompute the cytoscape ``zoom`` and ``elements`` after a UI interaction.

    Depending on the triggering component this switches the displayed graph
    ('showgraph'), marks searched nodes as selected, removes the selected
    nodes/edges ('remove-button'), replaces the view with the top paths between
    two nodes ('bt-path'), or filters nodes by frequency ('nodefreqslider').

    Parameters mirror the callback's Input/State list in order: ``val`` is the
    graph name, ``cytoelements`` the currently displayed elements, ``data`` /
    ``edge`` the selected node / edge data, and the ``searchpath*`` values are
    the path-search form fields.

    Returns
    -------
    tuple
        ``(zoom, elements)`` for the cytoscape component.

    Side effects: rebinds the module-level ``elements_dict`` and ``removed``.
    """
    global removed
    global elements_dict
    global graphs
    global trees
    elements = cytoelements
    ctx = dash.callback_context
    if not ctx.triggered:
        button_id = 'No clicks yet'
    else:
        button_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if button_id == 'showgraph':
        selected_graph = _select_graph_elements(val)
        if selected_graph is not None:
            elements, elements_dict = selected_graph

    if searchvalues is not None:
        for searchvalue in searchvalues:
            # The searched id may belong to a graph that is no longer displayed;
            # skip it instead of failing the whole callback with a KeyError.
            search_node = elements_dict.get(searchvalue)
            if search_node is not None:
                search_node["selected"]=True

    # A slider value of 1 restores the full graph on every invocation,
    # whichever component triggered the callback.
    if nodefreqslider == 1:
        selected_graph = _select_graph_elements(val)
        if selected_graph is not None:
            elements, elements_dict = selected_graph

        zoom =1
        removed = set()

    if button_id == 'remove-button':
        # Collect node AND edge ids: previously the edge ids overwrote the node
        # ids, and an empty selection left the variable unbound.
        ids_to_remove = set()
        if data:
            ids_to_remove.update(ele_data['id'] for ele_data in data)
        if edge:
            ids_to_remove.update(ele_data['id'] for ele_data in edge)

        if elements and ids_to_remove:
            elements = [ele for ele in elements if ele['data']['id'] not in ids_to_remove]
            removed.update(ids_to_remove)

    if button_id == 'bt-path':
        if searchpathfrom and searchpathto:
            topN = searchpathlimit if searchpathlimit else 20
            searchpathfrom_dict = elements_dict[searchpathfrom]
            searchpathto_dict = elements_dict[searchpathto]

            if searchnodetotraverse:
                searchnodetotraverse_dict = elements_dict[searchnodetotraverse]

                # The overlap value may be None when nothing is ticked.
                intersecting = bool(searchpathoverlap) and len(searchpathoverlap) == 1
                paths = top_n_tripaths(graphs["paper"], searchpathfrom_dict['data']['name'],searchnodetotraverse_dict['data']['name'], searchpathto_dict['data']['name'], topN,
                                       strategy="naive", distance="distance_npmi", intersecting=intersecting, pretty_print=False)

                # Join both legs of each tri-path, dropping repeated nodes but keeping order.
                paths = [list(OrderedDict.fromkeys(path[0]+path[1])) for path in paths]

            else:
                paths = top_n_paths(graphs["paper"], searchpathfrom_dict['data']['id'], searchpathto_dict['data']['id'], topN, distance="distance_npmi", strategy="naive",pretty_print=False)
            elements = []

            if paths:
                elements.append(searchpathfrom_dict)
                elements.append(searchpathto_dict)

            for path in paths or []:
                # Each path is chained from the source node, one edge per step.
                previous_id = searchpathfrom_dict["data"]["id"]
                for path_step in path:
                    step_key = str(path_step).replace(" ","_")
                    if step_key in elements_dict:
                        path_element = elements_dict[step_key]
                    else:
                        # Best-effort fallback: build a node from the linked-mention
                        # table; a step that cannot be resolved is skipped.
                        path_element = None
                        try:
                            result_df = linked_mention_df_unique.loc[step_key.lower()]
                            if len(result_df) > 0:
                                path_element = create_node(id=result_df.uid, label=result_df.concept, definition=result_df.definition)
                        except Exception:
                            path_element = None
                        if path_element is None:
                            # Previously a stale element from an earlier step
                            # (or an unbound name) would have been used here.
                            continue

                    path_element_id = path_element['data']['id']
                    elements.append(path_element)

                    edge_from_id = str(previous_id).lower().replace(" ","_")+"_"+str(path_element_id).lower()
                    edge_from = create_edge(edge_from_id, previous_id, path_element_id)
                    elements.append(edge_from)

                    previous_id = path_element_id

    if elements and (nodefreqslider is not None and button_id == 'nodefreqslider') :

        if val == 'Knowledge Graph':
            elements = knowledge_graph_cyto_elements
        if val == 'Co-mention Graph Spanning Tree':
            elements, elements_dict = get_cytoscape_data("paper",trees)
        if val == 'Co-mention Graph Cluster':
            elements, elements_dict = get_cytoscape_data("paper",graphs)

        threshold = int(nodefreqslider)
        ids_to_remove = set()
        for ele in elements:
            ele_data = ele['data']
            # Only nodes (no 'source' key) not already removed and carrying a frequency are filtered.
            if 'source' in ele_data or ele_data['id'] in removed:
                continue
            if ele_data.get('frequency') is None:
                continue
            # SECURITY(review): node_freq_operator comes from the 'node-freq-filter'
            # component and is passed to eval(). If its options are a fixed set,
            # replace this with an explicit mapping to comparison functions.
            if not eval(node_freq_operator)(int(ele_data['frequency']), threshold):
                ids_to_remove.add(ele_data['id'])

        elements = [ele for ele in elements if ele['data']['id'] not in ids_to_remove]

    return zoom, elements




def _build_paper_card(paper):
    """Render one article row (column order as selected by ``list_papers``) as a Bootstrap card."""
    title = paper[0] if paper[0] else ''
    authors = paper[1] if paper[1] else ''
    abstract = paper[2] if paper[2] else ''
    journal = paper[5] if paper[5] else ''
    url = paper[4] if paper[4] else ''
    publish_time = str(paper[8]) if paper[8] else ''

    # Keep the card compact: show at most the first 500 characters of the abstract.
    abstract = (abstract[:500] + '...') if abstract and len(abstract) > 500 else abstract

    return dbc.Card(
        dbc.CardBody(
            [
                html.H4(title, className="card-title"),
                html.H5(authors, className="card-subtitle"),
                html.H6(journal+"( "+publish_time+" )", className="card-subtitle"),
                html.P(
                    abstract,
                    className="card-text"
                ),
                dbc.Button("View the paper", href=url,target="_blank",color="primary"),
            ]
        )
    )


@app_tab.callback([Output('item-card-body', 'children')],
                  [Input('cytoscape', 'tapNode'),Input('cytoscape', 'tapEdge')],
                  [State('cytoscape', 'selectedNodeData'),
                   State('cytoscape', 'selectedEdgeData'),
                   State('showgraph', 'value')])
def display_tap_node(datanode, dataedge,statedatanode,statedataedge,showgraph):
    """Build the details card for the tapped node or edge.

    For a node: its label, type and definition plus a button opening a modal
    listing the papers that mention it. For an edge: a button opening a modal
    listing the papers shared by its two end nodes. When there is nothing to
    list, a "Select an item for details" placeholder is returned.

    Parameters
    ----------
    datanode, dataedge : dict or None
        The tapped node / edge objects (``'data'`` and ``'style'`` entries are read).
    statedatanode, statedataedge : list or None
        Current node / edge selection; a tap is only handled while the
        corresponding selection is non-empty.
    showgraph : str
        Name of the displayed graph, used to pick the node lookup table.

    Returns
    -------
    list
        A one-element list, matching the single declared Output.
    """
    papers = []
    res = []
    modal_button = None
    label = ""

    if datanode and statedatanode:
        node_data = datanode['data']
        label = str(datanode['style']['label'])
        # Test the key itself, not a substring of the dict's repr.
        definition = str(node_data['definition']) if 'definition' in node_data else ""

        # A boolean mask always yields a DataFrame: unlike ``.loc[label]`` it does
        # not raise for an unknown concept nor collapse a single match to a Series.
        matches = linked_mention_df_unique[linked_mention_df_unique["concept"] == label]
        if len(matches) > 0:
            definition = matches["definition"].iloc[0]

        _type = str(node_data['type'])
        # NOTE(review): nodes without a 'papers' entry are treated as having none — confirm.
        papers = node_data.get('papers') or []
        frequency = str(len(papers))
        res.append([
            html.H5(label, className="card-title"),
            html.H6(_type, className="card-subtitle"),
            html.P(
                definition,
                className="card-text"
            )
        ])
        label = "'"+label+"' mentioned in "+frequency+" papers"
        modal_button = dbc.Button(label, id="open-body-scroll",color="primary")

    if dataedge and statedataedge:
        # Pick the lookup without assigning to ``elements_dict`` inside this
        # function: assigning made the name local, so other graph modes raised
        # UnboundLocalError. The fallback is the module-level dict.
        if showgraph == 'Co-mention Graph Spanning Tree':
            node_lookup = elements_tree_dict
        elif showgraph == 'Co-mention Graph Cluster':
            node_lookup = elements_graph_dict
        else:
            node_lookup = elements_dict

        source_node = node_lookup[dataedge['data']['source']]
        source_label = source_node['data']['name']
        target_node = node_lookup[dataedge['data']['target']]
        target_label = target_node['data']['name']
        # Papers behind an edge are those mentioning both of its end nodes.
        papers = set(source_node['data']['papers']).intersection(set(target_node['data']['papers']))
        frequency = str(len(papers))
        mention_label= ''' '%s' mentioned in %s papers with '%s' ''' % (source_label, frequency, target_label)
        edge_label = str(dataedge['style']['label'])
        label = mention_label if edge_label == "" else edge_label
        modal_button= dbc.Button(label, id="open-body-scroll",color="primary")

    if len(papers) == 0:
        return [html.H5("Select an item for details", className="card-title")]

    papers_in_kg = list_papers(papers)
    if papers_in_kg:
        cards = dbc.Row([_build_paper_card(paper) for paper in papers_in_kg])

        modal = html.Div(
            [
                modal_button,

                dbc.Modal(
                    [
                        dbc.ModalHeader(label),
                        dbc.ModalBody(cards),
                        dbc.ModalFooter(
                            dbc.Button(
                                "Close", id="close-body-scroll", className="ml-auto"
                            )
                        ),
                    ],
                    id="modal-body-scroll",
                    scrollable=True,
                    size="lg"
                ),
            ]
        )
        if len(res) > 0:
            res[0].append(modal)
        else:
            res.append(modal)

    if not res:
        # Edge whose papers were not found in the database: still return exactly
        # one child for the single Output instead of an empty list.
        res.append(html.H5(label, className="card-title"))

    return res



@app_tab.callback(
                  Output('cytoscape', 'layout'),
                  [Input('dropdown-layout', 'value'),Input('showgraph', 'value')]
                 )
def update_cytoscape_layout(layout,showgraph):
    """Return the cytoscape layout configuration for the chosen layout name.

    The two '3000-*' graphs always use the 'preset' layout. Otherwise the
    result is ``{'name': layout, ...}`` extended with the tuning options of
    the 'cose', 'cola' or 'cose-bilkent' layouts, or just ``showlegend`` for
    any other layout name.
    """
    if showgraph in ('3000-cluster', '3000-spanning'):
        return {'name': 'preset'}

    if layout == "cose":
        options = {
            'showlegend':True,
            'idealEdgeLength': 100,
            'nodeOverlap': 0,
            'refresh': 20,
            'fit': True,
            'padding': 30,
            'randomize': False,
            'componentSpacing': 100,
            'nodeRepulsion': 400000,
            'edgeElasticity': 100,
            'nestingFactor': 5,
            'gravity': 80,
            'numIter': 1000,
            'initialTemp': 200,
            'coolingFactor': 0.95,
            'minTemp': 1.0
        }
    elif layout == "cola":
        grouped_leaves = [
            'http://purl.obolibrary.org/obo/ncit_c2271',
            'http://purl.obolibrary.org/obo/ncit_c3333',
            'http://purl.obolibrary.org/obo/ncit_c3193',
            'http://purl.obolibrary.org/obo/ncit_c124113',
            'http://purl.obolibrary.org/obo/ncit_c20506',
        ]
        options = {
            'animate': True,
            'refresh': 1,
            'maxSimulationTime': 4000,
            'ungrabifyWhileSimulating': False,
            'fit': True,
            'padding': 30,
            "groups": [{"leaves": grouped_leaves}],
            'nodeDimensionsIncludeLabels': False,
            'randomize': False,
            'avoidOverlap': True,
            'handleDisconnected': True,
            'convergenceThreshold': 0.01,
            'nodeSpacing': 50
        }
    elif layout == "cose-bilkent":
        options = {
            "quality": 'default',
            "refresh": 30,
            "fit": True,
            "padding": 10,
            "randomize": True,
            "nodeRepulsion": 34500,
            "idealEdgeLength": 50,
            "edgeElasticity": 0.45,
            "nestingFactor": 0.1,
            "gravity": 50.25,
            "numIter": 2500,
            "tile": True,
            "tilingPaddingVertical": 50,
            "tilingPaddingHorizontal": 50,
            "gravityRangeCompound": 1.5,
            "gravityCompound": 2.0,
            "gravityRange": 23.8,
            "initialEnergyOnIncremental": 50.5
        }
    else:
        options = {'showlegend':True}

    # 'name' goes first, followed by the layout-specific options.
    return {'name': layout, **options}



@app_tab.callback(Output('cytoscape', 'stylesheet'),
                  [Input('cytoscape', 'tapNode'),
                   Input('cytoscape', 'selectedNodeData'),
                   Input('input-follower-color', 'value'),
                   Input('dropdown-node-shape', 'value'),
                   Input('showgraph', 'value'),
                   Input('node_freq_type', 'value')],
                   [State('cytoscape', 'stylesheet')])
def generate_stylesheet(node, selectedNodes,follower_color, node_shape, showgraph, node_freq_type,original_stylesheet):
    """Build the cytoscape stylesheet, highlighting the tapped and selected nodes.

    Parameters
    ----------
    node : dict or None
        The tapped node object (may carry ``'data'`` and ``'edgesData'``).
    selectedNodes : list or None
        Data dicts of the currently selected nodes.
    follower_color : dict
        Colour picker value; its ``'hex'`` entry colours highlighted edges.
    node_shape : str
        Shape applied to all nodes when sizing by ``node_freq_type``.
    showgraph : str
        Name of the displayed graph; selects the base stylesheet.
    node_freq_type : str or None
        Name of the node data field driving node width and height.
    original_stylesheet : list or None
        The stylesheet currently applied to the component.

    Returns
    -------
    list
        The stylesheet to apply. The two '3000-*' graphs return their
        dedicated style lists unchanged.
    """
    if showgraph =='3000-cluster':
        return paper_spanning_clusters_styles

    if showgraph =="3000-spanning":
        return paper_spanning_tree_styles

    if showgraph in ('Co-mention Graph Spanning Tree', 'Co-mention Graph Cluster'):
        base_stylesheet = cystoscape_STYLE_stylesheet
    else:
        base_stylesheet = original_stylesheet
    # Work on a copy: appending to the module-level base list made it grow
    # with every callback invocation. A missing stylesheet becomes an empty one.
    stylesheet = list(base_stylesheet) if base_stylesheet else []

    focus_nodes = list(selectedNodes) if selectedNodes else []
    if node is not None:
        focus_nodes.append(node)

    # Only size nodes when a data field is actually chosen; with just a tapped
    # node the 'data(...)' string could not be built from None.
    if node_freq_type:
        stylesheet = [style for style in stylesheet if not (style["selector"] == 'node' and 'width' in style["style"])]
        stylesheet.append({
                "selector": 'node',
                'style': {
                    'shape': node_shape,
                    'width':'data('+node_freq_type+')',
                    'height':'data('+node_freq_type+')'
                }
            }
        )

    if focus_nodes:
        # Generic rules are added once, before the per-node highlights, so they
        # cannot override the highlighted edges of an earlier focus node.
        stylesheet.append({
            "selector": "node:selected",
            "style": {
                "border-width": "5px",
                "border-color": "#AAD8FF",
                "border-opacity": "0.5"
            }
        })
        stylesheet.append({
            "selector": 'edge',
            "style": {
                'curve-style': 'bezier',
                'line-color': '#D5DAE6'
            }
        })

    for focus_node in focus_nodes:
        # Tapped nodes nest their id under 'data'; selected-node data dicts hold it directly.
        # Computing it once also fixes the former `a == b if c else d` conditions,
        # where the conditional expression swallowed the comparison.
        focus_id = focus_node['data']['id'] if "data" in focus_node else focus_node['id']

        stylesheet.append({
            "selector": 'node[id = "{}"]'.format(focus_id),
            "style": {
                "border-width": "5px",
                "border-color": "#AAD8FF",
                "border-opacity": "0.5",
                "text-opacity": 1,
                'z-index': 9999
            }
        })

        for edge in focus_node.get('edgesData') or []:
            if edge['source'] == focus_id:
                # Outgoing edge: emphasise the target node and the edge.
                stylesheet.append({
                    "selector": 'node[id = "{}"]'.format(edge['target']),
                    "style": {
                        'opacity': 0.9
                    }
                })
                stylesheet.append({
                    "selector": 'edge[id= "{}"]'.format(edge['id']),
                    "style": {
                        "mid-target-arrow-color": follower_color['hex'],
                        "line-color": follower_color['hex'],
                        'opacity': 0.9,
                        'z-index': 5000
                    }
                })
            if edge['target'] == focus_id:
                # Incoming edge: emphasise the source node and the edge.
                stylesheet.append({
                    "selector": 'node[id = "{}"]'.format(edge['source']),
                    "style": {
                        'opacity': 0.9,
                        'z-index': 9999
                    }
                })
                stylesheet.append({
                    "selector": 'edge[id= "{}"]'.format(edge['id']),
                    "style": {
                        "mid-target-arrow-color": follower_color['hex'],
                        "line-color": follower_color['hex'],
                        'opacity': 1,
                        'z-index': 5000
                    }
                })

    return stylesheet


# Some callback targets (e.g. the modal built inside display_tap_node) are created
# dynamically, so callback-validation errors are suppressed.
app_tab.config['suppress_callback_exceptions']=True
width = "100%"
app_tab.height = "800px"
# Start the app in "jupyterlab" mode on port 8072.
app_tab.run_server(mode="jupyterlab",width=width, port="8072")

# Version the knowledge graph
The user can save a knowledge graph with a version.

In [None]:
import time
# Timestamp (local time, YYYYMMDD-HHMMSS) shared by both file names below.
timestr = time.strftime("%Y%m%d-%H%M%S")
        
# Temporarily save the extracted entities as a local CSV file
table_extractions_filename = "./table_extractions_%s.csv" % (timestr)
table_extractions.to_csv(table_extractions_filename)


# Temporarily save the curated list of extracted entities as a local CSV file
curated_table_extractions_filename = "./curated_table_extractions_%s.csv" % (timestr)
curated_table_extractions.to_csv(curated_table_extractions_filename)

In [None]:
import jwt
from kgforge.core import Resource
from kgforge.specializations.resources import Dataset

# Read the claims of the token without verification (verify=False).
agent = jwt.decode(TOKEN,  verify=False)

# Turn the claims into a resource, keeping only the identity fields listed in `keep`.
agent = forge.reshape(forge.from_json(agent), keep=["name","email","sub","preferred_username"])
# The token's "sub" claim is used as the resource id of the contributing person.
agent.id = agent.sub
agent.type = "Person"

# Dataset about the current topic, with both CSV files attached as distributions
# and the agent above recorded as contributor.
dataset = Dataset(forge,name="A dataset", about=topic_resource.name)
dataset.add_distribution(table_extractions_filename, content_type="application/csv")
dataset.add_distribution(curated_table_extractions_filename, content_type="application/csv")
dataset.add_contribution(agent)
dataset.contribution.hadRole= "Scientists"

In [None]:
# Default tag value: "<username>_<timestamp>"; used to pre-fill the 'Tag:' text box.
version = agent.preferred_username+"_"+timestr

def register_dataset(b):
    """Button handler: register ``dataset`` with the name and description typed in the form.

    ``b`` is the clicked button (unused). Both output areas are cleared, then
    either a confirmation or the message of the failed action is printed in
    ``output4``.
    """
    output4.clear_output()
    output5.clear_output()
    dataset.name = t1.value
    dataset.description = t2.value
    forge.register(dataset)
    last_action = dataset._last_action
    with output4:
        if last_action.succeeded:
            print("Dataset registered!")
        else:
            print(last_action.message)

def version_dataset(b):
    """Button handler: tag ``dataset`` with the value of the 'Tag:' text box.

    ``b`` is the clicked button (unused). The outcome is printed in
    ``output5``: a confirmation on success, otherwise the message of the
    failed action (a failure used to be silent).
    """
    output5.clear_output()
    version = t3.value
    forge.tag(dataset,version)
    last_action = dataset._last_action
    with output5:
        if last_action.succeeded:
            print(f"Tagged with: {str(version)}")
        else:
            print(last_action.message)
    
# Output areas: output4 shows the registration result, output5 the tagging result.
output4 = ipywidgets.Output()
output5 = ipywidgets.Output()

# Button that registers the dataset (handler: register_dataset).
b1 = ipywidgets.Button(
    description= '💾  Register Dataset',
    button_style='',
    layout=ipywidgets.Layout(width='300px', height='30px'),
    disabled=False)

# Button that tags the dataset (handler: version_dataset).
b2 = ipywidgets.Button(
    description= '🔖 Tag Dataset',
    button_style='',
    layout=ipywidgets.Layout(width='300px', height='30px'),
    disabled=False)

# Dataset name, read by register_dataset.
t1 = ipywidgets.Text(
    placeholder='Add a name for your dataset',
    description='Name:',
    disabled=False)

# Dataset description, read by register_dataset.
t2 = ipywidgets.Textarea(
    placeholder='Add a description of your dataset',
    description='Description:',
    disabled=False)

# Tag to apply, pre-filled with the default `version`; read by version_dataset.
t3 = ipywidgets.Text(
    description='Tag:',
    value=version,
    disabled=False)

b1.on_click(register_dataset)
b2.on_click(version_dataset)

# Stack the form vertically: registration inputs and result, then tagging inputs and result.
save_widget = ipywidgets.VBox(children=[t1, t2, b1, output4, t3, b2, output5])

display(save_widget)