# Goal of the notebook
End-to-end pipeline for searching articles of interest, extracting entities of interest, and building, accessing and deploying a knowledge graph and a co-mention graph.

In [None]:
import getpass
import json
import os
import pathlib
import requests
import time

import ipywidgets
import networkx as nx
import pandas as pd
import sqlalchemy
from sqlalchemy.sql import select
from sqlalchemy.sql import and_, or_, not_

from bbsearch.widgets import ArticleSaver, MiningSchema, MiningWidget, SearchWidget

In [None]:
# Turn off pandas' chained-assignment check for the whole notebook.
pd.options.mode.chained_assignment = None

In [None]:
from jupyter_dash.comms import _send_jupyter_config_comm_request, _jupyter_config
from jupyter_dash import JupyterDash

import dash_cytoscape as cyto

from tqdm.notebook import tqdm

In [None]:
# JupyterDash configs
# NOTE(review): this is a private jupyter_dash helper (leading underscore); presumably it asks the
# notebook front end for its server configuration — confirm it still exists after upgrades.
_send_jupyter_config_comm_request()

In [None]:
# Wait three seconds before inferring the proxy configuration.
# NOTE(review): presumably this gives the comm request sent in the previous cell time to complete — confirm.
time.sleep(3)
JupyterDash.infer_jupyter_proxy_config()

In [None]:
# Make the additional dash_cytoscape layouts available to the graph components.
cyto.load_extra_layouts()

In [None]:
import dash_core_components as dcc

In [None]:
from cord_analytics.utils import (generate_curation_table,
                                  link_ontology,
                                  generate_comention_analysis,
                                  build_cytoscape_data,
                                  merge_with_ontology_linking,
                                  resolve_taxonomy_to_types)
            
from bbg_apps.curation_app import (curation_app)
from bbg_apps.visualization_app import (visualization_app)

from kganalytics.export import load_network

# Set a Project

The user chooses / creates a project to host a KG.

* Use the [Nexus web application](https://bbp.epfl.ch/nexus/web) to get a token.
* Once a token is obtained then proceed to paste it below.

In [None]:
import getpass
# Prompt for the Nexus token; getpass does not echo the typed value.
TOKEN = getpass.getpass()

In [None]:
from kgforge.core import KnowledgeGraphForge

In [None]:
# Configure a 'forge' to manage (create, access and deploy) the knowledge graph within a given Blue Brain Nexus Project.
# The path of the forge configuration file is read from the FORGE_CONFIG_FILE environment variable.
FORGE_CONFIG_FILE = os.getenv("FORGE_CONFIG_FILE")
if FORGE_CONFIG_FILE is None:
    # Explicit error instead of a bare assert: asserts carry no message here and are stripped under `python -O`.
    raise RuntimeError(
        "The environment variable FORGE_CONFIG_FILE is not set; "
        "it must point to the forge configuration file."
    )
forge = KnowledgeGraphForge(FORGE_CONFIG_FILE, token=TOKEN, debug=True)

# Set topic
The user defines a topic.

In [None]:
import jwt
from IPython.display import display, HTML

In [None]:
# Notebook-level state: the currently selected topic and dataset resources.
# Both are reassigned (via `global`) by the widget callbacks defined in this cell.
topic_resource=None
kg_resource=None
# Read the user name from the token payload without verifying the token signature.
# NOTE(review): the `verify=False` keyword is PyJWT 1.x style; PyJWT >= 2.0 expects
# `options={"verify_signature": False}` instead — confirm the installed PyJWT version.
agent_username = jwt.decode(TOKEN,  verify=False)['preferred_username']

def save_topic(b):
    """Create a Topic resource from the 'Create topic' form and register it.

    The form fields (``w1`` .. ``w8``) are validated first: nothing is sent to
    the store when the topic name is empty. After a successful registration
    the form is cleared; if the store reports a failure, its message is shown
    and the form content is kept so that the user can retry.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused; required by the ``on_click`` signature).
    """
    global topic_resource
    output.clear_output()
    output2.clear_output()
    output3.clear_output()

    # Validate before registering: previously an unnamed topic was still
    # registered and only afterwards reported as missing a name.
    topic_name = w1.value
    if not topic_name.strip():
        with output2:
            print("Please provide a topic name")
        return

    question_fields = (w5, w6, w7, w8)
    topic_to_save = {
        'id': str(topic_name).replace(' ', '_'),
        'type': 'Topic',
        'name': topic_name,
        'field': w2.value,
        'description': w3.value,
        'keywords': w4.value,
        'question': [field.value for field in question_fields],
    }
    topic_resource = forge.from_json(topic_to_save)
    forge.register(topic_resource)

    # Same success check as the dataset registration further down in the
    # notebook; tolerate resources that do not expose `_last_action`.
    action = getattr(topic_resource, "_last_action", None)
    with output2:
        if action is not None and not action.succeeded:
            print(action.message)
            return
        print("Topic saved!")

    # Reset the form for the next topic.
    for field in (w1, w2, w3, w4) + question_fields:
        field.value = ""

def get_topics(b):
    """List the non-deprecated topics created by the current user.

    Runs a SPARQL query through the forge, stores the result in the global
    ``topics_df`` and fills the topic dropdown ``w0`` with the sorted, unique
    topic names. Prints a notice in ``output`` when no topic is found.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused; required by the ``on_click`` signature).
    """
    global topics_df
    for area in (output, output2, output3):
        area.clear_output()
    query = f"""
    SELECT ?id ?name ?description ?keywords ?field ?question ?createdAt
    WHERE {{
        ?id a Topic ;
            name ?name ;
            description ?description ;
            keywords ?keywords ;
            field ?field ;
            question ?question ;
            <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
            <https://bluebrain.github.io/nexus/vocabulary/createdAt> ?createdAt ;
            <https://bluebrain.github.io/nexus/vocabulary/createdBy> <{forge._store.endpoint}/realms/bbp/users/{agent_username}> .
    }}
    """
    resources = forge.sparql(query, limit=100)
    if len(resources) < 1:
        with output:
            print("No topics found!")
        return

    topics_df = forge.as_dataframe(resources)
    output.clear_output()
    with output:
        # The leading empty entry is the "nothing selected" state.
        w0.options = [""] + sorted(set(topics_df.name))
        w0.value = ""
        w0.placeholder = "Select topic"
        w0.observe(topics_change, names='value')
        display(w0)
        display(s12)

def topics_change(change):
    """Refresh the topic detail form when the dropdown selection changes.

    Clears the detail fields, then, if a topic name is selected, retrieves the
    matching resource (stored in the global ``topic_resource``) and copies its
    field, description, keywords and questions into the form.

    Parameters
    ----------
    change : dict
        Change notification of the dropdown; ``change['new']`` is the newly
        selected topic name ("" when nothing is selected).
    """
    global topic_resource
    output3.clear_output()
    with output:
        if len(output.outputs) >= 1:
            # Keep only the first displayed item so the form is not stacked
            # repeatedly below it on every selection change.
            output.outputs = (output.outputs[0],)
        for field in (s5, s6, s7, s8, s9, s10, s11):
            field.value = ""
        if change['new'] != "":
            # Take the first matching row: deterministic, unlike picking an
            # element out of a set.
            matching_ids = topics_df.loc[topics_df.name == change['new'], "id"]
            topic_resource = forge.retrieve(matching_ids.iloc[0])
            s5.value = topic_resource.field
            s6.value = topic_resource.description
            s7.value = topic_resource.keywords
            question = topic_resource.question
            if isinstance(question, str):
                question = [question]
            if isinstance(question, list):
                # zip stops at the shorter side, so a topic with more
                # questions than input fields no longer raises IndexError.
                for field, text in zip(sq.children, question):
                    field.value = text
        display(s12)

def update_topic(b):
    """Write the edited detail form back to the selected topic and update it.

    Does nothing besides clearing ``output2`` when no topic is selected in
    the dropdown ``w0``.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused; required by the ``on_click`` signature).
    """
    output2.clear_output()
    if w0.value == "":
        return

    # Use the expanded JSON-LD identifier as the resource id for the update.
    expanded = forge.as_jsonld(topic_resource, form="expanded")
    topic_resource.id = expanded['@id']
    topic_resource.field = s5.value
    topic_resource.description = s6.value
    topic_resource.keywords = s7.value
    topic_resource.question = [field.value for field in sq.children[:4]]
    forge.update(topic_resource)
    with output:
        print("topic updated!")
        
def get_datasets(b):
    """List the current user's datasets that are about the selected topic.

    The query result is stored in the global ``kg_resources``; the dataset
    names are offered in the radio buttons ``s2`` inside ``output3``.
    Nothing is queried when no topic is selected in the dropdown ``w0``.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused; required by the ``on_click`` signature).
    """
    global kg_resources
    output3.clear_output()
    if w0.value == "":
        return

    topic_resource_id = forge.as_jsonld(topic_resource, form="expanded")['@id']
    query = f"""
            SELECT ?id ?name ?description ?keywords ?field ?question ?createdAt
            WHERE {{
                ?id a Dataset ;
                    name ?name ;
                    about <{topic_resource_id}> ;
                    <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
                    <https://bluebrain.github.io/nexus/vocabulary/createdAt> ?createdAt ;
                    <https://bluebrain.github.io/nexus/vocabulary/createdBy> <{forge._store.endpoint}/realms/bbp/users/{agent_username}> .
            }}
            """
    kg_resources = forge.sparql(query, limit=100, debug=True)
    print(len(kg_resources))
    with output3:
        if len(kg_resources) >= 1:
            display(s2)
            s2.options = [resource.name for resource in kg_resources]
            display(s3)
        else:
            print("No datasets found!")
        
def download_dataset(b):
    """Download the selected dataset and load its curated table.

    Retrieves the dataset chosen in the radio buttons ``s2`` (stored in the
    global ``kg_resource``), downloads its distributions to ``/tmp/`` and
    reads every distribution whose name contains "curated" into the global
    ``table_extractions``. A message is printed in ``output3`` in every case,
    including when nothing matches the selection or no curated file exists;
    in those cases ``table_extractions`` is left untouched.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused; required by the ``on_click`` signature).
    """
    global kg_resource
    global table_extractions

    matching_ids = [r.id for r in kg_resources if r.name == s2.value]
    if not matching_ids:
        # Previously an IndexError when no dataset matched the selection.
        with output3:
            print("No dataset has been downloaded")
        return

    kg_resource = forge.retrieve(matching_ids[0])
    forge.download(kg_resource, "distribution.contentUrl", "/tmp/", overwrite=True)

    # A dataset with a single distribution may expose it as one object
    # rather than a list; normalise so the loop below works either way.
    distributions = kg_resource.distribution
    if not isinstance(distributions, (list, tuple)):
        distributions = [distributions]

    curated_found = False
    for r in distributions:
        if "curated" in r.name:
            curated_found = True
            table_extractions = pd.read_csv(f"/tmp/{r.name}")
            message = f"Dataset '{r.name}' with {len(table_extractions)} entities ready to be reused. Its content has been assigned to the variable 'table_extractions'. Please continue with the interactive UI section to visualise this dataset."
            with output3:
                print(message)

    if not curated_found:
        # The original fallback branch was unreachable (read_csv never
        # returns None), so this case used to end silently.
        with output3:
            print("No curated dataset found among the downloaded files")

# --- Widgets of the 'Select topic' tab -------------------------------------
# s0/s1/s3/s4 are action buttons, s2 lists the datasets of the selected topic,
# s5..s11 are the editable detail fields of the selected topic.
s0 = ipywidgets.Button(
    description= '🔬 List all your topics',
    button_style='',
    layout=ipywidgets.Layout(width='300px', height='30px'),
    disabled=False)
s1 = ipywidgets.Button(
    description= "📃 Show datasets for selected topic",
    button_style='',
    layout=ipywidgets.Layout(width='300px', height='30px'),
    disabled=False)
s2 = ipywidgets.RadioButtons(
    description='Select:',
    disabled=False)
s3 = ipywidgets.Button(
    description= '📈 Reuse selected dataset',
    button_style='',
    layout=ipywidgets.Layout(width='300px', height='30px'),
    disabled=False)
s4 = ipywidgets.Button(
    description= '✏️ Update topic',
    button_style='',
    layout=ipywidgets.Layout(width='300px', height='30px'),
    disabled=False)
s5 = ipywidgets.Text(
    description='Field:',
    disabled=False)
s6 = ipywidgets.Textarea(
    description='Description:',
    disabled=False)
s7 = ipywidgets.Textarea(
    description='Keywords:',
    disabled=False)
# s8..s11: the four question fields of the selected topic.
s8 = ipywidgets.Text(
    disabled=False)
s9 = ipywidgets.Text(
    disabled=False)
s10 = ipywidgets.Text(
    disabled=False)
s11 = ipywidgets.Text(
    disabled=False)

# Container of the four question fields; the callbacks access them as sq.children[i].
sq = ipywidgets.VBox(children=[s8, s9, s10, s11])

# Complete detail form of the selected topic, including the update button.
s12 = ipywidgets.VBox(children=[s5, s6, s7, ipywidgets.Label('Questions:'), sq, s4])

# --- Widgets of the 'Create topic' tab -------------------------------------
# w0 is the topic dropdown (its options are filled by get_topics); w1..w8 are
# the fields of the creation form and w9 submits it.
w0 = ipywidgets.Dropdown(
        description='Select topic:',
        disabled=False)
w1 = ipywidgets.Text(
    placeholder='e.g. COVID-19',
    description='Topic name:',
    disabled=False)
w2 = ipywidgets.Text(
    placeholder='e.g. Neuroscience',
    description='Field:',
    disabled=False)
w3 = ipywidgets.Textarea(
    placeholder='Add a description of your topic',
    description='Description:',
    disabled=False)
w4 = ipywidgets.Textarea(
    placeholder='e.g. Coronavirus; COVID-19; SARS; risk factor; glycosylation; sugar; carbohydrates',
    description='Keywords:',
    disabled=False)
w5 = ipywidgets.Text(
    placeholder='Add a question about your research topic',
    disabled=False)
w6 = ipywidgets.Text(
    placeholder='Add a question about your research topic',
    disabled=False)
w7 = ipywidgets.Text(
    placeholder='Add a question about your research topic',
    disabled=False)
w8 = ipywidgets.Text(
    placeholder='Add a question about your research topic',
    disabled=False)
w9 = ipywidgets.Button(
    description='Create',
    button_style='',
    tooltip='Create new topic',
    disabled=False)

# Output areas written to by the callbacks: `output` (topic list and detail form),
# `output2` (feedback of the creation form) and `output3` (datasets of the topic).
output = ipywidgets.Output()
output2 = ipywidgets.Output()
output3 = ipywidgets.Output()

buttons = ipywidgets.HBox(children=[s0, s1])
outputs = ipywidgets.HBox(children=[output, output3])
tab1 = ipywidgets.VBox(children=[buttons, outputs])
# NOTE: save_topic reads the form through widget.children[1].children[i], so the
# position of the children of tab2 must not change (index 4 is the label).
tab2 = ipywidgets.VBox(children=[w1, w2, w3, w4, ipywidgets.Label('Please express your research topic in a few questions:'), w5, w6, w7, w8, w9, output2])
widget = ipywidgets.Tab(children=[tab1, tab2])
widget.set_title(0, 'Select topic')
widget.set_title(1, 'Create topic')

# Connect the buttons to their callbacks.
w9.on_click(save_topic)
s0.on_click(get_topics)
s1.on_click(get_datasets)
s3.on_click(download_dataset)
s4.on_click(update_topic)

display(widget)

# Data Import
The user loads data from a data source (CORD-19). The loaded data forms the corpus. The user searches the CORPUS in Blue Brain Search.

Search server URL

In [None]:
# URL of the search server; the SEARCH_ENGINE_URL environment variable overrides the default.
SEARCH_ENGINE_URL = os.getenv("SEARCH_ENGINE_URL", "http://dgx1.bbp.epfl.ch:8850")
# NOTE(review): this can never fail because os.getenv is given a default value.
assert SEARCH_ENGINE_URL is not None

# Check that the server answers on /help and identifies itself as a 'SearchServer'.
# NOTE(review): the request has no timeout, so the cell blocks for as long as the server does not answer.
response = requests.post("{}/help".format(SEARCH_ENGINE_URL))
assert response.ok and response.json()['name'] == 'SearchServer', "The server is not accessible"
print(f"This server is using the database: {response.json()['database']}")

MySQL URL and engine

In [None]:
# Host and port of the MySQL server; the MYSQL_DB_URI environment variable overrides the default.
MYSQL_DB_URI = os.getenv("MYSQL_DB_URI", "dgx1.bbp.epfl.ch:8853")
# Engine for the 'cord19_v47' database, connecting through the pymysql driver.
# NOTE(review): user name and password ('guest') are hard-coded in the connection URL.
bbs_mysql_engine = sqlalchemy.create_engine(f'mysql+pymysql://guest:guest@{MYSQL_DB_URI}/cord19_v47')

Article saver

In [None]:
# Article saver bound to the MySQL engine; shared by the search and mining widgets below.
article_saver = ArticleSaver(connection=bbs_mysql_engine)

Search widget

In [None]:
# Interactive search widget; the trailing bare expression renders it as the cell output.
search_widget = SearchWidget(
    bbs_search_url=SEARCH_ENGINE_URL,
    bbs_mysql_engine=bbs_mysql_engine,
    article_saver=article_saver,
    results_per_page=3)
search_widget

Show saved articles and paragraphs

In [None]:
# Table of the results saved through the search widget; displayed as the cell output.
df_results = search_widget.saved_results()
df_results

In [None]:
# Summary of the saved results: number of distinct articles, number of rows saved as
# paragraph only ('Paragraph' ticked, 'Article' not ticked) and number of distinct
# articles saved as a whole ('Article' ticked).
print(f"""For information: \n 
      - The query showed {len(df_results['Article ID'].unique())} different articles.
      - Saved {len(df_results[(df_results['Paragraph']=='✓') & (df_results['Article'] != '✓')])} paragraph(s)
      - Saved {len(df_results[df_results['Article']=='✓']['Article ID'].unique())} article(s)""")

# Set schemas
The user defines the KG schema.

In [None]:
# Entity types of the mining schema, each with the ontology it is linked to
# (None: the entity is added without an ontology source).
mining_schema = MiningSchema()

for entity_type, ontology_source in (
    ("CELL_COMPARTMENT", None),
    ("CELL_TYPE", None),
    ("CHEMICAL", "NCIT"),
    ("CONDITION", None),
    ("DISEASE", "NCIT"),
    ("DRUG", None),
    ("ORGAN", "NCIT"),
    ("ORGANISM", "NCIT"),
    ("PATHWAY", "Reactome"),
    ("PROTEIN", "NCIT"),
):
    if ontology_source is None:
        mining_schema.add_entity(entity_type)
    else:
        mining_schema.add_entity(entity_type, ontology_source=ontology_source)

mining_schema.df

# Create a knowledge graph according to schemas
The user extracts data from the text of a set of papers using selected Named Entity Recognizers and Relation Extractors from Blue Brain Search.
The user can preview the extracted data.
The user curates extracted data.
The user links the extracted entities and relations to ontologies.
The user saves data into Knowledge Graph.

- **input**: raw text
- **output**: csv table of extracted entities/relations

In [None]:
# Default text offered by the mining widget. Line breaks are turned into spaces and
# double spaces are then collapsed, so the text ends up as a single line.
DEFAULT_TEXT = """Autophagy maintains tumour growth through circulating
arginine. Autophagy captures intracellular components and delivers them to
lysosomes, where they are degraded and recycled to sustain metabolism and to
enable survival during starvation. Acute, whole-body deletion of the essential 
autophagy gene Atg7 in adult mice causes a systemic metabolic defect that 
manifests as starvation intolerance and gradual loss of white adipose tissue, 
liver glycogen and muscle mass. Cancer cells also benefit from autophagy. 
Deletion of essential autophagy genes impairs the metabolism, proliferation, 
survival and malignancy of spontaneous tumours in models of autochthonous 
cancer. Acute, systemic deletion of Atg7 or acute, systemic expression of a 
dominant-negative ATG4b in mice induces greater regression of KRAS-driven 
cancers than does tumour-specific autophagy deletion, which suggests that host 
autophagy promotes tumour growth.
""".replace('\n', ' ').replace('  ', ' ')

In [None]:
# URL of the text mining server; the TEXT_MINING_URL environment variable overrides the default.
TEXT_MINING_URL = os.getenv("TEXT_MINING_URL", "http://dgx1.bbp.epfl.ch:8852")
# Check that the server answers on /help and identifies itself as a 'MiningServer'.
# NOTE(review): the request has no timeout and the assert has no message.
response = requests.post(TEXT_MINING_URL + "/help")
assert response.ok and response.json()['name'] == 'MiningServer'
print(f"This server is using the database: {response.json()['database']}")

In [None]:
# Interactive mining widget, configured with the schema, the saved articles and the
# default text defined above; the trailing bare expression renders it as the cell output.
mining_widget = MiningWidget(
    mining_server_url=TEXT_MINING_URL,
    mining_schema=mining_schema,
    article_saver=article_saver,
    default_text=DEFAULT_TEXT)
mining_widget

In [None]:
# Get DataFrame of extractions
table_extractions = mining_widget.get_extracted_table()

# Drop duplicates in DataFrame
# Rows are compared on every column except 'entity_type': rows that differ only in
# their entity type count as duplicates and the first one is kept.
columns_duplicates = table_extractions.columns.tolist()
columns_duplicates.remove('entity_type')
table_extractions = table_extractions.drop_duplicates(subset=columns_duplicates, keep='first', ignore_index=True)
# Discard rows without an entity.
table_extractions = table_extractions.dropna(subset=["entity"])

## Curate the table with extracted entities

- **input**: csv table of extracted entities/relations
- **output**: csv table with curated and ontology linked entities/relations

In [None]:
# Report the number of extracted rows that go into the curation step.
print(f'The table has {table_extractions.shape[0]} rows.')

In [None]:
%%time

print("Setting default term filters: the user can remove them later on in the UI if need be ...")
# Split on ';' and strip each term: the list below is not consistently separated by
# "; " (see 'ventilation;SARS'), so splitting on "; " merged two terms into one
# that could never match an entity.
default_term_filters = [
    term.strip()
    for term in 'Glucose; Covid-19; SARS-CoV-2; Diabetes; IL-1; ACE2; glycosylation; hyperglycemia; shock; fatigue; CVD; vasoconstriction; lactate; insulin; SP-D; HbA1c; LDH; glycolysis; GLUT; macrophage; lymphocytes; ventilation;SARS; ARDS; Cytokine Storm; pneumonia; multi-organs failure; thrombosis; inflammation; IL-6; CRP; D-Dimer; Ferritin; Lung Disease; Hypertension; Aging; COPD; angiotensin 2 (or angiotensin II or AngII); Obesity; ICU (intensive care unit); ventilation; ketogenic diet'.split(";")
    if term.strip()
]
filtered_table_extractions = table_extractions.copy()

# Lower-case the entity column once instead of once per term; missing values
# become NaN and simply never match.
lowered_entities = filtered_table_extractions["entity"].str.lower()

# Set of tuples holding, per matched term, all spellings found in the table.
default_found_term_filters = set()
# First spelling of every matched term, in the order of the default list
# (deterministic, unlike iterating over the set).
term_filter_options = []
for term_filter in default_term_filters:
    entities_to_keep = filtered_table_extractions.loc[
        lowered_entities == term_filter.lower(), "entity"].unique()
    if len(entities_to_keep) > 0:
        spellings = tuple(entities_to_keep)
        if spellings not in default_found_term_filters:
            default_found_term_filters.add(spellings)
            term_filter_options.append(spellings[0])
print("Done.")

print("Prepating curatation data...")
curation_input_table, factor_counts = generate_curation_table(filtered_table_extractions)
print("Done.")

print("Loading the ontology linking data...")
# NOTE: unpickling executes code contained in the file; only load pickles from a trusted location.
linking = pd.read_pickle("/gpfs/bbp.cscs.ch/project/proj116/network_analytics/data/cord_47_linking.pkl")
# One definition per concept: the first one listed for it in the linking table.
definitions = linking[["concept", "definition"]].groupby("concept").aggregate(lambda x: list(x)[0]).to_dict()["definition"]

print("Loading default ontology type mapping...")
with open('/gpfs/bbp.cscs.ch/project/proj116/bbg/ontology-linking/ncit_to_mltypes_mapping.json', "rb") as f:
    default_type_mapping = json.load(f)

print("Done.")

Run the curation app. In case of the error 'Address already in use', try specifying another port (for example, in the range 8072-8099)

In [None]:
# Hand the default term filters, a copy of the curation table and the ontology
# linking callback to the curation app, then start it on port 8071.
curation_app.set_default_terms_to_include(term_filter_options)
curation_app.set_table(curation_input_table.copy())
# The callback binds the linking table and the type mapping loaded above.
curation_app.set_ontology_linking_callback(lambda x: link_ontology(linking, default_type_mapping, x))

curation_app.run(port=8071)

## Create a co-mention graph from curated entities

- **input**: csv table with curated and ontology linked entities/relations
- **output**: graph objects with co-occurrence network and its spanning tree

In [None]:
# Fetch the table as curated in the app.
curated_table_extractions = curation_app.get_curated_table()

In [None]:
# Turn the occurrence collections of every entity into sets.
for occurrence_column in ("paper", "paragraph", "section"):
    curated_table_extractions[occurrence_column] = (
        curated_table_extractions[occurrence_column].apply(lambda values: set(values))
    )

Create a co-mention network from curated table

In [None]:
# Entity types, exposed to the analysis under the column name "type".
type_data = curated_table_extractions[["entity_type"]].rename(columns={"entity_type": "type"})
# Use the number of entities chosen in the curation app, falling back to 100 when it is unset/zero.
n_most_frequent = curation_app.n_most_frequent or 100
graphs, trees = generate_comention_analysis(
    curated_table_extractions,
    factor_counts,
    n_most_frequent=n_most_frequent,
    type_data=type_data,
    factors=["paper", "paragraph"],
    cores=10,
)
print("Done.")

In [None]:
# Cytoscape representation of the tree and of the full graph, per co-occurrence factor.
cytoscape_graphs = {
    factor: {
        "tree": build_cytoscape_data(trees[factor]),
        "graph": build_cytoscape_data(graphs[factor]),
    }
    for factor in ("paper", "paragraph")
}

In [None]:
%%time

# Common path prefix of the pre-generated edge and node lists.
prefix = "/gpfs/bbp.cscs.ch/project/proj116/network_analytics/data/graphs/cord_47/full_3000"

print("Loading pre-generated graphs with 3'000 entities...")
print("\t - paper-based network")
paper_graph = load_network("{}_paper_edge_list.pkl".format(prefix), "{}_paper_node_list.pkl".format(prefix))
paper_spanning_tree = load_network("{}_paper_tree_edge_list.pkl".format(prefix), "{}_paper_tree_node_list.pkl".format(prefix))
# Store on every tree node the size of its "paper" attribute as "paper_frequency".
nx.set_node_attributes(
    paper_spanning_tree, {
        n: len(paper_spanning_tree.nodes[n]["paper"])
        for n in paper_spanning_tree.nodes()
    },
    "paper_frequency")

print("\t - paragraph-based network")
paragraph_graph = load_network("{}_paragraph_edge_list.pkl".format(prefix), "{}_paragraph_node_list.pkl".format(prefix))
# NOTE(review): the paper-based tree above is loaded with '..._tree_node_list.pkl', whereas this
# call uses '..._paragraph_node_list.pkl' (no '_tree'); possibly a copy-paste slip — confirm
# which node list is intended before changing it.
paragraph_spanning_tree = load_network("{}_paragraph_tree_edge_list.pkl".format(prefix), "{}_paragraph_node_list.pkl".format(prefix))
# Same "paper_frequency" attribute (size of the "paper" attribute) for the paragraph-based tree.
nx.set_node_attributes(
    paragraph_spanning_tree, {
        n: len(paragraph_spanning_tree.nodes[n]["paper"])
        for n in paragraph_spanning_tree.nodes()
    },
    "paper_frequency")
print("Done.")

print("Loading pre-computed node positions...")
with open("/gpfs/bbp.cscs.ch/project/proj116/network_analytics/data/positions/paper_3000.json", "r") as f:
    paper_positions = json.load(f)

with open("/gpfs/bbp.cscs.ch/project/proj116/network_analytics/data/positions/paragraph_3000.json", "r") as f:
    paragraph_positions = json.load(f)
print("Done.")

Convert pre-computed graphs to a cytoscape format + add node positions

In [None]:
# Cytoscape data of the pre-computed spanning trees, built with the pre-computed node positions.
paper_3000_cyto = build_cytoscape_data(paper_spanning_tree, positions=paper_positions)
paragraph_3000_cyto = build_cytoscape_data(paragraph_spanning_tree, positions=paragraph_positions)

In [None]:
# NOTE(review): this cell repeats the two statements of the preceding cell verbatim and
# recomputes the same values; it looks like a leftover duplicate — confirm and remove.
paper_3000_cyto = build_cytoscape_data(paper_spanning_tree, positions=paper_positions)
paragraph_3000_cyto = build_cytoscape_data(paragraph_spanning_tree, positions=paragraph_positions)

In [None]:
def list_papers(papers, limit=200):
    """Fetch the bibliographic metadata of a collection of articles.

    Parameters
    ----------
    papers : iterable or None
        Article ids to look up in the ``articles`` table. Any iterable is
        accepted (list, tuple, set, ...); ``None`` is treated as empty.
    limit : int
        Maximum number of ids included in the query (default 200).

    Returns
    -------
    list
        One row per article found, with the columns title, authors, abstract,
        doi, url, journal, pmcid, pubmed_id and publish_time. Empty when no
        id is requested.
    """
    if papers is None:
        return []
    # Materialise first: sets and other non-sequence iterables cannot be sliced.
    paper_ids = list(papers)[:limit]
    if not paper_ids:
        # Without any id there is nothing to filter on; querying anyway would
        # not restrict the result to the requested papers.
        return []

    # Reflect only the table that is needed instead of the whole database.
    meta_data = sqlalchemy.MetaData()
    meta_data.reflect(bind=bbs_mysql_engine, only=["articles"])
    articles = meta_data.tables["articles"]

    s = select([
        articles.c.title,
        articles.c.authors,
        articles.c.abstract,
        articles.c.doi,
        articles.c.url,
        articles.c.journal,
        articles.c.pmcid,
        articles.c.pubmed_id,
        articles.c.publish_time
    ]).where(articles.c.article_id.in_(paper_ids))

    # Fetch all rows while the connection is open, then release it.
    with bbs_mysql_engine.connect() as connection:
        return list(connection.execute(s))

In [None]:
# Networks offered in the visualization app, keyed by their display name.
# Every entry has a "graph" and a "tree"; "default_top_n" and "positions" are optional
# and are passed as None by the registration loop when absent.
GRAPH_OBJECTS = {
    "Topic-centered network (paper-based)": {
        "graph": graphs["paper"],
        "tree": trees["paper"],
        "default_top_n": 100
    },
    "Topic-centered network (paragraph-based)": {
        "graph": graphs["paragraph"],
        "tree": trees["paragraph"],
        "default_top_n": 100
    },
    "Naive pre-computed network (paper-based, 3000)": {
        "graph": paper_graph,
        "tree": paper_spanning_tree,
        "positions": paper_positions
    },
    "Naive pre-computed network (paragraph-based, 3000)": {
        "graph": paragraph_graph,
        "tree": paragraph_spanning_tree,
        "positions": paragraph_positions,
    }
}   

In [None]:
# Register every network with the visualization app; optional entries that are
# missing from a specification are passed as None.
for graph_name, graph_spec in GRAPH_OBJECTS.items():
    visualization_app.set_graph(
        graph_name,
        graph_spec["graph"],
        tree_object=graph_spec.get("tree"),
        positions=graph_spec.get("positions"),
        default_top_n=graph_spec.get("default_top_n"),
    )

# Network shown first, plus the callbacks and data the app needs.
visualization_app.set_current_graph("Topic-centered network (paper-based)")
visualization_app.set_list_papers_callback(list_papers)
visualization_app.set_entity_definitons(definitions)
visualization_app._db_error_message = "Failed to retreive papers (check if the variable 'bbs_mysql_engine' was initialized or check the DB connection)"

By default, the app displays only the 50 most frequent nodes; you can then choose to show all the nodes in the network.

In [None]:
# Start the visualization app.
# NOTE(review): the port is given as a string here, whereas the curation app is started with an int — confirm both are accepted.
visualization_app.run(port="8076")

# Validate the knowledge graph
Content of the Knowledge Graph is validated. In this version, syntactic validation (i.e. are the identifiers correct, ...) is performed when building the knowledge graph. If the knowledge graph is successfully built, then the validation passes. In case of a warning (e.g. because of an unusual character such as '+' in an extracted entity), the user can go back to the curation step and further curate the extracted entities.

# Correct knowledge graph
Correction involves going back to the extraction and/or curation steps.

# Access the knowledge graph
The user can search, visualize, and export the knowledge graph.

# Version the knowledge graph
The user can save a knowledge graph with a version.

In [None]:
import time
# Timestamp shared by both file names (and by the default dataset tag further down).
timestr = time.strftime("%Y%m%d-%H%M%S")
        
# Temporarily save the extracted entities csv file locally
# NOTE(review): to_csv also writes the DataFrame index as the first column.
table_extractions_filename = "./table_extractions_%s.csv" % (timestr)
table_extractions.to_csv(table_extractions_filename)


# Temporarily save the curated list of extracted entities csv file locally
curated_table_extractions_filename = "./curated_table_extractions_%s.csv" % (timestr)
curated_table_extractions.to_csv(curated_table_extractions_filename)

In [None]:
import jwt
from kgforge.core import Resource
from kgforge.specializations.resources import Dataset

# Decode the token payload without verifying its signature.
# NOTE(review): the `verify=False` keyword is PyJWT 1.x style; PyJWT >= 2.0 expects
# `options={"verify_signature": False}` instead — confirm the installed PyJWT version.
agent = jwt.decode(TOKEN,  verify=False)

# Keep only the identity claims and turn them into a 'Person' resource identified by the 'sub' claim.
agent = forge.reshape(forge.from_json(agent), keep=["name","email","sub","preferred_username"])
agent.id = agent.sub
agent.type = "Person"

# Dataset holding both csv files written above, with the current user as contributor.
# NOTE(review): `topic_resource` is still None when no topic has been selected or created,
# in which case `topic_resource.name` raises AttributeError.
# NOTE(review): get_datasets looks datasets up by the topic *id* (`about <id>`), while the
# topic *name* is stored here — confirm that both resolve to the same identifier.
dataset = Dataset(forge,name="A dataset", about=topic_resource.name)
dataset.add_distribution(table_extractions_filename, content_type="application/csv")
dataset.add_distribution(curated_table_extractions_filename, content_type="application/csv")
dataset.add_contribution(agent)
dataset.contribution.hadRole= "Scientists"

In [None]:
# Default tag proposed in the tag field: "<user name>_<timestamp>".
version = agent.preferred_username+"_"+timestr

def register_dataset(b):
    """Register the dataset under the name and description typed in the form.

    The outcome is printed in ``output4``: a confirmation on success,
    otherwise the message of the last action recorded on the dataset.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused; required by the ``on_click`` signature).
    """
    for area in (output4, output5):
        area.clear_output()
    dataset.name = t1.value
    dataset.description = t2.value
    forge.register(dataset)
    with output4:
        if dataset._last_action.succeeded == True:
            print("Dataset registered!")
            return
        print(dataset._last_action.message)

def version_dataset(b):
    """Tag the dataset with the value typed in the tag field ``t3``.

    The outcome is printed in ``output5``: the applied tag on success,
    otherwise the message of the last action recorded on the dataset
    (mirroring ``register_dataset``). Nothing is sent to the store when the
    tag field is empty.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button (unused; required by the ``on_click`` signature).
    """
    output5.clear_output()
    tag = t3.value
    if not tag.strip():
        with output5:
            print("Please provide a tag")
        return

    forge.tag(dataset, tag)
    action = dataset._last_action
    with output5:
        if action.succeeded:
            print(f"Tagged with: {str(tag)}")
        else:
            # Previously a failed tagging produced no feedback at all.
            print(action.message)
    
# Output areas for the feedback of the registration (output4) and of the tagging (output5).
output4 = ipywidgets.Output()
output5 = ipywidgets.Output()

b1 = ipywidgets.Button(
    description= '💾  Register Dataset',
    button_style='',
    layout=ipywidgets.Layout(width='300px', height='30px'),
    disabled=False)

b2 = ipywidgets.Button(
    description= '🔖 Tag Dataset',
    button_style='',
    layout=ipywidgets.Layout(width='300px', height='30px'),
    disabled=False)

# Name and description of the dataset, read by register_dataset.
t1 = ipywidgets.Text(
    placeholder='Add a name for your dataset',
    description='Name:',
    disabled=False)

t2 = ipywidgets.Textarea(
    placeholder='Add a description of your dataset',
    description='Description:',
    disabled=False)

# Tag field, read by version_dataset; pre-filled with the default tag.
t3 = ipywidgets.Text(
    description='Tag:',
    value=version,
    disabled=False)

# Connect the buttons to their callbacks.
b1.on_click(register_dataset)
b2.on_click(version_dataset)

save_widget = ipywidgets.VBox(children=[t1, t2, b1, output4, t3, b2, output5])

display(save_widget)