# Goal of the notebook
(to be completed)

In [None]:
import getpass
import os
import pathlib

import pandas as pd
import requests
import sqlalchemy
import ipywidgets

import bbsearch as bbs
from bbsearch.remote_searcher import RemoteSearcher
from bbsearch.widgets import ArticleSaver, SearchWidget, MiningWidget, SchemaRequest

# Set a Project
The user chooses / creates a project to host a KG.

In [None]:
# Use the Nexus Web application to get a token. Once a token is obtained, paste it below.
# getpass prompts for the value without echoing it, so the token is not shown in the cell output.
# NOTE(review): getpass is already imported by the first cell; this import is redundant but harmless.
import getpass
TOKEN = getpass.getpass()

In [None]:
from kgforge.core import KnowledgeGraphForge

In [None]:
# Configure a 'forge' to manage (create, access and deploy) the knowledge graph within a given Blue Brain Nexus Project.
# The path of the forge configuration is read from the FORGE_CONFIG_FILE environment variable.
FORGE_CONFIG_FILE = os.getenv("FORGE_CONFIG_FILE")
if FORGE_CONFIG_FILE is None:
    # Explicit error rather than a bare assert: it carries a message and is not stripped under `python -O`.
    raise RuntimeError(
        "The FORGE_CONFIG_FILE environment variable must be set to the path of the forge configuration."
    )
forge = KnowledgeGraphForge(FORGE_CONFIG_FILE, token=TOKEN)

# Set topic
The user defines a topic.

In [None]:
# Resource of the last topic registered through the form; none yet.
topic_resource = None
# Known topics keyed by name, seeded with an empty 'COVID-19' entry
# (description, tags and four blank questions).
topics = {
    'COVID-19': {
        'description': '',
        'tags': '',
        'questions': [''] * 4,
    },
}
def save_topic(b):
    """Store the topic currently typed in the form and register it in the forge.

    Click handler of the 'Add' button. The form values are read by position
    from ``widget.children``: 2 is the name, 3 the description, 4 the tags and
    6 to 9 the four questions. The topic is added to ``topics``, converted to
    a forge resource (kept in the global ``topic_resource``), registered, and
    the topics dropdown is refreshed.

    Args:
        b: The value passed by ``on_click``; not used.
    """
    global topic_resource

    fields = widget.children
    name = fields[2].value
    description = fields[3].value
    tags = fields[4].value
    questions = [fields[position].value for position in range(6, 10)]

    topics[name] = {
        'description': description,
        'tags': tags,
        'questions': questions,
    }

    # JSON payload of the topic; the @context is deliberately not set here:
    # "@context": "https://bbp.epfl.ch/nexus/v1/resources/covid19-kg/schemas/context"
    topic_to_save = {
        "id": str(name).replace(' ', '_'),
        "type": "Topic",
        "name": name,
        'description': description,
        'keywords': tags,
        # Separate list so the payload does not share state with `topics`.
        'question': list(questions),
    }

    topic_resource = forge.from_json(topic_to_save)
    forge.register(topic_resource)
    w0.options = list(topics)
def topics_change(change):
    """Fill the form with the topic newly selected in the topics dropdown.

    Args:
        change: The change dictionary passed by ``observe``; its ``'new'``
            entry is the name of the selected topic.
    """
    w1.value = change['new']
    selected = topics[w1.value]
    w2.value = selected['description']
    w3.value = selected['tags']
    # One text box per question, in order.
    for position, question_box in enumerate((w4, w5, w6, w7)):
        question_box.value = selected['questions'][position]
# Topic editor: a dropdown listing the known topics, followed by a form to add a new one.
w0 = ipywidgets.Dropdown(options=list(topics.keys()), description='Topics:', disabled=False)
w1 = ipywidgets.Text(placeholder='COVID-19', description='Name:', disabled=False)
w2 = ipywidgets.Textarea(
    placeholder='Add a description of your topic',
    description='Description:',
    disabled=False,
)
w3 = ipywidgets.Textarea(
    placeholder='Coronavirus; COVID-19; SARS; risk factor; glycosylation; sugar; carbohydrates',
    description='Tags:',
    disabled=False,
)
# Four identical free-text boxes, one per research question.
w4, w5, w6, w7 = (
    ipywidgets.Text(placeholder='Add a question about your research topic', disabled=False)
    for _ in range(4)
)
w8 = ipywidgets.Button(description='Add', button_style='', tooltip='Add topic', disabled=False)

# The order of the children matters: save_topic reads the form values by position.
widget = ipywidgets.VBox(
    [
        w0,
        ipywidgets.HTML('<b>Add a new topic to the topics list:</b>'),
        w1,
        w2,
        w3,
        ipywidgets.Label('Please express your research topic in a few questions:'),
        w4,
        w5,
        w6,
        w7,
        w8,
    ],
    layout=ipywidgets.Layout(display='flex', align_items='flex-end', width='40%'),
)

# Wire the handlers, then render the form.
w8.on_click(save_topic)
w0.observe(topics_change, names='value')
display(widget)

# Data Import
The user loads data from a data source (CORD-19).
The loaded data forms the corpus.
The user searches the CORPUS in Blue Brain Search.

URLs

In [None]:
# Address of the search server; override the default with the SEARCH_ENGINE_URL environment variable.
# (os.getenv is given a default, so the value can never be None.)
SEARCH_ENGINE_URL = os.getenv("SEARCH_ENGINE_URL", "http://dgx1.bbp.epfl.ch:8850")

# Ask the server to identify itself through its /help endpoint and parse the answer once.
response = requests.post("{}/help".format(SEARCH_ENGINE_URL))
search_server_info = response.json() if response.ok else {}
if search_server_info.get('name') != 'SearchServer':
    raise RuntimeError("The server is not accessible")
# Double-quoted f-string: re-using the enclosing quote inside the braces is a
# SyntaxError on Python versions before 3.12.
print(f"The search server is running correctly and using the database {search_server_info['database']}")

Paths

In [None]:
# Root of the Blue Brain Search data; override the default with the BBS_DATA_PATH environment variable.
BBS_DATA_PATH = pathlib.Path(os.getenv("BBS_DATA_PATH", "/raid/sync/proj115/bbs_data/"))
# Sub-directory named 'trained_models' under the data root.
trained_models_path = BBS_DATA_PATH.joinpath('trained_models')

In [None]:
# Host and port of the MySQL server; override the default with the MYSQL_DB_URI environment variable.
MYSQL_DB_URI = os.getenv("MYSQL_DB_URI", "dgx1.bbp.epfl.ch:8853")

# SQLAlchemy engine on the 'cord19_v35' database, connecting with the guest account.
_cord19_db_url = f'mysql+pymysql://guest:guest@{MYSQL_DB_URI}/cord19_v35'
engine = sqlalchemy.create_engine(_cord19_db_url)
article_saver = ArticleSaver(connection=engine)

# Client of the remote search server.
searcher = RemoteSearcher(SEARCH_ENGINE_URL)

In [None]:
# Build the search widget on top of the remote searcher and the database connection.
# The article saver is handed to the widget (presumably so the user's selections
# are recorded in it — confirm in bbsearch.widgets).
search_widget = SearchWidget(
    searcher=searcher,
    connection=engine,
    article_saver=article_saver,
    results_per_page=3)
# Last expression of the cell: renders the widget.
search_widget

Status of the Article Saver

In [None]:
# Last expression of the cell: shows whatever summary_table() returns for the article saver.
article_saver.summary_table()

# Set schemas
The user defines the KG schema.

In [None]:
# Holder of the requested schema; its `schema` attribute is assigned further down in the notebook.
schema_request = SchemaRequest()

In [None]:
# Columns of the schema table; only 'entity_type' and 'ontology_source' are
# filled in below, the other columns are left empty.
columns = ['entity_type', 'property', 'property_type', 'property_value_type', 'ontology_source']

# Entity types to extract, each paired with the ontology used as its source (None when there is none).
etypes_sources = [
    ('CELL_TYPE', None),
    ('CHEMICAL', 'NCIT'),
    ('CONDITION', None),
    ('DISEASE', 'NCIT'),
    ('ORGAN', 'NCIT'),
    ('ORGANISM', 'NCIT'),
    ('PATHWAY', 'Reactome'),
    ('PROTEIN', 'NCIT'),
]

# One row per entity type.
schema_request_data = []
for etype, source in etypes_sources:
    schema_request_data.append({'entity_type': etype, 'ontology_source': source})

schema_request.schema = pd.DataFrame(schema_request_data, columns=columns)
display(schema_request.schema)

# Create a knowledge graph according to schemas
The user extracts data from the text of a set of papers using selected Named Entity Recognizers and Relation Extractors from Blue Brain Search.
The user can preview the extracted data.
The user curates extracted data.
The user links the extracted entities and relations to ontologies.
The user saves data into Knowledge Graph.

- **input**: raw text
- **output**: csv table of extracted entities/relations

In [None]:
# Sample abstract used as the default input of the mining widget.
# Newlines are replaced by spaces and double spaces are collapsed once, so the
# text becomes a single line.
DEFAULT_TEXT = """Autophagy maintains tumour growth through circulating
arginine. Autophagy captures intracellular components and delivers them to
lysosomes, where they are degraded and recycled to sustain metabolism and to
enable survival during starvation. Acute, whole-body deletion of the essential 
autophagy gene Atg7 in adult mice causes a systemic metabolic defect that 
manifests as starvation intolerance and gradual loss of white adipose tissue, 
liver glycogen and muscle mass.  Cancer cells also benefit from autophagy. 
Deletion of essential autophagy genes impairs the metabolism, proliferation, 
survival and malignancy of spontaneous tumours in models of autochthonous 
cancer. Acute, systemic deletion of Atg7 or acute, systemic expression of a 
dominant-negative ATG4b in mice induces greater regression of KRAS-driven 
cancers than does tumour-specific autophagy deletion, which suggests that host 
autophagy promotes tumour growth.
""".replace('\n', ' ').replace('  ', ' ')

In [None]:
# Address of the text mining server; override the default with the TEXT_MINING_URL environment variable.
TEXT_MINING_URL = os.getenv("TEXT_MINING_URL", "http://dgx1.bbp.epfl.ch:8852")

# Ask the server to identify itself through its /help endpoint and parse the answer once.
response = requests.post(TEXT_MINING_URL + "/help")
mining_server_info = response.json() if response.ok else {}
if mining_server_info.get('name') != 'MiningServer':
    raise RuntimeError("The mining server is not accessible")
# Double-quoted f-string: re-using the enclosing quote inside the braces is a
# SyntaxError on Python versions before 3.12.
print(f"The mining server is running correctly and using the database {mining_server_info['database']}")

In [None]:
# Build the mining widget against the mining server, with the schema request,
# the article saver and the sample text defined above.
mining_widget = MiningWidget(
    mining_server_url=TEXT_MINING_URL,
    schema_request=schema_request,
    article_saver=article_saver,
    default_text=DEFAULT_TEXT)
# Last expression of the cell: renders the widget.
mining_widget

- **input**: csv table of extracted entities/relations
- **output**: knowledge graph

In [None]:
# Fetch the extractions produced through the mining widget.
table_extractions = mining_widget.get_extracted_table()

# Two rows are duplicates when they agree on every column except 'entity_type';
# only the first row of each group is kept and the index is renumbered.
columns_duplicates = list(table_extractions.columns)
columns_duplicates.remove('entity_type')
table_extractions = table_extractions.drop_duplicates(
    subset=columns_duplicates,
    keep='first',
    ignore_index=True,
)

In [None]:
# Report the number of rows left after de-duplication.
print(f'The table has {table_extractions.shape[0]} rows.')

In [None]:
import jupyter_server_proxy
import jupyter_dash
import dash
import dash_daq as daq
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table
from jupyter_dash import JupyterDash
import plotly.express as px
import operator

In [None]:
# NOTE(review): _send_jupyter_config_comm_request is a private jupyter_dash helper
# (leading underscore); it may change between jupyter_dash versions.
from jupyter_dash.comms import _send_jupyter_config_comm_request
_send_jupyter_config_comm_request()
# Let JupyterDash work out the proxy configuration of the running Jupyter server.
JupyterDash.infer_jupyter_proxy_config()

In [None]:
# NOTE(review): this binds a second name to the SAME DataFrame (no copy), so the
# paper_id rewrite below also changes table_extractions.
curated_table_extractions = table_extractions
# Keep only the part of each paper_id that precedes the first ':'.
curated_table_extractions["paper_id"] = curated_table_extractions["paper_id"].transform(lambda x:  str(x).split(":")[0])
# Extractions as a list of row dictionaries.
data= table_extractions.to_dict('records')

def _frequency(group_by, retrieve_key, df, distinct_papers=True, debug=False):
    
    if debug:
        display(df.head(100))
    if distinct_papers:
        colunm_stats = df[[group_by, retrieve_key]].groupby(group_by)[retrieve_key].unique()
    else:
        colunm_stats = df[[group_by, retrieve_key]].groupby(group_by)[retrieve_key].count()
    if debug:
        display(colunm_stats)
    
    return colunm_stats
        
# For each extracted entity, the distinct papers it appears in.
entity_stats = _frequency(group_by="entity",retrieve_key="paper_id",df=curated_table_extractions,distinct_papers=True)

In [None]:
# Dash application hosting the curation UI for the extracted entities.
app = JupyterDash('Extracted Entities Curation App')
server = app.server

# Define UI layout

# Options of the frequency comparison dropdown: the label is shown to the user,
# the value is what the callbacks receive.
dropdown_freq_filter_list = [
    {"label": symbol, "value": name}
    for symbol, name in [
        (">", "operator.gt"),
        (">=", "operator.ge"),
        ("<", "operator.lt"),
        ("<=", "operator.le"),
        ("=", "operator.eq"),
        ("!=", "operator.ne"),
    ]
]


def _inline_block(width):
    """Return the style of an inline block of the given width."""
    return {'width': width, 'display': 'inline-block'}


# Toolbar cell: file upload area.
_upload_cell = html.Div(
    dcc.Upload(
        id='datatable-upload',
        children=html.Div(['Drag and Drop or ', html.A('Select a File')]),
        style={
            'borderWidth': '1px', 'borderStyle': 'dashed',
            'borderRadius': '5px', 'textAlign': 'center', 'margin': '10px',
        },
    ),
    style=_inline_block('20%'),
)

# Toolbar cell: caption and comparison operator of the frequency filter.
_freq_operator_cell = html.Div(
    [
        html.Div(
            html.P('Keep entities with frequency: '),
            style=_inline_block('40%'),
        ),
        html.Div(
            dcc.Dropdown(
                id='dropdown-freq-filter',
                value="operator.ge",
                clearable=False,
                options=dropdown_freq_filter_list,
            ),
            style=_inline_block('10%'),
        ),
    ],
    style=_inline_block('50%'),
)

# Toolbar cell: threshold of the frequency filter.
_freq_threshold_cell = html.Div(
    daq.NumericInput(id="entityfreqslider", min=1, value=1),
    style=_inline_block('5%'),
)

# Toolbar cell: reset button.
_reset_cell = html.Div(
    html.Button('Reset', id='table-reset'),
    style=_inline_block('5%'),
)

# Table of the extractions; its data and columns are supplied by a callback.
_extractions_table = dash_table.DataTable(
    id='datatable-upload-container',
    style_cell={'whiteSpace': 'normal'},
    style_data_conditional=[
        {
            'if': {'row_index': 'odd'},
            'backgroundColor': 'rgb(248, 248, 248)',
        },
    ],
    style_header={
        'backgroundColor': 'rgb(230, 230, 230)',
        'fontWeight': 'bold',
    },
    css=[
        {
            'selector': 'dash-fixed-content',
            'rule': 'height: 800;',
        },
    ],
    virtualization=True,
    sort_action="native",
    sort_mode="multi",
    column_selectable="multi",
    filter_action="native",
    selected_columns=[],
    page_action="native",
    export_format='csv',
    export_headers='display',
    merge_duplicate_headers=True,
    selected_rows=[],
    page_current=0,
    page_size=20,
)

# Two tabs, each holding one chart of the extracted entities.
_charts = dcc.Tabs(
    id='tabs',
    children=[
        dcc.Tab(
            label='View extracted entities in a Bar diagram',
            children=[dcc.Graph(id='datatable-upload-graph')],
        ),
        dcc.Tab(
            label='View extracted entities in a Scatter diagram',
            children=[dcc.Graph(id='datatable-upload-Scatter')],
        ),
    ],
)

# Page: state store, toolbar, table, line break, charts.
app.layout = html.Div(
    [
        dcc.Store(id='memory'),
        html.Div([_upload_cell, _freq_operator_cell, _freq_threshold_cell, _reset_cell]),
        _extractions_table,
        html.Br(),
        _charts,
    ]
)

# Define callbacks

def parse_contents(contents, filename):
    """Decode the payload of an uploaded file into a DataFrame.

    Args:
        contents: The upload as a ``"<content type>,<base64 data>"`` string.
        filename: Name of the uploaded file; only names containing ``'csv'``
            are parsed.

    Returns:
        The parsed DataFrame for a CSV upload (decoded as UTF-8), otherwise
        ``None``.
    """
    # Imported locally: in this notebook base64 and io are otherwise only
    # imported by a later cell, so an upload made before that cell has run
    # would fail with a NameError.
    import base64
    import io

    # Split on the first comma only: everything after it is the payload.
    _, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)
    if 'csv' in filename:
        # Assume that the user uploaded a CSV file
        return pd.read_csv(io.StringIO(decoded.decode('utf-8')))
    return None

@app.callback(
    Output('datatable-upload-container', 'style_data_conditional'),
    [Input('datatable-upload-container', 'selected_columns')]
)
def update_styles(selected_columns):
    """Return one conditional style per selected column to highlight it.

    Args:
        selected_columns: Identifiers of the columns selected in the table.

    Returns:
        A list of conditional styles giving each selected column a light blue
        background.
    """
    highlighted = []
    for column_id in selected_columns:
        highlighted.append({
            'if': {'column_id': column_id},
            'background_color': '#D2F3FF',
        })
    return highlighted


@app.callback([Output('entityfreqslider', 'value'),Output('dropdown-freq-filter', 'value')],
              [ Input('table-reset', 'n_clicks')],
             [State('entityfreqslider', 'value'),
              State('dropdown-freq-filter', 'value')])
def reset(reset, entityfreq,freqoperator):
    """Restore the default frequency filter when the reset button is clicked.

    Args:
        reset: Click count of the reset button; not used directly.
        entityfreq: Current threshold of the frequency filter.
        freqoperator: Current comparison operator of the frequency filter.

    Returns:
        ``(1, "operator.ge")`` when the callback was triggered by the reset
        button, otherwise the current values unchanged.
    """
    ctx = dash.callback_context
    # Identifier of the component that triggered the callback, if any.
    source_id = ctx.triggered[0]['prop_id'].split('.')[0] if ctx.triggered else 'No clicks yet'
    if source_id != "table-reset":
        return entityfreq, freqoperator
    return 1, "operator.ge"
        
# Comparison functions selectable through 'dropdown-freq-filter', keyed by the
# option values of the dropdown. The value arrives from the browser, so it is
# looked up in this table instead of being passed to eval().
_FREQ_OPERATORS = {
    "operator.gt": operator.gt,
    "operator.ge": operator.ge,
    "operator.lt": operator.lt,
    "operator.le": operator.le,
    "operator.eq": operator.eq,
    "operator.ne": operator.ne,
}


@app.callback([Output('memory', 'data'),
               Output('datatable-upload-container', 'data'),
               Output('datatable-upload-container', 'data_previous'),
               Output('datatable-upload-container', 'columns'),
               Output('datatable-upload-container', 'editable'),
               Output('datatable-upload-container', 'row_deletable')],
              [Input('datatable-upload-container', 'page_size'),
               Input('datatable-upload-container', 'page_current'),
               Input('datatable-upload-container','data_timestamp'),
               Input('datatable-upload', 'contents'),
               Input('entityfreqslider', 'value'),
               Input('dropdown-freq-filter', 'value'),
               Input('table-reset', 'n_clicks')],
              [State("datatable-upload-container", "data"),
               State('datatable-upload-container', 'data_previous'),
               State("datatable-upload-container", "columns"),
              State("memory", "data"),
              State('datatable-upload', 'filename')])

def update_output(page_size, page_current,ts,upload,entityfreq,freqoperator,reset,data,data_previous,columns,memory_data, filename):
    """Recompute the content of the extractions table.

    The table rows come from the uploaded file when there is one, from
    ``table_extractions`` on first load or after a reset, and from the current
    table data otherwise. Deleting a row removes every row of the same entity.
    When the frequency filter is changed, only the entities whose number of
    papers (per ``entity_stats``) satisfies the comparison are kept.

    The global ``curated_table_extractions`` is rebound to the resulting
    (unfiltered) table.

    Args:
        page_size, page_current: Table paging properties; used as triggers only.
        ts: Timestamp of the last edit of the table data.
        upload: Content of the uploaded file, or ``None``.
        entityfreq: Threshold of the frequency filter.
        freqoperator: Key of ``_FREQ_OPERATORS`` naming the comparison.
        reset: Click count of the reset button; used as a trigger only.
        data: Current rows of the table.
        data_previous: Rows of the table before the last edit.
        columns: Current column definitions of the table.
        memory_data: Content of the 'memory' store (rows not removed by the
            user, and the timestamp of the last handled edit).
        filename: Name of the uploaded file.

    Returns:
        The store content, the table rows (twice: as data and as previous
        data), the column definitions, and ``True`` for both the editable and
        the row-deletable flags.

    Raises:
        ValueError: If ``freqoperator`` is not a known comparison.
    """
    global curated_table_extractions

    ctx = dash.callback_context
    if not ctx.triggered:
        button_id = 'No clicks yet'
    else:
        button_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if upload is not None:
        data = parse_contents(upload, filename).to_dict('records')
    elif data_previous is None or button_id == "table-reset":
        data = table_extractions.to_dict('records')
        columns = [{"name": i, "id": i, "clearable": True, "selectable": True, "renamable": True, "hideable": True, "deletable": False} for i in table_extractions.columns]

    curated_table_extractions = pd.DataFrame(list(data))
    result = curated_table_extractions
    if memory_data is None:
        memory_data = {'notremoved': curated_table_extractions.to_dict('records')}

    # A changed timestamp means the user edited the table: rows present before
    # but absent now were deleted, and every row of their entity goes with them.
    if data_previous:
        if ts and memory_data and 'timestamp' in memory_data and ts != memory_data['timestamp']:
            removed = [row for row in data_previous if row not in data]
            for row in removed:
                curated_table_extractions = curated_table_extractions[curated_table_extractions.entity != row["entity"]]
                if 'notremoved' in memory_data:
                    not_removed_entity_df = pd.DataFrame(memory_data['notremoved'])
                    memory_data['notremoved'] = not_removed_entity_df[not_removed_entity_df.entity != row["entity"]].to_dict('records')
            result = curated_table_extractions

    if button_id in ("entityfreqslider", "dropdown-freq-filter") and 'paper_id' in table_extractions:
        compare = _FREQ_OPERATORS.get(freqoperator)
        if compare is None:
            raise ValueError(f"Unknown frequency operator: {freqoperator!r}")
        threshold = int(entityfreq)
        df_to_filter = pd.DataFrame(memory_data['notremoved']) if 'notremoved' in memory_data else curated_table_extractions
        if df_to_filter.empty:
            result = df_to_filter
        else:
            # Boolean row selection keeps the original columns; rebuilding the
            # frame from itertuples() would add a spurious 'Index' column.
            keep = [compare(len(entity_stats[entity]), threshold) for entity in df_to_filter['entity']]
            result = df_to_filter[keep]

    memory_data['timestamp'] = ts
    return memory_data, result.to_dict('records'), result.to_dict('records'), columns, True, True

@app.callback([Output('datatable-upload-graph', 'figure'),
               Output('datatable-upload-Scatter', 'figure')],
              [Input('datatable-upload-container', 'data_timestamp'),Input('datatable-upload-container', 'data')],)
def display_graph(dts, rows):
    """Build the bar and scatter charts of entity frequencies.

    The frequency of an entity is the number of distinct papers it appears
    in, where a paper is identified by the part of ``paper_id`` preceding the
    first ``':'``.

    Args:
        dts: Timestamp of the last edit of the table data; used as a trigger only.
        rows: Current rows of the table.

    Returns:
        ``[bar, scatter]``. Both are empty figures when there is no data or
        when the ``paper_id``, ``entity`` or ``entity_type`` column is missing.
    """
    # Empty figures by default, so both names are always bound on return
    # (e.g. for an uploaded table that has no 'paper_id' column).
    bar = {
        'data': [{
            'x': [],
            'y': [],
            'type': 'bar'
        }]
    }
    scatter = {
        'data': [{
            'x': [],
            'y': []
        }]
    }

    df = pd.DataFrame(rows)
    required_columns = {"paper_id", "entity", "entity_type"}
    if not df.empty and required_columns.issubset(df.columns):
        df["paper_id"] = df["paper_id"].transform(lambda x: str(x).split(":")[0])
        df_grouped = df[["paper_id", "entity_type", "entity"]].groupby(["entity", "entity_type"]).paper_id.nunique().reset_index()
        df_grouped = df_grouped.rename(columns={"paper_id": "Frequency"})
        bar = px.bar(df_grouped, x=df_grouped.entity, y=df_grouped.Frequency, color="entity_type")
        scatter = px.scatter(df_grouped, x=df_grouped.entity, y=df_grouped.Frequency, color="entity_type")
    return [bar, scatter]


# Size attributes set on the app object, then start the app in "jupyterlab" mode.
app.width = "100%"
app.height = "3000px"
app.run_server(mode="jupyterlab")

In [None]:
from typing import Iterator, Dict
import pandas as pd
import numpy as np
from pygments import highlight
from pygments.lexers import JsonLdLexer, TurtleLexer
from pygments.formatters import TerminalFormatter, TerminalTrueColorFormatter
import json
import uuid
from tqdm.notebook import tqdm

def pretty_print(a_json):
    """Print ``a_json`` as indented JSON, highlighted for the terminal as JSON-LD."""
    serialized = json.dumps(a_json, indent=2)
    colored = highlight(serialized, JsonLdLexer(), TerminalFormatter())
    print(colored)

In [None]:
from kgforge.core import Resource
from kgforge.specializations.mappings import DictionaryMapping

# Map extracted entities and relations to the W3C annotation data model

# Both mapping files are located through environment variables; fail early,
# with an explicit message, when one of them is missing.
ANNOTATION_MAPPING_FILE = os.getenv("ANNOTATION_MAPPING_FILE")
if ANNOTATION_MAPPING_FILE is None:
    raise RuntimeError("The ANNOTATION_MAPPING_FILE environment variable must be set.")

PROPERTY_MAPPING_FILE = os.getenv("PROPERTY_MAPPING_FILE")
if PROPERTY_MAPPING_FILE is None:
    raise RuntimeError("The PROPERTY_MAPPING_FILE environment variable must be set.")

# One JSON document per curated extraction, each given a fresh random identifier.
resources = forge.from_dataframe(curated_table_extractions, na="NaN")
ressources_json = [{**forge.as_json(resource), "id": str(uuid.uuid4())} for resource in resources]

annotation_maping = DictionaryMapping.load(ANNOTATION_MAPPING_FILE)
property_maping = DictionaryMapping.load(PROPERTY_MAPPING_FILE)

# Apply both mappings to the same documents; the results are aligned by position.
annotations = forge.map(ressources_json, [annotation_maping], na='')
ressources_prop_mapped = forge.map(ressources_json, [property_maping], na='')

# For the extractions that carry a property, attach the mapped property to the
# annotation, under the property's name, on both the selector value and the body.
for r, annotation, mapped_property in zip(ressources_json, annotations, ressources_prop_mapped):
    if 'property' in r:
        setattr(annotation.target.selector.value, r['property'], mapped_property)
        setattr(annotation.body, r['property'], mapped_property)
print(f'{len(annotations)} annotations created.')

In [None]:
# Report the number of annotations (same message as printed at the end of the mapping cell).
print(f'{len(annotations)} annotations created.')

In [None]:
### Entity Linking ###

import pickle

import faiss
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

class Candidate:
    """An ontology concept proposed for a mention.

    Holds the distance to the mention, the matched alias, and the identifier,
    label (``concept``) and definition of the concept.
    """

    def __init__(self, distance, alias, uid, concept, definition):
        self.distance, self.alias, self.uid = distance, alias, uid
        self.concept, self.definition = concept, definition

    def __repr__(self):
        # List every instance attribute as name=repr(value), in assignment order.
        fields = ', '.join(f"{name}={value!r}" for name, value in vars(self).items())
        return f"Candidate({fields})"

class EntityLinker:
    """Link textual mentions to ontology concepts by nearest-neighbour search.

    Aliases of the concepts are embedded with a TF-IDF vectorizer and indexed.
    With ``bulk`` set, the index is a faiss ``IndexFlatL2``; otherwise it is a
    scikit-learn ``NearestNeighbors`` model.

    Attributes are ``None`` until ``train`` or ``from_pretrained`` fills them:
    ``ontology`` maps a concept identifier to ``(concept, definition)``,
    ``aliases`` is a list of ``(alias, identifier)`` pairs aligned with the
    index, ``model`` is the vectorizer and ``index`` the search index.
    """

    def __init__(self, bulk):
        self.bulk = bulk
        self.ontology = None
        self.aliases = None
        self.model = None
        self.index = None

    def link(self, mentions, threshold=0.8):
        """Return, for each mention, the chosen ``Candidate`` or ``None``.

        The three nearest candidates of each mention are retrieved and then
        disambiguated with the given distance ``threshold``.
        """
        print('WARN   Entity Linking   '
              'Low performances because component not part of the NLP pipeline.')
        selections = self.candidates(mentions, 3)
        return [self.disambiguate(cs, m, None, threshold) for m, cs in selections]

    def disambiguate(self, candidates, mention, context, threshold):
        """Choose one candidate, or ``None`` when none is close enough.

        Among the candidates at distance 0, the one with the shortest concept
        label wins. Otherwise the nearest candidate is returned if its
        distance does not exceed ``threshold``. ``mention`` and ``context``
        are currently unused.
        """
        # TODO Disambiguation requires the component to be part of the NLP pipeline.
        if not candidates:
            return None
        zeros = [x for x in candidates if x.distance == 0]
        if zeros:
            return min(zeros, key=lambda x: len(x.concept))
        chosen = min(candidates, key=lambda x: x.distance)
        return chosen if chosen.distance <= threshold else None

    def candidates(self, mentions, limit):
        """Return ``(mention, [Candidate, ...])`` with the ``limit`` nearest aliases of each mention."""
        def _candidate(distance, position):
            # The index position is stacked with the distances below and thus
            # comes back as a float.
            alias, uid = self.aliases[int(position)]
            return Candidate(distance, alias, uid, *self.ontology[uid])

        embeddings = self.model.transform(mentions)
        if self.bulk:
            distances, indexes = self.index.search(embeddings.toarray(), limit)
        else:
            distances, indexes = self.index.kneighbors(embeddings, limit)
        results = np.stack((distances, indexes), axis=2)
        return [(m, [_candidate(d, i) for d, i in rs]) for m, rs in zip(mentions, results)]

    def train(self, ontology, model_params, index_params):
        """Fit the vectorizer and the index on the aliases of ``ontology``.

        Args:
            ontology: Mapping from a concept identifier to a sequence whose
                items 0, 1 and 2 are the concept label, its aliases and its
                definition.
            model_params: Keyword arguments of ``TfidfVectorizer``.
            index_params: Keyword arguments of ``NearestNeighbors``; only
                used when ``bulk`` is false.
        """
        self.ontology = {k: (v[0], v[2]) for k, v in ontology.items()}
        self.model = TfidfVectorizer(**model_params)
        # The concept label itself counts as an alias.
        aliases = [(x, k) for k, v in ontology.items() for x in [v[0], *v[1]]]
        embeddings = self.model.fit_transform(x for x, _ in aliases)
        # Drop the aliases whose embedding is all zeros.
        flags = np.array(embeddings.sum(axis=1) != 0).reshape(-1)
        filtered_embeddings = embeddings[flags]
        self.aliases = [t for t, f in zip(aliases, flags) if f]
        if self.bulk:
            self.index = faiss.IndexFlatL2(filtered_embeddings.shape[1])
            self.index.add(filtered_embeddings.toarray())
        else:
            self.index = NearestNeighbors(**index_params)
            self.index.fit(filtered_embeddings)
        self._stats()

    def save_pretrained(self, dirpath):
        """Write this linker to ``dirpath``.

        The ontology, aliases and vectorizer (and the index when ``bulk`` is
        false) are pickled, in that order, into ``<dirpath>/model``. With
        ``bulk`` set, the faiss index is written to ``<dirpath>/index``.
        """
        # Serialise this instance's own state (not a global `linker`).
        with open(f'{dirpath}/model', 'wb') as f:
            pickle.dump(self.ontology, f)
            pickle.dump(self.aliases, f)
            pickle.dump(self.model, f)
            if not self.bulk:
                pickle.dump(self.index, f)
        if self.bulk:
            faiss.write_index(self.index, f'{dirpath}/index')

    @staticmethod
    def from_pretrained(dirpath, bulk):
        """Load a linker written by ``save_pretrained`` with the same ``bulk`` flag."""
        linker = EntityLinker(bulk)
        # SECURITY: unpickling executes arbitrary code; only load model files
        # that come from a trusted source.
        with open(f'{dirpath}/model', 'rb') as f:
            linker.ontology = pickle.load(f)
            linker.aliases = pickle.load(f)
            linker.model = pickle.load(f)
            if not bulk:
                linker.index = pickle.load(f)
        if bulk:
            linker.index = faiss.read_index(f'{dirpath}/index')
        linker._stats()
        return linker

    def _stats(self):
        """Print the number of concepts and aliases this linker knows."""
        ccount = len(self.ontology)
        tcount = len(self.aliases)
        print(f'INFO   EntityLinker   Links to {ccount} concepts ({tcount} aliases).')

In [None]:
# Directory of the pretrained entity linking model, read from the
# ONTOLOGY_LINKING_MODEL_PATH environment variable.
ONTOLOGY_LINKING_MODEL_PATH = os.getenv("ONTOLOGY_LINKING_MODEL_PATH")
if ONTOLOGY_LINKING_MODEL_PATH is None:
    # Explicit error rather than a bare assert: it carries a message and is not stripped under `python -O`.
    raise RuntimeError("The ONTOLOGY_LINKING_MODEL_PATH environment variable must be set.")
linker = EntityLinker.from_pretrained(ONTOLOGY_LINKING_MODEL_PATH, bulk=False)

In [None]:
from typing import Iterable, Dict, Iterator
from copy import deepcopy

def enrich_annotations(annotations: Iterable[Dict], linker: "EntityLinker") -> Iterator[Dict]:
    """Yield a copy of each annotation, with its body replaced by the linked concept.

    The mention of an annotation is its ``target.selector.exact`` attribute.
    All mentions are linked in a single call to ``linker.link``. When a
    mention is linked, the body of the copy becomes a dictionary with the
    ``'@id'``, ``'label'`` and ``'definition'`` of the concept; otherwise the
    copy keeps the original body.

    Args:
        annotations: The annotations to enrich; they are not modified.
        linker: The entity linker to use.

    Returns:
        A lazy iterator over the enriched copies, in input order.
    """
    # The input is traversed twice (to collect the mentions, then to pair them
    # with the links); materialise it so that a one-shot iterator also works.
    annotations = list(annotations)

    def _with_linked_body(ann, can):
        new = deepcopy(ann)
        if can:
            new.body = {
                '@id': can.uid,
                'label': can.concept,
                'definition': can.definition
            }
        return new

    mentions = [x.target.selector.exact for x in annotations]
    linked_mentions = linker.link(mentions)
    return (_with_linked_body(ann, can) for ann, can in zip(annotations, linked_mentions))

In [None]:
# Materialise the lazy iterator returned by enrich_annotations.
enriched_annotations = list(enrich_annotations(annotations, linker))

In [None]:
import json
from typing import Iterable, Dict
from rdflib import Graph

def load_knowledge_graph(jsonlds: Iterable[Dict]) -> Graph:
    """Build an RDF graph from the given resources.

    Each resource is converted by the forge to expanded JSON-LD and parsed
    into the same graph; a progress bar follows the iteration.

    Args:
        jsonlds: The resources to load.

    Returns:
        The graph holding the triples of all the resources.
    """
    graph = Graph()
    for resource in tqdm(jsonlds):
        expanded = forge.as_jsonld(resource, form="expanded")
        graph.parse(data=json.dumps(expanded), format='json-ld')
    return graph

In [None]:
%%time
# The %%time cell magic above must stay on the first line of the cell.
# Build knowledge graph from enriched annotations
knowledge_graph = load_knowledge_graph(enriched_annotations)
# Note: Takes around 8 secs on a BBP issued MacBook Pro.

In [None]:
# The length of the graph is its number of triples.
print(f'The knowledge graph has {len(knowledge_graph)} triples.')

In [None]:
import rdflib

# Graph of the annotation bodies: for every object of a hasBody statement,
# copy all of its triples except its rdf:type statements.
content_graph = Graph()
_HAS_BODY = rdflib.term.URIRef("http://www.w3.org/ns/anno.jsonld/hasBody")
_RDF_TYPE = rdflib.term.URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")

for body in knowledge_graph.objects(None, _HAS_BODY):
    body_triples = knowledge_graph.triples((rdflib.term.URIRef(body), None, None))
    for subject, predicate, value in body_triples:
        if predicate == _RDF_TYPE:
            continue
        content_graph.add((subject, predicate, value))

# Validate the knowledge graph
The user reviews the content of the knowledge graph.

# Correct knowledge graph
The user corrects the knowledge graph if errors occur.

# Access the knowledge graph
The user can search, visualize, and export the knowledge graph.

In [None]:
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
import networkx as nx
from rdflib.namespace import RDF, RDFS, SKOS

In [None]:
# Keep only the body, the exact mention and the source of each annotation...
reshaped_enriched_annotations = forge.reshape(enriched_annotations, keep=["body","target.selector.exact","target.source"])

# ...and turn the result into a DataFrame (the code below reads its
# "body.@id" and "target.source" columns).
enriched_annotations_df = forge.as_dataframe(reshaped_enriched_annotations)

def _build_co_mention(group_by, retrieve_key,df):
    entity_co_mention = df[[group_by,retrieve_key]].groupby(group_by)
    group_keys = list(entity_co_mention.groups.keys())
    all_co_mentions = [entity_co_mention.get_group(group_key)[retrieve_key].dropna().unique() for group_key in group_keys]
    return entity_co_mention, group_keys,all_co_mentions
        
# Distinct sources per linked concept.
# NOTE(review): this rebinds the global `entity_stats`, which the curation app
# callback (update_output) also reads, expecting it to be keyed by entity text;
# using that app's frequency filter after this cell has run looks likely to
# fail — confirm.
entity_stats = _frequency(group_by="body.@id",retrieve_key="target.source",df=enriched_annotations_df,distinct_papers=True)
# Distinct papers per extracted property.
relation_stats = _frequency(group_by="property",retrieve_key="paper_id",df=curated_table_extractions,distinct_papers=True)

# Annotations grouped by source, with the distinct linked concepts of each source.
entity_co_mention, paper_ids,all_co_mentions = _build_co_mention(group_by="target.source",retrieve_key= "body.@id",df=enriched_annotations_df)


In [None]:
import rdflib
from rdflib import Graph, Namespace
from rdflib.paths import Path

# Co-mention graph: two concepts are connected when they are mentioned in the
# same paper; the paper is used as the context of the statement.
comention_graph = rdflib.ConjunctiveGraph()
_COMENTION = "https://bbp.epfl.ch/nexus/v1/resources/covid19-kg/vocab/comention"

# For each concept, the (paper, concepts of that paper) pairs it takes part in.
comentioned_dict = {}
for paper_id in paper_ids:
    paper_entities = entity_co_mention.get_group(paper_id)["body.@id"].dropna().unique()
    paper_entities = set(paper_entities)
    for entity in paper_entities:
        comentioned_dict.setdefault(entity, []).append((paper_id, paper_entities))

for entity, occurrences in comentioned_dict.items():
    for paper, paper_entities in occurrences:
        for other in paper_entities:
            if entity == other:
                continue
            # Add a single direction per pair and paper: skip when the reverse
            # statement is already present.
            reverse = (
                rdflib.term.URIRef(other),
                rdflib.term.URIRef(_COMENTION),
                rdflib.term.URIRef(entity),
                paper,
            )
            if reverse in comention_graph:
                continue
            comention_graph.add((
                rdflib.term.URIRef(entity),
                rdflib.term.URIRef(_COMENTION),
                rdflib.term.URIRef(other),
                paper,
            ))
                    


In [None]:
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
import base64
import io
import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import pandas as pd
import dash_cytoscape as cyto

# Make the additional layouts of dash_cytoscape available.
cyto.load_extra_layouts()
def build_cytoscape_elements(comention_graph, content_graph, graph_type="comention"):
    """Convert an RDF graph into the list of elements displayed by Cytoscape.

    Nodes are sized, and edges thickened, by the number of papers supporting
    them. Node frequencies are read from the global ``entity_stats``; for the
    knowledge graph, edge papers are read from the global ``relation_stats``.

    Args:
        comention_graph: The co-mention graph (quads whose context is a paper).
        content_graph: The graph of the annotation bodies; also the source of
            the node labels and definitions.
        graph_type: ``"comention"`` to display the co-mention graph; any
            other value displays ``content_graph``.

    Returns:
        A tuple of the list of Cytoscape elements and the networkx graph they
        were built from.
    """
    elements = []
    comention_predicate = rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/covid19-kg/vocab/comention')

    G = rdflib_to_networkx_digraph(comention_graph) if graph_type == "comention" else rdflib_to_networkx_digraph(content_graph)

    def addNode(node_id, label=None, label_size=10, label_color="black", radius=30, node_color='grey', frequency=1, definition="", papers=None):
        """
        Adds a node to the list of object to display in Cytoscape.
        Must have an id, the rest is optional.
        """
        if label is not None:
            actualLabel = label.lower()
        else:
            actualLabel = str(node_id).lower().split("/")[-1].split("#")[-1]

        elements.append({
            "data": {
                "id": str(node_id).lower(),
                "frequency": frequency,
                "definition": definition,
                # Fresh list per call instead of a shared mutable default.
                "papers": [] if papers is None else papers
            },
            "style": {
                "label": actualLabel,
                "width": radius,
                "height": radius
            }
        })

    def addEdge(edge_id, from_id, to_id, label=None, label_size=10, label_color="black", thickness=2, edge_color="grey", edge_style="solid", frequency=1, papers=None):
        """
        Adds an edge to the list of object to display in Cytoscape.
        Must have an id, the id of the node the link comes from (from_id) and the id of the node it going towards (to_id).
        """
        if thickness == 0:
            thickness = 2
        elements.append({
            "data": {
                "id": str(edge_id),
                "source": str(from_id).lower(),
                "target": str(to_id).lower(),
                "frequency": frequency,
                # Fresh list per call instead of a shared mutable default.
                "papers": [] if papers is None else papers
            },
            "style": {
                "label": label if label else '',
                "font-size": f"{label_size}px",
                "width": thickness,
                "line-style": edge_style
            }
        })

    # Only the nodes identified by an http(s) IRI are displayed.
    for node, node_attrs in G.nodes(data=True):
        if str(node).startswith("http"):
            # Fall back to the last segment of the IRI when the node has no label.
            node_label = content_graph.label(node, str(node).split("/")[-1].split("#")[-1])
            node_definition = content_graph.value(node, SKOS.definition, default="", any=True)

            node_radius = 5
            node_papers = entity_stats[str(node)]
            frequency = len(node_papers)
            if frequency >= 1:
                node_radius = frequency * node_radius
                addNode(str(node), label=node_label, radius=node_radius, frequency=frequency, node_color="lightblue", label_color='blue', definition=node_definition, papers=node_papers)

    for source, target, edge_attrs in G.edges(data=True):
        if not 'value' in edge_attrs and not 'width' in edge_attrs and 'weight' in edge_attrs:
            edge_attrs['value'] = edge_attrs['weight']
        if 'triples' in edge_attrs:
            # The predicate of the first triple names the edge.
            edge_attrs['title'] = edge_attrs['triples'][0][1]
        edge_id = str(source).lower().replace(" ", "_") + "_" + str(target).lower()
        edge_label = str(edge_attrs['title']).split("/")[-1].split("#")[-1]

        # Labels and definitions are shown on the nodes, not as edges.
        if edge_label != "label" and edge_label != "definition":
            thickness = 2
            edge_papers = set()

            if graph_type == "comention":
                # A co-mention is stored in one direction only: collect the
                # papers (quad contexts) of both directions.
                for q in comention_graph.quads((rdflib.term.URIRef(source), comention_predicate, rdflib.term.URIRef(target), None)):
                    edge_papers.add(q[3].identifier)
                for q in comention_graph.quads((rdflib.term.URIRef(target), comention_predicate, rdflib.term.URIRef(source), None)):
                    edge_papers.add(q[3].identifier)
            else:
                # Papers supporting the relation named by the edge label.
                # NOTE(review): a label unknown to relation_stats gives no
                # papers, so the edge is skipped below — confirm this is the
                # intended handling.
                edge_papers = relation_stats.get(edge_label, ())

            thickness = thickness * len(edge_papers)

            if len(edge_papers) >= 1:
                addEdge(
                    edge_id,
                    from_id=str(source),
                    to_id=str(target),
                    label=None if graph_type == "comention" else edge_label,
                    label_size=6,
                    thickness=thickness,
                    edge_color="lightgrey",
                    frequency=len(edge_papers),
                    papers=list(edge_papers)
                )

    return elements, G


# Cytoscape elements of the co-mention graph and of the knowledge graph.
# Note that G is rebound by the second call, so it ends up being the networkx
# graph of the knowledge graph.
comention_graph_cyto_elements, G = build_cytoscape_elements(comention_graph, content_graph, graph_type="comention")
knowledge_graph_cyto_elements, G = build_cytoscape_elements(comention_graph, content_graph, graph_type="kg")

# The same elements, indexed by their identifier.
comention_graph_cyto_elements_dict = {elt['data']['id']:elt for elt in comention_graph_cyto_elements}
knowledge_graph_cyto_elements_dict= {elt['data']['id']:elt for elt in knowledge_graph_cyto_elements}

In [None]:
def load_json(st, timeout=30):
    """Load a JSON document from an HTTP(S) URL or from a local file.

    Parameters
    ----------
    st : str or os.PathLike
        Either an ``http://`` / ``https://`` URL or the path of a local
        JSON file.
    timeout : float, optional
        Number of seconds to wait for the remote server before giving up.
        Only used for URLs.

    Returns
    -------
    object
        The decoded JSON value.

    Raises
    ------
    requests.RequestException
        If the URL cannot be fetched or the server answers with an error
        status.
    OSError
        If the local file cannot be opened.
    ValueError
        If the content is not valid JSON.
    """
    location = str(st)
    # Only a real URL scheme selects the network branch. A plain substring
    # test would also send local paths such as "data/http_dump.json" to the
    # network, and would fail outright on pathlib.Path objects.
    if location.lower().startswith(("http://", "https://")):
        response = requests.get(location, timeout=timeout)
        # Fail with a clear HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        return response.json()

    with open(st, 'rb') as f:
        return json.load(f)

In [None]:
import json
import os

import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html

import dash_cytoscape as cyto

from dash.exceptions import PreventUpdate

# Load extra layouts
# (presumably needed for the non-built-in entries of the layout dropdown,
# e.g. 'cola', 'dagre', 'klay' -- confirm against the dash-cytoscape docs)
cyto.load_extra_layouts()
# NOTE(review): JupyterDash is not imported in this cell; it is presumably
# imported by an earlier cell of the notebook.
app_tab =  JupyterDash("allvis")
# Keep a handle on the `server` attribute of the app.
server = app_tab.server

# Built-in stylesheet. It is only used as a fallback when the remote
# stylesheet below cannot be fetched (e.g. the notebook runs offline).
default_stylesheet = [
    {
        "selector": 'node',
        'style': {
            "opacity": 1,
            'z-index': 9999
        }
    },
    {
        "selector": 'edge',
        'style': {
            "curve-style": "bezier",
            "opacity": 1,
            'z-index': 5000
        }
    },
    {
        'selector': '.followerNode',
        'style': {
            'background-color': '#0074D9'
        }
    },
    {
        'selector': '.followerEdge',
        "style": {
            "mid-target-arrow-color": "blue",
            "mid-target-arrow-shape": "vee",
            "line-color": "#0074D9"
        }
    },
    {
        'selector': '.followingNode',
        'style': {
            'background-color': '#FF4136'
        }
    },
    {
        'selector': '.followingEdge',
        "style": {
            "mid-target-arrow-color": "red",
            "mid-target-arrow-shape": "vee",
            "line-color": "#FF4136",
        }
    },
    {
        "selector": '.genesis',
        "style": {
            'background-color': '#B10DC9',
            "border-width": 2,
            "border-color": "purple",
            "border-opacity": 1,
            "opacity": 1,

            "label": "data(label)",
            "color": "#B10DC9",
            "text-opacity": 1,
            "font-size": 12,
            'z-index': 9999
        }
    },
    {
        'selector': ':selected',
        "style": {
            "border-width": 2,
            "border-color": "black",
            "border-opacity": 1,
            "opacity": 1,
            "label": "data(label)",
            "color": "black",
            "font-size": 12,
            'z-index': 9999
        }
    }
]

# Preferred stylesheet: the one fetched from the URL below. When the fetch
# succeeds it replaces the built-in list, exactly as before.
_REMOTE_STYLESHEET_URL = 'https://js.cytoscape.org/demos/colajs-graph/cy-style.json'
try:
    default_stylesheet = load_json(_REMOTE_STYLESHEET_URL)
except (requests.RequestException, ValueError) as err:
    # Network failure or a reply that is not JSON: keep the built-in
    # stylesheet so the application can still be built, and say so.
    print("Could not load %s (%s); using the built-in stylesheet instead."
          % (_REMOTE_STYLESHEET_URL, err))

# ################################# APP LAYOUT ################################

# Choices offered by the "Node Shape" dropdown.
node_shape_option_list = [
    'ellipse',
    'triangle',
    'rectangle',
    'diamond',
    'pentagon',
    'hexagon',
    'heptagon',
    'octagon',
    'star',
    'polygon',
]

# Image formats offered by the "Download graph as" dropdown.
dropdown_download_option_list = ['jpg', 'png', 'svg']

# Layout names offered by the "Layout" dropdown.
graph_layout_option_list = [
    'random',
    'grid',
    'circle',
    'concentric',
    'breadthfirst',
    'cose',
    'cose-bilkent',
    'dagre',
    'cola',
    'klay',
    'spread',
    'euler',
]

# The two views the "Show" radio buttons switch between.
graph_type_option_list = [
    'Knowledge Graph',
    'Co-mention Graph',
]

# Shared CSS fragments for the layout below.
styles = {
    'json-output': {
        'overflow-y': 'scroll',
        'height': 'calc(50% - 25px)',
        'border': 'thin lightgrey solid',
    },
    'tab': {
        'height': 'calc(98vh - 80px)',
    },
}

# Page layout: a left column holding the Cytoscape canvas and a right column
# holding two tabs ("Details" and "Graph Layout and Shape"). The component ids
# declared here are the ones the callbacks below bind to.
app_tab.layout = html.Div([
    # Left column: the graph itself, initially showing the co-mention elements.
    html.Div(className='eight columns', children=[
        cyto.Cytoscape(
            id='cytoscape',
            elements=comention_graph_cyto_elements,
            stylesheet=default_stylesheet,
            style={
                'height': '95vh',
                'width': '100%'
            }
        )

    ]),

    # Right column: controls.
    html.Div(className='four columns', children=[
        dcc.Tabs(id='tabs', children=[
            dcc.Tab(label='Details', children=[
                html.Button('Reset', id='bt-reset'),
                html.Button("Remove Selected Node", id='remove-button'),


                 # Graph-type selector. The labels go through str.capitalize(),
                 # so they read 'Knowledge graph' / 'Co-mention graph' while the
                 # values keep the original spelling.
                 html.Div(
                    style={'padding': '20px 10px 25px 4px'},
                    children=[
                        html.P(children='Show:'),
                        dcc.RadioItems(
                            id='showgraph',
                            options = [{'label': val.capitalize(), 'value': val} for val in graph_type_option_list],
                            value='Co-mention Graph'
                        )
                    ]
                ),
                # Search box; its options are filled in by a callback.
                html.Div(

                    style={'margin': '10px 0px'},
                    children=[
                        html.Label(
                            [
                                "(In progress) Search for entities and relations",
                                dcc.Dropdown(id="searchdropdown", multi=True),
                            ]
                    )]),

                html.Div(
                    style={'margin': '10px 0px'},
                    children=[
                        html.P(
                            children='Download graph as:',
                            style={'margin-left': '3px'}
                        ),

                        # NOTE(review): the initial value 'ellipse' is not one
                        # of the options built from
                        # dropdown_download_option_list ('jpg', 'png', 'svg');
                        # confirm whether that is intentional.
                        dcc.Dropdown(
                            id='dropdown-download',
                            value='ellipse',
                            clearable=False,
                            options = [{'label': val, 'value': val} for val in dropdown_download_option_list]
                        )
                    ]
                ),
                # Markdown area filled with the details of the tapped item.
                html.Div(children=[
                    html.P('Selected Item details:'),
                    dcc.Markdown(id="md")
                ]),



                # Two children: a fixed caption and the current threshold.
                # The callbacks overwrite the second child (index 1).
                html.Div(id='nodefreqslider-output-container',children=['Show nodes with frequency greater or equal to:','1']),
                # The slider ranges from 0 to 100 but only has marks up to 20.
                dcc.Slider(

                    min=0,
                    max=100,
                    value=1,
                    id='nodefreqslider',
                    marks={
                        1: {'label': '1', 'style': {'color': '#77b0b1'}},
                        5: {'label': '5'},
                        10: {'label': '10'},
                        15: {'label': '15'},
                        20: {'label': '20', 'style': {'color': '#f50'}}
                    },
                    included=False
                )



            ]),
            dcc.Tab(label='Graph Layout and Shape', children=[
                html.Div(style=styles['tab'], children=[
                    html.P('Set Node Color:'),

                 html.Div(
                    style={'margin': '10px 0px'},
                    children=[
                        html.P(
                            children='Layout:',
                            style={'margin-left': '3px'}
                        ),

                        dcc.Dropdown(
                            id ='dropdown-layout',
                            options = [{'label': val.capitalize(), 'value': val} for val in graph_layout_option_list],
                            value='circle',
                            clearable=False
                        )
                    ]
                ),
                html.Div(
                    style={'margin': '10px 0px'},
                    children=[
                        html.P(
                            children='Node Shape:',
                            style={'margin-left': '3px'}
                        ),

                        dcc.Dropdown(
                            id='dropdown-node-shape',
                            value='ellipse',
                            clearable=False,
                            options = [{'label': val.capitalize(), 'value': val} for val in node_shape_option_list]
                        )
                    ]
                ),


                # Free-text colour inputs; both start with the same value.
                html.Div(
                    children=[
                        html.P(children='Followers Color:'),
                        dcc.Input(
                            id='input-follower-color',
                            type='text',
                            value='#a0b3dc',
                        )
                    ]
                ),
                    html.Div(
                    children=[
                        html.P(children='Following Color:'),
                        dcc.Input(
                            id='input-following-color',
                            type='text',
                            value='#a0b3dc',
                        )
                    ]
                )


                ])
            ])
        ]),

    ])
])



# ############################## CALLBACKS ####################################





@app_tab.callback(
    Output("searchdropdown", "options"),
    [Input("searchdropdown", "search_value")],
    [State("searchdropdown", "value"),
    State('cytoscape', 'elements')],
)
def update_multi_options(search_value, value,elements):
    """Compute the options of the search dropdown while the user types.

    An element is offered when the typed text is contained in its label, when
    its label is contained in the typed text, or when it is already selected
    (so that current selections stay available in the option list).

    Parameters
    ----------
    search_value : str or None
        Text currently typed in the dropdown.
    value : list or None
        Ids that are currently selected in the dropdown.
    elements : list of dict or None
        Elements currently displayed by the Cytoscape component.

    Returns
    -------
    list of dict
        ``{"label": ..., "value": ...}`` entries, one per matching element.

    Raises
    ------
    PreventUpdate
        When nothing has been typed, so the current options are kept.
    """
    if not search_value:
        raise PreventUpdate

    selected_ids = value or []
    options = []
    for element in elements or []:
        label = (element.get('style') or {}).get('label')
        # Elements without a usable label cannot be searched for: a missing
        # or non-string label used to raise, and an empty label is a substring
        # of every search text, so it used to match every query.
        if not isinstance(label, str) or not label:
            continue

        element_id = element['data']['id']
        if search_value in label or label in search_value or element_id in selected_ids:
            options.append({"label": label, "value": element_id})

    return options
####


@app_tab.callback(Output('nodefreqslider', 'value'),
              [Input('bt-reset', 'n_clicks')],[State('nodefreqslider', 'value')])
def display_freq_node(resetbt, nodefreqslider):
    """Put the frequency slider back to 1 when the 'Reset' button is clicked.

    Parameters
    ----------
    resetbt : int or None
        Click count of the 'Reset' button (only used as a trigger).
    nodefreqslider : int or None
        Current slider value (not used).

    Returns
    -------
    int
        Always 1.

    Raises
    ------
    PreventUpdate
        When the callback was not triggered by the 'Reset' button.
    """
    ctx = dash.callback_context
    triggered_id = ctx.triggered[0]['prop_id'].split('.')[0] if ctx.triggered else None

    if triggered_id != 'bt-reset':
        # Falling off the end of the function would return None and overwrite
        # the slider value with None; leave the slider untouched instead.
        raise PreventUpdate

    return 1

@app_tab.callback(
    [Output('cytoscape', 'generateImage')],
    [
        Input('dropdown-download', 'value')
    ]
)
def download_image(ftype):
    """Ask the Cytoscape component to download the graph as an image.

    Parameters
    ----------
    ftype : str or None
        Value of the 'dropdown-download' dropdown.

    Returns
    -------
    list of dict
        A one-element list holding the ``generateImage`` request.

    Raises
    ------
    PreventUpdate
        When ``ftype`` is not one of the formats offered by the dropdown.
    """
    # The dropdown starts with a value that is not an image format, and this
    # callback also runs with that initial value; only forward real formats.
    if ftype not in dropdown_download_option_list:
        raise PreventUpdate

    return [{
        'type': ftype,
        'action': "download"
        }]
    
def _graph_elements_for(graph_type):
    """Return ``(elements, elements_by_id)`` for a graph-type label, else None."""
    if graph_type == 'Knowledge Graph':
        return knowledge_graph_cyto_elements, knowledge_graph_cyto_elements_dict
    if graph_type == 'Co-mention Graph':
        return comention_graph_cyto_elements, comention_graph_cyto_elements_dict
    return None


@app_tab.callback(
    [
        Output('cytoscape', 'zoom'),
        Output('cytoscape', 'elements'),
        Output('nodefreqslider-output-container', 'children')
    ],
    [
        Input('bt-reset', 'n_clicks'),
        Input('remove-button', 'n_clicks'),
        Input('showgraph', 'value'),
        Input('nodefreqslider', 'value'),
        Input("searchdropdown", "value")
     ],
     [

        State('cytoscape', 'elements'),
        State('cytoscape', 'selectedNodeData'),
        State('cytoscape', 'selectedEdgeData'),
        State('nodefreqslider-output-container', 'children'),
        State('cytoscape', 'tapNodeData'),
        State('cytoscape', 'zoom')

      ]
)
def reset_layout(resetbt, removebt, val, nodefreqslider, searchvalues,cytoelements, data, edge, nodefreqslidercchildren,tappednode,zoom):
    """Recompute the displayed elements after any of the controls changed.

    The steps run in this order:

    1. Start from the elements currently displayed; when the graph type was
       switched, start from the full element list of the chosen graph.
    2. Mark the elements picked in the search dropdown as selected.
    3. When the slider is at 1, reload the full element list of the chosen
       graph and reset the zoom to 1. This happens on every invocation while
       the slider is at 1, so elements removed earlier come back.
    4. When 'Remove Selected Node' was clicked, drop the selected nodes and
       edges.
    5. When the slider moved, drop every element whose ``frequency`` is below
       the slider value.

    Parameters
    ----------
    resetbt, removebt : int or None
        Click counts of the two buttons (only used as triggers).
    val : str
        Selected graph type ('Knowledge Graph' or 'Co-mention Graph').
    nodefreqslider : int or None
        Value of the frequency slider.
    searchvalues : list or None
        Element ids selected in the search dropdown.
    cytoelements : list of dict
        Elements currently displayed.
    data, edge : list of dict or None
        Data of the currently selected nodes / edges.
    nodefreqslidercchildren : list
        Children of the slider caption; index 1 holds the threshold text.
    tappednode : dict or None
        Data of the last tapped node (not used).
    zoom : float
        Current zoom level.

    Returns
    -------
    list
        ``[zoom, elements, nodefreqslidercchildren]``.
    """
    elements = cytoelements
    elements_dict = {elt['data']['id']: elt for elt in cytoelements or []}

    ctx = dash.callback_context
    button_id = ctx.triggered[0]['prop_id'].split('.')[0] if ctx.triggered else 'No clicks yet'

    full_graph = _graph_elements_for(val)

    if button_id == 'showgraph' and full_graph is not None:
        elements, elements_dict = full_graph

    for searchvalue in searchvalues or []:
        # A searched id may no longer be part of the displayed elements
        # (removed, filtered out, or belonging to the other graph): skip it
        # instead of failing with a KeyError.
        search_node = elements_dict.get(searchvalue)
        if search_node is not None:
            search_node["selected"] = True

    if nodefreqslider == 1:
        if full_graph is not None:
            elements, elements_dict = full_graph
        zoom = 1
        nodefreqslidercchildren[1] = str(nodefreqslider)

    if button_id == 'remove-button':
        if elements and data:
            ids_to_remove = {ele_data['id'] for ele_data in data}
            elements = [ele for ele in elements if ele['data']['id'] not in ids_to_remove]
        if elements and edge:
            ids_to_remove = {ele_data['id'] for ele_data in edge}
            elements = [ele for ele in elements if ele['data']['id'] not in ids_to_remove]

    # A slider without a value cannot be used as a threshold.
    if elements and button_id == 'nodefreqslider' and nodefreqslider is not None:
        threshold = int(nodefreqslider)
        ids_to_remove = {
            ele['data']['id'] for ele in elements
            if 'frequency' in ele['data'] and int(ele['data']['frequency']) < threshold
        }

        nodefreqslidercchildren[1] = str(nodefreqslider)
        elements = [ele for ele in elements if ele['data']['id'] not in ids_to_remove]

    return [zoom, elements, nodefreqslidercchildren]


@app_tab.callback(Output('md', 'children'),
              [Input('cytoscape', 'tapNode'),Input('cytoscape', 'tapEdge')],[State('cytoscape', 'selectedNodeData'),State('cytoscape', 'selectedEdgeData')])
def display_tap_node(datanode, dataedge,statedatanode,statedataedge):
    """Describe the tapped node or edge as Markdown for the details panel.

    A tapped node is described when at least one node is selected; otherwise a
    tapped edge is described when at least one edge is selected.

    Parameters
    ----------
    datanode : dict or None
        Last tapped node (its ``data`` and ``style`` entries are read).
    dataedge : dict or None
        Last tapped edge (its ``data`` and ``style`` entries are read).
    statedatanode, statedataedge : list or None
        Data of the currently selected nodes / edges.

    Returns
    -------
    str
        Markdown text for the details panel.
    """
    if datanode and statedatanode:
        definition = ""
        # Test for the key itself: a substring test on the printed dict also
        # matches the word inside other values and then fails on the lookup.
        if 'definition' in datanode['data']:
            definition = '* Definition: '+str(datanode['data']['definition'])
        res = '''
                * Label: %s
                %s
                * Frequency: %s
                ''' % (str(datanode['style']['label']),str(definition),str(datanode['data']['frequency']))
        return res

    if dataedge and statedataedge:
        edge_label = dataedge['style']['label']
        frequency = str(dataedge['data']['frequency'])

        if edge_label is None or str(edge_label) == "":
            # Unlabelled edge: describe it through the labels of the two nodes
            # it links. The nodes are only looked up in this case, so a
            # labelled edge never depends on them.
            source_node = comention_graph_cyto_elements_dict[ dataedge['data']['source']]
            source_label = source_node['style']['label']
            target_node = comention_graph_cyto_elements_dict[ dataedge['data']['target']]
            target_label = target_node['style']['label']
            label = '''**'%s'** mentioned in **%s** papers with **'%s'**''' % (source_label, frequency, target_label)
        else:
            label = str(edge_label)

        res = '''
                * Label: %s
                * Frequency: %s
                ''' % (label,frequency)
        return res
    return " * Nothing Selected"



@app_tab.callback(Output('cytoscape', 'layout'),
              [Input('dropdown-layout', 'value')])
def update_cytoscape_layout(layout):
    """Build the layout description for the name chosen in the layout dropdown."""
    layout_spec = dict(name=layout, showlegend=True)
    return layout_spec



@app_tab.callback(Output('cytoscape', 'stylesheet'),
                  [Input('cytoscape', 'tapNode'),
                   Input('cytoscape', 'selectedNodeData'),
                   Input('input-follower-color', 'value'),
                   Input('input-following-color', 'value'),
                   Input('dropdown-node-shape', 'value'),
                   Input('showgraph', 'value')
                  ])
def generate_stylesheet(node, selectedNode,follower_color, following_color, node_shape, graphtype):
    """Build the stylesheet that highlights the tapped node and its edges.

    Without a graph type or a tapped node the default stylesheet is returned.
    Otherwise the result starts with four base rules (all nodes, selected
    nodes, all edges, the tapped node) followed by, for every edge listed in
    the tapped node's ``edgesData``:

    * when the tapped node is the edge's source: a rule for the target node
      and a rule colouring the edge with ``following_color``;
    * when the tapped node is the edge's target: a rule for the source node
      and a rule colouring the edge with ``follower_color``.

    ``selectedNode`` is only a trigger; its value is not read.
    """
    if not graphtype or not node:
        return default_stylesheet

    tapped_id = node['data']['id']
    size_mapping = "mapData(score, 0, 0.006769776522008331, 20, 60)"

    all_nodes_rule = {
        "selector": 'node',
        'style': {
            "width": size_mapping,
            "height": size_mapping,
            "content": "data(name)",
            "font-size": "12px",
            "text-valign": "center",
            "text-halign": "center",
            "background-color": "#555",
            "text-outline-color": "#555",
            "text-outline-width": "2px",
            "color": "#fff",
            "overlay-padding": "6px",
            "z-index": "10",
            'shape': node_shape,
        },
    }
    selected_nodes_rule = {
        "selector": "node:selected",
        "style": {
            "border-width": "6px",
            "border-color": "#AAD8FF",
            "border-opacity": "0.5",
            "background-color": "#77828C",
            "text-outline-color": "#77828C",
        },
    }
    all_edges_rule = {
        'selector': 'edge',
        'style': {
            'opacity': 0.2,
            "curve-style": "bezier",
        },
    }
    tapped_node_rule = {
        "selector": 'node[id = "{}"]'.format(tapped_id),
        "style": {
            "border-width": 2,
            "border-opacity": 1,
            "opacity": 1,
            "text-opacity": 1,
            "font-size": 12,
            'z-index': 9999,
        },
    }

    stylesheet = [all_nodes_rule, selected_nodes_rule, all_edges_rule, tapped_node_rule]

    for edge in node['edgesData']:
        # Outgoing edge: emphasise the node it points to and the edge itself.
        if edge['source'] == tapped_id:
            stylesheet.extend([
                {
                    "selector": 'node[id = "{}"]'.format(edge['target']),
                    "style": {
                        'opacity': 0.9,
                    },
                },
                {
                    "selector": 'edge[id= "{}"]'.format(edge['id']),
                    "style": {
                        "mid-target-arrow-color": following_color,
                        "line-color": following_color,
                        'opacity': 0.9,
                        'z-index': 5000,
                    },
                },
            ])

        # Incoming edge: emphasise the node it comes from and the edge itself.
        if edge['target'] == tapped_id:
            stylesheet.extend([
                {
                    "selector": 'node[id = "{}"]'.format(edge['source']),
                    "style": {
                        'opacity': 0.9,
                        'z-index': 9999,
                    },
                },
                {
                    "selector": 'edge[id= "{}"]'.format(edge['id']),
                    "style": {
                        "mid-target-arrow-color": follower_color,
                        "line-color": follower_color,
                        'opacity': 1,
                        'z-index': 5000,
                    },
                },
            ])

    return stylesheet



# Size of the application as displayed in the notebook environment.
app_tab.width = "800px"
app_tab.height = "800px"
# Start the application in "jupyterlab" mode on port 8556.
# NOTE(review): the port is passed as a string; confirm that run_server
# accepts this.
app_tab.run_server(mode="jupyterlab",port="8556")

# Version the knowledge graph
The user can save a knowledge graph with a version.

In [None]:
import time
# Timestamp shared by the three file names below and by the version tag.
timestr = time.strftime("%Y%m%d-%H%M%S")

# Temporarily save the knowledge graph locally
kg_ttl = knowledge_graph.serialize(format="turtle",auto_compact=True)
# Assuming knowledge_graph is an rdflib graph: serialize() returns bytes with
# rdflib < 6 and str from rdflib 6 on. Normalise to bytes so that the binary
# write below works with either version.
if isinstance(kg_ttl, str):
    kg_ttl = kg_ttl.encode("utf-8")
kg_ttl_filename = "/tmp/kg_%s.ttl" % (timestr)
with open(kg_ttl_filename, 'wb') as outfile:
    outfile.write(kg_ttl)


# Temporarily save the extracted entities csv file locally
table_extractions_filename = "/tmp/table_extractions_%s.csv" % (timestr)
table_extractions.to_csv(table_extractions_filename)


# Temporarily save the curated list of extracted entities csv file locally
curated_table_extractions_filename = "/tmp/curated_table_extractions_%s.csv" % (timestr)
curated_table_extractions.to_csv(curated_table_extractions_filename)

In [None]:
from kgforge.core import Resource
from kgforge.specializations.resources import Dataset


# Create and register a Dataset from the extracted entities and relations

# The dataset is `about` the topic: the '@id' of the topic resource in expanded
# JSON-LD form. NOTE(review): topic_resource starts out as None and is only
# set by save_topic(), so a topic presumably has to be saved before this cell
# is run.
table_extractions_dataset = Dataset(forge,name="A dataset", about=forge.as_jsonld(topic_resource,form="expanded")['@id'])
# Attach the CSV file written earlier as the dataset's distribution.
table_extractions_dataset.add_distribution(table_extractions_filename, content_type="application/csv")

forge.register(table_extractions_dataset)

In [None]:
# Create and register a Dataset from the curated extracted entities and relations.

# Same `about` link to the topic as the raw extractions dataset.
# NOTE(review): topic_resource starts out as None and is only set by
# save_topic(), so a topic presumably has to be saved first.
curated_table_extractions_dataset = Dataset(forge,name="A dataset", about=forge.as_jsonld(topic_resource,form="expanded")['@id'])
curated_table_extractions_dataset.add_distribution(curated_table_extractions_filename, content_type="application/csv")
# Record that the curated table derives from the raw extractions dataset.
curated_table_extractions_dataset.add_derivation(table_extractions_dataset)

forge.register(curated_table_extractions_dataset)

In [None]:
import jwt

# Create and register a Dataset from the generated knowledge graph

# Read the claims of the token without verifying it: the token was pasted by
# the user and is only used here to describe the contributor.
# The `verify=False` keyword is not supported by PyJWT 2.x; disabling the
# checks through `options` behaves the same with PyJWT 1.x and 2.x.
_JWT_NO_VERIFICATION = {
    "verify_signature": False,
    "verify_exp": False,
    "verify_nbf": False,
    "verify_iat": False,
    "verify_aud": False,
    "verify_iss": False,
}
agent = jwt.decode(TOKEN, options=_JWT_NO_VERIFICATION)

# Keep only the identity claims and turn them into a Person resource whose id
# is the token subject.
agent = forge.reshape(forge.from_json(agent), keep=["name","email","sub","preferred_username"])
agent.id = agent.sub
agent.type = "Person"


# The knowledge graph dataset: the turtle file as distribution, the two
# extraction datasets as parts, and the user as contributor.
dataset = Dataset(forge,name="A dataset", about=forge.as_jsonld(topic_resource,form="expanded")['@id'])
dataset.add_distribution(kg_ttl_filename, content_type="application/x-turtle")
dataset.add_parts([table_extractions_dataset,curated_table_extractions_dataset])
dataset.add_contribution(agent)
dataset.contribution.hadRole= "Scientists"
forge.register(dataset)

In [None]:
# Version label: the user's preferred_username claim followed by the timestamp
# that was used for the saved files.
version = agent.preferred_username+"_"+timestr
# Tag the registered knowledge graph dataset with that label.
forge.tag(dataset,version)