# Rich Context Knowledge Graph Visualization

In [3]:
!pip install pyvis
!pip install pandas

Collecting pandas
[?25l  Downloading https://files.pythonhosted.org/packages/16/b5/bab3477466a4d9e705d40829ac65683155e7977acbc07f05b06fabded1be/pandas-0.25.3-cp37-cp37m-macosx_10_9_x86_64.whl (10.2MB)
[K     |████████████████████████████████| 10.2MB 2.3MB/s eta 0:00:01    |███████████████▎                | 4.8MB 1.2MB/s eta 0:00:05
Collecting pytz>=2017.2
  Using cached https://files.pythonhosted.org/packages/e7/f9/f0b53f88060247251bf481fa6ea62cd0d25bf1b11a87888e53ce5b7c8ad2/pytz-2019.3-py2.py3-none-any.whl
Collecting numpy>=1.13.3
[?25l  Downloading https://files.pythonhosted.org/packages/f0/14/f71a89e03578084111e352f464d9f3b7f701ebbecbd1a60e89c96983ef77/numpy-1.18.0-cp37-cp37m-macosx_10_9_x86_64.whl (15.1MB)
[K     |████████████████████████████████| 15.1MB 3.4MB/s eta 0:00:01
Installing collected packages: pytz, numpy, pandas
Successfully installed numpy-1.18.0 pandas-0.25.3 pytz-2019.3


In [73]:
import json

# Input file; the loading code below expects a JSON document with a top-level
# "@graph" key.
filename = "tmp.jsonld"

# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, which differs between systems and can mangle non-ASCII titles.
with open(filename, "r", encoding="utf-8") as f:
    jld_corpus = json.load(f)

# The sequence of entity records that the loading loops iterate over.
corpus = jld_corpus["@graph"]

# IDS collects the id fragment of every corpus element; get_id() turns the
# position of an id in this list into an integer node id.
IDS = []

# Views of the corpus entities, each keyed by the entity's id fragment.
publications = {}
datasets = {}
journals = {}


def get_id(id):
    """Return the integer node id for an entity id fragment.

    The node id is the position of the first occurrence of `id` in the
    module-level IDS list.

    Raises:
        ValueError: if `id` has not been registered in IDS.
    """
    position = IDS.index(id)
    return position

    
# First pass over the corpus: register the id of every element in IDS and
# collect a view of each dataset.

for entry in corpus:
    entry_type = entry["@type"]
    entry_title = entry["dct:title"]["@value"]

    # the id is the fragment after "#" in the element's "@id"
    entry_id = entry["@id"].split("#")[1]
    IDS.append(entry_id)

    if entry_type != "Dataset":
        continue

    datasets[entry_id] = {
        "id": entry_id,
        "title": entry_title,
        "provider": entry["dct:publisher"]["@value"],
    }


# load the journals
#
# Every element id was already registered in IDS by the dataset pass over
# `corpus`, so this pass must not register ids again. (The previous
# `ids.append(id)` referenced an undefined name and raised NameError on a
# fresh kernel.)

for elem in corpus:
    kind = elem["@type"]
    title = elem["dct:title"]["@value"]

    # the id is the fragment after "#" in the element's "@id"
    journal_id = elem["@id"].split("#")[1]

    if kind == "Journal":
        # the identifier is optional; fall back to an empty string
        if "dct:identifier" in elem:
            issn = elem["dct:identifier"]["@value"]
        else:
            issn = ""

        view = {
            "id": journal_id,
            "title": title,
            "issn": issn
        }

        journals[journal_id] = view


# load the publications
#
# Every element id was already registered in IDS by the dataset pass over
# `corpus`, so this pass must not register ids again. (The previous
# `ids.append(id)` referenced an undefined name and raised NameError on a
# fresh kernel.)
#
# Cited datasets and the publishing journal are tagged with "used" so that
# only referenced entities need to be drawn.

for elem in corpus:
    kind = elem["@type"]
    title = elem["dct:title"]["@value"]

    # the id is the fragment after "#" in the element's "@id"
    pub_id = elem["@id"].split("#")[1]

    if kind == "ResearchPublication":
        # a single citation arrives as one dict, several as a list of dicts
        cited = elem["cito:citesAsDataSource"]

        if isinstance(cited, dict):
            cited = [cited]

        dat_list = []

        for d in cited:
            dat_id = d["@id"].split("#")[1]
            datasets[dat_id]["used"] = True
            dat_list.append(dat_id)

        # Reset per publication: without this, a publication lacking an
        # identifier would raise NameError or inherit the previous one's DOI.
        doi = ""

        if "dct:identifier" in elem:
            identifier = elem["dct:identifier"]["@value"]

            # only identifiers that start with "10." are kept as a DOI
            if identifier.startswith("10."):
                doi = identifier

        # Reset per publication: None means "no journal", which the graph
        # cell tests via `if p["journal"]`. The previous code assigned
        # `journal = None`, leaving `jour_id` unbound or stale.
        jour_id = None

        if "dct:publisher" in elem:
            jour_id = elem["dct:publisher"]["@id"].split("#")[1]
            journals[jour_id]["used"] = True

        view = {
            "id": pub_id,
            "title": title,
            "doi": doi,
            "journal": jour_id,
            "datasets": dat_list
        }

        publications[pub_id] = view

In [74]:
from pyvis.network import Network

g = Network(notebook=True, height="1000px", width="100%")
g.force_atlas_2based()

# Datasets (red) are drawn only if they carry the "used" tag set while the
# publications were loaded.
for dataset in datasets.values():
    if "used" not in dataset:
        continue

    hover = "{}<br/>{}".format(dataset["title"], dataset["provider"])
    g.add_node(get_id(dataset["id"]), label=dataset["title"], title=hover, color="red")

# Journals (green) follow the same "used" rule.
for journal in journals.values():
    if "used" not in journal:
        continue

    hover = "{}<br/>{}".format(journal["title"], journal["issn"])
    g.add_node(get_id(journal["id"]), label=journal["title"], title=hover, color="green")

# Every publication (blue) becomes a node, linked by gray edges to its journal
# (when it has one) and to each dataset it lists.
for pub in publications.values():
    pub_node = get_id(pub["id"])
    hover = "{}<br/>{}".format(pub["title"], pub["doi"])
    g.add_node(pub_node, label=pub["title"], title=hover, color="blue")

    if pub["journal"]:
        g.add_edge(pub_node, get_id(pub["journal"]), color="gray")

    for dataset_id in pub["datasets"]:
        g.add_edge(pub_node, get_id(dataset_id), color="gray")

g.show_buttons()
g.show("corpus.html")