From fedde35938f7384b7f57657f28afb52a8a089698 Mon Sep 17 00:00:00 2001
From: Deepak Unni <deepak.unni3@gmail.com>
Date: Fri, 25 Sep 2020 17:02:21 -0700
Subject: [PATCH 1/7] Add notebook to demonstrate running KG-COVID-19 pipeline

---
 Run-KG-COVID-19-pipeline.ipynb | 124 +++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 Run-KG-COVID-19-pipeline.ipynb

diff --git a/Run-KG-COVID-19-pipeline.ipynb b/Run-KG-COVID-19-pipeline.ipynb
new file mode 100644
index 00000000..2631413d
--- /dev/null
+++ b/Run-KG-COVID-19-pipeline.ipynb
@@ -0,0 +1,124 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Running KG-COVID-19 pipeline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The KG-COVID-19 pipeline can be run on the command line or via this notebook. The goal here is to run the pipeline end-to-end.\n",
+    "\n",
+    "**Note:** This notebook assumes that you have already installed the required dependencies for KG-COVID-19. For more information refer to [Installation instructions](https://github.com/Knowledge-Graph-Hub/kg-covid-19/wiki#installation)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Downloading all required datasets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "First we start with downloading all required datasets as listed in [download.yaml](../download.yaml)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!python run.py download"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Transform all required datasets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We then transform all the datasets and generate a nodes.tsv and edges.tsv for each dataset.\n",
+    "\n",
+    "The files are located in `data/transformed/SOURCE_NAME` where `SOURCE_NAME` is the name of the data source."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!python run.py transform"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Merge all datasets into a single graph"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Finally, we create a merged graph by reading in the individual nodes.tsv and edges.tsv and merging them. \n",
+    "The merge process is driven by the [merge.yaml](../merge.yaml)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!python run.py merge"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The merged graph should be available in `data/merged/` folder.\n",
+    "\n",
+    "This pipeline generates two sets export:\n",
+    "- The graph as a TSV: `data/merged/merged-kg.tar.gz`\n",
+    "- The graph as RDF N-Triples: `data/merged/merged-kg.nt`\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From d6703ab9698afca83a336f2b45dece6ae221bb8f Mon Sep 17 00:00:00 2001
From: Justin Reese <reeseju@tartini.cgrb.oregonstate.local>
Date: Mon, 28 Sep 2020 16:01:19 -0700
Subject: [PATCH 2/7] Add stuff to notebook for generating holdouts, and
 running queries on the commandline

---
 Run-KG-COVID-19-pipeline.ipynb | 342 +++++++++++++++++++++++++++++++--
 1 file changed, 330 insertions(+), 12 deletions(-)

diff --git a/Run-KG-COVID-19-pipeline.ipynb b/Run-KG-COVID-19-pipeline.ipynb
index 2631413d..f8d8bfbe 100644
--- a/Run-KG-COVID-19-pipeline.ipynb
+++ b/Run-KG-COVID-19-pipeline.ipynb
@@ -11,9 +11,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The KG-COVID-19 pipeline can be run on the command line or via this notebook. The goal here is to run the pipeline end-to-end.\n",
+    "The KG-COVID-19 pipeline can be run on the command line or via this notebook. The goal here is to run the pipeline end-to-end. \n",
     "\n",
-    "**Note:** This notebook assumes that you have already installed the required dependencies for KG-COVID-19. For more information refer to [Installation instructions](https://github.com/Knowledge-Graph-Hub/kg-covid-19/wiki#installation)\n"
+    "We will also demonstrate some ways that you can use the KG downstream, and show some other features of the framework.\n",
+    "\n",
+    "**Note:** This notebook assumes that you have already installed the required dependencies for KG-COVID-19. For more information refer to [Installation instructions](https://github.com/Knowledge-Graph-Hub/kg-covid-19/wiki#installation)"
    ]
   },
   {
@@ -32,9 +34,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Downloading files:   0%|                                 | 0/24 [00:00<?, ?it/s]\r",
+      "Downloading files: 100%|█████████████████████| 24/24 [00:00<00:00, 19599.55it/s]\r\n"
+     ]
+    }
+   ],
    "source": [
     "!python run.py download"
    ]
@@ -57,9 +69,67 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tabula.io:Got stderr: Sep 28, 2020 2:44:04 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'Arial-BoldMT'\n",
+      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
+      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
+      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "INFO: Your current java version is: 1.8.0_161\n",
+      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "INFO: To get higher rendering speed on old java 1.8 or 9 versions,\n",
+      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),\n",
+      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "INFO:   or\n",
+      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider\n",
+      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "INFO:   or call System.setProperty(\"sun.java2d.cmm\", \"sun.java2d.cmm.kcms.KcmsServiceProvider\")\n",
+      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'Arial-BoldMT'\n",
+      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
+      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
+      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'Arial-BoldMT'\n",
+      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
+      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
+      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
+      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
+      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
+      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
+      "Sep 28, 2020 2:44:06 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
+      "Sep 28, 2020 2:44:06 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
+      "Sep 28, 2020 2:44:06 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
+      "Sep 28, 2020 2:44:06 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
+      "\n",
+      "5782864it [00:22, 258353.59it/s]\n",
+      "^C\n",
+      "\n",
+      "Aborted!\n"
+     ]
+    }
+   ],
    "source": [
     "!python run.py transform"
    ]
@@ -81,9 +151,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'drug-central'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'pharmgkb'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'STRING'\n",
+      "[KGX][cli_utils.py][       apply_filters] INFO: with node filters: {'category': ['biolink:Gene', 'biolink:Protein']}\n",
+      "[KGX][cli_utils.py][       apply_filters] INFO: with edge filters: {'subject_category': ['biolink:Gene', 'biolink:Protein'], 'object_category': ['biolink:Gene', 'biolink:Protein'], 'edge_label': ['biolink:interacts_with', 'biolink:has_gene_product']}\n",
+      "^C\n",
+      "\n",
+      "Aborted!\n"
+     ]
+    }
+   ],
    "source": [
     "!python run.py merge"
    ]
@@ -94,10 +179,243 @@
    "source": [
     "The merged graph should be available in `data/merged/` folder.\n",
     "\n",
-    "This pipeline generates two sets export:\n",
-    "- The graph as a TSV: `data/merged/merged-kg.tar.gz`\n",
-    "- The graph as RDF N-Triples: `data/merged/merged-kg.nt`\n"
+    "This pipeline generates a graph in KGX TSV format here:\n",
+    "`data/merged/merged-kg.tar.gz`\n",
+    "Prebuilt graphs are also available here:\n",
+    "https://kg-hub.berkeleybop.io/kg-covid-19/index.html"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Make training data for machine learning use case"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### untar and gunzip the graph"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!tar -xvzf data/merged/merged-kg.tar.gz"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### create the training/holdout data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "!python run.py holdouts -e merged-kg_edges.tsv -n merged-kg_nodes.tsv  # this might take 10 minutes or so"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Let's get some stats on our training graph. We're tightly integrated with ensmallen_graph, so we'll use that package to do this."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'bidirectional_rate': '1',\n",
+       " 'unique_edge_types_number': '32',\n",
+       " 'degrees_mode': '1',\n",
+       " 'is_directed': 'false',\n",
+       " 'degrees_max': '72280',\n",
+       " 'selfloops_rate': '0.000014902610832412994',\n",
+       " 'edges_number': '24760762',\n",
+       " 'traps_rate': '0.0612818094137847',\n",
+       " 'singleton_nodes': '23128',\n",
+       " 'connected_components_number': '24763',\n",
+       " 'nodes_number': '377404',\n",
+       " 'strongly_connected_components_number': '24763',\n",
+       " 'degrees_min': '0',\n",
+       " 'density': '0.0001738405182835909',\n",
+       " 'degrees_median': '5',\n",
+       " 'degrees_mean': '65.60810696230034',\n",
+       " 'unique_node_types_number': '37'}"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from ensmallen_graph import EnsmallenGraph\n",
+    "\n",
+    "training = EnsmallenGraph.from_csv(\n",
+    "    edge_path=\"data/holdouts/pos_train_edges.tsv\",\n",
+    "    sources_column='subject',\n",
+    "    destinations_column='object',\n",
+    "    directed=False,\n",
+    "    edge_types_column='edge_label',\n",
+    "    default_edge_type='biolink:Association',\n",
+    "    node_path=\"data/holdouts/pos_train_nodes.tsv\",\n",
+    "    nodes_column='id',\n",
+    "    default_node_type='biolink:NamedThing',\n",
+    "    node_types_column='category',\n",
+    "    ignore_duplicated_edges=True,\n",
+    "    ignore_duplicated_nodes=True,\n",
+    ");\n",
+    "\n",
+    "training.report()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'density': '0.00021729963334795238',\n",
+       " 'degrees_mean': '82.00975082405061',\n",
+       " 'degrees_max': '90378',\n",
+       " 'nodes_number': '377404',\n",
+       " 'selfloops_rate': '0.00001534693375371654',\n",
+       " 'connected_components_number': '8996',\n",
+       " 'unique_node_types_number': '37',\n",
+       " 'degrees_mode': '1',\n",
+       " 'edges_number': '30950808',\n",
+       " 'degrees_median': '6',\n",
+       " 'unique_edge_types_number': '32',\n",
+       " 'bidirectional_rate': '1',\n",
+       " 'degrees_min': '0',\n",
+       " 'singleton_nodes': '8243',\n",
+       " 'strongly_connected_components_number': '8996',\n",
+       " 'is_directed': 'false',\n",
+       " 'traps_rate': '0.02184131593729796'}"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "graph = EnsmallenGraph.from_csv(\n",
+    "    edge_path=\"merged-kg_edges.tsv\",\n",
+    "    sources_column='subject',\n",
+    "    destinations_column='object',\n",
+    "    directed=False,\n",
+    "    edge_types_column='edge_label',\n",
+    "    default_edge_type='biolink:Association',\n",
+    "    node_path=\"merged-kg_nodes.tsv\",\n",
+    "    nodes_column='id',\n",
+    "    default_node_type='biolink:NamedThing',\n",
+    "    node_types_column='category',\n",
+    "    ignore_duplicated_edges=True,\n",
+    "    ignore_duplicated_nodes=True,\n",
+    "    force_conversion_to_undirected=True # deprecated, removed in ensmallen_graph 0.4\n",
+    ");\n",
+    "graph.report()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### See [these](https://github.com/monarch-initiative/embiggen/blob/master/notebooks/) notebooks to generate embeddings from the KG you've created above. There are notebooks to make embeddings using:\n",
+    "- [Skipgram](https://github.com/monarch-initiative/embiggen/blob/master/notebooks/Graph%20embedding%20using%20SkipGram.ipynb)\n",
+    "- [CBOW](https://github.com/monarch-initiative/embiggen/blob/master/notebooks/Graph%20embedding%20using%20CBOW.ipynb)\n",
+    "- [GloVe](https://github.com/monarch-initiative/embiggen/blob/master/notebooks/Graph%20embedding%20using%20GloVe.ipynb)\n",
+    "\n",
+    "#### These embeddings can then be used to train MLP, random forest, decision tree, and logistic regression classifiers using [this notebook](https://github.com/monarch-initiative/embiggen/blob/master/notebooks/Link%20Prediction.ipynb).\n",
+    "\n",
+    "##### Note: "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Use prebuilt SPARQL queries to query our Blazegraph endpoint on the command line"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!python run.py query -y queries/sparql/query-01-bl-cat-counts.yaml # or make a new YAML file and write your own query"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['v1', 'v0']\n",
+      "['199', 'organism taxon']\n",
+      "['19131', 'https://w3id.org/biolink/vocab/Gene']\n",
+      "['3908', 'https://w3id.org/biolink/vocab/NamedThing']\n",
+      "['20167', 'https://w3id.org/biolink/vocab/Protein']\n",
+      "['30534', 'https://w3id.org/biolink/vocab/BiologicalProcess']\n",
+      "['4468', 'https://w3id.org/biolink/vocab/CellularComponent']\n",
+      "['30018', 'https://w3id.org/biolink/vocab/ChemicalSubstance']\n",
+      "['32228', 'https://w3id.org/biolink/vocab/Drug']\n",
+      "['12241', 'https://w3id.org/biolink/vocab/MolecularActivity']\n",
+      "['62446', 'https://w3id.org/biolink/vocab/OntologyClass']\n",
+      "['6', 'https://w3id.org/biolink/vocab/OrganismalEntity']\n",
+      "['15530', 'https://w3id.org/biolink/vocab/PhenotypicFeature']\n",
+      "['129930', 'https://w3id.org/biolink/vocab/Publication']\n",
+      "['4687', 'https://w3id.org/biolink/vocab/AnatomicalEntity']\n",
+      "['48', 'https://w3id.org/biolink/vocab/Assay']\n",
+      "['703', 'https://w3id.org/biolink/vocab/Cell']\n",
+      "['24229', 'https://w3id.org/biolink/vocab/Disease']\n",
+      "['1', 'https://w3id.org/biolink/vocab/MolecularEntity']\n",
+      "['17', 'https://w3id.org/biolink/vocab/RNA']\n",
+      "['47', 'https://w3id.org/biolink/vocab/SequenceFeature']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# have a look at biolink category counts currently in KG-COVID-19 loaded on Blazegraph endpoint\n",
+    "import csv\n",
+    "\n",
+    "with open('data/queries/query-01-bl-cat-counts.tsv', newline='') as tsv:\n",
+    "    read_tsv = csv.reader(tsv, delimiter=\"\\t\")\n",
+    "    for row in read_tsv:\n",
+    "      print(row)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -116,7 +434,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.1"
+   "version": "3.8.3"
   }
  },
  "nbformat": 4,

From 4dc858d7ccedb752093f5becadd1ed0868080c54 Mon Sep 17 00:00:00 2001
From: Justin Reese <reeseju@tartini.cgrb.oregonstate.local>
Date: Mon, 28 Sep 2020 16:03:42 -0700
Subject: [PATCH 3/7] Add comment

---
 Run-KG-COVID-19-pipeline.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Run-KG-COVID-19-pipeline.ipynb b/Run-KG-COVID-19-pipeline.ipynb
index f8d8bfbe..c8bdfddd 100644
--- a/Run-KG-COVID-19-pipeline.ipynb
+++ b/Run-KG-COVID-19-pipeline.ipynb
@@ -348,7 +348,7 @@
     "\n",
     "#### These embeddings can then be used to train MLP, random forest, decision tree, and logistic regression classifiers using [this notebook](https://github.com/monarch-initiative/embiggen/blob/master/notebooks/Link%20Prediction.ipynb).\n",
     "\n",
-    "##### Note: "
+    "##### Note: consider running the code in these notebooks on a server with GPUs in order to complete in a reasonable amount of time"
    ]
   },
   {

From db83d6497f9ae07cab7b8e2f1b766e6133a976b8 Mon Sep 17 00:00:00 2001
From: Justin Reese <reeseju@tartini.cgrb.oregonstate.local>
Date: Mon, 28 Sep 2020 16:11:36 -0700
Subject: [PATCH 4/7] Update link prediction NB

---
 Run-KG-COVID-19-pipeline.ipynb | 154 ++++++++-------------------------
 1 file changed, 35 insertions(+), 119 deletions(-)

diff --git a/Run-KG-COVID-19-pipeline.ipynb b/Run-KG-COVID-19-pipeline.ipynb
index c8bdfddd..cf898c5b 100644
--- a/Run-KG-COVID-19-pipeline.ipynb
+++ b/Run-KG-COVID-19-pipeline.ipynb
@@ -43,7 +43,7 @@
      "text": [
       "\r",
       "Downloading files:   0%|                                 | 0/24 [00:00<?, ?it/s]\r",
-      "Downloading files: 100%|█████████████████████| 24/24 [00:00<00:00, 19599.55it/s]\r\n"
+      "Downloading files: 100%|█████████████████████| 24/24 [00:00<00:00, 25311.36it/s]\r\n"
      ]
     }
    ],
@@ -76,55 +76,56 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "WARNING:tabula.io:Got stderr: Sep 28, 2020 2:44:04 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING:tabula.io:Got stderr: Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial-BoldMT'\n",
-      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO: Your current java version is: 1.8.0_161\n",
-      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO: To get higher rendering speed on old java 1.8 or 9 versions,\n",
-      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),\n",
-      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO:   or\n",
-      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider\n",
-      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO:   or call System.setProperty(\"sun.java2d.cmm\", \"sun.java2d.cmm.kcms.KcmsServiceProvider\")\n",
-      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial-BoldMT'\n",
-      "Sep 28, 2020 2:44:04 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial-BoldMT'\n",
-      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 2:44:05 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 2:44:06 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 2:44:06 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 2:44:06 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:49 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 2:44:06 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:06:49 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
       "\n",
-      "5782864it [00:22, 258353.59it/s]\n",
-      "^C\n",
+      "5782864it [00:21, 267769.37it/s]\n",
+      "4222272it [00:15, 245962.42it/s]^C\n",
+      "4243208it [00:16, 265139.87it/s]\n",
       "\n",
       "Aborted!\n"
      ]
@@ -151,7 +152,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -162,10 +163,7 @@
       "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'pharmgkb'\n",
       "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'STRING'\n",
       "[KGX][cli_utils.py][       apply_filters] INFO: with node filters: {'category': ['biolink:Gene', 'biolink:Protein']}\n",
-      "[KGX][cli_utils.py][       apply_filters] INFO: with edge filters: {'subject_category': ['biolink:Gene', 'biolink:Protein'], 'object_category': ['biolink:Gene', 'biolink:Protein'], 'edge_label': ['biolink:interacts_with', 'biolink:has_gene_product']}\n",
-      "^C\n",
-      "\n",
-      "Aborted!\n"
+      "[KGX][cli_utils.py][       apply_filters] INFO: with edge filters: {'subject_category': ['biolink:Gene', 'biolink:Protein'], 'object_category': ['biolink:Gene', 'biolink:Protein'], 'edge_label': ['biolink:interacts_with', 'biolink:has_gene_product']}\n"
      ]
     }
    ],
@@ -235,36 +233,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'bidirectional_rate': '1',\n",
-       " 'unique_edge_types_number': '32',\n",
-       " 'degrees_mode': '1',\n",
-       " 'is_directed': 'false',\n",
-       " 'degrees_max': '72280',\n",
-       " 'selfloops_rate': '0.000014902610832412994',\n",
-       " 'edges_number': '24760762',\n",
-       " 'traps_rate': '0.0612818094137847',\n",
-       " 'singleton_nodes': '23128',\n",
-       " 'connected_components_number': '24763',\n",
-       " 'nodes_number': '377404',\n",
-       " 'strongly_connected_components_number': '24763',\n",
-       " 'degrees_min': '0',\n",
-       " 'density': '0.0001738405182835909',\n",
-       " 'degrees_median': '5',\n",
-       " 'degrees_mean': '65.60810696230034',\n",
-       " 'unique_node_types_number': '37'}"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from ensmallen_graph import EnsmallenGraph\n",
     "\n",
@@ -288,36 +259,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'density': '0.00021729963334795238',\n",
-       " 'degrees_mean': '82.00975082405061',\n",
-       " 'degrees_max': '90378',\n",
-       " 'nodes_number': '377404',\n",
-       " 'selfloops_rate': '0.00001534693375371654',\n",
-       " 'connected_components_number': '8996',\n",
-       " 'unique_node_types_number': '37',\n",
-       " 'degrees_mode': '1',\n",
-       " 'edges_number': '30950808',\n",
-       " 'degrees_median': '6',\n",
-       " 'unique_edge_types_number': '32',\n",
-       " 'bidirectional_rate': '1',\n",
-       " 'degrees_min': '0',\n",
-       " 'singleton_nodes': '8243',\n",
-       " 'strongly_connected_components_number': '8996',\n",
-       " 'is_directed': 'false',\n",
-       " 'traps_rate': '0.02184131593729796'}"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "graph = EnsmallenGraph.from_csv(\n",
     "    edge_path=\"merged-kg_edges.tsv\",\n",
@@ -369,37 +313,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['v1', 'v0']\n",
-      "['199', 'organism taxon']\n",
-      "['19131', 'https://w3id.org/biolink/vocab/Gene']\n",
-      "['3908', 'https://w3id.org/biolink/vocab/NamedThing']\n",
-      "['20167', 'https://w3id.org/biolink/vocab/Protein']\n",
-      "['30534', 'https://w3id.org/biolink/vocab/BiologicalProcess']\n",
-      "['4468', 'https://w3id.org/biolink/vocab/CellularComponent']\n",
-      "['30018', 'https://w3id.org/biolink/vocab/ChemicalSubstance']\n",
-      "['32228', 'https://w3id.org/biolink/vocab/Drug']\n",
-      "['12241', 'https://w3id.org/biolink/vocab/MolecularActivity']\n",
-      "['62446', 'https://w3id.org/biolink/vocab/OntologyClass']\n",
-      "['6', 'https://w3id.org/biolink/vocab/OrganismalEntity']\n",
-      "['15530', 'https://w3id.org/biolink/vocab/PhenotypicFeature']\n",
-      "['129930', 'https://w3id.org/biolink/vocab/Publication']\n",
-      "['4687', 'https://w3id.org/biolink/vocab/AnatomicalEntity']\n",
-      "['48', 'https://w3id.org/biolink/vocab/Assay']\n",
-      "['703', 'https://w3id.org/biolink/vocab/Cell']\n",
-      "['24229', 'https://w3id.org/biolink/vocab/Disease']\n",
-      "['1', 'https://w3id.org/biolink/vocab/MolecularEntity']\n",
-      "['17', 'https://w3id.org/biolink/vocab/RNA']\n",
-      "['47', 'https://w3id.org/biolink/vocab/SequenceFeature']\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# have a look at biolink category counts currently in KG-COVID-19 loaded on Blazegraph endpoint\n",
     "import csv\n",

From 5205d0e131328a12bd5a2bc4ffb12a1d6d50ad29 Mon Sep 17 00:00:00 2001
From: Justin Reese <reeseju@tartini.cgrb.oregonstate.local>
Date: Mon, 28 Sep 2020 17:39:57 -0700
Subject: [PATCH 5/7] Update nb

---
 Run-KG-COVID-19-pipeline.ipynb | 93 +++++++++++++++-------------------
 1 file changed, 40 insertions(+), 53 deletions(-)

diff --git a/Run-KG-COVID-19-pipeline.ipynb b/Run-KG-COVID-19-pipeline.ipynb
index cf898c5b..b3b430c6 100644
--- a/Run-KG-COVID-19-pipeline.ipynb
+++ b/Run-KG-COVID-19-pipeline.ipynb
@@ -43,7 +43,7 @@
      "text": [
       "\r",
       "Downloading files:   0%|                                 | 0/24 [00:00<?, ?it/s]\r",
-      "Downloading files: 100%|█████████████████████| 24/24 [00:00<00:00, 25311.36it/s]\r\n"
+      "Downloading files: 100%|█████████████████████| 24/24 [00:00<00:00, 19807.81it/s]\r\n"
      ]
     }
    ],
@@ -69,65 +69,67 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "WARNING:tabula.io:Got stderr: Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "WARNING:tabula.io:Got stderr: Sep 28, 2020 4:12:20 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial-BoldMT'\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:20 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:20 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO: Your current java version is: 1.8.0_161\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO: To get higher rendering speed on old java 1.8 or 9 versions,\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO:   or\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS\n",
       "INFO:   or call System.setProperty(\"sun.java2d.cmm\", \"sun.java2d.cmm.kcms.KcmsServiceProvider\")\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial-BoldMT'\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 4:06:47 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial-BoldMT'\n",
-      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:21 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:22 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:22 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:22 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:22 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 4:06:48 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:22 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
-      "Sep 28, 2020 4:06:49 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:22 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'ArialMT'\n",
-      "Sep 28, 2020 4:06:49 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
+      "Sep 28, 2020 4:12:22 PM org.apache.pdfbox.pdmodel.font.PDTrueTypeFont <init>\n",
       "WARNING: Using fallback font 'LiberationSans' for 'Arial'\n",
       "\n",
-      "5782864it [00:21, 267769.37it/s]\n",
-      "4222272it [00:15, 245962.42it/s]^C\n",
-      "4243208it [00:16, 265139.87it/s]\n",
-      "\n",
-      "Aborted!\n"
+      "5782864it [00:21, 270243.44it/s]\n",
+      "5782864it [00:21, 271635.01it/s]\n",
+      "Loading gene info: 28496648it [01:33, 304282.49it/s]\n",
+      "Loading country codes: 264it [00:00, 238538.62it/s]\n",
+      "Unzipping files: 100%|███████████████████████████| 2/2 [03:30<00:00, 105.07s/it]\n",
+      "100%|█████████████████████████████████████| 54137/54137 [11:09<00:00, 80.84it/s]\n",
+      "100%|█████████████████████████████████████| 75785/75785 [17:06<00:00, 73.86it/s]\n"
      ]
     }
    ],
@@ -154,19 +156,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'drug-central'\n",
-      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'pharmgkb'\n",
-      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'STRING'\n",
-      "[KGX][cli_utils.py][       apply_filters] INFO: with node filters: {'category': ['biolink:Gene', 'biolink:Protein']}\n",
-      "[KGX][cli_utils.py][       apply_filters] INFO: with edge filters: {'subject_category': ['biolink:Gene', 'biolink:Protein'], 'object_category': ['biolink:Gene', 'biolink:Protein'], 'edge_label': ['biolink:interacts_with', 'biolink:has_gene_product']}\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "!python run.py merge"
    ]
@@ -187,7 +177,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Make training data for machine learning use case"
+    "## Make training data for machine learning use case\n",
+    "\n",
+    "KG-COVID-19 contains tooling to produce training data for machine learning. Briefly, a training graph is produced with 80% (by default, override with `-t` parameter) of edges. 20% of edges are removed such that they do not create new components. These graphs are emitted as KGX TSV files in `data/holdouts`."
    ]
   },
   {
@@ -290,7 +282,7 @@
     "- [CBOW](https://github.com/monarch-initiative/embiggen/blob/master/notebooks/Graph%20embedding%20using%20CBOW.ipynb)\n",
     "- [GloVe](https://github.com/monarch-initiative/embiggen/blob/master/notebooks/Graph%20embedding%20using%20GloVe.ipynb)\n",
     "\n",
-    "#### These embeddings can then be used to train MLP, random forest, decision tree, and logistic regression classifiers using [this notebook](https://github.com/monarch-initiative/embiggen/blob/master/notebooks/Link%20Prediction.ipynb).\n",
+    "#### These embeddings can then be used to train MLP, random forest, decision tree, and logistic regression classifiers using [this notebook](https://github.com/monarch-initiative/embiggen/blob/master/notebooks/Classical%20Link%20Prediction.ipynb).\n",
     "\n",
     "##### Note: consider running the code in these notebooks on a server with GPUs in order to complete in a reasonable amount of time"
    ]
@@ -299,7 +291,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Use prebuilt SPARQL queries to query our Blazegraph endpoint on the commandline"
+    "## Use prebuilt SPARQL queries to query our Blazegraph endpoint on the commandline\n",
+    "\n",
+    "KG-COVID-19 has tooling to query our Blazegraph endpoint using predetermined SPARQL queries, and emit the results as a TSV file. Different SPARQL queries on our endpoint or other endpoints can be used by creating a new YAML file and specific this filewith the `-y` flag. "
    ]
   },
   {
@@ -325,13 +319,6 @@
     "    for row in read_tsv:\n",
     "      print(row)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

From a3f5d0c22f7c413ac0e49a7b76a2e0090cfdedca Mon Sep 17 00:00:00 2001
From: Justin Reese <reeseju@tartini.cgrb.oregonstate.local>
Date: Mon, 28 Sep 2020 20:15:33 -0700
Subject: [PATCH 6/7] complete run of notebook

---
 Run-KG-COVID-19-pipeline.ipynb | 190 +++++++++++++++++++++++++++++++--
 1 file changed, 179 insertions(+), 11 deletions(-)

diff --git a/Run-KG-COVID-19-pipeline.ipynb b/Run-KG-COVID-19-pipeline.ipynb
index b3b430c6..f08a3b50 100644
--- a/Run-KG-COVID-19-pipeline.ipynb
+++ b/Run-KG-COVID-19-pipeline.ipynb
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -129,7 +129,47 @@
       "Loading country codes: 264it [00:00, 238538.62it/s]\n",
       "Unzipping files: 100%|███████████████████████████| 2/2 [03:30<00:00, 105.07s/it]\n",
       "100%|█████████████████████████████████████| 54137/54137 [11:09<00:00, 80.84it/s]\n",
-      "100%|█████████████████████████████████████| 75785/75785 [17:06<00:00, 73.86it/s]\n"
+      "100%|█████████████████████████████████████| 75785/75785 [17:06<00:00, 73.86it/s]\n",
+      "WARNING:root:Found >1 DB_Object_Name in rec, using the first one\n",
+      "Parsing data/raw/go-plus.json\n",
+      "WARNING:ToolkitGenerator:Range of slot 'treated by' (named thing) does not line with the domain of its inverse (treats)\n",
+      "WARNING:ToolkitGenerator:Range of slot 'enabled by' (named thing) does not line with the domain of its inverse (enables)\n",
+      "WARNING:ToolkitGenerator:Range of slot 'superclass of' (iri type) does not line with the domain of its inverse (subclass of)\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: SEMMEDDB\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: UBERON_CORE\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: WD\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: chembio\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: PHAROS\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: GTEx\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: ExO\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: HANCESTRO\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: ORPHA\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: medgen\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: VMC\n",
+      "WARNING:ToolkitGenerator:Unrecognized prefix: ECTO\n",
+      "[KGX][json_transformer.py][               parse] INFO: Parsing data/raw/go-plus.json\n",
+      "[KGX][json_transformer.py][          load_nodes] INFO: Loading 80507 nodes into networkx.MultiDiGraph\n",
+      "[KGX][json_transformer.py][          load_edges] INFO: Loading 170564 edges into networkx.MultiDiGraph\n",
+      "Parsing data/raw/hp.json\n",
+      "[KGX][json_transformer.py][               parse] INFO: Parsing data/raw/hp.json\n",
+      "[KGX][json_transformer.py][          load_nodes] INFO: Loading 15536 nodes into networkx.MultiDiGraph\n",
+      "[KGX][json_transformer.py][          load_edges] INFO: Loading 19395 edges into networkx.MultiDiGraph\n",
+      "Parsing data/raw/mondo.json\n",
+      "[KGX][json_transformer.py][               parse] INFO: Parsing data/raw/mondo.json\n",
+      "[KGX][json_transformer.py][          load_nodes] INFO: Loading 24279 nodes into networkx.MultiDiGraph\n",
+      "[KGX][json_transformer.py][          load_edges] INFO: Loading 47822 edges into networkx.MultiDiGraph\n",
+      "Parsing data/raw/chebi.json.gz\n",
+      "[KGX][json_transformer.py][               parse] INFO: Parsing data/raw/chebi.json.gz\n",
+      "[KGX][json_transformer.py][          load_nodes] INFO: Loading 144674 nodes into networkx.MultiDiGraph\n",
+      "[KGX][json_transformer.py][          load_edges] INFO: Loading 276297 edges into networkx.MultiDiGraph\n",
+      "Decompressing\n",
+      "Parsing data/raw/lifted-go-cams-20200619.xml\n",
+      "[KGX][rdf_transformer.py][               parse] INFO: Parsing data/raw/lifted-go-cams-20200619.xml with 'None' format\n",
+      "[KGX][rdf_transformer.py][               parse] INFO: data/raw/lifted-go-cams-20200619.xml parsed with 36281 triples\n",
+      "[KGX][rdf_transformer.py][             dereify] INFO: Dereifying 4587 nodes\n",
+      "[KGX][rdf_transformer.py][               parse] INFO: Done parsing data/raw/lifted-go-cams-20200619.xml\n",
+      "[KGX][transformer.py][              report] INFO: Total nodes in graph: 3681\n",
+      "[KGX][transformer.py][              report] INFO: Total edges in graph: 3724\n"
      ]
     }
    ],
@@ -154,9 +194,82 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'drug-central'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'pharmgkb'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'STRING'\n",
+      "[KGX][cli_utils.py][       apply_filters] INFO: with node filters: {'category': ['biolink:Gene', 'biolink:Protein']}\n",
+      "[KGX][cli_utils.py][       apply_filters] INFO: with edge filters: {'subject_category': ['biolink:Gene', 'biolink:Protein'], 'object_category': ['biolink:Gene', 'biolink:Protein'], 'edge_label': ['biolink:interacts_with', 'biolink:has_gene_product']}\n",
+      "[KGX][cli_utils.py][    apply_operations] INFO: Applying operation kgx.utils.graph_utils.remap_node_identifier with args: {'category': 'biolink:Protein', 'alternative_property': 'xrefs', 'prefix': 'UniProtKB'}\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'ttd'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'zhou-host-proteins'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'SciBite-CORD-19'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'sars-cov-2-gene-annot'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'intact'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'chembl'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'gene-ontology'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'mondo-ontology'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'hp-ontology'\n",
+      "[KGX][cli_utils.py][        parse_target] INFO: Processing target 'go-cams'\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 3753 nodes from drug-central to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 13900 edges from drug-central to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and drug-central: 1448\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and drug-central: 0\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 2432 nodes from pharmgkb to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 5715 edges from pharmgkb to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and pharmgkb: 971\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and pharmgkb: 0\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 29089 nodes from ttd to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 82668 edges from ttd to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and ttd: 206\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and ttd: 0\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 125 nodes from zhou-host-proteins to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 127 edges from zhou-host-proteins to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and zhou-host-proteins: 0\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and zhou-host-proteins: 0\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 188684 nodes from SciBite-CORD-19 to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 9257840 edges from SciBite-CORD-19 to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and SciBite-CORD-19: 114\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and SciBite-CORD-19: 0\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 2528 nodes from sars-cov-2-gene-annot to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 46150 edges from sars-cov-2-gene-annot to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and sars-cov-2-gene-annot: 84\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and sars-cov-2-gene-annot: 0\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 2461 nodes from intact to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 1093 edges from intact to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and intact: 1909\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and intact: 0\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 6974 nodes from chembl to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 7357 edges from chembl to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and chembl: 2774\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and chembl: 0\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 80614 nodes from gene-ontology to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 170521 edges from gene-ontology to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and gene-ontology: 10335\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and gene-ontology: 0\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 30024 nodes from mondo-ontology to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 47809 edges from mondo-ontology to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and mondo-ontology: 1558\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and mondo-ontology: 167\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 15536 nodes from hp-ontology to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 19395 edges from hp-ontology to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and hp-ontology: 5617\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and hp-ontology: 0\n",
+      "[KGX][graph_merge.py][       add_all_nodes] INFO: Adding 3681 nodes from go-cams to STRING\n",
+      "[KGX][graph_merge.py][       add_all_edges] INFO: Adding 3724 edges from go-cams to STRING\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of nodes merged between STRING and go-cams: 237\n",
+      "[KGX][graph_merge.py][        merge_graphs] INFO: Number of edges merged between STRING and go-cams: 0\n",
+      "[KGX][cli_utils.py][    apply_operations] INFO: Applying operation kgx.operations.summarize_graph.generate_graph_stats with args: {'graph_name': 'KG-COVID-19 Graph', 'filename': 'merged_graph_stats.yaml', 'node_facet_properties': ['provided_by'], 'edge_facet_properties': ['provided_by']}\n",
+      "[KGX][cli_utils.py][               merge] INFO: Writing merged graph to merged-kg-tsv\n"
+     ]
+    }
+   ],
    "source": [
     "!python run.py merge"
    ]
@@ -173,6 +286,13 @@
     "https://kg-hub.berkeleybop.io/kg-covid-19/index.html"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Other tooling/functionality"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -191,9 +311,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "merged-kg_nodes.tsv\n",
+      "merged-kg_edges.tsv\n"
+     ]
+    }
+   ],
    "source": [
     "!tar -xvzf data/merged/merged-kg.tar.gz"
    ]
@@ -207,11 +336,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {
     "scrolled": true
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Loading graph from nodes merged-kg_nodes.tsv and edges merged-kg_edges.tsv files\n",
+      "INFO:root:Making positive edges\n",
+      "INFO:root:Making negative edges\n",
+      "INFO:root:Writing out positive edges\n",
+      "INFO:root:Writing out negative edges\n"
+     ]
+    }
+   ],
    "source": [
     "!python run.py holdouts -e merged-kg_edges.tsv -n merged-kg_nodes.tsv  # this might take 10 minutes or so"
    ]
@@ -225,9 +366,36 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'degrees_max': '72381',\n",
+       " 'edges_number': '24761540',\n",
+       " 'degrees_min': '0',\n",
+       " 'connected_components_number': '24907',\n",
+       " 'degrees_mean': '65.5801068391348',\n",
+       " 'degrees_median': '5',\n",
+       " 'selfloops_rate': '0.0000155886911718738',\n",
+       " 'is_directed': 'false',\n",
+       " 'density': '0.00017368670983437763',\n",
+       " 'strongly_connected_components_number': '24907',\n",
+       " 'singleton_nodes': '23320',\n",
+       " 'unique_node_types_number': '37',\n",
+       " 'unique_edge_types_number': '33',\n",
+       " 'degrees_mode': '1',\n",
+       " 'traps_rate': '0.06176223657691014',\n",
+       " 'bidirectional_rate': '1',\n",
+       " 'nodes_number': '377577'}"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "from ensmallen_graph import EnsmallenGraph\n",
     "\n",
@@ -293,7 +461,7 @@
    "source": [
     "## Use prebuilt SPARQL queries to query our Blazegraph endpoint on the commandline\n",
     "\n",
-    "KG-COVID-19 has tooling to query our Blazegraph endpoint using predetermined SPARQL queries, and emit the results as a TSV file. Different SPARQL queries on our endpoint or other endpoints can be used by creating a new YAML file and specific this filewith the `-y` flag. "
+    "KG-COVID-19 has tooling to query our Blazegraph endpoint using predetermined SPARQL queries, and emit the results as a TSV file. Different SPARQL queries on our endpoint or other endpoints can be used by creating a new YAML file and specifying this file with the `-y` flag. "
    ]
   },
   {

From b2c55a18e2dd7f50e2c021f3487c7373da9459c8 Mon Sep 17 00:00:00 2001
From: Justin Reese <reeseju@tartini.cgrb.oregonstate.local>
Date: Mon, 28 Sep 2020 20:18:25 -0700
Subject: [PATCH 7/7] Update nb

---
 ...e.ipynb => example-KG-COVID-19-usage.ipynb | 76 +++++++++++++++++--
 1 file changed, 70 insertions(+), 6 deletions(-)
 rename Run-KG-COVID-19-pipeline.ipynb => example-KG-COVID-19-usage.ipynb (90%)

diff --git a/Run-KG-COVID-19-pipeline.ipynb b/example-KG-COVID-19-usage.ipynb
similarity index 90%
rename from Run-KG-COVID-19-pipeline.ipynb
rename to example-KG-COVID-19-usage.ipynb
index f08a3b50..8a887775 100644
--- a/Run-KG-COVID-19-pipeline.ipynb
+++ b/example-KG-COVID-19-usage.ipynb
@@ -419,9 +419,36 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'nodes_number': '377577',\n",
+       " 'is_directed': 'false',\n",
+       " 'density': '0.00021710748243868716',\n",
+       " 'degrees_min': '0',\n",
+       " 'traps_rate': '0.022019349695558788',\n",
+       " 'bidirectional_rate': '1',\n",
+       " 'degrees_mean': '81.97479189675218',\n",
+       " 'selfloops_rate': '0.00001554029368764255',\n",
+       " 'connected_components_number': '9068',\n",
+       " 'degrees_mode': '1',\n",
+       " 'singleton_nodes': '8314',\n",
+       " 'degrees_max': '90378',\n",
+       " 'edges_number': '30951796',\n",
+       " 'unique_node_types_number': '37',\n",
+       " 'unique_edge_types_number': '33',\n",
+       " 'strongly_connected_components_number': '9068',\n",
+       " 'degrees_median': '6'}"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "graph = EnsmallenGraph.from_csv(\n",
     "    edge_path=\"merged-kg_edges.tsv\",\n",
@@ -466,18 +493,55 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/home/reeseju/kg-covid-19/kg_covid_19/query.py:17: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\r\n",
+      "  return yaml.load(open(yaml_file))\r\n"
+     ]
+    }
+   ],
    "source": [
     "!python run.py query -y queries/sparql/query-01-bl-cat-counts.yaml # or make a new YAML file and write your own query"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['v1', 'v0']\n",
+      "['199', 'organism taxon']\n",
+      "['19131', 'https://w3id.org/biolink/vocab/Gene']\n",
+      "['3908', 'https://w3id.org/biolink/vocab/NamedThing']\n",
+      "['20167', 'https://w3id.org/biolink/vocab/Protein']\n",
+      "['30534', 'https://w3id.org/biolink/vocab/BiologicalProcess']\n",
+      "['4468', 'https://w3id.org/biolink/vocab/CellularComponent']\n",
+      "['30018', 'https://w3id.org/biolink/vocab/ChemicalSubstance']\n",
+      "['32228', 'https://w3id.org/biolink/vocab/Drug']\n",
+      "['12241', 'https://w3id.org/biolink/vocab/MolecularActivity']\n",
+      "['62446', 'https://w3id.org/biolink/vocab/OntologyClass']\n",
+      "['6', 'https://w3id.org/biolink/vocab/OrganismalEntity']\n",
+      "['15530', 'https://w3id.org/biolink/vocab/PhenotypicFeature']\n",
+      "['129930', 'https://w3id.org/biolink/vocab/Publication']\n",
+      "['4687', 'https://w3id.org/biolink/vocab/AnatomicalEntity']\n",
+      "['48', 'https://w3id.org/biolink/vocab/Assay']\n",
+      "['703', 'https://w3id.org/biolink/vocab/Cell']\n",
+      "['24229', 'https://w3id.org/biolink/vocab/Disease']\n",
+      "['1', 'https://w3id.org/biolink/vocab/MolecularEntity']\n",
+      "['17', 'https://w3id.org/biolink/vocab/RNA']\n",
+      "['47', 'https://w3id.org/biolink/vocab/SequenceFeature']\n"
+     ]
+    }
+   ],
    "source": [
     "# have a look at biolink category counts currently in KG-COVID-19 loaded on Blazegraph endpoint\n",
     "import csv\n",