Merge pull request #12 from SmartDataAnalytics/add-converter-tests

Add converter classes and first tests
SmartDataAnalytics · Jan 13, 2019
commit f423443 · 2 parents: 795b460 + ce09249

Showing 9 changed files with 527 additions and 253 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -3,13 +3,10 @@ cache: pip
 language: python
 python:
   - 3.6
-  - 3.7
 stages:
   - lint
   - docs
   - test
-env:
-  - TOXENV=py
 jobs:
   include:
     # lint stage
@@ -24,6 +21,13 @@ jobs:
       env: TOXENV=doc8
     - env: TOXENV=readme
     - env: TOXENV=docs
+    - stage: test
+      env: TOXENV=py
+      python: "3.6"
+    - env: TOXENV=py
+      sudo: true
+      python: "3.7"
+      dist: xenial
 matrix:
   allow_failures:
     - env: TOXENV=vulture
@@ -35,5 +39,3 @@ script:
   - tox
 after_success:
   - sh -c 'if [ "$TOXENV" = "py" ]; then tox -e coverage-report; codecov; fi'
-notifications:
-  slack: pybel:n2KbWKBum3musnBg3L76gGwq
diff --git a/src/biokeen/cli.py b/src/biokeen/cli.py
@@ -4,22 +4,15 @@
 
 import json
 import logging
-from collections import OrderedDict
+from typing import List, Optional, TextIO
 
 import click
 
 import pykeen
 from bio2bel.constants import get_global_connection
-from biokeen.build import ensure_compath, ensure_drugbank, ensure_hippie, iterate_source_paths
+from biokeen.build import iterate_source_paths
 from biokeen.cli_utils.bio_2_bel_utils import install_bio2bel_module
-from biokeen.cli_utils.cli_print_msg_helper import print_intro, print_welcome_message
-from biokeen.cli_utils.cli_query_helper import select_database
-from pykeen.cli.prompt import (
-    prompt_device, prompt_embedding_model, prompt_evaluation_parameters, prompt_execution_parameters,
-    prompt_output_directory, prompt_random_seed, prompt_training_file,
-)
-from pykeen.cli.utils.cli_print_msg_helper import print_execution_mode_message, print_section_divider
-from pykeen.constants import EXECUTION_MODE, HPO_MODE, TRAINING_MODE, TRAINING_SET_PATH
+from biokeen.cli_utils.prompt_utils import prompt_config
 from pykeen.predict import start_predictions_pipeline
 
 connection_option = click.option(
@@ -31,65 +24,6 @@
 )
 
 
-def prompt_config(connection, rebuild):
-    """Configure experiments."""
-    config = OrderedDict()
-
-    # Step 1: Welcome + Intro
-    print_welcome_message()
-    print_section_divider()
-    print_intro()
-    print_section_divider()
-
-    # Step 2: Ask for data source
-    is_biokeen_data_required = click.confirm('Do you want to use one of the databases provided by BioKEEN?',
-                                             default=True)
-    print_section_divider()
-
-    if is_biokeen_data_required:
-        database_name = select_database()
-        config[TRAINING_SET_PATH] = install_bio2bel_module(name=database_name, connection=connection, rebuild=rebuild)
-    else:
-        prompt_training_file(config)
-
-    print_section_divider()
-
-    # Step 3: Ask for execution mode
-    print_execution_mode_message()
-    config[EXECUTION_MODE] = (
-        TRAINING_MODE
-        if click.confirm('Do you have hyper-parameters? If not, will begin hyper-parameter search.', default=False) else
-        HPO_MODE
-    )
-    print_section_divider()
-
-    # Step 4: Ask for model
-    model_name = prompt_embedding_model()
-    print_section_divider()
-
-    # Step 5: Query parameters depending on the selected execution mode
-    prompt_execution_parameters(config=config, model_name=model_name)
-    print_section_divider()
-
-    prompt_evaluation_parameters(config)
-
-    print_section_divider()
-
-    # Step 6: Please select a random seed
-    prompt_random_seed(config)
-    print_section_divider()
-
-    # Step 7: Query device to train on
-    prompt_device(config)
-    print_section_divider()
-
-    # Step 8: Define output directory
-    prompt_output_directory(config)
-    print_section_divider()
-
-    return config
-
-
 @click.group()
 @click.version_option()
 def main():  # noqa: D401
@@ -100,7 +34,7 @@ def main():  # noqa: D401
 @connection_option
 @click.option('-f', '--config', type=click.File())
 @click.option('-r', '--rebuild', is_flag=True)
-def start(config, connection, rebuild):
+def start(config: Optional[TextIO], connection: str, rebuild: bool):
     """Start BioKEEN pipeline."""
     if config is None:
         config = prompt_config(connection, rebuild)
@@ -111,11 +45,11 @@ def start(config, connection, rebuild):
 
 
 @main.command()
-@click.option('-m', '--model_direc', type=click.Path(file_okay=False, dir_okay=True))
-@click.option('-d', '--data_direc', type=click.Path(file_okay=False, dir_okay=True))
-def predict(model_direc: str, data_direc: str):
+@click.option('-m', '--model-directory', type=click.Path(file_okay=False, dir_okay=True))
+@click.option('-d', '--data-directory', type=click.Path(file_okay=False, dir_okay=True))
+def predict(model_directory: str, data_directory: str):
     """Use a trained model to make predictions."""
-    start_predictions_pipeline(model_direc, data_direc)
+    start_predictions_pipeline(model_directory, data_directory)
 
 
 @main.group()
@@ -135,44 +69,14 @@ def ls():
 @connection_option
 @click.option('-r', '--rebuild', is_flag=True)
 @click.option('-v', '--verbose', is_flag=True)
-def get(names, connection, rebuild, verbose):
+def get(names: List[str], connection: str, rebuild: bool, verbose: bool):
     """Install, populate, and build Bio2BEL repository."""
     if verbose:
         logging.basicConfig(level=logging.INFO)
+
     for name in names:
         install_bio2bel_module(name, connection, rebuild)
 
 
-@data.group()
-def build():
-    """Build suggested Bio2BEL resources."""
-
-
-@build.command()
-@connection_option
-def all(connection):
-    """Build all resources."""
-    click.secho('HIPPIE', fg='cyan', bold=True)
-    ensure_hippie(connection)
-    click.secho('DrugBank', fg='cyan', bold=True)
-    ensure_drugbank(connection)
-
-
-@build.command()
-@connection_option
-def hippie(connection):
-    """Build HIPPIE."""
-    ensure_hippie(connection)
-
-
-@build.command()
-@connection_option
-def drugbank(connection):
-    """Build DrugBank."""
-    ensure_drugbank(connection)
-
-
-@build.command()
-def compath():
-    """Build ComPath."""
-    ensure_compath()
+if __name__ == '__main__':
+    main()
diff --git a/src/biokeen/cli_utils/bio_2_bel_utils.py b/src/biokeen/cli_utils/bio_2_bel_utils.py
@@ -13,7 +13,7 @@
 from bio2bel.manager.bel_manager import BELManagerMixin
 from biokeen.constants import DATA_DIR, EMOJI
 from biokeen.convert import to_pykeen_file
-from pybel import from_pickle, to_pickle
+from pybel import from_json_path, to_json_path
 
 
 def _import_bio2bel_module(package: str):
@@ -47,18 +47,18 @@ def install_bio2bel_module(name, connection, rebuild):
     if name == 'compath':  # special case for compath
         module_name = 'compath_resources'
     else:
-        module_name = f"bio2bel_{name}"
+        module_name = f'bio2bel_{name}'
 
     pykeen_df_path = os.path.join(DATA_DIR, f'{name}.keen.tsv')
-    pickle_path = os.path.join(DATA_DIR, f'{name}.bel.pickle')
+    json_path = os.path.join(DATA_DIR, f'{name}.bel.json')
 
     if os.path.exists(pykeen_df_path) and not rebuild:
         click.secho(f'{EMOJI} {module_name} has already been retrieved. See: {pykeen_df_path}', bold=True)
         return pykeen_df_path
 
-    if os.path.exists(pickle_path) and not rebuild:
-        click.secho(f'{EMOJI} loaded {module_name} pickle: {pickle_path}', bold=True)
-        graph = from_pickle(pickle_path)
+    if os.path.exists(json_path) and not rebuild:
+        click.secho(f'{EMOJI} loaded {module_name} JSON: {json_path}', bold=True)
+        graph = from_json_path(json_path)
         to_pykeen_file(graph, pykeen_df_path)
         return pykeen_df_path
 
@@ -84,7 +84,7 @@ def install_bio2bel_module(name, connection, rebuild):
     click.secho(f'{EMOJI} generating BEL for {module_name}', bold=True)
     graph = manager.to_bel()
     click.echo(f'Summary: {graph.number_of_nodes()} nodes / {graph.number_of_edges()} edges')
-    to_pickle(graph, pickle_path)
+    to_json_path(graph, json_path, indent=2)
     click.secho(f'{EMOJI} generating PyKEEN TSV for {module_name}', bold=True)
     to_pykeen_file(graph, pykeen_df_path)
     click.secho(f'{EMOJI} wrote PyKEEN TSV to {pykeen_df_path}', bold=True)

diff --git a/src/biokeen/cli_utils/prompt_utils.py b/src/biokeen/cli_utils/prompt_utils.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+"""CLI utils."""
+
+from collections import OrderedDict
+from typing import Dict
+
+import click
+
+from biokeen.cli_utils.bio_2_bel_utils import install_bio2bel_module
+from biokeen.cli_utils.cli_print_msg_helper import print_intro, print_welcome_message
+from biokeen.cli_utils.cli_query_helper import select_database
+from pykeen.cli.prompt import (
+    prompt_device, prompt_embedding_model, prompt_evaluation_parameters, prompt_execution_parameters,
+    prompt_output_directory, prompt_random_seed, prompt_training_file,
+)
+from pykeen.cli.utils.cli_print_msg_helper import print_execution_mode_message, print_section_divider
+from pykeen.constants import EXECUTION_MODE, HPO_MODE, TRAINING_MODE, TRAINING_SET_PATH
+
+__all__ = [
+    'prompt_config',
+]
+
+
+def prompt_config(connection: str, rebuild: bool) -> Dict:
+    """Configure experiments."""
+    config = OrderedDict()
+
+    # Step 1: Welcome + Intro
+    print_welcome_message()
+    print_section_divider()
+    print_intro()
+    print_section_divider()
+
+    # Step 2: Ask for data source
+    is_biokeen_data_required = click.confirm('Do you want to use one of the databases provided by BioKEEN?',
+                                             default=True)
+    print_section_divider()
+
+    if is_biokeen_data_required:
+        database_name = select_database()
+        config[TRAINING_SET_PATH] = install_bio2bel_module(name=database_name, connection=connection, rebuild=rebuild)
+    else:
+        prompt_training_file(config)
+
+    print_section_divider()
+
+    # Step 3: Ask for execution mode
+    print_execution_mode_message()
+    config[EXECUTION_MODE] = (
+        TRAINING_MODE
+        if click.confirm('Do you have hyper-parameters? If not, will begin hyper-parameter search.', default=False) else
+        HPO_MODE
+    )
+    print_section_divider()
+
+    # Step 4: Ask for model
+    model_name = prompt_embedding_model()
+    print_section_divider()
+
+    # Step 5: Query parameters depending on the selected execution mode
+    prompt_execution_parameters(config=config, model_name=model_name)
+    print_section_divider()
+
+    prompt_evaluation_parameters(config)
+
+    print_section_divider()
+
+    # Step 6: Please select a random seed
+    prompt_random_seed(config)
+    print_section_divider()
+
+    # Step 7: Query device to train on
+    prompt_device(config)
+    print_section_divider()
+
+    # Step 8: Define output directory
+    prompt_output_directory(config)
+    print_section_divider()
+
+    return config