Skip to content
This repository has been archived by the owner on Sep 7, 2022. It is now read-only.

Commit

Permalink
Merge pull request #12 from SmartDataAnalytics/add-converter-tests
Browse files: browse the repository at this point in the history
Add converter classes and first tests
  • Loading branch information
ddomingof committed Jan 13, 2019
2 parents 795b460 + ce09249 commit f423443
Show file tree
Hide file tree
Showing 9 changed files with 527 additions and 253 deletions.
12 changes: 7 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,10 @@ cache: pip
language: python
python:
- 3.6
- 3.7
stages:
- lint
- docs
- test
env:
- TOXENV=py
jobs:
include:
# lint stage
Expand All @@ -24,6 +21,13 @@ jobs:
env: TOXENV=doc8
- env: TOXENV=readme
- env: TOXENV=docs
- stage: test
env: TOXENV=py
python: "3.6"
- env: TOXENV=py
sudo: true
python: "3.7"
dist: xenial
matrix:
allow_failures:
- env: TOXENV=vulture
Expand All @@ -35,5 +39,3 @@ script:
- tox
after_success:
- sh -c 'if [ "$TOXENV" = "py" ]; then tox -e coverage-report; codecov; fi'
notifications:
slack: pybel:n2KbWKBum3musnBg3L76gGwq
120 changes: 12 additions & 108 deletions src/biokeen/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,15 @@

import json
import logging
from collections import OrderedDict
from typing import List, Optional, TextIO

import click

import pykeen
from bio2bel.constants import get_global_connection
from biokeen.build import ensure_compath, ensure_drugbank, ensure_hippie, iterate_source_paths
from biokeen.build import iterate_source_paths
from biokeen.cli_utils.bio_2_bel_utils import install_bio2bel_module
from biokeen.cli_utils.cli_print_msg_helper import print_intro, print_welcome_message
from biokeen.cli_utils.cli_query_helper import select_database
from pykeen.cli.prompt import (
prompt_device, prompt_embedding_model, prompt_evaluation_parameters, prompt_execution_parameters,
prompt_output_directory, prompt_random_seed, prompt_training_file,
)
from pykeen.cli.utils.cli_print_msg_helper import print_execution_mode_message, print_section_divider
from pykeen.constants import EXECUTION_MODE, HPO_MODE, TRAINING_MODE, TRAINING_SET_PATH
from biokeen.cli_utils.prompt_utils import prompt_config
from pykeen.predict import start_predictions_pipeline

connection_option = click.option(
Expand All @@ -31,65 +24,6 @@
)


def prompt_config(connection, rebuild):
    """Walk the user through the interactive set-up and return the configuration.

    The prompts are issued in a fixed order, each followed by a section
    divider: data source, execution mode, embedding model, execution
    parameters, evaluation parameters, random seed, device and output
    directory.

    :param connection: forwarded to ``install_bio2bel_module`` when one of the
        databases provided by BioKEEN is chosen
    :param rebuild: forwarded to ``install_bio2bel_module`` together with
        ``connection``
    :return: the ``OrderedDict`` created here; the ``prompt_*`` helpers receive
        it as an argument and presumably fill it in place (verify against PyKEEN)
    """
    config = OrderedDict()

    # Greeting and introduction
    print_welcome_message()
    print_section_divider()
    print_intro()
    print_section_divider()

    # Data source: a database provided by BioKEEN or a user-supplied training file
    use_provided_database = click.confirm(
        'Do you want to use one of the databases provided by BioKEEN?',
        default=True,
    )
    print_section_divider()

    if not use_provided_database:
        prompt_training_file(config)
    else:
        chosen_database = select_database()
        config[TRAINING_SET_PATH] = install_bio2bel_module(
            name=chosen_database,
            connection=connection,
            rebuild=rebuild,
        )

    print_section_divider()

    # Execution mode: training when hyper-parameters are available, otherwise HPO
    print_execution_mode_message()
    has_hyper_parameters = click.confirm(
        'Do you have hyper-parameters? If not, will begin hyper-parameter search.',
        default=False,
    )
    if has_hyper_parameters:
        config[EXECUTION_MODE] = TRAINING_MODE
    else:
        config[EXECUTION_MODE] = HPO_MODE
    print_section_divider()

    # Embedding model, then the parameters that depend on the chosen mode
    model_name = prompt_embedding_model()
    print_section_divider()

    prompt_execution_parameters(config=config, model_name=model_name)
    print_section_divider()

    # Remaining prompts share one shape: ask, then print a divider.
    # Order: evaluation parameters, random seed, device, output directory.
    remaining_prompts = (
        prompt_evaluation_parameters,
        prompt_random_seed,
        prompt_device,
        prompt_output_directory,
    )
    for ask in remaining_prompts:
        ask(config)
        print_section_divider()

    return config


@click.group()
@click.version_option()
def main(): # noqa: D401
Expand All @@ -100,7 +34,7 @@ def main(): # noqa: D401
@connection_option
@click.option('-f', '--config', type=click.File())
@click.option('-r', '--rebuild', is_flag=True)
def start(config, connection, rebuild):
def start(config: Optional[TextIO], connection: str, rebuild: bool):
"""Start BioKEEN pipeline."""
if config is None:
config = prompt_config(connection, rebuild)
Expand All @@ -111,11 +45,11 @@ def start(config, connection, rebuild):


@main.command()
@click.option('-m', '--model_direc', type=click.Path(file_okay=False, dir_okay=True))
@click.option('-d', '--data_direc', type=click.Path(file_okay=False, dir_okay=True))
def predict(model_direc: str, data_direc: str):
@click.option('-m', '--model-directory', type=click.Path(file_okay=False, dir_okay=True))
@click.option('-d', '--data-directory', type=click.Path(file_okay=False, dir_okay=True))
def predict(model_directory: str, data_directory: str):
"""Use a trained model to make predictions."""
start_predictions_pipeline(model_direc, data_direc)
start_predictions_pipeline(model_directory, data_directory)


@main.group()
Expand All @@ -135,44 +69,14 @@ def ls():
@connection_option
@click.option('-r', '--rebuild', is_flag=True)
@click.option('-v', '--verbose', is_flag=True)
def get(names, connection, rebuild, verbose):
def get(names: List[str], connection: str, rebuild: bool, verbose: bool):
"""Install, populate, and build Bio2BEL repository."""
if verbose:
logging.basicConfig(level=logging.INFO)

for name in names:
install_bio2bel_module(name, connection, rebuild)


# Sub-group registered on the ``data`` group; the functions decorated with
# ``@build.command()`` below attach to it. The body is only the docstring.
@data.group()
def build():
    """Build suggested Bio2BEL resources."""


@build.command()
@connection_option
def all(connection):
    """Build all resources."""
    # Each resource is announced with a bold cyan heading and then built with
    # the same connection; HIPPIE comes first, DrugBank second.
    steps = (
        ('HIPPIE', ensure_hippie),
        ('DrugBank', ensure_drugbank),
    )
    for heading, ensure in steps:
        click.secho(heading, fg='cyan', bold=True)
        ensure(connection)


@build.command()
@connection_option
def hippie(connection):
    """Build HIPPIE."""
    # ``connection`` is presumably supplied by ``connection_option`` (confirm
    # against its definition); it is handed to ``ensure_hippie`` unchanged.
    ensure_hippie(connection)


@build.command()
@connection_option
def drugbank(connection):
    """Build DrugBank."""
    # ``connection`` is presumably supplied by ``connection_option`` (confirm
    # against its definition); it is handed to ``ensure_drugbank`` unchanged.
    ensure_drugbank(connection)


@build.command()
def compath():
    """Build ComPath."""
    # Unlike the sibling build commands, this one takes no connection option
    # and calls ``ensure_compath`` without arguments.
    ensure_compath()
if __name__ == '__main__':
main()
14 changes: 7 additions & 7 deletions src/biokeen/cli_utils/bio_2_bel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from bio2bel.manager.bel_manager import BELManagerMixin
from biokeen.constants import DATA_DIR, EMOJI
from biokeen.convert import to_pykeen_file
from pybel import from_pickle, to_pickle
from pybel import from_json_path, to_json_path


def _import_bio2bel_module(package: str):
Expand Down Expand Up @@ -47,18 +47,18 @@ def install_bio2bel_module(name, connection, rebuild):
if name == 'compath': # special case for compath
module_name = 'compath_resources'
else:
module_name = f"bio2bel_{name}"
module_name = f'bio2bel_{name}'

pykeen_df_path = os.path.join(DATA_DIR, f'{name}.keen.tsv')
pickle_path = os.path.join(DATA_DIR, f'{name}.bel.pickle')
json_path = os.path.join(DATA_DIR, f'{name}.bel.json')

if os.path.exists(pykeen_df_path) and not rebuild:
click.secho(f'{EMOJI} {module_name} has already been retrieved. See: {pykeen_df_path}', bold=True)
return pykeen_df_path

if os.path.exists(pickle_path) and not rebuild:
click.secho(f'{EMOJI} loaded {module_name} pickle: {pickle_path}', bold=True)
graph = from_pickle(pickle_path)
if os.path.exists(json_path) and not rebuild:
click.secho(f'{EMOJI} loaded {module_name} JSON: {json_path}', bold=True)
graph = from_json_path(json_path)
to_pykeen_file(graph, pykeen_df_path)
return pykeen_df_path

Expand All @@ -84,7 +84,7 @@ def install_bio2bel_module(name, connection, rebuild):
click.secho(f'{EMOJI} generating BEL for {module_name}', bold=True)
graph = manager.to_bel()
click.echo(f'Summary: {graph.number_of_nodes()} nodes / {graph.number_of_edges()} edges')
to_pickle(graph, pickle_path)
to_json_path(graph, json_path, indent=2)
click.secho(f'{EMOJI} generating PyKEEN TSV for {module_name}', bold=True)
to_pykeen_file(graph, pykeen_df_path)
click.secho(f'{EMOJI} wrote PyKEEN TSV to {pykeen_df_path}', bold=True)
Expand Down
81 changes: 81 additions & 0 deletions src/biokeen/cli_utils/prompt_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-

"""CLI utils."""

from collections import OrderedDict
from typing import Dict

import click

from biokeen.cli_utils.bio_2_bel_utils import install_bio2bel_module
from biokeen.cli_utils.cli_print_msg_helper import print_intro, print_welcome_message
from biokeen.cli_utils.cli_query_helper import select_database
from pykeen.cli.prompt import (
prompt_device, prompt_embedding_model, prompt_evaluation_parameters, prompt_execution_parameters,
prompt_output_directory, prompt_random_seed, prompt_training_file,
)
from pykeen.cli.utils.cli_print_msg_helper import print_execution_mode_message, print_section_divider
from pykeen.constants import EXECUTION_MODE, HPO_MODE, TRAINING_MODE, TRAINING_SET_PATH

__all__ = [
'prompt_config',
]


def prompt_config(connection: str, rebuild: bool) -> Dict:
    """Interactively assemble the experiment configuration.

    Prompts run in a fixed order, each section closed by a divider:
    welcome/intro, data source, execution mode, embedding model, execution
    parameters, evaluation parameters, random seed, device, output directory.

    :param connection: passed to ``install_bio2bel_module`` if the user opts
        for one of the databases provided by BioKEEN
    :param rebuild: passed to ``install_bio2bel_module`` alongside ``connection``
    :return: the ``OrderedDict`` built during the dialogue; the ``prompt_*``
        helpers are given this object and presumably populate it in place
        (verify against PyKEEN)
    """
    config = OrderedDict()

    # Welcome banner and introduction
    print_welcome_message()
    print_section_divider()
    print_intro()
    print_section_divider()

    # Where does the training data come from?
    wants_biokeen_database = click.confirm(
        'Do you want to use one of the databases provided by BioKEEN?',
        default=True,
    )
    print_section_divider()

    if not wants_biokeen_database:
        # The user brings their own training file
        prompt_training_file(config)
    else:
        selected_name = select_database()
        training_path = install_bio2bel_module(
            name=selected_name,
            connection=connection,
            rebuild=rebuild,
        )
        config[TRAINING_SET_PATH] = training_path

    print_section_divider()

    # Training mode if hyper-parameters are already known, HPO mode otherwise
    print_execution_mode_message()
    knows_hyper_parameters = click.confirm(
        'Do you have hyper-parameters? If not, will begin hyper-parameter search.',
        default=False,
    )
    if knows_hyper_parameters:
        config[EXECUTION_MODE] = TRAINING_MODE
    else:
        config[EXECUTION_MODE] = HPO_MODE
    print_section_divider()

    # Model choice drives which execution parameters are queried next
    model_name = prompt_embedding_model()
    print_section_divider()

    prompt_execution_parameters(config=config, model_name=model_name)
    print_section_divider()

    # The closing prompts all take the config and are each followed by a
    # divider: evaluation parameters, random seed, device, output directory.
    for prompt_step in (
        prompt_evaluation_parameters,
        prompt_random_seed,
        prompt_device,
        prompt_output_directory,
    ):
        prompt_step(config)
        print_section_divider()

    return config

0 comments on commit f423443

Please sign in to comment.