Skip to content
This repository has been archived by the owner on Sep 7, 2022. It is now read-only.

Commit

Permalink
Update output to add summary information
Browse files Browse the repository at this point in the history
Still, this doesn't summarize whether the appropriate identifiers are used in each namespace. I'm guessing this will be a problem at some point.
  • Loading branch information
cthoyt committed Jan 20, 2019
1 parent 821559c commit 70135ee
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 17 deletions.
21 changes: 14 additions & 7 deletions src/biokeen/cli_utils/bio_2_bel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from bio2bel import AbstractManager
from bio2bel.manager.bel_manager import BELManagerMixin
from biokeen.constants import DATA_DIR, EMOJI
from biokeen.convert import to_pykeen_file
from biokeen.convert import to_pykeen_df, to_pykeen_path, to_pykeen_summary_path
from pybel import from_json_path, to_json_path


Expand Down Expand Up @@ -43,14 +43,17 @@ def _import_bio2bel_module(package: str):
return b_module


# Names whose module does not follow the ``bio2bel_{name}`` pattern,
# mapped to the module name to use instead.
_SPECIAL_CASES = {
'compath': 'compath_resources',
}


def install_bio2bel_module(name: str, connection: str, rebuild: bool) -> Optional[str]:
"""Install Bio2BEL module."""
if name == 'compath': # special case for compath
module_name = 'compath_resources'
else:
module_name = f'bio2bel_{name}'
module_name = _SPECIAL_CASES.get(name, f'bio2bel_{name}')

pykeen_df_path = os.path.join(DATA_DIR, f'{name}.keen.tsv')
pykeen_df_summary_path = os.path.join(DATA_DIR, f'{name}.keen.summary.json')
json_path = os.path.join(DATA_DIR, f'{name}.bel.json')

if os.path.exists(pykeen_df_path) and not rebuild:
Expand All @@ -60,7 +63,9 @@ def install_bio2bel_module(name: str, connection: str, rebuild: bool) -> Optiona
if os.path.exists(json_path) and not rebuild:
click.secho(f'{EMOJI} loaded {module_name} JSON: {json_path}', bold=True)
graph = from_json_path(json_path)
to_pykeen_file(graph, pykeen_df_path)
df = to_pykeen_df(graph)
to_pykeen_path(df, pykeen_df_path)
to_pykeen_summary_path(df, pykeen_df_summary_path)
return pykeen_df_path

bio2bel_module = _import_bio2bel_module(module_name)
Expand Down Expand Up @@ -89,7 +94,9 @@ def install_bio2bel_module(name: str, connection: str, rebuild: bool) -> Optiona
to_json_path(graph, json_path, indent=2)

click.secho(f'{EMOJI} generating PyKEEN TSV for {module_name}', bold=True)
success = to_pykeen_file(graph, pykeen_df_path)
df = to_pykeen_df(graph)
to_pykeen_summary_path(df, pykeen_df_summary_path)
success = to_pykeen_path(df, pykeen_df_path)

if success:
click.secho(f'{EMOJI} wrote PyKEEN TSV to {pykeen_df_path}', bold=True)
Expand Down
2 changes: 1 addition & 1 deletion src/biokeen/convert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

"""Conversion from BEL to proper triples."""

from .io import get_triple, to_pykeen_df, to_pykeen_file # noqa: F401
from .io import get_pykeen_summary, get_triple, to_pykeen_df, to_pykeen_path, to_pykeen_summary_path # noqa: F401
38 changes: 29 additions & 9 deletions src/biokeen/convert/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

"""Input and output for BEL conversion."""

import itertools as itt
import json
import logging
from pathlib import Path
from typing import Optional, TextIO, Tuple, Union
from collections import Counter
from typing import Dict, Optional, Tuple

import pandas as pd
from tqdm import tqdm
Expand All @@ -15,29 +17,47 @@
AssociationConverter, CorrelationConverter, DecreasesAmountConverter, DrugIndicationConverter,
DrugSideEffectConverter, EquivalenceConverter, IncreasesAmountConverter, IsAConverter,
MiRNADecreasesExpressionConverter, MiRNADirectlyDecreasesExpressionConverter, NamedComplexHasComponentConverter,
PartOfNamedComplexConverter, RegulatesActivityConverter, RegulatesAmountConverter, PartOfBiologicalProcess
PartOfBiologicalProcess, PartOfNamedComplexConverter, RegulatesActivityConverter, RegulatesAmountConverter,
)

__all__ = [
'to_pykeen_file',
'to_pykeen_path',
'to_pykeen_df',
'get_pykeen_summary',
'to_pykeen_summary_path',
'get_triple',
]

logger = logging.getLogger(__name__)


def to_pykeen_file(graph: BELGraph, file: Union[str, Path, TextIO]) -> bool:
def to_pykeen_path(df: pd.DataFrame, path: str) -> bool:
"""Write the relationships in the BEL graph to a KEEN TSV file."""
df = to_pykeen_df(graph)

if len(df.index) == 0:
return False

df.to_csv(file, sep='\t', index=None, header=None)
df.to_csv(path, sep='\t', index=None, header=None)
return True


def get_pykeen_summary(df: pd.DataFrame) -> Dict:
    """Summarize a KEEN dataframe.

    The first and third columns (by position) are read as the two entity columns of each row.
    Every entry in them is split on ``:`` and the part before the first colon is counted as
    its namespace.

    :param df: A dataframe with one triple per row
    :return: A dictionary with three entries: ``namespaces`` (a counter of how many times each
     namespace prefix occurs across both entity columns), ``entities`` (the number of distinct
     values across both entity columns), and ``relations`` (the number of rows)
    """
    # A dataframe without rows may also lack columns, in which case positional column
    # access would raise an IndexError. Return the empty summary instead.
    if len(df.index) == 0:
        return {
            'namespaces': Counter(),
            'entities': 0,
            'relations': 0,
        }

    # Walk both entity columns once; select by position so duplicated column labels
    # cannot turn the selection into a multi-column frame.
    entity_count = Counter(itt.chain(df.iloc[:, 0], df.iloc[:, 2]))

    # Derive the namespace tallies from the per-entity tallies rather than re-scanning the data.
    namespaces = Counter()
    for entity, count in entity_count.items():
        namespaces[entity.split(':')[0]] += count

    return {
        'namespaces': namespaces,
        'entities': len(entity_count),
        'relations': len(df.index),
    }


def to_pykeen_summary_path(df: pd.DataFrame, path: str, indent=2, **kwargs) -> None:
    """Write the summary of a KEEN dataframe to a file as JSON.

    :param df: The KEEN dataframe to summarize
    :param path: The path of the file to write
    :param indent: The indentation passed to :func:`json.dumps`
    :param kwargs: Additional keyword arguments passed to :func:`json.dumps`
    """
    # Build and serialize the summary before opening the file, so a failure in either step
    # does not truncate an existing summary or leave a partially written file behind.
    content = json.dumps(get_pykeen_summary(df), indent=indent, **kwargs)
    with open(path, 'w') as file:
        file.write(content)


def to_pykeen_df(graph: BELGraph) -> pd.DataFrame:
"""Get a pandas DataFrame representing the triples."""
triples = (
Expand Down

0 comments on commit 70135ee

Please sign in to comment.