Skip to content

Commit

Permalink
Improve code
Browse files Browse the repository at this point in the history
  • Loading branch information
ddomingof committed Oct 4, 2018
1 parent b77773a commit 244dd9b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 17 deletions.
1 change: 1 addition & 0 deletions src/pathme/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def ensure_pathme_folders():
CHEBI = 'ChEBI'
CHEBI_NAME = 'ChEBI name'
PUBCHEM = 'PubChem'
WIKIPEDIA = 'WIKIPEDIA'

KEGG_MODIFICATIONS = {
'phosphorylation': 'Ph',
Expand Down
22 changes: 12 additions & 10 deletions src/pathme/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

"""Common utils."""

import collections
import logging
import os
import pickle
from collections import defaultdict
from typing import Dict, List, Optional
from typing import AnyStr, Dict, Iterable, List, Optional
from urllib.request import urlretrieve

import click
Expand All @@ -15,9 +15,8 @@
import rdflib
from pybel import union
from pybel_tools import summary
from pathme.constants import UNKNOWN

from .constants import DATA_DIR
from pathme.constants import UNKNOWN

log = logging.getLogger(__name__)

Expand All @@ -34,8 +33,9 @@ def get_files_in_folder(path: str) -> List[str]:
if os.path.isfile(os.path.join(path, file))
]

def check_multiple(element, element_name) :
if isinstance(element, set) or isinstance(element, list):

def check_multiple(element: Iterable, element_name: AnyStr):
if isinstance(element, Iterable):
log.warning('Multiple {}: {}'.format(element_name, element))
# TODO: print the wikipathways bps that return a set because they are probably wrong.
if len(element) != 0:
Expand Down Expand Up @@ -143,8 +143,9 @@ def entry_result_to_dict(entry, **kwargs):

return attributes_dict


def entries_dict_ids_argument(entries_dict):
entries_dict_ids = defaultdict(dict)
entries_dict_ids = collections.defaultdict(dict)
for entry_id, entry_att in entries_dict.items():
entry_identifiers = {}

Expand All @@ -157,6 +158,7 @@ def entries_dict_ids_argument(entries_dict):

return entries_dict


def query_result_to_dict(entries, **kwargs) -> Dict[str, Dict[str, Dict[str, str]]]:
"""Export to a dictionary a SPARQL query result data structure.
Expand Down Expand Up @@ -225,7 +227,7 @@ def get_entry_statitics(types_list, primary_type=None, **kwargs):
:param types_list: iterable of entry types to count (an element may be a set of types)
:param str primary_type: primary entries type identifier (ex: DataNode or Interaction)
"""
type_statistics = defaultdict(int)
type_statistics = collections.defaultdict(int)

for entry_types in types_list:
if isinstance(entry_types, set):
Expand Down Expand Up @@ -287,11 +289,11 @@ def statistics_to_df(all_pathways_statistics):
:param dict all_pathways_statistics: pathway statistics
:rtype: pandas.DataFrame
"""
pathways_statistics = defaultdict(list)
pathways_statistics = collections.defaultdict(list)
rows = []

column_types = set()
column_primary_types_dict = defaultdict(set)
column_primary_types_dict = collections.defaultdict(set)

# Get pathway type statistics
for pathway_name, statistics_primary_type_dict in all_pathways_statistics.items():
Expand Down
15 changes: 8 additions & 7 deletions src/pathme/wikipathways/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import networkx as nx
from bio2bel_wikipathways import Manager as WikiPathwaysManager

from ..constants import DATA_DIR, HGNC, ENSEMBL, ENTREZ, EXPASY, UNIPROT, WIKIPATHWAYS
from ..constants import DATA_DIR, ENSEMBL, ENTREZ, EXPASY, HGNC, KEGG, UNIPROT, WIKIPATHWAYS, WIKIPEDIA
from ..utils import get_files_in_folder, check_multiple

WIKIPATHWAYS_DIR = os.path.join(DATA_DIR, WIKIPATHWAYS)
Expand Down Expand Up @@ -130,19 +130,20 @@ def get_valid_gene_identifier(node_ids_dict, hgnc_manager):
log.warning('Adding WikiPathways node %s (%s)', name, WIKIPATHWAYS)
return WIKIPATHWAYS, name, name

elif 'wikipedia' in node_ids_dict['uri_id']:
elif WIKIPEDIA.lower() in node_ids_dict['uri_id']:
wiki_name = check_multiple(node_ids_dict['identifier'], 'wikipedia_id')
wiki_id = check_multiple(node_ids_dict['name'], 'wikipedia_name')

log.warning('Adding Wikipedia node %s (%s)', wiki_name, WIKIPATHWAYS)

return 'WIKIPEDIA', wiki_name, wiki_id
return WIKIPEDIA, wiki_name, wiki_id

elif 'kegg' in node_ids_dict['identifier']:
id = check_multiple(node_ids_dict['identifier'], 'wikipedia_id')
log.warning('Adding KEGG node %s ', id)
elif KEGG.lower() in node_ids_dict['identifier']:
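# TODO: this branch handles a KEGG identifier, yet the label passed to check_multiple is 'wikipedia_id' -- confirm whether it should be a KEGG label instead.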
kegg_id = check_multiple(node_ids_dict['identifier'], 'wikipedia_id')
log.warning('Adding KEGG node %s ', kegg_id)

return 'KEGG', id, id
return KEGG, kegg_id, kegg_id

raise Exception('Unknown identifier for node %s', node_ids_dict)

Expand Down

0 comments on commit 244dd9b

Please sign in to comment.