Skip to content

Commit

Permalink
Code cleanup for flake8 and mypy
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Jul 15, 2019
1 parent eb64371 commit cbd8a6e
Show file tree
Hide file tree
Showing 7 changed files with 493 additions and 391 deletions.
107 changes: 49 additions & 58 deletions src/pathme/reactome/rdf_sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@
import logging
import os
from collections import defaultdict
from typing import Set, Dict, Union, Tuple, List, Any
from typing import Any, Dict, List, Set, Tuple, Union

import rdflib
import tqdm
from pybel import to_pickle
from rdflib import URIRef
from rdflib.namespace import Namespace, RDFS, RDF, DCTERMS, DC, OWL, XSD, SKOS
from rdflib.namespace import DC, DCTERMS, Namespace, OWL, RDF, RDFS, SKOS, XSD

from pathme.constants import REACTOME_BEL
from pathme.reactome.convert_to_bel import convert_to_bel
from pathme.utils import query_result_to_dict, parse_rdf, get_pathway_statitics
from pathme.utils import get_pathway_statitics, parse_rdf, query_result_to_dict

log = logging.getLogger(__name__)

Expand All @@ -36,82 +36,73 @@
'biopax3': Namespace('http://www.biopax.org/release/biopax-level3.owl#'),
}

#: SPARQL query string to get all the primary types of entries (Pathway, BiochemicalReaction) in a pathway network.
#: SPARQL query string to get all the primary types of entries (Pathway, BiochemicalReaction) in a pathway network.
GET_ALL_TYPES = """
SELECT DISTINCT (STRAFTER(STR(?rdf_type), str(biopax3:)) AS ?entry_type)
WHERE
{
?uri_id rdf:type ?rdf_type .
WHERE {
?uri_id rdf:type ?rdf_type .
}
"""

#: SPARQL query string to get pathway URIs and names in the RDF file.
GET_ALL_PATHWAYS = """
SELECT DISTINCT ?uri_id ?name
WHERE
{
?uri_id rdf:type biopax3:Pathway .
?uri_id biopax3:displayName ?name .
}
WHERE {
?uri_id rdf:type biopax3:Pathway .
?uri_id biopax3:displayName ?name .
}
"""

#: SPARQL query string to get all components of a pathway (predicate biopax3:pathwayComponent).
GET_ALL_PATHWAY_COMPONENTS = """
SELECT DISTINCT ?uri_id ?name ?comment (STRAFTER(STR(?uri_type), str(biopax3:)) AS ?component_type)
WHERE
{
?pathway biopax3:pathwayComponent ?uri_id .
?uri_id rdf:type ?uri_type .
optional {?uri_id biopax3:displayName ?name .}
optional {?uri_id biopax3:comment ?comment .}
}
WHERE {
?pathway biopax3:pathwayComponent ?uri_id .
?uri_id rdf:type ?uri_type .
optional {?uri_id biopax3:displayName ?name .}
optional {?uri_id biopax3:comment ?comment .}
}
"""

#: SPARQL query string to get all participants in an interaction and its controlType (ACTIVATION or INHIBITION).
GET_INTERACTION_PARTICIPANTS_AND_TYPE = """
SELECT DISTINCT (STRAFTER(STR(?component), '#') AS ?identifier) ?reactant ?product (STR(?control_type) AS ?interaction_type)
WHERE
{
?component biopax3:left ?reactant .
?component biopax3:right ?product .
optional {?control biopax3:controlled ?component .}
optional {?control biopax3:controlType ?control_type }
}
SELECT DISTINCT
(STRAFTER(STR(?component), '#') AS ?identifier)
?reactant
?product
(STR(?control_type) AS ?interaction_type)
WHERE {
?component biopax3:left ?reactant .
?component biopax3:right ?product .
optional {?control biopax3:controlled ?component .}
optional {?control biopax3:controlType ?control_type }
}
"""

#: SPARQL query to get all the possible metadata (optional statements) of an entity (Protein, Dna, Pathway...).
GET_ENTITY_METADATA = """
SELECT DISTINCT
(STRAFTER (STR(?uri_type), str(biopax3:)) AS ?entity_type)
(STRAFTER(STR(?entity), '#') AS ?identifier)
(STR(?entity) AS ?uri_id)
(STRAFTER(STR(?entity), '#') AS ?reactome_id)
(STR(?entity) AS ?uri_reactome_id)
(STR(?entity_reference) AS ?uri_id)
?name
?cell_locat
?display_name
?complex_components
?comment
WHERE
{
?entity rdf:type ?uri_type .
optional {?entity biopax3:comment ?comment .}
optional {?entity biopax3:entityReference ?entity_reference .}
optional {?entity biopax3:name ?name .}
optional {?entity biopax3:displayName ?display_name .}
optional {?entity biopax3:cellularLocation ?cell_locat .}
optional {?entity biopax3:organism ?organism .}
optional {?entity biopax3:component ?complex_components .}
}
SELECT DISTINCT
(STRAFTER (STR(?uri_type), str(biopax3:)) AS ?entity_type)
(STRAFTER(STR(?entity), '#') AS ?identifier)
(STR(?entity) AS ?uri_id)
(STRAFTER(STR(?entity), '#') AS ?reactome_id)
(STR(?entity) AS ?uri_reactome_id)
(STR(?entity_reference) AS ?uri_id)
?name
?cell_locat
?display_name
?complex_components
?comment
WHERE {
?entity rdf:type ?uri_type .
optional {?entity biopax3:comment ?comment .}
optional {?entity biopax3:entityReference ?entity_reference .}
optional {?entity biopax3:name ?name .}
optional {?entity biopax3:displayName ?display_name .}
optional {?entity biopax3:cellularLocation ?cell_locat .}
optional {?entity biopax3:organism ?organism .}
optional {?entity biopax3:component ?complex_components .}
}
"""

"""Queries managers"""
Expand Down
39 changes: 20 additions & 19 deletions src/pathme/reactome/utils.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,37 @@
# -*- coding: utf-8 -*-

"""This module has utilities method for parsing, handling wikipathways RDF and data."""
"""This module has utility methods for parsing and handling WikiPathways RDF and data."""

import logging
import tarfile
from typing import List
from typing import List, Tuple

from bio2bel_chebi import Manager as ChebiManager
from bio2bel_hgnc import Manager as HgncManager
from bio2bel_hgnc.models import HumanGene
from pybel.dsl import protein

from pathme.utils import parse_id_uri
from ..constants import HGNC, UNKNOWN, UNIPROT, ENSEMBL
from ..constants import ENSEMBL, HGNC, UNIPROT, UNKNOWN

log = logging.getLogger(__name__)

"""Download utilities"""


def get_hgnc_node_info(gene):
def get_hgnc_node_info(gene: HumanGene) -> Tuple[str, str, str]:
"""Return HGNC identifier, symbol and namespace from HGNC entry.
:param bio2bel_hgnc.manager.models.HGNC gene:
:rtype: tuple[str,str,str]
"""
return gene.identifier, gene.symbol, HGNC
return str(gene.identifier), gene.symbol, HGNC


def get_valid_node_parameters(node, hgnc_manager, chebi_manager):
def get_valid_node_parameters(
node,
hgnc_manager: HgncManager,
chebi_manager: ChebiManager,
) -> Tuple[str, str, str]:
namespace = None

if 'uri_id' in node:
Expand Down Expand Up @@ -106,22 +112,17 @@ def process_multiple_proteins(hgnc_entries: List) -> List:
:param hgnc_entries: Results from query
:return: List of Protein BEL nodes
"""
protein_group = list()
return [
protein(namespace='HGNC', name=hgnc_entry.symbol, identifier=hgnc_entry.id)
for hgnc_entry in hgnc_entries
]

for hgnc_entry in hgnc_entries:
protein_group.append(
protein(namespace='HGNC', name=hgnc_entry.symbol, identifier=hgnc_entry.id)
)

return protein_group


def untar_file(file_path, export_folder):
def untar_file(file_path: str, export_folder: str) -> None:
"""Extract a bz2-compressed tar archive into a destination folder.
:param str file_path: name of the file
:param str export_folder: name of the file
:param file_path: name of the file
:param export_folder: destination folder the archive is extracted into
"""
tar_ref = tarfile.open(file_path, 'r:bz2')
tar_ref.extractall(export_folder)
Expand Down
2 changes: 1 addition & 1 deletion src/pathme/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __call__(self, *args, **kwargs):
return self.method(*args, **kwargs)


def parse_id_uri(uri):
def parse_id_uri(uri: str) -> Tuple[str, str, str, str]:
"""Get the components of a given uri (with identifier at the last position).
:param uri: URI
Expand Down

0 comments on commit cbd8a6e

Please sign in to comment.