Skip to content

Commit

Permalink
Remap Chembl target to NCBITaxon for SARS-CoV-2
Browse files Browse the repository at this point in the history
  • Loading branch information
deepakunni3 committed Oct 8, 2020
1 parent aedf1ca commit 50055f7
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions kg_covid_19/transform_utils/chembl/chembl_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
from kg_covid_19.utils import write_node_edge_item


# Organism names that refer to SARS-CoV-2, each mapped to the same NCBITaxon
# CURIE. When an activity record's 'target_organism' matches one of these
# names, the edge object is replaced by the mapped taxon identifier instead
# of the CHEMBL.TARGET identifier.
TAXON_MAP = dict.fromkeys(
    (
        'Severe acute respiratory syndrome coronavirus 2',
        'SARS-CoV-2',
    ),
    'NCBITaxon:2697049',
)


class ChemblTransform(Transform):
"""
Parse ChEMBL and transform them into a property graph representation.
Expand All @@ -36,7 +42,7 @@ def run(self, data_file: Optional[str] = None) -> None:
None.
"""
self.node_header = ['id', 'category', 'provided_by']
self.node_header = ['id', 'name', 'category', 'provided_by']
self.edge_header = ['id', 'subject', 'edge_label', 'object', 'relation', 'provided_by', 'type']

# ChEMBL molecules
Expand All @@ -55,8 +61,8 @@ def run(self, data_file: Optional[str] = None) -> None:
data = self.get_chembl_activities()
activity_edges = self.parse_chembl_activity(data)

self.node_header.extend(self._node_header)
self.edge_header.extend(self._edge_header)
self.node_header.extend([x for x in self._node_header if x not in self.node_header])
self.edge_header.extend([x for x in self._edge_header if x not in self.edge_header])

node_handle = open(self.output_node_file, 'w')
edge_handle = open(self.output_edge_file, 'w')
Expand Down Expand Up @@ -116,9 +122,8 @@ def parse_chembl_activity(self, data: List):
edge_label = 'biolink:interacts_with'
relation = 'RO:0002436'
allowed_properties = {
'assay_chembl_id', 'document_chembl_id', 'target_chembl_id', 'molecule_chembl_id',
'standard_units', 'standard_type', 'standard_relation', 'standard_value',
'uo_units'
'assay_organism', 'assay_chembl_id', 'document_chembl_id', 'target_chembl_id', 'target_organism', 'target_pref_name',
'molecule_chembl_id', 'standard_units', 'standard_type', 'standard_relation', 'standard_value', 'uo_units'
}
remap = {
'molecule_chembl_id': 'subject',
Expand All @@ -137,6 +142,10 @@ def parse_chembl_activity(self, data: List):
edge_properties['relation'] = relation
edge_properties['subject'] = f"CHEMBL.COMPOUND:{edge_properties['subject']}"
edge_properties['object'] = f"CHEMBL.TARGET:{edge_properties['object']}"
if 'target_organism' in edge_properties:
# remap CHEMBL.TARGET that are just references to SARS-CoV-2
if edge_properties['target_organism'] in TAXON_MAP:
edge_properties['object'] = TAXON_MAP[edge_properties['target_organism']]
edge_properties['assay'] = f"CHEMBL.ASSAY:{edge_properties['assay']}"
if edge_properties['uo_units']:
edge_properties['uo_units'] = edge_properties['uo_units'].replace('_', ':')
Expand Down

0 comments on commit 50055f7

Please sign in to comment.