Skip to content

Commit

Permalink
Add PubMed info to Drug Central ingest (if present)
Browse files Browse the repository at this point in the history
  • Loading branch information
justaddcoffee committed Jun 2, 2020
1 parent b33e483 commit 9d71c95
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
19 changes: 18 additions & 1 deletion kg_covid_19/transform_utils/drug_central/drug_central.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def run(self, data_file: Optional[str] = None, species: str = "Homo sapiens") ->
drug_gene_edge_label = "biolink:interacts_with"
drug_gene_edge_relation = "RO:0002436" # molecularly interacts with
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
'provided_by', 'comment']
'provided_by', 'publication', 'comment']

# unzip tcrd.zip and get tchem and tclin filenames
tempdir = tempfile.mkdtemp()
Expand Down Expand Up @@ -118,11 +118,28 @@ def run(self, data_file: Optional[str] = None, species: str = "Homo sapiens") ->
gene_id,
drug_gene_edge_relation,
self.source_name,
get_pub_info_from_dict(items_dict),
items_dict['ACT_COMMENT']])

return None


def get_pub_info_from_dict(items_dict,
                           pubmed_prefix="PMID",
                           uri_match='http://www.ncbi.nlm.nih.gov/pubmed/'
                           ) -> str:
    """Build a pipe-delimited string of PubMed CURIEs from a record's source URLs.

    The 'ACT_SOURCE_URL' and 'MOA_SOURCE_URL' entries of ``items_dict`` are
    inspected, in that order. Every value that starts with ``uri_match`` is
    converted to ``<pubmed_prefix>:<rest of URL>``; all other values are
    ignored.

    Args:
        items_dict: mapping of column name to value for one parsed line. Any
            container supporting ``in`` is tolerated (e.g. an empty string),
            in which case no publications are found.
        pubmed_prefix: CURIE prefix placed before the PubMed identifier.
        uri_match: literal URL prefix that identifies a PubMed link. It is
            compared as plain text, not as a regular expression.

    Returns:
        The CURIEs joined with "|", without duplicates and in first-seen
        order, or an empty string when no PubMed URL is present.
    """
    pubs = []
    for url_key in ('ACT_SOURCE_URL', 'MOA_SOURCE_URL'):
        # Membership test (instead of .get) keeps non-dict inputs such as ''
        # working, which the existing unit tests rely on.
        if url_key not in items_dict:
            continue
        url = items_dict[url_key]
        # Literal prefix comparison: treating the URL as a regex would let
        # '.' match any character and accept look-alike hosts unchanged.
        # The isinstance guard skips None / non-string field values.
        if not isinstance(url, str) or not url.startswith(uri_match):
            continue
        pub_id = url[len(uri_match):].strip()
        if not pub_id:
            # URL was only the prefix; do not emit a bare "PMID:".
            continue
        curie = pubmed_prefix + ":" + pub_id
        # Both columns frequently cite the same paper; report it once.
        if curie not in pubs:
            pubs.append(curie)
    return "|".join(pubs)


def tsv_to_dict(input_file: str, col_for_key: str) -> dict:
this_dict: dict = defaultdict(list)
with open(input_file) as file:
Expand Down
28 changes: 27 additions & 1 deletion tests/test_drug_central.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import unittest

from kg_covid_19.transform_utils.drug_central.drug_central import \
parse_drug_central_line, unzip_and_get_tclin_tchem, tsv_to_dict
parse_drug_central_line, unzip_and_get_tclin_tchem, tsv_to_dict, \
get_pub_info_from_dict
from kg_covid_19.utils.transform_utils import parse_header
from parameterized import parameterized

Expand Down Expand Up @@ -63,3 +64,28 @@ def test_unzip_and_get_tclin_tchem(self) -> None:
self.assertEqual(tclin, os.path.join(tempdir, 'tclin_05122020.tsv'))
self.assertEqual(tchem, os.path.join(tempdir, 'tchem_drugs_05122020.tsv'))

@parameterized.expand([
    # Non-dict / empty input: nothing to extract.
    ('', ''),
    # Only a non-PubMed MOA URL.
    (
        {
            'ACT_SOURCE_URL': '',
            'MOA_SOURCE_URL': 'https://www.ebi.ac.uk/chembl/compound/inspect/CHEMBL1200749',
        },
        '',
    ),
    # Only a non-PubMed ACT URL.
    (
        {
            'ACT_SOURCE_URL': 'https://www.ebi.ac.uk/chembl/compound/inspect/CHEMBL1200749',
            'MOA_SOURCE_URL': '',
        },
        '',
    ),
    # Both URLs present, neither points at PubMed.
    (
        {
            'ACT_SOURCE_URL': 'https://www.ebi.ac.uk/chembl/compound/inspect/CHEMBL1200749',
            'MOA_SOURCE_URL': 'https://www.ebi.ac.uk/chembl/compound/inspect/CHEMBL1200749',
        },
        '',
    ),
    # PubMed ACT URL alongside a non-PubMed MOA URL.
    (
        {
            'ACT_SOURCE_URL': 'http://www.ncbi.nlm.nih.gov/pubmed/17275317',
            'MOA_SOURCE_URL': 'https://www.ebi.ac.uk/chembl/compound/inspect/CHEMBL1200749',
        },
        'PMID:17275317',
    ),
    # Two PubMed URLs are joined with a pipe, ACT first.
    (
        {
            'ACT_SOURCE_URL': 'http://www.ncbi.nlm.nih.gov/pubmed/17275317',
            'MOA_SOURCE_URL': 'http://www.ncbi.nlm.nih.gov/pubmed/3207986',
        },
        'PMID:17275317|PMID:3207986',
    ),
])
def test_get_pub_info_from_dict(self, this_dict, expected_pub_info) -> None:
    # Each parameter pair is (input record, expected publication string).
    actual_pub_info = get_pub_info_from_dict(this_dict)
    self.assertEqual(expected_pub_info, actual_pub_info)

0 comments on commit 9d71c95

Please sign in to comment.