Skip to content

Commit

Permalink
Merge 0d7f9b4 into b8b829b
Browse files: browse the repository at this point in the history
  • Loading branch information
justaddcoffee committed May 1, 2020
2 parents b8b829b + 0d7f9b4 commit 15a93df
Show file tree
Hide file tree
Showing 10 changed files with 30 additions and 14 deletions.
7 changes: 5 additions & 2 deletions kg_covid_19/transform_utils/drug_central/drug_central.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def run(self, data_file: Optional[str] = None, species: str = "Homo sapiens") ->
gene_node_type = "biolink:Gene"
drug_gene_edge_label = "biolink:interacts_with"
drug_gene_edge_relation = "RO:0002436" # molecularly interacts with
- self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'comment']
+ self.edge_header = ['subject', 'edge_label', 'object', 'relation',
+ 'provided_by', 'comment']

with open(self.output_node_file, 'w') as node, \
open(self.output_edge_file, 'w') as edge, \
Expand Down Expand Up @@ -89,13 +90,15 @@ def run(self, data_file: Optional[str] = None, species: str = "Homo sapiens") ->
gene_node_type])

# WRITE EDGES
- # ['subject', 'edge_label', 'object', 'relation', 'comment']
+ # ['subject', 'edge_label', 'object', 'relation', 'provided_by',
+ # 'comment']
write_node_edge_item(fh=edge,
header=self.edge_header,
data=[drug_id,
drug_gene_edge_label,
gene_id,
drug_gene_edge_relation,
+ self.source_name,
items_dict['ACT_COMMENT']])

return None
Expand Down
7 changes: 4 additions & 3 deletions kg_covid_19/transform_utils/intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __init__(self, input_dir: str = None, output_dir: str = None) -> None:
self.pubmed_curie_prefix = 'PMID:'
self.ppi_edge_label = 'biolink:interacts_with'
self.ppi_ro_relation = 'RO:0002437'
- self.edge_header = ['subject', 'edge_label', 'object', 'relation',
+ self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'provided_by',
'publication', 'num_participants', 'association_type',
'detection_method', 'subj_exp_role', 'obj_exp_role']

Expand Down Expand Up @@ -181,8 +181,9 @@ def interaction_to_edge(self, interaction: object, nodes_dict: dict,
if None not in [node1, node2]:
edges.append(
[node1, self.ppi_edge_label, node2, self.ppi_ro_relation,
- publication, str(len(participants)), interaction_type_str,
- detection_method, p1_exp_role, p2_exp_role])
+ self.source_name, publication, str(len(participants)),
+ interaction_type_str, detection_method, p1_exp_role,
+ p2_exp_role])

return edges

Expand Down
4 changes: 3 additions & 1 deletion kg_covid_19/transform_utils/pharmgkb/pharmgkb.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class PharmGKB(Transform):
def __init__(self, input_dir: str = None, output_dir: str = None):
source_name = "pharmgkb"
super().__init__(source_name, input_dir, output_dir)
- self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'evidence']
+ self.edge_header = ['subject', 'edge_label', 'object', 'relation',
+ 'provided_by', 'evidence']
self.node_header = ['id', 'name', 'category']
self.edge_of_interest = ['Gene',
'Chemical'] # logic also matches 'Chemical'-'Gene'
Expand Down Expand Up @@ -140,6 +141,7 @@ def make_pharmgkb_edge(self,
self.drug_gene_edge_label,
gene_id,
self.drug_gene_edge_relation,
+ self.source_name,
evidence])

def make_pharmgkb_gene_node(self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ def __init__(self, input_dir: Optional[str] = None, output_dir: str = None):

self.node_header = ['id', 'name', 'category', 'synonym', 'taxon']
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
- 'DB_References', 'ECO_code', 'With', 'Interacting_taxon_ID',
+ 'provided_by', 'DB_References', 'ECO_code', 'With',
+ 'Interacting_taxon_ID',
'Date', 'Assigned_by', 'Annotation_Extension',
'Annotation_Properties']

Expand Down Expand Up @@ -77,7 +78,8 @@ def gpa_to_edge_data(self, rec: dict) -> list:
except KeyError:
relation = ''

- edge_data = [subj, self.edge_label_prefix + edge_label, obj, relation]
+ edge_data = [subj, self.edge_label_prefix + edge_label, obj, relation,
+ self.source_name]
# all the others
for key in ['DB:Reference', 'ECO_Evidence_code', 'With', 'Interacting_taxon_ID',
'Date', 'Assigned_by', 'Annotation_Extension',
Expand Down
2 changes: 1 addition & 1 deletion kg_covid_19/transform_utils/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(self, source_name, input_dir: str = None, output_dir: str = None):
self.source_name = source_name
self.node_header = ['id', 'name', 'category']
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
- 'publications']
+ 'provided_by']

# default dirs
self.input_base_dir = input_dir if input_dir else self.DEFAULT_INPUT_DIR
Expand Down
4 changes: 3 additions & 1 deletion kg_covid_19/transform_utils/ttd/ttd.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def run(self, data_file: Optional[str] = None):
drug_gene_edge_relation = "RO:0002436" # molecularly interacts with
uniprot_curie_prefix = "UniProtKB:"

- self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'target_type']
+ self.edge_header = ['subject', 'edge_label', 'object', 'relation',
+ 'provided_by', 'target_type']

# make name to id map for uniprot names of human proteins
dat_gz_id_file = os.path.join(self.input_base_dir,
Expand Down Expand Up @@ -108,6 +109,7 @@ def run(self, data_file: Optional[str] = None):
drug_gene_edge_label,
this_id,
drug_gene_edge_relation,
+ self.source_name,
targ_type])

def get_uniproids(self, data: dict, name_2_id_map: dict,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ class ZhouTransform(Transform):
def __init__(self, input_dir: str = None, output_dir: str = None) -> None:
source_name = "zhou_host_proteins"
super().__init__(source_name, input_dir, output_dir)
+ self.node_header = ['id', 'name', 'category']
+ self.edge_header = ['subject', 'edge_label', 'object', 'relation',
+ 'provided_by', 'publication']

def run(self, data_file: Optional[str] = None):
"""Method is called and performs needed transformations to process the zhou host protein data, additional
Expand Down Expand Up @@ -110,6 +113,7 @@ def run(self, data_file: Optional[str] = None):
host_gene_vgene_edge_label,
corona_curie,
host_gene_vgene_relation,
+ self.source_name,
pubmed_curie_prefix + row['PubMed ID']
])

Expand Down
5 changes: 3 additions & 2 deletions tests/test_intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_intact_instance(self):
self.assertEqual(self.intact.node_header,
['id', 'name', 'category'])
self.assertEqual(self.intact.edge_header,
- ['subject', 'edge_label', 'object', 'relation',
+ ['subject', 'edge_label', 'object', 'relation', 'provided_by',
'publication', 'num_participants', 'association_type',
'detection_method', 'subj_exp_role', 'obj_exp_role'])

Expand All @@ -34,7 +34,7 @@ def test_struct_parse_xml_to_nodes_edges(self):
{'nodes': [['UniProtKB:P20290', 'btf3_human', 'biolink:Protein'],
['UniProtKB:P0C6X7-PRO_0000037317', 'nsp10_cvhsa', 'biolink:RNA']],
'edges': [['UniProtKB:P20290', 'biolink:interacts_with',
- 'UniProtKB:P0C6X7-PRO_0000037317', 'RO:0002437',
+ 'UniProtKB:P0C6X7-PRO_0000037317', 'RO:0002437', 'intact',
'PMID:16157265', '2', 'physical association', '2 hybrid', 'prey',
'bait']]
}),
Expand All @@ -45,6 +45,7 @@ def test_struct_parse_xml_to_nodes_edges(self):
'biolink:interacts_with',
'UniProtKB:P41811',
'RO:0002437',
+ 'intact',
'PMID:23481256',
'3',
'physical association',
Expand Down
3 changes: 2 additions & 1 deletion tests/test_sars_cov_2_gene_annot.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def test_gpa_to_edge_data(self):

self.assertEqual(len(self.sc2ga.edge_header), len(edge1))
self.assertEqual(edge1,
- ['UniProtKB:P0DTC1', 'biolink:enables', 'GO:0003723', 'RO:0002327',
+ ['UniProtKB:P0DTC1', 'biolink:enables', 'GO:0003723',
+ 'RO:0002327', 'sars_cov_2_gene_annot',
'GO_REF:0000043', 'ECO:0000322', 'UniProtKB-KW:KW-0694', '',
'20200321', 'UniProt', '', 'go_evidence=IEA'])

Expand Down
2 changes: 1 addition & 1 deletion tests/test_transform_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_reality(self):
('source_name', 'test_transform'),
('node_header', ['id', 'name', 'category']),
('edge_header',
- ['subject', 'edge_label', 'object', 'relation', 'publications']),
+ ['subject', 'edge_label', 'object', 'relation', 'provided_by']),
('output_base_dir', os.path.join("data", "transformed")),
('input_base_dir', os.path.join("data", "raw")),
('output_dir', os.path.join("data", "transformed", "test_transform")),
Expand Down

0 comments on commit 15a93df

Please sign in to comment.