Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add provided_by to edges #124

Merged
merged 8 commits into from
May 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions kg_covid_19/transform_utils/drug_central/drug_central.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def run(self, data_file: Optional[str] = None, species: str = "Homo sapiens") ->
gene_node_type = "biolink:Gene"
drug_gene_edge_label = "biolink:interacts_with"
drug_gene_edge_relation = "RO:0002436" # molecularly interacts with
self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'comment']
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
'provided_by', 'comment']

with open(self.output_node_file, 'w') as node, \
open(self.output_edge_file, 'w') as edge, \
Expand Down Expand Up @@ -89,13 +90,15 @@ def run(self, data_file: Optional[str] = None, species: str = "Homo sapiens") ->
gene_node_type])

# WRITE EDGES
# ['subject', 'edge_label', 'object', 'relation', 'comment']
# ['subject', 'edge_label', 'object', 'relation', 'provided_by',
# 'comment']
write_node_edge_item(fh=edge,
header=self.edge_header,
data=[drug_id,
drug_gene_edge_label,
gene_id,
drug_gene_edge_relation,
self.source_name,
items_dict['ACT_COMMENT']])

return None
Expand Down
7 changes: 4 additions & 3 deletions kg_covid_19/transform_utils/intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __init__(self, input_dir: str = None, output_dir: str = None) -> None:
self.pubmed_curie_prefix = 'PMID:'
self.ppi_edge_label = 'biolink:interacts_with'
self.ppi_ro_relation = 'RO:0002437'
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'provided_by',
'publication', 'num_participants', 'association_type',
'detection_method', 'subj_exp_role', 'obj_exp_role']

Expand Down Expand Up @@ -181,8 +181,9 @@ def interaction_to_edge(self, interaction: object, nodes_dict: dict,
if None not in [node1, node2]:
edges.append(
[node1, self.ppi_edge_label, node2, self.ppi_ro_relation,
publication, str(len(participants)), interaction_type_str,
detection_method, p1_exp_role, p2_exp_role])
self.source_name, publication, str(len(participants)),
interaction_type_str, detection_method, p1_exp_role,
p2_exp_role])

return edges

Expand Down
4 changes: 3 additions & 1 deletion kg_covid_19/transform_utils/pharmgkb/pharmgkb.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class PharmGKB(Transform):
def __init__(self, input_dir: str = None, output_dir: str = None):
source_name = "pharmgkb"
super().__init__(source_name, input_dir, output_dir)
self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'evidence']
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
'provided_by', 'evidence']
self.node_header = ['id', 'name', 'category']
self.edge_of_interest = ['Gene',
'Chemical'] # logic also matches 'Chemical'-'Gene'
Expand Down Expand Up @@ -140,6 +141,7 @@ def make_pharmgkb_edge(self,
self.drug_gene_edge_label,
gene_id,
self.drug_gene_edge_relation,
self.source_name,
evidence])

def make_pharmgkb_gene_node(self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ def __init__(self, input_dir: Optional[str] = None, output_dir: str = None):

self.node_header = ['id', 'name', 'category', 'synonym', 'taxon']
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
'DB_References', 'ECO_code', 'With', 'Interacting_taxon_ID',
'provided_by', 'DB_References', 'ECO_code', 'With',
'Interacting_taxon_ID',
'Date', 'Assigned_by', 'Annotation_Extension',
'Annotation_Properties']

Expand Down Expand Up @@ -77,7 +78,8 @@ def gpa_to_edge_data(self, rec: dict) -> list:
except KeyError:
relation = ''

edge_data = [subj, self.edge_label_prefix + edge_label, obj, relation]
edge_data = [subj, self.edge_label_prefix + edge_label, obj, relation,
self.source_name]
# all the others
for key in ['DB:Reference', 'ECO_Evidence_code', 'With', 'Interacting_taxon_ID',
'Date', 'Assigned_by', 'Annotation_Extension',
Expand Down
2 changes: 1 addition & 1 deletion kg_covid_19/transform_utils/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(self, source_name, input_dir: str = None, output_dir: str = None):
self.source_name = source_name
self.node_header = ['id', 'name', 'category']
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
'publications']
'provided_by']

# default dirs
self.input_base_dir = input_dir if input_dir else self.DEFAULT_INPUT_DIR
Expand Down
4 changes: 3 additions & 1 deletion kg_covid_19/transform_utils/ttd/ttd.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def run(self, data_file: Optional[str] = None):
drug_gene_edge_relation = "RO:0002436" # molecularly interacts with
uniprot_curie_prefix = "UniProtKB:"

self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'target_type']
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
'provided_by', 'target_type']

# make name to id map for uniprot names of human proteins
dat_gz_id_file = os.path.join(self.input_base_dir,
Expand Down Expand Up @@ -108,6 +109,7 @@ def run(self, data_file: Optional[str] = None):
drug_gene_edge_label,
this_id,
drug_gene_edge_relation,
self.source_name,
targ_type])

def get_uniproids(self, data: dict, name_2_id_map: dict,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ class ZhouTransform(Transform):
def __init__(self, input_dir: str = None, output_dir: str = None) -> None:
source_name = "zhou_host_proteins"
super().__init__(source_name, input_dir, output_dir)
self.node_header = ['id', 'name', 'category']
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
'provided_by', 'publication']

def run(self, data_file: Optional[str] = None):
"""Method is called and performs needed transformations to process the zhou host protein data, additional
Expand Down Expand Up @@ -110,6 +113,7 @@ def run(self, data_file: Optional[str] = None):
host_gene_vgene_edge_label,
corona_curie,
host_gene_vgene_relation,
self.source_name,
pubmed_curie_prefix + row['PubMed ID']
])

Expand Down
5 changes: 3 additions & 2 deletions tests/test_intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_intact_instance(self):
self.assertEqual(self.intact.node_header,
['id', 'name', 'category'])
self.assertEqual(self.intact.edge_header,
['subject', 'edge_label', 'object', 'relation',
['subject', 'edge_label', 'object', 'relation', 'provided_by',
'publication', 'num_participants', 'association_type',
'detection_method', 'subj_exp_role', 'obj_exp_role'])

Expand All @@ -34,7 +34,7 @@ def test_struct_parse_xml_to_nodes_edges(self):
{'nodes': [['UniProtKB:P20290', 'btf3_human', 'biolink:Protein'],
['UniProtKB:P0C6X7-PRO_0000037317', 'nsp10_cvhsa', 'biolink:RNA']],
'edges': [['UniProtKB:P20290', 'biolink:interacts_with',
'UniProtKB:P0C6X7-PRO_0000037317', 'RO:0002437',
'UniProtKB:P0C6X7-PRO_0000037317', 'RO:0002437', 'intact',
'PMID:16157265', '2', 'physical association', '2 hybrid', 'prey',
'bait']]
}),
Expand All @@ -45,6 +45,7 @@ def test_struct_parse_xml_to_nodes_edges(self):
'biolink:interacts_with',
'UniProtKB:P41811',
'RO:0002437',
'intact',
'PMID:23481256',
'3',
'physical association',
Expand Down
3 changes: 2 additions & 1 deletion tests/test_sars_cov_2_gene_annot.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def test_gpa_to_edge_data(self):

self.assertEqual(len(self.sc2ga.edge_header), len(edge1))
self.assertEqual(edge1,
['UniProtKB:P0DTC1', 'biolink:enables', 'GO:0003723', 'RO:0002327',
['UniProtKB:P0DTC1', 'biolink:enables', 'GO:0003723',
'RO:0002327', 'sars_cov_2_gene_annot',
'GO_REF:0000043', 'ECO:0000322', 'UniProtKB-KW:KW-0694', '',
'20200321', 'UniProt', '', 'go_evidence=IEA'])

Expand Down
2 changes: 1 addition & 1 deletion tests/test_transform_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_reality(self):
('source_name', 'test_transform'),
('node_header', ['id', 'name', 'category']),
('edge_header',
['subject', 'edge_label', 'object', 'relation', 'publications']),
['subject', 'edge_label', 'object', 'relation', 'provided_by']),
('output_base_dir', os.path.join("data", "transformed")),
('input_base_dir', os.path.join("data", "raw")),
('output_dir', os.path.join("data", "transformed", "test_transform")),
Expand Down