diff --git a/kg_covid_19/transform_utils/drug_central/drug_central.py b/kg_covid_19/transform_utils/drug_central/drug_central.py index b5853955..6c8f2a01 100644 --- a/kg_covid_19/transform_utils/drug_central/drug_central.py +++ b/kg_covid_19/transform_utils/drug_central/drug_central.py @@ -42,7 +42,8 @@ def run(self, data_file: Optional[str] = None, species: str = "Homo sapiens") -> gene_node_type = "biolink:Gene" drug_gene_edge_label = "biolink:interacts_with" drug_gene_edge_relation = "RO:0002436" # molecularly interacts with - self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'comment'] + self.edge_header = ['subject', 'edge_label', 'object', 'relation', + 'provided_by', 'comment'] with open(self.output_node_file, 'w') as node, \ open(self.output_edge_file, 'w') as edge, \ @@ -89,13 +90,15 @@ def run(self, data_file: Optional[str] = None, species: str = "Homo sapiens") -> gene_node_type]) # WRITE EDGES - # ['subject', 'edge_label', 'object', 'relation', 'comment'] + # ['subject', 'edge_label', 'object', 'relation', 'provided_by', + # 'comment'] write_node_edge_item(fh=edge, header=self.edge_header, data=[drug_id, drug_gene_edge_label, gene_id, drug_gene_edge_relation, + self.source_name, items_dict['ACT_COMMENT']]) return None diff --git a/kg_covid_19/transform_utils/intact/intact.py b/kg_covid_19/transform_utils/intact/intact.py index 7921ee03..bc881879 100644 --- a/kg_covid_19/transform_utils/intact/intact.py +++ b/kg_covid_19/transform_utils/intact/intact.py @@ -51,7 +51,7 @@ def __init__(self, input_dir: str = None, output_dir: str = None) -> None: self.pubmed_curie_prefix = 'PMID:' self.ppi_edge_label = 'biolink:interacts_with' self.ppi_ro_relation = 'RO:0002437' - self.edge_header = ['subject', 'edge_label', 'object', 'relation', + self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'provided_by', 'publication', 'num_participants', 'association_type', 'detection_method', 'subj_exp_role', 'obj_exp_role'] @@ -181,8 +181,9 @@ def interaction_to_edge(self, interaction: object, nodes_dict: dict, if None not in [node1, node2]: edges.append( [node1, self.ppi_edge_label, node2, self.ppi_ro_relation, - publication, str(len(participants)), interaction_type_str, - detection_method, p1_exp_role, p2_exp_role]) + self.source_name, publication, str(len(participants)), + interaction_type_str, detection_method, p1_exp_role, + p2_exp_role]) return edges diff --git a/kg_covid_19/transform_utils/pharmgkb/pharmgkb.py b/kg_covid_19/transform_utils/pharmgkb/pharmgkb.py index 68293cb0..69e7c38f 100644 --- a/kg_covid_19/transform_utils/pharmgkb/pharmgkb.py +++ b/kg_covid_19/transform_utils/pharmgkb/pharmgkb.py @@ -24,7 +24,8 @@ class PharmGKB(Transform): def __init__(self, input_dir: str = None, output_dir: str = None): source_name = "pharmgkb" super().__init__(source_name, input_dir, output_dir) - self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'evidence'] + self.edge_header = ['subject', 'edge_label', 'object', 'relation', + 'provided_by', 'evidence'] self.node_header = ['id', 'name', 'category'] self.edge_of_interest = ['Gene', 'Chemical'] # logic also matches 'Chemical'-'Gene' @@ -140,6 +141,7 @@ def make_pharmgkb_edge(self, self.drug_gene_edge_label, gene_id, self.drug_gene_edge_relation, + self.source_name, evidence]) def make_pharmgkb_gene_node(self, diff --git a/kg_covid_19/transform_utils/sars_cov_2_gene_annot/sars_cov_2_gene_annot.py b/kg_covid_19/transform_utils/sars_cov_2_gene_annot/sars_cov_2_gene_annot.py index e62bdc99..2e83c05d 100644 --- a/kg_covid_19/transform_utils/sars_cov_2_gene_annot/sars_cov_2_gene_annot.py +++ b/kg_covid_19/transform_utils/sars_cov_2_gene_annot/sars_cov_2_gene_annot.py @@ -23,7 +23,8 @@ def __init__(self, input_dir: Optional[str] = None, output_dir: str = None): self.node_header = ['id', 'name', 'category', 'synonym', 'taxon'] self.edge_header = ['subject', 'edge_label', 'object', 'relation', - 'DB_References', 'ECO_code', 'With', 'Interacting_taxon_ID', + 'provided_by', 'DB_References', 'ECO_code', 'With', + 'Interacting_taxon_ID', 'Date', 'Assigned_by', 'Annotation_Extension', 'Annotation_Properties'] @@ -77,7 +78,8 @@ def gpa_to_edge_data(self, rec: dict) -> list: except KeyError: relation = '' - edge_data = [subj, self.edge_label_prefix + edge_label, obj, relation] + edge_data = [subj, self.edge_label_prefix + edge_label, obj, relation, + self.source_name] # all the others for key in ['DB:Reference', 'ECO_Evidence_code', 'With', 'Interacting_taxon_ID', 'Date', 'Assigned_by', 'Annotation_Extension', diff --git a/kg_covid_19/transform_utils/transform.py b/kg_covid_19/transform_utils/transform.py index 5ba42bec..bba6c17c 100644 --- a/kg_covid_19/transform_utils/transform.py +++ b/kg_covid_19/transform_utils/transform.py @@ -14,7 +14,7 @@ def __init__(self, source_name, input_dir: str = None, output_dir: str = None): self.source_name = source_name self.node_header = ['id', 'name', 'category'] self.edge_header = ['subject', 'edge_label', 'object', 'relation', - 'publications'] + 'provided_by'] # default dirs self.input_base_dir = input_dir if input_dir else self.DEFAULT_INPUT_DIR diff --git a/kg_covid_19/transform_utils/ttd/ttd.py b/kg_covid_19/transform_utils/ttd/ttd.py index d1593970..7f4e6427 100644 --- a/kg_covid_19/transform_utils/ttd/ttd.py +++ b/kg_covid_19/transform_utils/ttd/ttd.py @@ -41,7 +41,8 @@ def run(self, data_file: Optional[str] = None): drug_gene_edge_relation = "RO:0002436" # molecularly interacts with uniprot_curie_prefix = "UniProtKB:" - self.edge_header = ['subject', 'edge_label', 'object', 'relation', 'target_type'] + self.edge_header = ['subject', 'edge_label', 'object', 'relation', + 'provided_by', 'target_type'] # make name to id map for uniprot names of human proteins dat_gz_id_file = os.path.join(self.input_base_dir, @@ -108,6 +109,7 @@ def run(self, data_file: Optional[str] = None): drug_gene_edge_label, this_id, drug_gene_edge_relation, + self.source_name, targ_type]) def get_uniproids(self, data: dict, name_2_id_map: dict, diff --git a/kg_covid_19/transform_utils/zhou_host_proteins/zhou_transform.py b/kg_covid_19/transform_utils/zhou_host_proteins/zhou_transform.py index 6bc40059..21732ae6 100644 --- a/kg_covid_19/transform_utils/zhou_host_proteins/zhou_transform.py +++ b/kg_covid_19/transform_utils/zhou_host_proteins/zhou_transform.py @@ -34,6 +34,9 @@ class ZhouTransform(Transform): def __init__(self, input_dir: str = None, output_dir: str = None) -> None: source_name = "zhou_host_proteins" super().__init__(source_name, input_dir, output_dir) + self.node_header = ['id', 'name', 'category'] + self.edge_header = ['subject', 'edge_label', 'object', 'relation', + 'provided_by', 'publication'] def run(self, data_file: Optional[str] = None): """Method is called and performs needed transformations to process the zhou host protein data, additional @@ -110,6 +113,7 @@ def run(self, data_file: Optional[str] = None): host_gene_vgene_edge_label, corona_curie, host_gene_vgene_relation, + self.source_name, pubmed_curie_prefix + row['PubMed ID'] ]) diff --git a/tests/test_intact.py b/tests/test_intact.py index 2491119e..dede813c 100644 --- a/tests/test_intact.py +++ b/tests/test_intact.py @@ -15,7 +15,7 @@ def test_intact_instance(self): self.assertEqual(self.intact.node_header, ['id', 'name', 'category']) self.assertEqual(self.intact.edge_header, - ['subject', 'edge_label', 'object', 'relation', + ['subject', 'edge_label', 'object', 'relation', 'provided_by', 'publication', 'num_participants', 'association_type', 'detection_method', 'subj_exp_role', 'obj_exp_role']) @@ -34,7 +34,7 @@ def test_struct_parse_xml_to_nodes_edges(self): {'nodes': [['UniProtKB:P20290', 'btf3_human', 'biolink:Protein'], ['UniProtKB:P0C6X7-PRO_0000037317', 'nsp10_cvhsa', 'biolink:RNA']], 'edges': [['UniProtKB:P20290', 'biolink:interacts_with', - 'UniProtKB:P0C6X7-PRO_0000037317', 'RO:0002437', + 'UniProtKB:P0C6X7-PRO_0000037317', 'RO:0002437', 'intact', 'PMID:16157265', '2', 'physical association', '2 hybrid', 'prey', 'bait']] }), @@ -45,6 +45,7 @@ def test_struct_parse_xml_to_nodes_edges(self): 'biolink:interacts_with', 'UniProtKB:P41811', 'RO:0002437', + 'intact', 'PMID:23481256', '3', 'physical association', diff --git a/tests/test_sars_cov_2_gene_annot.py b/tests/test_sars_cov_2_gene_annot.py index f61c7b94..098afce7 100644 --- a/tests/test_sars_cov_2_gene_annot.py +++ b/tests/test_sars_cov_2_gene_annot.py @@ -53,7 +53,8 @@ def test_gpa_to_edge_data(self): self.assertEqual(len(self.sc2ga.edge_header), len(edge1)) self.assertEqual(edge1, - ['UniProtKB:P0DTC1', 'biolink:enables', 'GO:0003723', 'RO:0002327', + ['UniProtKB:P0DTC1', 'biolink:enables', 'GO:0003723', + 'RO:0002327', 'sars_cov_2_gene_annot', 'GO_REF:0000043', 'ECO:0000322', 'UniProtKB-KW:KW-0694', '', '20200321', 'UniProt', '', 'go_evidence=IEA']) diff --git a/tests/test_transform_class.py b/tests/test_transform_class.py index 24bf3d5d..2c4a4547 100644 --- a/tests/test_transform_class.py +++ b/tests/test_transform_class.py @@ -17,7 +17,7 @@ def test_reality(self): ('source_name', 'test_transform'), ('node_header', ['id', 'name', 'category']), ('edge_header', - ['subject', 'edge_label', 'object', 'relation', 'publications']), + ['subject', 'edge_label', 'object', 'relation', 'provided_by']), ('output_base_dir', os.path.join("data", "transformed")), ('input_base_dir', os.path.join("data", "raw")), ('output_dir', os.path.join("data", "transformed", "test_transform")),