From 7fda59e673103815d54135e13475721fec061e5d Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Sun, 3 May 2020 13:15:06 -0700 Subject: [PATCH 1/2] Remove old HPO ingest --- kg_covid_19/transform_utils/hpo/__init__.py | 5 -- kg_covid_19/transform_utils/hpo/hpo.py | 98 --------------------- 2 files changed, 103 deletions(-) delete mode 100644 kg_covid_19/transform_utils/hpo/__init__.py delete mode 100644 kg_covid_19/transform_utils/hpo/hpo.py diff --git a/kg_covid_19/transform_utils/hpo/__init__.py b/kg_covid_19/transform_utils/hpo/__init__.py deleted file mode 100644 index 18ac8db5..00000000 --- a/kg_covid_19/transform_utils/hpo/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .hpo import HpoTransform - -__all__ = [ - "HpoTransform" -] diff --git a/kg_covid_19/transform_utils/hpo/hpo.py b/kg_covid_19/transform_utils/hpo/hpo.py deleted file mode 100644 index 1d9ace1a..00000000 --- a/kg_covid_19/transform_utils/hpo/hpo.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -import os -from typing import Optional - -import obonet # type: ignore -from typing.io import TextIO # type: ignore - -from kg_covid_19.transform_utils.transform import Transform -from kg_covid_19.utils import write_node_edge_item -from kg_covid_19.utils.transform_utils import get_item_by_priority, data_to_dict, \ - ItemInDictNotFound - -"""Ingest Human Phenotype Ontology (no annotations for now, just the ontology) - -Dataset location: https://raw.githubusercontent.com/obophenotype/human-phenotype-ontology/master/hp.obo -GitHub Issue: https://github.com/Knowledge-Graph-Hub/kg-covid-19/issues/48 - -""" - - -class HpoTransform(Transform): - - def __init__(self, input_dir: str = None, output_dir: str = None): - source_name = "hpo" - super().__init__(source_name, input_dir, output_dir) - - def run(self, data_file: Optional[str] = None): - self.node_header.extend(["comments", "description"]) - hpo_node_type = "biolink:PhenotypicFeature" - hpo_edge_label = "rdfs:subClassOf" - hpo_ro_relation = "RO:0002351" - hpo_obo_file = os.path.join(self.input_base_dir, "hp.obo") - - # transform data, something like: - with open(self.output_node_file, 'w') as node, \ - open(self.output_edge_file, 'w') as edge: - - # write headers (change default node/edge headers if necessary - node.write("\t".join(self.node_header) + "\n") - edge.write("\t".join(self.edge_header) + "\n") - - graph = obonet.read_obo(hpo_obo_file) - - for id_, data in graph.nodes(data=True): - - # Write HPO nodes - self.write_hpo_node(node, id_, data, hpo_node_type) - - # if we see is_a relationship(s), write parent-child edge(s) - if 'is_a' in data: - for parent in data['is_a']: - self.write_hpo_edge(edge, - id_, - hpo_edge_label, - parent, - hpo_ro_relation) - - - def write_hpo_node(self, fh: TextIO, id: str, data: dict, node_type: str) -> None: - # Try to get comments/def in case this is useful for ML - try: - comment_field = get_item_by_priority(data, ['comment']) - except ItemInDictNotFound: - comment_field = "" - - try: - description = get_item_by_priority(data, ['def']) - except ItemInDictNotFound: - description = "" - - try: - name_field = get_item_by_priority(data, ['name']) - except ItemInDictNotFound: - name_field = "" - - write_node_edge_item(fh=fh, header=self.node_header, - data=[id, - name_field, - node_type, - comment_field, - description - ]) - - def write_hpo_edge(self, - fh: TextIO, - subject: str, - edge_label: str, - object: str, - relation: str) -> None: - - # ['subject', 'edge_label', 'object', 'relation', 'publications'] - write_node_edge_item(fh=fh, header=self.edge_header, - data=[subject, - edge_label, - object, - relation, - ""]) From aed13ae82b722ecd764fd1ce996798b2c22ec1fb Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Sun, 3 May 2020 13:15:32 -0700 Subject: [PATCH 2/2] Remove unused import --- kg_covid_19/transform.py | 1 - 1 file changed, 1 deletion(-) diff --git a/kg_covid_19/transform.py b/kg_covid_19/transform.py index 9a721c28..fc8095f3 100644 --- a/kg_covid_19/transform.py +++ b/kg_covid_19/transform.py @@ -4,7 +4,6 @@ from typing import List from kg_covid_19.transform_utils.drug_central.drug_central import DrugCentralTransform -from kg_covid_19.transform_utils.hpo.hpo import HpoTransform from kg_covid_19.transform_utils.intact.intact import IntAct from kg_covid_19.transform_utils.ontology import OntologyTransform from kg_covid_19.transform_utils.ontology.ontology_transform import ONTOLOGIES