Skip to content

Commit

Permalink
Merge 432804f into ce9ebe8
Browse files Browse the repository at this point in the history
  • Loading branch information
justaddcoffee committed Jul 15, 2020
2 parents ce9ebe8 + 432804f commit e7ae82d
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 8 deletions.
8 changes: 8 additions & 0 deletions download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,11 @@
-
url: http://current.geneontology.org/ontology/extensions/go-plus.json
local_name: go-plus.json

#
# GO-CAM
#

-
url: https://github.com/justaddcoffee/go-cam-models-kg-covid-19/raw/master/lifted-go-cams-20200619.xml.gz
local_name: go-cams.rdf.gz
1 change: 1 addition & 0 deletions kg_covid_19/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
'GoTransform': OntologyTransform,
'HpTransform': OntologyTransform,
'MondoTransform': OntologyTransform,
'GoCam': OntologyTransform,
'ChemblTransform': ChemblTransform
}

Expand Down
43 changes: 35 additions & 8 deletions kg_covid_19/transform_utils/ontology/ontology_transform.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
import os
import logging
import tempfile
from typing import Optional

from kg_covid_19.transform_utils.transform import Transform
from kgx import PandasTransformer, ObographJsonTransformer # type: ignore
from kgx import PandasTransformer, ObographJsonTransformer, RdfTransformer # type: ignore

from kg_covid_19.utils.transform_utils import ungzip_to_tempdir

ONTOLOGIES = {
'HpTransform': 'hp.json',
'GoTransform': 'go-plus.json',
'MondoTransform': 'mondo.json'
'MondoTransform': 'mondo.json',
'GoCam': 'go-cams.rdf.gz'
}


class OntologyTransform(Transform):
"""
OntologyTransform parses an Obograph JSON form of an Ontology into nodes and edges.
Expand All @@ -19,7 +24,7 @@ def __init__(self, input_dir: str = None, output_dir: str = None):
source_name = "ontologies"
super().__init__(source_name, input_dir, output_dir)

def run(self, data_file: Optional[str] = None) -> None:
def run(self, data_file: Optional[str] = None, input_format = 'json') -> None:
"""Method is called and performs needed transformations to process
an ontology.
Expand All @@ -32,28 +37,50 @@ def run(self, data_file: Optional[str] = None) -> None:
"""
if data_file:
k = data_file.split('.')[0]

if data_file.endswith('.gz'):
input_format = 'rdf/xml'
xml_tempdir = tempfile.mkdtemp()
data_file = ungzip_to_tempdir(os.path.join(
self.input_base_dir, data_file), xml_tempdir)

data_file = os.path.join(self.input_base_dir, data_file)
self.parse(k, data_file, k)
self.parse(k, data_file, k, input_format=input_format)
else:
# load all ontologies
for k in ONTOLOGIES.keys():

data_file = os.path.join(self.input_base_dir, ONTOLOGIES[k])
self.parse(k, data_file, k)

def parse(self, name: str, data_file: str, source: str) -> None:
if data_file.endswith('.gz'):
input_format = 'rdf/xml'
xml_tempdir = tempfile.mkdtemp()
data_file = ungzip_to_tempdir(os.path.join(
self.input_base_dir, data_file), xml_tempdir)

self.parse(k, data_file, k, input_format=input_format)

def parse(self, name: str, data_file: str, source: str,
          input_format: str = 'json') -> None:
    """Parse one ontology file and write it out as TSV.

    A transformer is chosen from ``input_format``, the file is parsed
    with ``provided_by`` set to ``source``, and the resulting graph is
    saved through a PandasTransformer under ``self.output_dir``.

    Args:
        name: Name of the ontology; used as the output file name.
        data_file: Path of the data file to parse.
        source: Source name, passed to the transformer as ``provided_by``.
        input_format: Format of the input file, either ``'json'``
            (Obograph JSON) or ``'rdf/xml'``.

    Returns:
        None.

    Raises:
        ValueError: If ``input_format`` is not a supported format.
    """
    logging.info(f"Parsing {data_file}")
    if input_format == 'json':
        transformer = ObographJsonTransformer()
    elif input_format == 'rdf/xml':
        transformer = RdfTransformer()
    else:
        # Report the rejected value itself; interpolating the builtin
        # ``format`` would print "<built-in function format>" instead.
        raise ValueError("No way to parse format %s" % input_format)
    transformer.parse(data_file, provided_by=source)
    output_transformer = PandasTransformer(transformer.graph)
    output_transformer.save(
        filename=os.path.join(self.output_dir, f'{name}'),
        output_format='tsv',
        mode=None,
    )
13 changes: 13 additions & 0 deletions kg_covid_19/utils/transform_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
# -*- coding: utf-8 -*-
import gzip
import logging
import os
import re
import shutil
import tempfile
import zipfile
from typing import Any, Dict, List, Union
from tqdm import tqdm # type: ignore
Expand Down Expand Up @@ -157,6 +160,16 @@ def unzip_to_tempdir(zip_file_name: str, tempdir: str) -> None:
z.extractall(tempdir)


def ungzip_to_tempdir(gzipped_file: str, tempdir: str) -> str:
    """Decompress a gzip file into ``tempdir`` and return the new path.

    The output file keeps the base name of ``gzipped_file``, with a
    trailing ``.gz`` extension removed when present.

    Args:
        gzipped_file: Path to the gzip-compressed input file.
        tempdir: Directory in which the decompressed file is written.

    Returns:
        Path to the decompressed file inside ``tempdir``.

    Raises:
        ValueError: If the output path would be the input file itself
            (input has no ``.gz`` suffix and already lives in ``tempdir``).
    """
    ungzipped_file = os.path.join(tempdir, os.path.basename(gzipped_file))
    if ungzipped_file.endswith('.gz'):
        ungzipped_file = os.path.splitext(ungzipped_file)[0]

    # Opening the destination with 'wb' truncates it immediately; if it is
    # the same file as the source, the input would be wiped before it is read.
    if os.path.realpath(ungzipped_file) == os.path.realpath(gzipped_file):
        raise ValueError(
            "Refusing to decompress %s onto itself" % gzipped_file)

    with gzip.open(gzipped_file, 'rb') as f_in, \
            open(ungzipped_file, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    return ungzipped_file


def guess_bl_category(identifier: str) -> str:
"""Guess category for a given identifier.
Expand Down

0 comments on commit e7ae82d

Please sign in to comment.