Skip to content

Commit

Permalink
Updated the TADbit 3D model indexing pipeline and added tool tests
Browse files Browse the repository at this point in the history
  • Loading branch information
markmcdowall committed Dec 18, 2017
1 parent f4fe5de commit b284440
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 13 deletions.
52 changes: 52 additions & 0 deletions tests/test_json3d_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
.. See the NOTICE file distributed with this work for additional information
regarding copyright ownership.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from __future__ import print_function

import os.path
import h5py
import pytest # pylint: disable=unused-import

from basic_modules.metadata import Metadata

from tool.json_3d_indexer import json3dIndexerTool

@pytest.mark.json3d
def test_json3d_indexer():
    """
    Function to test the 3D JSON model indexer

    Runs json3dIndexerTool over the sample models archive in the test data
    directory and checks that a non-empty HDF5 index file has been written to
    the requested output location.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")
    index_file = resource_path + "sample.models.hdf5"

    input_files = {
        "models" : resource_path + "sample_3D_models.tar.gz"
    }

    output_files = {
        "index" : index_file
    }

    # NOTE(review): the data type, file type and location values below look
    # like they were copied from the GFF3 tests - confirm the correct values
    # for a 3D models archive. The "assembly" entry is required, the indexer
    # copies it into the metadata of the generated index.
    metadata = {
        "models" : Metadata(
            "data_rnaseq", "gff3", "test_gff3_location", [], {'assembly' : 'test'})
    }

    j3d_handle = json3dIndexerTool()
    j3d_handle.run(input_files, metadata, output_files)

    # Without these checks the test passes even if no index is produced
    assert os.path.isfile(index_file) is True
    assert os.path.getsize(index_file) > 0
2 changes: 1 addition & 1 deletion tool/gff3_sorter.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def run(self, input_files, input_metadata, output_files):
sources=[],
taxon_id=input_metadata["gff3"].taxon_id,
meta_data={
"tool": "gff3_sorter"
"tool" : "gff3_sorter"
}
)
}
Expand Down
46 changes: 34 additions & 12 deletions tool/json_3d_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,23 @@
import numpy as np
import h5py

from utils import logger

try:
if hasattr(sys, '_run_from_cmdl') is True:
raise ImportError
from pycompss.api.parameter import FILE_IN, FILE_INOUT
from pycompss.api.parameter import FILE_IN, FILE_OUT
from pycompss.api.task import task
from pycompss.api.api import compss_wait_on
except ImportError:
print("[Warning] Cannot import \"pycompss\" API packages.")
print(" Using mock decorators.")
logger.warn("[Warning] Cannot import \"pycompss\" API packages.")
logger.warn(" Using mock decorators.")

from dummy_pycompss import FILE_IN, FILE_INOUT
from dummy_pycompss import task
from dummy_pycompss import compss_wait_on
from utils.dummy_pycompss import FILE_IN, FILE_OUT # pylint: disable=ungrouped-imports
from utils.dummy_pycompss import task
from utils.dummy_pycompss import compss_wait_on

from basic_modules.metadata import Metadata
from basic_modules.tool import Tool

# ------------------------------------------------------------------------------
Expand All @@ -51,13 +54,18 @@ class json3dIndexerTool(Tool):
Tool for running indexers over 3D JSON files for use in the RESTful API
"""

def __init__(self):
def __init__(self, configuration=None):
"""
Init function
"""
print("3D JSON Model Indexer")
Tool.__init__(self)

if configuration is None:
configuration = {}

self.configuration.update(configuration)

def unzipJSON(self, file_targz):
"""
Unzips the zipped folder containing all the models for regions of the
Expand Down Expand Up @@ -99,7 +107,7 @@ def unzipJSON(self, file_targz):

return onlyfiles

@task(json_file_gz=FILE_IN, hdf5_file=FILE_INOUT)
@task(returns=bool, json_file_gz=FILE_IN, hdf5_file=FILE_OUT)
def json2hdf5(self, json_file_gz, hdf5_file):
"""
Genome Model Indexing
Expand Down Expand Up @@ -246,7 +254,7 @@ def json2hdf5(self, json_file_gz, hdf5_file):

return True

def run(self, input_files, output_files, metadata=None):
def run(self, input_files, input_metadata, output_files):
"""
Function to index models of the genome structure generated by TADbit on a
per dataset basis so that they can be easily distributed as part of the
Expand Down Expand Up @@ -283,13 +291,27 @@ def run(self, input_files, output_files, metadata=None):
j3di = j3d.run((gz_file, hdf5_file_id), ())
"""

targz_file = input_files[0]
h5_file = input_files[1]
targz_file = input_files["models"]
h5_file = output_files["index"]

output_metadata = {}

# handle error
results = self.json2hdf5(targz_file, h5_file)
results = compss_wait_on(results)

return ([h5_file], output_metadata)
output_metadata = {
"index": Metadata(
data_type=input_metadata["models"].data_type,
file_type=input_metadata["models"].file_type,
file_path=input_metadata["models"].file_path,
sources=[],
taxon_id=input_metadata["models"].taxon_id,
meta_data={
"tool" : "json_3d_indexer",
"assembly" : input_metadata["models"].meta_data["assembly"]
}
)
}

return (output_files, output_metadata)

0 comments on commit b284440

Please sign in to comment.