Skip to content

Commit

Permalink
Changes to test the pipeline with generating indexes and bigbed files…
Browse files Browse the repository at this point in the history
…. Required including the type of the bed file that was being indexed
  • Loading branch information
markmcdowall committed May 9, 2017
1 parent ea1155e commit 3434148
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 16 deletions.
34 changes: 26 additions & 8 deletions process_bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

from dummy_pycompss import *

from tool import bed_indexer

# ------------------------------------------------------------------------------

Expand Down Expand Up @@ -78,10 +79,18 @@ def run(self, file_ids, metadata):
chrom_file = file_ids[1]
hdf5_file = file_ids[2]
assembly = metadata["assembly"]
file_id = metadata["file_id"]
bed_type = metadata["bed_type"]

meta_data = {
"file_id" : b_file,
"bed_type" : bed_type,
"assembly" : assembly
}

# Bed Indexer
b = tool.bedIndexerTool(self.configuration)
bb, h5_idx = b.run((bed_file, chrom_file, hdf5_file), {'assembly' : assembly})
b = bed_indexer.bedIndexerTool()
bb, h5_idx = b.run((bed_file, chrom_file, hdf5_file), meta_data)

return (bb, h5_idx)

Expand All @@ -97,14 +106,16 @@ def run(self, file_ids, metadata):
parser.add_argument("--chrom", help="Matching chrom.size file")
parser.add_argument("--bed_file", help="Bed file to get indexed")
parser.add_argument("--h5_file", help="Location of HDF5 index file")
parser.add_argument("--bed_type", help="Type of Bed file bedN[+P]", default=None)

# Get the matching parameters from the command line
args = parser.parse_args()

assembly = args.assembly
chrom_size_file = args.chrom
bed_file = args.bed_file
h5_file = args.h5_file
hdf5_file = args.h5_file
bed_type = args.bed_type

pb = process_bed()

Expand All @@ -118,19 +129,26 @@ def run(self, file_ids, metadata):


#2. Register the data with the DMP
da = dmp()
da = dmp(test=True)

print(da.get_files_by_user("test"))

cs_file = da.set_file("test", chrom_size_file, "tsv", "ChIP-seq", "", None)
b_file = da.set_file("test", bed_file, "bed", "Assembly", "", None)
h5_file = da.set_file("test", h5_file, "hdf5", "index", "", None)
h5_file = da.set_file("test", hdf5_file, "hdf5", "index", "", None)

print(da.get_files_by_user("test"))

# 3. Instantiate and launch the App
from basic_modules import WorkflowApp
app = WorkflowApp()
results = app.launch(process_bed, [b_file, cs_file, h5_file], {"assembly" : assembly})
#from basic_modules import WorkflowApp
#app = WorkflowApp()
#results = app.launch(process_bed, [b_file, cs_file, h5_file], {"assembly" : assembly})

metadata = {
"file_id" : b_file,
"bed_type" : bed_type,
"assembly" : assembly
}
results = pb.run([bed_file, chrom_size_file, hdf5_file], metadata)

print(da.get_files_by_user("test"))
16 changes: 8 additions & 8 deletions tool/bed_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def bedsort(self, file_bed, file_sorted_bed):


@task(file_sorted_bed=FILE_IN, file_chrom=FILE_IN, file_bb=FILE_OUT)
def bed2bigbed(self, file_bed, file_chrom, file_bb):
def bed2bigbed(self, file_sorted_bed, file_chrom, file_bb):
"""
BED to BigBed converter
Expand Down Expand Up @@ -255,7 +255,7 @@ def run(self, input_files, metadata):
Location of chrom.size file
hdf5_file : str
Location of the HDF5 index file
meta_data : list
metadata : dict
file_id : str
file_id used to identify the original bed file
assembly : str
Expand Down Expand Up @@ -285,17 +285,17 @@ def run(self, input_files, metadata):
hdf5_file = input_files[2]

bed_sorted_name = bed_file.split("/")
bed_sorted_name[-1].replace('.bed', '.sorted.bed')
bed_sorted_name[-1] = bed_sorted_name[-1].replace('.bed', '.sorted.bed')
bed_sorted_file = '/'.join(bed_sorted_name)

bb_name = bed_file.split("/")
bb_name[-1].replace('.bed', '.bb')
bb_name[-1] = bb_name[-1].replace('.bed', '.bb')
bb_file = '/'.join(bb_name)

assembly = meta_data['assembly']
assembly = metadata['assembly']

# handle error
if not self.bedsorted(bed_file, bed_sorted_file):
if not self.bedsort(bed_file, bed_sorted_file):
output_metadata.set_exception(
Exception(
"bedsorted: Could not process files {}, {}.".format(*input_files)))
Expand All @@ -305,7 +305,7 @@ def run(self, input_files, metadata):
Exception(
"bed2bigbed: Could not process files {}, {}.".format(*input_files)))

if not self.bed2hdf5(file_id, assembly, bed_file, hdf5_file):
if not self.bed2hdf5(metadata['file_id'], assembly, bed_file, hdf5_file):
output_metadata.set_exception(
Exception(
"bed2hdf5: Could not process files {}, {}.".format(*input_files)))
Expand Down

0 comments on commit 3434148

Please sign in to comment.