Skip to content

Commit

Permalink
Cleaning --extend code
Browse files: browse the repository at this point in the history
  • Loading branch information
ACEnglish committed May 17, 2022
1 parent dbdbe55 commit 9e4e8a8
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 20 deletions.
20 changes: 3 additions & 17 deletions truvari/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ def parse_args(args):
filteg.add_argument("--includebed", type=str, default=None,
help="Bed file of regions in the genome to include only calls overlapping")
thresg.add_argument("--extend", type=int, default=0,
help="Number of bases to extend the regions in the bed file for variant matching, default is 0, only used if --includebed is defined. Allows matching base variants to comp variants that are just outside the regions. ")
help="Distance to allow comp entries outside of includebed regions (%(default)s)")
filteg.add_argument("--multimatch", action="store_true", default=defaults.multimatch,
help=("Allow base calls to match multiple comparison calls, and vice versa. "
"Output vcfs will have redundant entries. (%(default)s)"))
Expand Down Expand Up @@ -787,22 +787,8 @@ def bench_main(cmdargs):
args.includebed,
args.sizemax)

merged_overlaps = regions.merge_overlaps()
if merged_overlaps:
logging.info("Found %d chromosomes with overlapping regions",
len(merged_overlaps))
logging.debug("CHRs: %s", merged_overlaps)

if args.extend > 0:
logging.info("Extending the regions by %d bases on each side", args.extend)
regions_extended = copy.deepcopy(regions)
regions_extended.extend(args.extend)
merged_overlaps_extended = regions_extended.merge_overlaps()
if merged_overlaps_extended:
logging.info("After region extension found %d chromosomes with overlapping regions. Merged the overlaps", len(merged_overlaps_extended))
logging.debug("CHRs: %s", merged_overlaps_extended)
else:
regions_extended = regions
regions.merge_overlaps()
regions_extended = regions.extend(args.extend) if args.extend else regions

base_i = regions.iterate(base)
comp_i = regions_extended.iterate(comp)
Expand Down
16 changes: 13 additions & 3 deletions truvari/region_vcf_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Helper class to specify included regions of the genome when iterating events.
"""
import sys
import copy
import gzip
import logging
from collections import defaultdict
Expand Down Expand Up @@ -65,7 +66,10 @@ def merge_overlaps(self):
post_len = len(self.tree[i])
if pre_len != post_len:
chr_with_overlaps.append(i)
return chr_with_overlaps
if chr_with_overlaps:
logging.info("Found %d chromosomes with overlapping regions",
len(chr_with_overlaps))
logging.debug("CHRs: %s", chr_with_overlaps)

def iterate(self, vcf_file):
"""
Expand Down Expand Up @@ -98,10 +102,16 @@ def include(self, entry):
def extend(self, pad):
"""
Extends all intervals by a fixed number of bases on each side
Returns a copy of this IntervalTree
"""
for chrom in self.tree:
self.tree[chrom] = IntervalTree.from_tuples(((max(0, i.begin - pad), i.end + pad)) for i in self.tree[chrom])
logging.info("Extending the regions by %d bases", pad)
ret = copy.deepcopy(self)

for chrom in ret.tree:
ret.tree[chrom] = IntervalTree.from_tuples(((max(0, i.begin - pad), i.end + pad)) for i in ret.tree[chrom])

ret.merge_overlaps()
return ret

def build_anno_tree(filename, chrom_col=0, start_col=1, end_col=2, one_based=False, comment='#'):
"""
Expand Down

0 comments on commit 9e4e8a8

Please sign in to comment.