Skip to content

Commit

Permalink
Added restricted_int
Browse files Browse the repository at this point in the history
New argparse type parameter to enforce positive integers from user input
  • Loading branch information
ACEnglish committed May 17, 2022
1 parent 9e4e8a8 commit 2f5b001
Show file tree
Hide file tree
Showing 15 changed files with 72 additions and 41 deletions.
4 changes: 4 additions & 0 deletions docs/api/truvari.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ restricted_float
^^^^^^^^^^^^^^^^
.. autofunction:: restricted_float

restricted_int
^^^^^^^^^^^^^^^^
.. autofunction:: restricted_int

setup_logging
^^^^^^^^^^^^^
.. autofunction:: setup_logging
Expand Down
1 change: 1 addition & 0 deletions truvari/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
make_temp_filename,
ref_ranges,
restricted_float,
restricted_int,
setup_logging,
setup_progressbar,
vcf_ranges,
Expand Down
10 changes: 5 additions & 5 deletions truvari/annos/bpovl.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,21 @@ def parse_args(args):
help="Tab-delimited annotation file")
parser.add_argument("-o", "--output", type=str, required=True,
help="Output joblib DataFrame")
parser.add_argument("--sizemin", type=int, default=50,
parser.add_argument("--sizemin", type=truvari.restricted_int, default=50,

This comment has been minimized.

Copy link
@bnoyvert

bnoyvert May 17, 2022

Contributor

Typo? Should be default=50?

help="Minimum size of variant to annotate (%(default)s)")
parser.add_argument("--spanmax", type=int, default=50000,
parser.add_argument("--spanmax", type=truvari.restricted_int, default=50000,
help="Maximum span of SVs to annotate (%(default)s)")
annosg = parser.add_argument_group("Annotation File Arguments")
annosg.add_argument("-p", "--preset", choices=PRESET_FMTS.keys(), default=None,
help=("Annotation format. This option overwrites "
"-s, -b, -e, -c and -1 (%(default)s)"))
annosg.add_argument("-c", "--comment", type=str, default="#",
help="Skip lines started with character. (%(default)s)")
annosg.add_argument("-s", "--sequence", type=int, default=0,
annosg.add_argument("-s", "--sequence", type=truvari.restricted_int, default=0,
help="Column of sequence/chromosome name. (%(default)s)")
annosg.add_argument("-b", "--begin", type=int, default=1,
annosg.add_argument("-b", "--begin", type=truvari.restricted_int, default=1,
help="Column of start chromosomal position. (%(default)s)")
annosg.add_argument("-e", "--end", type=int, default=2,
annosg.add_argument("-e", "--end", type=truvari.restricted_int, default=2,
help="Column of end chromosomal position. (%(default)s)")
# The end column can be the same as the start column. [2]
annosg.add_argument("-1", "--one-based", action='store_true',
Expand Down
2 changes: 1 addition & 1 deletion truvari/annos/density.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def parse_args(args):
help="Output joblib DataFrame")
parser.add_argument("-m", "--mask", type=str,
help="Mask bed file")
parser.add_argument("-w", "--windowsize", type=int, default=10000,
parser.add_argument("-w", "--windowsize", type=truvari.restricted_int, default=10000,
help="Window size (%(default)s)")
parser.add_argument("-t", "--threshold", type=float, default=3,
help="std for identifying 'dense' regions (%(default)s)")
Expand Down
6 changes: 3 additions & 3 deletions truvari/annos/grm.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,11 @@ def parse_args(args):
help="Bed file of regions to parse (None)")
parser.add_argument("-o", "--output", default="results.jl",
help="Output dataframe (%(default)s)")
parser.add_argument("-k", "--kmersize", default=50, type=int,
parser.add_argument("-k", "--kmersize", default=50, type=truvari.restricted_int,
help="Size of kmer to map (%(default)s)")
parser.add_argument("-m", "--min-size", default=25, type=int,
parser.add_argument("-m", "--min-size", default=25, type=truvari.restricted_int,
help="Minimum size of variants to map (%(default)s)")
parser.add_argument("-t", "--threads", default=os.cpu_count(), type=int,
parser.add_argument("-t", "--threads", default=os.cpu_count(), type=truvari.restricted_int,
help="Number of threads (%(default)s)")
parser.add_argument("--debug", action="store_true",
help="Verbose logging")
Expand Down
6 changes: 3 additions & 3 deletions truvari/annos/hompct.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ def parse_args(args):
help="Compressed, indexed VCF to annotate ")
parser.add_argument("-o", "--output", type=str, default="/dev/stdout",
help="Output filename (stdout)")
parser.add_argument("-b", "--buffer", type=int, default=5000,
parser.add_argument("-b", "--buffer", type=truvari.restricted_int, default=5000,
help="Number of base-pairs up/dn-stream to query (%(default)s)")
parser.add_argument("-m", "--minanno", type=int, default=50,
parser.add_argument("-m", "--minanno", type=truvari.restricted_int, default=50,
help="Minimum size of event to annotate (%(default)s)")
parser.add_argument("-M", "--maxgt", type=int, default=1,
parser.add_argument("-M", "--maxgt", type=truvari.restricted_int, default=1,
help="Largest event size to count for genotyping (%(default)s)")
parser.add_argument("--debug", action="store_true",
help="Verbose logging")
Expand Down
4 changes: 2 additions & 2 deletions truvari/annos/numneigh.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def parse_args(args):
help="VCF to annotate")
parser.add_argument("-o", "--output", type=str, default="/dev/stdout",
help="Output vcf (stdout)")
parser.add_argument("-r", "--refdist", type=int, default=1000,
parser.add_argument("-r", "--refdist", type=truvari.restricted_int, default=1000,
help="Max reference location distance (%(default)s)")
parser.add_argument("-s", "--sizemin", type=int, default=50,
parser.add_argument("-s", "--sizemin", type=truvari.restricted_int, default=50,
help="Minimum variant size to consider for annotation (%(default)s)")
parser.add_argument("--passonly", action="store_true", default=False,
help="Only count calls with FILTER == PASS")
Expand Down
6 changes: 3 additions & 3 deletions truvari/annos/remap.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,14 @@ def parse_args(args):
help="BWA indexed reference")
parser.add_argument("-o", "--output", default="/dev/stdout",
help="Output VCF (%(default)s)")
parser.add_argument("-m", "--minlength", default=50, type=int,
parser.add_argument("-m", "--minlength", default=50, type=truvari.restricted_int,
help="Smallest length of allele to remap (%(default)s)")
parser.add_argument("-t", "--threshold", type=truvari.restricted_float, default=.8,
help="Threshold for pct of allele covered to consider hit (%(default)s)")
parser.add_argument("-d", "--dist", type=int, default=10,
parser.add_argument("-d", "--dist", type=truvari.restricted_int, default=10,
help=("Minimum distance an alignment must be from a DEL's "
"position to be considered (%(default)s))"))
parser.add_argument("-H", "--hits", type=int, default=0,
parser.add_argument("-H", "--hits", type=truvari.restricted_int, default=0,
help="Report top hits as chr:start-end.pct (max %(default)s)")
parser.add_argument("--debug", action="store_true",
help="Verbose logging")
Expand Down
6 changes: 3 additions & 3 deletions truvari/annos/repmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,15 +182,15 @@ def parse_args(args):
help="Output filename (%(default)s)")
parser.add_argument("-e", "--executable", type=str, default="RepeatMasker",
help="Path to RepeatMasker (%(default)s)")
parser.add_argument("-m", "--min-length", type=int, default=50,
parser.add_argument("-m", "--min-length", type=truvari.restricted_int, default=50,
help="Minimum size of entry to annotate (%(default)s)")
parser.add_argument("-M", "--max-length", type=int, default=50000,
parser.add_argument("-M", "--max-length", type=truvari.restricted_int, default=50000,
help="Maximum size of entry to annotate (%(default)s)")
parser.add_argument("-t", "--threshold", type=truvari.restricted_float, default=.8,
help="Threshold for pct of allele covered (%(default)s)")
parser.add_argument("-p", "--params", type=str, default=DEFAULTPARAMS,
help="Default parameter string to send to RepeatMasker (%(default)s)")
parser.add_argument("-T", "--threads", type=int, default=os.cpu_count(),
parser.add_argument("-T", "--threads", type=truvari.restricted_int, default=os.cpu_count(),
help="Number of threads to use (%(default)s)")
parser.add_argument("--debug", action="store_true",
help="Verbose logging")
Expand Down
2 changes: 1 addition & 1 deletion truvari/annos/svinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def parse_args(args):
help="VCF to annotate (stdin)")
parser.add_argument("-o", "--output", type=str, default="/dev/stdout",
help="Output filename (stdout)")
parser.add_argument("-m", "--minsize", type=int, default=50,
parser.add_argument("-m", "--minsize", type=truvari.restricted_int, default=50,
help="Minimum size of entry to annotate (%(default)s)")
truvari.setup_logging()
return parser.parse_args(args)
Expand Down
8 changes: 4 additions & 4 deletions truvari/annos/trf.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,13 @@ def parse_args(args):
help="Simple repeats bed")
parser.add_argument("-f", "--reference", type=str, required=True,
help="Reference fasta file")
parser.add_argument("-m", "--min-length", type=int, default=50,
parser.add_argument("-m", "--min-length", type=truvari.restricted_int, default=50,
help="Minimum size of entry to annotate (%(default)s)")
parser.add_argument("-M", "--max-length", type=int, default=10000,
parser.add_argument("-M", "--max-length", type=truvari.restricted_int, default=10000,
help="Maximum size of sequence to run through trf (%(default)s)")
parser.add_argument("-t", "--threads", type=int, default=multiprocessing.cpu_count(),
parser.add_argument("-t", "--threads", type=truvari.restricted_int, default=multiprocessing.cpu_count(),
help="Number of threads to use (%(default)s)")
parser.add_argument("-C", "--chunk-size", type=int, default=1,
parser.add_argument("-C", "--chunk-size", type=truvari.restricted_int, default=1,
help="Size (in mbs) of reference chunks for parallelization (%(default)s)")
parser.add_argument("--debug", action="store_true",
help="Verbose logging")
Expand Down
23 changes: 13 additions & 10 deletions truvari/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ def __eq__(self, other):
def __str__(self):
    """
    Return a two-line summary: state and score followed by ``->`` on the
    first line, then the base and comp attributes on the second line.
    """
    state, score = self.state, self.score
    base, comp = self.base, self.comp
    return f'{state} {score} ->\n {base} {comp}'

def __repr__(self):
    """
    Return a compact debugging representation showing only the state and
    score of this result.
    """
    state, score = self.state, self.score
    return f'<truvari.bench.MatchResult ({state} {score})>'


class Matcher():
"""
Expand All @@ -80,7 +83,7 @@ class Matcher():
>>> v = pysam.VariantFile('repo_utils/test_files/input1.vcf.gz')
>>> one = next(v); two = next(v)
>>> mat.build_match(one, two)
<0 False chr20:66235->chr20:68303>
<truvari.bench.MatchResult (False 0.0)>
"""

def __init__(self, params=None, args=None):
Expand Down Expand Up @@ -603,11 +606,11 @@ def parse_args(args):
help="Turn on progress monitoring")

thresg = parser.add_argument_group("Comparison Threshold Arguments")
thresg.add_argument("-r", "--refdist", type=int, default=defaults.refdist,
thresg.add_argument("-r", "--refdist", type=truvari.restricted_int, default=defaults.refdist,
help="Max reference location distance (%(default)s)")
thresg.add_argument("-p", "--pctsim", type=truvari.restricted_float, default=defaults.pctsim,
help="Min percent allele sequence similarity. Set to 0 to ignore. (%(default)s)")
thresg.add_argument("-B", "--minhaplen", type=int, default=defaults.minhaplen,
thresg.add_argument("-B", "--minhaplen", type=truvari.restricted_int, default=defaults.minhaplen,
help="Minimum haplotype sequence length to create (%(default)s)")
thresg.add_argument("-P", "--pctsize", type=truvari.restricted_float, default=defaults.pctsize,
help="Min pct allele size similarity (minvarsize/maxvarsize) (%(default)s)")
Expand All @@ -617,7 +620,7 @@ def parse_args(args):
help="Variant types don't need to match to compare (%(default)s)")
thresg.add_argument("--use-lev", action="store_true",
help="Use the Levenshtein distance ratio instead of edlib editDistance ratio (%(default)s)")
thresg.add_argument("-C", "--chunksize", type=int, default=defaults.chunksize,
thresg.add_argument("-C", "--chunksize", type=truvari.restricted_int, default=defaults.chunksize,
help="Max reference distance to compare calls (%(default)s)")

genoty = parser.add_argument_group("Genotype Comparison Arguments")
Expand All @@ -629,19 +632,19 @@ def parse_args(args):
help="Comparison calls sample to use (first)")

filteg = parser.add_argument_group("Filtering Arguments")
filteg.add_argument("-s", "--sizemin", type=int, default=defaults.sizemin,
filteg.add_argument("-s", "--sizemin", type=truvari.restricted_int, default=defaults.sizemin,
help="Minimum variant size to consider for comparison (%(default)s)")
filteg.add_argument("-S", "--sizefilt", type=int, default=defaults.sizefilt,
filteg.add_argument("-S", "--sizefilt", type=truvari.restricted_int, default=defaults.sizefilt,
help="Minimum variant size to load into IntervalTree (%(default)s)")
filteg.add_argument("--sizemax", type=int, default=defaults.sizemax,
filteg.add_argument("--sizemax", type=truvari.restricted_int, default=defaults.sizemax,
help="Maximum variant size to consider for comparison (%(default)s)")
filteg.add_argument("--passonly", action="store_true", default=defaults.passonly,
help="Only consider calls with FILTER == PASS")
filteg.add_argument("--no-ref", default=defaults.no_ref, choices=['a', 'b', 'c'],
help="Don't include 0/0 or ./. GT calls from all (a), base (b), or comp (c) vcfs (%(default)s)")
filteg.add_argument("--includebed", type=str, default=None,
help="Bed file of regions in the genome to include only calls overlapping")
thresg.add_argument("--extend", type=int, default=0,
thresg.add_argument("--extend", type=truvari.restricted_int, default=0,
help="Distance to allow comp entries outside of includebed regions (%(default)s)")
filteg.add_argument("--multimatch", action="store_true", default=defaults.multimatch,
help=("Allow base calls to match multiple comparison calls, and vice versa. "
Expand All @@ -652,7 +655,7 @@ def parse_args(args):
parser.error("--reference is required when --pctsim is set")
if args.chunksize < args.refdist:
parser.error("--chunksize must be >= --refdist")
if args.extend > 0 and args.includebed is None:
if args.extend and args.includebed is None:
parser.error("--extend can only be used when --includebed is set")
return args

Expand Down Expand Up @@ -797,7 +800,7 @@ def bench_main(cmdargs):
for call in itertools.chain.from_iterable(map(compare_chunk, chunks)):
# setting non-matched call variants that are not fully contained in the original regions to None
# These don't count as FP or TP and don't appear in the output vcf files
if args.extend > 0 and call.comp is not None and not call.state and not regions.include(call.comp):
if args.extend and call.comp is not None and not call.state and not regions.include(call.comp):
call.comp = None
output_writer(call, outputs, args.sizemin)

Expand Down
8 changes: 4 additions & 4 deletions truvari/collapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,11 @@ def parse_args(args):

# trubench.add_comparison_args(parser)
thresg = parser.add_argument_group("Comparison Threshold Arguments")
thresg.add_argument("-r", "--refdist", type=int, default=500,
thresg.add_argument("-r", "--refdist", type=truvari.restricted_int, default=500,
help="Max reference location distance (%(default)s)")
thresg.add_argument("-p", "--pctsim", type=truvari.restricted_float, default=0.95,
help="Min percent allele sequence similarity. Set to 0 to ignore. (%(default)s)")
thresg.add_argument("-B", "--minhaplen", type=truvari.restricted_float, default=50,
thresg.add_argument("-B", "--minhaplen", type=truvari.restricted_int, default=50,
help="Minimum haplotype sequence length to create (%(default)s)")
thresg.add_argument("-P", "--pctsize", type=truvari.restricted_float, default=0.95,
help="Min pct allele size similarity (minvarsize/maxvarsize) (%(default)s)")
Expand All @@ -270,9 +270,9 @@ def parse_args(args):
help=("Comma separated list of FORMAT fields to consolidate into the kept "
"entry by taking the first non-null from all neighbors (%(default)s)"))
filteg = parser.add_argument_group("Filtering Arguments")
filteg.add_argument("-s", "--sizemin", type=int, default=50,
filteg.add_argument("-s", "--sizemin", type=truvari.restricted_int, default=50,
help="Minimum variant size to consider for comparison (%(default)s)")
filteg.add_argument("-S", "--sizemax", type=int, default=50000,
filteg.add_argument("-S", "--sizemax", type=truvari.restricted_int, default=50000,
help="Maximum variant size to consider for comparison (%(default)s)")
filteg.add_argument("--passonly", action="store_true", default=False,
help="Only consider calls with FILTER == PASS")
Expand Down
4 changes: 2 additions & 2 deletions truvari/divide.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ def parse_args(args):
help="VCF to split")
parser.add_argument("output", metavar="DIR",
help="Output directory to save parts")
parser.add_argument("-b", "--buffer", type=int, default=1000,
parser.add_argument("-b", "--buffer", type=truvari.restricted_int, default=1000,
help="Buffer to make mini-clusters (%(default)s)")
parser.add_argument("-m", "--min", type=int, default=100,
parser.add_argument("-m", "--min", type=truvari.restricted_int, default=100,
help="Minimum number of entries per-vcf (%(default)s)")
parser.add_argument("--no-compress", action="store_false",
help="Don't attempt to compress/index sub-VCFs")
Expand Down
23 changes: 23 additions & 0 deletions truvari/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,29 @@ def restricted_float(x):
f"{x} not in range (0, 1)")
return x

def restricted_int(x):
    """
    Restrict int to non-negative values (zero is allowed).
    Raises argparse.ArgumentTypeError if the converted int is negative.
    Used with :class:`argparse.ArgumentParser.add_argument` type parameter

    :param `x`: number to check; converted with ``int(x)``, so the strings
        argparse passes in are accepted
    :type `x`: int or str

    :return: input number converted to int
    :rtype: int

    :raises argparse.ArgumentTypeError: if the converted value is < 0
    :raises ValueError: propagated from ``int(x)`` when `x` is not a valid integer

    Example
        >>> import truvari
        >>> truvari.restricted_int(5)
        5
        >>> truvari.restricted_int(0)
        0
        >>> truvari.restricted_int(-2)
        Traceback (most recent call last):
        argparse.ArgumentTypeError: -2 is < 0
    """
    # Convert first so the check and the error message use the integer value
    x = int(x)
    if x < 0:
        raise argparse.ArgumentTypeError(f"{x} is < 0")
    return x

def setup_progressbar(size):
"""
Expand Down

0 comments on commit 2f5b001

Please sign in to comment.