From 522251516a9230192e04f6ed318085a7e2f56dd0 Mon Sep 17 00:00:00 2001 From: Peter Krusche Date: Mon, 10 Oct 2016 15:45:00 +0100 Subject: [PATCH] HAP-244 progress --- .gitignore | 4 +++ src/c++/lib/variant/VariantReader.cpp | 38 ++++++++++++++++++++++++++ src/c++/main/vcfcheck.cpp | 5 ++++ src/data/faulty_gt.vcf | 7 +++++ src/data/faulty_gt.vcf.gz | Bin 0 -> 261 bytes src/data/faulty_gt.vcf.gz.tbi | Bin 0 -> 104 bytes src/data/per_sample_ft_lhs.vcf | 8 ++++++ src/data/per_sample_ft_rhs.vcf | 12 ++++++++ 8 files changed, 74 insertions(+) create mode 100755 src/data/faulty_gt.vcf create mode 100644 src/data/faulty_gt.vcf.gz create mode 100644 src/data/faulty_gt.vcf.gz.tbi create mode 100755 src/data/per_sample_ft_lhs.vcf create mode 100755 src/data/per_sample_ft_rhs.vcf diff --git a/.gitignore b/.gitignore index 0a89648..b083be6 100755 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,7 @@ tags_sorted_by_file *.sublime-project *.sublime-workspace Thumbs.db +/bin +/lib +/include +/nbproject \ No newline at end of file diff --git a/src/c++/lib/variant/VariantReader.cpp b/src/c++/lib/variant/VariantReader.cpp index 0a1cc76..ae7067d 100755 --- a/src/c++/lib/variant/VariantReader.cpp +++ b/src/c++/lib/variant/VariantReader.cpp @@ -591,6 +591,44 @@ bool VariantReader::advance() ++ncalls; bcf_unpack(line, BCF_UN_ALL); + + std::string fmt_strings = bcfhelpers::getFormatString(reader.header, line, "FT", isample, ""); + std::vector<std::string> fmt_filters; + stringutil::split(fmt_strings, fmt_filters, ";", false); + for(auto const & f : fmt_filters) + { + if(f.empty() || f == "PASS") + { + continue; + } + fail = true; + bool has_filter = false; + for(size_t ff = 0; ff < vars.calls[sid].nfilter; ++ff) + { + if(f == vars.calls[sid].filter[ff]) + { + has_filter = true; + break; + } + } + if(!has_filter) + { + if(vars.calls[sid].nfilter + 1 > MAX_FILTER) + { + error("Too many filters at %s:%i in sample %i", vars.chr.c_str(), vars.pos, sid); + } + vars.calls[sid].filter[vars.calls[sid].nfilter] = f; // store in the next free slot (index nfilter); the count is bumped on the following line
+ vars.calls[sid].nfilter++; + } + } + + if(getApplyFilters((int) sid) && fail) + { + vars.calls[sid].ngt = 0; + vars.calls[sid].phased = false; + vars.calls[sid].nfilter = 0; + continue; + } vars.calls[sid].qual = line->qual; diff --git a/src/c++/main/vcfcheck.cpp b/src/c++/main/vcfcheck.cpp index 820a7bf..20adee6 100755 --- a/src/c++/main/vcfcheck.cpp +++ b/src/c++/main/vcfcheck.cpp @@ -353,6 +353,11 @@ int main(int argc, char *argv[]) { if(gt[g] > 0) { + if(gt[g] + 1 > line->n_allele) + { + error("Call with invalid genotype (non-existent allele) at %s:%i", + vchr.c_str(), vstart + 1); + } const char * alt = line->d.allele[gt[g]]; if(strchr(alt, '.') && strlen(alt) > 1) diff --git a/src/data/faulty_gt.vcf b/src/data/faulty_gt.vcf new file mode 100755 index 0000000..534a595 --- /dev/null +++ b/src/data/faulty_gt.vcf @@ -0,0 +1,7 @@ +##fileformat=VCFv4.1 +##contig= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12877 +chrQ 1 . A . . PASS . GT 0/1 +chrQ 2 . A T . PASS . 
GT 0/2 diff --git a/src/data/faulty_gt.vcf.gz b/src/data/faulty_gt.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..1671a375ac54a74d601cea168647691381501a1e GIT binary patch literal 261 zcmb2|=3rp}f&Xj_PR>jWFBoF`5B4<~2(+C)9GvsStDsx$&B44&45_mfI7;?ze$(K) zWk!^5O*Nx`)r>O_Pk%rEXWi|U%4N-#rqkXj*G2z6Qh2xQLfF?5<)mA3J}+ehzkhQG zfB2ol^<1~4YUP`!*DuRUdD&FjD~sMr>Oa=~@sd>OwyX;!8}oazw)}PYyMIPaTu8Lq z^Olf(tXHmS+};06^VPw=f=P?#uexz)-3rGe0je?)Y776QN)}8!xuof(Wkp=WoQV&# zdFHNU68O;A`}lC*)vr5(IOltRYB|XBRGMcFd$fMtIeA6~26;4(N;5En{S6`jF=%vA literal 0 HcmV?d00001 diff --git a/src/data/faulty_gt.vcf.gz.tbi b/src/data/faulty_gt.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..a7e74cfdd1cb7e70c831218133faed511b789bf8 GIT binary patch literal 104 zcmb2|=3rp}f&Xj_PR>jW-VEG@pHfm%5)u-ak|cPUP6f;o?U-!b#dJ_;o|Ay9zhuG# n)fr!Av0UuXJ9ngkE8wdW1B30qBQlLZBjnM{l4f8A8x0}=0-_q5 literal 0 HcmV?d00001 diff --git a/src/data/per_sample_ft_lhs.vcf b/src/data/per_sample_ft_lhs.vcf new file mode 100755 index 0000000..7ca2873 --- /dev/null +++ b/src/data/per_sample_ft_lhs.vcf @@ -0,0 +1,8 @@ +##fileformat=VCFv4.1 +##contig= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12877 +chrQ 1 . A G . PASS . GT 0/1 +chrQ 2 . A T . PASS . GT 0/1 +chrQ 3 . A T . PASS . GT 0/1 diff --git a/src/data/per_sample_ft_rhs.vcf b/src/data/per_sample_ft_rhs.vcf new file mode 100755 index 0000000..c8225ca --- /dev/null +++ b/src/data/per_sample_ft_rhs.vcf @@ -0,0 +1,12 @@ +##fileformat=VCFv4.1 +##contig= +##INFO= +##FORMAT= +##FORMAT= +##FILTER= +##FILTER= +##FILTER= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12877 +chrQ 1 . A G . PASS . GT 0/1 +chrQ 2 . A T . PASS . GT:FT 0/1:A +chrQ 3 . A T . PASS . GT:FT 0/1:A;B