Skip to content

Commit

Permalink
Merge pull request #2066 from Clinical-Genomics/peddy_check
Browse files Browse the repository at this point in the history
adds automatic ped_check fail
  • Loading branch information
jemten committed Feb 21, 2024
2 parents d6baf44 + dc66fad commit 010a180
Show file tree
Hide file tree
Showing 16 changed files with 258 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -6,6 +6,8 @@ This project adheres to [Semantic Versioning](http://semver.org/).
## [develop]

- Adds optional trimming of reads with Fastp for the DNA workflow, turned on by default
- Adds automatic fail in analysisrunstatus for cases where peddy detects errors in the pedigree
- Adds automatic fail in analysisrunstatus for cases where peddy detects a discrepancy between the given and calculated gender

### Tools

Expand Down
2 changes: 1 addition & 1 deletion definitions/rd_dna_panel_parameters.yaml
Expand Up @@ -1198,7 +1198,7 @@ qccollect_regexp_file:
associated_recipe:
- qccollect_ar
data_type: SCALAR
default: qc_regexp_-v1.27-.yaml
default: qc_regexp_-v1.28-.yaml
exists_check: file
is_reference: 1
reference: reference_dir
Expand Down
2 changes: 1 addition & 1 deletion definitions/rd_dna_parameters.yaml
Expand Up @@ -2373,7 +2373,7 @@ qccollect_regexp_file:
associated_recipe:
- qccollect_ar
data_type: SCALAR
default: qc_regexp_-v1.27-.yaml
default: qc_regexp_-v1.28-.yaml
exists_check: file
is_reference: 1
reference: reference_dir
Expand Down
2 changes: 1 addition & 1 deletion definitions/rd_rna_parameters.yaml
Expand Up @@ -1110,7 +1110,7 @@ qccollect_regexp_file:
associated_recipe:
- qccollect_ar
data_type: SCALAR
default: qc_regexp_-v1.27-.yaml
default: qc_regexp_-v1.28-.yaml
exists_check: file
is_reference: 1
reference: reference_dir
Expand Down
2 changes: 1 addition & 1 deletion documentation/README.md
Expand Up @@ -235,4 +235,4 @@ MIP will place any generated data files in the output data directory specified b
[Perl]:https://www.perl.org/
[Rank model file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/rank_model_-v1.34-.ini
[SV rank model file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/svrank_model_-v1.9-.ini
[Qc regexp file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/qc_regexp_-v1.26-.yaml
[Qc regexp file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/qc_regexp_-v1.28-.yaml
2 changes: 1 addition & 1 deletion lib/MIP/Cli/Mip/Analyse/Rd_dna.pm
Expand Up @@ -2110,7 +2110,7 @@ q{Default: hgvs, symbol, numbers, sift, polyphen, humdiv, domains, protein, ccds

option(
q{qccollect_regexp_file} => (
cmd_tags => [q{Default: qc_regexp_-v1.25-.yaml}],
cmd_tags => [q{Default: qc_regexp_-v1.28-.yaml}],
documentation =>
q{Regular expression file containing the regular expression to be used for each program},
is => q{rw},
Expand Down
2 changes: 1 addition & 1 deletion lib/MIP/Cli/Mip/Analyse/Rd_dna_panel.pm
Expand Up @@ -1197,7 +1197,7 @@ q{Default: hgvs, symbol, numbers, sift, polyphen, humdiv, domains, protein, ccds

option(
q{qccollect_regexp_file} => (
cmd_tags => [q{Default: qc_regexp_-v1.25-.yaml}],
cmd_tags => [q{Default: qc_regexp_-v1.28-.yaml}],
documentation =>
q{Regular expression file containing the regular expression to be used for each program},
is => q{rw},
Expand Down
2 changes: 1 addition & 1 deletion lib/MIP/Cli/Mip/Analyse/Rd_rna.pm
Expand Up @@ -817,7 +817,7 @@ q{Default: BaseQualityRankSumTest, ChromosomeCounts, Coverage, DepthPerAlleleByS

option(
q{qccollect_regexp_file} => (
cmd_tags => [q{Default: qc_regexp_-v1.25-.yaml}],
cmd_tags => [q{Default: qc_regexp_-v1.28-.yaml}],
documentation =>
q{Regular expression file containing the regular expression to be used for each program},
is => q{rw},
Expand Down
8 changes: 8 additions & 0 deletions lib/MIP/Qcc_regexp.pm
Expand Up @@ -212,6 +212,14 @@ q?perl -nae 'my @sexCheckFactor; if ($. > 1) {my @temp = split(/\s+/,$_);push(@s
# Get entire sample relation check file
$regexp{relation_check}{sample_relation_check} = q?perl -nae 'print $_;' ?;

# Return FAIL if peddy has detected relationship error, otherwise PASS
# The one-liner splits every line on commas (-F,), skips the header line ($. == 1)
# and collects field $F[12] (presumably peddy's pedigree error flag - confirm against the peddy csv header).
# The verdict is printed once in the END block: FAIL if any collected value is the string "True".
# NOTE: "my @ped_checks" inside BEGIN is scoped to that block only; the array that is
# pushed to and grepped is the one-liner's package variable of the same name
$regexp{ped_check}{peddy_kinship} =
q?perl -F, -ne 'BEGIN {my @ped_checks;} next if $. == 1; push @ped_checks, $F[12]; END{ if ( grep $_ eq q{True}, @ped_checks ) { print q{FAIL}; } else { print q{PASS};} }' ?;

# Return FAIL if peddy has detected an error in the given gender, otherwise PASS
# Same scheme as for peddy_kinship, but collecting field $F[7]
# (presumably peddy's sex check error flag - confirm against the peddy csv header).
# The -l switch chomps each input line, so a field at the end of the line can match "True" exactly
$regexp{sex_check}{peddy_sexcheck} =
q?perl -F, -lne 'BEGIN {my @sex_checks;} next if $. == 1; push @sex_checks, $F[7]; END{ if ( grep $_ eq q{True}, @sex_checks ) { print q{FAIL}; } else { print q{PASS};} }'?;

# Return fraction duplicates
$regexp{markduplicates}{fraction_duplicates} =
q?perl -nae 'if($_=~/Fraction Duplicates\: (\S+)/) {print $1;}' ?;
Expand Down
14 changes: 13 additions & 1 deletion lib/MIP/Recipes/Analysis/Peddy.pm
Expand Up @@ -124,7 +124,7 @@ sub analysis_peddy {
use MIP::Program::Bcftools qw{ bcftools_view_and_index_vcf };
use MIP::Program::Peddy qw{ peddy };
use MIP::Recipe qw{ parse_recipe_prerequisites };
use MIP::Sample_info qw{ set_file_path_to_store set_recipe_metafile_in_sample_info };
use MIP::Sample_info qw{ set_file_path_to_store set_recipe_metafile_in_sample_info set_recipe_outfile_in_sample_info };
use MIP::Script::Setup_script qw{ setup_script };

### PREPROCESSING:
Expand Down Expand Up @@ -250,6 +250,18 @@ sub analysis_peddy {
}
);

if ( $outfile_tag eq q{ped_check} || $outfile_tag eq q{sex_check} ) {

    ## Duplicate the ped_check and sex_check tags one level out in sample_info.
    ## To be used for the automatic kinship and gender tests.
    ## Both comparisons are spelled out: a bare q{sex_check} operand to "||" is a
    ## non-empty string and hence always true, which would register every outfile tag
    set_recipe_outfile_in_sample_info(
        {
            path             => $outfile_path,
            recipe_name      => $outfile_tag,
            sample_info_href => $sample_info_href,
        }
    );
}

set_file_path_to_store(
{
format => q{meta},
Expand Down
2 changes: 1 addition & 1 deletion lib/MIP/Recipes/Install/Mip_scripts.pm
Expand Up @@ -89,7 +89,7 @@ sub install_mip_scripts {
mip_rd_rna_config.yaml
program_test_cmds.yaml
qc_eval_metric_-v1.4-.yaml
qc_regexp_-v1.26-.yaml
qc_regexp_-v1.28-.yaml
rank_model_-v1.34-.ini
svrank_model_-v1.9-.ini
}
Expand Down
112 changes: 112 additions & 0 deletions t/data/references/qc_regexp_-v1.28-.yaml
@@ -0,0 +1,112 @@
---
bamstats:
percentage_mapped_reads: "perl -nae 'if($_=~/percentage mapped reads:\\s+(\\S+)/) {print $1;last}' "
raw_total_sequences: "perl -nae 'if($_=~/raw total sequences:\\s+(\\S+)/) {print $1;last}' "
reads_mapped: "perl -nae 'if($_=~/reads mapped:\\s+(\\S+)/) {print $1;last}' "
chanjo_sexcheck:
gender: "perl -nae 'if( ($F[0]!~/^#/) && ($F[2] =~/\\S+/) ) {print $F[2];}' "
collecthsmetrics:
data: "perl -nae' if ( ($. ==8) && ($_ =~/(\\S+)/) ) {print $_;last;}' "
header: "perl -nae' if ($_ =~/^BAIT_SET/ ) {print $_;last;}' "
collectmultiplemetrics:
first_of_pair: "perl -nae' if ($_ =~/^FIRST_OF_PAIR/ ) {print $_;last;}' "
header: "perl -nae' if ($_ =~/^CATEGORY/ ) {print $_;last;}' "
pair: "perl -nae' if ($_ =~/^PAIR/ ) {print $_;last;}' "
second_of_pair: "perl -nae' if ($_ =~/^SECOND_OF_PAIR/ ) {print $_;last;}' "
collectmultiplemetricsinsertsize:
data: "perl -nae' if ( ($. ==8) && ($_ =~/(\\S+)/) ) {print $_;last;}' "
header: "perl -nae' if ($_ =~/^MEDIAN_INSERT_SIZE/ ) {print $_;last;}' "
collectrnaseqmetrics:
data: "perl -nae' if ( ($. ==8) && ($_ =~/(\\S+)/) ) {print $_;last;}' "
header: "perl -nae' if ($_ =~/^PF_BASES/ ) {print $_;last;}' "
fastqc_ar:
basic_statistics: "perl -nae' if ($_=~/>>Basic Statistics\\s+(\\S+)/) {print $1;last;}' "
encoding: "perl -nae' if ($_=~/Encoding\\s+(\\S+\\s\\S+\\s\\S+\\s\\S+|\\S+\\s\\S+)/) { my $encoding = $1;$encoding=~s/\\s/\\_/g; print $encoding;last;}' "
gc: "perl -nae' if ($_=~/%GC\\s(\\d+)/) {print $1;last;}' "
kmer_content: "perl -nae' if ($_=~/>>Kmer Content\\s+(\\S+)/) {print $1;last;}' "
overrepresented_sequences: "perl -nae' if ($_=~/>>Overrepresented sequences\\s+(\\S+)/) {print $1;last;}' "
per_base_gc_content: "perl -nae' if ($_=~/>>Per base GC content\\s+(\\S+)/) {print $1;last;}' "
per_base_n_content: "perl -nae' if ($_=~/>>Per base N content\\s+(\\S+)/) {print $1;last;}' "
per_base_sequence_content: "perl -nae' if ($_=~/>>Per base sequence content\\s+(\\S+)/) {print $1;last;}' "
per_base_sequence_quality: "perl -nae' if ($_=~/>>Per base sequence quality\\s+(\\S+)/) {print $1;last;}' "
per_sequence_gc_content: "perl -nae' if ($_=~/>>Per sequence GC content\\s+(\\S+)/) {print $1;last;}' "
per_sequence_quality_scores: "perl -nae' if ($_=~/>>Per sequence quality scores\\s+(\\S+)/) {print $1;last;}' "
sequence_duplication: "perl -nae' if ($_=~/#Total Duplicate Percentage\\s+(\\d+.\\d)/) {print $1;last;}' "
sequence_duplication_levels: "perl -nae' if ($_=~/>>Sequence Duplication Levels\\s+(\\S+)/) {print $1;last;}' "
sequence_length: "perl -nae' if ($_=~/Sequence length\\s(\\d+)/) {print $1;last;}' "
total_number_of_reads: "perl -nae' if ($_=~/Total Sequences\\s(\\d+)/) {print $1;last;}' "
inbreeding_factor:
sample_inbreeding_factor: "perl -nae 'my @inbreedingFactor; if ($. > 1) {my @temp = split(/\\s/,$_);push(@inbreedingFactor, $F[0].\":\".$F[5]); print $inbreedingFactor[0], \"\\t\"; }' "
markduplicates:
fraction_duplicates: "perl -nae 'if($_=~/Fraction Duplicates\\: (\\S+)/) {print $1;}' "
ped_check:
peddy_kinship: "perl -F, -ne 'BEGIN {my @ped_checks;} next if $. == 1; push @ped_checks, $F[12]; END{ if ( grep $_ eq q{True}, @ped_checks ) { print q{FAIL}; } else { print q{PASS};} }' "
pedigree_check:
sample_order: "perl -nae 'if ($_=~/^#CHROM/) {chomp $_; my @line = split(/\\t/,$_); for (my $sample=9;$sample<scalar(@line);$sample++) { print $line[$sample], \"\\t\";}last;}' "
plink_sexcheck:
sample_sexcheck: "perl -nae 'my @sexCheckFactor; if ($. > 1) {my @temp = split(/\\s+/,$_);push(@sexCheckFactor,$temp[2].\":\".$temp[4]); print $sexCheckFactor[0], \"\\t\"; }' "
relation_check:
sample_relation_check: "perl -nae 'print $_;' "
sex_check:
peddy_sexcheck: "perl -F, -lne 'BEGIN {my @sex_checks;} next if $. == 1; push @sex_checks, $F[7]; END{ if ( grep $_ eq q{True}, @sex_checks ) { print q{FAIL}; } else { print q{PASS};} }'"
star_log:
percentage_uniquely_mapped_reads: "perl -nae 'if(m/Uniquely\\smapped\\sreads\\s%\\s\\|\\t(\\d+\\.\\d+) /xms) {print $1; last;}' "
sv_varianteffectpredictor:
assembly: "perl -nae 'if($_=~/##VEP=/ && $_=~/assembly=(\\S+)/) {print $1;last;}' "
cache: "perl -nae 'if($_=~/##VEP=\\w+\\s+cache=(\\S+)/) {print $1;last;}' "
gencode: "perl -nae 'if($_=~/##VEP=/ && $_=~/gencode=\\S+\\s+(\\d+)/) {print $1;last;}' "
gene_build: "perl -nae 'if($_=~/##VEP=/ && $_=~/genebuild=(\\S+)/) {print $1;last;}' "
hgmd_public: "perl -nae 'if($_=~/##VEP=/ && $_=~/HGMD-PUBLIC=(\\S+)/) {print $1;last;}' "
polyphen: "perl -nae 'if($_=~/##VEP=/ && $_=~/polyphen=(\\S+)/) {print $1;last;}' "
reg_build: "perl -nae 'if($_=~/##VEP=/ && $_=~/regbuild=(\\S+)/) {print $1;last;}' "
sift: "perl -nae 'if($_=~/##VEP=/ && $_=~/sift=sift(\\S+)/) {print $1;last;}' "
version: "perl -nae 'if($_=~/##VEP=\"(\\w+)\"/) {print $1;last;}' "
sv_vcfparser:
version: "perl -nae 'if($_=~/##Software=<ID=mip,Version=(\\d+.\\d+.\\d+)/) {print $1;last;} else { if($_=~/#CHROM/) {last;} }' "
trim_galore_stats:
percentage_bp_after_trimming: "perl -nae 'if( m/Total\\swritten\\s\\([^(]+\\((\\d+\\.\\d+) /xms ){ print $1; last;}' "
percentage_reads_after_trimming: "perl -nae 'if( m/Reads\\swritten\\s\\([^(]+\\((\\d+\\.\\d+) /xms ){ print $1; last;}' "
percentage_reads_with_adapter: "perl -nae 'if( m/Reads\\swith\\sadapters[^(]+\\((\\d+\\.\\d+) /xms ){ print $1; last;}' "
varianteffectpredictor:
assembly: "perl -nae 'if($_=~/##VEP=/ && $_=~/assembly=(\\S+)/) {print $1;last;}' "
cache: "perl -nae 'if($_=~/##VEP=\\w+\\s+cache=(\\S+)/) {print $1;last;}' "
gencode: "perl -nae 'if($_=~/##VEP=/ && $_=~/gencode=\\S+\\s+(\\d+)/) {print $1;last;}' "
gene_build: "perl -nae 'if($_=~/##VEP=/ && $_=~/genebuild=(\\S+)/) {print $1;last;}' "
hgmd_public: "perl -nae 'if($_=~/##VEP=/ && $_=~/HGMD-PUBLIC=(\\S+)/) {print $1;last;}' "
polyphen: "perl -nae 'if($_=~/##VEP=/ && $_=~/polyphen=(\\S+)/) {print $1;last;}' "
reg_build: "perl -nae 'if($_=~/##VEP=/ && $_=~/regbuild=(\\S+)/) {print $1;last;}' "
sift: "perl -nae 'if($_=~/##VEP=/ && $_=~/sift=sift(\\S+)/) {print $1;last;}' "
version: "perl -nae 'if($_=~/##VEP=\"(\\w+)\"/) {print $1;last;}' "
variantevalall: &1
comp_overlap_data_all: "perl -nae' if ( ($_ =~/^CompOverlap/) && ($_ =~/all/) && ($_ =~/none/)) {print $_;last;}' "
comp_overlap_data_header: "perl -nae' if ($_ =~/^CompOverlap\\s+CompFeatureInput/ ) {print $_;last;}' "
comp_overlap_data_known: "perl -nae' if ( ($_ =~/^CompOverlap/) && ($_ =~/known\\s/) ) {print $_;last;}' "
comp_overlap_data_novel: "perl -nae' if ( ($_ =~/^CompOverlap/) && ($_ =~/novel\\s/) ) {print $_;last;}' "
count_variants_data_all: "perl -nae' if ( ($_ =~/^CountVariants/) && ($_ =~/all\\s/) ) {print $_;last;}' "
count_variants_data_header: "perl -nae' if ($_ =~/^CountVariants\\s+CompFeatureInput/ ) {print $_;last;}' "
count_variants_data_known: "perl -nae' if ( ($_ =~/^CountVariants/) && ($_ =~/known\\s/) ) {print $_;last;}' "
count_variants_data_novel: "perl -nae' if ( ($_ =~/^CountVariants/) && ($_ =~/novel\\s/) ) {print $_;last;}' "
indel_summary_data_all: "perl -nae' if ( ($_ =~/^IndelSummary/) && ($_ =~/all\\s/) ) {print $_;last;}' "
indel_summary_data_header: "perl -nae' if ($_ =~/^IndelSummary\\s+CompFeatureInput/ ) {print $_;last;}' "
indel_summary_data_known: "perl -nae' if ( ($_ =~/^IndelSummary/) && ($_ =~/known\\s/) ) {print $_;last;}' "
indel_summary_data_novel: "perl -nae' if ( ($_ =~/^IndelSummary/) && ($_ =~/novel\\s/) ) {print $_;last;}' "
multiallelic_summary_data_all: "perl -nae' if ( ($_ =~/^MultiallelicSummary/) && ($_ =~/all\\s/) ) {print $_;last;}' "
multiallelic_summary_data_header: "perl -nae' if ($_ =~/^MultiallelicSummary\\s+CompFeatureInput/ ) {print $_;last;}' "
multiallelic_summary_data_known: "perl -nae' if ( ($_ =~/^MultiallelicSummary/) && ($_ =~/known\\s/) ) {print $_;last;}' "
multiallelic_summary_data_novel: "perl -nae' if ( ($_ =~/^MultiallelicSummary/) && ($_ =~/novel\\s/) ) {print $_;last;}' "
titv_variant_evaluator_data_all: "perl -nae' if ( ($_ =~/^TiTvVariantEvaluator/) && ($_ =~/all\\s/) ) {print $_;last;}' "
titv_variant_evaluator_data_header: "perl -nae' if ($_ =~/^TiTvVariantEvaluator\\s+CompFeatureInput/ ) {print $_;last;}' "
titv_variant_evaluator_data_known: "perl -nae' if ( ($_ =~/^TiTvVariantEvaluator/) && ($_ =~/known\\s/) ) {print $_;last;}' "
titv_variant_evaluator_data_novel: "perl -nae' if ( ($_ =~/^TiTvVariantEvaluator/) && ($_ =~/novel\\s/) ) {print $_;last;}' "
validation_report_data_all: "perl -nae' if ( ($_ =~/^ValidationReport/) && ($_ =~/all\\s/) && ($_ =~/none\\s/)) {print $_;last;}' "
validation_report_data_known: "perl -nae' if ( ($_ =~/^ValidationReport/) && ($_ =~/known\\s/) ) {print $_;last;}' "
validation_report_data_novel: "perl -nae' if ( ($_ =~/^ValidationReport/) && ($_ =~/novel\\s/) ) {print $_;last;}' "
validation_report_header: "perl -nae' if ($_ =~/^ValidationReport\\s+CompFeatureInput/ ) {print $_;last;}' "
variant_summary_data_all: "perl -nae' if ( ($_ =~/^VariantSummary/) && ($_ =~/all\\s/) ) {print $_;last;}' "
variant_summary_data_known: "perl -nae' if ( ($_ =~/^VariantSummary/) && ($_ =~/known\\s/) ) {print $_;last;}' "
variant_summary_data_novel: "perl -nae' if ( ($_ =~/^VariantSummary/) && ($_ =~/novel\\s/) ) {print $_;last;}' "
variant_summary_header: "perl -nae' if ($_ =~/^VariantSummary\\s+CompFeatureInput/ ) {print $_;last;}' "
variantevalexome: *1
vcfparser_ar:
version: "perl -nae 'if($_=~/##Software=<ID=mip,Version=(\\d+.\\d+.\\d+)/) {print $1;last;}' "

2 changes: 1 addition & 1 deletion t/mip_core.t
Expand Up @@ -329,7 +329,7 @@ sub mip_scripts {
mip_rd_dna_vcf_rerun_config.yaml
mip_rd_rna_config.yaml
program_test_cmds.yaml
qc_regexp_-v1.26-.yaml
qc_regexp_-v1.28-.yaml
rank_model_-v1.34-.ini
svrank_model_-v1.9-.ini
}
Expand Down
2 changes: 1 addition & 1 deletion t/mip_qccollect.test
Expand Up @@ -36,7 +36,7 @@ my $eval_metric_file = catfile( dirname($Bin), qw{ templates qc_eval_metric
my $mip_path = catfile( dirname($Bin), q{mip} );
my $log_file_path = catfile( cwd(), q{qc_metrics_qccollect.log} );
my $test_reference_path = catdir( $cluster_constant_path, q{references} );
my $regexp_file_path = catfile( $test_reference_path, q{qc_regexp_-v1.26-.yaml} );
my $regexp_file_path = catfile( $test_reference_path, q{qc_regexp_-v1.28-.yaml} );
my $sample_info_file =
catfile( $cluster_constant_path, qw{ test_data 643594-miptest_qc_sample_info_ci.yaml } );
my $outfile = catfile( cwd(), q{qc_metrics.yaml} );
Expand Down
2 changes: 1 addition & 1 deletion templates/mip_install_config.yaml
Expand Up @@ -260,7 +260,7 @@ container:
vep:
executable:
vep:
uri: docker.io/ensemblorg/ensembl-vep:release_110.1
uri: docker.io/ensemblorg/ensembl-vep:release_107.0
vcf2cytosure:
executable:
vcf2cytosure:
Expand Down

0 comments on commit 010a180

Please sign in to comment.