Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2002 from Clinical-Genomics/release/mip11.1
Release/mip11.1
- Loading branch information
Showing
116 changed files
with
5,402 additions
and
948 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,31 @@ | ||
################## BASE IMAGE ###################### | ||
|
||
FROM clinicalgenomics/mip_base:2.1 | ||
FROM ubuntu:bionic | ||
|
||
################## METADATA ###################### | ||
|
||
LABEL base_image="clinicalgenomics/mip_base:2.1" | ||
LABEL version="3" | ||
LABEL software="expansionhunter" | ||
LABEL software.version="4.0.2" | ||
LABEL extra.binaries="expansionhunter" | ||
LABEL base_image="ubuntu:bionic" | ||
LABEL version="4" | ||
LABEL software="ExpansionHunter" | ||
LABEL software.version="5.0.0" | ||
LABEL extra.binaries="ExpansionHunter" | ||
LABEL maintainer="Clinical-Genomics/MIP" | ||
|
||
RUN conda install -c bioconda expansionhunter=4.0.2 | ||
## Install wget | ||
RUN apt-get update && \ | ||
apt-get install -y --no-install-recommends \ | ||
ca-certificates \ | ||
curl \ | ||
wget \ | ||
libreadline-dev && \ | ||
apt-get clean && \ | ||
apt-get purge && \ | ||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* | ||
|
||
## Clean up after conda | ||
RUN /opt/conda/bin/conda clean -ya | ||
WORKDIR /app | ||
|
||
WORKDIR /data/ | ||
RUN wget -nv https://github.com/Illumina/ExpansionHunter/releases/download/v5.0.0/ExpansionHunter-v5.0.0-linux_x86_64.tar.gz && \ | ||
tar -xvf ExpansionHunter-v5.0.0-linux_x86_64.tar.gz && \ | ||
rm ExpansionHunter-v5.0.0-linux_x86_64.tar.gz | ||
|
||
ENV PATH=/app/ExpansionHunter-v5.0.0-linux_x86_64/bin:${PATH} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# syntax=docker/dockerfile:1 | ||
# Base layer: the clinicalgenomics/htslib image, tag 1.13.
FROM clinicalgenomics/htslib:1.13 | ||
# Make /bin the working directory so the relative COPY destination below resolves to /bin.
WORKDIR /bin | ||
# Copy the entire build context into /bin.
# NOTE(review): presumably this places the scripts that sit next to this Dockerfile on PATH -- confirm.
COPY . . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
#!/usr/bin/env perl | ||
use strict; | ||
use warnings; | ||
use Data::Dumper; | ||
use List::Util qw(sum); | ||
use File::Basename qw(dirname); | ||
|
||
# Main script body.
#
# Usage: generate_gens_data.pl <coverage_file> <gvcf_file> <sample_id> <gnomad_file>
#        generate_gens_data.pl --version
#
# Writes <sample_id>.cov.bed.gz and <sample_id>.baf.bed.gz (each bgzip-compressed
# and tabix-indexed). Every resolution level is written to the same file and is
# distinguished by a one-letter prefix on the chromosome name.

# Handle --version first; the defined() guard avoids an "uninitialized value"
# warning when the script is started without any arguments.
if ( defined $ARGV[0] and $ARGV[0] eq "--version" ) {
    print "generate_gens_data.pl 1.0.2\n";
    exit 0;
}

if ( @ARGV < 4 ) {
    die "Usage: $0 <coverage_file> <gvcf_file> <sample_id> <gnomad_file>\n";
}

my $SCRIPT_ROOT = dirname($0);

# The three arrays below are parallel: index $i of each describes one
# resolution level (coverage window size, BAF down-sampling step, prefix).
my @COV_WINDOW_SIZES = ( 100000, 25000, 5000, 1000, 100 );
my @BAF_SKIP_N       = ( 160, 40, 10, 4, 1 );
my @PREFIXES         = qw( o a b c d );

my ( $cov_fn, $gvcf_fn, $SAMPLE_ID, $GNOMAD ) = @ARGV;

my $COV_OUTPUT = $SAMPLE_ID . ".cov.bed";
my $BAF_OUTPUT = $SAMPLE_ID . ".baf.bed";

# Per-sample scratch file, so that two runs sharing a working directory do not
# overwrite each other's intermediate data.
my $baf_tmp = $SAMPLE_ID . ".baf.tmp";

print STDERR "Calculating coverage data\n";

# Calculate coverage data.
# COVOUT stays a bareword handle because generate_cov_bed() prints to it by name.
open( COVOUT, '>', $COV_OUTPUT )
  or die "Cannot open $COV_OUTPUT for writing: $!\n";
for my $i ( 0 .. $#COV_WINDOW_SIZES ) {
    generate_cov_bed( $cov_fn, $COV_WINDOW_SIZES[$i], $PREFIXES[$i] );
}
close COVOUT or die "Cannot close $COV_OUTPUT: $!\n";

print STDERR "Calculating BAFs from gvcf...\n";

# Calculate BAFs.
# Run the helper script through a list-form pipe (no shell involved, so file
# names containing spaces or shell metacharacters are passed through safely)
# and store its standard output in the scratch file.
my $vaf_script = $SCRIPT_ROOT . "/gvcfvaf.pl";
open( my $vaf_in, '-|', $vaf_script, $gvcf_fn, $GNOMAD )
  or die "Cannot run $vaf_script: $!\n";
open( my $vaf_out, '>', $baf_tmp )
  or die "Cannot open $baf_tmp for writing: $!\n";
while ( my $line = <$vaf_in> ) {
    print {$vaf_out} $line;
}
close $vaf_out or die "Cannot close $baf_tmp: $!\n";

# close() on a pipe handle reports the exit status of the child process.
close $vaf_in
  or die "$vaf_script failed"
  . ( $! ? ": $!" : " with exit status " . ( $? >> 8 ) ) . "\n";

# BAFOUT stays a bareword handle because generate_baf_bed() prints to it by name.
open( BAFOUT, '>', $BAF_OUTPUT )
  or die "Cannot open $BAF_OUTPUT for writing: $!\n";
for my $i ( 0 .. $#BAF_SKIP_N ) {
    print STDERR "Outputting BAF $PREFIXES[$i]...\n";
    generate_baf_bed( $baf_tmp, $BAF_SKIP_N[$i], $PREFIXES[$i] );
}
close BAFOUT or die "Cannot close $BAF_OUTPUT: $!\n";

# Compress and index both tracks. List-form system() bypasses the shell, and
# a non-zero exit status stops the script instead of leaving partial output.
for my $cmd (
    [ 'bgzip', '-f', '-@10', $BAF_OUTPUT ],
    [ 'tabix', '-f', '-p', 'bed', "$BAF_OUTPUT.gz" ],
    [ 'bgzip', '-f', '-@10', $COV_OUTPUT ],
    [ 'tabix', '-f', '-p', 'bed', "$COV_OUTPUT.gz" ],
  )
{
    system( @{$cmd} ) == 0
      or die "Command '@{$cmd}' failed with exit status " . ( $? >> 8 ) . "\n";
}

unlink($baf_tmp) or warn "Could not remove $baf_tmp: $!\n";
|
||
# generate_baf_bed( $fn, $skip, $prefix )
#
# Down-samples a tab-separated input file and writes the kept records as BED
# lines to the global BAFOUT filehandle, which the caller must have opened.
#
#   $fn     - path of the input file; the first three tab-separated columns
#             of each line are used (chromosome, position, value)
#   $skip   - keep every $skip-th line, starting with the first one
#   $prefix - string prepended (followed by "_") to the chromosome name
#
# Each kept record is written as "<prefix>_<chr> <pos-1> <pos> <value>",
# tab-separated. Dies if the input file cannot be opened.
sub generate_baf_bed {
    my ( $fn, $skip, $prefix ) = @_;

    open( my $fh, '<', $fn ) or die "Cannot open $fn for reading: $!\n";

    my $line_no = 0;
    while ( my $line = <$fh> ) {

        # Only lines 0, $skip, 2 * $skip, ... are kept.
        next if $line_no++ % $skip != 0;

        chomp $line;
        my ( $chr, $pos, $value ) = split /\t/, $line;

        # Emit a one-base interval ending at the given position.
        print BAFOUT join( "\t", $prefix . "_" . $chr, $pos - 1, $pos, $value ),
          "\n";
    }
    close $fh;
    return;
}
|
||
# generate_cov_bed( $fn, $win_size, $prefix )
#
# Aggregates per-interval values from a tab-separated file into windows of
# roughly $win_size bases and writes one BED line per window to the global
# COVOUT filehandle, which the caller must have opened.
#
#   $fn       - path of the input file with columns chromosome, start, end,
#               value; lines starting with "@" or "CONTIG" are skipped
#   $win_size - a window is closed once it spans at least this many bases
#   $prefix   - string prepended (followed by "_") to the chromosome name
#
# A window is also closed early when the chromosome changes or when the gap
# to the next interval is $win_size or more. Each closed window is reported as
# a one-base interval at its mid point, carrying the mean of the collected
# values. Dies if the input file cannot be opened.
#
# NOTE(review): a window that is still open when the input ends is not
# written. This matches the previous behaviour and is kept so that the output
# stays unchanged; confirm whether the trailing window should be flushed.
sub generate_cov_bed {

    my ( $fn, $win_size, $prefix ) = @_;

    open( my $fh, '<', $fn ) or die "Cannot open $fn for reading: $!\n";

    my ( $reg_start, $reg_end, $reg_chr );
    my @reg_ratios;

    while ( my $line = <$fh> ) {
        next if $line =~ /^@/ or $line =~ /^CONTIG/;
        chomp $line;
        my ( $chr, $start, $end, $ratio ) = split /\t/, $line;

        # Open a new window when none is active. The test uses defined() so
        # that a start coordinate of 0 is not mistaken for "no window".
        if ( !defined $reg_start ) {
            ( $reg_start, $reg_end, $reg_chr ) = ( $start, $end, $chr );
        }

        # The current interval cannot join the window when it lies on another
        # chromosome or is separated from it by a large gap.
        my $force_end = $chr ne $reg_chr || $start - $reg_end >= $win_size;

        my $window_end;
        if ($force_end) {

            # Close the window where it ended, before the current interval.
            $window_end = $reg_end;
        }
        else {
            push @reg_ratios, $ratio;
            $reg_end    = $end;
            $window_end = $end;
        }

        if ( $force_end or $window_end - $reg_start + 1 >= $win_size ) {
            my $mid_point =
              $reg_start + int( ( $window_end - $reg_start ) / 2 );
            my $average = mean(@reg_ratios);
            print COVOUT join( "\t",
                $prefix . "_" . $reg_chr,
                $mid_point - 1,
                $mid_point, $average ),
              "\n";
            undef $reg_start;
            undef $reg_end;
            undef $reg_chr;
            @reg_ratios = ();
        }

        # The interval that forced the flush becomes the start of the next
        # window.
        if ($force_end) {
            ( $reg_start, $reg_end, $reg_chr ) = ( $start, $end, $chr );
            push @reg_ratios, $ratio;
        }
    }
    close $fh;
    return;
}
|
||
# mean( @values )
#
# Returns the arithmetic mean of the given numbers: their sum divided by
# their count. Calling it with an empty list is a division by zero.
sub mean {
    my @values = @_;
    my $total  = sum(@values);
    my $count  = scalar @values;
    return $total / $count;
}
Oops, something went wrong.