-
Notifications
You must be signed in to change notification settings - Fork 0
/
6_dm3_GenotypeGVCF.sh
executable file
·66 lines (54 loc) · 2.43 KB
/
6_dm3_GenotypeGVCF.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
#BSUB -L /bin/bash
#BSUB -e 5_GVCFs.err
#BSUB -o 5_GVCFs.out
#BSUB -J 5_GVCFs
#BSUB -M 100000000
#BSUB -R rusage[mem=100000]
#BSUB -n 4
#BSUB -u maria.litovchenko@epfl.ch
export PATH=/software/bin:$PATH;
module use /software/module/;
module add Development/java_jdk/1.8.0_66;
module add UHTS/Analysis/vcftools/0.1.12b;
module add R/3.3.2;
#------------------------------------------------------------------------------
# STEP 6: PERFORM MERGING OF RAW GVCF WITH GenotypeGVCFs
#------------------------------------------------------------------------------
outputdir=/scratch/el/monthly/mlitovch/MitoSeq_RPJB_ML_Aug2017;
GATKjar=/scratch/el/monthly/mlitovch/tools/GenomeAnalysisTK.jar
ref=/scratch/el/monthly/mlitovch/RefGen/dm3/dm3.Wolb.fa
rawGVCFdir=/scratch/el/monthly/mlitovch/MitoSeq_RPJB_ML_Aug2017/4_dm3_HC_d_rawGVCF
# list samples to merge and remove bad samples
toMerge=`ls -d -1 $rawGVCFdir/* | grep noIndelMNP.g.vcf$ | grep -v 177A12_L8 | grep -v 304A10_L5 | xargs | sed "s@/scratch@-V /scratch@g"`
# try first use mergeGVCF to make it faster
# do GenotypeGVCFs, it improves genotype calls
java -Xms64g -Xmx64g -jar $GATKjar -T GenotypeGVCFs \
-allSites -stand_call_conf 30 -stand_emit_conf 10 \
-R $ref -nt 4 --variant $toMerge \
-o $outputdir/MitoSeq_AllRuns_dm3.gvcf.vcf
echo "Finished GenotypeGVCFs"
# select only SNPs
java -Xms64g -Xmx64g -jar $GATKjar -T SelectVariants \
-R $ref -V $outputdir/MitoSeq_AllRuns_dm3.gvcf.vcf \
-selectType SNP --selectTypeToExclude MNP -select "DP > 5" \
-o $outputdir/MitoSeq_AllRuns_dm3.gvcf.snps.vcf
echo "Finished SelectVariants"
# filter out bad quality
java -Xms64g -Xmx64g -jar $GATKjar -T VariantFiltration \
-R $ref -V $outputdir/MitoSeq_AllRuns_dm3.gvcf.snps.vcf \
-filterName FS -filter "FS > 30.0" -filterName QD \
-filter "QD < 2.0" \
-o $outputdir/MitoSeq_AllRuns_dm3.gvcf.snps.fltr.vcf
echo "Finished VariantFiltration"
# extract only genotype info
vcftools --vcf $outputdir/MitoSeq_AllRuns_dm3.gvcf.snps.fltr.vcf \
--extract-FORMAT-info GT \
--out $outputdir/MitoSeq_AllRuns_dm3.gvcf.snps.fltr
echo "Finished GT extraction"
# create table suitable for R
sed -i 's@\.\/\.@-@g' $outputdir/MitoSeq_AllRuns_dm3.gvcf.snps.fltr.GT.FORMAT
sed -i 's@0\/0@0@g' $outputdir/MitoSeq_AllRuns_dm3.gvcf.snps.fltr.GT.FORMAT
sed -i 's@1\/1@2@g' $outputdir/MitoSeq_AllRuns_dm3.gvcf.snps.fltr.GT.FORMAT
echo "Finished preparation of file for R"
exit 0;