-
Notifications
You must be signed in to change notification settings - Fork 0
/
map_to_supercontigs_gatk4.sh
103 lines (68 loc) · 2.33 KB
/
map_to_supercontigs_gatk4.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/bin/bash
# This workflow will take the supercontig output of HybPiper and return a supercontig that
# contains heterozygous positions as ambiguity bases. Uses paired reads.
#The script should be run on a FASTA file containing all the supercontigs of interest.
if [[ $# -eq 0 ]] ; then
echo 'usage: map_to_supercontigs.sh [-Xmx]mem[G] prefix[.supercontigs.fasta] readfile1.fq readfile2.fq'
exit 1
fi
#########CHANGE THESE PATHS AS NEEDED###########
gatk4path=/home/mskset001/gatk-4.1.7.0/gatk
gatkpath=/home/mskset001/gatk3/GenomeAnalysisTK/GenomeAnalysisTK.jar
picardpath=/home/mskset001/picard/picard.jar
#############COMMAND LINE ARGUMENTS############
mem=$1
prefix=$2
read1fq=$3
read2fq=$4
supercontig=$prefix.supercontigs.fasta
mkdir $prefix
cd $prefix
cp ../$supercontig .
#####STEP ZERO: Make Reference Databases
java -Xmx"$mem"G -jar $picardpath CreateSequenceDictionary \
R=$supercontig
bwa index $supercontig
samtools faidx $supercontig
#####STEP ONE: Map reads
echo "Mapping Reads"
bwa mem -t $SLURM_NTASKS $supercontig $read1fq $read2fq | samtools view -bS - | samtools sort - -o $supercontig.sorted.bam
java -Xmx"$mem"G -jar $picardpath FastqToSam \
F1=$read1fq \
F2=$read2fq \
O=$supercontig.unmapped.bam \
SM=$supercontig
java -Xmx"$mem"G -jar $picardpath MergeBamAlignment \
ALIGNED=$supercontig.sorted.bam \
UNMAPPED=$supercontig.unmapped.bam \
O=$supercontig.merged.bam \
R=$supercontig
#####STEP TWO: Mark duplicates
echo "Marking Duplicates"
java -Xmx"$mem"G -jar $picardpath MarkDuplicates \
I=$supercontig.merged.bam \
O=$supercontig.marked.bam \
M=$supercontig.metrics.txt
#######STEP THREE: Identify variants, select only SNPs
echo "Identifying variants"
samtools index $supercontig.marked.bam
#samtools mpileup -B -f $supercontig $supercontig.marked.bam -v -u > $supercontig.vcf
$gatk4path --java-options "-Xmx"$mem"G" HaplotypeCaller \
-R $supercontig \
-I $supercontig.marked.bam \
-O $supercontig.vcf
time java -Xmx"$mem"G -jar $gatkpath \
-T SelectVariants \
-R $supercontig \
-V $supercontig.vcf \
-selectType SNP \
-o $supercontig.snps.vcf
######STEP FOUR: Output new supercontig FASTA with ambiguity codes
echo "Generating IUPAC FASTA file"
java -Xmx"$mem"G -jar $gatkpath \
-T FastaAlternateReferenceMaker \
-R $supercontig \
-o $supercontig.iupac \
-V $supercontig.snps.vcf \
-IUPAC $supercontig
cd ..