-
Notifications
You must be signed in to change notification settings - Fork 26
/
dme-index-bismark-bowtie2.sh
executable file
·80 lines (67 loc) · 3.28 KB
/
dme-index-bismark-bowtie2.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/bash
# dme-index-bismark-bowtie2.sh - Creates C->T indexed genomic files with Bismark/Bowtie2, used for Whole Genome Bisulphite Analysis
# Print an error message to stderr and abort the job with exit status 1.
dme_die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print field 2 of every line among the first 10 lines of log file $2 that
# contains the literal label $1 (e.g. "C->T").
dme_conversion_count() {
  awk -v label="$1" 'NR > 10 { exit } index($0, label) { print $2 }' "$2"
}

# Print $1 unchanged when it is a plain non-negative integer, otherwise print
# the JSON literal null, so the generated metadata always stays valid JSON.
dme_json_number() {
  if [[ "$1" =~ ^[0-9]+$ ]]; then
    printf '%s' "$1"
  else
    printf 'null'
  fi
}

#######################################
# Build a Bismark/Bowtie2 index for a reference genome plus a lambda genome,
# archive it and upload it as the job output.
# Globals (read):    reference, chrom_sizes, lambda
# Globals (updated): genome, gender - overwritten when both can be discovered
#                    through parse_property.py from the reference file
# Outputs:  progress messages on stdout, errors on stderr; job outputs
#           'dme_ix' (file) and 'metadata' (JSON string) via
#           dx-jobutil-add-output
# Exits:    1 when genome/gender are unknown or any download, indexing,
#           archive or upload step fails
#######################################
main() {
  # genome and gender are deliberately NOT local: they arrive as job inputs.
  local versions='' source_msg genome_prop gender_prop index_root
  local ref_ctot ref_gtoa lambda_ctot lambda_gtoa meta dme_ix

  # If available, will print tool versions to stderr and json string to stdout
  if [ -f /usr/bin/tool_versions.py ]; then
    versions=$(tool_versions.py --dxjson dnanexus-executable.json)
  fi

  echo "* Value of reference: '$reference'"
  echo "* Value of chrom_sizes: '$chrom_sizes'"
  echo "* Value of lambda: '$lambda'"

  # Prefer to discover genome and gender; only trust the discovery when both
  # properties come back non-empty.
  source_msg="Value of"
  if [ -f /usr/bin/parse_property.py ]; then
    genome_prop=$(parse_property.py -f "$reference" -p "genome" --quiet)
    gender_prop=$(parse_property.py -f "$reference" -p "gender" --quiet)
    if [[ -n "$genome_prop" && -n "$gender_prop" ]]; then
      genome=$genome_prop
      gender=$gender_prop
      source_msg="Discovered"
    fi
  fi
  if [[ -z "$genome" || -z "$gender" ]]; then
    dme_die "Reference genome and/or gender could not be determined and must be supplied as arguments."
  fi
  echo "* ${source_msg} genome: '$genome'"
  echo "* ${source_msg} gender: '$gender'"

  echo "* Download and unzip genome reference..."
  mkdir -p input/lambda || dme_die "Could not create input directories."
  # pipefail is enabled only inside the subshells so that a failing download
  # is not hidden by a succeeding gunzip, without changing the caller's options.
  ( set -o pipefail
    dx download "$reference" -o - | gunzip > "input/${genome}_${gender}.fa" ) \
    || dme_die "Failed to download/unzip reference '$reference'."
  ( set -o pipefail
    dx download "$lambda" -o - | gunzip > input/lambda/lambda.fa ) \
    || dme_die "Failed to download/unzip lambda '$lambda'."
  dx download "$chrom_sizes" -o input/chrom.sizes \
    || dme_die "Failed to download chrom_sizes '$chrom_sizes'."

  index_root="${genome}_${gender}_bismark_bowtie2_index"
  echo "* Expect to create '${index_root}.tgz'"

  echo "* Preparing/indexing ${genome}-${gender} genome..."
  set -x
  # Without pipefail the pipeline status would be tee's, hiding indexer errors.
  ( set -o pipefail
    bismark_genome_preparation --bowtie2 --path_to_bowtie /usr/bin/ input | tee ref.log ) \
    || dme_die "bismark_genome_preparation failed for ${genome}-${gender}."
  set +x

  echo "* Preparing/indexing lambda genome..."
  set -x
  ( set -o pipefail
    bismark_genome_preparation --bowtie2 --path_to_bowtie /usr/bin/ input/lambda | tee lambda.log ) \
    || dme_die "bismark_genome_preparation failed for lambda."
  set +x

  # QC anyone?  Conversion counts are scraped from the head of each log.
  ref_ctot=$(dme_conversion_count "C->T" ref.log)
  ref_gtoa=$(dme_conversion_count "G->A" ref.log)
  lambda_ctot=$(dme_conversion_count "C->T" lambda.log)
  lambda_gtoa=$(dme_conversion_count "G->A" lambda.log)

  # Counts that could not be scraped become null rather than producing
  # malformed JSON such as '"C_to_T": ,'.
  printf -v meta \
    '{ "reference": { "genome": "%s", "gender": "%s", "C_to_T": %s, "G_to_A": %s }, "lambda": { "C_to_T": %s, "G_to_A": %s } }' \
    "$genome" "$gender" \
    "$(dme_json_number "$ref_ctot")" "$(dme_json_number "$ref_gtoa")" \
    "$(dme_json_number "$lambda_ctot")" "$(dme_json_number "$lambda_gtoa")"

  echo "* JSON metadata..."
  printf '%s\n' "$meta"
  echo "* ----------------"

  echo "* Archiving prepped genome..."
  ls -l input/Bisulfite_Genome/
  set -x
  tar zcvf "${index_root}.tgz" input/chrom.sizes "input/${genome}_${gender}.fa" input/Bisulfite_Genome/ \
    input/lambda/lambda.fa input/lambda/Bisulfite_Genome/ \
    || dme_die "Failed to archive '${index_root}.tgz'."
  set +x

  echo "* Upload results..."
  dme_ix=$(dx upload "${index_root}.tgz" --details "${meta}" --property genome="$genome" --property gender="$gender" \
    --property C_to_T="$ref_ctot" --property G_to_A="$ref_gtoa" --property SW="$versions" --brief) \
    || dme_die "Failed to upload '${index_root}.tgz'."

  dx-jobutil-add-output dme_ix "$dme_ix" --class=file \
    || dme_die "Failed to register output 'dme_ix'."
  dx-jobutil-add-output metadata "${meta}" --class=string \
    || dme_die "Failed to register output 'metadata'."

  echo "* Finished."
}