This repository has been archived by the owner on Mar 1, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 8
/
baralign.sh
executable file
·114 lines (102 loc) · 3.09 KB
/
baralign.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env bash
if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then
echo "Error: Must use bash version 4+." >&2
exit 1
fi
set -ue
Usage="Usage: \$ $(basename $0) [-R] families_file ref_dir out_file
families_file: The families.tsv produced by make-barcodes.awk and sorted.
ref_dir: The directory to put the reference file (\"barcodes.fa\") and its index
files in.
out_file: The path to put the output alignment BAM file at.
-R: Don't include reversed barcodes (alpha+beta -> beta+alpha) in the alignment target."
function main {
# Read in arguments and check them.
reverse=true
while getopts ":rh" opt; do
case "$opt" in
r) reverse='';;
h) fail "$USAGE";;
esac
done
# Get positional arguments.
families=${@:$OPTIND:1}
refdir=${@:$OPTIND+1:1}
outfile=${@:$OPTIND+2:1}
if ! [[ -f $families ]]; then
fail "Error: families_file \"$families\" not found."
fi
if ! [[ -d $refdir ]]; then
echo "Info: ref_dir \"$refdir\" not found. Creating.." >&2
mkdir $refdir
fi
outbase=$(echo $outfile | sed -E 's/\.bam$//')
if [[ $outbase == $outfile ]]; then
fail "Error: out_file \"$outfile\" does not end in .bam."
fi
if [[ -e $outfile ]] || [[ -e $outbase.sam ]] || [[ -e $outbase.tmp.sam ]]; then
fail "Error: out_file \"$outfile\" conflicts with existing filename(s)."
fi
for cmd in bowtie2 bowtie2-build samtools awk; do
if ! which $cmd >/dev/null 2>/dev/null; then
fail "Error: command \"$cmd\" not found."
fi
done
echo "
families: $families
refdir: $refdir
outfile: $outfile
outbase: $outbase" >&2
# Create FASTA with barcodes as "reads" for alignment.
awk '$1 != last {
count++
print ">" count
print $1
}
{
last = $1
}' $families > $refdir/barcodes.fa
# Create "reference" to align the barcodes to.
if [[ $reverse ]]; then
# If we're including reversed barcodes, create a new FASTA which includes reversed barcodes
# as well as their forward versions.
awk '
$1 != last {
count++
bar = $1
print ">" count
print bar
print ">" count ":rev"
print swap_halves(bar)
}
{
last = $1
}
function swap_halves(str) {
half = length(str)/2
alpha = substr(str, 1, half)
beta = substr(str, half+1)
return beta alpha
}' $families > $refdir/barcodes-ref.fa
else
# If we're not including reversed barcodes, the original FASTA is all we need. Just link to it.
ln -s $refdir/barcodes.fa $refdir/barcodes-ref.fa
fi
# Perform alignment.
bowtie2-build --packed $refdir/barcodes-ref.fa $refdir/barcodes-ref >/dev/null
bowtie2 -a -x $refdir/barcodes-ref -f -U $refdir/barcodes.fa -S $outbase.sam
samtools view -Sb $outbase.sam > $outbase.tmp.bam
samtools sort $outbase.tmp.bam $outbase
if [[ -s $outfile ]]; then
samtools index $outbase.bam
rm $outbase.sam $outbase.tmp.bam
echo "Success. Output located in \"$outfile\"." >&2
else
echo "Warning: No output file \"$outfile\" found." >&2
fi
}
function fail {
echo "$@" >&2
exit 1
}
main "$@"