forked from jgurtowski/ectools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
correct.sh
executable file
·98 lines (67 loc) · 2.69 KB
/
correct.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/bin/bash -x
#Make sure all of the Nucmer tools are in your path
set -e
source ~/.bashrc
##SET THE FOLLOWING PARAMETERS
#path to correct script in pbtools repo
CORRECT_SCRIPT=/path/to/ectools/pb_correct.py
#pre filter delta file
PRE_DELTA_FILTER_SCRIPT=/path/to/ectools/pre_delta_filter.py
#smallest alignment allowed, filter out alignments smaller than this
#set this a few bp short of the short-read length
MIN_ALIGNMENT_LEN=200
#Allow % from the ends of the fragments to be wiggle room
#(for determining proper overlaps)
WIGGLE_PCT=0.05
#pct of read length for alignment to be considered contained
CONTAINED_PCT_ID=0.80
#path to high identity unitigs
UNITIG_FILE=/path/to/unitigs.fa
#Trim out regions with lower identity than
CLR_PCT_ID=0.96
#Minimum read length to output after splitting/trimming
MIN_READ_LEN=3000
#Number of bases without a match after which nucmer breaks the alignment.
#Increase for better alignments but at the cost of dramatically
#increased runtime
#Small genomes (10's of MB) can up this to 10000
NUCMER_BREAK_LEN=2000
##Filter the delta file for proper overlaps before doing LIS in delta-filter
#
#Depending on the quality of the data, you may want to ensure
#that only proper overlapping alignments are used for correction.
#If the initial short read assembly is very good (ex. 100kb contig N50)
#you probably want to ensure proper overlaps.
#If the initial assembly is not very contiguous, requiring
#proper overlaps may hinder correction.
#'true' setting is most useful for short genomes with high coverage.
PRE_DELTA_FILTER=true
###Done parameters
suffix=`printf "%04d" $SGE_TASK_ID`
FILE=p${suffix}
ORIGINAL_DIR=`pwd`
#Move to sge temp storage
if [[ $TMPDIR ]]
then
cd $TMPDIR
fi
cp ${ORIGINAL_DIR}/${FILE} .
nucmer --maxmatch -l 11 -b ${NUCMER_BREAK_LEN} -g 1000 -p ${FILE} ${FILE} ${UNITIG_FILE}
cp ${FILE}.delta ${ORIGINAL_DIR}
FILTERED_DELTA=${FILE}.delta
if [[ "$PRE_DELTA_FILTER" == true ]]
then
FILTERED_DELTA=${FILE}.delta.pre
python ${PRE_DELTA_FILTER_SCRIPT} ${FILE}.delta ${WIGGLE_PCT} ${CONTAINED_PCT_ID} ${MIN_ALIGNMENT_LEN} ${FILTERED_DELTA}
cp ${FILTERED_DELTA} ${ORIGINAL_DIR}
cp ${FILTERED_DELTA}.log ${ORIGINAL_DIR}
fi
delta-filter -l $MIN_ALIGNMENT_LEN -i 70.0 -r ${FILTERED_DELTA} > ${FILTERED_DELTA}.r
cp ${FILTERED_DELTA}.r ${ORIGINAL_DIR}
show-coords -l -H -r ${FILTERED_DELTA}.r > ${FILTERED_DELTA}.r.sc
cp ${FILTERED_DELTA}.r.sc ${ORIGINAL_DIR}
show-snps -H -l -r ${FILTERED_DELTA}.r > ${FILTERED_DELTA}.snps
cp ${FILTERED_DELTA}.snps ${ORIGINAL_DIR}
python ${CORRECT_SCRIPT} ${FILE} ${FILTERED_DELTA}.snps ${FILTERED_DELTA}.r.sc ${CLR_PCT_ID} ${MIN_READ_LEN} ${FILE}
cp ${FILE}.cor.fa ${ORIGINAL_DIR}
cp ${FILE}.cor.pileup ${ORIGINAL_DIR}