/
20200318_cbai_diamond_blastx.sh
88 lines (71 loc) · 1.96 KB
/
20200318_cbai_diamond_blastx.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash
## Job Name
#SBATCH --job-name=cbai_blastx_DIAMOND
## Allocation Definition
#SBATCH --account=coenv
#SBATCH --partition=coenv
## Resources
## Nodes
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=20-00:00:00
## Memory per node
#SBATCH --mem=120G
##turn on e-mail notification
#SBATCH --mail-type=ALL
#SBATCH --mail-user=samwhite@uw.edu
## Specify the working directory for this job
#SBATCH --chdir=/gscratch/scrubbed/samwhite/outputs/20200318_cbai_diamond_blastx
## Perform DIAMOND BLASTx on trimmed Chionoecetes bairdi (Tanner crab) FastQ files.
# Exit script if any command fails
set -e
# Load Python Mox module for Python module availability
module load intel-python3_2017
# SegFault fix?
export THREADS_DAEMON_MODEL=1
# Document programs in PATH (primarily for program version ID)
{
date
echo ""
echo "System PATH for $SLURM_JOB_ID"
echo ""
printf "%0.s-" {1..10}
echo "${PATH}" | tr : \\n
} >> system_path.log
# Program paths
diamond=/gscratch/srlab/programs/diamond-0.9.29/diamond
# DIAMOND NCBI nr database
dmnd=/gscratch/srlab/blastdbs/ncbi-nr-20190925/nr.dmnd
# Capture program options
{
echo "Program options for DIAMOND: "
echo ""
"${diamond}" help
echo ""
echo ""
echo "----------------------------------------------"
echo ""
echo ""
} &>> program_options.log || true
# Trimmed FastQ files directory
fastq_dir=/gscratch/scrubbed/samwhite/outputs/20200318_cbai_RNAseq_fastp_trimming/
# Loop through FastQ files, log filenames to fastq_list.txt.
# Run DIAMOND on each FastQ
for fastq in ${fastq_dir}*fastp-trim*.fq.gz
do
# Log input FastQs
echo "${fastq}" >> fastq_list.txt
# Strip leading path and extensions
no_path=$(echo "${fastq##*/}")
no_ext=$(echo "${no_path%%.*}")
# Run DIAMOND with blastx
# Output format 100 produces a DAA binary file for use with MEGAN
${diamond} blastx \
--db ${dmnd} \
--query "${fastq}" \
--out "${no_ext}".blastx.daa \
--outfmt 100 \
--top 5 \
--block-size 15.0 \
--index-chunks 4
done