-
Notifications
You must be signed in to change notification settings - Fork 1
/
spacesavers2_e2e
executable file
·91 lines (77 loc) · 3.16 KB
/
spacesavers2_e2e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env bash
####################################################################################
# spacesavers2 end-to-end wrapper script
#
# Runs spacesavers2_catalog, spacesavers2_mimeo and spacesavers2_grubbers in
# sequence on one folder and writes every output/log file into --outfolder.
#
# Command-line options are declared in the here-document passed to `argparse`
# (a function provided by resources/argparse.bash next to this script). The rest
# of the script reads the parsed values as $FOLDER, $THREADS, $MAXDEPTH, $LIMIT,
# $QUOTA and $OUTFOLDER.
####################################################################################
# Abort on the first failing command, including a failure inside a pipeline.
set -e -o pipefail

# Absolute directory holding this script, so resources/ is found regardless of
# the caller's working directory.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# ncpus=`nproc`
ARGPARSE_DESCRIPTION="End-to-end run of spacesavers2"

# Quoted so that an install path containing spaces or glob characters still works.
source "${SCRIPT_DIR}/resources/argparse.bash" || exit 1

# The parser lines below are passed verbatim on stdin; keep them unindented.
argparse "$@" <<EOF || exit 1
parser.add_argument('-f','--folder',required=True, help='Folder to run spacesavers_catalog on.')
parser.add_argument('-p','--threads',required=False, help='number of threads to use', default=4)
parser.add_argument('-d','--maxdepth',required=False, help='maxdepth for mimeo', default=4)
parser.add_argument('-l','--limit',required=False, help='limit for running spacesavers_grubbers', default=5)
parser.add_argument('-q','--quota',required=False, help='total size of the volume (default = 200 for /data/CCBR)', default=200)
parser.add_argument('-o','--outfolder',required=True, help='Folder where all spacesavers_e2e output files will be saved')
EOF
# assuming that python 3.11 is available with xxhash module
# . "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311

# Pick a scratch directory: the per-job /lscratch area when running under a
# SLURM job (SLURM_JOB_ID is non-empty), otherwise the output folder itself.
# NOTE(review): tmpdir is not read anywhere else in the visible script — confirm
# whether it is still needed.
case "${SLURM_JOB_ID}" in
  "")
    tmpdir="${OUTFOLDER}"
    ;;
  *)
    tmpdir="/lscratch/${SLURM_JOB_ID}"
    ;;
esac
# run spacesavers2
# Build the names of all output files. Each one lives in OUTFOLDER and is named
#   <YYYYMMDD>.<FOLDER with every "/" replaced by "_">.<step suffix>
dt=$(date +%Y%m%d)
prefix="${dt}.${FOLDER//\//_}"

outfile_catalog="${OUTFOLDER}/${prefix}.catalog"
outfile_catalog_err="${OUTFOLDER}/${prefix}.catalog.err"
outfile_catalog_log="${OUTFOLDER}/${prefix}.catalog.log"

outfile_mimeo_log="${OUTFOLDER}/${prefix}.mimeo.log"
outfile_mimeo_err="${OUTFOLDER}/${prefix}.mimeo.err"

outfile_blamematrix="${OUTFOLDER}/${prefix}.blamematrix.tsv"
outfile_blamematrix_log="${OUTFOLDER}/${prefix}.blamematrix.log"
outfile_blamematrix_err="${OUTFOLDER}/${prefix}.blamematrix.err"

# Create the output folder if it is missing. The path is quoted so that a folder
# name containing spaces is tested and created as a single path.
[[ -d "$OUTFOLDER" ]] || mkdir -p -- "$OUTFOLDER"
# spacesavers2_catalog
# Step 1: catalog FOLDER into ${outfile_catalog}; stdout goes to the .log file
# and stderr to the .err file.
# No "$?" guard is needed before this step: the preceding directory check always
# leaves a zero status when it completes. Failure of the tool itself is reported
# explicitly below and the script exits with the tool's exit code.
echo "Running spacesavers2_catalog"
spacesavers2_catalog \
  --folder "$FOLDER" \
  --threads "$THREADS" \
  --outfile "${outfile_catalog}" \
  --bottomhash \
  > "${outfile_catalog_log}" 2> "${outfile_catalog_err}" || {
  rc=$?
  echo "spacesavers2_catalog failed (exit code ${rc}); see ${outfile_catalog_err}" >&2
  exit "$rc"
}

# NOTE(review): fixed 60 s pause before the next step; the reason is not stated
# in this script — presumably it lets the filesystem settle. Confirm.
sleep 60
# spacesavers2_mimeo
# Step 2: run mimeo on the catalog written by the previous step; stdout goes to
# the .log file and stderr to the .err file.
# A "$?" guard here would only observe the exit status of the preceding `sleep`,
# never that of spacesavers2_catalog, so failure is handled on the command itself.
echo "Running spacesavers2_mimeo"

# Best effort: if ktImportText is not already on PATH, try loading the kronatools
# module; if that fails too, only warn on stderr and carry on.
command -V ktImportText 2>/dev/null || module load kronatools || (>&2 echo "module kronatools could not be loaded")

# NOTE(review): `--p` is kept exactly as written; presumably it is an
# abbreviation of a longer option name — confirm against the tool's help.
spacesavers2_mimeo \
  --catalog "${outfile_catalog}" \
  --outdir "${OUTFOLDER}" \
  --quota "$QUOTA" \
  --duplicatesonly \
  --maxdepth "$MAXDEPTH" \
  --p "$prefix" \
  --kronaplot \
  > "${outfile_mimeo_log}" 2> "${outfile_mimeo_err}" || {
  rc=$?
  echo "spacesavers2_mimeo failed (exit code ${rc}); see ${outfile_mimeo_err}" >&2
  exit "$rc"
}

# NOTE(review): fixed 60 s pause before the next step; the reason is not stated
# in this script — presumably it lets the filesystem settle. Confirm.
sleep 60
# spacesavers2_grubbers
# Step 3: run grubbers once for every "${prefix}*files.gz" file in OUTFOLDER.
# For an input named <stem>mimeo.files.gz the outputs are <stem>grubbers.tsv,
# <stem>grubbers.log (stdout) and <stem>grubbers.err (stderr).
echo "Running spacesavers2_grubbers"
n_processed=0
# Glob directly instead of parsing `ls`, so paths containing whitespace stay intact.
for filegz in "${OUTFOLDER}/${prefix}"*files.gz; do
  # When nothing matches, the pattern is left as a literal string; skip it.
  [[ -e "$filegz" ]] || continue

  # Only rewrite the trailing "mimeo.files.gz". A file without that suffix would
  # otherwise get output/log names identical to the input, and the stdout
  # redirection would truncate the input file.
  if [[ "$filegz" != *mimeo.files.gz ]]; then
    echo "Skipping ${filegz}: name does not end in mimeo.files.gz" >&2
    continue
  fi
  stem="${filegz%mimeo.files.gz}"
  outfile="${stem}grubbers.tsv"
  logfile="${stem}grubbers.log"
  errfile="${stem}grubbers.err"

  spacesavers2_grubbers \
    --filesgz "$filegz" \
    --limit "$LIMIT" \
    --outfile "$outfile" \
    > "$logfile" 2> "$errfile" || {
    rc=$?
    echo "spacesavers2_grubbers failed on ${filegz} (exit code ${rc}); see ${errfile}" >&2
    exit "$rc"
  }
  n_processed=$((n_processed + 1))
done

if [[ "$n_processed" -eq 0 ]]; then
  echo "No ${prefix}*mimeo.files.gz files found in ${OUTFOLDER}; spacesavers2_grubbers was not run" >&2
fi
echo "Done!"