# Map parental illumina to progeny

In [1]:
wkdir=/powerplant/workspace/hrasrb/Blueberry_trio/2024-03-12_mapping_rates
cd ${wkdir}

## If file already exists delete it, otherwise text gets appended to existing file
file="cleaned_assemblies.fofn"
if [ -f "$file" ] ; then
    rm "$file"
fi

cat <<'EOF' >> cleaned_assemblies.fofn
/powerplant/workspace/hraijc/Blueberry/Blueberry_trio/Assembly/postcontamremoval/classified_M7_plus_unclassified_hap1.clean.fa
/powerplant/workspace/hraijc/Blueberry/Blueberry_trio/Assembly/postcontamremoval/classified_M7_plus_unclassified_hap2.clean.fa
/powerplant/workspace/hraijc/Blueberry/Blueberry_trio/Assembly/postcontamremoval/classified_Nui_plus_unclassified_hap1.clean.fa
/powerplant/workspace/hraijc/Blueberry/Blueberry_trio/Assembly/postcontamremoval/classified_Nui_plus_unclassified_hap2.clean.fa
EOF

# Index references

In [None]:
# set working directory
WKDIR=/powerplant/workspace/hrasrb/kiwifruit_pangenome/SV_calling/07-11-2022_T08.33-13-15a_Trial/10.other_align
cd $WKDIR

# create variables
LOG=/powerplant/workspace/hrasrb/kiwifruit_pangenome/SV_calling/07-11-2022_T08.33-13-15a_Trial/log

genome_file=/powerplant/output/genomic/plant/Actinidia/chinensis/CK51F3_01/Genome/Assembly/v2.0/Fasta/Red5_V2.chromosomes.only.fsa # Red5v2

# load modules
ml bwa/0.7.17
# create and submit bash script
sbatch << EOF
#!/bin/bash -e

#SBATCH -J index_BWA
#SBATCH --output=${LOG}/hrasrb_%j.out
#SBATCH --error=${LOG}/hrasrb_%j.err
#SBATCH --mail-user=Sarah.Bailey@plantandfood.co.nz
#SBATCH --mail-type=ALL
#SBATCH --time=00:30:00 # Walltime # change this
#SBATCH --mem=4G # change this

echo "Index genome."
bwa index -a bwtsw ${genome_file} -p genome.index
echo "Done."

EOF

In [3]:
base=/powerplant/workspace/hrasrb/Blueberry_trio/2024-03-12_mapping_rates
LOG=/powerplant/workspace/hrasrb/log

mkdir -p -v ${base}
cd ${base}

# load modules
ml bwa/0.7.17

FILE=${base}/cleaned_assemblies.fofn
cat $FILE | while read line
do
    PREFIX=$(basename ${line} .fa)
    echo "bwa index -a bwtsw ${line} -p "${PREFIX}.index""
done | abatch -j index_array --time 00:20:00 --mem 2G --cpus-per-task=1 --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL | sbatch

module unload bwa/0.7.17


SBATCH_ARGS: --time 00:20:00 --mem 2G --cpus-per-task=1 --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL
JOB_ARRAY_NAME: index_array
GROUP_SIZE: 1
NUM_COMMANDS: 4
Submitted batch job 4691571


In [6]:
# ml bwa
bwa mem


Usage: bwa mem [options] <idxbase> <in1.fq> [in2.fq]

Algorithm options:

       -t INT        number of threads [1]
       -k INT        minimum seed length [19]
       -w INT        band width for banded alignment [100]
       -d INT        off-diagonal X-dropoff [100]
       -r FLOAT      look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]
       -y INT        seed occurrence for the 3rd round seeding [20]
       -c INT        skip seeds with more than INT occurrences [500]
       -D FLOAT      drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50]
       -W INT        discard a chain if seeded bases shorter than INT [0]
       -m INT        perform at most INT rounds of mate rescues for each read [50]
       -S            skip mate rescue
       -P            skip pairing; mate rescue performed unless -S also in use

Scoring options:

       -A INT        score for a sequence match, which scales options -TdBOELU unless overridden [1]
     

: 1

In [7]:
M71=/powerplant/input/genomic/plant/Vaccinium/corymbosum/ExperimentRequestor10969_NuixM7_TrioBin/816043/M7_L001_ds.588732e90e434df093aa5fbfe0ad68c2/M7_S1_L001_R1_001.fastq.gz
M72=/powerplant/input/genomic/plant/Vaccinium/corymbosum/ExperimentRequestor10969_NuixM7_TrioBin/816043/M7_L001_ds.588732e90e434df093aa5fbfe0ad68c2/M7_S1_L001_R2_001.fastq.gz
M73=/powerplant/input/genomic/plant/Vaccinium/corymbosum/ExperimentRequestor10969_NuixM7_TrioBin/816043/M7_L004_ds.3bb185df2b8c4f09a01f7279b8236997/M7_S1_L004_R1_001.fastq.gz
M74=/powerplant/input/genomic/plant/Vaccinium/corymbosum/ExperimentRequestor10969_NuixM7_TrioBin/816043/M7_L004_ds.3bb185df2b8c4f09a01f7279b8236997/M7_S1_L004_R2_001.fastq.gz

BASE=/powerplant/workspace/hrasrb/Blueberry_trio/2024-03-12_mapping_rates
LOG=/powerplant/workspace/hrasrb/log

OUTDIR=${BASE}/00.M7_combined

mkdir -p -v ${OUTDIR}
cd ${OUTDIR}

# create and submit bash script
sbatch --nice << EOF
#!/bin/bash -e

#SBATCH -J Combine_M7_reads
#SBATCH --output=${LOG}/hrasrb_%j.out
#SBATCH --error=${LOG}/hrasrb_%j.err
#SBATCH --mail-user=Sarah.Bailey@plantandfood.co.nz
#SBATCH --mail-type=ALL
#SBATCH --time=24:00:00 # Walltime # change this
#SBATCH --mem=4G # change this
#SBATCH --cpus-per-task 1 # change this

echo
echo
echo "Combine Read 1 lane 1 & 4"
cat ${M71} ${M73} > ./M7_S1_L00X_R1_001.fastq.gz
echo "Done."
echo
echo
echo "Combine Read 2 lane 1 & 4"
cat ${M72} ${M74} > ./M7_S1_L00X_R2_001.fastq.gz
echo "Done."

EOF


mkdir: created directory ‘/powerplant/workspace/hrasrb/Blueberry_trio/2024-03-12_mapping_rates/00.M7_combined’
Submitted batch job 4691968


In [3]:
# M71=/powerplant/workspace/hrasrb/Blueberry_trio/2024-03-12_mapping_rates/00.M7_combined/M7_S1_L00X_R1_001.fastq.gz
# M72=/powerplant/workspace/hrasrb/Blueberry_trio/2024-03-12_mapping_rates/00.M7_combined/M7_S1_L00X_R2_001.fastq.gz

Nui1=/powerplant/input/genomic/plant/Vaccinium/corymbosum/ExperimentRequestor10969_NuixM7_TrioBin/816043/Nui_L001_ds.8485a4d14def4ea9ad3784b85fadd7a2/Nui_S2_L001_R1_001.fastq.gz
Nui2=/powerplant/input/genomic/plant/Vaccinium/corymbosum/ExperimentRequestor10969_NuixM7_TrioBin/816043/Nui_L001_ds.8485a4d14def4ea9ad3784b85fadd7a2/Nui_S2_L001_R2_001.fastq.gz

BASE=/powerplant/workspace/hrasrb/Blueberry_trio/2024-03-12_mapping_rates
LOG=/powerplant/workspace/hrasrb/log

# OUTDIR=${BASE}/01.align_M7_reads
# OUTDIR=${BASE}/01.align_Nui_reads

# OUTDIR=${BASE}/02.align_M7_reads_B40
# OUTDIR=${BASE}/02.align_Nui_reads_B40

# OUTDIR=${BASE}/03.align_M7_reads_stringent
OUTDIR=${BASE}/03.align_Nui_reads_stringent

mkdir -p -v ${OUTDIR}
cd ${OUTDIR}

# More stringent alignment: rm *vhttps://www.biostars.org/p/132681/
# -L penalty for 5' & 3' clipping
# -B penalty for mismatch
# -O penalty for gaps open
# -E penalty for extension

# load packages
ml bwa/0.7.17
ml samtools/1.16

FILE=${BASE}/cleaned_assemblies.fofn
cat $FILE | while read line
do
    PREFIX=$(basename ${line} .fa)
    echo "bwa mem -B 40 -O 60 -E 10 -L100 -t 8 "${BASE}/${PREFIX}.index" ${Nui1} ${Nui2} > "${PREFIX}.sam""
    echo "samtools sort --threads 8 "${PREFIX}.sam" -l 1 -o "${PREFIX}.sorted.bam" -O bam"
    echo "rm "${PREFIX}.sam""
done | abatch -j alignment_array_Nui -g 3 --time 24:00:00 --mem 8G --cpus-per-task=6 --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL | sbatch --nice --dependency=afterany:5177509

module unload samtools/1.16
module unload bwa/0.7.17

cd ${BASE}

mkdir: created directory ‘/powerplant/workspace/hrasrb/Blueberry_trio/2024-03-12_mapping_rates/03.align_Nui_reads_stringent’
SBATCH_ARGS: --time 24:00:00 --mem 8G --cpus-per-task=6 --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL
JOB_ARRAY_NAME: alignment_array_Nui
GROUP_SIZE: 3
NUM_COMMANDS: 12
ARRAY_SIZE: 4
Submitted batch job 5177513


In [7]:
BASE=/powerplant/workspace/hrasrb/Blueberry_trio/2024-03-12_mapping_rates
LOG=/powerplant/workspace/hrasrb/log

# OUTDIR=${BASE}/01.align_M7_reads
# OUTDIR=${BASE}/01.align_Nui_reads
# OUTDIR=${BASE}/02.align_M7_reads_B40
# OUTDIR=${BASE}/02.align_Nui_reads_B40
# OUTDIR=${BASE}/03.align_M7_reads_stringent
OUTDIR=${BASE}/03.align_Nui_reads_stringent

mkdir -p -v ${OUTDIR}
cd ${OUTDIR}

# load packages
ml samtools/1.16

FILE=${BASE}/cleaned_assemblies.fofn
cat $FILE | while read line
do
    PREFIX=$(basename ${line} .fa)
    echo "samtools flagstat "${PREFIX}.sorted.bam" > "${PREFIX}.flagstat.stringent.txt""
done | abatch -j samtools_array_Nui --time 01:00:00 --mem 50M --cpus-per-task=1 --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL | sbatch --nice --dependency=afterok:5177513

module unload samtools/1.16

cd ${BASE}
#     echo "samtools sort -n "${PREFIX}.sorted.bam" -@ 8 -o "${BASE}.nsorted.bam""
#     echo "samtools fixmate "${PREFIX}.nsorted.bam" -@ 8 "${BASE}.sorted.fixmate.bam""
#     echo "samtools index -@ 4 "${PREFIX}.sorted.bam""
#     echo "samtools view -q 20 "${PREFIX}.sorted.bam" -o "${PREFIX}.sorted.filtered.bam""

SBATCH_ARGS: --time 01:00:00 --mem 50M --cpus-per-task=1 --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL
JOB_ARRAY_NAME: samtools_array_Nui
GROUP_SIZE: 1
NUM_COMMANDS: 4
Submitted batch job 5177517


In [4]:
squeue -u hrasrb

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
     4709736_[1-4]      fast samtools   hrasrb PD       0:00      1 (BeginTime)
     4709735_[1-4]      fast samtools   hrasrb PD       0:00      1 (BeginTime)
           4693027    medium    BMARK   hrasrb  R 1-23:06:45      1 aklppb41
           4709732     short genomeco   hrasrb PD       0:00      1 (Priority)
           4707569     short nf-SV_BE   hrasrb  R   19:05:59      1 wkoppb30
