# Assess assembly contamination

The hifiasm assemblies produced by Iggy and Chen are required for contamination assessment. 

In [1]:
base=/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination
cd $base

In [3]:
ll *fa

lrwxrwxrwx. 1 hrasrb powerplant  89 Sep  4 10:41 M7xNui_min2k_Flye.fa -> /powerplant/workspace/hraijc/Blueberry/BB_M7xNui_Assembly/03_FLYE/M7xNui_min2k_Flye.fasta
-rw-rw-r--. 1 hrasrb powerplant 104 Sep  6 15:46 Pacific_Biosciences_Blunt_Adapter.fa
lrwxrwxrwx. 1 hrasrb powerplant  77 Sep  4 10:41 progeny_M7_hap1.fa -> /powerplant/workspace/hraczw/github/bioinf_blueberry_trio/M7.bp.hap1.p_ctg.fa
lrwxrwxrwx. 1 hrasrb powerplant  77 Sep  4 10:41 progeny_M7_hap2.fa -> /powerplant/workspace/hraczw/github/bioinf_blueberry_trio/M7.bp.hap2.p_ctg.fa
lrwxrwxrwx. 1 hrasrb powerplant  78 Sep  4 10:41 progeny_Nui_hap1.fa -> /powerplant/workspace/hraczw/github/bioinf_blueberry_trio/Nui.bp.hap1.p_ctg.fa
lrwxrwxrwx. 1 hrasrb powerplant  78 Sep  4 10:41 progeny_Nui_hap2.fa -> /powerplant/workspace/hraczw/github/bioinf_blueberry_trio/Nui.bp.hap2.p_ctg.fa


# Map the reads back to the collapsed reference

In [19]:
WKDIR=/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination
LOG=${WKDIR}/log

NuixM7_SMRTcell1=/powerplant/input/genomic/plant/Vaccinium/corymbosum/ExperimentRequestor10969_NuixM7_TrioBin/Blueberry_M7xNui/SMRTcell1/CCS_Data/m64136_221113_041854.hifi_reads.fastq.gz	
NuixM7_SMRTcell2=/powerplant/input/genomic/plant/Vaccinium/corymbosum/ExperimentRequestor10969_NuixM7_TrioBin/Blueberry_M7xNui/SMRTcell2/CCS_Data/m64136_221114_133241.hifi_reads.fastq.gz	

ASM=${WKDIR}/M7xNui_min2k_Flye.fa

tag=$(basename "${ASM}" .fa)

cd "${WKDIR}"

# mkdir bamfiles

module load minimap2/2.22
module load samtools/1.16

# Create coverage files.
sbatch << EOF
#!/bin/bash
#SBATCH -J "${tag}_minimap_samtools"
#SBATCH -o ${LOG}/hrasrb_%J.out
#SBATCH -e ${LOG}/hrasrb_%J.err
#SBATCH --cpus-per-task=8
#SBATCH --mem=16G
#SBATCH --time=03:00:00
#SBATCH --mail-user=Sarah.Bailey@plantandfood.co.nz
#SBATCH --mail-type=ALL

echo "$tag"
echo
echo "Map"

minimap2 -t 8 -ax map-hifi "${ASM}" "${NuixM7_SMRTcell1}" "${NuixM7_SMRTcell2}" > "${WKDIR}/bamfiles/${tag}_to_hifi.sam"

echo "Done"
echo
echo "Sam -> bam"

samtools view --threads 8 -b "${WKDIR}/bamfiles/${tag}_to_binned_hifi.sam" > "${WKDIR}/bamfiles/${tag}_to_hifi.bam"

echo "Done"
echo
echo "Sort bam"

samtools sort --threads 8 "${WKDIR}/bamfiles/${tag}_to_binned_hifi.bam" > "${WKDIR}/bamfiles/${tag}_to_hifi.sorted.bam"

echo "Done"
echo
echo "index bam"

samtools index "${WKDIR}/bamfiles/${tag}_to_hifi.sorted.bam"

echo "Done"
echo

EOF

module unload samtools/1.16
module unload minimap2/2.22

Submitted batch job 2070365


In [1]:
seff 2070365

Job ID: 2070365
Cluster: powerplant
User/Group: hrasrb/hrasrb
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 8
CPU Utilized: 18:49:49
CPU Efficiency: 98.47% of 19:07:20 core-walltime
Job Wall-clock time: 02:23:25
Memory Utilized: 15.95 GB
Memory Efficiency: 99.69% of 16.00 GB


# 4 haplotype assemblies map the binned reads

In [6]:
cd /workspace/hrasrb/Blueberry_trio/trio-binning/02.classifyHiFi
ll

total 36571176
-rw-rw-r--. 1 hrasrb powerplant 16996071646 Sep 28 16:28 classified_M7_hifi.fastq.gz
-rw-rw-r--. 1 hrasrb powerplant 15639726003 Sep 28 16:28 classified_Nui_hifi.fastq.gz
-rw-rw-r--. 1 hrasrb powerplant    52807511 Sep 28 16:16 unclassified_M7xNui_hifi.fastq.gz


In [5]:
rm *fq

In [6]:
BASE=/workspace/hrasrb/Blueberry_trio

WKDIR=${BASE}/trio-binning/02.classifyHiFi

cd $WKDIR

LOG=${BASE}/log

ml htslib

sbatch << EOF
#!/bin/bash -e

#SBATCH -J Nui_hifi_compress # change this
#SBATCH --output=${LOG}/hrasrb_%j.out
#SBATCH --error=${LOG}/hrasrb_%j.err
#SBATCH --mail-user=Sarah.Bailey@plantandfood.co.nz
#SBATCH --mail-type=ALL
#SBATCH --time=24:00:00 # Walltime # change this
#SBATCH --mem=500M # change this
#SBATCH --cpus-per-task=12

# echo "combine reads"

# cat *_Nui.fq > ./classified_Nui_hifi.fastq
# cat *_M7.fq > ./classified_M7_hifi.fastq
# cat unclassified*.fq > ./unclassified_M7xNui_hifi.fastq

# echo "Done"
# echo
echo "compress reads"

bgzip -@ 12 classified_Nui_hifi.fastq

echo "Done"
echo

EOF

module unload htslib


Submitted batch job 2064545


In [1]:
seff 2064545

Job ID: 2064545
Cluster: powerplant
User/Group: hrasrb/hrasrb
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 12
CPU Utilized: 02:12:26
CPU Efficiency: 92.48% of 02:23:12 core-walltime
Job Wall-clock time: 00:11:56
Memory Utilized: 10.14 MB
Memory Efficiency: 2.03% of 500.00 MB


In [7]:
squeue -u hrasrb

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           2064545     short Nui_hifi   hrasrb PD       0:00      1 (Priority)
         2064410_1     short MegaBlas   hrasrb  R    1:17:03      1 wkoppb30
         2064410_2     short MegaBlas   hrasrb  R    1:17:03      1 aklppg33
         2064410_3     short MegaBlas   hrasrb  R    1:17:03      1 aklppg33
         2064410_4     short MegaBlas   hrasrb  R    1:17:03      1 aklppg33
         2064410_5     short MegaBlas   hrasrb  R    1:17:03      1 aklppg33
         2063723_3     short Reciproc   hrasrb  R    2:25:06      1 wkoppb30
           2064542     short M7_hifi_   hrasrb  R       0:55      1 aklppg32


In [8]:
WKDIR=${BASE}/trio-binning/02.classifyHiFi

cd $WKDIR
ll

total 185692216
-rw-rw-r--. 1 hrasrb powerplant 43009452297 Sep 28 14:44 classified_M7_hifi.fastq
-rw-rw-r--. 1 hrasrb powerplant  1311978474 Sep 28 16:17 classified_M7_hifi.fastq.gz
-rw-rw-r--. 1 hrasrb powerplant 39840258147 Sep 28 14:32 classified_Nui_hifi.fastq
-rw-rw-r--. 1 hrasrb powerplant 18469240330 Feb 28  2023 classified_progeny1_hapA_Nui.fq
-rw-rw-r--. 1 hrasrb powerplant 19952357985 Feb 28  2023 classified_progeny1_hapB_M7.fq
-rw-rw-r--. 1 hrasrb powerplant 21371017817 Feb 28  2023 classified_progeny2_hapA_Nui.fq
-rw-rw-r--. 1 hrasrb powerplant 23057094312 Feb 28  2023 classified_progeny2_hapB_M7.fq
-rw-rw-r--. 1 hrasrb powerplant    52807511 Sep 28 16:16 unclassified_M7xNui_hifi.fastq.gz
-rw-rw-r--. 1 hrasrb powerplant    71112586 Feb 28  2023 unclassified_progeny1.fq
-rw-rw-r--. 1 hrasrb powerplant   105189601 Feb 28  2023 unclassified_progeny2.fq


In [3]:
rm classified_M7_hifi.fastq.gz

In [1]:
BASE=/workspace/hrasrb/Blueberry_trio

LOG=${BASE}/log
cd $LOG

ls

cat *2064450*

blasthits	    hrasrb_2064386.err	hrasrb_2064417.out
hrasrb_2063983.err  hrasrb_2064386.out	hrasrb_2064450.err
hrasrb_2063983.out  hrasrb_2064417.err	hrasrb_2064450.out
slurmstepd: error: *** JOB 2064450 ON wkoppb40 CANCELLED AT 2023-09-28T16:06:02 DUE TO TIME LIMIT ***
compress reads


## Map to M7 haps

In [2]:
WKDIR=/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination
LOG=${WKDIR}/log

M7_hifi=/workspace/hrasrb/Blueberry_trio/trio-binning/02.classifyHiFi/classified_M7_hifi.fastq.gz

# ASM=${WKDIR}/progeny_M7_hap1.fa
ASM=${WKDIR}/progeny_M7_hap2.fa

tag=$(basename "${ASM}" .fa)

cd "${WKDIR}"

# mkdir bamfiles

module load minimap2/2.22
module load samtools/1.16

# Create coverage files.
sbatch << EOF
#!/bin/bash
#SBATCH -J "${tag}_minimap_samtools"
#SBATCH -o ${LOG}/hrasrb_%J.out
#SBATCH -e ${LOG}/hrasrb_%J.err
#SBATCH --cpus-per-task=8
#SBATCH --mem=24G
#SBATCH --time=03:00:00

echo "$tag"
echo
echo "Map"

minimap2 -t 8 -ax map-hifi "${ASM}" "${M7_hifi}" > "${WKDIR}/bamfiles/${tag}_to_binned_hifi.sam"

echo "Done"
echo
echo "Sam -> bam"

samtools view --threads 8 -b "${WKDIR}/bamfiles/${tag}_to_binned_hifi.sam" > "${WKDIR}/bamfiles/${tag}_to_binned_hifi.bam"

echo "Done"
echo
echo "Sort bam"

samtools sort --threads 8 "${WKDIR}/bamfiles/${tag}_to_binned_hifi.bam" > "${WKDIR}/bamfiles/${tag}_to_binned_hifi.sorted.bam"

echo "Done"
echo
echo "index bam"

samtools index "${WKDIR}/bamfiles/${tag}_to_binned_hifi.sorted.bam"

echo "Done"
echo

EOF

module unload samtools/1.16
module unload minimap2/2.22


Submitted batch job 2069543


In [None]:
2064956 M7 hap1
2069543 M7 hap2

In [14]:
seff 2069543

Job ID: 2069543
Cluster: powerplant
User/Group: hrasrb/hrasrb
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 8
CPU Utilized: 12:45:53
CPU Efficiency: 89.00% of 14:20:32 core-walltime
Job Wall-clock time: 01:47:34
Memory Utilized: 11.09 GB
Memory Efficiency: 46.21% of 24.00 GB


## Map to Nui haps

In [4]:
WKDIR=/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination
LOG=${WKDIR}/log

Nui_hifi=/workspace/hrasrb/Blueberry_trio/trio-binning/02.classifyHiFi/classified_Nui_hifi.fastq.gz

# ASM=${WKDIR}/progeny_Nui_hap1.fa
ASM=${WKDIR}/progeny_Nui_hap2.fa

tag=$(basename "${ASM}" .fa)

cd "${WKDIR}"

# mkdir bamfiles

module load minimap2/2.22
module load samtools/1.16

# Create coverage files.
sbatch << EOF
#!/bin/bash
#SBATCH -J "${tag}_minimap_samtools"
#SBATCH -o ${LOG}/hrasrb_%J.out
#SBATCH -e ${LOG}/hrasrb_%J.err
#SBATCH --cpus-per-task=8
#SBATCH --mem=24G
#SBATCH --time=03:00:00

echo "$tag"
echo
echo "Map"

minimap2 -t 8 -ax map-hifi "${ASM}" "${M7_hifi}" > "${WKDIR}/bamfiles/${tag}_to_binned_hifi.sam"

echo "Done"
echo
echo "Sam -> bam"

samtools view --threads 8 -b "${WKDIR}/bamfiles/${tag}_to_binned_hifi.sam" > "${WKDIR}/bamfiles/${tag}_to_binned_hifi.bam"

echo "Done"
echo
echo "Sort bam"

samtools sort --threads 8 "${WKDIR}/bamfiles/${tag}_to_binned_hifi.bam" > "${WKDIR}/bamfiles/${tag}_to_binned_hifi.sorted.bam"

echo "Done"
echo
echo "index bam"

samtools index "${WKDIR}/bamfiles/${tag}_to_binned_hifi.sorted.bam"

echo "Done"
echo

EOF

module unload samtools/1.16
module unload minimap2/2.22


Submitted batch job 2069545


In [None]:
2069544 Nui Hap1
2069545 Nui Hap2

## generate blast result

In [4]:
module load ncbi-blast
module list

Currently Loaded Modulefiles:
 1) [46mpowerPlant/core[0m    4) git/2.21.0      7) asub/2.2           
 2) texlive/20151117   5) perlbrew/0.76   8) ncbi-blast/2.11.0  
 3) pandoc/1.19.2      6) perl/5.28.0    

Key:
[46msticky[0m  


In [6]:
base=/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination

cd "${base}"

file="Blueberry_haplotypes.fofn"
if [ -f "$file" ] ; then
    rm "$file"
fi

cat <<EOF > Blueberry_haplotypes.fofn
/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination/progeny_Nui_hap2.fa
/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination/progeny_Nui_hap1.fa
/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination/progeny_M7_hap2.fa
/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination/progeny_M7_hap1.fa
EOF

In [16]:
module load ncbi-blast

base=/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination

cd "${base}"

mkdir blasthits

file=./Blueberry_haplotypes.fofn

cat "${file}" | while read line
do
    tag=$(basename "${line}" .fa)
    echo "blastn \
    -task megablast \
    -query "${line}" \
    -db /workspace/ComparativeDataSources/NCBI/nt/nt \
    -outfmt '6 qseqid staxids bitscore std' \
    -max_target_seqs 1 \
    -max_hsps 1 \
    -evalue 1e-10 \
    -num_threads 12 \
    -out "${base}/blasthits/${tag}.vs.nt.mts1.hsp1.1e10.megablast.out""
done | abatch -j MegaBlast --time 24:00:00 --mem 1G --cpus-per-task=12 --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL | sbatch

module unload ncbi-blast

SBATCH_ARGS: --time 24:00:00 --mem 1G --cpus-per-task=12 --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL
JOB_ARRAY_NAME: MegaBlast
GROUP_SIZE: 1
NUM_COMMANDS: 5
Submitted batch job 2064410


## Sorting by query, then by bitscore, then by evalue

In [6]:
module load ncbi-blast

base=/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination

cd "${base}"

mkdir blasthits

file=./Blueberry_haplotypes.fofn

# blastn -query your.fa -db your_db.fa -outfmt 6 | sort -k1,1 -k12,12nr -k11,11n | sort -u -k1,1 --merge > best_single_hits.blastn

# -k3,3nr
# -k14,14n

# c("qseqid", "staxids", "bitscore", "std", "sseqid", "pident", "length", "mismatch", "gapopen", "qstart", "qend", "sstart", "send", "evalue")
cat "${file}" | while read line
do
    tag=$(basename "${line}" .fa)
    echo "blastn \
    -task megablast \
    -query "${line}" \
    -db /workspace/ComparativeDataSources/NCBI/nt/nt \
    -outfmt '6 qseqid staxids bitscore std' \
    -evalue 1e-10 \
    -num_threads 12 | sort -k1,1 -k3,3nr -k14,14n | sort -u -k1,1 --merge > "${base}/blasthits/${tag}.vs.nt.best_single.1e10.megablast.out""
done | abatch -j MegaBlast_best_single --time 24:00:00 --mem 1G --cpus-per-task=12 --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL | sbatch

module unload ncbi-blast

mkdir: cannot create directory ‘blasthits’: File exists
SBATCH_ARGS: --time 24:00:00 --mem 1G --cpus-per-task=12 --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL
JOB_ARRAY_NAME: MegaBlast_best_single
GROUP_SIZE: 1
NUM_COMMANDS: 4
Submitted batch job 2072110


In [5]:
seff 2064410

Job ID: 2064410
Array Job ID: 2064410_5
Cluster: powerplant
User/Group: hrasrb/hrasrb
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 12
CPU Utilized: 2-23:27:41
CPU Efficiency: 96.08% of 3-02:22:24 core-walltime
Job Wall-clock time: 06:11:52
Memory Utilized: 669.66 MB
Memory Efficiency: 65.40% of 1.00 GB


In [15]:
cat ${base}/MegaBlast/MegaBlast.err/*err

Command line argument error: Argument "out". File is not accessible:  `/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination/blasthits/progeny_M7_hap1.vs.nt.mts1.hsp1.1e10.megablast.out'
Command line argument error: Argument "out". File is not accessible:  `/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination/blasthits/progeny_Nui_hap2.vs.nt.mts1.hsp1.1e10.megablast.out'
Command line argument error: Argument "out". File is not accessible:  `/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination/blasthits/progeny_Nui_hap1.vs.nt.mts1.hsp1.1e10.megablast.out'
Command line argument error: Argument "out". File is not accessible:  `/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination/blasthits/progeny_M7_hap2.vs.nt.mts1.hsp1.1e10.megablast.out'
Command line argument error: Argument "out". File is not accessible:  `/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination/blasthits/M7xNu

## generate blobplot

In [7]:
module list

Loading [1mblobtools/1.0[22m
  [94mLoading requirement[0m: pfr-python2/2.7.13
Currently Loaded Modulefiles:
 1) [46mpowerPlant/core[0m    4) git/2.21.0      7) asub/2.2            
 2) texlive/20151117   5) perlbrew/0.76   8) [100mpfr-python2/2.7.13[0m  
 3) pandoc/1.19.2      6) perl/5.28.0     9) blobtools/1.0       

Key:
[100mauto-loaded[0m  [46msticky[0m  


In [6]:
module load conda
conda activate hraijc_blobtools
/workspace/hraijc/git_clones/blobtools/blobtools --version

(/workspace/appscratch/miniconda/hraijc_blobtools) 1.1.1
(/workspace/appscratch/miniconda/hraijc_blobtools) 

: 1

In [9]:
conda deactivate
module unload conda

In [5]:
WKDIR=/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination

module load blobtools

cd "${WKDIR}"

mkdir -p BlobPlot_out

file=./Blueberry_haplotypes.fofn

cat "${file}" | while read line
do
    tag=$(basename "${line}" .fa)
    echo "blobtools create \
         -i "${line}" \
         -b "${WKDIR}/bamfiles/${tag}_to_binned_hifi.sorted.bam" \
         -t "${WKDIR}/blasthits/${tag}.vs.nt.best_single.1e10.megablast.out" \
         -o "${WKDIR}/BlobPlot_out/${tag}.best_single.blobplot""
    echo "blobtools plot \
         -i "${WKDIR}/BlobPlot_out/${tag}.best_single.blobplot.blobDB.json" \
         -o "${WKDIR}/BlobPlot_out/${tag}.best_single.blobplot""
    echo "blobtools view \
        -i "${WKDIR}/BlobPlot_out/${tag}.best_single.blobplot.blobDB.json" \
        -o "${WKDIR}/BlobPlot_out/${tag}.best_single.blobplot""
    echo "blobtools view \
         -i "${WKDIR}/BlobPlot_out/${tag}.best_single.blobplot.blobDB.json" \
         -o "${WKDIR}/BlobPlot_out/${tag}.best_single.rank_species.blobplot" \
         -r 'species'"
    echo "blobtools view \
         -i "${WKDIR}/BlobPlot_out/${tag}.best_single.blobplot.blobDB.json" \
         -o "${WKDIR}/BlobPlot_out/${tag}.best_single.rank_order.blobplot" \
         -r 'order'"
done | abatch -j BlobPlot -g 5 --time 00:30:00 --mem 1G --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL | sbatch

module unload blobtools

SBATCH_ARGS: --time 00:30:00 --mem 1G --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL
JOB_ARRAY_NAME: BlobPlot
GROUP_SIZE: 5
NUM_COMMANDS: 10
ARRAY_SIZE: 2
Submitted batch job 2075571


In [1]:
seff 2070357

Job ID: 2070357
Array Job ID: 2070357_4
Cluster: powerplant
User/Group: hrasrb/hrasrb
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 2
CPU Utilized: 00:15:18
CPU Efficiency: 56.88% of 00:26:54 core-walltime
Job Wall-clock time: 00:13:27
Memory Utilized: 983.29 MB
Memory Efficiency: 96.02% of 1.00 GB


### Refine the blobplot

```
blobtools plot -i <.blobDB.json> -r 'species' -o <out_file_name> # with species
blobtools plot -i <.blobDB.json> -r 'order' -o <out_file_name> # with order
```


In [7]:
WKDIR=/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination

module load blobtools

cd "${WKDIR}"

file=./Blueberry_haplotypes.fofn

cat "${file}" | while read line
do
    tag=$(basename "${line}" .fa)
    echo "blobtools plot \
         -i "${WKDIR}/BlobPlot_out/best_single_hit/${tag}.best_single.blobplot.blobDB.json" \
         -o "${WKDIR}/BlobPlot_out/best_single_hit/${tag}.best_single.rank_species.blobplot" \
         -r 'species'"
    echo "blobtools plot \
         -i "${WKDIR}/BlobPlot_out/best_single_hit/${tag}.best_single.blobplot.blobDB.json" \
         -o "${WKDIR}/BlobPlot_out/best_single_hit/${tag}.best_single.rank_order.blobplot" \
         -r 'order'"
done | abatch -j BlobPlot -g 2 --time 00:10:00 --mem 1G --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL | sbatch
module unload blobtools

SBATCH_ARGS: --time 00:10:00 --mem 1G --mail-user=Sarah.Bailey@plantandfood.co.nz --mail-type=ALL
JOB_ARRAY_NAME: BlobPlot
GROUP_SIZE: 2
NUM_COMMANDS: 8
ARRAY_SIZE: 4
Submitted batch job 2075792


In [28]:
scancel 2070631

In [5]:
WKDIR=/powerplant/workspace/hrasrb/Blueberry_trio/2023-09-01_remove_contamination

module load blobtools

ASSEMBLY=${WKDIR}/M7xNui_min2k_Flye.fa
PREFIX=$(basename "${ASSEMBLY}" .fa)

cd "${WKDIR}"

sbatch << EOF
#!/bin/bash
#SBATCH -J blobtools
#SBATCH -o ${WKDIR}/log/hrasrb_%J.out
#SBATCH -e ${WKDIR}/log/hrasrb_%J.err
#SBATCH --cpus-per-task=2
#SBATCH --mem=1G
#SBATCH --time=2:00:00

blobtools create \
 -i ${ASSEMBLY} \
 -b ${WKDIR}/bamfiles/${PREFIX}_to_hifi.sorted.bam \
 -t ${WKDIR}/blasthits/${PREFIX}.vs.nt.mts1.hsp1.1e10.megablast.out \
 -o ${WKDIR}/BlobPlot_out/${PREFIX}.blobplot

blobtools plot \
 -i ${WKDIR}/BlobPlot_out/${PREFIX}.blobplot.blobDB.json \
 -o ${WKDIR}/BlobPlot_out/${PREFIX}.rank_clade.blobplot
 
blobtools view \
 -i ${WKDIR}/BlobPlot_out/${PREFIX}.blobplot.blobDB.json \
 -o ${WKDIR}/BlobPlot_out/${PREFIX}.blobplot

blobtools plot \
 -i ${WKDIR}/BlobPlot_out/${tag}.blobplot.blobDB.json \
 -o ${WKDIR}/BlobPlot_out/${tag}.rank_species.blobplot \
 -r 'species'
 
blobtools view \
 -i ${WKDIR}/BlobPlot_out/${tag}.blobplot.blobDB.json \
 -o ${WKDIR}/BlobPlot_out/${tag}.rank_species.blobplot \
 -r 'species'
 
blobtools plot \
 -i ${WKDIR}/BlobPlot_out/${tag}.blobplot.blobDB.json \
 -o ${WKDIR}/BlobPlot_out/${tag}.rank_order.blobplot \
 -r 'order'
 
blobtools view \
 -i ${WKDIR}/BlobPlot_out/${tag}.blobplot.blobDB.json \
 -o ${WKDIR}/BlobPlot_out/${tag}.rank_order.blobplot \
 -r 'order'

EOF

Submitted batch job 2071228


In [4]:
seff 2071225

Job ID: 2071225
Cluster: powerplant
User/Group: hrasrb/hrasrb
State: FAILED (exit code 1)
Nodes: 1
Cores per node: 2
CPU Utilized: 00:00:23
CPU Efficiency: 18.85% of 00:02:02 core-walltime
Job Wall-clock time: 00:01:01
Memory Utilized: 13.51 MB
Memory Efficiency: 1.32% of 1.00 GB


# Use Rstudio to explore blob toolkit results

In [1]:
cp /workspace/hrasrb/kiwifruit_pangenome/Phasing/assemblies/ncbi_fcs_vs_bloptools.html /workspace/hrasrb/Repo/High-quality-genomes/Blueberry/Assembly_Contamination_Check