# Nui ONT + HiC Assembly V1.1
*Vaccinium corymbosum nui*

## Data sources

- HiC Data for Nui is here:
    - /input/genomic/plant/Vaccinium/corymbosum/AGRF_CAGRF21434_HJWHFDRXX


- 10X data for Nui and M7 here:
    - /input/genomic/plant/Vaccinium/corymbosum/AGRF_CAGRF18813_H7JY3DRXX


- ONT PromethION Nui (BB2020 and BB2020-2 are the same sample) here:
    - /input/genomic/plant/Vaccinium/corymbosum/Blueberry_PromethION_Apr2020


- ONT MinION Nui (BB2020) here:
    - /input/genomic/plant/Vaccinium/corymbosum/CAGRF21436/20200224_MinION/AGRF_CAGRFF21436_FAL87845_BB2020/


- 10X Supernova Assembly for 10X data here:
    - /output/genomic/plant/Vaccinium/corymbosum/2021_GenomeAssembly/Nui/01_Supernova

## Basecalling using Guppy V6.3.4

In [67]:
# Root of the V1.1 working area; log and scratch directories sit beneath it.
WKDIR="/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1"
LOGDIR="${WKDIR}/log"
TEMP="${WKDIR}/temp"

# fast5_pass input directories of the two PromethION runs (BB2020 and BB2020-2).
ONTINDIR1="/input/genomic/plant/Vaccinium/corymbosum/Blueberry_PromethION_Apr2020/AGRF_CAGRF21436-2_PAE71986_BB2020/fast5_pass"
ONTINDIR2="/input/genomic/plant/Vaccinium/corymbosum/Blueberry_PromethION_Apr2020/AGRF_CAGRF21436-2_PAE71986_BB2020-2/fast5_pass"

# Symlinked fast5 inputs, their decompressed copies, and the basecaller output.
FAST5DIR="${WKDIR}/fast5"
FAST5DIR_UNZIP="${WKDIR}/fast5_unzip"
FASTQDIR="${WKDIR}/fastq"

In [39]:
# Create every working directory up front; -p keeps a re-run of this cell harmless.
for workdir in "$WKDIR" "$LOGDIR" "$TEMP" "$FAST5DIR" "$FAST5DIR_UNZIP" "$FASTQDIR"; do
    mkdir -p "$workdir"
done

In [34]:
# Collect the PAE* files of both runs in FAST5DIR as symlinks (cp -s), not copies.
for run_dir in "$ONTINDIR1" "$ONTINDIR2"; do
    cp -s "$run_dir"/PAE* "$FAST5DIR"/
done
#ls -lrS $FAST5DIR | head

In [45]:
# Unzip all of the fast5 files.
# Emits one "gunzip -c <in> > <out>" command per .gz file in FAST5DIR and pipes
# the list to abatch, whose stdout is saved as unzip_sbatch_script.sh.
# The cd is guarded so nothing is generated from the wrong directory, and an
# unmatched *.gz pattern is skipped instead of being written out as a command.
cd "$FAST5DIR" &&
    for file in *.gz; do
        [ -e "$file" ] || continue
        printf 'gunzip -c %s > %s\n' "$FAST5DIR/$file" "$FAST5DIR_UNZIP/${file%.gz}"
    done | abatch -j unzip_fast5 -g 20 --mem=1G --time=00:30:00 > unzip_sbatch_script.sh

SBATCH_ARGS: --mem=1G --time=00:30:00
JOB_ARRAY_NAME: unzip_fast5
GROUP_SIZE: 20
NUM_COMMANDS: 674
ARRAY_SIZE: 34


In [46]:
# Submit the generated array script from the directory it was written to,
# then return to the working directory.
cd $FAST5DIR
sbatch unzip_sbatch_script.sh
cd $WKDIR

Submitted batch job 885843


In [68]:
# Disk usage of each top-level entry in the working directory.
du -sh ${WKDIR}/*

19M	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fast5
537G	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fast5_unzip
80G	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fastq
752K	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/log
46M	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/ONT_QC
32K	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/temp


In [53]:
#Basecalling with defaults.
# Submits a GPU job that runs guppy_basecaller 6.3.4 on the decompressed fast5
# files (FAST5DIR_UNZIP) and writes its output to FASTQDIR, using the
# dna_r9.4.1_450bps_hac_prom.cfg configuration.
# The here-document delimiter is unquoted, so LOGDIR, FAST5DIR_UNZIP and
# FASTQDIR are expanded by this shell when the job is submitted.
sbatch << EOF
#!/bin/bash -e
#SBATCH -J basecalling
#SBATCH --nodes=1
#SBATCH --ntasks=10
#SBATCH --cpus-per-task=1
#SBATCH --mem=20G
#SBATCH --output=${LOGDIR}/%J.out
#SBATCH --error=${LOGDIR}/%J.err
#SBATCH --time=30:00:00
#SBATCH --gres=gpu:1

module load guppy/6.3.4

guppy_basecaller --input_path ${FAST5DIR_UNZIP} -s ${FASTQDIR} --cpu_threads_per_caller 1 --num_callers 10 -c dna_r9.4.1_450bps_hac_prom.cfg --device 'cuda:0'

EOF
     

sbatch: slurm_job_submit: Job assigned to GPU nodes: gpu
Submitted batch job 885877


In [36]:
#Print Workflow Options
# One-minute GPU job that only calls guppy_basecaller --print_workflows;
# the listing ends up in the job's .out/.err files under LOGDIR.
sbatch << EOF
#!/bin/bash -e
#SBATCH -J basecalling
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --output=${LOGDIR}/%J.out
#SBATCH --error=${LOGDIR}/%J.err
#SBATCH --mem=1G
#SBATCH --time=00:01:00
#SBATCH --gres=gpu:1

module load guppy/6.3.4

guppy_basecaller --print_workflows --device 'cuda:0'

EOF

sbatch: slurm_job_submit: Job assigned to GPU nodes: gpu
Submitted batch job 885833


In [56]:
# Directory the NanoPlot/NanoComp QC job changes into before writing its reports.
mkdir -p ${WKDIR}/ONT_QC

In [70]:
#QC ONT Basecalling
# Compares three sequencing summaries with NanoComp: the Guppy 6 basecalls in
# FASTQDIR, the earlier PromethION basecalls (Guppy 5) and the MinION run.
# The -n labels are listed in the same order as the --summary files, so the
# first label is Guppy6 and the second Guppy5.
# The here-document is unquoted: WKDIR and FASTQDIR are expanded at submission.

sbatch << EOF
#!/bin/bash
#SBATCH -J Nanoplot
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=1G
#SBATCH --time=00:30:00

module load conda
conda activate hraijc_NanoPlot

cd ${WKDIR}/ONT_QC
#NanoPlot --summary ${FASTQDIR}/sequencing_summary.txt --loglength --minqual 9 -o BB_Nui_Guppy6.3
#NanoPlot --summary /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_BB2020_ONT/BB2020_PromethION_Fastq/sequencing_summary.txt --minqual 9 --loglength -o BB_Nui_Guppy5

#NanoComp --summary ${FASTQDIR}/sequencing_summary.txt /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_BB2020_ONT/BB2020_PromethION_Fastq/sequencing_summary.txt -p Nui_Guppy5vGuppy6
#NanoComp --summary Guppy5.txt Guppy6.txt -p test -n Guppy5 Guppy6 MinION
# Label order follows the summary files: Guppy6 (this run), Guppy5, MinION.
NanoComp --summary ${FASTQDIR}/sequencing_summary.txt /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_BB2020_ONT/BB2020_PromethION_Fastq/sequencing_summary.txt /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_BB2020_ONT/BB2020_MinION_Fastq/sequencing_summary.txt -p Nui_Guppy5vGuppy6vMinION -n Guppy6 Guppy5 MinION


conda deactivate
module unload conda

EOF

Submitted batch job 886478


In [4]:
#Combine fastq.
# Disabled steps that build BB_Nui_ONT_Guppyv6_all.fastq: every pass/*.fastq
# file is concatenated, then the MinION all.fastq is appended.
# NOTE(review): both steps append with >>, so running them a second time would
# duplicate reads in the combined file — presumably why they are commented out.
#for file in /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fastq/pass/*.fastq 
#do 
#cat $file
#done >> /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fastq/BB_Nui_ONT_Guppyv6_all.fastq


#cat /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_BB2020_ONT/BB2020_MinION_1kb/all.fastq >> /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fastq/BB_Nui_ONT_Guppyv6_all.fastq

In [15]:
# Sizes of the Guppy 6 pass directory, the combined read file, and the MinION read file.
du -sh /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fastq/pass/
du -sh /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fastq/BB_Nui_ONT_Guppyv6_all.fastq
du -sh /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_BB2020_ONT/BB2020_MinION_1kb/all.fastq

68G	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fastq/pass/
86G	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fastq/BB_Nui_ONT_Guppyv6_all.fastq
19G	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_BB2020_ONT/BB2020_MinION_1kb/all.fastq


## Flye assembly

In [18]:
# Filter reads by length with seqkit: the job keeps reads with a minimum length
# of 5000 bp (seqkit seq -m 5000) and applies no quality filter.
# Flye --nano-hq expects a q-score of 14, so almost all of our reads would be thrown out.
# Using --nano-raw instead, which expects a q-score of 10, with all reads of at least 5 kb.
# The job writes its logs to the relative log/ directory created here, and
# prints seqkit stats for the unfiltered and filtered files.
cd /workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/fastq/
mkdir -p log


sbatch << EOF
#!/bin/bash
#SBATCH -J seqkit
#SBATCH -o log/%J.out
#SBATCH -e log/%J.err
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=1G
#SBATCH --time=01:30:00

module load seqkit

seqkit seq -m 5000 BB_Nui_ONT_Guppyv6_all.fastq > BB_Nui_ONT_Guppyv6_5kb.fastq
seqkit stats BB_Nui_ONT_Guppyv6_all.fastq BB_Nui_ONT_Guppyv6_5kb.fastq


EOF

Submitted batch job 886648


In [None]:
#file                          format  type   num_seqs         sum_len  min_len   avg_len  max_len
#BB_Nui_ONT_Guppyv6_all.fastq  FASTQ   DNA   3,212,086  39,760,967,397       29  12,378.6  199,090
#BB_Nui_ONT_Guppyv6_5kb.fastq  FASTQ   DNA   1,911,492  36,524,002,011    5,000  19,107.6  199,090

In [63]:
#Using flye 2.9.1 from a local git clone instead of the PFR installation.
# Lists the flye modules available on the cluster, then prints the version of
# the cloned Flye run through the pfr-python3 interpreter.
module avail flye
module load pfr-python3
python /workspace/hraijc/git_clones/Flye/bin/flye -v

------------------- [1;94m/software/powerPlant/modulefiles/custom[0m --------------------
[1mflye[22m/2.4.2  [1mflye[22m/2.5  [1mflye[22m/2.7.1  [1mflye[22m/2.8.3  

Key:
[1;94mmodulepath[0m  
Conflict with existing Conda environment. Please unload/deactivate base

Loading [1mpfr-python3/3.9.13[22m
  [91mERROR[0m: can't read "errorCode": no such variable
2.9.1-b1780


In [1]:
# Work dir and the length-filtered (5 kb) read set handed to Flye.
WKDIR="/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1"
INFASTQ="${WKDIR}/fastq/BB_Nui_ONT_Guppyv6_5kb.fastq"

In [22]:
#Running Flye and then Busco.
# Assembles INFASTQ with the cloned Flye in --nano-raw mode into WKDIR/FLYE,
# then runs BUSCO (eudicots, odb10, genome mode) on the resulting assembly.fasta.
# WKDIR and INFASTQ are expanded at submission because the here-document is unquoted.
# BUSCO is given a bare run name (-o), not a path.
# NOTE(review): Flye documents these options as --threads / --iterations; the
# shorter spellings used here presumably rely on option-prefix matching — confirm.
sbatch << EOF
#!/bin/bash
#SBATCH -J Flye
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=36
#SBATCH --mem=200G
#SBATCH --time=100:30:00

module load pfr-python3
module load BUSCO/v5.2.2

python /workspace/hraijc/git_clones/Flye/bin/flye --nano-raw $INFASTQ --out-dir ${WKDIR}/FLYE --thread 36 --iteration 1
busco -i ${WKDIR}/FLYE/assembly.fasta -l eudicots -o Nui_V1_1_Flye_busco -m geno -c 36 --datasets_version odb10

EOF

Submitted batch job 886659


In [2]:
# Keep a copy of the Flye assembly in the work dir under a versioned name.
cp ${WKDIR}/FLYE/assembly.fasta ${WKDIR}/V1.1_Flye.fasta

## Purge haplotigs and overlaps in an assembly based on read depth
The Flye04 version of the assembly was better, so that version is used from here on rather than the Guppy v6 Flye assembly.

In [1]:
# Locations used by the purge_dups cells.
WKDIR="/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1"
OUTDIR="${WKDIR}"
OUTPUT="${WKDIR}/purge_dups"
OUTPREFIX="Nui_Flye04_dedup2"

# Inputs: the Flye04 assembly and the length-filtered Guppy v6 reads.
ASSEMBLY="/workspace/hraijc/Blueberry/BB_Nui_Assembly/ONT_Assemly/FLYE04/Flye04_assembly.fasta"
ALLREADS="${WKDIR}/fastq/BB_Nui_ONT_Guppyv6_5kb.fastq"

# bin directory of the cloned purge_dups repository.
PURGEDUPS_BIN_DIR="/workspace/hraijc/git_clones/purge_dups/bin"

In [30]:
# Make sure the log and purge_dups directories exist, move into OUTDIR
# and report the size of the input assembly.
mkdir -p ${WKDIR}/log/
mkdir -p ${WKDIR}/purge_dups
cd ${OUTDIR}
du -sh $ASSEMBLY

2.0G	/workspace/hraijc/Blueberry/BB_Nui_Assembly/ONT_Assemly/FLYE04/Flye04_assembly.fasta


In [6]:
# Leave any active conda environment, load the site Python and print the usage of pd_config.py.
conda deactivate
module load pfr-python3

python /workspace/hraijc/git_clones/purge_dups/scripts/pd_config.py --help

usage: pd_config.py [-h] [-s SRF] [-l LOCD] [-B] [-n FN] [--version]
                    ref pbfofn

generate a configuration file in json format

positional arguments:
  ref                   reference file
  pbfofn                list of pacbio file (one absolute file path per line)

optional arguments:
  -h, --help            show this help message and exit
  -s SRF, --srfofn SRF  list of short reads files (one record per line, the
                        record is a tab splitted line of abosulte file path
                        plus trimmed bases, refer to
                        https://github.com/dfguan/KMC) [NONE]
  -l LOCD, --localdir LOCD
                        local directory to keep the reference and lists of the
                        pacbio, short reads files [.]
  -B, --skipB           skip running busco [False]
  -n FN, --name FN      output config file name [config.json]
  --version             show program's version number and exit


In [31]:
# Make the pbfofn file for pd_config.py: one absolute read-file path per line.
# A fixed printf format keeps the path from being interpreted as a format
# string and terminates the line with a newline.
printf '%s\n' "${ALLREADS}" > "${OUTDIR}/ONT_files.txt"

In [32]:
# Generate the purge_dups config (config.<OUTPREFIX>.json) from the assembly
# and the read list. The config name is relative, so it is written to the
# current directory.
module load pfr-python3
python /workspace/hraijc/git_clones/purge_dups/scripts/pd_config.py -n config.${OUTPREFIX}.json ${ASSEMBLY} ${OUTDIR}/ONT_files.txt

In [34]:

# Run the purge_dups pipeline driver (run_purge_dups.py) with the generated
# config, the purge_dups bin directory and OUTPREFIX as its arguments.
# The config path is relative, so the job has to start in the directory where
# pd_config.py wrote it. WKDIR, OUTPREFIX and PURGEDUPS_BIN_DIR are expanded
# at submission because the here-document is unquoted.
sbatch << EOF
#!/bin/bash
#SBATCH -J PurgeDups
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=1
#SBATCH --mem=8G
#SBATCH --time=6:00:00

conda deactivate
module load pfr-python3
module load zlib
module load minimap2
module load BUSCO/v5.2.2


python /workspace/hraijc/git_clones/purge_dups/scripts/run_purge_dups.py config.${OUTPREFIX}.json ${PURGEDUPS_BIN_DIR} ${OUTPREFIX} -p bash

EOF

Submitted batch job 903171


In [2]:
# Copy the purged assembly out of the purge_dups output tree into the work dir.
cp ${WKDIR}/Flye04_assembly/seqs/Flye04_assembly.purged.fa ${WKDIR}/Flye04_assembly_purged.fa

In [3]:
#Busco.
# Assembly statistics and BUSCO completeness (eudicots, odb10, genome mode)
# for the purged assembly. Both tools read Flye04_assembly_purged.fa, the copy
# made from the purge_dups output.
# NOTE(review): the BUSCO run name (-o) is the same one used for the Flye
# assembly run earlier in this notebook — confirm the two result sets cannot collide.
sbatch << EOF
#!/bin/bash
#SBATCH -J BUSCO
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=24
#SBATCH --mem=6G
#SBATCH --time=06:30:00

module load BUSCO/v5.2.2
module load assemblathon_stats

assemblathon_stats.pl ${WKDIR}/Flye04_assembly_purged.fa
busco -i ${WKDIR}/Flye04_assembly_purged.fa -l eudicots -o Nui_V1_1_Flye_busco -m geno -c 24 --datasets_version odb10

EOF

Submitted batch job 903533


## HiC mapping and scaffolding.


HiC Reads here:
/input/genomic/plant/Vaccinium/corymbosum/AGRF_CAGRF21434_HJWHFDRXX


### Clean HiC Reads


In [1]:
# Purged contigs to scaffold, and the prefix used for files derived from them.
WKDIR="/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1"
CONTIGS="${WKDIR}/Flye04_assembly_purged.fa"
OUT_PREFIX_CONTIGS="Nui_Flye04_purged"

# Raw Hi-C read pair as delivered.
RAW_READ1="/input/genomic/plant/Vaccinium/corymbosum/AGRF_CAGRF21434_HJWHFDRXX/BlueberryNui_HiC_HJWHFDRXX_GTACGA_L002_R1.fastq.gz"
RAW_READ2="/input/genomic/plant/Vaccinium/corymbosum/AGRF_CAGRF21434_HJWHFDRXX/BlueberryNui_HiC_HJWHFDRXX_GTACGA_L002_R2.fastq.gz"

# Destination directory and file names for the cleaned read pair.
HiC_CLEAN="${WKDIR}/HiC_Clean"
HiC_CLEAN_R1="${HiC_CLEAN}/BlueberryNui_HiC_HJWHFDRXX_GTACGA_L002_R1_CLEAN.fastq.gz"
HiC_CLEAN_R2="${HiC_CLEAN}/BlueberryNui_HiC_HJWHFDRXX_GTACGA_L002_R2_CLEAN.fastq.gz"

In [18]:
# Output directory for the cleaned Hi-C reads, plus the shared log directory.
mkdir -p "$HiC_CLEAN" "${WKDIR}/log"

In [19]:
# Filter HiC Reads.
# Runs fastp 0.23.2 on the raw read pair and writes the cleaned pair to
# HiC_CLEAN_R1 / HiC_CLEAN_R2. Settings passed: trim_front 15 on both mates,
# qualified_quality_phred 25, length_required 75, 6 threads.
# All paths are expanded at submission because the here-document is unquoted.

sbatch << EOF
#!/bin/bash
#SBATCH -J fastp
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=6
#SBATCH --mem=6G
#SBATCH --time=6:00:00

module load fastp/0.23.2

fastp -i $RAW_READ1 -o $HiC_CLEAN_R1 -I $RAW_READ2 -O $HiC_CLEAN_R2 --trim_front1 15 --trim_front2 15 --qualified_quality_phred 25 --length_required 75 --thread 6

EOF

Submitted batch job 903625


In [2]:
# Sizes of the cleaned Hi-C reads, the contigs, and the raw Hi-C reads.
du -sh ${HiC_CLEAN}/*.fastq.gz
du -sh $CONTIGS
du -sh $RAW_READ1
du -sh $RAW_READ2

6.1G	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/HiC_Clean/BlueberryNui_HiC_HJWHFDRXX_GTACGA_L002_R1_CLEAN.fastq.gz
7.2G	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/HiC_Clean/BlueberryNui_HiC_HJWHFDRXX_GTACGA_L002_R2_CLEAN.fastq.gz
869M	/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1/Flye04_assembly_purged.fa
8.1G	/input/genomic/plant/Vaccinium/corymbosum/AGRF_CAGRF21434_HJWHFDRXX/BlueberryNui_HiC_HJWHFDRXX_GTACGA_L002_R1.fastq.gz
8.7G	/input/genomic/plant/Vaccinium/corymbosum/AGRF_CAGRF21434_HJWHFDRXX/BlueberryNui_HiC_HJWHFDRXX_GTACGA_L002_R2.fastq.gz


### Map HiC Reads



In [4]:
#Create HiC BAM FILE of 1kb contigs
# Pipeline submitted below: index the contigs, align the cleaned Hi-C pair
# with bwa mem -5SP, flag duplicates with samblaster, filter the alignments
# into a BAM, then scaffold twice with YaHS (default, and with both
# error-correction steps disabled).
# Despite its name, the *_dedup.bam file still contains the duplicate reads:
# samblaster only flags them and the -F 2316 filter does not include the
# duplicate bit (1024).
# CONTIGS, the read paths and OUT_PREFIX_CONTIGS are expanded at submission
# because the here-document is unquoted.

sbatch << EOF
#!/bin/bash
#SBATCH -J HiC_Pipe_BB
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=18
#SBATCH --mem=10G
#SBATCH --time=30:00:00

module load bwa/0.7.17
module load samtools/0.1.19

cd ${WKDIR}

bwa index $CONTIGS
samtools faidx $CONTIGS

#Align reads with BWA. Use -5SP for Hi-C reads.
bwa mem -5SP -t18 ${CONTIGS} $HiC_CLEAN_R1 $HiC_CLEAN_R2 -o ${OUT_PREFIX_CONTIGS}.sam

#Flag PCR Duplicates with SAMBLASTER
/workspace/hraijc/git_clones/samblaster/samblaster -i ${OUT_PREFIX_CONTIGS}.sam -o ${OUT_PREFIX_CONTIGS}_marked_byread.sam

#Filter alignments: -F 2316 drops unmapped (4), mate-unmapped (8), secondary (256) and supplementary (2048) records.
#Duplicates are NOT removed here; they stay in the BAM with the duplicate flag set by samblaster.
samtools view -S -b -h -@ 18 -F 2316 ${OUT_PREFIX_CONTIGS}_marked_byread.sam > ${OUT_PREFIX_CONTIGS}_dedup.bam

# Run YAHS
/workspace/hraijc/git_clones/yahs/yahs $CONTIGS ${OUT_PREFIX_CONTIGS}_dedup.bam -o ${OUT_PREFIX_CONTIGS}_yahs1

#YAHS without error correction.
/workspace/hraijc/git_clones/yahs/yahs --no-contig-ec --no-scaffold-ec $CONTIGS ${OUT_PREFIX_CONTIGS}_dedup.bam -o ${OUT_PREFIX_CONTIGS}_yahs_noEC


EOF

Submitted batch job 903761


In [5]:
# Report the size of the SAM/BAM files in the work dir, then move the SAM
# files into the temp directory.
cd $WKDIR
du -sh *.sam
du -sh *.bam
mv *.sam temp


90G	Nui_Flye04_purged_marked_byread.sam
90G	Nui_Flye04_purged.sam
22G	Nui_Flye04_purged_dedup.bam


In [6]:
#Stat Assemblies
# Runs assemblathon_stats.pl on the final scaffolds of both YaHS runs
# (without and with error correction); results go to the job's log files.
sbatch << EOF
#!/bin/bash
#SBATCH -J Assemblathon
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=1
#SBATCH --mem=1G
#SBATCH --time=00:30:00

module load assemblathon_stats

assemblathon_stats.pl ${WKDIR}/Nui_Flye04_purged_yahs_noEC_scaffolds_final.fa
assemblathon_stats.pl ${WKDIR}/Nui_Flye04_purged_yahs1_scaffolds_final.fa

EOF

Submitted batch job 904093


In [7]:
#Index Contigs
# Creates the .fai index of CONTIGS with the default samtools module.
# The Hi-C mapping job also runs samtools faidx on the same file, so this
# re-creates that index.
sbatch << EOF
#!/bin/bash
#SBATCH -J Samtools
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=1
#SBATCH --mem=1G
#SBATCH --time=00:30:00

module load samtools

samtools faidx ${CONTIGS}

EOF

Submitted batch job 904094


In [9]:
#The here-document below is unquoted, so ${out} and ${contigs} are expanded by
#this shell at submission time; they are not defined inside the SLURM job and
#therefore have to be set here first.
# Create heatmap using Yahs script.
# Steps submitted: (1) yahs juicer pre -> sorted alignment text file,
# (2) chrom.sizes taken from the PRE_C_SIZE lines of the juicer log,
# (3) juicer pre in assembly (JBAT) mode, (4) juicer_tools pre -> *_JBAT.hic.
# Outputs are written to a .part name first and renamed only on success.


out=${WKDIR}/Nui_Flye04_purged_yahs1
contigs=${CONTIGS} # need to be indexed, i.e., test.fa.gz.fai is available

sbatch << EOF
#!/bin/bash
#SBATCH -J mapyahs5
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=8
#SBATCH --mem=32G
#SBATCH --time=3:10:00

module load conda
conda activate hraijc_yahs


############# code modified from YAHS script #####################################
#### this is to generate input file for juicer_tools - non-assembly mode or for PretextMap
(/workspace/hraijc/git_clones/yahs/juicer pre ${out}.bin ${out}_scaffolds_final.agp ${contigs}.fai 2>${out}_tmp_juicer_pre.log | LC_ALL=C sort -k2,2d -k6,6d --parallel=8 -S32G | awk 'NF' > ${out}_alignments_sorted.txt.part) && (mv ${out}_alignments_sorted.txt.part ${out}_alignments_sorted.txt)
## prepare chromosome size file from samtools index file
cat ${out}_tmp_juicer_pre.log | grep "PRE_C_SIZE" | cut -d' ' -f2- >${out}_scaffolds_final.chrom.sizes
## do juicer hic map
#(java -Xmx32G -jar /workspace/hraijc/git_clones/juicer_tools.1.9.9_jcuda.0.8.jar pre ${out}_alignments_sorted.txt ${out}.hic.part ${out}_scaffolds_final.chrom.sizes) && (mv ${out}.hic.part ${out}.hic)
#### generate input file for juicer_tools - assembly (JBAT) mode (-a)
/workspace/hraijc/git_clones/yahs/juicer pre -a -o ${out}_JBAT ${out}.bin ${out}_scaffolds_final.agp ${contigs}.fai 2>${out}_tmp_juicer_pre_JBAT.log
cat ${out}_tmp_juicer_pre_JBAT.log | grep "PRE_C_SIZE" | cut -d' ' -f2- >${out}_JBAT.chrom.sizes
(java -Xmx32G -jar /workspace/hraijc/git_clones/juicer_tools.1.9.9_jcuda.0.8.jar pre ${out}_JBAT.txt ${out}_JBAT.hic.part ${out}_JBAT.chrom.sizes) && (mv ${out}_JBAT.hic.part ${out}_JBAT.hic)
#################################################################################


EOF

#Unsetting the helper variables so they do not leak into later cells.
unset out
unset contigs

Submitted batch job 904235


In [14]:
# Names shared by the Hi-C coverage and QC cells.
WKDIR="/workspace/hraijc/Blueberry/BB_Nui_Assembly/Nui_V1.1"
APREFIX="Nui_Flye04_purged"
INFILE="Nui_Flye04_purged_dedup.bam"
PREFIX="Nui_V1.1"


In [11]:
# Viz HiC Coverage
# Sorts and indexes the Hi-C BAM, then draws two coverage plots with
# plotCoverage (-n 10000 for a quick plot, -n 100000 for the full one).
# INFILE and PREFIX, including the ${INFILE%.bam} suffix strip, are expanded at
# submission because the here-document is unquoted. INFILE is a relative name,
# hence the cd into WKDIR first.
# NOTE(review): each plotCoverage call passes both --plotFile and -o; these
# look like two spellings of the same output option — confirm against the
# deepTools documentation.

cd $WKDIR

sbatch << EOF
#!/bin/bash
#SBATCH -J hic
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=10
#SBATCH --mem=10G
#SBATCH --time=6:00:00

module load conda
conda deactivate
module load pfr-python3
module load samtools

samtools sort -@ 10 $INFILE > ${INFILE%.bam}_sorted.bam
samtools index ${INFILE%.bam}_sorted.bam
/home/hraijc/.local/bin/plotCoverage -b ${INFILE%.bam}_sorted.bam -n 10000 --plotFile ${PREFIX}_HiC_Coverage -o ${PREFIX}_HiC_Coverage_quick.png --labels ${PREFIX} --plotTitle "HiC_Coverage" 
/home/hraijc/.local/bin/plotCoverage -b ${INFILE%.bam}_sorted.bam -n 100000 --plotFile ${PREFIX}_HiC_Coverage -o ${PREFIX}_HiC_Coverage.png --labels ${PREFIX} --plotTitle "HiC_Coverage" 


EOF

Submitted batch job 904241


In [15]:
### HiC QC from Phase genomics ###
# Runs hic_qc.py on the filtered Hi-C BAM with -n 3000000, writing outputs
# with the APREFIX prefix. The BAM name is relative, so the job has to start
# in the directory that holds it. WKDIR and APREFIX are expanded at submission.
sbatch << EOF
#!/bin/bash
#SBATCH -J HiCQC
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=1
#SBATCH --mem=1G
#SBATCH --time=00:30:00

module load conda

conda activate hraijc_hic_qc2
python /workspace/hraijc/git_clones/hic_qc/hic_qc.py -n 3000000 -b ${APREFIX}_dedup.bam --outfile_prefix ${APREFIX}
conda deactivate

EOF

Submitted batch job 904772
