In [1]:
# --- Session set-up -----------------------------------------------------------
# Shell variables shared by the later cells of this notebook.

# Working directory for this run; Slurm logs are written to its log/ folder.
WKDIR="/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Crodgersii_V4"

# Paired Hi-C read files (R1 and R2).
READ1="/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Urchin/Crodergsii_V3/reads/Urchin_HiC_U_R1.fq.gz"
READ2="/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Urchin/Crodergsii_V3/reads/Urchin_HiC_U_R2.fq.gz"

# Contig assembly that the reads are aligned to and that gets scaffolded.
ASSEMBLY_DIR="/workspace/hraijc/HiC_trials/HiC23/Assemblies"
ASSEMBLY="${ASSEMBLY_DIR}/urchin_ONT_purged_prim_55.fa"

# Prefix used to name the files produced for this run.
APREFIX="urchin_ONT_purged_prim_55_hic4"

# Location for the intermediate SAM/BAM files.
TEMP="/workspace/hraijc/TEMP"

# Create the log directory (and WKDIR itself if needed), then work from WKDIR.
mkdir -p "${WKDIR}/log"
cd "${WKDIR}"

In [2]:

# Submit the alignment + scaffolding job.
# The heredoc is deliberately unquoted: WKDIR, ASSEMBLY, READ1, READ2, TEMP and
# APREFIX from the set-up cell are expanded here, at submission time, so the
# job script contains literal paths. For the same reason, keep dollar signs and
# backticks out of the comments inside the heredoc.
sbatch << EOF
#!/bin/bash
#SBATCH -J hic
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=12
#SBATCH --mem=20G
#SBATCH --time=88:00:00

module load bwa/0.7.17
module load samtools/1.16

# Abort at the first failing step so that later tools never run on missing or
# truncated intermediates. Enabled only after the module loads on purpose.
set -e

cd ${WKDIR}

### Align reads with BWA. Use -5SP for Hi-C reads.#############################
samtools faidx ${ASSEMBLY}
bwa index ${ASSEMBLY}
bwa mem -5SP -t12 -o ${TEMP}/${APREFIX}.sam ${ASSEMBLY} ${READ1} ${READ2}


### Flag PCR Duplicates with SAMBLASTER #######################################
/workspace/hraijc/git_clones/samblaster/samblaster -i ${TEMP}/${APREFIX}.sam -o ${TEMP}/${APREFIX}_marked_byread.sam


### Remove unmapped and non-primary aligned reads, then sort the BAM file. ####
# -F 2316 drops records with any of these flags set: read unmapped (4),
# mate unmapped (8), secondary alignment (256), supplementary alignment (2048).
samtools view -S -b -h -@ 12 -F 2316 ${TEMP}/${APREFIX}_marked_byread.sam > ${TEMP}/${APREFIX}_presort_marked.bam
# NOTE(review): this sort is by coordinate (no -n). Confirm against the YaHS
# documentation whether name-sorted input (samtools sort -n) is expected.
samtools sort -@ 12 ${TEMP}/${APREFIX}_presort_marked.bam -o ${APREFIX}.bam

### Run YAHS twice: default settings, then with --no-contig-ec. ###############
/workspace/hraijc/git_clones/yahs/yahs --no-mem-check ${ASSEMBLY} ${APREFIX}.bam -o ${APREFIX}_yahs4
/workspace/hraijc/git_clones/yahs/yahs --no-contig-ec --no-mem-check ${ASSEMBLY} ${APREFIX}.bam -o ${APREFIX}_yahs4_noEC

EOF

# ASSEMBLY and APREFIX are intentionally left set: the heatmap and statistics
# cells further down read them.

Submitted batch job 2702028


In [5]:
### Make heatmap of YAHS output ###############################################
# Submits yahs_contactmapgen2.sh for the default YAHS run (prefix *_yahs4).
# Variables handed to the job via --export:
#   out     - prefix of outfiles produced by YAHS
#   contigs - contig FASTA; needs to be indexed, i.e., test.fa.gz.fai in the
#             same directory
#   wkdir   - working directory
# APREFIX, ASSEMBLY and WKDIR come from the set-up cell. Refuse to submit when
# any of them is empty; otherwise the job would be queued with a bogus prefix
# such as "_yahs4" and no contig file.
if [ -z "${APREFIX:-}" ] || [ -z "${ASSEMBLY:-}" ] || [ -z "${WKDIR:-}" ]; then
    echo "ERROR: APREFIX, ASSEMBLY and WKDIR must be set; re-run the set-up cell." >&2
else
    cd ${WKDIR}

    out=${APREFIX}_yahs4
    contigs=${ASSEMBLY}
    wkdir=${WKDIR}

    sbatch --job-name=hic_mapyahs \
        -o ${WKDIR}/log/%J.out \
        -e ${WKDIR}/log/%J.err \
        --cpus-per-task=8 \
        --mem=24G \
        --time=03:10:00 \
        --export=out=$out,contigs=$contigs,wkdir=$wkdir \
        /workspace/hraijc/Gitrepos/High-quality-genomes/Methods/DNase_HiC/notebooks/yahs_contactmapgen2.sh
fi

Submitted batch job 2706109


In [3]:
### Make heatmap of YAHS output ###############################################
# Submits yahs_contactmapgen2.sh for the --no-contig-ec YAHS run
# (prefix *_yahs4_noEC).
# Variables handed to the job via --export:
#   out     - prefix of outfiles produced by YAHS
#   contigs - contig FASTA; needs to be indexed, i.e., test.fa.gz.fai in the
#             same directory
#   wkdir   - working directory
# APREFIX, ASSEMBLY and WKDIR come from the set-up cell. Refuse to submit when
# any of them is empty; otherwise the job would be queued with a bogus prefix
# such as "_yahs4_noEC" and no contig file.
if [ -z "${APREFIX:-}" ] || [ -z "${ASSEMBLY:-}" ] || [ -z "${WKDIR:-}" ]; then
    echo "ERROR: APREFIX, ASSEMBLY and WKDIR must be set; re-run the set-up cell." >&2
else
    cd ${WKDIR}

    out=${APREFIX}_yahs4_noEC
    contigs=${ASSEMBLY}
    wkdir=${WKDIR}

    sbatch --job-name=hic_mapyahs \
        -o ${WKDIR}/log/%J.out \
        -e ${WKDIR}/log/%J.err \
        --cpus-per-task=8 \
        --mem=24G \
        --time=03:10:00 \
        --export=out=$out,contigs=$contigs,wkdir=$wkdir \
        /workspace/hraijc/Gitrepos/High-quality-genomes/Methods/DNase_HiC/notebooks/yahs_contactmapgen2.sh
fi

Submitted batch job 2887758


In [3]:
# Compare assembly statistics for the input contigs and the two YAHS scaffold
# sets (noEC and default) with BBMap's statswrapper.sh.
# ASSEMBLY and WKDIR come from the set-up cell. Skip the run when either is
# empty: an empty ASSEMBLY would yield "in=,file,file" and an empty WKDIR would
# make cd switch to the home directory instead of the run directory.
if [ -z "${ASSEMBLY:-}" ] || [ -z "${WKDIR:-}" ]; then
    echo "ERROR: ASSEMBLY and WKDIR must be set; re-run the set-up cell." >&2
else
    cd ${WKDIR}
    module load BBMap/38.33
    # The two scaffold FASTA names are relative to WKDIR.
    statswrapper.sh in=${ASSEMBLY},urchin_ONT_purged_prim_55_hic4_yahs4_noEC_scaffolds_final.fa,urchin_ONT_purged_prim_55_hic4_yahs4_scaffolds_final.fa format=3
    module unload BBMap/38.33
fi

java -ea -Xmx200m -cp /software/bioinformatics/BBMap-38.33/current/ jgi.AssemblyStatsWrapper format=3 in=/workspace/hraijc/HiC_trials/HiC23/Assemblies/urchin_ONT_purged_prim_55.fa,urchin_ONT_purged_prim_55_hic4_yahs4_noEC_scaffolds_final.fa,urchin_ONT_purged_prim_55_hic4_yahs4_scaffolds_final.fa format=3
n_scaffolds	n_contigs	scaf_bp	contig_bp	gap_pct	scaf_N50	scaf_L50	ctg_N50	ctg_L50	scaf_N90	scaf_L90	ctg_N90	ctg_L90	scaf_max	ctg_max	scaf_n_gt50K	scaf_pct_gt50K	gc_avg	gc_std	filename
351	351	932665333	932665333	0.000	67	4628585	67	4628585	198	1555391	198	1555391	18081068	18081068	334	99.944	0.39366	0.00451	/powerplant/workspace/hraijc/HiC_trials/HiC23/Assemblies/urchin_ONT_purged_prim_55.fa
43	351	932726933	932665333	0.007	6	49448748	67	4628585	15	34445106	198	1555391	116933604	18081068	32	99.967	0.39366	0.00631	/powerplant/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Crodgersii_V4/urchin_ONT_purged_prim_55_hic4_yahs4_noEC_scaffolds_final.fa
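69	382	932727933	932665333	0.007	7	43199803	68	4619023	16	34445106	201	1506292	156661703	18081068	44	99.938	0.39366	0.01054	/powerplant/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Crodgersii_V4/urchin_ONT_purged_prim_55_hic4_yahs4_scaffolds_final.fa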

| n_scaffolds 	| n_contigs 	| scaf_bp 	| contig_bp 	| gap_pct 	| scaf_N50 	| scaf_L50 	| ctg_N50 	| ctg_L50 	| scaf_N90 	| scaf_L90 	| ctg_N90 	| ctg_L90 	| scaf_max 	| ctg_max 	| scaf_n_gt50K 	| scaf_pct_gt50K 	| gc_avg 	| gc_std 	| filename 	|
|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|
| 351 	| 351 	| 932665333 	| 932665333 	| 0.000 	| 67 	| 4628585 	| 67 	| 4628585 	| 198 	| 1555391 	| 198 	| 1555391 	| 18081068 	| 18081068 	| 334 	| 99.944 	| 0.39366 	| 0.00451 	| /powerplant/workspace/hraijc/HiC_trials/HiC23/Assemblies/urchin_ONT_purged_prim_55.fa 	|
| 43 	| 351 	| 932726933 	| 932665333 	| 0.007 	| 6 	| 49448748 	| 67 	| 4628585 	| 15 	| 34445106 	| 198 	| 1555391 	| 116933604 	| 18081068 	| 32 	| 99.967 	| 0.39366 	| 0.00631 	| /powerplant/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Crodgersii_V4/urchin_ONT_purged_prim_55_hic4_yahs4_noEC_scaffolds_final.fa 	|
| 69 	| 382 	| 932727933 	| 932665333 	| 0.007 	| 7 	| 43199803 	| 68 	| 4619023 	| 16 	| 34445106 	| 201 	| 1506292 	| 156661703 	| 18081068 	| 44 	| 99.938 	| 0.39366 	| 0.01054 	| /powerplant/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Crodgersii_V4/urchin_ONT_purged_prim_55_hic4_yahs4_scaffolds_final.fa 	|
