In [9]:
# Set Up
# Shared locations used by every cell below:
#   WKDIR        - working directory; job logs go to WKDIR/log
#   READ1/READ2  - paired Hi-C read files (R1 / R2)
#   ASSEMBLY_DIR - directory holding the assemblies to scaffold
#   TEMP         - directory for intermediate SAM/BAM files written by the jobs
WKDIR=/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/Phased_HiC
READ1=/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/Reads/SB1031_hic1_umap_R1.fastq.gz
READ2=/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/Reads/SB1031_hic1_umap_R2.fastq.gz
ASSEMBLY_DIR=/workspace/hraijc/HiC_trials/HiC23/Assemblies/SB1031
TEMP=/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/TEMP

# Create the intermediate directory as well as the log directory: the
# scaffolding jobs write their SAM/BAM files into TEMP and fail if it is missing.
mkdir -p "${WKDIR}/log" "${TEMP}"
cd "${WKDIR}"

## Primary Assembly

In [22]:
# Primary assembly: align Hi-C reads, flag duplicates, filter + sort, scaffold with YaHS.
# The heredoc delimiter is deliberately unquoted: every ${...} below is expanded
# by this notebook shell at submission time, so the submitted script contains
# literal paths. WKDIR, TEMP, READ1, READ2 and ASSEMBLY_DIR must already be set.
ASSEMBLY=${ASSEMBLY_DIR}/SB1031_q15_duplex_q2050kb_simplex.primary.purged.010923.fa
APREFIX=SB1031_Primary_uHiC_q15_duplex_q2050kb_simplex

sbatch << EOF
#!/bin/bash
#SBATCH -J hic
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=12
#SBATCH --mem=20G
#SBATCH --time=23:00:00

module load bwa/0.7.17
module load samtools/1.16

# Stop at the first failing step so later steps never run on truncated input.
# Enabled after the module loads; nounset is left off because all variables
# were already expanded at submission time.
set -eo pipefail

mkdir -p "${TEMP}"
cd "${WKDIR}"

### Index the assembly only if the index files are not already present. #######
[ -s "${ASSEMBLY}.fai" ] || samtools faidx "${ASSEMBLY}"
[ -s "${ASSEMBLY}.bwt" ] || bwa index "${ASSEMBLY}"

### Align reads with BWA. Use -5SP for Hi-C reads.#############################
bwa mem -5SP -t12 "${ASSEMBLY}" "${READ1}" "${READ2}" -o "${TEMP}/${APREFIX}.sam"


### Flag PCR Duplicates with SAMBLASTER #######################################
/workspace/hraijc/git_clones/samblaster/samblaster -i "${TEMP}/${APREFIX}.sam" -o "${TEMP}/${APREFIX}_marked_byread.sam"


### Remove unmapped and non-primary aligned reads, then sort the BAM file. ####
# -F 2316 = 4 + 8 + 256 + 2048: read unmapped, mate unmapped, secondary,
# supplementary. The duplicate flag (1024) is not in the mask, so reads marked
# by samblaster stay in the BAM as flagged records.
samtools view -b -h -@ 12 -F 2316 -o "${TEMP}/${APREFIX}_presort_marked.bam" "${TEMP}/${APREFIX}_marked_byread.sam"
samtools sort -@ 12 "${TEMP}/${APREFIX}_presort_marked.bam" -o "${APREFIX}.bam"

### Run YAHS with contig and scaffolding error correction. ####################
/workspace/hraijc/git_clones/yahs/yahs --no-mem-check "${ASSEMBLY}" "${APREFIX}.bam" -o "${APREFIX}_yahs3"

EOF

# Clear the per-assembly variables so a later cell cannot reuse them by accident.
unset ASSEMBLY
unset APREFIX

Submitted batch job 1985617


In [10]:
### Heatmap of the YAHS output for the primary assembly #######################
# ASSEMBLY and APREFIX are assigned again here because the scaffolding cell
# unsets them after submitting its job.
ASSEMBLY="${ASSEMBLY_DIR}/SB1031_q15_duplex_q2050kb_simplex.primary.purged.010923.fa"
APREFIX="SB1031_Primary_uHiC_q15_duplex_q2050kb_simplex"
cd "${WKDIR}"

# Values handed to the job script through --export:
#   out     - prefix of the output files produced by YAHS
#   contigs - contig FASTA; must be indexed (its .fai in the same directory)
#   wkdir   - working directory
out="${APREFIX}_yahs3"
contigs="${ASSEMBLY}"
wkdir="${WKDIR}"

sbatch \
    --job-name=hic_mapyahs \
    --output="${WKDIR}/log/%J.out" \
    --error="${WKDIR}/log/%J.err" \
    --cpus-per-task=8 \
    --mem=24G \
    --time=03:10:00 \
    --export="out=${out},contigs=${contigs},wkdir=${wkdir}" \
    /workspace/hraijc/Gitrepos/High-quality-genomes/Methods/DNase_HiC/notebooks/yahs_contactmapgen2.sh

Submitted batch job 2002385


## Hap1 Assembly

In [23]:
# Hap1 assembly: align Hi-C reads, flag duplicates, filter + sort, scaffold with YaHS.
# The heredoc delimiter is deliberately unquoted: every ${...} below is expanded
# by this notebook shell at submission time, so the submitted script contains
# literal paths. WKDIR, TEMP, READ1, READ2 and ASSEMBLY_DIR must already be set.
ASSEMBLY=${ASSEMBLY_DIR}/SB1031_q15_duplex_q2050kb_simplex.hap1.purged.240823.fa
APREFIX=SB1031_hap1_uHiC_q15_duplex_q2050kb_simplex

sbatch << EOF
#!/bin/bash
#SBATCH -J hic
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=12
#SBATCH --mem=20G
#SBATCH --time=22:00:00

module load bwa/0.7.17
module load samtools/1.16

# Stop at the first failing step so later steps never run on truncated input.
# Enabled after the module loads; nounset is left off because all variables
# were already expanded at submission time.
set -eo pipefail

mkdir -p "${TEMP}"
cd "${WKDIR}"

### Index the assembly only if the index files are not already present. #######
[ -s "${ASSEMBLY}.fai" ] || samtools faidx "${ASSEMBLY}"
[ -s "${ASSEMBLY}.bwt" ] || bwa index "${ASSEMBLY}"

### Align reads with BWA. Use -5SP for Hi-C reads.#############################
bwa mem -5SP -t12 "${ASSEMBLY}" "${READ1}" "${READ2}" -o "${TEMP}/${APREFIX}.sam"


### Flag PCR Duplicates with SAMBLASTER #######################################
/workspace/hraijc/git_clones/samblaster/samblaster -i "${TEMP}/${APREFIX}.sam" -o "${TEMP}/${APREFIX}_marked_byread.sam"


### Remove unmapped and non-primary aligned reads, then sort the BAM file. ####
# -F 2316 = 4 + 8 + 256 + 2048: read unmapped, mate unmapped, secondary,
# supplementary. The duplicate flag (1024) is not in the mask, so reads marked
# by samblaster stay in the BAM as flagged records.
samtools view -b -h -@ 12 -F 2316 -o "${TEMP}/${APREFIX}_presort_marked.bam" "${TEMP}/${APREFIX}_marked_byread.sam"
samtools sort -@ 12 "${TEMP}/${APREFIX}_presort_marked.bam" -o "${APREFIX}.bam"

### Run YAHS with contig and scaffolding error correction. ####################
/workspace/hraijc/git_clones/yahs/yahs --no-mem-check "${ASSEMBLY}" "${APREFIX}.bam" -o "${APREFIX}_yahs3"

EOF

# Clear the per-assembly variables so a later cell cannot reuse them by accident.
unset ASSEMBLY
unset APREFIX

Submitted batch job 1985618


In [11]:
### Heatmap of the YAHS output for the hap1 assembly ##########################
# ASSEMBLY and APREFIX are assigned again here because the scaffolding cell
# unsets them after submitting its job.
ASSEMBLY="${ASSEMBLY_DIR}/SB1031_q15_duplex_q2050kb_simplex.hap1.purged.240823.fa"
APREFIX="SB1031_hap1_uHiC_q15_duplex_q2050kb_simplex"

cd "${WKDIR}"

# Values handed to the job script through --export:
#   out     - prefix of the output files produced by YAHS
#   contigs - contig FASTA; must be indexed (its .fai in the same directory)
#   wkdir   - working directory
out="${APREFIX}_yahs3"
contigs="${ASSEMBLY}"
wkdir="${WKDIR}"

sbatch \
    --job-name=hic_mapyahs \
    --output="${WKDIR}/log/%J.out" \
    --error="${WKDIR}/log/%J.err" \
    --cpus-per-task=8 \
    --mem=24G \
    --time=03:10:00 \
    --export="out=${out},contigs=${contigs},wkdir=${wkdir}" \
    /workspace/hraijc/Gitrepos/High-quality-genomes/Methods/DNase_HiC/notebooks/yahs_contactmapgen2.sh

Submitted batch job 2002386


## Hap2 Assembly

In [24]:
# Hap2 assembly: align Hi-C reads, flag duplicates, filter + sort, scaffold with YaHS.
# The heredoc delimiter is deliberately unquoted: every ${...} below is expanded
# by this notebook shell at submission time, so the submitted script contains
# literal paths. WKDIR, TEMP, READ1, READ2 and ASSEMBLY_DIR must already be set.
ASSEMBLY=${ASSEMBLY_DIR}/SB1031_q15_duplex_q2050kb_simplex.hap2.purged.240823.fa
APREFIX=SB1031_hap2_uHiC_q15_duplex_q2050kb_simplex

sbatch << EOF
#!/bin/bash
#SBATCH -J hic
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=12
#SBATCH --mem=20G
#SBATCH --time=23:00:00

module load bwa/0.7.17
module load samtools/1.16

# Stop at the first failing step so later steps never run on truncated input.
# Enabled after the module loads; nounset is left off because all variables
# were already expanded at submission time.
set -eo pipefail

mkdir -p "${TEMP}"
cd "${WKDIR}"

### Index the assembly only if the index files are not already present. #######
[ -s "${ASSEMBLY}.fai" ] || samtools faidx "${ASSEMBLY}"
[ -s "${ASSEMBLY}.bwt" ] || bwa index "${ASSEMBLY}"

### Align reads with BWA. Use -5SP for Hi-C reads.#############################
bwa mem -5SP -t12 "${ASSEMBLY}" "${READ1}" "${READ2}" -o "${TEMP}/${APREFIX}.sam"


### Flag PCR Duplicates with SAMBLASTER #######################################
/workspace/hraijc/git_clones/samblaster/samblaster -i "${TEMP}/${APREFIX}.sam" -o "${TEMP}/${APREFIX}_marked_byread.sam"


### Remove unmapped and non-primary aligned reads, then sort the BAM file. ####
# -F 2316 = 4 + 8 + 256 + 2048: read unmapped, mate unmapped, secondary,
# supplementary. The duplicate flag (1024) is not in the mask, so reads marked
# by samblaster stay in the BAM as flagged records.
samtools view -b -h -@ 12 -F 2316 -o "${TEMP}/${APREFIX}_presort_marked.bam" "${TEMP}/${APREFIX}_marked_byread.sam"
samtools sort -@ 12 "${TEMP}/${APREFIX}_presort_marked.bam" -o "${APREFIX}.bam"

### Run YAHS with contig and scaffolding error correction. ####################
/workspace/hraijc/git_clones/yahs/yahs --no-mem-check "${ASSEMBLY}" "${APREFIX}.bam" -o "${APREFIX}_yahs3"

EOF

# Clear the per-assembly variables so a later cell cannot reuse them by accident.
unset ASSEMBLY
unset APREFIX

Submitted batch job 1985619


In [12]:
### Heatmap of the YAHS output for the hap2 assembly ##########################
# ASSEMBLY and APREFIX are assigned again here because the scaffolding cell
# unsets them after submitting its job.
ASSEMBLY="${ASSEMBLY_DIR}/SB1031_q15_duplex_q2050kb_simplex.hap2.purged.240823.fa"
APREFIX="SB1031_hap2_uHiC_q15_duplex_q2050kb_simplex"
cd "${WKDIR}"

# Values handed to the job script through --export:
#   out     - prefix of the output files produced by YAHS
#   contigs - contig FASTA; must be indexed (its .fai in the same directory)
#   wkdir   - working directory
out="${APREFIX}_yahs3"
contigs="${ASSEMBLY}"
wkdir="${WKDIR}"

sbatch \
    --job-name=hic_mapyahs \
    --output="${WKDIR}/log/%J.out" \
    --error="${WKDIR}/log/%J.err" \
    --cpus-per-task=8 \
    --mem=24G \
    --time=03:10:00 \
    --export="out=${out},contigs=${contigs},wkdir=${wkdir}" \
    /workspace/hraijc/Gitrepos/High-quality-genomes/Methods/DNase_HiC/notebooks/yahs_contactmapgen2.sh

Submitted batch job 2002387


In [8]:
# Resource-usage summaries for the hap2 scaffolding job (1985619) and the
# hap2 heatmap job (2002387), in that order.
for jobid in 1985619 2002387; do
    seff "${jobid}"
done

Job ID: 1985619
Cluster: powerplant
User/Group: hraijc/hraijc
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 12
CPU Utilized: 3-19:06:34
CPU Efficiency: 84.91% of 4-11:18:24 core-walltime
Job Wall-clock time: 08:56:32
Memory Utilized: 9.57 GB
Memory Efficiency: 47.83% of 20.00 GB
Job ID: 2002376
Cluster: powerplant
User/Group: hraijc/hraijc
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 8
CPU Utilized: 01:04:15
CPU Efficiency: 23.45% of 04:34:00 core-walltime
Job Wall-clock time: 00:34:15
Memory Utilized: 24.15 GB
Memory Efficiency: 201.28% of 12.00 GB


In [2]:
# Assembly statistics for each input assembly and its YaHS scaffolds
# (primary, hap1, hap2), reported by BBMap's statswrapper.sh with format=3.
cd "${WKDIR}"
module load BBMap/38.33

# The comma-separated in= list is split across lines with backslash
# continuations; the continued lines must start in column 0 so that no
# whitespace ends up inside the argument.
statswrapper.sh in=\
"${ASSEMBLY_DIR}/SB1031_q15_duplex_q2050kb_simplex.primary.purged.010923.fa",\
"SB1031_Primary_uHiC_q15_duplex_q2050kb_simplex_yahs3_scaffolds_final.fa",\
"${ASSEMBLY_DIR}/SB1031_q15_duplex_q2050kb_simplex.hap1.purged.240823.fa",\
"SB1031_hap1_uHiC_q15_duplex_q2050kb_simplex_yahs3_scaffolds_final.fa",\
"${ASSEMBLY_DIR}/SB1031_q15_duplex_q2050kb_simplex.hap2.purged.240823.fa",\
"SB1031_hap2_uHiC_q15_duplex_q2050kb_simplex_yahs3_scaffolds_final.fa" \
    format=3
module unload BBMap/38.33


java -ea -Xmx200m -cp /software/bioinformatics/BBMap-38.33/current/ jgi.AssemblyStatsWrapper format=3 in=/workspace/hraijc/HiC_trials/HiC23/Assemblies/SB1031/SB1031_q15_duplex_q2050kb_simplex.primary.purged.010923.fa,SB1031_Primary_uHiC_q15_duplex_q2050kb_simplex_yahs3_scaffolds_final.fa,/workspace/hraijc/HiC_trials/HiC23/Assemblies/SB1031/SB1031_q15_duplex_q2050kb_simplex.hap1.purged.240823.fa,SB1031_hap1_uHiC_q15_duplex_q2050kb_simplex_yahs3_scaffolds_final.fa,/workspace/hraijc/HiC_trials/HiC23/Assemblies/SB1031/SB1031_q15_duplex_q2050kb_simplex.hap2.purged.240823.fa,SB1031_hap2_uHiC_q15_duplex_q2050kb_simplex_yahs3_scaffolds_final.fa format=3
n_scaffolds	n_contigs	scaf_bp	contig_bp	gap_pct	scaf_N50	scaf_L50	ctg_N50	ctg_L50	scaf_N90	scaf_L90	ctg_N90	ctg_L90	scaf_max	ctg_max	scaf_n_gt50K	scaf_pct_gt50K	gc_avg	gc_std	filename
22	22	494320633	494320633	0.000	9	25244161	9	25244161	17	21457242	17	21457242	35581360	35581360	22	100.000	0.35047	0.01982	/powerplant/workspace/hraijc/HiC_trials

| n_scaffolds 	| n_contigs 	| scaf_bp 	| contig_bp 	| gap_pct 	| scaf_N50 	| scaf_L50 	| ctg_N50 	| ctg_L50 	| scaf_N90 	| scaf_L90 	| ctg_N90 	| ctg_L90 	| scaf_max 	| ctg_max 	| scaf_n_gt50K 	| scaf_pct_gt50K 	| gc_avg 	| gc_std 	| filename 	|
|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|---	|
| 22 	| 22 	| 494320633 	| 494320633 	| 0.000 	| 9 	| 25244161 	| 9 	| 25244161 	| 17 	| 21457242 	| 17 	| 21457242 	| 35581360 	| 35581360 	| 22 	| 100.000 	| 0.35047 	| 0.01982 	| SB1031_q15_duplex_q2050kb_simplex.primary.purged.010923.fa 	|
| 23 	| 23 	| 494320633 	| 494320633 	| 0.000 	| 9 	| 25244161 	| 9 	| 25244161 	| 17 	| 21457242 	| 17 	| 21457242 	| 35581360 	| 35581360 	| 22 	| 99.993 	| 0.35047 	| 0.02469 	| SB1031_Primary_uHiC_q15_duplex_q2050kb_simplex_yahs3_scaffolds_final.fa 	|
| 35 	| 35 	| 503922201 	| 503922201 	| 0.000 	| 9 	| 25019217 	| 9 	| 25019217 	| 19 	| 13226561 	| 19 	| 13226561 	| 37690093 	| 37690093 	| 35 	| 100.000 	| 0.35140 	| 0.04945 	| SB1031_q15_duplex_q2050kb_simplex.hap1.purged.240823.fa 	|
| 31 	| 37 	| 503923401 	| 503922201 	| 0.000 	| 8 	| 27008740 	| 9 	| 24881217 	| 16 	| 21390040 	| 19 	| 13226561 	| 50925296 	| 37690093 	| 30 	| 99.993 	| 0.35140 	| 0.03631 	| SB1031_hap1_uHiC_q15_duplex_q2050kb_simplex_yahs3_scaffolds_final.fa 	|
| 41 	| 41 	| 494209437 	| 494209437 	| 0.000 	| 11 	| 21765494 	| 11 	| 21765494 	| 24 	| 8564956 	| 24 	| 8564956 	| 28597816 	| 28597816 	| 40 	| 99.996 	| 0.34960 	| 0.04444 	| SB1031_q15_duplex_q2050kb_simplex.hap2.purged.240823.fa 	|
| 29 	| 45 	| 494212637 	| 494209437 	| 0.001 	| 8 	| 26017946 	| 11 	| 21765494 	| 16 	| 22589806 	| 24 	| 8564956 	| 44542958 	| 28597816 	| 26 	| 99.984 	| 0.34960 	| 0.05377 	| SB1031_hap2_uHiC_q15_duplex_q2050kb_simplex_yahs3_scaffolds_final.fa 	|