# Hi-C Scaffolding Urchin
### New assembly needs new scaffolding

In [None]:
# --- Session setup: paths and names shared by the later cells ---

# Working directory for this scaffolding run, plus a scratch area under it.
WKDIR="/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Urchin/Crodergsii_V3"
TEMP="${WKDIR}/TEMP"

# Assembly FASTA used as the mapping reference and as the contig input to YAHS.
ASSEMBLY="/workspace/hraijc/HiC_trials/HiC23/Assemblies/urchin_230923_primary_purged.fa"
#ASSEMBLY_POSTSCAFF=/workspace/hraijc/HiC_trials/HiC23/Results/Crodergsii_HiC092623_v3.fa

# Paired Hi-C read files (R1 / R2).
HiC_R1="/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Urchin/Crodergsii_V3/reads/Urchin_HiC_combined_filtered_R1.fq.gz"
HiC_R2="/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Urchin/Crodergsii_V3/reads/Urchin_HiC_combined_filtered_R2.fq.gz"

# Prefix prepended to every output file name produced below.
OUT_PREFIX="Crodergsii_HiC092623"

# Disabled settings, kept for reference.
#TELOMERE=TTAGGG #http://telomerase.asu.edu/sequences_telomere.html
#GENOME_SIZE=880000000
#NF_CONFIG=/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Urchin/hic-scaffolding-pipeline/nextflow.config

# Make sure the log and scratch directories exist, then work from WKDIR.
mkdir -p "${WKDIR}/log" "${TEMP}"
cd "${WKDIR}"

In [None]:
#cp /input/globus-test/transfer/public/hraijc/urchin_092023/*.fa /workspace/hraijc/HiC_trials/HiC23/Assemblies/

In [None]:
# Using Annabel's filtered reads

In [None]:
# Prep reference: build the samtools (.fai) and BWA indexes for the assembly.
# The heredoc is unquoted, so WKDIR and ASSEMBLY are expanded when this cell
# submits the job, not when the job runs.
# NOTE(review): 1G of memory and 1 hour may be tight for bwa index on a large
# assembly -- confirm against seff output for this job.

sbatch << EOF
#!/bin/bash
#SBATCH -J bwaindex
#SBATCH --cpus-per-task=1
#SBATCH --mem=1G
#SBATCH --time=1:00:00
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err

module load bwa/0.7.17
module load samtools/1.16

# Fail fast: without this the script's exit status is that of the last command
# only, so a failed faidx followed by a successful bwa index would still be
# reported by Slurm as a successful job.
set -eo pipefail

samtools faidx ${ASSEMBLY}
bwa index ${ASSEMBLY}

EOF

In [None]:
# Map Hi-C reads to the assembly, flag duplicates, filter and sort the
# alignments, then scaffold with YAHS.
# The heredoc is unquoted, so every variable below is expanded when this cell
# submits the job. Large intermediates go to TEMP; the sorted BAMs and the YAHS
# output are written to WKDIR.

sbatch << EOF
#!/bin/bash
#SBATCH -J hic
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=12
#SBATCH --mem=20G
#SBATCH --time=73:00:00

module load bwa/0.7.17
module load samtools/1.16

# Abort on the first failing step. Every stage consumes the previous stage's
# file, so carrying on after a failure would scaffold from truncated data.
set -eo pipefail

cd ${WKDIR}

### Align reads with BWA. Use -5SP for Hi-C reads.#############################
# Options are placed before the positional arguments so parsing does not rely
# on getopt argument permutation.
bwa mem -5SP -t12 -o ${TEMP}/${OUT_PREFIX}.sam ${ASSEMBLY} ${HiC_R1} ${HiC_R2}


### Flag PCR Duplicates with SAMBLASTER #######################################
/workspace/hraijc/git_clones/samblaster/samblaster -i ${TEMP}/${OUT_PREFIX}.sam -o ${TEMP}/${OUT_PREFIX}_marked_byread.sam


### Remove unmapped and non-primary aligned reads. Sort and Index bam files.###
# -F 2316 = 4 (unmapped) + 8 (mate unmapped) + 256 (secondary) + 2048 (supplementary):
#           duplicates are kept, only flagged  -> *_marked.bam
# -F 3340 = 2316 + 1024 (duplicate): duplicates are removed -> *_dedup.bam
samtools view -S -b -h -@ 12 -F 2316 ${TEMP}/${OUT_PREFIX}_marked_byread.sam > ${TEMP}/${OUT_PREFIX}_presort_marked.bam
samtools sort -@ 12 ${TEMP}/${OUT_PREFIX}_presort_marked.bam -o ${OUT_PREFIX}_marked.bam
samtools view -S -b -h -@ 12 -F 3340 ${TEMP}/${OUT_PREFIX}_marked_byread.sam > ${TEMP}/${OUT_PREFIX}_presort_dedup.bam
samtools sort -@ 12 ${TEMP}/${OUT_PREFIX}_presort_dedup.bam -o ${OUT_PREFIX}_dedup.bam


### Run YAHS with contig and scaffolding error correction. ####################
/workspace/hraijc/git_clones/yahs/yahs --no-mem-check ${ASSEMBLY} ${OUT_PREFIX}_dedup.bam -o ${OUT_PREFIX}_dedup_yahs3

EOF


In [None]:
# Submit the contact-map helper script for the default YAHS run.
# The script receives exactly three variables through --export:
#   out     - prefix of the output files produced by YAHS
#   contigs - contig FASTA; must already be indexed (.fai in the same directory)
#   wkdir   - working directory
out="${OUT_PREFIX}_dedup_yahs3"
contigs="${ASSEMBLY}"
wkdir="${WKDIR}"

sbatch \
    --job-name=hic_mapyahs \
    --output="${WKDIR}/log/%J.out" \
    --error="${WKDIR}/log/%J.err" \
    --cpus-per-task=8 \
    --mem=24G \
    --time=03:10:00 \
    --export="out=${out},contigs=${contigs},wkdir=${wkdir}" \
    /workspace/hraijc/Gitrepos/High-quality-genomes/Methods/DNase_HiC/notebooks/yahs_contactmapgen2.sh

In [None]:
# Report resource usage for a finished Slurm job with seff.
# NOTE(review): the job ID is hard-coded, presumably from an earlier submission
# in this notebook -- replace it with the ID printed by sbatch for the job you
# want to inspect.
seff 2069748


In [None]:
# Re-run YAHS with an explicit resolution ladder that starts at 5 kb, to fix
# breaks in smaller contigs. Output goes to the *_dedup_yahs3_5k prefix.
# Variables are expanded at submission time (unquoted heredoc).

sbatch << EOF
#!/bin/bash
#SBATCH --job-name=hic
#SBATCH --cpus-per-task=1
#SBATCH --mem=20G
#SBATCH --time=6:00:00
#SBATCH --output=${WKDIR}/log/%J.out
#SBATCH --error=${WKDIR}/log/%J.err

cd ${WKDIR}

### YAHS, contig and scaffolding error correction left on. ####################
# Alternatives kept for reference: the default run, and a ladder starting at 1 kb.
#/workspace/hraijc/git_clones/yahs/yahs --no-mem-check ${ASSEMBLY} ${OUT_PREFIX}_dedup.bam -o ${OUT_PREFIX}_dedup_yahs3
#/workspace/hraijc/git_clones/yahs/yahs --no-mem-check ${ASSEMBLY} ${OUT_PREFIX}_dedup.bam -r 1000,5000,10000,20000,50000,100000,200000,500000,1000000,2000000,5000000,10000000,20000000,50000000,100000000,200000000,500000000 -o ${OUT_PREFIX}_dedup_yahs3_1k
/workspace/hraijc/git_clones/yahs/yahs --no-mem-check ${ASSEMBLY} ${OUT_PREFIX}_dedup.bam -r 5000,10000,20000,50000,100000,200000,500000,1000000,2000000,5000000,10000000,20000000,50000000,100000000,200000000,500000000 -o ${OUT_PREFIX}_dedup_yahs3_5k

EOF

In [None]:
# Submit the contact-map helper script for the 5 kb YAHS run.
# The script receives exactly three variables through --export:
#   out     - prefix of the output files produced by YAHS
#   contigs - contig FASTA; must already be indexed (.fai in the same directory)
#   wkdir   - working directory
out="${OUT_PREFIX}_dedup_yahs3_5k"
contigs="${ASSEMBLY}"
wkdir="${WKDIR}"

sbatch \
    --job-name=hic_mapyahs \
    --output="${WKDIR}/log/%J.out" \
    --error="${WKDIR}/log/%J.err" \
    --cpus-per-task=8 \
    --mem=24G \
    --time=03:10:00 \
    --export="out=${out},contigs=${contigs},wkdir=${wkdir}" \
    /workspace/hraijc/Gitrepos/High-quality-genomes/Methods/DNase_HiC/notebooks/yahs_contactmapgen2.sh
    


In [None]:
##############
# Do the manual scaffolding cleanup first (the *_JBAT.review.assembly file is
# the reviewed result, presumably saved from Juicebox Assembly Tools), then run
# this cell to apply the review with the YAHS "juicer post" tool.
# Output files use the prefix <OUT_PREFIX>_dedup_yahs3_5k_man1.
# File names are derived from OUT_PREFIX so they stay in step with the cells
# that produced the *_dedup_yahs3_5k outputs.
##############


sbatch << EOF
#!/bin/bash
#SBATCH -J YAHS_liftover
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=1
#SBATCH --mem=4G
#SBATCH --time=2:00:00

# The input files are given as relative paths, so stop if the cd fails rather
# than running the tool in the wrong directory.
set -eo pipefail

cd ${WKDIR}

/workspace/hraijc/git_clones/yahs/juicer post -o ${OUT_PREFIX}_dedup_yahs3_5k_man1 ${OUT_PREFIX}_dedup_yahs3_5k_JBAT.review.assembly ${OUT_PREFIX}_dedup_yahs3_5k_JBAT.liftover.agp ${ASSEMBLY}

EOF


In [None]:
# Settings for exporting the uniquely mapped (deduplicated) Hi-C reads.
# WKDIR is set again here with the same value as in the setup cell.
WKDIR="/workspace/hraijc/HiC_trials/HiC23/NovaSeq/Urchin/Crodergsii_V3"

# Input: the duplicate-removed BAM in WKDIR.
DEDUP_BAM="${WKDIR}/Crodergsii_HiC092623_dedup.bam"

# Outputs: FASTQ files for read 1 and read 2.
READ1="${WKDIR}/reads/Urchin_HiC_U_R1.fq.gz"
READ2="${WKDIR}/reads/Urchin_HiC_U_R2.fq.gz"


In [None]:

#Subset HiC reads
# Name-sort the deduplicated BAM, then export its read pairs to the READ1 and
# READ2 FASTQ files. Variables, including the %.bam suffix stripping, are
# expanded when this cell submits the job (unquoted heredoc).

sbatch << EOF
#!/bin/bash
#SBATCH -J hic_subset
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=12
#SBATCH --mem=12G
#SBATCH --time=2:00:00


module load samtools/1.16

# Stop if the sort fails so the FASTQ export never runs on a partial BAM.
set -eo pipefail

cd ${WKDIR}
samtools sort -@ 12 -n ${DEDUP_BAM} -o ${DEDUP_BAM%.bam}_namesort.bam

# -s /dev/null: discard singletons. Without -s they are written into the
#               -1 and -2 files, which can leave the mates out of step.
# -0 /dev/null: discard reads with neither or both READ1/READ2 flags set,
#               which would otherwise go to stdout, i.e. the Slurm log.
samtools fastq -@ 12 -1 ${READ1} -2 ${READ2} -0 /dev/null -s /dev/null ${DEDUP_BAM%.bam}_namesort.bam

EOF

In [None]:

# Align the manually curated assembly (man1, query) to the YAHS 5k final
# scaffolds (target) with minimap2 and write the alignments as PAF.
sbatch << EOF
#!/bin/bash
#SBATCH --job-name=minimap
#SBATCH --cpus-per-task=4
#SBATCH --mem=2G
#SBATCH --time=01:10:00
#SBATCH --output=${WKDIR}/log/%J.out
#SBATCH --error=${WKDIR}/log/%J.err

module load minimap2

cd /workspace/hraijc/HiC_trials/HiC23/NovaSeq/Urchin/Crodergsii_V3/Synteny

# asm10 preset, CIGAR included in the PAF output (-c), 4 threads.
minimap2 -x asm10 -c -t 4 Crodergsii_HiC092623_dedup_yahs3_5k_scaffolds_final.fa Crodergsii_HiC092623_dedup_yahs3_5k_man1.fa > UrchinYahs5_to_man1.paf
EOF

In [None]:
# Original note: run YAHS without scaffolding just to generate agp assembly and
# HiC files.
# NOTE(review): the two commands below are ordinary YAHS scaffolding runs, with
# resolution ladders starting at 5 kb and at 1 kb. The 5 kb command is the same
# as the earlier re-run cell and writes to the same *_dedup_yahs3_5k prefix.
# Confirm that this is what was intended.
# Variables are expanded at submission time (unquoted heredoc).

sbatch << EOF
#!/bin/bash
#SBATCH --job-name=hic
#SBATCH --cpus-per-task=1
#SBATCH --mem=20G
#SBATCH --time=6:00:00
#SBATCH --output=${WKDIR}/log/%J.out
#SBATCH --error=${WKDIR}/log/%J.err

cd ${WKDIR}

### YAHS, contig and scaffolding error correction left on. ####################
# Default-resolution command kept for reference:
#/workspace/hraijc/git_clones/yahs/yahs --no-mem-check ${ASSEMBLY} ${OUT_PREFIX}_dedup.bam -o ${OUT_PREFIX}_dedup_yahs3

# Ladder starting at 5 kb.
/workspace/hraijc/git_clones/yahs/yahs --no-mem-check ${ASSEMBLY} ${OUT_PREFIX}_dedup.bam -r 5000,10000,20000,50000,100000,200000,500000,1000000,2000000,5000000,10000000,20000000,50000000,100000000,200000000,500000000 -o ${OUT_PREFIX}_dedup_yahs3_5k
# Ladder starting at 1 kb.
/workspace/hraijc/git_clones/yahs/yahs --no-mem-check ${ASSEMBLY} ${OUT_PREFIX}_dedup.bam -r 1000,5000,10000,20000,50000,100000,200000,500000,1000000,2000000,5000000,10000000,20000000,50000000,100000000,200000000,500000000 -o ${OUT_PREFIX}_dedup_yahs3_1k

EOF