In [11]:
# Set up: base working directory for the SB1031 NovaSeq Hi-C scaffolding trial
WKDIR=/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031

# Create the log folder used by the Slurm jobs (this also creates WKDIR), then work from WKDIR
mkdir -p "${WKDIR}/log"
cd "${WKDIR}"

In [None]:
# Location of the SB1031 assemblies (note only: a bare directory path is not a runnable
# command and makes bash fail with "Is a directory"):
# /workspace/hraijc/HiC_trials/HiC23/Assemblies/SB1031

In [4]:
# IN INTERACTIVE SESSION (reference only; nothing in this cell is executed):
# Use srun, not salloc.
#
# These steps are kept as '#' comments because bash has no triple-quote block comment:
# a """ ... """ wrapper is evaluated as one quoted word and run as a command.
#
#   srun --time 01:00:00 --cpus-per-task 1 --mem-per-cpu 4G --pty bash
#   cd $WKDIR
#   git clone git@github.com:ignacio3437/hic-scaffolding-pipeline.git
#   cd ${WKDIR}/hic-scaffolding-pipeline/
#   git checkout dev
#   cd ${WKDIR}/hic-scaffolding-pipeline/containers
#   sudo -E singularity build pipeline_container.sif pipeline_container.def

In [1]:
# Raw Hi-C data: list the delivered FASTQ files whose names start with Grape_1031_HFFHYDSX7
ls "/input/genomic/plant/Actinidia/chinensis/ExperimentRequestor11022/AGRF_CAGRF230414403_HFFHYDSX7"/Grape_1031_HFFHYDSX7*

/input/genomic/plant/Actinidia/chinensis/ExperimentRequestor11022/AGRF_CAGRF230414403_HFFHYDSX7/Grape_1031_HFFHYDSX7_CACGAT_L002_R1.fastq.gz
/input/genomic/plant/Actinidia/chinensis/ExperimentRequestor11022/AGRF_CAGRF230414403_HFFHYDSX7/Grape_1031_HFFHYDSX7_CACGAT_L002_R2.fastq.gz
/input/genomic/plant/Actinidia/chinensis/ExperimentRequestor11022/AGRF_CAGRF230414403_HFFHYDSX7/Grape_1031_HFFHYDSX7_CACGAT_L003_R1.fastq.gz
/input/genomic/plant/Actinidia/chinensis/ExperimentRequestor11022/AGRF_CAGRF230414403_HFFHYDSX7/Grape_1031_HFFHYDSX7_CACGAT_L003_R2.fastq.gz


In [3]:
# Combine the L002 and L003 read files into a single R1 file and a single R2 file.
# Concatenated gzip files form a valid multi-member gzip stream, so plain cat is enough.
# NOTE(review): output goes to /workspace/hraijc/TEMP, while later cells read from
# ${WKDIR}/TEMP - confirm this is the intended location.

sbatch << EOF
#!/bin/bash
#SBATCH -J cat
#SBATCH --cpus-per-task=1
#SBATCH --mem=1G
#SBATCH --time=1:00:00
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err

# Stop at the first failing command; otherwise the final date call makes the job
# exit 0 even when cd or cat failed and the outputs are empty or incomplete.
set -e

# The redirections below need the output directory to exist.
mkdir -p /workspace/hraijc/TEMP

cd /input/genomic/plant/Actinidia/chinensis/ExperimentRequestor11022/AGRF_CAGRF230414403_HFFHYDSX7
date
cat Grape_1031_HFFHYDSX7_CACGAT_L002_R1.fastq.gz Grape_1031_HFFHYDSX7_CACGAT_L003_R1.fastq.gz > /workspace/hraijc/TEMP/SB1031_HiC23_R1.fastq.gz
cat Grape_1031_HFFHYDSX7_CACGAT_L002_R2.fastq.gz Grape_1031_HFFHYDSX7_CACGAT_L003_R2.fastq.gz > /workspace/hraijc/TEMP/SB1031_HiC23_R2.fastq.gz
date
EOF


Submitted batch job 1948299


In [12]:
# Show the pipeline configuration currently in use.
# This cell only prints the file; any edits to nextflow.config are made outside this cell.
cat "${WKDIR}/hic-scaffolding-pipeline/nextflow.config"

includeConfig './conf/base.config'

params {

    // R1, R2 dataset
    reads_R1                = "/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/TEMP/SB1031_hic1_umap_R1.fastq.gz"
    reads_R2                = "/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/TEMP/SB1031_hic1_umap_R2.fastq.gz"

    // Assembly file
    assembly_fasta          = "/workspace/hraijc/HiC_trials/HiC23/Assemblies/SB1031/SB1031_duplex_Q20_50kb_plushic.hic.purged.fa" // SALSA requires fasta to not contain ":" character

    // Prefix for output files
    out_prefix              = "SB1031_hic2"

    //STAG 1 
    stop_at_hicqc           = false

    //STAG 2: Include one specific scaffolder, report included
    only_run_yahs           = false
    only_run_salsa2         = false
    only_run_allhic         = false
    only_run_3D_DNA         = false
    

    /* https://github.com/c-zhou/yahs
    yahs_params{
        scaffolding_with_agp_file       = "" // for exmple: -a
        resolution                  

In [10]:
# Run: submit the Nextflow Hi-C scaffolding pipeline as a 1-CPU, 4G Slurm job.
# The heredoc is unquoted on purpose so WKDIR and USER are filled in at submission time.

sbatch << EOF
#!/bin/bash -e

#SBATCH --job-name=hic_pipeline_${USER}
#SBATCH --output=${WKDIR}/log/%J.out
#SBATCH --error=${WKDIR}/log/%J.err
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=4G
#SBATCH --time=167:00:00

module load nextflow/22.10.4
module load apptainer/1.1

cd ${WKDIR}/hic-scaffolding-pipeline

srun nextflow main.nf -resume
EOF





Submitted batch job 1950039


## Subset the mapped, unique reads from the Hi-C scaffolding pipeline run above to speed up future runs.

In [None]:
# Not run (kept for reference): symlink SB1031_hic1_dedup.bam from the Nextflow work directory into TEMP/
#ln -s /workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/hic-scaffolding-pipeline/work/a2/1f3e4a5d328922b544d4616e2f8ef5/SB1031_hic1_dedup.bam /workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/TEMP/

In [5]:
# Inputs for the read-subsetting job in the next cell.
# Project directory (note the trailing slash, so joined paths contain "//", which is harmless)
WKDIR="/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/"
# BAM file to subset
BAMFILE="/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/TEMP/SB1031_hic1_dedup.bam"
# Prefix for the FASTQ files the job writes
OUTPREFIX="SB1031_hic1_umap"

In [6]:
##############
# Subset Hi-C reads to keep only mapped, unique reads; speeds up analysis for future iterations.
# -F 3340 drops reads with any of these SAM flags set:
#   4 (unmapped) + 8 (mate unmapped) + 256 (secondary) + 1024 (duplicate) + 2048 (supplementary)
# The FASTQ files are written to ${WKDIR}/TEMP, the location nextflow.config reads them from.
##############

cd ${WKDIR}/TEMP

sbatch << EOF
#!/bin/bash
#SBATCH -J samtoolsfastq
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=8
#SBATCH --mem=12G
#SBATCH --time=12:00:00

# Output names are relative, so run inside TEMP; a plain cd to the project directory
# would leave the FASTQ files outside the folder the pipeline config points at.
cd ${WKDIR}/TEMP

module load samtools

# Fail the job when any stage of the pipe fails, not only the last one.
# Set after the module call so the module function runs with default shell options.
set -eo pipefail

# Filter, name-sort so mates are adjacent, then write R1/R2; reads routed to -0 and -s
# are discarded, and -n keeps read names without a /1 or /2 suffix.
samtools view -h -b -@ 8 -F 3340 ${BAMFILE} | samtools sort -@ 8 -n | samtools fastq -@ 8 -1 ${OUTPREFIX}_R1.fastq.gz -2 ${OUTPREFIX}_R2.fastq.gz -0 /dev/null -s /dev/null -n
EOF

Submitted batch job 1950277


In [1]:
# Report CPU and memory efficiency of the completed samtools subsetting job (1950277)
seff 1950277

Job ID: 1950277
Cluster: powerplant
User/Group: hraijc/hraijc
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 8
CPU Utilized: 09:52:29
CPU Efficiency: 93.22% of 10:35:36 core-walltime
Job Wall-clock time: 01:19:27
Memory Utilized: 6.72 GB
Memory Efficiency: 55.99% of 12.00 GB


## Hi-C Scaffolding 2: running the full pipeline on the subset of Hi-C reads that mapped and are unique.

In [1]:
# Set up for the second scaffolding run: point WKDIR back at the project directory
WKDIR=/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031

# Create the log folder used by the Slurm jobs (this also creates WKDIR), then work from WKDIR
mkdir -p "${WKDIR}/log"
cd "${WKDIR}"

In [2]:
# Run: submit the Nextflow Hi-C scaffolding pipeline again as a 1-CPU, 4G Slurm job.
# The heredoc is unquoted on purpose so WKDIR and USER are filled in at submission time.

sbatch << EOF
#!/bin/bash -e

#SBATCH --job-name=hic_pipeline_${USER}
#SBATCH --output=${WKDIR}/log/%J.out
#SBATCH --error=${WKDIR}/log/%J.err
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=4G
#SBATCH --time=167:00:00

module load nextflow/22.10.4
module load apptainer/1.1

cd ${WKDIR}/hic-scaffolding-pipeline

srun nextflow main.nf -resume
EOF




Submitted batch job 1953196


In [8]:
# Point WKDIR at one Nextflow work directory (presumably the YAHS task's - confirm).
# NOTE: this re-uses the name WKDIR, which earlier cells set to the project directory.
WKDIR="/workspace/hraijc/HiC_trials/HiC23/NovaSeq/SB1031/hic-scaffolding-pipeline/work/50/466373aa171544a40475edde4b1d93"
# Assembly FASTA file name and the output prefix used for the heatmap job
ASSEMBLY="SB1031_duplex_Q20_50kb_plushic.hic.purged.fa"
OUT="SB1031_hic2_yahs"

# Folder for the Slurm logs of the heatmap job
mkdir -p "${WKDIR}/log"

In [10]:
# Make a heatmap of the YAHS output by submitting yahs_contactmapgen2.sh to Slurm
cd "${WKDIR}"

# Values handed to the job script through --export
out="${OUT}"                    # prefix of outfiles produced by YAHS
contigs="${WKDIR}/${ASSEMBLY}"  # needs to be indexed, i.e. a .fai file in the same directory
wkdir="${WKDIR}"

# NOTE(review): --export without ALL passes only the listed variables (plus SLURM_*)
# to the job - confirm the script does not rely on anything else from this environment.
sbatch \
    --job-name=hic_mapyahs \
    --output="${WKDIR}/log/%J.out" \
    --error="${WKDIR}/log/%J.err" \
    --cpus-per-task=8 \
    --mem=12G \
    --time=03:10:00 \
    --export="out=${out},contigs=${contigs},wkdir=${wkdir}" \
    /workspace/hraijc/Gitrepos/High-quality-genomes/Methods/DNase_HiC/notebooks/yahs_contactmapgen2.sh

Submitted batch job 1960327
