## Putting the final touches on the Crimson Glory genome assembly

#### Need to make sure the scaffolds are named after the chromosomes of *Eucalyptus grandis* and that the scaffolds are all in the correct order/orientation.

#### Using the most recent *E. grandis* genome assembly from the Australian National University (RefSeq GCF_016545825.1), found here:
https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_016545825.1/



In [8]:
## Set up
# Paths shared by every cell below; run this cell first in any new session.

# Working directory for the Crimson Glory v2 assembly.
WKDIR=/workspace/hraijc/Manuka/CrismsonGlory_V2/Genome_Assembly

# Eucalyptus grandis references: RefSeq GCF_016545825.1 and the v2.0 assembly.
EG_ASSEMBLY=/workspace/hraijc/Manuka/CrismsonGlory_V2/Genome_Assembly/assembly_qc/GCF_016545825.1_ASM1654582v1_genomic.fna
EG_ASSEMBLY2=/workspace/ComparativeDataSources/Myrtaceae/Eucalyptus/grandis/Genome/v2.0/assembly/Egrandis_297_v2.0.fa

# Manuka assemblies: the original v2c location, then the files kept in WKDIR.
V2C_ORIGINAL=/workspace/hrtjbs/Manuka_annotation/genome/manuka_CG_Hybrid_HiC_v2c.fa
V2C=${WKDIR}/manuka_CG_Hybrid_HiC_v2c.fa
V2D=${WKDIR}/manuka_CG_Hybrid_v2d_100kb.fasta
V2E=${WKDIR}/manuka_CG_Hybrid_v2e.fasta

# The sbatch cells send stdout/stderr to ${WKDIR}/log. Slurm does not create
# that directory itself, so make sure it exists (no-op when already present).
mkdir -p "${WKDIR}/log"
cd "${WKDIR}"

In [10]:
# Take a working copy of the original v2c assembly. The destination is given
# explicitly so the copy lands at ${V2C} whatever the current directory is.
cp "${V2C_ORIGINAL}" "${V2C}"

In [14]:
# V2C to V2D.
# minimap2 takes the target first and the query second, so in the PAF the
# target columns refer to v2c and the query columns to the v2d_100kb FASTA.
# The heredoc is deliberately unquoted: WKDIR, V2C and V2D are expanded by the
# notebook shell at submission time, so the job script contains literal paths.

sbatch << EOF
#!/bin/bash
#SBATCH -J minimap
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=4
#SBATCH --mem=2G
#SBATCH --time=01:10:00

module load minimap2

# -t matches --cpus-per-task above; -c adds CIGAR strings to the PAF output.
# Absolute output path so the PAF lands in WKDIR regardless of the directory
# the job was submitted from.
minimap2 -t 4 -cx asm10 ${V2C} ${V2D} > ${WKDIR}/ManukaV2c_ManukaV2d100kb_asm10.paf
EOF

Submitted batch job 2061293


In [24]:
# V2E to E. grandis
# minimap2 takes the target first and the query second, so here manuka v2e is
# the target and the E. grandis assembly (EG_ASSEMBLY) is the query; read the
# PAF columns accordingly.
# The heredoc is deliberately unquoted: WKDIR, V2E and EG_ASSEMBLY are expanded
# by the notebook shell at submission time.

sbatch << EOF
#!/bin/bash
#SBATCH -J minimap
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=4
#SBATCH --mem=2G
#SBATCH --time=01:10:00

module load minimap2

# -t matches --cpus-per-task above; -c adds CIGAR strings to the PAF output.
# Absolute output path so the PAF lands in WKDIR regardless of the directory
# the job was submitted from.
minimap2 -t 4 -cx asm10 ${V2E} ${EG_ASSEMBLY} > ${WKDIR}/ManukaV2e_EGash_asm10.paf
EOF

Submitted batch job 2061408


In [11]:
# V2C to V2E
# minimap2 takes the target first and the query second, so v2c is the target
# and v2e is the query in the resulting PAF.
# NOTE(review): this cell reads the original v2c path (V2C_ORIGINAL) and uses
# the asm5 preset, whereas the V2C-to-V2D cell reads the working copy (V2C)
# with asm10 - confirm both differences are intentional.
# The heredoc is deliberately unquoted: the variables are expanded by the
# notebook shell at submission time.

sbatch << EOF
#!/bin/bash
#SBATCH -J minimap
#SBATCH -o ${WKDIR}/log/%J.out
#SBATCH -e ${WKDIR}/log/%J.err
#SBATCH --cpus-per-task=4
#SBATCH --mem=2G
#SBATCH --time=01:10:00

module load minimap2

# -t matches --cpus-per-task above; -c adds CIGAR strings to the PAF output.
# Absolute output path so the PAF lands in WKDIR regardless of the directory
# the job was submitted from.
minimap2 -t 4 -cx asm5 ${V2C_ORIGINAL} ${V2E} > ${WKDIR}/ManukaV2c_ManukaV2e_asm5.paf
EOF

Submitted batch job 2061504


### AssemblyQC

In [2]:
# Move into the assembly_qc directory; nextflow.config and assembly_qc_pfr.sh
# in the following cells are referenced relative to it.
cd ${WKDIR}/assembly_qc/

In [3]:
# Print the pipeline config with comment lines removed. Note that -v "//" drops
# every line containing "//" anywhere, so a setting with a trailing // comment
# (or a value containing "//") is hidden from this listing as well.
grep -v "//" nextflow.config

includeConfig './conf/base.config'

params {
    
    target_assemblies       = [
        ["manuka_CG_Hybrid_v2e", "/workspace/hraijc/Manuka/CrismsonGlory_V2/Genome_Assembly/manuka_CG_Hybrid_v2e.fasta"]
    ]

    assembly_gff3           = []
    
    assemblathon_stats {
        n_limit             = 100
    }

    ncbi_fcs_adaptor {


    }

    ncbi_fcs_gx {
        skip                = 1

        tax_id              = "78410"

        db_path             = "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24"

    }
    
    busco {
        skip                = 0

        mode                = "geno"
        
        lineage_datasets    = ["embryophyta_odb10"]

        download_path       = "/workspace/ComparativeDataSources/BUSCO/assembly_qc"
    }

    tidk {
        skip                = 0
        
        repeat_seq          = "TTTAGGG"

        filter_by_size      = 0

        filter_size_bp      = 1000000
    }

    lai {
        skip                = 1

        mode 

In [4]:
# Submit assembly_qc_pfr.sh to Slurm from the assembly_qc directory.
# NOTE(review): presumably this launches the pipeline configured by
# nextflow.config - confirm in the script itself.
sbatch ./assembly_qc_pfr.sh

Submitted batch job 2061432
