## Bin reads into their corresponding haplotypes using the genetic linkage map for the parents and progeny. Hifiasm has partially phased each assembly into two haplotypes, but the haplotypes are not fully phased. Because we have the linkage map, we can now complete the phasing.
### The assembly has been purged and the "contaminant" contigs have been removed.
#### Merge and map the contigs to the same reference as the genetic map.

+ Genetic map: /workspace/hrpsym/Blueberry/NuixM7/map_cleaned_phased
+ Cleaned assemblies: /workspace/hraijc/Blueberry/Blueberry_trio/Assembly/postcontamremoval/merged
+ Reference used for the linkage map: /output/genomic/fairGenomes/Plant/Vaccinium/corymbosum/var_na/sex_na/2x/assembly_w85_20/v1.3/VcaeV1.3.p0.fa


In [1]:
# Session-wide paths used by the cells below (re-run this cell after a kernel restart).
# Working directory that receives the mapping outputs.
WKDIR=/workspace/hraijc/Blueberry/Blueberry_trio/contigbin
# Reference FASTA the contigs are mapped to (same reference as the linkage map).
VCAE=/output/genomic/fairGenomes/Plant/Vaccinium/corymbosum/var_na/sex_na/2x/assembly_w85_20/v1.3/VcaeV1.3.p0.fa
# Directory holding the merged, cleaned assemblies (*.fa).
MERGEDIR=/workspace/hraijc/Blueberry/Blueberry_trio/Assembly/postcontamremoval/merged
# Log directory passed to the Slurm submissions.
LOG="${WKDIR}/log"

# LOG lives inside WKDIR, so creating it also creates the working directory.
# Expansions are quoted so the paths are never word-split or globbed.
mkdir -p "$LOG"
# Warn instead of exiting: an exit here would terminate the notebook kernel.
cd "$WKDIR" || echo "WARNING: could not cd to ${WKDIR}" >&2

In [17]:
# Preparation step, left commented out: concatenate the classified and
# unclassified hap* files for M7 and for Nui into one merged FASTA each.
# NOTE(review): presumably already run once and kept for provenance — confirm.
# NOTE(review): these lines create "merged" directly under Assembly/, whereas
# MERGEDIR in this notebook points at Assembly/postcontamremoval/merged —
# confirm which location is current.
#cd $WORK/Blueberry/Blueberry_trio/Assembly/
#mkdir merged
#cat classified_M7_plus_unclassified_hap* > M7h1h2_cleanMerge.fa
#cat classified_Nui_plus_unclassified_hap* > Nuih1h2_cleanMerge.fa


In [9]:
# Map each merged assembly to VcaeV1.3 using minimap2.
#
# One minimap2 command line is printed per FASTA found in MERGEDIR; the list is
# piped through abatch and the result is submitted with sbatch.
# NOTE(review): abatch is a site tool; the recorded output reports the -j value
# as JOB_ARRAY_NAME, so confirm that passing the log directory there is intended.

module load minimap2
# Output SAM names below are relative, so the submission directory matters.
cd "${WKDIR}" || echo "WARNING: could not cd to ${WKDIR}" >&2

for contigfile in "${MERGEDIR}"/*.fa
do
    # An unmatched glob is left as the literal pattern; skip it so no job is
    # submitted for a non-existent "*.fa" input.
    [ -e "$contigfile" ] || continue
    contigfilename=$(basename "$contigfile")
    # -t 8 matches --cpus-per-task=8 below; -x asm10 is minimap2's
    # assembly-to-reference preset; -a requests SAM output.
    echo "minimap2 -t 8 -ax asm10 ${VCAE} ${contigfile} > ${contigfilename%.fa}_to_Vcae13_asm10.sam"
#    echo ${contigfilename%.fa}
#    echo $contigfile
done | abatch -j "${LOG}" --time 2:30:00 --cpus-per-task=8 --mem 4G | sbatch

module unload minimap2

SBATCH_ARGS: --time 2:30:00 --cpus-per-task=8 --mem 4G
JOB_ARRAY_NAME: /workspace/hraijc/Blueberry/Blueberry_trio/contigbin/log
GROUP_SIZE: 1
NUM_COMMANDS: 2
Submitted batch job 2283487


In [10]:
### Create coordinate-sorted BAM files and index them.
cd "$WKDIR" || echo "WARNING: could not cd to ${WKDIR}" >&2

# The here-document delimiter is deliberately unquoted: ${LOG} and ${WKDIR} are
# expanded now, at submission time, because the batch job does not inherit
# these notebook variables by name. Keep dollar signs and backticks out of the
# job-script comments below for the same reason.
sbatch << EOF
#!/bin/bash
#SBATCH -J samtools
#SBATCH -o ${LOG}/%J.out
#SBATCH -e ${LOG}/%J.err
#SBATCH --cpus-per-task=8
#SBATCH --mem=4G
#SBATCH --time=1:30:00

ml samtools

# Stop at the first failing step so sort/index never run on a missing or
# truncated intermediate file.
set -eo pipefail

cd "${WKDIR}"

# SAM to BAM. The old -S flag is ignored by current samtools (input format is
# auto-detected), so only -b is given.
samtools view -@ 8 -b M7h1h2_cleanMerge_to_Vcae13_asm10.sam > M7h1h2_cleanMerge_to_Vcae13_asm10.bam
samtools view -@ 8 -b Nuih1h2_cleanMerge_to_Vcae13_asm10.sam > Nuih1h2_cleanMerge_to_Vcae13_asm10.bam

# Coordinate sort. -m is per thread: the 768M default times 8 threads would
# exceed the 4G allocation, so cap it at 400M (8 x 400M = 3.2G).
samtools sort -@ 8 -m 400M M7h1h2_cleanMerge_to_Vcae13_asm10.bam -o M7h1h2_cleanMerge_to_Vcae13_psorted.bam
samtools sort -@ 8 -m 400M Nuih1h2_cleanMerge_to_Vcae13_asm10.bam -o Nuih1h2_cleanMerge_to_Vcae13_psorted.bam

# Index the sorted BAMs.
samtools index -@ 8 M7h1h2_cleanMerge_to_Vcae13_psorted.bam
samtools index -@ 8 Nuih1h2_cleanMerge_to_Vcae13_psorted.bam

EOF

Submitted batch job 2283793


In [2]:
### Remove secondary alignments and low quality mappings.
cd "$WKDIR" || echo "WARNING: could not cd to ${WKDIR}" >&2

# The here-document delimiter is deliberately unquoted: ${LOG} and ${WKDIR} are
# expanded now, at submission time. Keep dollar signs and backticks out of the
# job-script comments below for the same reason.
sbatch << EOF
#!/bin/bash
#SBATCH -J samtools
#SBATCH -o ${LOG}/%J.out
#SBATCH -e ${LOG}/%J.err
#SBATCH --cpus-per-task=8
#SBATCH --mem=4G
#SBATCH --time=1:30:00

ml samtools

# Stop at the first failing step so indexing never runs on a missing or
# truncated filtered file.
set -eo pipefail

cd "${WKDIR}"

# -F 256 drops records flagged as secondary alignments; -q 30 drops records
# with mapping quality below 30. Supplementary records (flag 2048) are kept.
# -b makes the BAM output explicit instead of relying on the samtools version
# inferring the format from the .bam extension of the -o file.
samtools view -@ 8 -b -F 256 -q 30 -o M7h1h2_cleanMerge_to_Vcae13_filtered.bam M7h1h2_cleanMerge_to_Vcae13_psorted.bam
samtools view -@ 8 -b -F 256 -q 30 -o Nuih1h2_cleanMerge_to_Vcae13_filtered.bam Nuih1h2_cleanMerge_to_Vcae13_psorted.bam

# Index the filtered BAMs.
samtools index -@ 8 M7h1h2_cleanMerge_to_Vcae13_filtered.bam
samtools index -@ 8 Nuih1h2_cleanMerge_to_Vcae13_filtered.bam

EOF

Submitted batch job 2565369


In [1]:
# TODO: add a copy of the LinkageMap xlsx to powerplant.
# List the LinkageMap directory under the contigbin working area to see
# whether it is there yet.
linkage_map_dir=/workspace/hraijc/Blueberry/Blueberry_trio/contigbin/LinkageMap
ls "$linkage_map_dir"

ls: cannot access /workspace/hraijc/Blueberry/Blueberry_trio/contigbin/LinkageMap: No such file or directory


: 2