In [None]:
#######################################
Input:
1. [11TBS|12TBS|13TBS|7TBS|1TBS|8TBS]R1.trimmed (read 1 female trimmed reads)
2. [11TBS|12TBS|13TBS|7TBS|1TBS|8TBS]R2.trimmed (read 2 female trimmed reads)

Function:
Take all illumina trimmed female reads and merge them. This is done as a prerequisite for pilon polishing, 
which uses illumina data to polish the PacBio assembly.

Output:
1. r1fetrev_trimmed.fastq.gz (FASTQ.GZ file for read 1 trimmed female reads)
2. r2fetrev_trimmed.fastq.gz (FASTQ.GZ file for read 2 trimmed female reads)
#######################################

In [6]:
#get all illumina female reads for fw and rv and merge 
fqdir=/powerplant/workspace/hramzr/DNAseq_mapping/reads_alignment/
logdir=/powerplant/workspace/hramzr/github/Trevally/PacBio/log/

ls  $fqdir | egrep "11TBS|12TBS|13TBS|7TBS|1TBS|8TBS" | egrep "R1.trimmed">r1fe
ls  $fqdir | egrep "11TBS|12TBS|13TBS|7TBS|1TBS|8TBS" | egrep "R2.trimmed">r2fe


cat r1fe | awk -v "d=$fqdir" '{print d$1}' | tr "\n" " ">r1fecat

cat r2fe | awk -v "d=$fqdir" '{print d$1}' | tr "\n" " ">r2fecat

r1cat="$(cat r1fecat)"
r2cat="$(cat r2fecat)"

bsub -o ${logdir}fqmerger1.out -e ${logdir}fqmerger1.err \
"cat $r1cat >r1fetrev_trimmed.fastq.gz" 

bsub -o ${logdir}fqmerger2.out -e ${logdir}fqmerger2.err \
"cat $r2cat >r2fetrev_trimmed.fastq.gz"

mv r1fetrev_trimmed.fastq.gz /workspace/hramzr/github/Trevally/PacBio/fastq/
mv r2fetrev_trimmed.fastq.gz /workspace/hramzr/github/Trevally/PacBio/fastq/

Job <334505> is submitted to default queue <normal>.
Job <334506> is submitted to default queue <normal>.


In [None]:
#######################################
Input:
1. fetreval.fasta.contigs.fasta (FASTA reference file)

Function:
Index the reference sequence as a prerequisite for polishing with illumina reads in Pilon.

Output:
1. fetreval.fasta.contigs.fasta.fai (Indexed FASTA reference file)
#######################################

In [1]:
#pre-polish process align against ref CANU DEFAULT
ref=/workspace/hramzr/github/Trevally/PacBio/polishing/arrow_out/consensus_canu_filtered_7.fasta
r1=/workspace/hramzr/github/Trevally/PacBio/masurca/r1fetrev.fastq.gz
r2=/workspace/hramzr/github/Trevally/PacBio/masurca/r2fetrev.fastq.gz
pdir=/workspace/hramzr/github/Trevally/PacBio/polishing/
module load bwa/0.7.17
bsub -o log/bwaind.out -e log/bwaind.err \
"bwa index $ref"

Job <705668> is submitted to default queue <lowpriority>.


In [None]:
#######################################
Input:
1. fetreval.fasta.contigs.fasta (FASTA reference file)
2. r1fetrev_trimmed.fastq.gz (FASTQ.GZ file for read 1 trimmed female reads)
3. r2fetrev_trimmed.fastq.gz (FASTQ.GZ file for read 2 trimmed female reads)

Function:
Map the read 1 and read 2 mapped reads against the reference sequence to create a mapped bam file.

Output:
1. mapped_canu_default.bam (bam file with mapped reads)
#######################################

In [2]:
#mappin it
module load samtools/1.9
module load bwa/0.7.15

ref=/workspace/hramzr/github/Trevally/PacBio/polishing/arrow_out/consensus_canu_filtered_7.fasta
r1=/workspace/hramzr/github/Trevally/PacBio/fastq/r1fetrev_trimmed.fastq.gz
r2=/workspace/hramzr/github/Trevally/PacBio/fastq/r2fetrev_trimmed.fastq.gz
pdir=/workspace/hramzr/github/Trevally/PacBio/polishing/bwa_mem_out/
bsub -o log/bwamap.out -e log/bwamap.err -J "bwamem" -n 8 \
"bwa mem -t 8 \
$ref $r1 $r2 \
| samtools view -Su - | samtools sort - -o ${pdir}mapped_arrow_filtered7.bam"

bwa/0.7.15(22):ERROR:150: Module 'bwa/0.7.15' conflicts with the currently loaded module(s) 'bwa/0.7.17'
bwa/0.7.15(22):ERROR:102: Tcl command execution failed: conflict ${appname}

Job <705749> is submitted to default queue <lowpriority>.


In [None]:
#######################################
Input:
1. mapped_canu_default.bam (bam file with mapped reads)

Function:
Index the mapped bam file, which is a prerequisite to running Pilon.

Output:
1. mapped_canu_default.bam.bai (Indexed bam file with mapped reads)
#######################################

In [1]:
# bsub -o log/bamind.out -e log/bamind.err \
#index samtool files
pdir=/workspace/hramzr/github/Trevally/PacBio/polishing/bwa_mem_out/
module load samtools/1.9
bsub -o log/bamind.out -e log/bamind.err \
"samtools index ${pdir}mapped_arrow_filtered7.bam"

Job <706219> is submitted to default queue <lowpriority>.


In [None]:
#######################################
Input:
1. mapped_canu_default.bam (bam file with mapped reads)

Function:
Polish the assembly reference file with the mapped read file, which contains female illumina reads.

Output:

1. pilon.fasta (Consensus file after polishing)
#######################################

In [1]:
module purge
module load pilon/1.20
pdir=/workspace/hramzr/github/Trevally/PacBio/polishing/bwa_mem_out/mapped_arrow_filtered7.bam
ref=/workspace/hramzr/github/Trevally/PacBio/polishing/arrow_out/consensus_canu_filtered_7.fasta
pil=/software/bioinformatics/pilon-1.20/pilon-1.20.jar
outdir=/workspace/hramzr/github/Trevally/PacBio/polishing/pilon_out/filtered_7_AP_filtered/
logdir=/workspace/hramzr/github/Trevally/PacBio/log/
bsub -o ${logdir}pilon.out -e ${logdir}pilon.err -J "Pilon" \
"java -jar -Xmx96G $pil --genome $ref --frags $pdir"

Job <717312> is submitted to default queue <lowpriority>.


In [10]:
pwd

/powerplant/workspace/hramzr/github/Trevally/PacBio/polishing
