#                                          Training module
    ###################################################################################
    # Genomic molecular characterization for viral strains using informatics tools    #
    # CGS, USAMRIID                                                                   #
    # Authors: Raina Kumar (code and training module pipeline),                       #
    #          Joushua Richardson (documentation and presentations)                   #
    # Contact: raina.kumar.ctr@mail.mil                                               #
    ###################################################################################
 
 ## Objective
 
    This training module provides a complete bioinformatics workflow for analyzing genomics data with open-source tools. It uses sequence reads generated by next-generation sequencing (NGS) of genomic DNA or RNA, with the objective of characterizing viral strains in an outbreak setting.
 

In [None]:
## Introduction to next-generation sequencing and genome assembly

from IPython.display import IFrame
IFrame('../docs/final_pdfs/1_training_mod_013120_intro.pdf', width=900, height=300)



In [None]:
## Introduction to genomics assembly workflow 
from IPython.display import IFrame
IFrame('../docs/final_pdfs/2_training_mod_013120_AssemblyPipe.pdf', width=900, height=300)

In [None]:
# Step 1
# Define paths for the input base directory, work directory and result directory.
# For any new dataset, define the corresponding paths in config.yaml.
#
# Every path below is written relative (starting with "../../"), so the
# whole set moves together with the checkout.

base_dir = "../../data/example_data/"
work_dir = "../../data/example_data/"
result_dir = "../../data/example_data/results/"
# Previously "/../../data/example_data/seqindex/": the leading "/" made this
# the only absolute path of the group (resolved from the filesystem root).
srefindex = "../../data/example_data/seqindex/"
# Reference genome FASTA (the same file name appears in the Step 4 bwa mem example).
sreference = "../../data/example_data/references/GCF_000848505.1_ViralProj14703_genomic.fna"
# Primer/adaptor FASTA — presumably the file passed to Trimmomatic's ILLUMINACLIP in Step 3; confirm.
pri_adaptors = "../../data/example_data/references/pri_adaptors.fa"



In [None]:
## Step 2

##Run following:
##
## shell command
## For paired end data
##   time fastqc read.R1_001.fastq.gz read.R2_001.fastq.gz -f fastq -o results/fastqc > log.txt

from IPython.display import IFrame
IFrame('../docs/final_pdfs/3_training_mod_013120__Fastqc.pdf', width=900, height=300)

In [None]:
## Run Step 2

!snakemake --cores all -s "../snakemake/popgen_fastqc.smk"

In [None]:
# Step 2 Fastqc results
from IPython.display import FileLink, FileLinks
FileLinks('../../data/example_data/results/fastqc/.')


In [None]:
# Step 3
## Trim the bait Illumina adaptors and primers (from the Illumina sequencing protocol) using the Trimmomatic tool

## 
## shell command
## For Paired end data
# "time java -jar trimmomatic-0.33.jar PE -threads 3 -trimlog logprefix input.read.R1_001.fastq.gz input.read.R2_001.fastq.gz out.read.paired.R1.fastq out.read.unpaired.R1.fastq out.fastq.paired.R2.fastq out.fastq.unpaired.R2.fastq ILLUMINACLIP:input.primer.adaptor.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:30"
##


from IPython.display import IFrame
IFrame('../docs/final_pdfs/4_training_mod_013120__Trimv2.pdf', width=900, height=300)


In [None]:
# Step 3 Run Trimmomatic on sequence reads using snakemake rule trimmomatics

!snakemake --cores all -s "../snakemake/popgen_trimmomatics.smk"


In [None]:
# Sequence read summary after trimming adaptors and primers

# Reports

from IPython.display import FileLink, FileLinks
FileLinks('../../data/example_data/results/primer_adapt_removed/.')


In [None]:
# Step 4 

## Reference mapping for Read correction
## Align reads to makona viral genome assembly fasta file

## Shell command
## time bwa mem -t 30 makona/references/GCF_000848505.1_ViralProj14703_genomic.fna input.read.1.fastq input.read.2.fastq > sample1.assembly_align_mem_ref.sam



from IPython.display import IFrame
IFrame('../docs/final_pdfs/5_training_mod_013120__Alignment.pdf', width=900, height=300)


In [None]:
# Run step 4 for reference mapping for read correction using snakemake rule refmapsam

!snakemake --cores all -s "../snakemake/popgen_refmapsam.smk"

In [None]:
# Output from reference mapping
from IPython.display import FileLink, FileLinks
FileLinks('../../data/example_data/results/ref_aligned/')


In [None]:
## Step 5

## Sort sam file and convert to bam format file using samtools software
## Shell command:
## "time samtools sort -O BAM makona.aligned.mem.sam > sample1.assembly_align_mem_ref_sorted.bam"


In [None]:
# Run Step 5: sort the SAM alignments and convert them to BAM
# (snakemake rule file popgen_samsort2bam.smk, using all available cores).
!snakemake --cores all -s "../snakemake/popgen_samsort2bam.smk"

In [None]:
# Output from Step 5 (SAM sorting / BAM conversion), listed from the ref_aligned results directory


from IPython.display import FileLink, FileLinks
FileLinks('../../data/example_data/results/ref_aligned/.')



In [None]:
# Step 6

## Reference Guided Assembly graph using velvet assembler

## Shell Command:
## "time velveth out.assembly.dir input.kmernumber -bam -longPaired sample1.assembly_align_mem_ref_sorted.bam"


from IPython.display import IFrame
IFrame('../docs/final_pdfs/5_training_mod_013120__Alignmentv2.pdf', width=900, height=300)
    

In [None]:
# Run Step 6: reference-guided velvet assembly
# (snakemake rule file popgen_assembly.smk, using all available cores).
!snakemake --cores all -s "../snakemake/popgen_assembly.smk"

In [None]:
# Step 6 output: files from the velvet assembly (velveth)


from IPython.display import FileLink, FileLinks
# Point at the example_data results tree used by the other report cells;
# the previous 'makona/results/...' path did not follow that layout.
# NOTE(review): confirm the velvet_assembly directory name against the snakemake rule.
FileLinks('../../data/example_data/results/velvet_assembly/')


In [None]:
# Step 7

## Reference Guided Assembly map using velvet assembler
## Shell Command:

## "time velvetg input.out.assembly.dir -amos_file yes > output.logfile"


from IPython.display import IFrame
IFrame('../docs/final_pdfs/5_training_mod_013120__Alignmentv2.pdf', width=900, height=300)


In [None]:

!snakemake --cores all -s "../snakemake/popgen_assembly_sgraph.smk" 


In [None]:
## Step 7 output
## Output from velvet assembly (velvetg)

from IPython.display import FileLink, FileLinks
# Point at the example_data results tree used by the other report cells;
# the previous 'makona/results/...' path did not follow that layout.
# NOTE(review): confirm the velvet_assembly directory name against the snakemake rule.
FileLinks('../../data/example_data/results/velvet_assembly/.')

In [None]:
# Step 8

## Assembly quality assessment statistics and gene prediction
## Shell Command:

## "time quast.py step7.input.contig.fa -R chk.genome.fa -G chk.genome.gff -o out.assembly.stat.reports --glimmer > output.logfile"


from IPython.display import IFrame
IFrame('../docs/final_pdfs/6_training_mod_013120__DraftQC.pdf', width=900, height=300)



In [None]:

# Run Step 8 (assembly statistics and gene prediction) through snakemake.
# NOTE: -n requests a dry run — snakemake only lists the jobs it would execute.
# Remove -n to actually generate the Step 8 reports.
!snakemake --cores all -s "../snakemake/popgen_assembly_predictgene.smk" -n

In [None]:
## Step 8 Assembly reports

from IPython.display import HTML
HTML(filename="../../data/example_data/results/assembl_stats/R4714bT_S2_L001_reference_stats/report.html")



In [None]:
# Step 9
## Create index of contigs and map reads back to contig
## Shell command: 

## "time bwa index -a bwtsw step7.input.contig.fa > output.logfile"

from IPython.display import IFrame
IFrame('../docs/final_pdfs/7_training_mod_013120__Polishv2.pdf', width=900, height=300)



In [None]:

!snakemake --cores all -s "../snakemake/popgen_bwaindex_contig.smk" -n

In [None]:
# Step 10
## Map the reads back to the contigs indexed in Step 9
## Shell command:
## "time bwa mem -t 30 step8.input.contig.fa {input.read1p} {input.read2p} > {output.contigalign}"

In [None]:


!snakemake --cores all -s "../snakemake/popgen_alignreads2contig.smk" -n

In [None]:
# Step 11
## Coordinate sort sam files and convert to bam file using samtools


In [None]:

!snakemake --cores all -s "../snakemake/popgen_sortSAM.smk" -n

In [None]:
# Step 12

## "time samtools faidx contigs.fa > output.logfile"
!snakemake --cores all -s "../snakemake/popgen_reindexContig.smk" -n


In [None]:
# Step 13

## Variant Calling using samtools mpileup

## Shell Command:

## "time samtools mpileup -u -g -f step8.input.contig.fa step11.contig.read.sorted.aligned.bam | bcftools call -v -m -O z -o output.mpileup.vcf.gz > output.logfile"


from IPython.display import IFrame
IFrame('../docs/command_pdfs/training_mod_Draft_Sl39.pdf', width=900, height=300)

In [None]:
!snakemake --cores all -s "../snakemake/popgen_variantsCall.smk" -n

In [None]:
## Reports

from IPython.display import FileLink, FileLinks
FileLinks('../../data/example_data/results/variants_calling/.')


In [None]:
# Step 14 (inferred: this unlabelled cell sits between Step 13 and Step 15)
## Presumably indexes the VCF produced by variant calling — inferred from the
## rule file name popgen_vcfindex.smk; confirm against the rule itself.
## NOTE: -n requests a dry run — snakemake only lists the jobs it would execute.
!snakemake --cores all -s "../snakemake/popgen_vcfindex.smk" -n

In [None]:
# Step 15

## Build sequences consensus

## Shell Command:

## "time cat step8.input.contig.fa | bcftools consensus output.mpileup.vcf.gz > output.consensus.fa"



In [None]:

!snakemake --cores all -s "../snakemake/popgen_buildConsensus.smk" -n



In [None]:
## Reports: consensus sequences built in Step 15

from IPython.display import FileLink, FileLinks
# Use the example_data results tree like the other report cells; the previous
# '../../results/consensus_seq/.' path left out the data/example_data/ prefix.
# NOTE(review): confirm the consensus_seq directory name against the snakemake rule.
FileLinks('../../data/example_data/results/consensus_seq/.')

In [None]:
# Step 16

## Consensus multiple alignment

## Shell Command:

## cat ebola_ref.fasta final_assembly.fasta > combined.fasta && mafft combined.fasta > Final_alignment.out


from IPython.display import IFrame
IFrame('../docs/final_pdfs/8_training_mod_013120__GenAlignv3.pdf', width=900, height=300)



In [None]:
!snakemake --cores all -s "../snakemake/popgen_maff_alignment_view.smk"




In [None]:
## View MSA alignment

# library(shiny) / runApp() are R calls; evaluated by a Python kernel they fail
# with NameError. Hand them to Rscript through the shell instead.
# get_ipython().system(...) is the call IPython performs for a "!command" line,
# so the output is shown in the notebook like the snakemake cells.
# NOTE(review): runApp() is still called without arguments, as before — confirm
# the Shiny app files live in the notebook's working directory.
get_ipython().system("Rscript -e 'library(shiny); runApp()'")


In [None]:
## Reports

from IPython.display import FileLink, FileLinks
FileLinks('../../data/example_data/results/maff_haplo/.')

In [None]:

## Variant statistics using bcftools stats
## Shell Command:
# "time bcftools stats -F step8.input.contig.fa -s - step11.output.mpileup.vcf.gz > output.variants.stat"

!snakemake --cores all -s "../snakemake/popgen_variants_stat.smk" -n

In [None]:
## Reports

from IPython.display import FileLink, FileLinks
FileLinks('../../data/example_data/results/variants_stats/.')



In [None]:
# Print the first 100 lines of one sample's bcftools statistics file.
# NOTE(review): this path uses a makona/results/ layout and the sample name
# Brett424_1_S4_L001, whereas the report cells list
# ../../data/example_data/results/ — confirm the file exists relative to this notebook.
!head -100 makona/results/variants_stats/Brett424_1_S4_L001_vcf.stats

In [None]:
## Haplotype network and SNP analysis

## Shell

!snakemake --cores all -s "../snakemake/popgen_haplonetwork.smk"


In [None]:

from IPython.display import IFrame
IFrame('../docs/final_pdfs/9_training_mod_013120__HapNetv2.pdf', width=900, height=300)

In [None]:
## Reports

from IPython.display import FileLink, FileLinks
FileLinks('../../data/example_data/results/haplotype_network/')



In [None]:
from IPython.display import HTML
HTML(filename="../rscripts/rscript_haplo.nb.html")



In [None]:
## References

## Shell

from IPython.display import IFrame
IFrame('../docs/final_pdfs/10_training_mod_013120__CommandLine.pdf', width=900, height=300)