In [3]:
import sys
import os
from os import listdir,path
import subprocess

This pipeline relies heavily on `snakemake`.
<font color="red">NOTE:</font> Before running the pipeline, make sure everything is correct by doing a dry run (the `#DEBUG` command below) from inside the REAP directory:

```bash
. ~/anaconda3/etc/profile.d/conda.sh
conda activate REAP

#DEBUG
snakemake --dryrun --debug --debug-dag --printshellcmds -s crop2 --configfile test_data/config.json --cores 4

#RUN
snakemake --debug --debug-dag --printshellcmds -s crop2 --configfile test_data/config.json

#DAG
snakemake --dag -s crop2 --configfile test_data/config.json | dot -Tpng > dag.png

#RESTART
rm -r .snakemake
rm -r test_project
```

---
## Setup 
Create a new conda environment for the pipeline. This is recommended since `snakemake` works on Python 3.6, which may not be your default Python version.
```bash
~/anaconda3/bin/conda config --add channels defaults
~/anaconda3/bin/conda config --add channels bioconda
~/anaconda3/bin/conda config --add channels conda-forge

~/anaconda3/bin/conda create -n REAP --yes python snakemake

#DEPENDENCIES
~/anaconda3/bin/conda install -n REAP --yes  -c bioconda r=3.5.0
~/anaconda3/bin/conda install -n REAP --yes  -c bioconda fastqc
~/anaconda3/bin/conda install -n REAP --yes  -c bioconda star
~/anaconda3/bin/conda install -n REAP --yes  -c bioconda bedtools
~/anaconda3/bin/conda install -n REAP --yes  -c bioconda samtools
~/anaconda3/bin/conda install -n REAP --yes  -c bioconda deeptools
~/anaconda3/bin/conda install -n REAP --yes  -c bioconda stringtie
~/anaconda3/bin/conda install -n REAP --yes  -c bioconda scallop
~/anaconda3/bin/conda install -n REAP --yes  -c bioconda kallisto
~/anaconda3/bin/conda install -n REAP --yes  -c bioconda qualimap

#TACO http://tacorna.github.io/
#cd <TACO_PARENT_PATH>
wget https://github.com/tacorna/taco/releases/download/v0.7.3/taco-v0.7.3.Linux_x86_64.tar.gz
tar -xzf taco-v0.7.3.Linux_x86_64.tar.gz
cd taco-v0.7.3.Linux_x86_64
sudo cp taco_r* /usr/bin/

#RSEQC
sudo apt install python-pip
pip install cython
pip install RSEQC
#python -c 'from qcmodule import SAM' #Test installation
#~/anaconda3/bin/conda install -n REAP --yes  -c bioconda rseqc #A LOT OF CONFLICTS
```


### Activate conda environment
```bash
. ~/anaconda3/etc/profile.d/conda.sh
conda activate REAP
```

---
# Configuration file

```json
{
	"project_name" : <your_project_name>, 
	"group" : <your_group_name>, 
	"email" : <your_email>,

	"fastqFolder" : <path_to_input_fastqFolder>,
	"extension" : <extension_for_files>["fastq.gz"|"fa"|...],
	"delim" : <in_fastq_files,_delimiter_of_name_and_pair_id_(for_paired_reads)>["_"|" "|...],
	"mates" : { 
		"mate1" : "R1", 
		"mate2" : "R2" 
	}, 
	"gtf" : <path_to_annotation_gtf>,
	"reference_fasta" : <path_to_reference_fasta>,
	"star_reference" : <path_to_STAR_reference_if_already_exists_or_"">,
	"star_threads" : "8",
	"star_version" : "2.6.0c",
	"star_RAM": "40000000000",
	"star_sort_RAM": "40000000000",
	"sort_mem" : "16G",
	"samtools_sortByName_threads" : "4",
	"samtools_version" : "1.3",
	"deeptools_version" : "2.5.4",
	"stringtie_version" : "1.3.3b",
	"scallop_version" : "1.2.3",
	"taco_version" : "0.7.3",
	"kallisto_version" : "0.44.0",
	"fragment_length" : "200",
	"standard_deviation" : "30",
	"library_preparation" : "reverse",
	"bedtools_version" : "2.26.0",
	"experimental_design" : <path_to_experimental_design>,	
	"R_path" : <path_to_R>,
    "R_version" : "3.5.1",
	"novel_assembly" : "1",
	"qualimap" : <path_to_qualimap>,
	"rseqc_version" : "2.6.1", 	
	"biomart_db" : <name_of_biomart_db>, 
	"strand" : "2",
	"feature_type" : ["exon"|"transcript"|"gene"], 
	"meta_feature" : ["gene_id"|"transcript_id"], 
	"min_mapQ" : "10"
}
```