# config_main.yaml
#
# Main configuration file of the workflow.
# Please check the parameters, and adjust them according to your circumstances.
# NOTE: the file name on the first line must stay a comment. As a bare line it
# is parsed as a stand-alone scalar document, and the key/value pairs below
# then fail to load.

# Project name
PROJECT: test

# ================== Control of the workflow ==================
#
# These settings choose which steps of the workflow are run.
# NOTE: the unquoted values yes/no are read as booleans by YAML 1.1 loaders
# (e.g. PyYAML) but as plain strings by strict YAML 1.2 loaders. Keep them
# exactly as written unless the consuming scripts are confirmed to expect
# something else.

## Do you need to do quality control?
QC: no  # "yes" or "no"

## Do you need to do trimming?
TRIMMED: yes  # "yes" or "no"

## Which mapping reference do you want to use? Genome or transcriptome?
## (each choice has its own configuration section further down in this file)
REFERENCE: genome  # "genome" or "transcriptome"

## Do you want to do Differential Expression Analysis (DEA)?
DEA: yes  # "yes" or "no"

## Do you want to visualize the results of DEA?
VISUALIZE: yes  # "yes" or "no"

# ================== Shared parameters for some or all of the sub-workflows ==================

## key file if the data is stored remotely; otherwise leave it as null
KEY: null  # an explicit null loads to the same value as an empty entry

## the path to fastq files
READSPATH: /tmp

## the meta file describing the experiment settings
METAFILE: /tmp/rasflow_metadata.tsv

## is the sequencing paired-end or single-end?
END: single  # "pair" or "single"

## number of cores you want to allocate to this workflow
NCORE: 3  # Use command "getconf _NPROCESSORS_ONLN" to check the number of cores/CPU on your machine

## paths for intermediate outputs and final outputs
OUTPUTPATH: /tmp/data/output  # intermediate output. do not upload to github
FINALOUTPUT: /tmp/output

# ================== Configuration for Quality Control ==================

## All required params have already been defined in the public params

# ================== Configuration for trimming ==================

## All required params have already been defined in the public params

# ================== Configuration for quantification using transcriptome ==================

## transcriptome file (in this example a gzipped cDNA FASTA file)
## NOTE(review): presumably only read when REFERENCE is "transcriptome" - confirm.
## This example path points to a human (Homo sapiens) file, while the genome
## section of this file uses mouse (Mus musculus) references; check the species
## before switching REFERENCE.
TRANS: /tmp/data/example/ref/transcriptome/Homo_sapiens.GRCh38.cdna.all.1.1.10M.fa.gz

# ================== Configuration for alignment to genome and feature count ==================

## genome (FASTA) and annotation (GTF) files, for example from:
## http://ftp.ensembl.org/pub/release-103/fasta/mus_musculus/dna/ ; http://ftp.ensembl.org/pub/release-103/gtf/mus_musculus/
GENOME: /tmp/ref/Mus_musculus.GRCm39.dna.primary_assembly.fa
ANNOTATION: /tmp/ref/Mus_musculus.GRCm39.103.gtf
ATTRIBUTE: gene_id  # the attribute used in the annotation file. It's usually "gene_id", but double-check, since it may also be "gene", "ID", ...

## aligner used for the alignment to the genome
ALIGNER: hisat2

## tool for feature count
COUNTER: featureCounts  # default is "featureCounts", or you may want to use "htseq-count"

## alignment quality control
## (note the camelCase spelling of this key, unlike the upper-case keys elsewhere in this file; do not rename it without checking the scripts that read it)
alignmentQC: yes  # "yes" or "no" to specify whether you want to do alignment QC

# ================== Configuration for DEA ==================

## Do you want to use edgeR or DESeq2 to do DEA?
DEATOOL: edgeR  # "edgeR" or "DESeq2"? DESeq2 is recommended for transcriptome-based DEA

## Is your experiment designed in a pair-wise way?
PAIR: FALSE  # Is this a paired test or not? ("TRUE" or "FALSE")

## the comparison(s) you want to do. If there are multiple comparisons, list the groups in order, so that the i-th entry of CONTROL is paired with the i-th entry of TREAT
CONTROL: ["q111sst"]
TREAT: ["nortriptyline"]
## the length of 'CONTROL' must agree with that of 'TREAT'
## what you fill in here must agree with the "group" column in metadata.tsv (see METAFILE)

FILTER:
  yesOrNo: FALSE  # Filter out lowly expressed transcripts/genes or not? (TRUE or FALSE) It's better to set this to TRUE; FALSE is the default only for testing with fake toy data

## If transcriptome was used as mapping reference in the previous quantification step, the following params need to be specified

## Do you need to do gene-level differential expression analysis?
GENE_LEVEL: TRUE  # TRUE or FALSE. If FALSE, the following 3 parameters (ENSEMBL, EnsemblDataSet, TX2GENE) are presumably not needed - NOTE(review): confirm against the DEA scripts
## If TRUE, specify the corresponding dataset in ENSEMBL for your organism of interest, or provide your own tx2gene file
ENSEMBL: TRUE  # TRUE or FALSE. TRUE: the transcriptome comes from Ensembl, so specify the corresponding Ensembl dataset. FALSE: you provide a homemade one, so specify your tx2gene file (two columns, 1st col: transcript ID; 2nd col: gene ID)
EnsemblDataSet: mmusculus_gene_ensembl  # only if ENSEMBL was set to TRUE. Search for your dataset in the file EnsemblDataSet_look_up_table.csv
TX2GENE: /export/jonassenfs/xiaokangz/dcod/data/genome/tx2gene_gadMor3.tsv  # only if ENSEMBL was set to FALSE

# ================== Configuration for visualization ==================

## All required params have already been defined in the public params