-
Notifications
You must be signed in to change notification settings - Fork 0
/
config_main.yaml
101 lines (67 loc) · 4.32 KB
/
config_main.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Review every parameter in this file and adapt it to your own setup.

# Name of the project.
PROJECT: test

# ================== Control of the workflow ==================
# The switches below are written as unquoted yes/no, the form this file uses.
# NOTE(review): YAML 1.1 loaders read unquoted yes/no as booleans, not strings;
# confirm what the consumer expects before quoting or retyping these values.

## Do quality control? ("yes" or "no")
QC: no

## Do trimming? ("yes" or "no")
TRIMMED: yes

## Mapping reference to use: "genome" or "transcriptome".
REFERENCE: genome

## Do Differential Expression Analysis (DEA)? ("yes" or "no")
DEA: yes

## Visualize the results of DEA? ("yes" or "no")
VISUALIZE: yes
# ================== Shared parameters for some or all of the sub-workflows ==================

## Key file, needed only if the data is stored remotely; otherwise leave it empty.
KEY:

## Path to the fastq files.
READSPATH: /tmp

## Meta file describing the experiment settings.
METAFILE: /tmp/rasflow_metadata.tsv

## Sequencing layout: "pair" (paired-end) or "single" (single-end).
END: single

## Number of cores to allocate to this workflow.
## Run `getconf _NPROCESSORS_ONLN` to check the number of cores/CPUs on your machine.
NCORE: 3

## Output paths.
## OUTPUTPATH: intermediate output. Do not upload it to GitHub.
OUTPUTPATH: /tmp/data/output
## FINALOUTPUT: final output.
FINALOUTPUT: /tmp/output
# ================== Configuration for Quality Control ==================
## All required parameters are already defined in the shared parameters section above
# ================== Configuration for trimming ==================
## All required parameters are already defined in the shared parameters section above
# ================== Configuration for quantification using transcriptome ==================
## Transcriptome file.
## NOTE(review): this is a Homo sapiens file, while GENOME, ANNOTATION and
## EnsemblDataSet in this file point at Mus musculus. Presumably TRANS is only
## read when REFERENCE is "transcriptome" -- confirm, and align the organism
## before switching the reference.
TRANS: /tmp/data/example/ref/transcriptome/Homo_sapiens.GRCh38.cdna.all.1.1.10M.fa.gz
# ================== Configuration for alignment to genome and feature count ==================

## Genome and annotation files, for example from:
##   http://ftp.ensembl.org/pub/release-103/fasta/mus_musculus/dna/
##   http://ftp.ensembl.org/pub/release-103/gtf/mus_musculus/
GENOME: /tmp/ref/Mus_musculus.GRCm39.dna.primary_assembly.fa
ANNOTATION: /tmp/ref/Mus_musculus.GRCm39.103.gtf

## Attribute used in the annotation file. It is usually "gene_id", but
## double-check, since it may also be "gene", "ID", ...
ATTRIBUTE: gene_id

## Aligner.
ALIGNER: hisat2

## Tool for feature count: "featureCounts" (default) or "htseq-count".
COUNTER: featureCounts

## Do alignment quality control? ("yes" or "no")
alignmentQC: yes
# ================== Configuration for DEA ==================

## Tool used for DEA: "edgeR" or "DESeq2".
## DESeq2 is recommended for transcriptome-based DEA.
DEATOOL: edgeR

## Is the experiment designed in a pair-wise way? ("TRUE" or "FALSE")
PAIR: FALSE

## Comparison(s) to run. For multiple comparisons, specify each pair in order:
## the i-th entry of CONTROL is paired with the i-th entry of TREAT, so the
## two lists must have the same length.
## The names must agree with the "group" column in metadata.tsv.
CONTROL: ["q111sst"]
TREAT: ["nortriptyline"]

## Filtering of low expressed transcripts/genes.
FILTER:
  # TRUE or FALSE. TRUE is the better choice; FALSE is the default only for
  # testing fake toy data.
  # The key must stay indented under FILTER: at column 0 it becomes a separate
  # top-level key and FILTER itself parses as null.
  yesOrNo: FALSE

## The parameters below need to be specified if the transcriptome was used as
## the mapping reference in the previous quantification step.

## Do gene-level differential expression analysis? (TRUE or FALSE)
## NOTE(review): the previous inline comment said "If TRUE, ignore the
## following 3 parameters", which contradicts the ENSEMBL comment below
## ("If TRUE, specify ..."). Presumably the 3 parameters are ignored when this
## is FALSE -- confirm against the consumer.
GENE_LEVEL: TRUE

## If GENE_LEVEL is TRUE, state where the transcript-to-gene mapping comes from.
##   ENSEMBL: TRUE  -> the transcriptome is from Ensembl; set EnsemblDataSet.
##   ENSEMBL: FALSE -> homemade transcriptome; set TX2GENE to your own file
##                     (two columns, 1st col: transcript ID; 2nd col: gene ID).
ENSEMBL: TRUE

## Only used if ENSEMBL is TRUE.
## Search for your dataset in the file EnsemblDataSet_look_up_table.csv.
EnsemblDataSet: mmusculus_gene_ensembl

## Only used if ENSEMBL is FALSE.
TX2GENE: /export/jonassenfs/xiaokangz/dcod/data/genome/tx2gene_gadMor3.tsv
# ================== Configuration for visualization ==================
## All required parameters are already defined in the shared parameters section above