Add header to NF files (#152)

* add file header for nf
LabShengLi · Jun 20, 2022 · ef3f3c4 · ef3f3c4
1 parent a46f0b8
commit ef3f3c4
Show file tree

Hide file tree

Showing 16 changed files with 294 additions and 91 deletions.
diff --git a/main.nf b/main.nf
@@ -12,87 +12,16 @@
  @Organization : JAX Li Lab
 ----------------------------------------------------------------------------------------
 */
-// We now support both latest and lower versions, due to Lifebit CloudOS is only support 21.04
+// We support both the latest and older Nextflow versions, because Lifebit CloudOS only supports 20.04
 // Note: NXF_VER=20.04.1 nextflow run main.nf -profile test,singularity
 if( nextflow.version.matches(">= 20.07.1") ){
-	nextflow.enable.dsl=2
+	nextflow.enable.dsl = 2
 } else {
 	// Support lower version of nextflow
-	nextflow.preview.dsl=2
+	nextflow.preview.dsl = 2
 }
 
-def helpMessage() {
-	log.info"""
-	NANOME - Nextflow PIPELINE (v$workflow.manifest.version)
-	by Sheng Li Lab at The Jackson Laboratory
-	https://github.com/LabShengLi/nanome
-	=================================
-	Usage:
-	The typical command is as follows:
-
-	nextflow run LabShengLi/nanome -profile test,docker
-	nextflow run LabShengLi/nanome -profile test,singularity
-	nextflow run LabShengLi/nanome -profile [docker/singularity] \\
-		--dsname DSNAME --input INPUT --genome GENOME
-
-	Mandatory arguments:
-	  --dsname		Dataset/analysis name
-	  --input		Input path for raw fast5 files (folders, tar/tar.gz files)
-	  --genome		Genome reference name ('hg38', 'ecoli', or 'hg38_chr22') or a directory, the directory must contain only one .fasta file with .fasta.fai index file. Default is hg38
-
-	General options:
-	  --processors		Processors used for each task
-	  --outdir		Output dir, default is 'results'
-	  --chrSet		Chromosomes used in analysis, default is chr1-22, X and Y, for human. For E. coli data, it is default as 'NC_000913.3'. For other reference genome, please specify each chromosome with space seperated.
-	  --cleanAnalyses	If clean old basecalling info in fast5 files
-	  --skipBasecall	Skip redo basecalling if users provide basecalled inputs
-
-	  --cleanup		If clean work dir after complete, default is false
-
-	Tools specific options:
-	  --run[Tool-name]	By default, we run top four performers in nanome paper, specify '--run[Tool-name]' can include other tool, supported tools: NANOME, Megalodon, Nanopolish, DeepSignal, Guppy, Tombo, METEORE, and DeepMod
-	  --rerioDir		Rerio dir for Megalodon model, default will get online
-	  --MEGALODON_MODEL	Megalodon model name, default is 'res_dna_r941_min_modbases_5mC_v001.cfg'
-	  --guppyDir		Guppy installation local directory, used only for conda environment
-	  --GUPPY_BASECALL_MODEL	Guppy basecalling model, default is 'dna_r9.4.1_450bps_hac.cfg'
-	  --GUPPY_METHCALL_MODEL	Guppy methylation calling model, default is 'dna_r9.4.1_450bps_modbases_5mc_hac.cfg'
-	  --deepsignalDir	DeepSignal model dir, default will get online
-	  --tomboResquiggleOptions	Tombo resquiggle options for super long/damaged sequencing, set to '--signal-length-range 0 500000  --sequence-length-range 0 50000'
-	  --moveOption	If using move table for DeepMod, default is true
-	  --useDeepModCluster	If using DeepMod cluster model for human, default is false
-	  --METEOREDir	METEORE model dir, default will get online
-
-	Running environment options:
-	  --docker_name		Docker name used for pipeline, default is 'liuyangzzu/nanome:latest'
-	  --singularity_name	Singularity name used for pipeline, default is 'docker://liuyangzzu/nanome:latest'
-	  --singularity_cache	Singularity cache dir, default is 'local_singularity_cache'
-	  --conda_name		Conda name used for pipeline, default is 'nanome'
-	  --conda_base_dir	Conda base directory, default is '/opt/conda'
-
-	Platform specific options:
-	  --queue		SLURM job submission queue name, e.g., 'gpu'
-	  --qos			SLURM job submission QOS name, e.g., 'inference'
-	  --gresOptions		SLURM job submission GPU allocation option, e.g., 'gpu:v100:1'
-	  --time		SLURM job submission running time, e.g., '2h', '1d'
-	  --memory		SLURM job submission memory, e.g., '32GB'
-
-	  --projectCloud	Google Cloud Platform (GCP) project name for google-lifesciences
-	  --config		Lifebit CloudOS config file, e.g., 'conf/executors/lifebit.config'
-
-	-profile options:
-	  Use this parameter to choose a predefined configuration profile. Profiles can give configuration presets for different compute environments.
-
-	  test		A bundle of input params for ecoli test
-	  test_human	A bundle of input params for human test
-	  docker 	A generic configuration profile to be used with Docker, pulls software from Docker Hub: liuyangzzu/nanome:latest
-	  singularity	A generic configuration profile to be used with Singularity, pulls software from: docker://liuyangzzu/nanome:latest
-	  conda		Please only use conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity. Check our GitHub for how to install local conda environment
-	  hpc		A generic configuration profile to be used on HPC cluster with SLURM
-	  google	A generic configuration profile to be used on Google Cloud platform with 'google-lifesciences'
-
-	Contact to https://github.com/LabShengLi/nanome/issues for bug report.
-	""".stripIndent()
-}
+include {helpMessage} from './modules/HELP'
 
 // Show help message
 if (params.help){
@@ -156,17 +85,12 @@ projectDir = workflow.projectDir
 ch_utils = Channel.fromPath("${projectDir}/utils",  type: 'dir', followLinks: false)
 ch_src   = Channel.fromPath("${projectDir}/src",  type: 'dir', followLinks: false)
 
-// Reference genome, deepmod cluster settings
-def referenceGenome = "reference_genome/${params.GENOME_FN}"
-def chromSizesFile = "reference_genome/${params.CHROM_SIZE_FN}"
-
+// Reference genome, chrom size file
 params.referenceGenome = "${params.GENOME_DIR}/${params.GENOME_FN}"
 params.chromSizesFile = "${params.GENOME_DIR}/${params.CHROM_SIZE_FN}"
 
-
 if (dataType == 'human') { isDeepModCluster = params.useDeepModCluster }
 else { isDeepModCluster = false }
-
 params.isDeepModCluster = isDeepModCluster
 
 
@@ -184,7 +108,7 @@ if (params.input.endsWith(".filelist.txt")) {
 		}
 		.set{ inputCh }
 } else if (params.input.contains('*') || params.input.contains('?')) {
-	// match all files in the folder, note: input must use '', prevent expand in advance
+	// match all files in the folder; note: the input must be wrapped in single quotes '' to prevent it from being expanded in advance
 	// such as --input '/fastscratch/liuya/nanome/NA12878/NA12878_CHR22/input_chr22/*'
 	Channel.fromPath(params.input, type: 'any', checkIfExists: true)
 		.set{ inputCh }
@@ -256,7 +180,6 @@ if (params.hmc) { summary['hmc'] 	= params.hmc }
 if (params.ctg_name) { summary['ctg_name'] 	= params.ctg_name }
 
 
-
 summary['\nModel summary']         = "--------"
 if (params.runBasecall && !params.skipBasecall) summary['GUPPY_BASECALL_MODEL'] 	= params.GUPPY_BASECALL_MODEL
 if (params.runMethcall && params.runMegalodon)
@@ -287,8 +210,8 @@ if (workflow.revision) summary['Pipeline Release'] = workflow.revision
 if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container"
 summary['errorStrategy']    = params.errorStrategy
 summary['maxRetries']       = params.maxRetries
-if (params.echo)  summary['echo'] = params.echo
-if (params.cleanup)   summary['cleanup'] = params.cleanup
+if (params.echo)  		summary['echo'] = params.echo
+if (params.cleanup)   	summary['cleanup'] = params.cleanup
 
 if (workflow.profile.contains('hpc') || workflow.profile.contains('winter') ||\
  	workflow.profile.contains('sumner') ) {
@@ -508,7 +431,7 @@ workflow {
 	if (params.runNewTool && params.newModuleConfigs) {
 		newModuleCh = Channel.of( params.newModuleConfigs ).flatten()
 		// ref: https://www.nextflow.io/docs/latest/operator.html#combine
-		NewTool(newModuleCh.combine(BASECALL.out.basecall), ENVCHECK.out.reference_genome, referenceGenome)
+		NewTool(newModuleCh.combine(BASECALL.out.basecall), ENVCHECK.out.reference_genome, params.referenceGenome)
 		NewToolComb(NewTool.out.batch_out.collect(), newModuleCh, ch_src)
 
 		s_new = NewToolComb.out.site_unify

diff --git a/modules/ALIGNMENT.nf b/modules/ALIGNMENT.nf
@@ -1,3 +1,16 @@
+/*
+=========================================================================================
+  		NANOME(Nanopore methylation) pipeline for Oxford Nanopore sequencing
+=========================================================================================
+ NANOME Analysis Pipeline.
+ #### Homepage / Documentation
+ https://github.com/LabShengLi/nanome
+ @Author   : Yang Liu
+ @FileName : ALIGNMENT.nf
+ @Software : NANOME project
+ @Organization : JAX Sheng Li Lab
+----------------------------------------------------------------------------------------
+*/
 // Align each basecalled outputs
 process ALIGNMENT {
 	tag "${basecallDir.baseName}"

diff --git a/modules/BASECALL.nf b/modules/BASECALL.nf
@@ -1,3 +1,16 @@
+/*
+=========================================================================================
+  		NANOME(Nanopore methylation) pipeline for Oxford Nanopore sequencing
+=========================================================================================
+ NANOME Analysis Pipeline.
+ #### Homepage / Documentation
+ https://github.com/LabShengLi/nanome
+ @Author   : Yang Liu
+ @FileName : BASECALL.nf
+ @Software : NANOME project
+ @Organization : JAX Sheng Li Lab
+----------------------------------------------------------------------------------------
+*/
 // basecall of subfolders named 'M1', ..., 'M10', etc.
 process BASECALL {
 	tag "${fast5Untar.baseName}"

diff --git a/modules/COMMONS.nf b/modules/COMMONS.nf
@@ -0,0 +1,24 @@
+/*
+=========================================================================================
+  		NANOME(Nanopore methylation) pipeline for Oxford Nanopore sequencing
+=========================================================================================
+ NANOME Analysis Pipeline.
+ #### Homepage / Documentation
+ https://github.com/LabShengLi/nanome
+ @Author   : Yang Liu
+ @FileName : COMMONS.nf
+ @Software : NANOME project
+ @Organization : JAX Sheng Li Lab
+----------------------------------------------------------------------------------------
+*/
+// check nextflow version, then declare DSL2 in two ways
+def nextflowVersionCheck() {
+	// We support both the latest and older Nextflow versions, because Lifebit CloudOS only supports 20.04
+	// Note: NXF_VER=20.04.1 nextflow run main.nf -profile test,singularity
+	if( nextflow.version.matches(">= 20.07.1") ){
+		nextflow.enable.dsl = 2
+	} else {
+		// Support lower version of nextflow
+		nextflow.preview.dsl = 2
+	}
+}
diff --git a/modules/DEEPSIGNAL.nf b/modules/DEEPSIGNAL.nf
@@ -1,3 +1,16 @@
+/*
+=========================================================================================
+  		NANOME(Nanopore methylation) pipeline for Oxford Nanopore sequencing
+=========================================================================================
+ NANOME Analysis Pipeline.
+ #### Homepage / Documentation
+ https://github.com/LabShengLi/nanome
+ @Author   : Yang Liu
+ @FileName : DEEPSIGNAL.nf
+ @Software : NANOME project
+ @Organization : JAX Sheng Li Lab
+----------------------------------------------------------------------------------------
+*/
 // DeepSignal runs on resquiggled subfolders named 'M1', ..., 'M10', etc.
 process DEEPSIGNAL {
 	tag "${indir.baseName}"
@@ -106,4 +119,4 @@ process DPSIGCOMB {
 		.  $task.cpus  12 ${params.sort  ? true : false}  "${params.chrSet1.replaceAll(',', ' ')}"
 	echo "### DeepSignal combine DONE"
 	"""
-}
+}
diff --git a/modules/ENVCHECK.nf b/modules/ENVCHECK.nf
@@ -1,3 +1,16 @@
+/*
+=========================================================================================
+  		NANOME(Nanopore methylation) pipeline for Oxford Nanopore sequencing
+=========================================================================================
+ NANOME Analysis Pipeline.
+ #### Homepage / Documentation
+ https://github.com/LabShengLi/nanome
+ @Author   : Yang Liu
+ @FileName : ENVCHECK.nf
+ @Software : NANOME project
+ @Organization : JAX Sheng Li Lab
+----------------------------------------------------------------------------------------
+*/
 // Check all tools work well
 process ENVCHECK {
 	tag "${params.dsname}"

diff --git a/modules/HELP.nf b/modules/HELP.nf
@@ -0,0 +1,86 @@
+/*
+=========================================================================================
+  		NANOME(Nanopore methylation) pipeline for Oxford Nanopore sequencing
+=========================================================================================
+ NANOME Analysis Pipeline.
+ #### Homepage / Documentation
+ https://github.com/LabShengLi/nanome
+ @Author   : Yang Liu
+ @FileName : HELP.nf
+ @Software : NANOME project
+ @Organization : JAX Sheng Li Lab
+----------------------------------------------------------------------------------------
+*/
+def helpMessage() {
+	log.info"""
+	NANOME - Nextflow PIPELINE (v$workflow.manifest.version)
+	by Sheng Li Lab at The Jackson Laboratory
+	https://github.com/LabShengLi/nanome
+	=================================
+	Usage:
+	The typical command is as follows:
+
+	nextflow run LabShengLi/nanome -profile test,docker
+	nextflow run LabShengLi/nanome -profile test,singularity
+	nextflow run LabShengLi/nanome -profile [docker/singularity] \\
+		--dsname DSNAME --input INPUT --genome GENOME
+
+	Mandatory arguments:
+	  --dsname		Dataset/analysis name
+	  --input		Input path for raw fast5 files (folders, tar/tar.gz files)
+	  --genome		Genome reference name ('hg38', 'ecoli', or 'hg38_chr22') or a directory, the directory must contain only one .fasta file with .fasta.fai index file. Default is hg38
+
+	General options:
+	  --processors		Processors used for each task
+	  --outdir		Output dir, default is 'results'
+	  --chrSet		Chromosomes used in analysis, default is chr1-22, X and Y, for human. For E. coli data, it is default as 'NC_000913.3'. For other reference genome, please specify each chromosome with space seperated.
+	  --cleanAnalyses	If clean old basecalling info in fast5 files
+	  --skipBasecall	Skip redo basecalling if users provide basecalled inputs
+
+	  --cleanup		If clean work dir after complete, default is false
+
+	Tools specific options:
+	  --run[Tool-name]	By default, we run top four performers in nanome paper, specify '--run[Tool-name]' can include other tool, supported tools: NANOME, Megalodon, Nanopolish, DeepSignal, Guppy, Tombo, METEORE, and DeepMod
+	  --rerioDir		Rerio dir for Megalodon model, default will get online
+	  --MEGALODON_MODEL	Megalodon model name, default is 'res_dna_r941_min_modbases_5mC_v001.cfg'
+	  --guppyDir		Guppy installation local directory, used only for conda environment
+	  --GUPPY_BASECALL_MODEL	Guppy basecalling model, default is 'dna_r9.4.1_450bps_hac.cfg'
+	  --GUPPY_METHCALL_MODEL	Guppy methylation calling model, default is 'dna_r9.4.1_450bps_modbases_5mc_hac.cfg'
+	  --deepsignalDir	DeepSignal model dir, default will get online
+	  --tomboResquiggleOptions	Tombo resquiggle options for super long/damaged sequencing, set to '--signal-length-range 0 500000  --sequence-length-range 0 50000'
+	  --moveOption	If using move table for DeepMod, default is true
+	  --useDeepModCluster	If using DeepMod cluster model for human, default is false
+	  --METEOREDir	METEORE model dir, default will get online
+
+	Running environment options:
+	  --docker_name		Docker name used for pipeline, default is 'liuyangzzu/nanome:latest'
+	  --singularity_name	Singularity name used for pipeline, default is 'docker://liuyangzzu/nanome:latest'
+	  --singularity_cache	Singularity cache dir, default is 'local_singularity_cache'
+	  --conda_name		Conda name used for pipeline, default is 'nanome'
+	  --conda_base_dir	Conda base directory, default is '/opt/conda'
+
+	Platform specific options:
+	  --queue		SLURM job submission queue name, e.g., 'gpu'
+	  --qos			SLURM job submission QOS name, e.g., 'inference'
+	  --gresOptions		SLURM job submission GPU allocation option, e.g., 'gpu:v100:1'
+	  --time		SLURM job submission running time, e.g., '2h', '1d'
+	  --memory		SLURM job submission memory, e.g., '32GB'
+
+	  --projectCloud	Google Cloud Platform (GCP) project name for google-lifesciences
+	  --config		Lifebit CloudOS config file, e.g., 'conf/executors/lifebit.config'
+
+	-profile options:
+	  Use this parameter to choose a predefined configuration profile. Profiles can give configuration presets for different compute environments.
+
+	  test		A bundle of input params for ecoli test
+	  test_human	A bundle of input params for human test
+	  docker 	A generic configuration profile to be used with Docker, pulls software from Docker Hub: liuyangzzu/nanome:latest
+	  singularity	A generic configuration profile to be used with Singularity, pulls software from: docker://liuyangzzu/nanome:latest
+	  conda		Please only use conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity. Check our GitHub for how to install local conda environment
+	  hpc		A generic configuration profile to be used on HPC cluster with SLURM
+	  google	A generic configuration profile to be used on Google Cloud platform with 'google-lifesciences'
+
+	Contact to https://github.com/LabShengLi/nanome/issues for bug report.
+	""".stripIndent()
+}
+
diff --git a/modules/MEGALODON.nf b/modules/MEGALODON.nf
@@ -1,3 +1,16 @@
+/*
+=========================================================================================
+  		NANOME(Nanopore methylation) pipeline for Oxford Nanopore sequencing
+=========================================================================================
+ NANOME Analysis Pipeline.
+ #### Homepage / Documentation
+ https://github.com/LabShengLi/nanome
+ @Author   : Yang Liu
+ @FileName : MEGALODON.nf
+ @Software : NANOME project
+ @Organization : JAX Sheng Li Lab
+----------------------------------------------------------------------------------------
+*/
 // Megalodon runs on resquiggled subfolders named 'M1', ..., 'M10', etc.
 process MEGALODON {
 	tag "${fast5Untar.baseName}"
@@ -151,3 +164,4 @@ process MGLDNCOMB {
 	echo "### Megalodon combine DONE"
 	"""
 }
+
diff --git a/modules/NANOPOLISH.nf b/modules/NANOPOLISH.nf
@@ -1,3 +1,16 @@
+/*
+=========================================================================================
+  		NANOME(Nanopore methylation) pipeline for Oxford Nanopore sequencing
+=========================================================================================
+ NANOME Analysis Pipeline.
+ #### Homepage / Documentation
+ https://github.com/LabShengLi/nanome
+ @Author   : Yang Liu
+ @FileName : NANOPOLISH.nf
+ @Software : NANOME project
+ @Organization : JAX Sheng Li Lab
+----------------------------------------------------------------------------------------
+*/
 // Nanopolish runs on resquiggled subfolders named 'M1', ..., 'M10', etc.
 process NANOPOLISH {
 	tag "${basecallDir.baseName}"
@@ -108,4 +121,4 @@ process NPLSHCOMB {
 
 	echo "### Nanopolish combine DONE"
 	"""
-}
+}