Merge pull request #9 from Ferlab-Ste-Justine/NewNf-CoreSchema

Parameter validation with nf schema
Ferlab-Ste-Justine · Jun 12, 2024 · b65fec4 · b65fec4
2 parents dee61dd + add1e5b
commit b65fec4
Show file tree

Hide file tree

Showing 4 changed files with 112 additions and 3 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 .nextflow*
 work
 test.nf
-nextflow-test.config
+nextflow-test.config
+venv
diff --git a/main.nf b/main.nf
@@ -5,6 +5,8 @@ include { hardFiltering } from './modules/hardFilter'
 include { splitMultiAllelics        } from './modules/vep'
 include { vep                       } from './modules/vep'
 include { tabix                     } from './modules/vep'
+include { validateParameters; paramsHelp; paramsSummaryLog} from 'plugin/nf-schema'
+
 
 enum SequencingType {
     WGS,
@@ -15,6 +17,13 @@ enum SampleFileFormat {
     V1,
     V2
 }
+//Nf-core schema functions
+if (params.help) {
+    log.info paramsHelp("nextflow run Ferlab-Ste-Justine/cqdg-denovo-pipeline -r v1.0.2 -params-file params.json")
+    exit 0
+}
+validateParameters()
+log.info paramsSummaryLog(workflow)
 
 /**
 Parse the sample file (tsv) and returns the 2 following channels as a map:

diff --git a/nextflow.config b/nextflow.config
@@ -1,11 +1,18 @@
+plugins {
+  id 'nf-schema@2.0.0'
+}
+//For more information about the parameters, run the pipeline with  --help:
+  //nextflow run cqdg-denovo-pipeline/main.nf --help
 params {
+    help = null
     sampleFile = null
     outputDir = null
     referenceGenome = null
     broad = null
     vepCache = null
     intervalsFile = null
-    sampleFileFormat = null
+    sampleFileFormat = "V1"
+    sequencingType = "WGS"
     vepCpu = 4
     TSfilterSNP = '99.0'
     TSfilterINDEL = '99.0'
@@ -143,7 +150,6 @@ def check_max(obj, type) {
         }
     }
 }
-}
 executor {
   queueSize = 80
   pollInterval = '60 sec'

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -0,0 +1,93 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://raw.githubusercontent.com//master/nextflow_schema.json",
+  "title": " pipeline parameters",
+  "description": "",
+  "type": "object",
+  "properties": {
+    "sampleFile": {
+      "type": "string",
+      "description": "Path to the .tsv sample file, of format V1 or V2, containing the Family ID and path to GVCF files",
+      "pattern": "^\\S+\\.tsv$",
+      "format": "file-path",
+      "help_text": "Supply the path and filename of the sample file. The file must be tab delimited, including the family ID in the first column, then the path to the .gvcf.gz files of each family member in each following column. \nIf the sampleFileFormat is \"V2\", the format is similar but the second column must be the sequencing type \"WES\" or \"WGS\""
+    },
+    "outputDir": {
+      "type": "string",
+      "description": "Directory for the outputs files, including the reports",
+      "format": "directory-path"
+    },
+    "referenceGenome": {
+      "type": "string",
+      "description": "Directory containing the referenceGenomeFasta",
+      "format": "directory-path",
+      "help_text": "Contains the path to the directory that contains the reference fasta genome. "
+    },
+    "broad": {
+      "type": "string",
+      "description": "Directory containing the references for vqsr and the intervalsFile",
+      "help_text": "Path to the directory containing 5 important files: \n1. The intervalsFile whose name is defined in the intervalsFile parameter\n2. The Hapmap file for vqsr training\n3. The omni2.5 file for vqsr training\n4. The 1000G SNP reference file for vqsr training\n5. The dbsnp database for vqsr training",
+      "format": "directory-path"
+    },
+    "vepCache": {
+      "type": "string",
+      "description": "Directory of the vepCache",
+      "help_text": "Path to the vepCache directory, which is usually installed by vep by default. It should contain at least the homo_sapien/111_GRCh38/ directory. ",
+      "format": "directory-path"
+    },
+    "intervalsFile": {
+      "type": "string",
+      "description": "Namefile of the genome interval we want to use",
+      "help_text": "Used during the CombineGVCFs step to indicate the regions of interest"
+    },
+    "sampleFileFormat": {
+      "type": "string",
+      "description": "The format of the sample file, either \"V1\" or \"V2\"",
+      "enum": ["V1", "V2"],
+      "help_text": "See our README for more information about the two file format: https://github.com/Ferlab-Ste-Justine/cqdg-denovo-pipeline",
+      "default": "V1"
+    },
+    "vepCpu": {
+      "type": "integer",
+      "default": 4,
+      "description": "Determines the -fork option that will be used for rep",
+      "help_text": "Allows forking during the vet step, to make it faster"
+    },
+    "TSfilterSNP": {
+      "type": "string",
+      "default": "99",
+      "description": "Truth-sensitivity filter level for SNPs",
+      "help_text": "This value is required by the VQSR step"
+    },
+    "TSfilterINDEL": {
+      "type": "string",
+      "default": "99",
+      "help_text": "This value is required by the VQSR step",
+      "description": "Truth-sensitivity filter level for Indels"
+    },
+    "hardFilters": {
+      "type": "array",
+      "description": "Parameters for Hard-Filtering",
+      "help_text": "Must be an array containing each desired filter. Each filter must be formatted with the desired name and expression, for example\n[[name: 'QD1', expression: 'QD < 1.0'],[name: 'QD2', expression: 'QD < 2.0]]"
+    },
+    "sequencingType": {
+      "type": "string",
+      "description": "Either \"WGS\" for Whole Genome Sequencing or \"WES\" for Whole Exome Sequencing",
+      "default": "WGS",
+      "enum": ["WGS", "WES"]
+    },
+    "help": {
+      "type": "string",
+      "description": "Select option --help when running to get help for each parameter"
+    }
+  },
+  "required": [
+    "sampleFile",
+    "outputDir",
+    "referenceGenome",
+    "broad",
+    "vepCache",
+    "intervalsFile",
+    "hardFilters"
+  ]
+}