Molmed · GitEdvard · Feb 4, 2022 · Nov 9, 2021 · Nov 12, 2021 · Nov 25, 2021
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
@@ -0,0 +1,48 @@
+name: Run tests  # CI: runs the pytest suite, then a Black formatting check
+on: [push]  # triggered by every push
+jobs:
+  run-tests:
+    runs-on: ubuntu-20.04
+    env:
+      NXF_VER: '21.04.1'  # Nextflow version to use; quoted so it stays a string
+      NXF_ANSI_LOG: 'false'  # env values are strings; turns off ANSI log output
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v2
+
+      - name: Cache singularity images
+        uses: actions/cache@v2
+        with:  # key follows singularity.config, so editing that file starts a new cache
+          path: work/singularity
+          key: singularity-${{ hashFiles('config/nextflow_config/singularity.config') }}
+          restore-keys: singularity-  # prefix match: reuse an older cache on a key miss
+
+      - name: Install Singularity
+        uses: eWaterCycle/setup-singularity@v7
+        with:
+          singularity-version: '3.8.3'
+
+      - name: Install Nextflow  # curl -f: fail on HTTP errors instead of piping an error page to bash
+        env:
+          CAPSULE_LOG: none
+        run: |
+          curl -fsSL https://get.nextflow.io | bash
+          sudo mv nextflow /usr/local/bin/
+
+      - name: Make Nextflow binary executable
+        run: chmod +x /usr/local/bin/nextflow
+
+      - name: Set up python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.9'  # quoted: a bare 3.10 would be parsed as the float 3.1
+          architecture: x64
+
+      - name: Install test requirements
+        run: pip install -r requirements-dev.txt
+
+      - name: Run tests
+        run: pytest tests
+
+      - name: Run Black code formatting check
+        run: black --check .
diff --git a/.gitignore b/.gitignore
@@ -9,3 +9,5 @@ resources
 *.simg
 *.img
 FastQ_Screen_Genomes
+venv
+__pycache__
diff --git a/README.md b/README.md
@@ -29,6 +29,7 @@ These are the primary config profiles:
 - `irma`:         Uppmax slurm profile for use on the cluster `irma` (note: The parameter `params.project` must be supplied).
 - `snpseq`:       Run locally with greater memory available than `dev`.
 - `singularity`:  Enables singularity and provides container URLs.
+- `test`:         Run the pipeline using the bundled test data.
 
 Additional profiles:
 - `debug`: prints out the `env` properties before executing processes.
@@ -52,6 +53,35 @@ There are two primary branches of this project:
 - `master`: The stable release branch
 - `dev`: The development and test branch, to which pull requests should be made.
 
-### Known issues:
+Tests are run through GitHub Actions whenever code is pushed to the repo. See the instructions below on how to run them locally.
+
+To keep the Python parts of the project nice and tidy, we require that code is formatted with [black](https://github.com/psf/black).
+To re-format your code with black, simply run:
+```
+black .
+```
+
+### Running tests locally
+
+Assuming you have installed all prerequisites (except the FastQ Screen database: the test data comes with a minimal version of it), you can run the tests locally by following these steps:
+
+```
+# create virtual environment 
+virtualenv -p python3.9 venv/   
+
+# activate venv
+source venv/bin/activate
+
+# install dependencies
+pip install -r requirements-dev.txt
+
+# run tests
+pytest tests/
+
+# perform black formatter check
+black --check .
+```
+
+## Known issues:
 
 - Unable to download genome indicies using `fastq_screen --get_genomes` as wget within the container does not resolve the address correctly. Fastq Screen must be installed separately (e.g. with conda) and the genomes downloaded prior to running the workflow. The path to the databases must then be given using the `params.fastqscreen_databases` parameter.
diff --git a/bin/get_metadata.py b/bin/get_metadata.py
@@ -8,21 +8,24 @@
 import json
 
 
-class RunfolderInfo():
-
+class RunfolderInfo:
     def __init__(self, runfolder, bcl2fastq_outdir):
         self.runfolder = runfolder
         self.run_parameters = self.read_run_parameters()
         self.stats_json = self.read_stats_json(bcl2fastq_outdir)
         self.description_and_identifier = OrderedDict()
-        self.run_parameters_tags = \
-            {'RunId': 'Run ID', 'RunID': 'Run ID',
-             'ApplicationName': 'Control software', 'Application': 'Control software',
-             'ApplicationVersion': 'Control software version',
-             'Flowcell': 'Flowcell type', 'FlowCellMode': 'Flowcell type',
-             'ReagentKitVersion': 'Reagent kit version',
-             'RTAVersion': 'RTA Version', 'RtaVersion': 'RTA Version',
-             }
+        self.run_parameters_tags = {
+            "RunId": "Run ID",
+            "RunID": "Run ID",
+            "ApplicationName": "Control software",
+            "Application": "Control software",
+            "ApplicationVersion": "Control software version",
+            "Flowcell": "Flowcell type",
+            "FlowCellMode": "Flowcell type",
+            "ReagentKitVersion": "Reagent kit version",
+            "RTAVersion": "RTA Version",
+            "RtaVersion": "RTA Version",
+        }
 
     def find(self, d, tag):
         if tag in d:
@@ -45,7 +48,8 @@ def read_run_parameters(self):
 
     def read_stats_json(self, bcl2fastq_outdir):
         stats_json_path = os.path.join(
-            self.runfolder, bcl2fastq_outdir, "Stats/Stats.json")
+            self.runfolder, bcl2fastq_outdir, "Stats/Stats.json"
+        )
         if os.path.exists(stats_json_path):
             with open(stats_json_path) as f:
                 return json.load(f)
@@ -72,10 +76,14 @@ def get_read_cycles(self):
         try:
             for read_info in self.stats_json["ReadInfosForLanes"][0]["ReadInfos"]:
                 if read_info["IsIndexedRead"]:
-                    read_and_cycles[f"Index {index_counter} (bp)"] = read_info["NumCycles"]
+                    read_and_cycles[f"Index {index_counter} (bp)"] = read_info[
+                        "NumCycles"
+                    ]
                     index_counter += 1
                 else:
-                    read_and_cycles[f"Read {read_counter} (bp)"] = read_info["NumCycles"]
+                    read_and_cycles[f"Read {read_counter} (bp)"] = read_info[
+                        "NumCycles"
+                    ]
                     read_counter += 1
             return read_and_cycles
         except TypeError:
@@ -85,19 +93,21 @@ def get_info(self):
         results = self.get_read_cycles()
         results.update(self.get_run_parameters())
         if os.path.exists(os.path.join(self.runfolder, "bcl2fastq_version")):
-            results['bcl2fastq version'] = self.get_bcl2fastq_version(
-                self.runfolder)
+            results["bcl2fastq version"] = self.get_bcl2fastq_version(self.runfolder)
         return results
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description='Dumps a metadata yaml for MultiQC')
-    parser.add_argument('--runfolder', type=str,
-                        required=True, help='Path to runfolder')
-    parser.add_argument('--bcl2fastq-outdir', type=str,
-                        default='Data/Intensities/BaseCalls',
-                        help='Path to bcl2fastq output folder relative to the runfolder')
+    parser = argparse.ArgumentParser(description="Dumps a metadata yaml for MultiQC")
+    parser.add_argument(
+        "--runfolder", type=str, required=True, help="Path to runfolder"
+    )
+    parser.add_argument(
+        "--bcl2fastq-outdir",
+        type=str,
+        default="Data/Intensities/BaseCalls",
+        help="Path to bcl2fastq output folder relative to the runfolder",
+    )
 
     args = parser.parse_args()
     runfolder = args.runfolder
@@ -106,14 +116,16 @@ def get_info(self):
     runfolder_info = RunfolderInfo(runfolder, bcl2fastq_outdir)
     results = runfolder_info.get_info()
 
-    print ('''
+    print(
+        """
 id: 'sequencing_metadata'
 section_name: 'Sequencing Metadata'
 plot_type: 'html'
 description: 'regarding the sequencing run'
 data: |
     <dl class="dl-horizontal">
-''')
+"""
+    )
     for k, v in results.items():
         print("        <dt>{}</dt><dd><samp>{}</samp></dd>".format(k, v))
-    print ("    </dl>")
+    print("    </dl>")
diff --git a/bin/get_qc_config.py b/bin/get_qc_config.py
@@ -13,20 +13,30 @@ def __init__(self, handler_name, multiqc_mapping, compare_direction):
         self.compare_direction = compare_direction
 
 
-class HandlerMapper():
+class HandlerMapper:
     def __init__(self):
-        self._mapper_list = [ValueHandlerMapper(handler_name = 'ClusterPFHandler',
-                                                multiqc_mapping = 'total',
-                                                compare_direction = 'lt'),
-                             ValueHandlerMapper(handler_name = 'ErrorRateHandler',
-                                                multiqc_mapping = 'Error',
-                                                compare_direction = 'gt'),
-                             ValueHandlerMapper(handler_name = 'Q30Handler',
-                                                multiqc_mapping = 'percent_Q30',
-                                                compare_direction = 'lt'),
-                             ValueHandlerMapper(handler_name = 'ReadsPerSampleHandler',
-                                                multiqc_mapping = 'mqc-generalstats-bcl2fastq-total',
-                                                compare_direction = 'lt')]
+        self._mapper_list = [
+            ValueHandlerMapper(
+                handler_name="ClusterPFHandler",
+                multiqc_mapping="total",
+                compare_direction="lt",
+            ),
+            ValueHandlerMapper(
+                handler_name="ErrorRateHandler",
+                multiqc_mapping="Error",
+                compare_direction="gt",
+            ),
+            ValueHandlerMapper(
+                handler_name="Q30Handler",
+                multiqc_mapping="percent_Q30",
+                compare_direction="lt",
+            ),
+            ValueHandlerMapper(
+                handler_name="ReadsPerSampleHandler",
+                multiqc_mapping="mqc-generalstats-bcl2fastq-total",
+                compare_direction="lt",
+            ),
+        ]
 
         self.mapping = self._convert_to_mappings(self._mapper_list)
 
@@ -36,33 +46,45 @@ def _convert_to_mappings(self, mapper_list):
             mapper_dict[mapper.handler_name] = mapper
         return mapper_dict
 
+
 def convert_to_multiqc_config(checkqc_config_dict):
     multiqc_config_format = {}
     handler_mapper = HandlerMapper()
     for mapper_name, mapper in handler_mapper.mapping.items():
         qc_criteria = checkqc_config_dict.get(mapper.handler_name)
         multiqc_config_value = {mapper.multiqc_mapping: {}}
-        if not qc_criteria['warning'] == 'unknown':
-            multiqc_config_value[mapper.multiqc_mapping]['warn'] = [{mapper.compare_direction: qc_criteria['warning']}]
-        if not qc_criteria['error'] == 'unknown':
-            multiqc_config_value[mapper.multiqc_mapping]['fail'] = [{mapper.compare_direction: qc_criteria['error']}]
+        if not qc_criteria["warning"] == "unknown":
+            multiqc_config_value[mapper.multiqc_mapping]["warn"] = [
+                {mapper.compare_direction: qc_criteria["warning"]}
+            ]
+        if not qc_criteria["error"] == "unknown":
+            multiqc_config_value[mapper.multiqc_mapping]["fail"] = [
+                {mapper.compare_direction: qc_criteria["error"]}
+            ]
+
+        multiqc_config_format[mapper.multiqc_mapping] = multiqc_config_value[
+            mapper.multiqc_mapping
+        ]
 
-        multiqc_config_format[mapper.multiqc_mapping] = multiqc_config_value[mapper.multiqc_mapping]
+    return {"table_cond_formatting_rules": multiqc_config_format}
 
-    return {'table_cond_formatting_rules': multiqc_config_format}
 
 def convert_to_dict(checkqc_config):
     checkqc_config_dict = {}
     for qc_handler in checkqc_config:
-        checkqc_config_dict[qc_handler['name']] = qc_handler
+        checkqc_config_dict[qc_handler["name"]] = qc_handler
 
     return checkqc_config_dict
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Converts CheckQC tresholds to MultiQC conditional format')
-    parser.add_argument('--runfolder', type=str, required=True, help='Path to runfolder')
-    parser.add_argument('--config', type=str, help='Path to checkQC config')
+    parser = argparse.ArgumentParser(
+        description="Converts CheckQC tresholds to MultiQC conditional format"
+    )
+    parser.add_argument(
+        "--runfolder", type=str, required=True, help="Path to runfolder"
+    )
+    parser.add_argument("--config", type=str, help="Path to checkQC config")
 
     args = parser.parse_args()
     runfolder = args.runfolder
@@ -71,12 +93,16 @@ def convert_to_dict(checkqc_config):
     run_type_recognizer = RunTypeRecognizer(runfolder)
     config = ConfigFactory.from_config_path(config)
 
-    instrument_and_reagent_version = run_type_recognizer.instrument_and_reagent_version()
+    instrument_and_reagent_version = (
+        run_type_recognizer.instrument_and_reagent_version()
+    )
     both_read_lengths = run_type_recognizer.read_length()
     read_length = int(both_read_lengths.split("-")[0])
-    checkqc_config = config.get_handler_configs(instrument_and_reagent_version, read_length)
+    checkqc_config = config.get_handler_configs(
+        instrument_and_reagent_version, read_length
+    )
     checkqc_config_dict = convert_to_dict(checkqc_config)
     multiqc_config = convert_to_multiqc_config(checkqc_config_dict)
 
-    with open('qc_thresholds.yaml', 'w') as outfile:
+    with open("qc_thresholds.yaml", "w") as outfile:
         yaml.dump(multiqc_config, outfile)
diff --git a/config/compute_resources.config → .../nextflow_config/compute_resources.config b/config/compute_resources.config → .../nextflow_config/compute_resources.config
diff --git a/config/nextflow_config/singularity.config b/config/nextflow_config/singularity.config
@@ -0,0 +1,29 @@
+singularity {  // Singularity container engine settings
+    enabled = true  // execute processes inside Singularity containers
+    autoMounts = true  // have Nextflow mount host paths into the container automatically
+}
+
+process {  // container image URL per process name; the CI image cache is keyed on this file's hash
+    withName: 'FASTQC' {
+        container = 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--hdfd78af_1'
+    }
+    withName: 'FASTQ_SCREEN' {
+        container = 'https://depot.galaxyproject.org/singularity/fastq-screen:0.14.0--pl5262hdfd78af_1'
+    }
+    withName: 'GET_QC_THRESHOLDS' {  // same checkqc image as GET_METADATA
+        container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0'
+    }
+    withName: 'GET_METADATA' {  // same checkqc image as GET_QC_THRESHOLDS
+        container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0'
+    }
+    withName: 'INTEROP_SUMMARY' {
+        container = 'https://depot.galaxyproject.org/singularity/illumina-interop:1.1.23--h1b792b2_0'
+    }
+    withName: 'MULTIQC_PER_FLOWCELL' {  // same multiqc image as MULTIQC_PER_PROJECT
+        container = 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0'
+    }
+    withName: 'MULTIQC_PER_PROJECT' {  // same multiqc image as MULTIQC_PER_FLOWCELL
+        container = 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0'
+    }
+}
+
diff --git a/config/nextflow_config/test.config b/config/nextflow_config/test.config
@@ -0,0 +1,19 @@
+/*
+========================================================================================
+    Nextflow config file for running minimal tests
+========================================================================================
+    Defines input files and everything required to run a fast and simple pipeline test.
+    Use as follows:
+        nextflow run main.nf -profile dev,test,singularity
+
+
+    This config takes inspiration from https://github.com/nf-core/rnaseq
+----------------------------------------------------------------------------------------
+*/
+
+params {  // test inputs: every path points into the repository's test_data directory
+    run_folder = "$baseDir/test_data/210510_M03910_0104_000000000-JHGJL"
+    fastqscreen_databases = "$baseDir/test_data/Test_FastQ_Screen_Genomes"  // minimal FastQ Screen database shipped with the test data
+    checkqc_config = "$baseDir/test_data/checkqc_config.yaml"
+    config_dir = "$baseDir/test_data/test_config"  // NOTE(review): presumably replaces the default tool config directory — confirm against main.nf
+}    
diff --git a/config/fastq_screen.conf → config/tool_config/fastq_screen.conf b/config/fastq_screen.conf → config/tool_config/fastq_screen.conf
diff --git a/config/multiqc_flowcell_config.yaml → .../tool_config/multiqc_flowcell_config.yaml b/config/multiqc_flowcell_config.yaml → .../tool_config/multiqc_flowcell_config.yaml
diff --git a/config/multiqc_main_config.yaml → config/tool_config/multiqc_main_config.yaml b/config/multiqc_main_config.yaml → config/tool_config/multiqc_main_config.yaml
diff --git a/config/multiqc_project_config.yaml → ...g/tool_config/multiqc_project_config.yaml b/config/multiqc_project_config.yaml → ...g/tool_config/multiqc_project_config.yaml