upgrade guppy to 6.0.1 (#144)

* upgrade guppy to 6.0.1 * use Remora model as default for megalodon * add GPU doc
LabShengLi · Jan 30, 2022 · 13a7a6d · 13a7a6d
1 parent a1849cb
commit 13a7a6d
Show file tree

Hide file tree

Showing 8 changed files with 135 additions and 152 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -23,7 +23,8 @@ jobs:
       - name: Basic NANOME workflow test for ${{ matrix.ci_test_name }}
         run: |
           nextflow -v &&\
-            nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.ci_test_name }},docker  ${{ matrix.using_tools_option }}
+            nextflow run ${GITHUB_WORKSPACE} \
+                -profile ${{ matrix.ci_test_name }},docker ${{ matrix.using_tools_option }}
       - name: File tree for ${{ matrix.ci_test_name }}
         run: |
           echo "Outputs tree view:" ; tree -a results; ls -lh -R results

diff --git a/Dockerfile b/Dockerfile
@@ -6,25 +6,23 @@
 
 # Set the base image to Ubuntu 18.04 and NVIDIA GPU from https://hub.docker.com/r/nvidia/cuda
 # or from https://ngc.nvidia.com/catalog/containers/nvidia:cuda/tags
-FROM nvidia/cuda:11.4.1-base-ubuntu18.04
+FROM nvidia/cuda:11.6.0-base-ubuntu18.04
 
 # Author and maintainer
 MAINTAINER Yang Liu <yang.liu@jax.org>
 LABEL description="Nanome project in Li Lab at The Jackson Laboratory" \
       author="yang.liu@jax.org"
 
 # Guppy version
-ARG GUPPY_VERSION=5.0.16
-ARG MEGALODON_VERSION=2.4.1
-ARG BUILD_PACKAGES="wget apt-transport-https procps git curl"
+ARG GUPPY_VERSION=6.0.1
+ARG REMORA_VERSION=0.1.2
+ARG MEGALODON_VERSION=2.4.2
+ARG BUILD_PACKAGES="wget apt-transport-https procps git curl libnvidia-compute-460-server"
 ARG DEBIAN_FRONTEND="noninteractive"
-ARG METEORE_GITHUB="https://github.com/comprna/METEORE/archive/refs/tags/v1.0.0.tar.gz"
-ARG DEEPSIGNAL_MODEL="https://storage.googleapis.com/jax-nanopore-01-project-data/nanome-input/model.CpG.R9.4_1D.human_hx1.bn17.sn360.v0.1.7+.tar.gz"
-ARG MEGALODON_MODEL="https://storage.googleapis.com/jax-nanopore-01-project-data/nanome-input/megalodon_model.tar.gz"
 
 # Install guppy-gpu version, ref: https://github.com/GenomicParisCentre/dockerfiles
 RUN apt-get -q update && \
-    DEBIAN_FRONTEND="noninteractive" apt-get -q install --yes ${BUILD_PACKAGES} libnvidia-compute-460-server && \
+    DEBIAN_FRONTEND="noninteractive" apt-get -q install --yes ${BUILD_PACKAGES} && \
     cd /tmp && \
     wget -q https://mirror.oxfordnanoportal.com/software/analysis/ont_guppy_${GUPPY_VERSION}-1~bionic_amd64.deb && \
     DEBIAN_FRONTEND="noninteractive" apt-get -q install --yes /tmp/ont_guppy_${GUPPY_VERSION}-1~bionic_amd64.deb && \
@@ -49,7 +47,9 @@ RUN conda env create --name nanome --file=environment.yml && conda clean -a
 SHELL ["conda", "run", "-n", "nanome", "/bin/bash", "-c"]
 
 # Install latest version for megalodon, even conflicts with fast5mod, they can work
-RUN pip install megalodon==${MEGALODON_VERSION} && pip cache purge &&\
+RUN pip install megalodon==${MEGALODON_VERSION} &&\
+	pip install ont-remora==${REMORA_VERSION} &&\
+    pip cache purge &&\
     npm install -g inliner && npm cache clean --force
 
 # Set nanome env path into PATH

diff --git a/conf/modules/newmodules.config b/conf/modules/newmodules.config
@@ -14,61 +14,27 @@ params{
 	newModuleConfigs = [
 	// New tool 1:
 	  [
-		name      : 'MegalodonNew1',
-		container_docker : 'liuyangzzu/nanome:v1.3',
-		container_singularity : 'docker://liuyangzzu/nanome:v1.3',
-		version   : '5.0',
+		name      : 'MegalodonRemora',
+		container_docker : 'liuyangzzu/nanome:v1.4',
+		container_singularity : 'docker://liuyangzzu/nanome:v1.4',
+		version   : '0.1.2',
 		cmd       : '''
-			## Download Rerio model
-			git clone https://github.com/nanoporetech/rerio
-			rerio/download_model.py rerio/basecall_models/res_dna_r941_min_modbases_5mC_v001
-
-			## Megalodon calling
-			megalodon     ${input}   --overwrite  \
-				--mod-motif m CG 0   --outputs per_read_mods mods per_read_refs\
-				--mod-output-formats bedmethyl wiggle \
-				--write-mods-text --write-mod-log-probs\
+			## Run Remora model
+			megalodon ${input} --overwrite\
+				--guppy-config dna_r9.4.1_450bps_fast.cfg\
+				--remora-modified-bases dna_r9.4.1_e8 fast 0.0.0 5mc CG 0\
+				--outputs mod_mappings mods per_read_mods\
 				--guppy-server-path $(which guppy_basecall_server) \
-				--guppy-config res_dna_r941_min_modbases_5mC_v001.cfg  \
-				--guppy-params "-d ./rerio/basecall_models/" \
-				--guppy-timeout 300 \
-				--reference ${genome} \
-				--processes 2
-		''',
-		output	 : 'megalodon_results/per_read_modified_base_calls.txt',
-		outputHeader 			: true,
-		outputOrder 			: [0,1,3,2],
-		outputScoreCols			: [4,5],
-		logScore				: true,
-	  ],
-	// New tool 2:
-	  [
-		name      : 'MegalodonNew2',
-		container_docker : 'liuyangzzu/nanome:v1.3',
-		container_singularity : 'docker://liuyangzzu/nanome:v1.3',
-		version   : '5.1',
-		cmd       : '''
-			## Download Rerio model
-			git clone https://github.com/nanoporetech/rerio
-			rerio/download_model.py rerio/basecall_models/res_dna_r941_min_modbases_5mC_v001
-
-			## Megalodon calling
-			megalodon     ${input}   --overwrite  \
-				--mod-motif m CG 0   --outputs per_read_mods mods per_read_refs\
+				--reference ${genome}\
+				--processes 2 \
 				--mod-output-formats bedmethyl wiggle \
-				--write-mods-text --write-mod-log-probs\
-				--guppy-server-path $(which guppy_basecall_server) \
-				--guppy-config res_dna_r941_min_modbases_5mC_v001.cfg  \
-				--guppy-params "-d ./rerio/basecall_models/" \
-				--guppy-timeout 300 \
-				--reference ${genome} \
-				--processes 2
+				--write-mods-text --write-mod-log-probs
 		''',
-		output	 				: 'megalodon_results/per_read_modified_base_calls.txt',
-		outputHeader 			: true,
-		outputOrder 			: [0,1,3,2],
-		outputScoreCols			: [4,5],
-		logScore				: true,
+		output	 : 'megalodon_results/per_read_modified_base_calls.txt',  //output file name
+		outputHeader 			: true, // if output contain header
+		outputOrder 			: [0,1,3,2], // output columns index for: READID, CHR, POS, STRAND
+		outputScoreCols			: [4,5], // output of scores, meth_score and unmeth_score
+		logScore				: true,  // if output score is log-transformed values
 	  ],
 	]
-}
+}
diff --git a/docs/Usage.md b/docs/Usage.md
@@ -244,18 +244,23 @@ For more detail of using cloud computing, please check [Cloud computing usage](h
 
 # 6. Conda environment for local running
 
-NANOME support local running without Docker or Singularity support. Below is conda environment installation steps, users need to install Guppy software by themselves in this case:
+NANOME supports local running without Docker or Singularity. Below are the conda environment installation steps; users need to install the Guppy software by themselves in this case.
+
+* Create the conda environment:
 ```angular2html
 # Create conda environment for local running NANOME
 git clone https://github.com/TheJacksonLaboratory/nanome.git
 cd nanome
 conda env create --name nanome --file=environment.yml
 conda activate nanome
 
-pip install megalodon==2.4.1
+pip install megalodon==2.4.2
 npm install -g inliner
 conda install -c conda-forge -c bioconda nextflow
+```
 
+* Run NANOME locally using the conda environment:
+```
 # Run NANOME pipeline using local execution
 conda activate nanome
 nextflow run TheJacksonLaboratory/nanome\
@@ -277,7 +282,31 @@ nextflow run TheJacksonLaboratory/nanome\
 ```
 Param`--guppyDir=[guppy-installation-directory]` is the Guppy software installation base directory, `--conda_base_dir [conda-dir]` is conda software base directory, `--conda_name [conda-env-dir]` is conda environment base directory.
 
-# 7. Add a new module/tool
+# 7. Using GPU
+NANOME supports running in GPU mode automatically, based on the setting of the environment variable `CUDA_VISIBLE_DEVICES`. In general, this variable is set by the system administrator, so there is no command-line difference between GPU and CPU runs.
+
+Check whether your system supports the NVIDIA and CUDA drivers:
+```angular2html
+nvidia-smi
+nvcc -V
+echo $CUDA_VISIBLE_DEVICES
+```
+
+Singularity can directly pass CUDA devices by default; Docker, however, needs the `--gpus all` option to pass CUDA devices. Below is the command to use GPU resources with Docker:
+```angular2html
+nextflow run TheJacksonLaboratory/nanome\
+    -profile test,docker\
+    --containerOptions '--gpus all'
+```
+
+Please ensure that GPU support for Docker is installed on the system; see the [install document](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). Below is a test that runs a base CUDA container:
+```angular2html
+sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
+```
+
+See these links for how to install the [Nvidia](https://www.nvidia.com/Download/index.aspx?lang=cn) and [CUDA](https://developer.nvidia.com/cuda-toolkit-archive) drivers.
+
+# 8. Add a new module/tool
 
 NANOME support adding any new methylation-calling module in a rapid way, without touching the main pipeline codes. Users only need to specify the container (or local running way) and methylation calling command line interface for each new tool in a configuration file.
 

diff --git a/environment.yml b/environment.yml
@@ -39,8 +39,8 @@ dependencies:
   - nanopolish=0.13.2
   - pip:
     - xgboost
-    - ont-pyguppy-client-lib==5.0.16
-    - deepsignal==0.1.9
+    - ont-pyguppy-client-lib==6.0.1
+    - deepsignal==0.1.10
     - fast5mod==1.0.5
     - nanome-jax==1.3.25
     - megalodon