Skip to content

Commit

Permalink
Support R10 (#157)
Browse files Browse the repository at this point in the history
* support t2t chm13
* fix link
* add guppy6
* support r10.4.1
  • Loading branch information
liuyangzzu committed Aug 12, 2023
1 parent 1cdc472 commit 7ecee06
Show file tree
Hide file tree
Showing 43 changed files with 912 additions and 1,341 deletions.
11 changes: 5 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ NANOME pipeline support running with various ways in different platforms:


## Simple usage
Please refer to [Usage](https://github.com/LabShengLi/nanome/blob/master/docs/Usage.md) and [Specific Usage](https://github.com/LabShengLi/nanome/blob/master/docs/SpecificUsage.md) and [NANOME options](https://github.com/LabShengLi/nanome/blob/tutorial1/docs/nanome_params.md) for how to use NANOME pipeline. For running on CloudOS platform (e.g., google cloud), please check [Usage on CloudOS](https://github.com/LabShengLi/nanome/blob/master/docs/Usage.md#5-running-pipeline-on-cloud-computing-platform). We provide a **tutorial video** for running NANOME pipeline:
Please refer to [Usage](https://github.com/LabShengLi/nanome/blob/master/docs/Usage.md) and [Specific Usage](https://github.com/LabShengLi/nanome/blob/master/docs/SpecificUsage.md) and [NANOME options](https://github.com/LabShengLi/nanome/blob/master/docs/nanome_params.md) for how to use NANOME pipeline. For running on CloudOS platform (e.g., google cloud), please check [Usage on CloudOS](https://github.com/LabShengLi/nanome/blob/master/docs/Usage.md#5-running-pipeline-on-cloud-computing-platform). We provide a **tutorial video** for running NANOME pipeline:

[![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/TfotM55KTVE/0.jpg)](https://www.youtube.com/watch?v=TfotM55KTVE)

Expand Down Expand Up @@ -142,15 +142,14 @@ Please check [NANOME report](https://github.com/LabShengLi/nanome/blob/master/do


### Haplotype-aware consensus methylations
Please check [phasing usage](https://github.com/LabShengLi/nanome/blob/tutorial1/docs/Phasing.md).
Please check [phasing usage](https://github.com/LabShengLi/nanome/blob/master/docs/Phasing.md).
![PhasingDemo](https://github.com/LabShengLi/nanome/blob/master/docs/resources/nanome3t_5mc_phasing2.png)

### Lifebit CloudOS report
We now support running NANOME on cloud computing platforms. [Lifebit](https://lifebit.ai/lifebit-cloudos/) is a web-based cloud computing platform, and below are the running reports:
* Ecoli test report: https://cloudos.lifebit.ai/public/jobs/61c9fd328c574a01e8d31d2e
* Human test report: https://cloudos.lifebit.ai/public/jobs/61c9fe618c574a01e8d31e99
* NA12878 chr22 report: https://cloudos.lifebit.ai/public/jobs/61c4f2ad8c574a01e8d0eee3
* NA12878 chr20 part5 report: https://cloudos.lifebit.ai/public/jobs/61c770748c574a01e8d2062b
* Ecoli test report: https://cloudos.lifebit.ai/public/jobs/6430509445941801546e5f8f
* Human test report: https://cloudos.lifebit.ai/public/jobs/6430639045941801546e627f
* NA12878 chr22 report: https://cloudos.lifebit.ai/public/jobs/6430b64645941801546e7400


## Revision History
Expand Down
4 changes: 3 additions & 1 deletion conf/executors/gcp_input.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,7 @@ params{
genome_map = [ 'hg38': "${GCP_INPUT}/hg38.tar.gz",
'hg38_chr22': "${GCP_INPUT}/hg38_chr22.tar.gz",
'mm10': "${GCP_INPUT}/mm10.tar.gz",
'ecoli': "${GCP_INPUT}/ecoli.tar.gz" ]
'ecoli': "${GCP_INPUT}/ecoli.tar.gz",
'chm13': "${GCP_INPUT}/chm13.tar.gz"
]
}
4 changes: 3 additions & 1 deletion conf/executors/jaxhpc_input.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,7 @@ params {
genome_map = [ 'hg38': "${HPC_INPUT}/reference_genome/hg38",
'hg38_chr22': "${HPC_INPUT}/hg38_chr22.tar.gz",
'mm10': "${HPC_INPUT}/mm10.tar.gz",
'ecoli': "${HPC_INPUT}/ecoli.tar.gz"]
'ecoli': "${HPC_INPUT}/ecoli.tar.gz",
'chm13': "${HPC_INPUT}/chm13.tar.gz"
]
}
16 changes: 12 additions & 4 deletions conf/executors/lifebit.config
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,18 @@ process {
params.errorStrategy : task.exitStatus in [1, 2, 10, 14] ? 'retry' : params.errorStrategy }
}

withName: 'ENVCHECK|BASECALL|Guppy|MEGALODON|DEEPSIGNAL2' { // allocate gpu
// allocate gpu
withName: 'ENVCHECK|BASECALL|Guppy|Guppy6|MEGALODON|DEEPSIGNAL2' {
accelerator = [request: params.gpuNumber, type: params.gpuType]
beforeScript = "export CUDA_VISIBLE_DEVICES=0" // pass CUDA var to process, since GCP do not export it
// pass CUDA var to process, since GCP do not export it
beforeScript = "export CUDA_VISIBLE_DEVICES=0"
// gpu options for container
containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
( workflow.containerEngine == "docker" ? '--gpus all': null ) }
( workflow.containerEngine == "docker" ? '--gpus all': null ) }
}

withName: 'UNTAR|BASECALL|Guppy|RESQUIGGLE' { // allocate high disk size
// allocate high disk size
withName: 'UNTAR|BASECALL|Guppy|Guppy6|RESQUIGGLE' {
disk = params.highDiskSize
}

Expand All @@ -109,6 +113,10 @@ process {
withName: 'DEEPSIGNAL2' {
container = params.deepsignal2_docker_name
}

withName: 'Guppy6' {
container = params.guppy_stable_name
}
}

env {
Expand Down
4 changes: 2 additions & 2 deletions docs/CloudComputing.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ nextflow run LabShengLi/nanome\
-profile test,docker,google\
-w [Google-storage-bucket]/nanome-work-test\
--outdir [Google-storage-bucket]/nanome-outputs\
--googleProjectName [PROJECT_ID]
--projectCloud [PROJECT_ID]
```

## Build and submit to container registry of google cloud computing
Expand All @@ -65,7 +65,7 @@ nextflow run LabShengLi/nanome\
-profile test,docker,google\
-w gs://jax-nanopore-01-project-data/nanome-work\
--outdir gs://jax-nanopore-01-project-data/nanome-outputs\
--googleProjectName jax-nanopore-01
--projectCloud jax-nanopore-01
```


Expand Down
22 changes: 22 additions & 0 deletions docs/SpecificUsage.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,26 @@ nextflow run LabShengLi/nanome\
--input '/fastscratch/liuya/nanome/APL_ont_out/APL_sept/sept_dir/*'\
--genome hg38 \
--runMethcall false
```

## 5. Support T2T-CHM13 genome

An example is shown below:
```bash
nextflow run LabShengLi/nanome \
-profile test_human,singularity \
--genome chm13
```

## 6. Support R10.4.1 flow cells

An example is shown below:
```bash
nextflow run LabShengLi/nanome \
-profile test_human,singularity \
--input https://storage.googleapis.com/jax-nanopore-01-project-data/nanome-input/testdata_r10_4_1.tar.gz \
--runGuppy \
--GUPPY_BASECALL_MODEL dna_r10.4.1_e8.2_400bps_hac.cfg \
--GUPPY_METHCALL_MODEL dna_r10.4.1_e8.2_400bps_modbases_5mc_cg_hac.cfg \
--runNanopolish false --runDeepSignal false --runMegalodon false
```
6 changes: 3 additions & 3 deletions docs/Usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ You can also run NANOME pipeline on cloud computing platform ([google cloud plat
nextflow run LabShengLi/nanome\
-profile test,docker,google \
-w [Google-storage-bucket]/TestData-work \
	--outputDir [Google-storage-bucket]/TestData-outputs\
--googleProjectName [Google-project-name]
	--outdir [Google-storage-bucket]/TestData-outputs\
--projectCloud [Google-project-name]
```

## Running results
Expand Down Expand Up @@ -235,7 +235,7 @@ nextflow run LabShengLi/nanome\
-profile test,docker,google \
-w [Google-storage-bucket]/nanome-work-ci \
--outdir [Google-storage-bucket]/nanome-outputs-ci\
--googleProjectName [Google-project-name]
--projectCloud [Google-project-name]
```

The `[Google-project-name]` is the Google project name, and `[Google-storage-bucket]` is the **Data Bucket** name that user can access on Google Cloud. `-w` is pipeline output working directory, `--outdir` is the directory for methylation-calling results.
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ channels:
- bioconda
- conda-forge
dependencies:
- python=3.6 # need 3.6 for some software
- python=3.6 # need 3.6 for some software, ont-guppy-client-lib need >=3.6 <=3.8
- pip
- nodejs
- scipy
Expand Down
95 changes: 60 additions & 35 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env nextflow
/*
/**
=========================================================================================
NANOME(Nanopore methylation) pipeline for Oxford Nanopore sequencing
=========================================================================================
Expand All @@ -11,7 +11,7 @@
@Software : NANOME project
@Organization : JAX Li Lab
----------------------------------------------------------------------------------------
*/
**/
// We now support both latest and lower versions, due to Lifebit CloudOS is only support 20.04
// Note: NXF_VER=20.04.1 nextflow run main.nf -profile test,singularity
if( nextflow.version.matches(">= 20.07.1") ){
Expand Down Expand Up @@ -43,7 +43,8 @@ gbl_genome_path = gbl_genome_map[params.genome] ? gbl_genome_map[params.genome]
humanChrSet = 'chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY'

genome_basefn = (new File(params.genome)).name
if (genome_basefn.startsWith('hg') || (params.dataType && params.dataType == 'human')) {
if (genome_basefn.startsWith('hg') || genome_basefn.startsWith('chm13') ||
(params.dataType && params.dataType == 'human')) {
dataType = params.dataType ? params.dataType : "human"
// default for human chr
chrSet = params.chrSet ? params.chrSet : humanChrSet
Expand Down Expand Up @@ -123,8 +124,8 @@ if (params.runResquiggle) summary['runResquiggle'] = 'Yes'
if (params.runMethcall) {
if (params.runNanopolish) summary['runNanopolish'] = 'Yes'
if (params.runMegalodon) summary['runMegalodon'] = 'Yes'
if (params.runDeepSignal2) summary['runDeepSignal2'] = 'Yes'
if (params.runDeepSignal) summary['runDeepSignal'] = 'Yes'
if (params.runDeepSignal1) summary['runDeepSignal1'] = 'Yes'
if (params.runGuppy) summary['runGuppy'] = 'Yes'
if (params.runTombo) summary['runTombo'] = 'Yes'
if (params.runMETEORE) summary['runMETEORE'] = 'Yes'
Expand Down Expand Up @@ -169,11 +170,23 @@ if (params.ctg_name) { summary['ctg_name'] = params.ctg_name }

summary['\nModel summary'] = "--------"
if (params.runBasecall && !params.skipBasecall) summary['GUPPY_BASECALL_MODEL'] = params.GUPPY_BASECALL_MODEL

if (params.runNANOME) {
summary['NANOME_MODEL/CS_MODEL_FILE'] = "${params.NANOME_MODEL}/${params.CS_MODEL_FILE}"
// summary['CS_MODEL_SPEC'] = "${params.CS_MODEL_SPEC}"
}

if (params.runMethcall && params.runMegalodon)
summary['MEGALODON_MODEL'] = params.rerio? 'Rerio:' + params.MEGALODON_MODEL : 'Remora:' + params.remoraModel
if (params.runMethcall && params.runDeepSignal) summary['DEEPSIGNAL_MODEL_DIR/DEEPSIGNAL_MODEL'] =\

if (params.runMethcall && params.runDeepSignal) summary['DEEPSIGNAL2_MODEL_FILE/DEEPSIGNAL2_MODEL_NAME'] =\
params.DEEPSIGNAL2_MODEL_FILE + "/" + params.DEEPSIGNAL2_MODEL_NAME

if (params.runMethcall && params.runDeepSignal1) summary['DEEPSIGNAL_MODEL_DIR/DEEPSIGNAL_MODEL'] =\
params.DEEPSIGNAL_MODEL_DIR + "/" + params.DEEPSIGNAL_MODEL

if (params.runMethcall && params.runGuppy) summary['GUPPY_METHCALL_MODEL'] = params.GUPPY_METHCALL_MODEL

if (params.runMethcall && params.runDeepMod) {
if (isDeepModCluster) {
summary['DEEPMOD_RNN_MODEL;DEEPMOD_CLUSTER_MODEL'] = \
Expand All @@ -183,11 +196,6 @@ if (params.runMethcall && params.runDeepMod) {
summary['DEEPMOD_RNN_MODEL'] = "${params.DEEPMOD_RNN_MODEL}"
}
}
if (params.runNANOME) {
summary['NANOME_MODEL'] = "${params.NANOME_MODEL}"
summary['CS_MODEL_FILE'] = "${params.CS_MODEL_FILE}"
summary['CS_MODEL_SPEC'] = "${params.CS_MODEL_SPEC}"
}

summary['\nPipeline settings'] = "--------"
summary['Working dir'] = workflow.workDir
Expand Down Expand Up @@ -278,6 +286,8 @@ include { DEEPSIGNAL2; DEEPSIGNAL2COMB } from './modules/DEEPSIGNAL2'

include { Guppy; GuppyComb; Tombo; TomboComb; DeepMod; DpmodComb; METEORE } from './modules/OLDTOOLS'

include { Guppy6; Guppy6Comb } from './modules/GUPPY6'

include { NewTool; NewToolComb } from './modules/NEWTOOLS'

include { CLAIR3; PHASING } from './modules/PHASING'
Expand All @@ -304,7 +314,7 @@ workflow {
null1

// deepsignal model dir will be downloaded in ENVCHECK if needed
if (params.runDeepSignal) {
if (params.runDeepSignal1) {
ch_deepsignal_dir = params.deepsignalDir ?
Channel.fromPath(params.deepsignalDir, type: 'any', checkIfExists: true) :
Channel.fromPath(params.DEEPSIGNAL_MODEL_ONLINE, type: 'any', checkIfExists: true)
Expand All @@ -325,7 +335,7 @@ workflow {
}

// Resquiggle running if use Tombo or DeepSignal
if (((params.runDeepSignal || params.runTombo || params.runDeepSignal2) && params.runMethcall)
if (((params.runDeepSignal1 || params.runTombo || params.runDeepSignal) && params.runMethcall)
|| params.runResquiggle) {
resquiggle = RESQUIGGLE(UNTAR.out.untar_tuple.join(BASECALL.out.basecall_tuple), ENVCHECK.out.reference_genome)
f1 = params.feature_extract ? resquiggle.feature_extract : Channel.empty()
Expand All @@ -339,9 +349,11 @@ workflow {
comb_nanopolish = NPLSHCOMB(NANOPOLISH.out.nanopolish_tsv.collect(), ch_src, ch_utils)
s1 = comb_nanopolish.site_unify
r1 = comb_nanopolish.read_unify
co1 = comb_nanopolish.nanopolish_combine
} else {
s1 = Channel.empty()
r1 = Channel.empty()
co1 = Channel.empty()
}

if (params.runMegalodon && params.runMethcall) {
Expand All @@ -351,12 +363,14 @@ workflow {
ch_src, ch_utils)
s2 = comb_megalodon.site_unify
r2 = comb_megalodon.read_unify
co2 = comb_megalodon.megalodon_combine
} else {
s2 = Channel.empty()
r2 = Channel.empty()
co2 = Channel.empty()
}

if (params.runDeepSignal && params.runMethcall) {
if (params.runDeepSignal1 && params.runMethcall) {
DEEPSIGNAL(RESQUIGGLE.out.resquiggle, ENVCHECK.out.reference_genome,
ENVCHECK.out.deepsignal_model)
comb_deepsignal = DPSIGCOMB(DEEPSIGNAL.out.deepsignal_tsv.collect(), ch_src, ch_utils)
Expand All @@ -367,36 +381,41 @@ workflow {
r3 = Channel.empty()
}

if (params.runDeepSignal2 && params.runMethcall) {
deepsignal2 = DEEPSIGNAL2(RESQUIGGLE.out.resquiggle.collect(),
if (params.runDeepSignal && params.runMethcall) {
deepsignal2_model_file = Channel.fromPath(params.DEEPSIGNAL2_MODEL_FILE, type: 'any', checkIfExists: true)
deepsignal2 = DEEPSIGNAL2(RESQUIGGLE.out.resquiggle,
ENVCHECK.out.reference_genome,
ch_src, ch_utils)
comb_deepsignal2 = DEEPSIGNAL2COMB(DEEPSIGNAL2.out.deepsignal2_combine_out,
ch_src, ch_utils, deepsignal2_model_file)
comb_deepsignal2 = DEEPSIGNAL2COMB(DEEPSIGNAL2.out.deepsignal2_batch_per_read.collect(),
DEEPSIGNAL2.out.deepsignal2_batch_feature.collect(),
ch_src, ch_utils
)
f2 = deepsignal2.deepsignal2_feature_out
f2 = comb_deepsignal2.deepsignal2_feature_combine
s3_1 = comb_deepsignal2.site_unify
r3_1 = comb_deepsignal2.read_unify
co3_1 = comb_deepsignal2.deepsignal2_per_read_combine
} else {
f2 = Channel.empty()
s3_1 = Channel.empty()
r3_1 = Channel.empty()
co3_1 = Channel.empty()
}

if (params.runGuppy && params.runMethcall) {
Guppy(UNTAR.out.untar, ENVCHECK.out.reference_genome, ch_utils)
Guppy6(UNTAR.out.untar, ENVCHECK.out.reference_genome, ch_utils)

gcf52ref_ch = Channel.fromPath("${projectDir}/utils/null1").concat(Guppy.out.guppy_gcf52ref_tsv.collect())

comb_guppy = GuppyComb(Guppy.out.guppy_fast5mod_bam.collect(),
gcf52ref_ch,
comb_guppy6 = Guppy6Comb(Guppy6.out.guppy_batch_bam_out.collect(),
Guppy6.out.guppy_batch_per_read.collect(),
ENVCHECK.out.reference_genome,
ch_src, ch_utils)
s4 = comb_guppy.site_unify
r4 = comb_guppy.read_unify

s4 = comb_guppy6.site_unify
r4 = comb_guppy6.read_unify
co4 = comb_guppy6.guppy6_combine_tsv
} else {
s4 = Channel.empty()
r4 = Channel.empty()
co4 = Channel.empty()
}

if (params.runTombo && params.runMethcall) {
Expand Down Expand Up @@ -463,9 +482,11 @@ workflow {
consensus = CONSENSUS(top3_tools_read_unify, ch_src, ch_utils)
s8 = consensus.site_unify
r8 = consensus.read_unify
co8 = consensus.nanome_combine_out
} else {
s8 = Channel.empty()
r8 = Channel.empty()
co8 = Channel.empty()
}

null2.concat(
Expand Down Expand Up @@ -495,20 +516,24 @@ workflow {
s1, s2, s3, s3_1, s4, s5, s6, s7, s_new, s8
).toList().set { tools_site_unify }

REPORT(tools_site_unify, top3_tools_read_unify,
ENVCHECK.out.tools_version_tsv, QCEXPORT.out.qc_report,
ENVCHECK.out.reference_genome, ch_src, ch_utils)
if (params.runBasecall) {
REPORT(tools_site_unify, top3_tools_read_unify,
ENVCHECK.out.tools_version_tsv, ENVCHECK.out.basecall_version_txt,
QCEXPORT.out.qc_report,
ENVCHECK.out.reference_genome, ch_src, ch_utils
)
}

if (params.phasing) {
CLAIR3(QCEXPORT.out.bam_data, ENVCHECK.out.reference_genome)
null1.concat(
MGLDNCOMB.out.megalodon_combine,
MGLDNCOMB.out.read_unify,
CONSENSUS.out.nanome_combine_out,
CONSENSUS.out.read_unify,
NPLSHCOMB.out.nanopolish_combine_out_ch
).toList().set { mega_and_nanome_ch }
PHASING(mega_and_nanome_ch, CLAIR3.out.clair3_out_ch,
co1,
co2, r2,
co3_1, r3_1,
co4, r4,
co8, r8
).toList().set { meth_for_phasing_input_ch }
PHASING(meth_for_phasing_input_ch, CLAIR3.out.clair3_out_ch,
ch_src, QCEXPORT.out.bam_data, ENVCHECK.out.reference_genome)
}
}
Loading

0 comments on commit 7ecee06

Please sign in to comment.