Skip to content
This repository has been archived by the owner on Mar 16, 2022. It is now read-only.

Setup: Running

Christopher Dunn edited this page Apr 2, 2018 · 3 revisions

Running fc_run.py

After installation, you can start the assembly process like this:

    fc_run.py fc_run.cfg

Here is an example of fc_run.cfg for a small E. coli assembly:

    [General]
    # file listing the paths of the initial subread fasta files
    input_fofn = input.fofn

    input_type = raw
    #input_type = preads

    # The length cutoff used for seed reads used for initial mapping
    length_cutoff = 12000

    # The length cutoff for seed reads used for pre-assembly
    length_cutoff_pr = 12000

    # overlapping options for Daligner
    pa_HPCdaligner_option =  -v -dal4 -M32 -e.70 -l1000 -s1000
    ovlp_HPCdaligner_option = -v -dal4 -M32 -h60 -e.96 -l500 -s1000

    pa_DBsplit_option = -x500 -s50
    ovlp_DBsplit_option = -x500 -s50

    # error correction consensus options
    falcon_sense_option = --output-multi --min-idt 0.70 --min-cov 4 --max-n-read 200 --n-core 6

    # overlap filtering options
    overlap_filtering_setting = --max-diff 100 --max-cov 100 --min-cov 20 --bestn 10

    # For job-submission options, see https://github.com/PacificBiosciences/FALCON/wiki/Configuration
    # These are old-style, but should still work, for now.

    # Cluster queue setting
    #sge_option_da = -pe smp 8 -q jobqueue
    #sge_option_la = -pe smp 2 -q jobqueue
    #sge_option_pda = -pe smp 8 -q jobqueue
    #sge_option_pla = -pe smp 2 -q jobqueue
    #sge_option_fc = -pe smp 24 -q jobqueue
    #sge_option_cns = -pe smp 8 -q jobqueue

    # concurrency setting
    #pa_concurrent_jobs = 32
    #cns_concurrent_jobs = 32
    #ovlp_concurrent_jobs = 32

    #pwatcher_type = fs_based # the default
    #job_type = SGE # the default

    [job.defaults]
    JOB_OPTS = -pe smp ${NPROC} -q jobqueue
    njobs = 32

    [job.step.da]
    NPROC = 8
    # Daligner needs only 4 procs per job, but since we set `-M32`, we need 32GB per job. If
    # your Grid has roughly 4GB per processor, then we want to reserve 8 processors, in order to
    # reserve 8*4GB==32GB of RAM per job.

    [job.step.la]
    NPROC = 2

    [job.step.pda]
    NPROC = 8

    [job.step.pla]
    NPROC = 2

    [job.step.cns]
    NPROC = 6 # also to pass --n-core=6 to falcon_sense

    [job.step.asm]
    NPROC = 24 # also to pass --n-core=24 to ovlp_filter

In that case, input.fofn contains a list of fasta files for E. coli:

ecoli.1.subreads.fasta
ecoli.2.subreads.fasta
ecoli.3.subreads.fasta

Those files can be obtained like this:

wget https://www.dropbox.com/s/tb78i5i3nrvm6rg/m140913_050931_42139_c100713652400000001823152404301535_s1_p0.1.subreads.fasta
wget https://www.dropbox.com/s/v6wwpn40gedj470/m140913_050931_42139_c100713652400000001823152404301535_s1_p0.2.subreads.fasta
wget https://www.dropbox.com/s/j61j2cvdxn4dx4g/m140913_050931_42139_c100713652400000001823152404301535_s1_p0.3.subreads.fasta