# Generate job submission script for various computing environments

This workflow generates job scripts for cluster job submissions.

In [23]:
[global]
# Job script file to be written to
parameter: to_script = path
# The workflow file to execute
parameter: workflow_file = path
# Path to job template (passed to `sos run -c` in the generated script)
parameter: template_file = path
# Command arguments, one option per line; the lines are joined with shell
# line continuations when written into the job script
parameter: args = str

def join_continuation_lines(text):
    """Join the lines of ``text`` with shell line continuations.

    Surrounding whitespace of the whole text and trailing whitespace of each
    line are removed; leading indentation of each line is kept. Every line
    but the last is terminated with `` \\`` (space + backslash). The space
    matters: bash deletes a backslash-newline pair outright, so without it a
    line that has no leading indentation would be glued onto the previous
    token (``fastGWA\\`` + ``--cwd`` -> ``fastGWA--cwd``).

    ``splitlines()`` is used so that CRLF input does not leave a stray
    carriage return in front of the backslash.
    """
    return ' \\\n'.join(line.rstrip() for line in text.strip().splitlines())

args = join_continuation_lines(args)

In [1]:
# Configuration for Yale `farnam` cluster
[farnam]
output: to_script
# Write the SLURM batch script below to `to_script`. `expand = True` makes SoS
# substitute the {...} expressions: `:n` drops the file extension, `:a` gives
# an absolute path. The generated script submits to the `general` partition,
# runs the workflow with `args` on the `farnam` queue, and redirects both
# stdout and stderr of `sos run` to <to_script without extension>.log.
report: output = to_script, expand = True
    #!/bin/bash
    #SBATCH --partition general
    #SBATCH --nodes 1
    #SBATCH --ntasks-per-node 1
    #SBATCH --cpus-per-task 4
    #SBATCH --mem 16G
    #SBATCH --time 5-0:00:00
    #SBATCH --job-name {to_script:n}
    #SBATCH --output {to_script:n}-%J.out
    #SBATCH --error {to_script:n}-%J.log

    sos run {workflow_file:a} \
        {args} \
        -c {template_file:a} -q farnam -J 40 \
        &> {to_script:n}.log

In [None]:
# Configuration for Yale `pi_dewan` partition cluster
[dewan]
output: to_script
# Write the SLURM batch script below to `to_script`. `expand = True` makes SoS
# substitute the {...} expressions: `:n` drops the file extension, `:a` gives
# an absolute path. Identical to the `farnam` step except that the partition
# and the `-q` queue name are `pi_dewan`.
report: output = to_script, expand = True
    #!/bin/bash
    #SBATCH --partition pi_dewan
    #SBATCH --nodes 1
    #SBATCH --ntasks-per-node 1
    #SBATCH --cpus-per-task 4
    #SBATCH --mem 16G
    #SBATCH --time 5-0:00:00
    #SBATCH --job-name {to_script:n}
    #SBATCH --output {to_script:n}-%J.out
    #SBATCH --error {to_script:n}-%J.log

    sos run {workflow_file:a} \
        {args} \
        -c {template_file:a} -q pi_dewan -J 40 \
        &> {to_script:n}.log

In [None]:
# Configuration for Columbia csg partition cluster
[csg]
output: to_script
# Write the `#$`-directive job script below to `to_script`. `expand = True`
# makes SoS substitute the {...} expressions only; shell variables such as
# $JOB_ID, $HOME and $PATH are written out literally. Per the SoS path format
# specifiers, `:an` is the absolute path without extension and `:abn` the
# base name without extension. The generated script prepends
# $HOME/miniconda3/bin to PATH, loads Singularity, then runs the workflow on
# the `csg` queue with `-s force`, redirecting stdout and stderr to a .log file.
report: output = to_script, expand = True
    #!/bin/sh
    #$ -l h_rt=36:00:00
    #$ -l h_vmem=16G
    #$ -N {to_script:abn}
    #$ -o {to_script:an}-$JOB_ID.out
    #$ -e {to_script:an}-$JOB_ID.err  
    #$ -j y
    #$ -q csg.q
    #$ -S /bin/bash
    export PATH=$HOME/miniconda3/bin:$PATH
    module load Singularity/3.11.4
    sos run {workflow_file:a} \
        {args} \
        -c {template_file:a} \
        -q csg -s force \
        &> {to_script:an}.log
        

In [None]:
# Configuration for Columbia csg partition cluster, sourcing ~/mamba_activate.sh instead of prepending miniconda3 to PATH
[csg_mamba]
output: to_script
# Write the `#$`-directive job script below to `to_script`. `expand = True`
# makes SoS substitute the {...} expressions only; shell variables such as
# $JOB_ID are written out literally. The template matches the `csg` step
# except for the environment setup line: it sources ~/mamba_activate.sh
# (which must exist in the submitting user's home directory) rather than
# exporting a miniconda3 PATH.
report: output = to_script, expand = True
    #!/bin/sh
    #$ -l h_rt=36:00:00
    #$ -l h_vmem=16G
    #$ -N {to_script:abn}
    #$ -o {to_script:an}-$JOB_ID.out
    #$ -e {to_script:an}-$JOB_ID.err  
    #$ -j y
    #$ -q csg.q
    #$ -S /bin/bash
    source ~/mamba_activate.sh
    module load Singularity/3.11.4
    sos run {workflow_file:a} \
        {args} \
        -c {template_file:a} \
        -q csg -s force \
        &> {to_script:an}.log

## Example

For example, to generate a submission script for the `fastGWA` analysis, we first set some bash variables:

In [None]:
# --- Inputs to Get_Job_Script.ipynb -----------------------------------------
workflow_file="LMM.ipynb"              # notebook containing the analysis workflow
workflow="fastGWA"                     # workflow (step) name inside that notebook
to_script="20200701_fastGWA.sbatch"    # job script to generate
cwd="output"                           # passed to the workflow as --cwd
tpl_file="farnam.yml"                  # passed to the generator as --template-file

# --- Data files ---------------------------------------------------------------
bfile="data/genotypes.bed"
sampleFile="data/imputed_genotypes.sample"
# The {1..22} pattern is stored literally: bash performs no brace expansion in
# a variable assignment, quoted or not.
bgenFile="data/imputed_genotypes_chr{1..22}.bgen"
phenoFile="data/phenotypes.txt"
formatFile="data/fastGWA_template.yml"
# NOTE(review): sampleFile and bgenFile are not referenced by the cmd_args cell
# of this notebook; confirm whether they should be passed to the workflow.

# --- Phenotype and covariate columns -----------------------------------------
pheno="BMI"
covar="SEX"
qCovar="AGE"

Then, using the bash variables above, construct the command arguments for the pipeline:

In [2]:
# Arguments forwarded to the workflow, one option per line. The first line is
# the workflow name; the generator turns the newlines into shell line
# continuations. A single pair of double quotes is enough in bash for a
# multi-line string with variable expansion.
cmd_args="$workflow
    --cwd $cwd
    --bfile $bfile
    --phenoFile $phenoFile
    --formatFile $formatFile
    --phenoCol $pheno
    --covarCol $covar
    --qCovarCol $qCovar
    --numThreads 1
    --bgenMinMAF 0.001
    --bgenMinINFO 0.1
    --parts 2
    --p-filter 1
    --job-size 1
"

Finally, generate the pipeline submission script:

In [3]:
# Run the `farnam` step of the generator notebook to write the job script.
# Every expansion is double-quoted so that a path containing spaces or glob
# characters is passed as a single, unmodified argument; "$cmd_args" must stay
# quoted as well so its embedded newlines reach the generator intact.
sos run Get_Job_Script.ipynb farnam \
    --workflow-file "$workflow_file" \
    --to-script "$to_script" \
    --template-file "$tpl_file" \
    --args "$cmd_args"

INFO: Running [32mfarnam[0m: Configuration for Yale `farnam` cluster
INFO: [32mfarnam[0m is [32mcompleted[0m.
INFO: [32mfarnam[0m output:   [32m20200701_fastGWA.sbatch[0m
INFO: Workflow farnam (ID=33191d20faadedb6) is executed successfully with 1 completed step.


## Result

In [4]:
%preview 20200701_fastGWA.sbatch -l -1

#!/bin/bash
#SBATCH --partition general
#SBATCH --nodes 1
#SBATCH --ntasks-per-node 1
#SBATCH --cpus-per-task 4
#SBATCH --mem 16G
#SBATCH --time 3-0:00:00
#SBATCH --job-name 20200701_fastGWA
#SBATCH --output 20200701_fastGWA-%J.out
#SBATCH --error 20200701_fastGWA-%J.log

sos run /home/gw/GIT/github/UKBB_GWAS_dev/workflow/LMM.ipynb \
    fastGWA\
    --cwd output\
    --bfile data/genotypes.bed\
    --phenoFile data/phenotypes.txt\
    --formatFile data/fastGWA_template.yml\
    --phenoCol BMI\
    --covarCol SEX\
    --qCovarCol AGE\
    --numThreads 1\
    --bgenMinMAF 0.001\
    --bgenMinINFO 0.1\
    --parts 2\
    --p-filter 1\
    --job-size 1 \
    -c /home/gw/GIT/github/UKBB_GWAS_dev/workflow/farnam.yml -q farnam -J 40 \
    &> 20200701_fastGWA.log
