# 04_06_snakemake.ipynb - Scientific workflows with Snakemake

In [None]:
# Prerequisite: install Snakemake once, from a terminal, before running this notebook:
#   conda install -c bioconda -c conda-forge snakemake-minimal

In [1]:
%%file Snakefile

# Identifiers of the events to process; each entry is substituted for the
# {id} wildcard when the per-event file names are expanded.
events = [
    'GW170817-v3',
    'GW190521_074359-v1',
    'GW190814-v2',
    'GW190412-v3',
    'GW190828_063405-v1',
    'GW170814-v3',
    'GW170608-v3',
    'GW190408_181802-v1',
    'GW190521-v3',
]

# Combine the per-event plots into one tall image, events/allevents.png.
# This is the first rule in the file, so it is what a bare `snakemake`
# invocation builds; its inputs pull in one plot per entry of `events`.
rule stack:
    input:
        expand("events/{id}.png", id=events)
    output:
        "events/allevents.png"
    run:
        from PIL import Image
        import numpy as np

        # Load every plot into a numpy array. The context manager releases
        # each image file as soon as its pixel data has been copied.
        images = []
        for imagefile in input:
            with Image.open(imagefile) as plot:
                images.append(np.array(plot))

        # np.vstack requires all arrays to agree on every axis but the first.
        # Check up front so a mismatch is reported in terms of the plots
        # rather than as a low-level concatenate error.
        shapes = {image.shape[1:] for image in images}
        if len(shapes) > 1:
            raise ValueError(
                "cannot stack plots with different widths or channel counts: "
                + ", ".join(str(shape) for shape in sorted(shapes))
            )

        # Stack the arrays vertically, convert back to a PIL image, and save.
        Image.fromarray(np.vstack(images)).save(output[0])

# Render the plot for a single event by running plotsignal.py on its id.
# The event's JSON file is declared as input, so the download rule is
# scheduled first whenever that file does not exist yet.
rule plot:
    input:
        event_json="events/{id}.json"
    output:
        event_png="events/{id}.png"
    shell:
        "python plotsignal.py {wildcards.id} -C events"
        
# Fetch the data for a single event by running hdfdownload.py on its id.
# The rule has no inputs, so it runs whenever the JSON file is missing.
# NOTE(review): the run logs show the script also writing per-detector
# .hdf5 files and events/catalog.json; only the JSON is declared as output.
rule download:
    output:
        event_json="events/{id}.json"
    shell:
        "python hdfdownload.py {wildcards.id} -C events"

Overwriting Snakefile


In [2]:
# Request one data file on a single core; only the download rule is needed.
!snakemake --cores 1 events/GW150914-v3.json

[33mBuilding DAG of jobs...[0m
[33mUsing shell: /bin/bash[0m
[33mProvided cores: 1 (use --cores to define parallelism)[0m
[33mRules claiming more threads will be scaled down.[0m
[33mJob counts:
	count	jobs
	1	download
	1[0m
[32m[0m
[32m[Mon Jul 26 16:29:38 2021][0m
[32mrule download:
    output: events/GW150914-v3.json
    jobid: 0
    wildcards: id=GW150914-v3[0m
[32m[0m
Downloading https://www.gw-openscience.org/eventapi/json/allevents to events/catalog.json.
Downloading https://www.gw-openscience.org/eventapi/json/GWTC-1-confident/GW150914/v3 to events/GW150914-v3.json.
Downloading https://www.gw-openscience.org/eventapi/json/GWTC-1-confident/GW150914/v3/H-H1_GWOSC_4KHZ_R1-1126259447-32.hdf5 to events/H1-GW150914-v3.hdf5.
Downloading https://www.gw-openscience.org/eventapi/json/GWTC-1-confident/GW150914/v3/L-L1_GWOSC_4KHZ_R1-1126259447-32.hdf5 to events/L1-GW150914-v3.hdf5.
[32m[Mon Jul 26 16:29:50 2021][0m
[32mFinished job 0.[0m
[32m1 of 1 steps (100%) done[

In [3]:
# Request one plot on a single core; its JSON input does not exist yet,
# so Snakemake schedules the download job ahead of the plot job.
!snakemake --cores 1 events/GW170817-v3.png

[33mBuilding DAG of jobs...[0m
[33mUsing shell: /bin/bash[0m
[33mProvided cores: 1 (use --cores to define parallelism)[0m
[33mRules claiming more threads will be scaled down.[0m
[33mJob counts:
	count	jobs
	1	download
	1	plot
	2[0m
[32m[0m
[32m[Mon Jul 26 16:30:24 2021][0m
[32mrule download:
    output: events/GW170817-v3.json
    jobid: 1
    wildcards: id=GW170817-v3[0m
[32m[0m
Downloading https://www.gw-openscience.org/eventapi/json/GWTC-1-confident/GW170817/v3 to events/GW170817-v3.json.
Downloading https://www.gw-openscience.org/eventapi/json/GWTC-1-confident/GW170817/v3/H-H1_GWOSC_4KHZ_R1-1187008867-32.hdf5 to events/H1-GW170817-v3.hdf5.
Downloading https://www.gw-openscience.org/eventapi/json/GWTC-1-confident/GW170817/v3/L-L1_GWOSC_4KHZ_R1-1187008867-32.hdf5 to events/L1-GW170817-v3.hdf5.
[32m[Mon Jul 26 16:30:36 2021][0m
[32mFinished job 1.[0m
[32m1 of 2 steps (50%) done[0m
[32m[0m
[32m[Mon Jul 26 16:30:36 2021][0m
[32mrule plot:
    input: events/G

In [4]:
# Dry run with no explicit target: list the jobs needed for the first rule
# in the Snakefile (stack) without executing any of them.
!snakemake --dryrun

[33mBuilding DAG of jobs...[0m
[33mJob counts:
	count	jobs
	8	download
	8	plot
	1	stack
	17[0m
[32m[0m
[32m[Mon Jul 26 16:31:58 2021][0m
[32mrule download:
    output: events/GW190408_181802-v1.json
    jobid: 17
    wildcards: id=GW190408_181802-v1[0m
[32m[0m
[32m[0m
[32m[Mon Jul 26 16:31:58 2021][0m
[32mrule download:
    output: events/GW170814-v3.json
    jobid: 15
    wildcards: id=GW170814-v3[0m
[32m[0m
[32m[0m
[32m[Mon Jul 26 16:31:58 2021][0m
[32mrule download:
    output: events/GW190521_074359-v1.json
    jobid: 11
    wildcards: id=GW190521_074359-v1[0m
[32m[0m
[32m[0m
[32m[Mon Jul 26 16:31:58 2021][0m
[32mrule download:
    output: events/GW170608-v3.json
    jobid: 16
    wildcards: id=GW170608-v3[0m
[32m[0m
[32m[0m
[32m[Mon Jul 26 16:31:58 2021][0m
[32mrule download:
    output: events/GW190828_063405-v1.json
    jobid: 14
    wildcards: id=GW190828_063405-v1[0m
[32m[0m
[32m[0m
[32m[Mon Jul 26 16:31:58 2021][0m
[32mrule dow

In [5]:
# Build the default target for real, allowing up to four cores so that
# independent download and plot jobs can run side by side.
!snakemake --cores 4

[33mBuilding DAG of jobs...[0m
[33mUsing shell: /bin/bash[0m
[33mProvided cores: 4[0m
[33mRules claiming more threads will be scaled down.[0m
[33mJob counts:
	count	jobs
	8	download
	8	plot
	1	stack
	17[0m
[32m[0m
[32m[Mon Jul 26 16:32:21 2021][0m
[32mrule download:
    output: events/GW190521_074359-v1.json
    jobid: 11
    wildcards: id=GW190521_074359-v1[0m
[32m[0m
[32m[0m
[32m[Mon Jul 26 16:32:21 2021][0m
[32mrule download:
    output: events/GW190408_181802-v1.json
    jobid: 17
    wildcards: id=GW190408_181802-v1[0m
[32m[0m
[32m[0m
[32m[Mon Jul 26 16:32:21 2021][0m
[32mrule download:
    output: events/GW170814-v3.json
    jobid: 15
    wildcards: id=GW170814-v3[0m
[32m[0m
[32m[0m
[32m[Mon Jul 26 16:32:21 2021][0m
[32mrule download:
    output: events/GW190412-v3.json
    jobid: 13
    wildcards: id=GW190412-v3[0m
[32m[0m
Downloading https://www.gw-openscience.org/eventapi/json/GWTC-2/GW190521_074359/v1 to events/GW190521_074359-v1.json

[33mJob counts:
	count	jobs
	1	stack
	1[0m
[32m[Mon Jul 26 16:32:36 2021][0m
[32mFinished job 0.[0m
[32m17 of 17 steps (100%) done[0m
[33mComplete log: /Users/mvallisneri/Desktop/Exercise Files/Ch04/.snakemake/log/2021-07-26T163221.348634.snakemake.log[0m


In [6]:
# Open the stacked image in the operating system's default viewer.
# The `open` command exists only on macOS, so pick the launcher by platform
# instead of hard-coding one shell command.
import os
import subprocess
import sys

stacked_image = os.path.join("events", "allevents.png")
if sys.platform.startswith("win"):
    # Equivalent to passing the file to `start` in cmd.exe.
    os.startfile(stacked_image)
elif sys.platform == "darwin":
    subprocess.run(["open", stacked_image])
else:
    # Other platforms: rely on the freedesktop.org launcher being installed.
    subprocess.run(["xdg-open", stacked_image])