## Load Libraries

In [9]:
import json
import os
from google.cloud import storage
import cromwell_manager as cwm

# Build the Cromwell client from the keyword arguments stored in the user's
# JSON config file under ~/.ssh.
cromwell_config_path = os.path.expanduser('~/.ssh/mint_cromwell_config.json')
with open(cromwell_config_path) as config_file:
    cromwell_config = json.load(config_file)
cromwell_server = cwm.Cromwell(**cromwell_config)

# Google Cloud Storage client for the 'broad-dsde-mint-dev' project.
storage_client = storage.Client(project='broad-dsde-mint-dev')

## Define Testing Inputs

In [10]:
# Location of the wdltool jar, published through the 'wdltool' environment
# variable (presumably read by cromwell_manager when validating — confirm).
# The machine-specific path below is only a fallback: a value that is already
# set in the environment is kept, so other users can point at their own copy
# of the jar without editing this cell.
if 'wdltool' not in os.environ:
    os.environ['wdltool'] = '/Users/carra1/google_drive/software/wdltool-0.14.jar'

In [193]:
# Demo inputs for the Optimus workflow. The same lane-7 read set (R1, R2, I1)
# is supplied twice, so "Optimus.fastq_inputs" holds two identical entries.
demo_lane_fastqs = [
    "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz",
    "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_R2_001.fastq.gz",
    "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_I1_001.fastq.gz",
]

inputs_json = {}
# Each entry gets its own list object, as in a literal nested list.
inputs_json["Optimus.fastq_inputs"] = [list(demo_lane_fastqs), list(demo_lane_fastqs)]
inputs_json["Optimus.whitelist"] = "gs://broad-dsde-mint-dev-teststorage/10x/whitelist/737K-august-2016.txt"
inputs_json["Optimus.tar_star_reference"] = "gs://broad-dsde-mint-dev-teststorage/demo/star.tar"
inputs_json["Optimus.sample_name"] = "pbmc8k_test"
# Alternative annotation file kept for reference:
#   "gs://broad-dsde-mint-dev-teststorage/demo/gencodev19_chr21.gtf"
inputs_json["Optimus.annotations_gtf"] = "gs://broad-dsde-mint-dev-teststorage/reference/hg19_ds/GSM1629193_hg19_ERCC.gtf.gz"
inputs_json["Optimus.ref_genome_fasta"] = "gs://broad-dsde-mint-dev-teststorage/demo/chr21.fa"

# Workflow dependencies handed over alongside the main WDL: each key is a WDL
# file name and each value is the relative path of that file.
shared_task_dir = "../pipelines/tasks/"
shared_task_wdls = [
    "StarAlignBamSingleEnd.wdl",
    "FastqToUBam.wdl",
    "Attach10xBarcodes.wdl",
    "SplitBamByCellBarcode.wdl",
    "TagGeneExon.wdl",
    "CorrectUmiMarkDuplicates.wdl",
    "CollectMultiplePicardMetrics.wdl",
    "MergeBam.wdl",
    "CreateCountMatrix.wdl",
]
dependencies_json = {
    wdl_name: shared_task_dir + wdl_name for wdl_name in shared_task_wdls
}
# This dependency is referenced without the shared tasks-directory prefix.
dependencies_json["AlignTagCorrectUmis.wdl"] = "AlignTagCorrectUmis.wdl"

# File name of the main workflow.
wdl = "Optimus.wdl"

In [209]:
# Save the demo inputs as JSON in the current working directory.
with open('example_test_inputs.json', 'w') as inputs_file:
    json.dump(obj=inputs_json, fp=inputs_file)

## Validate & Run WDL

In [182]:
# Validate the demo workflow. The log printed by this call shows it running
# `wdltool validate` and checking each docker image.
demo_validation_args = {
    'wdl': wdl,
    'inputs_json': inputs_json,
    'storage_client': storage_client,
    'workflow_dependencies': dependencies_json,
    'cromwell_server': cromwell_server,
}
cwm.Workflow.validate(**demo_validation_args)

CWM:2017-11-25 17:59:00.878349:creating temporary directory
CWM:2017-11-25 17:59:00.878811:writing dependencies
CWM:2017-11-25 17:59:00.893109:writing wdl
CWM:2017-11-25 17:59:00.894301:running wdltool validate
CWM:2017-11-25 17:59:02.271670:validation successful
CWM:2017-11-25 17:59:04.194289:checking docker image humancellatlas/dropseqtools:1.12... OK.
CWM:2017-11-25 17:59:04.770922:checking docker image humancellatlas/samtools:1.3.1... OK.
CWM:2017-11-25 17:59:08.127880:checking docker image humancellatlas/picard:2.10.10... OK.
CWM:2017-11-25 17:59:20.352495:checking docker image humancellatlas/star:2.5.3a-40ead6e... OK.
CWM:2017-11-25 17:59:21.269367:checking docker image humancellatlas/python3-scientific:0.1.3... OK.


In [177]:
# Create the demo workflow handle through Workflow.from_submission
# (presumably this submits the run to the Cromwell server — confirm).
demo_submission_args = {
    'wdl': wdl,
    'inputs_json': inputs_json,
    'storage_client': storage_client,
    'workflow_dependencies': dependencies_json,
    'cromwell_server': cromwell_server,
}
wf = cwm.Workflow.from_submission(**demo_submission_args)

Display the status and the completed run ID

In [178]:
# Bare expression: the notebook displays the workflow's id and status.
wf.status

{'id': 'c98322d7-406d-4e3f-96c8-e2bd1429e645', 'status': 'Succeeded'}

Display the outputs of the rMVP

In [180]:
# Bare expression: the notebook displays the workflow's id and output locations.
wf.outputs

{'id': 'c98322d7-406d-4e3f-96c8-e2bd1429e645',
 'outputs': {'Optimus.bam': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-MergeBam/out.bam',
  'Optimus.duplicate_metrics': [['gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-AlignTagCorrectUmis/shard-0/AlignTagCorrectUmis/d250b98d-a2f0-4b19-be86-e347bd076a5f/call-CorrectUmiMarkDuplicates/shard-0/duplicate_metrics.txt']],
  'Optimus.matrix': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-DropSeqToolsDigitalExpression/digital_expression.txt.gz',
  'Optimus.matrix_summary': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-DropSeqToolsDigitalExpression/digital_expression_summary.txt',
  'Optimus.picard_metrics': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/Op

In [210]:
# Save the demo workflow's outputs as JSON in the current working directory.
with open('example_test_outputs.json', 'w') as outputs_file:
    json.dump(obj=wf.outputs, fp=outputs_file)

Open up the completed timing graph and the complete run metadata

In [213]:
# Show the timing view for the demo run, then request the run's metadata with
# open_browser=True. The metadata call is last, so its result is what the
# cell displays.
wf.timing()
demo_run_id = wf.id
cromwell_server.metadata(demo_run_id, open_browser=True)

<Response [200]>

## Test at Scale

In [199]:
# List the contents of the pbmc8k fastq folder in the test-storage bucket
# (shell escape; the comment sits on its own line so it is not passed to gsutil).
!gsutil ls gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/

gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/metadata.json
gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L007_I1_001.fastq.gz
gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz
gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L007_R2_001.fastq.gz
gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L008_I1_001.fastq.gz
gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L008_R1_001.fastq.gz
gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L008_R2_001.fastq.gz


In [200]:
# Scale-test inputs for the Optimus workflow: one read set (R1, R2, I1) for
# each of lanes L007 and L008 from the pbmc8k fastq folder.
scale_lane7_fastqs = [
    "gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz",
    "gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L007_R2_001.fastq.gz",
    "gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L007_I1_001.fastq.gz",
]
scale_lane8_fastqs = [
    "gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L008_R1_001.fastq.gz",
    "gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L008_R2_001.fastq.gz",
    "gs://broad-dsde-mint-dev-teststorage/10x/pbmc8k/fastqs/pbmc8k_S1_L008_I1_001.fastq.gz",
]

scale_inputs_json = {}
scale_inputs_json["Optimus.fastq_inputs"] = [scale_lane7_fastqs, scale_lane8_fastqs]
scale_inputs_json["Optimus.whitelist"] = "gs://broad-dsde-mint-dev-teststorage/10x/whitelist/737K-august-2016.txt"
scale_inputs_json["Optimus.tar_star_reference"] = "gs://broad-dsde-mint-dev-teststorage/reference/Hg19_kco/star_hg19_gencode_v19.tar"
scale_inputs_json["Optimus.sample_name"] = "pbmc8k_optimus_test"
# Alternative annotation file kept for reference:
#   "gs://broad-dsde-mint-dev-teststorage/demo/gencodev19_chr21.gtf"
scale_inputs_json["Optimus.annotations_gtf"] = "gs://broad-dsde-mint-dev-teststorage/reference/hg19_ds/GSM1629193_hg19_ERCC.gtf.gz"
scale_inputs_json["Optimus.ref_genome_fasta"] = "gs://broad-dsde-mint-dev-teststorage/reference/Hg19_kco/Hg19.fa"

In [207]:
# Create the scale-test workflow handle through Workflow.from_submission,
# using the same WDL and dependencies as the demo run with the two-lane inputs
# (presumably this submits the run to the Cromwell server — confirm).
scale_submission_args = {
    'wdl': wdl,
    'inputs_json': scale_inputs_json,
    'storage_client': storage_client,
    'workflow_dependencies': dependencies_json,
    'cromwell_server': cromwell_server,
}
scale_wf = cwm.Workflow.from_submission(**scale_submission_args)

In [202]:
# Bare expression: the notebook displays the scale workflow's id and status.
scale_wf.status

{'id': '7533b502-34ba-4175-832b-d7031e694248', 'status': 'Running'}

In [214]:
# Show the timing view for the scale run, then request the run's metadata with
# open_browser=True. The metadata call is last, so its result is what the
# cell displays.
scale_wf.timing()
scale_run_id = scale_wf.id
cromwell_server.metadata(scale_run_id, open_browser=True)

<Response [200]>