In [1]:
import json
import os
from google.cloud import storage
import cromwell_manager as cwm

with open(os.path.expanduser('~/.ssh/mint_cromwell_config.json')) as f:
    cromwell_server = cwm.Cromwell(**json.load(f))

storage_client = storage.Client(project='broad-dsde-mint-dev')

os.environ['wdltool'] = '/Users/ajc/google_drive/software/wdltool-0.14.jar'

In [2]:
inputs_json = {
    'TestOptimus.expected_bam_hash': '06201722dc6c897261daf9d032377cc8',
    'TestOptimus.expected_matrix_hash': '69f3be6085e0c5f694b3cfa877b0eeaa',
    'TestOptimus.expected_matrix_summary_hash': 'dd513351d4e7688c97f7bf902ba2876f',
    'TestOptimus.expected_picard_metrics_hash': '1c3af42240367ae4dd4cc5f96e70b7ce',
    "TestOptimus.fastq_inputs": [
      [
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz",
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_R2_001.fastq.gz",
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_I1_001.fastq.gz"
      ],
      [
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1a_L007_R1_001.fastq.gz",
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_R2_001.fastq.gz",
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_I1_001.fastq.gz"
      ]
    ],
    "TestOptimus.whitelist": "gs://broad-dsde-mint-dev-teststorage/10x/whitelist/737K-august-2016.txt",
    "TestOptimus.tar_star_reference": "gs://broad-dsde-mint-dev-teststorage/demo/star.tar",
    "TestOptimus.sample_name": "pbmc8k_test",
    "TestOptimus.annotations_gtf": "gs://broad-dsde-mint-dev-teststorage/reference/hg19_ds/GSM1629193_hg19_ERCC.gtf.gz",
    "TestOptimus.ref_genome_fasta": "gs://broad-dsde-mint-dev-teststorage/demo/chr21.fa"
}

wdl = "TestOptimus.wdl"
# options_json = "../adapter_pipelines/Optimus/options.json"  # no caching 
workflow_dependencies = {
    'Optimus.wdl': '../optimus/Optimus.wdl',
    'ValidateOptimus.wdl': 'ValidateOptimus.wdl',
    "StarAlignBamSingleEnd.wdl": "../pipelines/tasks/StarAlignBamSingleEnd.wdl",
    "FastqToUBam.wdl": "../pipelines/tasks/FastqToUBam.wdl",
    "Attach10xBarcodes.wdl": "../pipelines/tasks/Attach10xBarcodes.wdl",
    "SplitBamByCellBarcode.wdl": "../pipelines/tasks/SplitBamByCellBarcode.wdl",
    "TagGeneExon.wdl": "../pipelines/tasks/TagGeneExon.wdl",
    "CorrectUmiMarkDuplicates.wdl": "../pipelines/tasks/CorrectUmiMarkDuplicates.wdl",
    "CollectMultiplePicardMetrics.wdl": "../pipelines/tasks/CollectMultiplePicardMetrics.wdl",
    "MergeBam.wdl": "../pipelines/tasks/MergeBam.wdl",
    "CreateCountMatrix.wdl": "../pipelines/tasks/CreateCountMatrix.wdl"
}

In [16]:
ss2_dependencies = json.load(open('smartseq2_single_sample/pr/dependencies.json'))
for k, v in ss2_dependencies.items():
    ss2_dependencies[k] = v.replace('/working', '..')
options_json = {"read_from_cache": False}
ss2_wdl = 'smartseq2_single_sample/pr/TestSmartSeq2SingleCellPR.wdl'
ss2_inputs = json.load(open('smartseq2_single_sample/pr/test_inputs.json'))

wf = cwm.Workflow.from_submission(
    wdl=ss2_wdl,
    inputs_json=ss2_inputs,
    storage_client=storage_client,
    options_json=options_json,
    workflow_dependencies=ss2_dependencies,
    cromwell_server=cromwell_server)

In [19]:
wf.status

{'id': 'c429116c-7d22-44b6-9734-e2b5b0c027c2', 'status': 'Succeeded'}

In [15]:
wf.logs

{'calls': {'TestSmartSeq2SingleCellPR.checker': [{'attempt': 1,
    'backendLogs': {'log': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestSmartSeq2SingleCellPR/df9529e8-2380-487e-85b0-0634a088ae5b/call-checker/checker.log'},
    'shardIndex': -1,
    'stderr': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestSmartSeq2SingleCellPR/df9529e8-2380-487e-85b0-0634a088ae5b/call-checker/checker-stderr.log',
    'stdout': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestSmartSeq2SingleCellPR/df9529e8-2380-487e-85b0-0634a088ae5b/call-checker/checker-stdout.log'}]},
 'id': 'df9529e8-2380-487e-85b0-0634a088ae5b'}

In [84]:
with open('test_inputs_temp.json', 'w') as f:
    json.dump(inputs_json, f)

In [85]:
%%bash
cat test_inputs_temp.json | jq . > test_inputs.json
rm test_inputs_temp.json

In [90]:
with open('dependencies_temp.json', 'w') as f:
    json.dump(workflow_dependencies, f)

In [91]:
%%bash
cat dependencies_temp.json | jq . > dependencies.json
rm dependencies_temp.json

In [93]:
with open('options_temp.json', 'w') as f:
    json.dump(options_json, f)

In [94]:
%%bash
cat options_temp.json | jq . > options.json
rm options_temp.json

In [64]:

cwm.Workflow.validate(
    wdl=wdl,
    inputs_json=inputs_json,
    storage_client=storage_client,
    workflow_dependencies=workflow_dependencies,
    cromwell_server=cromwell_server)

CWM:2017-12-05 17:37:19.381609:creating temporary directory
CWM:2017-12-05 17:37:19.382959:writing dependencies
CWM:2017-12-05 17:37:19.397641:writing wdl
CWM:2017-12-05 17:37:19.398868:running wdltool validate
CWM:2017-12-05 17:37:20.920296:validation successful
CWM:2017-12-05 17:37:21.232200:checking docker image humancellatlas/dropseqtools:1.12... OK.
CWM:2017-12-05 17:37:21.414104:checking docker image ubuntu:17.04... not found. Is image private?
CWM:2017-12-05 17:37:22.470443:checking docker image humancellatlas/picard:2.10.10... OK.
CWM:2017-12-05 17:37:22.705134:checking docker image humancellatlas/samtools:1.3.1... OK.
CWM:2017-12-05 17:37:22.976520:checking docker image humancellatlas/star:2.5.3a-40ead6e... OK.
CWM:2017-12-05 17:37:23.203105:checking docker image humancellatlas/python3-scientific:0.1.3... OK.


In [65]:
wf = cwm.Workflow.from_submission(
    wdl=wdl,
    inputs_json=inputs_json,
    storage_client=storage_client,
    workflow_dependencies=workflow_dependencies,
    cromwell_server=cromwell_server)

In [70]:
wf.status

{'id': 'f8ba61dc-bf8a-4c1e-8c69-f22608a571d8', 'status': 'Succeeded'}

In [69]:
!gsutil cat gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimus/9c92e1f3-ec44-4e25-8ee8-e2d4d7dd82de/call-checker/checker-stderr.log

bam_hash (06201722dc6c897261daf9d032377cc8  /cromwell_root/broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimus/9c92e1f3-ec44-4e25-8ee8-e2d4d7dd82de/call-target/Optimus/866e61bf-188b-4ec3-a8a4-abd12b83ea09/call-MergeBam/out.bam) did not match expected hash (a85318fbf34a5e3ce13ddfbd73732771)
matrix_hash (69f3be6085e0c5f694b3cfa877b0eeaa  /cromwell_root/broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimus/9c92e1f3-ec44-4e25-8ee8-e2d4d7dd82de/call-target/Optimus/866e61bf-188b-4ec3-a8a4-abd12b83ea09/call-DropSeqToolsDigitalExpression/digital_expression.txt.gz) did not match expected hash (69f3be6085e0c5f694b3cfa877b0eeaa)
matrix_summary_hash (dd513351d4e7688c97f7bf902ba2876f  /cromwell_root/broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimus/9c92e1f3-ec44-4e25-8ee8-e2d4d7dd82de/call-target/Optimus/866e61bf-188b-4ec3-a8a4-abd12b83ea09/call-DropSeqToolsDigitalExpression/digital_expression_summary.txt) did not match expected hash (dd51335

In [49]:
failing_inputs_json = {
    'TestOptimus.expected_bam_hash': 'IShouldDefinitelyNotPassBecauseImTheWrongHash',
    'TestOptimus.expected_matrix_hash': '69f3be6085e0c5f694b3cfa877b0eeaa',
    'TestOptimus.expected_matrix_summary_hash': 'dd513351d4e7688c97f7bf902ba2876f',
    'TestOptimus.expected_picard_metrics_hash': '36101cca60876ab8733729961366322d',
    "TestOptimus.fastq_inputs": [
      [
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz",
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_R2_001.fastq.gz",
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_I1_001.fastq.gz"
      ],
      [
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz",
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_R2_001.fastq.gz",
          "gs://broad-dsde-mint-dev-teststorage/10x/demo/fastqs/pbmc8k_S1_L007_I1_001.fastq.gz"
      ]
    ],
    "TestOptimus.whitelist": "gs://broad-dsde-mint-dev-teststorage/10x/whitelist/737K-august-2016.txt",
    "TestOptimus.tar_star_reference": "gs://broad-dsde-mint-dev-teststorage/demo/star.tar",
    "TestOptimus.sample_name": "pbmc8k_test",
    "TestOptimus.annotations_gtf": "gs://broad-dsde-mint-dev-teststorage/reference/hg19_ds/GSM1629193_hg19_ERCC.gtf.gz",
    "TestOptimus.ref_genome_fasta": "gs://broad-dsde-mint-dev-teststorage/demo/chr21.fa"
}

wdl = "TestOptimus.wdl"
# options_json = "../adapter_pipelines/Optimus/options.json"  # no caching 
workflow_dependencies = {
    'Optimus.wdl': '../optimus/Optimus.wdl',
    'ValidateOptimus.wdl': 'ValidateOptimus.wdl',
    "StarAlignBamSingleEnd.wdl": "../pipelines/tasks/StarAlignBamSingleEnd.wdl",
    "FastqToUBam.wdl": "../pipelines/tasks/FastqToUBam.wdl",
    "Attach10xBarcodes.wdl": "../pipelines/tasks/Attach10xBarcodes.wdl",
    "SplitBamByCellBarcode.wdl": "../pipelines/tasks/SplitBamByCellBarcode.wdl",
    "TagGeneExon.wdl": "../pipelines/tasks/TagGeneExon.wdl",
    "CorrectUmiMarkDuplicates.wdl": "../pipelines/tasks/CorrectUmiMarkDuplicates.wdl",
    "CollectMultiplePicardMetrics.wdl": "../pipelines/tasks/CollectMultiplePicardMetrics.wdl",
    "MergeBam.wdl": "../pipelines/tasks/MergeBam.wdl",
    "CreateCountMatrix.wdl": "../pipelines/tasks/CreateCountMatrix.wdl"
}

In [50]:
failing_wf = cwm.Workflow.from_submission(
    wdl=wdl,
    inputs_json=failing_inputs_json,
    storage_client=storage_client,
    workflow_dependencies=workflow_dependencies,
    cromwell_server=cromwell_server)

In [51]:
wf.status

{'id': '6bfd3f85-1af5-4201-8808-faab002a9348', 'status': 'Succeeded'}

In [52]:
wf.logs

{'calls': {'TestOptimus.checker': [{'attempt': 1,
    'backendLogs': {'log': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimus/6bfd3f85-1af5-4201-8808-faab002a9348/call-checker/checker.log'},
    'shardIndex': -1,
    'stderr': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimus/6bfd3f85-1af5-4201-8808-faab002a9348/call-checker/checker-stderr.log',
    'stdout': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimus/6bfd3f85-1af5-4201-8808-faab002a9348/call-checker/checker-stdout.log'}]},
 'id': '6bfd3f85-1af5-4201-8808-faab002a9348'}

In [54]:
!gsutil cat gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimus/6bfd3f85-1af5-4201-8808-faab002a9348/call-checker/checker-stderr.log

/cromwell_root/exec.sh: line 16: [: too many arguments
/cromwell_root/exec.sh: line 21: [: too many arguments
/cromwell_root/exec.sh: line 25: [: too many arguments
/cromwell_root/exec.sh: line 29: [: too many arguments
/cromwell_root/exec.sh: line 33: [: too many arguments


## Test PR and scientific tests from config files

In [2]:
ls optimus/pr/

TestOptimusPR.wdl    dependencies.json
ValidateOptimus.wdl  test_inputs.json


### PR tests

In [137]:
wdl='optimus/pr/TestOptimusPR.wdl'
dependencies='optimus/pr/dependencies.json'
with open(dependencies, 'r') as f:
    dependencies = json.load(f)
inputs='optimus/pr/test_inputs.json'
options='options.json'

In [94]:
pr_wf = cwm.Workflow.validate(
    wdl=wdl,
    inputs_json=inputs,
    storage_client=storage_client,
    workflow_dependencies=dependencies,
    cromwell_server=cromwell_server)

CWM:2017-12-12 09:14:10.224205:creating temporary directory
CWM:2017-12-12 09:14:10.225869:writing dependencies
CWM:2017-12-12 09:14:10.236165:writing wdl
CWM:2017-12-12 09:14:10.236991:running wdltool validate
CWM:2017-12-12 09:14:11.614211:validation successful
CWM:2017-12-12 09:14:12.873793:checking docker image humancellatlas/dropseqtools:1.12... OK.
CWM:2017-12-12 09:14:13.144924:checking docker image humancellatlas/python3-scientific:0.1.5... OK.
CWM:2017-12-12 09:14:13.392328:checking docker image humancellatlas/picard:2.10.10... OK.
CWM:2017-12-12 09:14:13.649861:checking docker image humancellatlas/star:2.5.3a-40ead6e... OK.
CWM:2017-12-12 09:14:13.902457:checking docker image humancellatlas/samtools:1.3.1... OK.


In [138]:
pr_wf = cwm.Workflow.from_submission(
    wdl=wdl,
    inputs_json=inputs,
    storage_client=storage_client,
    workflow_dependencies=dependencies,
    cromwell_server=cromwell_server,
    options_json=options)

In [139]:
pr_wf2 = cwm.Workflow.from_submission(
    wdl=wdl,
    inputs_json=inputs,
    storage_client=storage_client,
    workflow_dependencies=dependencies,
    cromwell_server=cromwell_server,
    options_json=options)

In [109]:
cromwell_server.metadata(pr_wf.id, open_browser=True)

<Response [200]>

In [117]:
pr_wf2.logs

{'calls': {'TestOptimusPR.checker': [{'attempt': 1,
    'backendLogs': {'log': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/80af9921-f9ad-4b45-b9a5-a1361c9f306b/call-checker/checker.log'},
    'shardIndex': -1,
    'stderr': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/80af9921-f9ad-4b45-b9a5-a1361c9f306b/call-checker/checker-stderr.log',
    'stdout': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/80af9921-f9ad-4b45-b9a5-a1361c9f306b/call-checker/checker-stdout.log'}]},
 'id': '80af9921-f9ad-4b45-b9a5-a1361c9f306b'}

In [136]:
cromwell_server.outputs(pr_wf.metadata['calls']['TestOptimusPR.target'][0]['subWorkflowId']).json()['outputs']['Optimus.bam']

'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/d3858e86-9c35-4a36-a110-d02588a99d1e/call-target/Optimus/8232e6a3-6166-4b91-924d-043d84a3e276/call-MergeBam/out_sorted.bam'

In [134]:
cromwell_server.outputs(pr_wf2.metadata['calls']['TestOptimusPR.target'][0]['subWorkflowId']).json()['outputs']['Optimus.picard_metrics']

'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/80af9921-f9ad-4b45-b9a5-a1361c9f306b/call-target/Optimus/8f360c7a-5724-499a-ac8c-d9b243f26ddc/call-CollectMultipleMetrics/pbmc8k_test.tar.gz'

In [142]:
print(pr_wf.logs)
print(pr_wf2.logs)

{'calls': {'TestOptimusPR.checker': [{'stdout': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/63b674a5-5deb-4194-b920-c15050eb2b8d/call-checker/checker-stdout.log', 'shardIndex': -1, 'stderr': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/63b674a5-5deb-4194-b920-c15050eb2b8d/call-checker/checker-stderr.log', 'attempt': 1, 'backendLogs': {'log': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/63b674a5-5deb-4194-b920-c15050eb2b8d/call-checker/checker.log'}}]}, 'id': '63b674a5-5deb-4194-b920-c15050eb2b8d'}
{'calls': {'TestOptimusPR.checker': [{'stdout': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/bde37bd9-b81d-4c96-9df8-5b7035c062cc/call-checker/checker-stdout.log', 'shardIndex': -1, 'stderr': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/bde37bd9-b81d-4c96-9df8-5b7035c062cc/call-checker/checker-stderr.log', 'attempt': 1, 'bac

In [145]:
!gsutil cat gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/bde37bd9-b81d-4c96-9df8-5b7035c062cc/call-checker/checker-stderr.log

bam_hash (8c03262b5bd383abd237e5c1dab202bd) did not match expected hash (b58ad655d246e0c50a12d492bf4567dd)
picard_metrics_hash (7b7be5c9a2236920ca09f05811dca6d5) did not match expected hash (3a1d63932057b18ae8eb2463b1d5236e)


In [146]:
!gsutil cat gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/TestOptimusPR/63b674a5-5deb-4194-b920-c15050eb2b8d/call-checker/checker-stderr.log

bam_hash (8c03262b5bd383abd237e5c1dab202bd) did not match expected hash (b58ad655d246e0c50a12d492bf4567dd)
picard_metrics_hash (7b7be5c9a2236920ca09f05811dca6d5) did not match expected hash (3a1d63932057b18ae8eb2463b1d5236e)


In [143]:
# when this one fails, update the @RG header
pr_wf2.status

{'id': 'bde37bd9-b81d-4c96-9df8-5b7035c062cc', 'status': 'Failed'}

In [144]:
# when this one fails, update the @RG header
pr_wf.status

{'id': '63b674a5-5deb-4194-b920-c15050eb2b8d', 'status': 'Running'}

### Scientific tests

In [65]:
sci_wdl='optimus/scientific/TestOptimusScientific.wdl'
sci_dependencies='optimus/scientific/dependencies.json'
with open(sci_dependencies, 'r') as f:
    sci_dependencies = json.load(f)
sci_inputs='optimus/scientific/test_inputs.json'

In [46]:
sci_wf = cwm.Workflow.validate(
    wdl=sci_wdl,
    inputs_json=sci_inputs,
    storage_client=storage_client,
    workflow_dependencies=sci_dependencies,
    cromwell_server=cromwell_server)

CWM:2017-12-11 19:17:32.595019:creating temporary directory
CWM:2017-12-11 19:17:32.596616:writing dependencies
CWM:2017-12-11 19:17:32.606036:writing wdl
CWM:2017-12-11 19:17:32.606800:running wdltool validate
CWM:2017-12-11 19:17:33.811435:validation successful
CWM:2017-12-11 19:17:34.562100:checking docker image humancellatlas/dropseqtools:1.12... OK.
CWM:2017-12-11 19:17:35.050001:checking docker image humancellatlas/python3-scientific:0.1.5... OK.
CWM:2017-12-11 19:17:35.588630:checking docker image humancellatlas/picard:2.10.10... OK.
CWM:2017-12-11 19:17:36.142646:checking docker image humancellatlas/star:2.5.3a-40ead6e... OK.
CWM:2017-12-11 19:17:36.779175:checking docker image humancellatlas/samtools:1.3.1... OK.


In [66]:
sci_wf = cwm.Workflow.from_submission(
    wdl=sci_wdl,
    inputs_json=sci_inputs,
    storage_client=storage_client,
    workflow_dependencies=sci_dependencies,
    cromwell_server=cromwell_server)

In [60]:
sci_wf.status

{'id': '91fef86e-7b1b-4260-bc09-cc357a730634', 'status': 'Failed'}

In [69]:
sci_wf.timing()
cromwell_server.metadata(sci_wf.id, open_browser=True)

<Response [200]>