## Load packages and define workflow inputs

In [3]:
import json
import os
from google.cloud import storage
import cromwell_manager as cwm

# Build the Cromwell client from the JSON config kept under ~/.ssh; the file's
# top-level keys are forwarded unchanged as keyword arguments to cwm.Cromwell.
cromwell_config_path = os.path.expanduser('~/.ssh/mint_cromwell_config.json')
with open(cromwell_config_path) as config_file:
    cromwell_server = cwm.Cromwell(**json.load(config_file))

# Google Cloud Storage client bound to the project used for these runs; it is
# handed to the workflow submission further down.
gcs_project = 'broad-dsde-mint-dev'
storage_client = storage.Client(project=gcs_project)

# Location of the wdltool jar, published through the `wdltool` environment
# variable (presumably read by cromwell_manager when handling WDLs -- confirm).
# A value already exported in the environment wins, so the notebook is not tied
# to one machine; the absolute path below is only the fallback.
if 'wdltool' not in os.environ:
    os.environ['wdltool'] = '/Users/ajc/google_drive/software/wdltool-0.14.jar'

In [8]:
# Workflow definition submitted for every genome-creation run in this notebook
# (path is relative to the notebook's working directory).
wdl = "../../../skylab/library/accessory_workflows/star_mkref.wdl"

In [73]:
# Sub-workflow files the main WDL depends on, keyed by task name; passed as
# `workflow_dependencies` when the workflow is submitted.
dependencies = dict(StarMkref='../../../skylab/library/tasks/StarMkref.wdl')

In [69]:
# star_mkref inputs for the GRCm38_GRCh38_Gencode reference: the FASTA and the
# GTF annotation stored in the hca-dcp-mint-test-data bucket.
mm38_hg38_inputs_json = dict([
    (
        'star_mkref.fasta_file',
        'gs://hca-dcp-mint-test-data/reference/GRCm38_GRCh38_Gencode/mmhg.fa',
    ),
    (
        'star_mkref.annotation_file',
        'gs://hca-dcp-mint-test-data/reference/GRCm38_GRCh38_Gencode/mmhg.gtf',
    ),
])

In [48]:
# star_mkref inputs for the Flu_GRCh38_Gencode reference: the FASTA and the
# GTF annotation stored in the hca-dcp-mint-test-data bucket.
flu_hg38_inputs_json = dict([
    (
        'star_mkref.fasta_file',
        'gs://hca-dcp-mint-test-data/reference/Flu_GRCh38_Gencode/hg_flu.fa',
    ),
    (
        'star_mkref.annotation_file',
        'gs://hca-dcp-mint-test-data/reference/Flu_GRCh38_Gencode/hg_flu.gtf',
    ),
])

In [40]:
# star_mkref inputs for the GRCm38_Gencode reference: the primary-assembly
# FASTA and the Gencode vM16 GTF stored in the hca-dcp-mint-test-data bucket.
mm38_inputs_json = dict([
    (
        'star_mkref.fasta_file',
        'gs://hca-dcp-mint-test-data/reference/GRCm38_Gencode/GRCm38.primary_assembly.genome.fa',
    ),
    (
        'star_mkref.annotation_file',
        'gs://hca-dcp-mint-test-data/reference/GRCm38_Gencode/gencode.vM16.primary_assembly.annotation.gtf',
    ),
])

In [19]:
ls ../../../skylab/library/tasks/

Attach10xBarcodes.wdl             StarAlignFastqSingleEnd.wdl
CollectMultiplePicardMetrics.wdl  StarMkref.wdl
CorrectUmiMarkDuplicates.wdl      TagGeneExon.wdl
CreateCountMatrix.wdl             featurecounts.wdl
FastqToUBam.wdl                   hisat2.wdl
Kallisto.wdl                      htseq.wdl
MergeSortBam.wdl                  picard.wdl
SplitBamByCellBarcode.wdl         rsem.wdl
StarAlignBamSingleEnd.wdl         star.wdl


In [74]:
!rm -r dependencies.zip
!rm -r StarMkref.wdl

In [75]:
%%bash
# Package the StarMkref task WDL into dependencies.zip in the notebook directory.
# Fail the cell on the first error instead of carrying on with a missing file.
set -euo pipefail
# -q: quiet. -j: store the entry under its bare file name (StarMkref.wdl), so no
# temporary copy in the working directory is needed. No -c: that flag asks for a
# per-entry comment, which has no place in a non-interactive cell.
zip -qj dependencies.zip ../../../skylab/library/tasks/StarMkref.wdl

In [41]:
# Save the GRCm38_GRCh38 inputs next to the notebook as mmhg38_inputs.json.
with open('mmhg38_inputs.json', 'w') as inputs_file:
    inputs_file.write(json.dumps(mm38_hg38_inputs_json))

In [49]:
# Save the Flu_GRCh38 inputs next to the notebook as flu_hg38_inputs.json.
with open('flu_hg38_inputs.json', 'w') as inputs_file:
    inputs_file.write(json.dumps(flu_hg38_inputs_json))

In [42]:
# Save the GRCm38 inputs next to the notebook as mm38_inputs.json.
with open('mm38_inputs.json', 'w') as inputs_file:
    inputs_file.write(json.dumps(mm38_inputs_json))

## Run all the workflows

In [22]:
# Submit star_mkref to the Cromwell server with the GRCm38_GRCh38 inputs.
# NOTE(review): as written this cell submits only the mm38_hg38 input set;
# presumably it was edited and re-run by hand for the other genomes -- confirm.
submission_kwargs = dict(
    wdl=wdl,
    inputs_json=mm38_hg38_inputs_json,
    storage_client=storage_client,
    workflow_dependencies=dependencies,
    cromwell_server=cromwell_server,
)
wf = cwm.Workflow.from_submission(**submission_kwargs)

Successful workflow ids:
```
mm38 = 2d2577e8-d1fb-43a2-b85a-b8e0a7527d05
mm38_hg38 = b4f016c8-9f25-45f8-8c17-77f7e7bf9b53
mm38_hg38 # 2 = 92aea1d0-f47d-4348-8dbd-ce570840fcdc
hg38_flu = e4e6ab25-4c74-4a7b-bc17-798a1f6bbc31
```

## Copy the outputs to the public bucket

In [64]:
# Ask Cromwell for the outputs of the mm38 run and show the decoded JSON body.
mm38_outputs_response = cromwell_server.outputs('2d2577e8-d1fb-43a2-b85a-b8e0a7527d05')
mm38_outputs_response.json()

{'id': '2d2577e8-d1fb-43a2-b85a-b8e0a7527d05',
 'outputs': {'star_mkref.genome': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/star_mkref/2d2577e8-d1fb-43a2-b85a-b8e0a7527d05/call-StarMkref/genome.tar'}}

In [66]:
# Ask Cromwell for the outputs of the first mm38_hg38 run and show the decoded JSON body.
mm38_hg38_outputs_response = cromwell_server.outputs('b4f016c8-9f25-45f8-8c17-77f7e7bf9b53')
mm38_hg38_outputs_response.json()

{'id': 'b4f016c8-9f25-45f8-8c17-77f7e7bf9b53',
 'outputs': {'star_mkref.genome': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/star_mkref/b4f016c8-9f25-45f8-8c17-77f7e7bf9b53/call-StarMkref/genome.tar'}}

In [65]:
# Ask Cromwell for the outputs of the hg38_flu run and show the decoded JSON body.
hg38_flu_outputs_response = cromwell_server.outputs('e4e6ab25-4c74-4a7b-bc17-798a1f6bbc31')
hg38_flu_outputs_response.json()

{'id': 'e4e6ab25-4c74-4a7b-bc17-798a1f6bbc31',
 'outputs': {'star_mkref.genome': 'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/star_mkref/e4e6ab25-4c74-4a7b-bc17-798a1f6bbc31/call-StarMkref/genome.tar'}}

In [None]:
%%bash
# Publish the genome.tar produced by the mm38 run (2d2577e8-...) into the
# GRCm38_Gencode reference folder as GRCm38_star_genome.tar.
genome_tar=gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/star_mkref/2d2577e8-d1fb-43a2-b85a-b8e0a7527d05/call-StarMkref/genome.tar
published_tar=gs://hca-dcp-mint-test-data/reference/GRCm38_Gencode/GRCm38_star_genome.tar
gsutil -m cp "${genome_tar}" "${published_tar}"

In [None]:
%%bash
# Publish the genome.tar produced by the second mm38_hg38 run (92aea1d0-...,
# listed as "mm38_hg38 # 2" among the successful workflow ids) into the
# GRCm38_GRCh38_Gencode reference folder as GRCm38_GRCh38_star_genome.tar.
# NOTE(review): the outputs lookup earlier in this section inspects the first
# mm38_hg38 run (b4f016c8-...), not this one -- confirm run # 2 is the intended source.
genome_tar=gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/star_mkref/92aea1d0-f47d-4348-8dbd-ce570840fcdc/call-StarMkref/genome.tar
published_tar=gs://hca-dcp-mint-test-data/reference/GRCm38_GRCh38_Gencode/GRCm38_GRCh38_star_genome.tar
gsutil -m cp "${genome_tar}" "${published_tar}"

In [None]:
%%bash
# Publish the genome.tar produced by the hg38_flu run (e4e6ab25-...) into the
# Flu_GRCh38_Gencode reference folder as Flu_GRCh38_star_genome.tar.
genome_tar=gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/star_mkref/e4e6ab25-4c74-4a7b-bc17-798a1f6bbc31/call-StarMkref/genome.tar
published_tar=gs://hca-dcp-mint-test-data/reference/Flu_GRCh38_Gencode/Flu_GRCh38_star_genome.tar
gsutil -m cp "${genome_tar}" "${published_tar}"

Check that the references made it into the bucket (they did).

In [67]:
# List the top level of the reference folder in the hca-dcp-mint-test-data bucket.
!gsutil ls gs://hca-dcp-mint-test-data/reference/

gs://hca-dcp-mint-test-data/reference/
gs://hca-dcp-mint-test-data/reference/GRCh38_gencode.v27.refFlat.txt
gs://hca-dcp-mint-test-data/reference/gencode.v27.rRNA.interval_list
gs://hca-dcp-mint-test-data/reference/Flu_GRCh38_Gencode/
gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/
gs://hca-dcp-mint-test-data/reference/GRCm38_GRCh38_Gencode/
gs://hca-dcp-mint-test-data/reference/GRCm38_Gencode/
gs://hca-dcp-mint-test-data/reference/HISAT2/
gs://hca-dcp-mint-test-data/reference/rsem_ref/
