# Setup Cromwell GVS Stats Input

Starting a job on a `cromwell` server requires a source WDL file and a configured inputs file. This notebook builds the inputs file and submits the job.

In [None]:
import json
import os
from ipywidgets import widgets

## Setup variables

In [None]:
# --- Values to edit per run --------------------------------------------------
# Both must match what was used in the GVS run. They are written into the
# workflow inputs JSON later in this notebook (dataset name, filter set name,
# and extract prefix).
GVS_BQ_DATASET = 'gvs_testing'
CALLSET_IDENTIFIER = 'willyn-300-samples-4'

# --- Workflow identity -------------------------------------------------------
# Workflow name and the matching WDL file name derived from it.
MAIN_WORKFLOW = "GvsCallsetStatistics"
WDL_FILE = MAIN_WORKFLOW + ".wdl"

# --- Environment -------------------------------------------------------------
# Read from the environment; this is None when the variable is not set.
GOOGLE_CLOUD_PROJECT = os.environ.get('GOOGLE_CLOUD_PROJECT')


The cell below creates the `~/terra-tutorials/cromwell` directory if it doesn't already exist. This directory holds files such as the Cromwell server log, which another notebook may have created.

In [None]:
# Directory for Cromwell tutorial files, and the path of the server log inside
# it (the log itself is not created here).
CROMWELL_EXAMPLES_DIR = os.path.expanduser('~/terra-tutorials/cromwell')
CROMWELL_SERVER_LOG = f'{CROMWELL_EXAMPLES_DIR}/cromwell.server.log'

# Create the directory (and any missing parents) in Python rather than via
# `!mkdir -p {…}`: the shell form interpolates the path unquoted, so a home
# directory containing spaces or shell metacharacters would break it.
# exist_ok=True keeps the cell safe to re-run, matching `mkdir -p`.
os.makedirs(CROMWELL_EXAMPLES_DIR, exist_ok=True)

In [None]:
# We need the "main" wdl
!cp gvs_wdls/GvsCallsetStatistics.wdl .

## Build json input file


In [None]:
input_dict = {
    'GvsCallsetStatistics.project_id': GOOGLE_CLOUD_PROJECT,
    'GvsCallsetStatistics.dataset_name': GVS_BQ_DATASET,
    'GvsCallsetStatistics.filter_set_name': CALLSET_IDENTIFIER,
    'GvsCallsetStatistics.extract_prefix': CALLSET_IDENTIFIER,
    'GvsCallsetStatistics.bq_location': 'us-central1'
}

with open('gvs_stats.inputs', 'w') as outfile:
    json.dump(input_dict, outfile, indent=4)

!head gvs_stats.inputs

## Build empty options file

In [None]:
# No workflow options are set here; write an empty JSON object so there is an
# options file to pass on the submit command line.
empty_options = {}
with open('gvs_options.json', 'w') as options_file:
    options_file.write(json.dumps(empty_options, indent=4))

## Submit job to server

In [None]:
%%bash

# Record localhost:8000 as the server address in the cromshell config file,
# creating the containing directory first. Any existing config file is
# overwritten. Presumably cromshell reads this file to locate the Cromwell
# server — confirm against the cromshell documentation.
config_file=~/.cromshell/cromwell_server.config

mkdir -p "$(dirname "${config_file}")"
printf '%s\n' 'localhost:8000' > "${config_file}"

In [None]:
!cromshell submit GvsCallsetStatistics.wdl gvs_stats.inputs gvs_options.json gvs_wdls.zip

### Check status of job

In [None]:
# Ask cromshell for the job status. No workflow ID is passed, so this
# presumably reports on the most recent submission — confirm against the
# cromshell documentation.
!cromshell status

In [None]:
# Show the last 5 lines of the Cromwell server log; CROMWELL_SERVER_LOG is
# defined in the directory-setup cell earlier in this notebook.
!tail -n 5 {CROMWELL_SERVER_LOG}