In [1]:
from core.utils import Tibanna
from core import ff_utils

#format for input json in hic-partA
def make_input_file_json(obj_id, arg_name, tibanna):
    '''
    {
      "bucket_name": "%s",
      "object_key": "%s",
      "uuid" : "%s",
      "workflow_argument_name": "%s"
    }
    '''
    ff = ff_utils.fdn_connection(key=tibanna.ff_keys)
    metadata = ff_utils.get_metadata(obj_id, connection=ff)
    data = {}
    
    # just make sure the file is on s3, otherwise bail
    print("looking for upload key %s, on bucket %s" % 
          (metadata['upload_key'],
           tibanna.s3.outfile_bucket))
    if tibanna.s3.does_key_exist(metadata['upload_key']):
        data = {'bucket_name' : tibanna.s3.outfile_bucket,
                'object_key' : metadata['upload_key'].split('/')[1],
                'uuid' : metadata['uuid'],
                'workflow_argument_name': arg_name
                }
    return data
    

def make_hic1_json(input_files, env, output_bucket, accession):
    input_json = {'input_files': input_files,
                  'output_bucket': output_bucket,
                  'workflow_uuid': "3feedadc-50f9-4bb4-919b-09a8b731d0cc",
                  "app_name": "bwa-mem",
                  "parameters": {
                      "nThreads": 16
                      },
                  "config": {
                        "ebs_type": "io1",
                        "json_bucket": "4dn-aws-pipeline-run-json",
                        "ebs_iops": 500,
                        "shutdown_min": 30,
                        "s3_access_arn": "arn:aws:iam::643366669028:instance-profile/S3_access",
                        "ami_id": "ami-cfb14bb5",
                        "copy_to_s3": True,
                        "launch_instance": True,
                        "password": "dragonfly",
                        "log_bucket": "tibanna-output",
                        "script_url": "https://raw.githubusercontent.com/4dn-dcic/tibanna/master/awsf/",
                        "key_name": "4dn-encode"
                    },
                  "_tibanna": {"env": env, "run_type": "bwa-mem",
                               "run_id": accession},
                  "tag": "0.2.5"
                  }
    return input_json
    

In [2]:
from tasks import run_md5
from tasks import run_fastqc
from invoke import run
import time
from datetime import datetime

# for a given experiment set and some parameters like instrument
# print set of files and their partA hic workflow status
# if there are one that are running report the number of running cases
# if there are file pairs that don't have a corresponding part A, report them separately



exclude_miseq = True

wf_md5 = "md5"
wf_fastqc = "fastqc-0-11-4-1"
wf_partA = "bwa-mem 0.2.5"

env = 'fourfront-webprod'
tibanna = Tibanna(env=env)

run_md_qc = raw_input("Do you wanna run md5 and/or fastqc if missing? (md5/qc/all/none)")

# status for completion
# there are two flavors of complete signals, before it was output_file_transfer_finished, not it is complete.
# old completed wf runs have former one.
status_done = ['complete', 'output_file_transfer_finished']
status_good = ['uploaded', 'released', 'released to project']
ff = ff_utils.fdn_connection(key=tibanna.ff_keys)

################
##ADD TO WORKFLOW
# wfr_time = datetime.strptime(wfr_data['date_created'],'%Y-%m-%dT%H:%M:%S.%f+00:00')
# run_hours = int((datetime.now()-wfr_time).total_seconds()/3600)
################

def summarize_file(file_resp):
    qc = False
    file_id = file_resp['accession']
    sequencer = file_resp.get('instrument')
    relations = file_resp.get('related_files')
    status = file_resp.get('status')
    workflows = file_resp.get('workflow_run_inputs')
    first_alias = file_resp.get('aliases',[None])[0]
    pair_no = file_resp.get('paired_end')
    # get related file
    paired_file = ''
    for relation in relations:
        if relation['relationship_type'] == 'paired with':
            paired_file = relation['file']['accession']
    
    # is there a qc?
    if file_resp.get('quality_metric'):
        qc = True
        
    # Check workflows for qc fastqc workflow partA
    last_part_A = ''
    last_part_A_status = 'did_not_run'
    md5_status = 'did_not_run'
    fastqc_status = 'did_not_run'
    # Assumes workflow_runs come in time ordered list, and grabs the last ones for each wf run
    if workflows:
        for a_wfr in workflows:
            wfr_resp = ff_utils.get_metadata(a_wfr['uuid'], connection=ff)  
            wfr_name = wfr_resp['display_title']
            if wfr_name.startswith(wf_md5):
                md5_status = wfr_resp.get('run_status')     
            elif wfr_name.startswith(wf_fastqc):
                fastqc_status = wfr_resp.get('run_status')     
            elif wfr_name.startswith(wf_partA):
                last_part_A=wfr_resp['uuid']
                last_part_A_status = wfr_resp.get('run_status')  
                
    # Check for md5 and fastqc, and if not complete, run or report it. 
    # if exclude miseq is on, do this only if sequencer is not miseq
    if not exclude_miseq or sequencer != "Illumina MiSeq":
        # check if md5 step is completed properly
        
        if status not in status_good or md5_status not in status_done:
            # if not, shall we run it?
            if run_md_qc in ['md5', 'all']:
                print 'md5 running for', file_resp['accession']
                code_md5= "invoke run_md5 " + env + " " + file_resp['accession'] + " " + file_resp['uuid']
                run(code_md5)
                print ''
                time.sleep(10)
            # user does not want it to be run, so just report
            else:
                print 'md5 run missing for', file_resp['accession']
        # check fastqc if md5 is fine
        else:

            if not qc or fastqc_status not in status_done:
                # if not, shall we run it?
                if run_md_qc in ['qc', 'all']:
                    print 'fastqc running for', file_resp['accession']
                    code_qc= "invoke run_fastqc " + env + " " + file_resp['accession'] + " " + file_resp['uuid']
                    run(code_qc)
                    print ''    
                    time.sleep(10)
                # user does not want it to be run, so just report
                else:
                    print 'fastqc run missing for', file_resp['accession'], fastqc_status
                    print 
   
    # return a small report
    return {'file': file_id,
            'alias': first_alias,
            'sequencer': sequencer,
            'pair_no': pair_no,
            'paired_file': paired_file,
            'file_status': status,
            'qc': qc,
            'md5_status': md5_status,
            'fastqc_status': fastqc_status,
            'last_part_A': last_part_A,
            'last_part_A_status': last_part_A_status
           }


def get_file_list(my_rep_set):

    rep_resp = ff_utils.get_metadata(my_rep_set, connection=ff)['experiments_in_set']
    report = []
    enzymes = []
    organisms = []
    for exp in rep_resp:
        exp_resp = ff_utils.get_metadata(exp, connection=ff)

        if not organisms:
            biosample = ff_utils.get_metadata(exp_resp['biosample'], connection=ff, frame='embedded')      
            organisms = list(set([bs['individual']['organism']['display_title'] for bs in  biosample['biosource']]))
            print organisms

        exp_files = exp_resp['files']
        enzyme = exp_resp.get('digestion_enzyme')
        enzymes.append(enzyme)
        for fastq_file in exp_files:
            file_resp = ff_utils.get_metadata(fastq_file, connection=ff, frame='embedded')  
            #Some checks before running
            #check if status is deleted
            if file_resp['status'] == 'deleted':
                continue
            #if no uploaded file in the file item report and skip
            if not file_resp.get('filename'):
                print file_resp['accession'], "does not have a file"
                continue
            # check that file has a pair
            if not file_resp.get('related_files'):
                print file_resp['accession'], 'does not have a pair'
                continue
            # check if file is in s3
            head_info = tibanna.s3.does_key_exist(file_resp['upload_key'], tibanna.s3.raw_file_bucket)
            if not head_info:
                print file_resp['accession'], "does not have a file in S3"
                continue

            # skip pair no 2
            if file_resp.get('paired_end')=='2':
                continue 

            # if the file size is more than 42 GB do not run
            file_size = head_info['ContentLength']
            if file_size > 45097156608:
                print file_resp['aliases'], 'larger than 42GB, skipping, please use another script'
                continue
            filesize = round(head_info['ContentLength']/1073741824.0,1)

            file_info = summarize_file(file_resp)

            # check for miseq
            if exclude_miseq:
                if file_info['sequencer'] == 'Illumina MiSeq':
                    continue
            paired_file = file_info['paired_file']
            pair_file_resp = ff_utils.get_metadata(paired_file, connection=ff, frame='embedded')
            pair_file_info = summarize_file(pair_file_resp)
            print fastq_file,paired_file, filesize

            # check consistency of paired file info
            # status differences gives error but there are multiple statuses that indicate complete
            # TODO fix it
            pairs_inconsistent = ""
            check_items = [ i for i in file_info.keys() if i not in ['file', 'paired_file', 'pair_no', 'alias']]
            for check_item in check_items:
                try:
                    assert file_info[check_item] == pair_file_info[check_item]
                except AssertionError:
                    print check_item, "not the same between pair", fastq_file, 'and', paired_file
                    pairs_inconsistent += check_item + ', '
            wf_check = ''
            # check if md5 and qc are okay
            for info in [file_info, pair_file_info]:
                if (info['md5_status'] in status_done and info['file_status'] in status_good and
                    info['fastqc_status'] in status_done and info['qc'] == True):
                    wf_check += '+'
            rep = {"consistency": pairs_inconsistent,  
                   "file1": file_info['alias'],
                   "file2": pair_file_info['alias'],
                   "const_check": pairs_inconsistent,
                   "wf_check": wf_check, 
                   "partA_wf": file_info['last_part_A'], 
                   "partA_status": file_info['last_part_A_status']
                   }
            # status differences gives error but there are multiple statuses that indicate complete
            # TODO fix it
            report.append(rep)
            if rep.get('const_check'):
                print rep['const_check']
    
    return my_rep_set, organisms, report


Do you wanna run md5 and/or fastqc if missing? (md5/qc/all/none)


In [3]:
all_reports = []
set_url = '/search/?experiments_in_set.biosample.biosource.individual.organism.name=human&experiments_in_set.experiment_type=in%20situ%20Hi-C&experiments_in_set.experiment_type=dilution%20Hi-C&experiments_in_set.experiment_type=DNase%20Hi-C&experiments_in_set.experiment_type=micro-C&experiments_in_set.experiment_type=PLAC-seq&experiments_in_set.experiment_type=capture%20Hi-C&experiments_in_set.experiment_type=CHIA-pet&experiments_in_set.experiment_type=TrAC-loop&type=ExperimentSetReplicate'
run_sets = [i['uuid'] for i in ff_utils.get_metadata(set_url , connection=ff)['@graph']]

print len(run_sets)
c = 0
for a_set in run_sets:
    c += 1
    print c
    print a_set
    rep = get_file_list(a_set)
    if rep:
        all_reports.append(rep)
    print
print 'done'

117
1
f8da32f6-18fe-4f90-82f2-36dca70b409f
[u'human']
/files-fastq/4DNFIFF7K2ZC/ 4DNFIEPLTDNG 1.2
/files-fastq/4DNFIWB5O95D/ 4DNFIJ95LUGK 24.6
/files-fastq/4DNFIOOZOF4F/ 4DNFIJ4JO7H2 0.8
/files-fastq/4DNFIB734M8W/ 4DNFIPG8BLCU 26.0

2
9b1063f5-c891-44a8-b44a-3b4879257366
[u'human']
/files-fastq/4DNFI3EIJZ1S/ 4DNFILPUZLAP 1.5
/files-fastq/4DNFINYCUVKX/ 4DNFIHZQRKFQ 1.6
/files-fastq/4DNFIT2Y15DJ/ 4DNFIR7H77KR 1.2
/files-fastq/4DNFIQLDW5SE/ 4DNFIKVK3ZUG 1.3
/files-fastq/4DNFI6UHAJ72/ 4DNFISYFW1FW 1.3
/files-fastq/4DNFI7ITYF7V/ 4DNFIW1UQ1UH 1.3
/files-fastq/4DNFI42SLX1J/ 4DNFIQAHKX9G 1.6
/files-fastq/4DNFI2LUHMUT/ 4DNFIVEYCSDC 1.7
/files-fastq/4DNFIZMI8RBZ/ 4DNFIZQYOVRZ 1.4
/files-fastq/4DNFIL41JKUA/ 4DNFIZ1VGJXF 1.5
/files-fastq/4DNFIVAE9EIS/ 4DNFIRRWCZN4 1.2
/files-fastq/4DNFIM4VAKNZ/ 4DNFI4ZZ9UXN 1.2
/files-fastq/4DNFI4RJFKSL/ 4DNFIYUZQRIJ 1.2
/files-fastq/4DNFIYM5YJPF/ 4DNFIL5UXD8D 1.2
/files-fastq/4DNFIAD6CA2N/ 4DNFIG78CNWP 1.2
/files-fastq/4DNFIS32DRD4/ 4DNFIM6BAWWO 1.2

3
c5db8085-9

[u'human']
/files-fastq/4DNFIBCRYBPW/ 4DNFI8TIPK75 10.2
/files-fastq/4DNFI1PAJKFC/ 4DNFIUXJJM67 10.0
/files-fastq/4DNFI4F6KXR5/ 4DNFIY55W1HW 9.9
/files-fastq/4DNFIVNDDEDZ/ 4DNFI29W6CX6 10.2
/files-fastq/4DNFI7PO1Y9I/ 4DNFI7FTRUX4 10.1
/files-fastq/4DNFIA7QPBK1/ 4DNFIFSML3KY 10.1
/files-fastq/4DNFIHY3D1G7/ 4DNFI96OZWFF 10.0
/files-fastq/4DNFINLRXHSC/ 4DNFIHNEAEMA 9.5
/files-fastq/4DNFIJI2ZEWN/ 4DNFIMZ4MZCG 10.2
/files-fastq/4DNFIO9OHYUV/ 4DNFIM62WI62 9.2
/files-fastq/4DNFI3TE7QNY/ 4DNFIS28AISX 10.0
/files-fastq/4DNFIN9HJDNO/ 4DNFIBZJ8W8V 10.1
/files-fastq/4DNFI6LI5AMP/ 4DNFIEIHJLLV 10.2
/files-fastq/4DNFIU8LC2OB/ 4DNFI4Z2Y6IR 10.2
/files-fastq/4DNFITO994UM/ 4DNFIROGHFF7 10.2
/files-fastq/4DNFIYC5ZZGF/ 4DNFI6LY81F9 10.2
/files-fastq/4DNFIZWJPRWP/ 4DNFIQQ1T6EJ 10.1
/files-fastq/4DNFIF3HEOMM/ 4DNFICIEIA1P 10.0

27
e58e2141-9253-4c91-85c3-d67ce06db28f
[u'human']
/files-fastq/4DNFIR8PHK1N/ 4DNFIAGRT5V5 1.7
/files-fastq/4DNFI4J23CH2/ 4DNFICAWNEZW 1.7
/files-fastq/4DNFI6PNCV7R/ 4DNFI9NZGW4Y 1.

/files-fastq/4DNFI4794X2M/ 4DNFIOHG1PT1 0.2

56
9b97c001-5a38-48ca-8e20-8a2dfe073b53
[u'human']
/files-fastq/4DNFIUJQD4KY/ 4DNFIEH4T6MU 0.0
/files-fastq/4DNFIG3F4WTK/ 4DNFIA3AKOC5 0.0
/files-fastq/4DNFIT2ADOV9/ 4DNFI4LSMEHL 0.0
/files-fastq/4DNFI7RR5RO2/ 4DNFIDJ4HPUU 0.0

57
a0b4c3a6-fb4c-41ae-8ca2-d035f09e4b46
[u'human']
/files-fastq/4DNFIKQ2WKBZ/ 4DNFIEF63HY4 0.0
/files-fastq/4DNFIZKWF9NW/ 4DNFIK8LPUSR 0.0
/files-fastq/4DNFIFDXP9MM/ 4DNFI6ATF1SC 0.0
/files-fastq/4DNFIQ2F7IPM/ 4DNFIWTTQ34S 0.0

58
1e5f0646-e569-45e0-9560-bd5c39a68859
[u'human']
/files-fastq/4DNFIBVEWU6D/ 4DNFIH6TEYTI 0.0
/files-fastq/4DNFIY9D6LOK/ 4DNFIZCDQ8H9 0.0
/files-fastq/4DNFI3UFYOWR/ 4DNFIJRG7G2X 0.0
/files-fastq/4DNFI7YMVCGF/ 4DNFIPP825HG 0.0

59
d99fcef2-8167-417f-a4e4-a0e757b30942
[u'human']
/files-fastq/4DNFICTA2EY8/ 4DNFI5SZXCB7 0.0
/files-fastq/4DNFINDN9LFE/ 4DNFILFI89TD 0.0
/files-fastq/4DNFIZ5HEG3B/ 4DNFIT1TO2CF 0.0
/files-fastq/4DNFIDGAVP7C/ 4DNFIDME1O8H 0.0
/files-fastq/4DNFIQ91Z4YK/ 4DNFI5R8NHIN 0.0


/files-fastq/4DNFIKNJHM3U/ 4DNFI72H6X6M 0.0
/files-fastq/4DNFI3W15K5N/ 4DNFIVL4EB3K 0.0

76
b5fb91fd-34cc-43d1-9e93-fc317d53c5a1
[u'human']
/files-fastq/4DNFIXWHQFO9/ 4DNFISBLR2R6 0.0
/files-fastq/4DNFICRE72JW/ 4DNFI4LKCDPE 0.0
/files-fastq/4DNFIB36TK7A/ 4DNFI24SHTVF 0.0
/files-fastq/4DNFIXDTM8KT/ 4DNFIZIGUH1X 0.0
/files-fastq/4DNFI6K9U643/ 4DNFIYD3NXOA 0.0
/files-fastq/4DNFIT1XED2D/ 4DNFIPS5DFSZ 0.0
/files-fastq/4DNFIWHUQG6J/ 4DNFIN36BI6I 0.0
/files-fastq/4DNFI2PIUC8H/ 4DNFIGTKGPHX 0.0
/files-fastq/4DNFITKLCQS8/ 4DNFI14URKNA 0.0
/files-fastq/4DNFI6RJTP9D/ 4DNFISS35OSV 0.0
/files-fastq/4DNFIX1J1O73/ 4DNFIQT9TW71 0.0
/files-fastq/4DNFIEXLKK9P/ 4DNFIY8I8OMW 0.0
/files-fastq/4DNFIJHWYQRH/ 4DNFI8EDE4QE 0.0
/files-fastq/4DNFI1L2YYU5/ 4DNFIVQF2X77 0.0
/files-fastq/4DNFIEOIJNP8/ 4DNFILU2W38H 0.0
/files-fastq/4DNFI46QP8AT/ 4DNFIOPO5R29 0.0
/files-fastq/4DNFI5UDXHZX/ 4DNFIDJ4LIEC 0.0
/files-fastq/4DNFISQT7FNY/ 4DNFIR2VWCEO 0.0
/files-fastq/4DNFI3NY3VBR/ 4DNFIW83E59D 0.0
/files-fastq/4DNFI6E8PEF

/files-fastq/4DNFIJ5ET6MX/ 4DNFIS3TQ6DL 0.2
/files-fastq/4DNFI8EZLF1E/ 4DNFIL614MGU 0.2
/files-fastq/4DNFI3RPSS5X/ 4DNFIUAQALP7 0.2

108
b258fb64-3d9c-472d-9fce-60f293be2ac2
[u'human']
/files-fastq/4DNFI6W569F6/ 4DNFI923YLXB 7.7

109
7b3c4c77-221a-4e21-bd95-4345cd220caa
[u'human']
/files-fastq/4DNFIGCX4XEH/ 4DNFIPUEUERV 5.9
/files-fastq/4DNFIYY4W4VY/ 4DNFIR9GJWST 5.9
/files-fastq/4DNFI2ZKZQTZ/ 4DNFIGENEVWB 7.1
/files-fastq/4DNFITKTRBZU/ 4DNFIHTJYH5T 5.9
/files-fastq/4DNFIQVI4XXQ/ 4DNFIMCJXZKH 8.0

110
844ed6ca-1b16-4827-830f-87ab613b0a56
[u'human']
/files-fastq/4DNFIOIRW5GO/ 4DNFIHKUJC39 15.6
/files-fastq/4DNFI4Q46QWG/ 4DNFI2BOWMXW 15.9
/files-fastq/4DNFIKXS6UAI/ 4DNFITDMVPYE 16.3
/files-fastq/4DNFI1IBQLLM/ 4DNFI4TFRODP 16.6

111
e6ce55ac-a39f-4064-a578-5329c5454205
[u'human']
/files-fastq/4DNFI2ODUV3V/ 4DNFIH2FMKX4 14.9
/files-fastq/4DNFIL3UHHMS/ 4DNFI1LHO8TZ 15.0
/files-fastq/4DNFIBH655V6/ 4DNFIHTUHK6B 14.9
/files-fastq/4DNFICYR7A3D/ 4DNFI2FZ7WRH 14.8
/files-fastq/4DNFIPBN8225/ 4DNFI

In [4]:


pairs_completed = []
pairs_running = []
pairs_did_not_run = []
pairs_ready_to_run = []
pairs_qcmd_problem = []

all_num= 0
for name, org, report in all_reports: 
    all_num += len(report)
    
    # 1 completed pairs
    pairs_completed.extend([[name,org,i] for i in report if i['partA_status']=='complete'])
    # 2 running pairs
    pairs_running.extend([[name, org, i] for i in report if i['partA_status'] not in ['complete','did_not_run']])
    # 3 no run pairs
    pairs_did_not_run.extend([[name, org, i] for i in report if i['partA_status']=='did_not_run'])
                             
                            
# 3a no run pairs with fine qc md5
pairs_ready_to_run = [[i[0], i[1], i[2]['file1'], i[2]['file2']] for i in pairs_did_not_run if i[2]['wf_check'] == '++']
# 3b no run pairs with problematic qc md5
pairs_qcmd_problem = [[i[0], i[1], i[2]['file1'], i[2]['file2']] for i in pairs_did_not_run if i[2]['wf_check'] != '++']

# # 2 running pairs to run again
# rerun_running_pairs = [(i['file1'], i['file2']) for i in report if i['partA_status'] not in ['complete','did_not_run']]

# rerun_started_pairs = [(i['file1'], i['file2']) for i in report if i['partA_status']=='started']

print "{}/{} pairs completed partA".format(all_num, len(pairs_completed))
print "{}/{} pairs still running partA".format(all_num, len(pairs_running))

print '1) ready to run (pairs_ready_to_run)'
print len(pairs_ready_to_run)
for n,o,a,b in pairs_ready_to_run:
    print n,o,a,b
print ""

print '2) problematics ones (pairs_qcmd_problem)'
for n,o,a,b in pairs_qcmd_problem:
    print n,o,a,b
print ""


print "DONE"

684/678 pairs completed partA
684/0 pairs still running partA
1) ready to run (pairs_ready_to_run)
6
5db537d2-1cf6-44c2-ae5d-6d724c9f43bc [u'human'] chuck-murry-lab:fetal_heart_dnase_hic_seq_rep1_run1_R1 chuck-murry-lab:fetal_heart_dnase_hic_seq_rep1_run1_R2
5db537d2-1cf6-44c2-ae5d-6d724c9f43bc [u'human'] chuck-murry-lab:fetal_heart_dnase_hic_seq_rep2_run1_R1 chuck-murry-lab:fetal_heart_dnase_hic_seq_rep2_run1_R2
3effa8a5-1156-4f1a-aa8a-e6d511c384c6 [u'human'] chuck-murry-lab:esc_dnase_hic_seq_rep1_run2_R1 chuck-murry-lab:esc_dnase_hic_seq_rep1_run2_R2
46572b13-ac2d-45de-9e11-a50236f975dd [u'human'] dcic:SRR2671373_1 dcic:SRR2671373_2
46572b13-ac2d-45de-9e11-a50236f975dd [u'human'] dcic:SRR2671374_1 dcic:SRR2671374_2
b258fb64-3d9c-472d-9fce-60f293be2ac2 [u'human'] dcic:HIC043_SRR1658649_1 dcic:HIC043_SRR1658649_2

2) problematics ones (pairs_qcmd_problem)

DONE


In [5]:
from core.utils import Tibanna
from core.utils import run_workflow
import time

paired_files = pairs_ready_to_run

env = 'fourfront-webprod'
tibanna = Tibanna(env=env)
outfiles = tibanna.s3.outfile_bucket
tibanna.s3.outfile_bucket = 'elasticbeanstalk-fourfront-webprod-files'
index_h = make_input_file_json('4DNFIZQZ39L9', 'bwa_index', tibanna)
index_m = make_input_file_json('4DNFI823LSI8', 'bwa_index', tibanna)

for set_name, organisms, f1,f2 in paired_files: 

    # find the correct index
    if organisms == ['human']:
        index = index_h
    elif organisms == ['mouse']:
        #index = index_m
        continue
    else:
        continue

    fastq1 = make_input_file_json(f1, 'fastq1', tibanna)
    fastq2 = make_input_file_json(f2, 'fastq2', tibanna)

    input_files = [fastq1, fastq2, index]
    if all(input_files):
        name = fastq1['object_key'].split('.')[0] + "-" + fastq2['object_key'].split('.')[0]
        input_json = make_hic1_json(input_files, env, outfiles, name)
        # print input_json
        res = run_workflow(input_json)
    else:
        print("some files not found on s3.  Investigate this list %s" % input_files)
    time.sleep(2)
    #a = raw_input("Press Enter to continue...")

print('Done')


looking for upload key 1f53df95-4cf3-41cc-971d-81bb16c486dd/4DNFIZQZ39L9.bwaIndex.tgz, on bucket elasticbeanstalk-fourfront-webprod-files
looking for upload key 4a6d10ee-2edb-4402-a98f-0edb1d58f5e1/4DNFI823LSI8.bwaIndex.tgz, on bucket elasticbeanstalk-fourfront-webprod-files
looking for upload key 0cfb16e8-b902-4977-a498-587d36497687/4DNFIG22ZQ7Y.fastq.gz, on bucket elasticbeanstalk-fourfront-webprod-files
looking for upload key 37d96da6-1daf-4bd0-87fa-50ec8df1cf3f/4DNFIMM81AZ3.fastq.gz, on bucket elasticbeanstalk-fourfront-webprod-files
about to start run bwa-mem_4DNFIG22ZQ7Y-4DNFIMM81AZ3
response from aws was: 
 {u'startDate': datetime.datetime(2018, 3, 19, 20, 27, 10, 288000, tzinfo=tzlocal()), 'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 200, 'RequestId': '6da9735e-2bd5-11e8-91f9-51cdf96108a6', 'HTTPHeaders': {'x-amzn-requestid': '6da9735e-2bd5-11e8-91f9-51cdf96108a6', 'content-length': '142', 'content-type': 'application/x-amz-json-1.0'}}, u'executionArn': u'arn:aws: