# In [None]:
from core.utils import Tibanna
from core import ff_utils
from core.utils import run_workflow
import time

def make_input_file_json(obj_id, arg_name, tibanna):
    '''
    Build the input-file entry for a single workflow argument.

    The metadata for ``obj_id`` is fetched through ``ff_utils`` using the
    keys held by ``tibanna``.  When ``tibanna.s3.does_key_exist`` reports
    that the file's upload key is present, the returned dict has the form

    {
      "bucket_name": "%s",
      "object_key": "%s",
      "uuid" : "%s",
      "workflow_argument_name": "%s"
    }

    where ``bucket_name`` is ``tibanna.s3.outfile_bucket``, ``object_key``
    is the second '/'-separated component of the upload key, ``uuid`` comes
    from the metadata and ``workflow_argument_name`` is ``arg_name``.

    If the key is not found an empty dict is returned, so callers can use
    the truthiness of the result to detect missing files.
    '''
    connection = ff_utils.fdn_connection(key=tibanna.ff_keys)
    metadata = ff_utils.get_metadata(obj_id, connection=connection)
    upload_key = metadata['upload_key']

    print("looking for upload key %s, on bucket %s" %
          (upload_key, tibanna.s3.outfile_bucket))

    # just make sure the file is on s3, otherwise bail with an empty result
    if not tibanna.s3.does_key_exist(upload_key):
        return {}

    return {
        'bucket_name': tibanna.s3.outfile_bucket,
        'object_key': upload_key.split('/')[1],
        'uuid': metadata['uuid'],
        'workflow_argument_name': arg_name,
    }
        
def make_hica_json(input_files, env, output_bucket, accession, ncores):
    '''
    Assemble the input payload for one hi-c-processing part A workflow run.

    input_files   -- list of input-file entries; stored as-is (not copied)
    env           -- environment name, recorded under "_tibanna"
    output_bucket -- bucket name stored under "output_bucket"
    accession     -- identifier used as the "_tibanna" run_id
    ncores        -- value stored under parameters["ncores"]

    Returns the payload as a new dict; the workflow uuid, app name and
    "nsplit" value are fixed.
    '''
    workflow_parameters = {"nsplit": 100, "ncores": ncores}
    tibanna_settings = {"env": env, "run_type": "hic-parta"}
    tibanna_settings["run_id"] = accession

    return {
        'input_files': input_files,
        'output_bucket': output_bucket,
        'workflow_uuid': "a9caf6f3-49e5-4c33-bfab-9ec90d65111c",
        "app_name": "hi-c-processing-parta-juicer/16",
        "parameters": workflow_parameters,
        "_tibanna": tibanna_settings,
    }



# Choose the restriction-enzyme reference file for this run; it is passed
# below as the 'restriction_file' workflow argument.
# 'HindIII': '4DNFI823L811', 'MboI': '4DNFI823L812'
re_ref_file = '4DNFI823L811'


# hic-partA, paired files: each tuple is (fastq1 id, fastq2 id)
paired_files = [('dcic:selveraj_SRX318776_SRR927086_1', 'dcic:selveraj_SRX318776_SRR927086_2')]
#pairs_qcmd_problem
#pairs_ready_to_run
#rerun_running_pairs


# testportal
env = 'fourfront-webdev'
tibanna = Tibanna(env=env)
# Order matters here: remember the bucket Tibanna was created with (it is
# passed to make_hica_json as the output bucket) BEFORE overriding the
# attribute, because make_input_file_json reads tibanna.s3.outfile_bucket
# as the bucket name of every input file.
outfiles = tibanna.s3.outfile_bucket
tibanna.s3.outfile_bucket = 'elasticbeanstalk-fourfront-webdev-files'

# TODO: need a function to determine this given fastq1
# Each entry is {} when the file was not found on s3 (see make_input_file_json).
index = make_input_file_json('4DNFIZQZ39L9', 'bwa_index', tibanna)
chrsizes = make_input_file_json('4DNFI823LSII', 'chrsizes', tibanna)
ref = make_input_file_json('4DNFI823L888', 'reference_fasta', tibanna)
restrict = make_input_file_json(re_ref_file, 'restriction_file', tibanna)
ncores = 36

# ncores options: 8  cores up to 20gb per .fastq.gz (1tb)
#                 36 cores up to 42gb per .fastq.gz (2tb)
#       Not set            up to 90gb per .fastq.gz (4tb)
# NOTE(review): the parenthesised sizes are presumably disk allocations - confirm.

# Submit one hic-partA workflow run per (fastq1, fastq2) pair.
#
# Safety switch: while True, only the first pair in paired_files is
# submitted.  Set it to False to submit every pair, with a pause between
# consecutive submissions.
first_pair_only = True

for pair_number, pair in enumerate(paired_files):
    if pair_number:
        # Throttle: wait between consecutive submissions (never before the
        # first one, and never after the last one).
        time.sleep(30)

    fastq1 = make_input_file_json(pair[0], 'fastq1', tibanna)
    fastq2 = make_input_file_json(pair[1], 'fastq2', tibanna)

    # make_input_file_json returns {} for a file missing on s3, so all()
    # is False as soon as any single input could not be located.
    input_files = [fastq1, fastq2, index, chrsizes, ref, restrict]
    if all(input_files):
        # Run id: the two fastq object keys, each cut at its first '.'.
        name = fastq1['object_key'].split('.')[0] + "-" + fastq2['object_key'].split('.')[0]
        input_json = make_hica_json(input_files, env, outfiles, name, ncores)
        res = run_workflow(input_json)
    else:
        print("some files not found on s3.  Investigate this list %s" % input_files)

    if first_pair_only:
        break

# Function-call form works on both Python 2 and Python 3 and matches the
# other print calls in this file.
print('Done')