In [None]:
from core.utils import Tibanna
from core import sbg_utils

# Format of the input JSON for hic-partA (example payload):
'''
{
  "input_files": [
    {
      "bucket_name": "elasticbeanstalk-fourfront-webprod-files",
      "object_key": "4DNFIFBK258N.fastq.gz",
      "uuid" : "06aa0af1-2ccf-4dfe-aa14-209b1bd2754d",
      "workflow_argument_name": "fastq1"
    },
    {
      "bucket_name": "elasticbeanstalk-fourfront-webprod-files",
      "object_key": "4DNFINZVD2W3.fastq.gz",
      "uuid": "52646398-29d5-4200-b3b5-059ff5c40b82",
      "workflow_argument_name": "fastq2"
    },
    {
      "bucket_name": "elasticbeanstalk-fourfront-webprod-files",
      "object_key": "4DNFIZQZ39L9.bwaIndex.tgz",
      "uuid": "1f53df95-4cf3-41cc-971d-81bb16c486dd",
      "workflow_argument_name": "bwa_index"
    }
  ],
  "workflow_uuid": "02d636b9-d82d-4da9-950c-2ca994a0943e",
  "app_name": "hi-c-processing-parta/9",
  "parameters": {
    "nThreads": 8
  },
  "output_bucket": "elasticbeanstalk-fourfront-webprod-wfoutput-files",
  "_tibanna": {"env": "fourfront-webprod"}
}
'''

def make_input_file_json(obj_id, arg_name, tibanna):
    '''
    Build the input-file entry for one workflow argument.

    The metadata of ``obj_id`` is fetched through ``sbg_utils``; if the
    file's upload key exists on s3 a dict of this form is returned:

    {
      "bucket_name": "%s",
      "object_key": "%s",
      "uuid" : "%s",
      "workflow_argument_name": "%s"
    }

    Args:
        obj_id: identifier passed to ``sbg_utils.get_metadata``.
        arg_name: stored under ``workflow_argument_name``.
        tibanna: object providing ``ff_keys`` and an ``s3`` helper with
            ``outfile_bucket`` and ``does_key_exist``.

    Returns:
        The entry dict, or an empty dict when the metadata carries no
        upload key or the key does not exist on s3. Callers can therefore
        test the result for truthiness.
    '''
    ff = sbg_utils.fdn_connection(key=tibanna.ff_keys)
    metadata = sbg_utils.get_metadata(obj_id, connection=ff)

    # Metadata without an upload key cannot be checked on s3; bail with the
    # same empty result as "not on s3" instead of raising KeyError.
    # (assumes metadata is a dict-like JSON object -- TODO confirm)
    upload_key = metadata.get('upload_key')
    if not upload_key:
        print("no upload key found in metadata for %s" % obj_id)
        return {}

    # just make sure the file is on s3, otherwise bail
    print("looking for upload key %s, on bucket %s" %
          (upload_key,
           tibanna.s3.outfile_bucket))
    if not tibanna.s3.does_key_exist(upload_key):
        return {}

    return {'bucket_name': tibanna.s3.outfile_bucket,
            # object key is the second '/'-separated part of the upload key
            'object_key': upload_key.split('/')[1],
            'uuid': metadata['uuid'],
            'workflow_argument_name': arg_name
            }
    
        
def make_hica_json(input_files, env, output_bucket,
                   workflow_uuid="02d636b9-d82d-4da9-950c-2ca994a0943e",
                   app_name="hi-c-processing-parta/9",
                   n_threads=8):
    '''
    Assemble the input json for a hic-partA workflow run.

    Args:
        input_files: list of input-file entries (stored as given, not copied).
        env: environment name, stored under ``_tibanna.env``.
        output_bucket: bucket name stored under ``output_bucket``.
        workflow_uuid: uuid stored under ``workflow_uuid``.
        app_name: stored under ``app_name``.
        n_threads: stored under ``parameters.nThreads``.

    The three keyword arguments default to the values that were previously
    hard-coded, so existing three-argument calls build the same json.

    Returns:
        dict with the keys ``input_files``, ``output_bucket``,
        ``workflow_uuid``, ``app_name``, ``parameters`` and ``_tibanna``.
    '''
    input_json = {'input_files': input_files,
                  'output_bucket': output_bucket,
                  'workflow_uuid': workflow_uuid,
                  "app_name": app_name,
                  "parameters": {
                      "nThreads": n_threads
                      },
                  "_tibanna": {"env": env}
                  }
    return input_json
    
    
    
    


In [None]:
from core.utils import run_workflow


# hic-partA, paired files
# One ('dcic:<sample>_SRR<run>_1', 'dcic:<sample>_SRR<run>_2') tuple per run.
# Each sample covers a contiguous, inclusive range of SRR run numbers
# (runs 1658632-1658635 are not part of any sample listed here).
paired_files = [
    ('dcic:%s_SRR%d_1' % (sample, run), 'dcic:%s_SRR%d_2' % (sample, run))
    for sample, first_run, last_run in (
        ('HIC034', 1658608, 1658624),
        ('HIC035', 1658625, 1658631),
        ('HIC037', 1658636, 1658643),
    )
    for run in range(first_run, last_run + 1)
]

# testportal
env = 'fourfront-webdev'
tibanna = Tibanna(env=env)
# Keep the bucket Tibanna was created with: it is passed below as the
# workflow's output bucket. The s3 helper is then pointed at the files bucket,
# which make_input_file_json records as the bucket of every input file.
outfiles = tibanna.s3.outfile_bucket
tibanna.s3.outfile_bucket = 'elasticbeanstalk-fourfront-webdev-files'

for pair in paired_files:
    read1_id, read2_id = pair
    fastq1 = make_input_file_json(read1_id, 'fastq1', tibanna)
    # The second mate must come from the second element of the pair;
    # using the first element twice would submit the same file as both reads.
    fastq2 = make_input_file_json(read2_id, 'fastq2', tibanna)
    # TODO: need a function to determine the reference index given fastq1
    ref = make_input_file_json('4DNFIZQZ39L9', 'bwa_index', tibanna)

    # make_input_file_json returns an empty dict for a file that is not on
    # s3, so all() is False as soon as one input is missing.
    input_files = [fastq1, fastq2, ref]
    if all(input_files):
        input_json = make_hica_json(input_files, env, outfiles)
        print(input_json)
        res = run_workflow(input_json)
        print(res)
    else:
        print("some files not found on s3.  Investigate this list %s" % input_files)

    # NOTE(review): only the first pair is processed because of this break;
    # presumably a trial-run guard -- remove it to submit every pair.
    break
    
    
    




