In [1]:
from core.utils import Tibanna
from core import ff_utils

#format for input json in hic-partA
'''
{
  "input_files": [
    {
      "bucket_name": "elasticbeanstalk-fourfront-webprod-files",
      "object_key": "4DNFIFBK258N.fastq.gz",
      "uuid" : "06aa0af1-2ccf-4dfe-aa14-209b1bd2754d",
      "workflow_argument_name": "fastq1"
    },
    {
      "bucket_name": "elasticbeanstalk-fourfront-webprod-files",
      "object_key": "4DNFINZVD2W3.fastq.gz",
      "uuid": "52646398-29d5-4200-b3b5-059ff5c40b82",
      "workflow_argument_name": "fastq2"
    },
    {
      "bucket_name": "elasticbeanstalk-fourfront-webprod-files",
      "object_key": "4DNFIZQZ39L9.bwaIndex.tgz",
      "uuid": "1f53df95-4cf3-41cc-971d-81bb16c486dd",
      "workflow_argument_name": "bwa_index"
    },
    {
      "bucket_name": "elasticbeanstalk-fourfront-webprod-files",
      "object_key": "4DNFI823LSII.chrom.sizes",
      "uuid": "4a6d10ee-2edb-4402-a98f-0edb1d58f5e9",
      "workflow_argument_name": "chrsizes"
    },
    {
      "bucket_name": "elasticbeanstalk-fourfront-webprod-files",
      "object_key": "4DNFI823L888.fasta.gz",
      "uuid": "4a6d10ee-2edb-4402-a98f-0edb1d58ddd2",
      "workflow_argument_name": "reference_fasta"
    },
    {
      "bucket_name": "elasticbeanstalk-fourfront-webprod-files",
      "object_key": "4DNFI823L812.txt",
      "uuid": "4a6d10ee-2edb-4402-a98f-0edb1d582084",
      "workflow_argument_name": "restriction_file"
    }
  ],
  "workflow_uuid": "02d636b9-d82d-4da9-950c-2ca994a0943e",
  "app_name": "hi-c-processing-parta/9",
  "parameters": {
    "nThreads": 8
  },
  "output_bucket": "elasticbeanstalk-fourfront-webprod-wfoutput-files",
  "_tibanna": {"env": "fourfront-webprod"}
}
'''


def make_input_file_json(obj_id, arg_name, tibanna):
    '''
    Build the input-file entry for one workflow argument.

    Fetches the metadata for `obj_id` through `ff_utils` and checks with
    `tibanna.s3.does_key_exist` that the item's `upload_key` is present on s3.

    Args:
        obj_id: identifier handed to `ff_utils.get_metadata`.
        arg_name: value stored under `workflow_argument_name`.
        tibanna: object providing `ff_keys` and an `s3` helper that exposes
            `outfile_bucket` and `does_key_exist`.

    Returns:
        A dict shaped like
        {
          "bucket_name": "%s",
          "object_key": "%s",
          "uuid" : "%s",
          "workflow_argument_name": "%s"
        }
        or an empty dict when the metadata carries no `upload_key` or the
        key is not found on s3.
    '''
    ff = ff_utils.fdn_connection(key=tibanna.ff_keys)
    metadata = ff_utils.get_metadata(obj_id, connection=ff)

    # Metadata without an upload key cannot be checked on s3. Return the
    # same empty result as for a missing file instead of raising KeyError.
    # NOTE(review): presumably this is what a failed lookup looks like - confirm.
    upload_key = (metadata or {}).get('upload_key')
    if not upload_key:
        print("no upload key found in metadata for %s" % obj_id)
        return {}

    # just make sure the file is on s3, otherwise bail
    print("looking for upload key %s, on bucket %s" %
          (upload_key, tibanna.s3.outfile_bucket))
    if not tibanna.s3.does_key_exist(upload_key):
        return {}

    # The second '/'-separated component is used as the object key; a key
    # without any '/' is used whole rather than raising IndexError.
    key_parts = upload_key.split('/')
    object_key = key_parts[1] if len(key_parts) > 1 else key_parts[0]

    return {'bucket_name': tibanna.s3.outfile_bucket,
            'object_key': object_key,
            'uuid': metadata['uuid'],
            'workflow_argument_name': arg_name
            }
    
        
def make_hica_json(input_files, env, output_bucket, accession):
    '''
    Assemble the run request for the "hi-c-processing-parta-juicer/5" app.

    Args:
        input_files: stored unchanged under "input_files".
        env: stored as the "env" of the "_tibanna" section.
        output_bucket: stored under "output_bucket".
        accession: stored as the "run_id" of the "_tibanna" section.

    Returns:
        The request dict, with fixed workflow uuid, app name and
        parameters ("nsplit" of 100).
    '''
    tibanna_section = {"env": env, "run_type": "hic-parta"}
    tibanna_section["run_id"] = accession

    request = dict(
        input_files=input_files,
        output_bucket=output_bucket,
        workflow_uuid="a9caf6f3-49e5-4c33-afab-9ec90d65faf3",
        app_name="hi-c-processing-parta-juicer/5",
        parameters={"nsplit": 100}
    )
    request["_tibanna"] = tibanna_section
    return request
    
    
    
    


In [3]:
from core.utils import run_workflow
from time import sleep


# hic-partA, paired files
#
# Disabled pairs, kept for reference. They are plain `#` comments so that no
# string delimiter can swallow the `paired_files` assignment below.
# ('dcic:HIC034_SRR1658608_1', 'dcic:HIC034_SRR1658608_2'),
# ('dcic:HIC034_SRR1658609_1', 'dcic:HIC034_SRR1658609_2'),
# ('dcic:HIC034_SRR1658610_1', 'dcic:HIC034_SRR1658610_2'),
# ('dcic:HIC034_SRR1658611_1', 'dcic:HIC034_SRR1658611_2'),
# ('dcic:HIC034_SRR1658612_1', 'dcic:HIC034_SRR1658612_2'),
# ('dcic:HIC034_SRR1658613_1', 'dcic:HIC034_SRR1658613_2'),

# Each entry is a (fastq1, fastq2) pair of file identifiers; the submission
# loop looks up index 0 as 'fastq1' and index 1 as 'fastq2'.
paired_files = [
    ('dcic:HIC034_SRR1658614_1', 'dcic:HIC034_SRR1658614_2'),
    ('dcic:HIC034_SRR1658615_1', 'dcic:HIC034_SRR1658615_2'),
    ('dcic:HIC034_SRR1658616_1', 'dcic:HIC034_SRR1658616_2'),
    ('dcic:HIC034_SRR1658617_1', 'dcic:HIC034_SRR1658617_2'),
    ('dcic:HIC034_SRR1658618_1', 'dcic:HIC034_SRR1658618_2'),
    ('dcic:HIC034_SRR1658619_1', 'dcic:HIC034_SRR1658619_2'),
    ('dcic:HIC034_SRR1658620_1', 'dcic:HIC034_SRR1658620_2'),
    ('dcic:HIC034_SRR1658621_1', 'dcic:HIC034_SRR1658621_2'),
    ('dcic:HIC034_SRR1658622_1', 'dcic:HIC034_SRR1658622_2'),
    ('dcic:HIC034_SRR1658623_1', 'dcic:HIC034_SRR1658623_2'),
    ('dcic:HIC034_SRR1658624_1', 'dcic:HIC034_SRR1658624_2'),
    ('dcic:HIC035_SRR1658625_1', 'dcic:HIC035_SRR1658625_2'),
    ('dcic:HIC035_SRR1658626_1', 'dcic:HIC035_SRR1658626_2'),
    ('dcic:HIC035_SRR1658627_1', 'dcic:HIC035_SRR1658627_2'),
    ('dcic:HIC035_SRR1658628_1', 'dcic:HIC035_SRR1658628_2'),
    ('dcic:HIC035_SRR1658629_1', 'dcic:HIC035_SRR1658629_2'),
    ('dcic:HIC035_SRR1658630_1', 'dcic:HIC035_SRR1658630_2'),
    ('dcic:HIC035_SRR1658631_1', 'dcic:HIC035_SRR1658631_2'),
    ('dcic:HIC037_SRR1658636_1', 'dcic:HIC037_SRR1658636_2'),
    ('dcic:HIC037_SRR1658637_1', 'dcic:HIC037_SRR1658637_2'),
    ('dcic:HIC037_SRR1658638_1', 'dcic:HIC037_SRR1658638_2'),
    ('dcic:HIC037_SRR1658639_1', 'dcic:HIC037_SRR1658639_2'),
    ('dcic:HIC037_SRR1658640_1', 'dcic:HIC037_SRR1658640_2'),
    ('dcic:HIC037_SRR1658641_1', 'dcic:HIC037_SRR1658641_2'),
    ('dcic:HIC037_SRR1658642_1', 'dcic:HIC037_SRR1658642_2'),
    ('dcic:HIC037_SRR1658643_1', 'dcic:HIC037_SRR1658643_2'),
]

# testportal
env = 'fourfront-webdev'
tibanna = Tibanna(env=env)

# Keep the bucket tibanna starts out with (it is passed on later as the
# workflow output bucket), then repoint the s3 helper at the files bucket:
# make_input_file_json reports `tibanna.s3.outfile_bucket` as each input
# file's bucket_name.
# NOTE(review): presumably does_key_exist() also looks in this bucket - confirm.
outfiles = tibanna.s3.outfile_bucket
tibanna.s3.outfile_bucket = 'elasticbeanstalk-fourfront-webdev-files'

# TODO: need a function to determine these reference files given fastq1
index, chrsizes, ref, restrict = (
    make_input_file_json(reference_id, argument_name, tibanna)
    for reference_id, argument_name in (
        ('4DNFIZQZ39L9', 'bwa_index'),
        ('4DNFI823LSII', 'chrsizes'),
        ('4DNFI823L888', 'reference_fasta'),
        ('4DNFI823L811', 'restriction_file'),
    )
)

# Submit one workflow run per fastq pair.
for pair in paired_files:
    # make_input_file_json returns an empty dict for a file it cannot find on s3
    fastq1 = make_input_file_json(pair[0], 'fastq1', tibanna)
    fastq2 = make_input_file_json(pair[1], 'fastq2', tibanna)
    input_files = [fastq1, fastq2, index, chrsizes, ref, restrict]

    # skip the pair as soon as any of the six inputs came back empty
    if not all(input_files):
        print("some files not found on s3.  Investigate this list %s" % input_files)
        continue

    # run id: the two object keys up to their first '.', joined by '-'
    name = "-".join(entry['object_key'].split('.')[0]
                    for entry in (fastq1, fastq2))
    input_json = make_hica_json(input_files, env, outfiles, name)
    print(input_json)
    res = run_workflow(input_json)
    print(res)
    # wait 30 seconds before moving on to the next pair
    sleep(30)
    
    
    
    






looking for upload key 1f53df95-4cf3-41cc-971d-81bb16c486dd/4DNFIZQZ39L9.bwaIndex.tgz, on bucket elasticbeanstalk-fourfront-webdev-files
looking for upload key 4a6d10ee-2edb-4402-a98f-0edb1d58f5e9/4DNFI823LSII.chrom.sizes, on bucket elasticbeanstalk-fourfront-webdev-files
looking for upload key 4a6d10ee-2edb-4402-a98f-0edb1d58ddd2/4DNFI823L888.fasta.gz, on bucket elasticbeanstalk-fourfront-webdev-files
looking for upload key 4a6d10ee-2edb-4402-a98f-0edb1d582083/4DNFI823L811.txt, on bucket elasticbeanstalk-fourfront-webdev-files
looking for upload key a2fe83f0-32eb-4d8e-b174-510538e7e901/4DNFITTUYJLR.fastq.gz, on bucket elasticbeanstalk-fourfront-webdev-files
looking for upload key cee4b5e3-7873-40fe-8c2e-435527526a0f/4DNFIEF7WURJ.fastq.gz, on bucket elasticbeanstalk-fourfront-webdev-files
{'parameters': {'nsplit': 100}, '_tibanna': {'run_type': 'hic-parta', 'env': 'fourfront-webdev', 'run_id': u'4DNFITTUYJLR-4DNFIEF7WURJ'}, 'output_bucket': 'elasticbeanstalk-fourfront-webdev-wfoutput',

looking for upload key 28b087e8-4bd7-4b25-b4bf-0a6e901bc945/4DNFIB5VLYR8.fastq.gz, on bucket elasticbeanstalk-fourfront-webdev-files
looking for upload key 0031b628-efea-4d8a-b2ed-1838320e279e/4DNFI214B87P.fastq.gz, on bucket elasticbeanstalk-fourfront-webdev-files
{'parameters': {'nsplit': 100}, '_tibanna': {'run_type': 'hic-parta', 'env': 'fourfront-webdev', 'run_id': u'4DNFIB5VLYR8-4DNFI214B87P'}, 'output_bucket': 'elasticbeanstalk-fourfront-webdev-wfoutput', 'input_files': [{'workflow_argument_name': 'fastq1', 'bucket_name': 'elasticbeanstalk-fourfront-webdev-files', 'uuid': u'28b087e8-4bd7-4b25-b4bf-0a6e901bc945', 'object_key': u'4DNFIB5VLYR8.fastq.gz'}, {'workflow_argument_name': 'fastq2', 'bucket_name': 'elasticbeanstalk-fourfront-webdev-files', 'uuid': u'0031b628-efea-4d8a-b2ed-1838320e279e', 'object_key': u'4DNFI214B87P.fastq.gz'}, {'workflow_argument_name': 'bwa_index', 'bucket_name': 'elasticbeanstalk-fourfront-webdev-files', 'uuid': u'1f53df95-4cf3-41cc-971d-81bb16c486dd', 

looking for upload key ce198a2e-910e-4411-a5af-7d39592b8e73/4DNFIYO1PBVV.fastq.gz, on bucket elasticbeanstalk-fourfront-webdev-files
looking for upload key 0d4cdc70-a235-4c0c-bdc1-f2069f2adf15/4DNFI7HY89B3.fastq.gz, on bucket elasticbeanstalk-fourfront-webdev-files
{'parameters': {'nsplit': 100}, '_tibanna': {'run_type': 'hic-parta', 'env': 'fourfront-webdev', 'run_id': u'4DNFIYO1PBVV-4DNFI7HY89B3'}, 'output_bucket': 'elasticbeanstalk-fourfront-webdev-wfoutput', 'input_files': [{'workflow_argument_name': 'fastq1', 'bucket_name': 'elasticbeanstalk-fourfront-webdev-files', 'uuid': u'ce198a2e-910e-4411-a5af-7d39592b8e73', 'object_key': u'4DNFIYO1PBVV.fastq.gz'}, {'workflow_argument_name': 'fastq2', 'bucket_name': 'elasticbeanstalk-fourfront-webdev-files', 'uuid': u'0d4cdc70-a235-4c0c-bdc1-f2069f2adf15', 'object_key': u'4DNFI7HY89B3.fastq.gz'}, {'workflow_argument_name': 'bwa_index', 'bucket_name': 'elasticbeanstalk-fourfront-webdev-files', 'uuid': u'1f53df95-4cf3-41cc-971d-81bb16c486dd', 