In [11]:
from core.utils import Tibanna
from core import ff_utils
import copy

# Environment name handed to Tibanna; all reads and patches in this notebook
# go through the connection derived from it.
env = 'fourfront-webprod'
# Tibanna helper for that environment; used here only for its `ff_keys`.
tibanna = Tibanna(env=env)
# Shared connection object, passed as `connection=` to every ff_utils call below.
ff = ff_utils.fdn_connection(key=tibanna.ff_keys)

def sort_p_files(p_file_list, acc):
    """Split an item's processed files into kept, archived and deleted ones.

    Every entry of ``p_file_list`` is looked up with ``ff_utils.get_metadata``
    (``frame='embedded'``) through the module-level ``ff`` connection:

    * files whose ``status`` is ``'deleted'`` are dropped;
    * files whose single ``workflow_run_outputs`` entry does not contain
      ``'0.2.5'`` in its ``display_title`` are moved to the archive list;
    * all other files are kept, in their original order.

    If anything was archived or dropped, item ``acc`` is patched ONCE with the
    new ``processed_files`` and -- only when something was archived -- with
    ``archived_processed_files``.  Sending both fields in one request avoids
    the state where files are removed from ``processed_files`` although the
    archive update failed, and skipping the archive key when it is empty
    avoids overwriting that field with ``[]`` during a pure "deleted" cleanup.

    Args:
        p_file_list: identifiers of processed files, as accepted by
            ``ff_utils.get_metadata``.
        acc: identifier of the item to patch (passed as ``obj_id``).

    Returns:
        Tuple ``(archive, new_p_files, rep)``: the archived files, the files
        that stay in ``processed_files``, and a short human-readable report.
        ``rep`` ends with ``', PATCH FAILED'`` if the patch request raised.

    Raises:
        AssertionError: if a non-deleted file does not have exactly one
            ``workflow_run_outputs`` entry.
    """
    archive = []
    new_p_files = []
    deleted = []

    for p_file in p_file_list:
        p_file_resp = ff_utils.get_metadata(p_file, connection=ff, frame='embedded')

        # get rid of all deleted files
        if p_file_resp['status'] == 'deleted':
            deleted.append(p_file)
            continue

        p_wfr = p_file_resp['workflow_run_outputs']
        assert len(p_wfr) == 1, 'expected exactly one workflow run output for %s' % (p_file,)
        if '0.2.5' not in p_wfr[0]['display_title']:
            archive.append(p_file)
        else:
            new_p_files.append(p_file)

    rep = 'no change'
    if archive:
        rep = 'proc_files will be archived'
        if new_p_files:
            rep += ' partially'
        if deleted:
            rep += ', cleaned deleted items'
    elif deleted:
        rep = 'cleaned deleted items'

    if archive or deleted:
        patch_data = {'processed_files': new_p_files}
        if archive:
            # NOTE(review): this replaces whatever is already stored in
            # archived_processed_files on the item -- confirm that earlier
            # archive entries do not need to be merged in first.
            patch_data['archived_processed_files'] = archive
        try:
            ff_utils.patch_metadata(patch_data, obj_id=acc, connection=ff)
        except Exception as e:
            # Best effort: keep going with the next item, but make the
            # failure visible in the report the caller prints.
            print(e)
            rep += ', PATCH FAILED'
    return archive, new_p_files, rep

In [12]:
# work on all experiment sets
# Fetch every ExperimentSetReplicate in the statuses of interest and report
# how many of them carry processed files.

url = '/search/?status=released&status=released%20to%20project&status=archived&status=submission%20in%20progress&type=ExperimentSetReplicate'
sets = ff_utils.get_metadata(url, connection=ff)['@graph']
print('%d experiment sets' % len(sets))
# Only sets with a non-empty `processed_files` are handled by the later cells.
pros_sets = [i for i in sets if i.get('processed_files')]
print('%d sets with processed files' % len(pros_sets))
stati = [i['status'] for i in pros_sets]
# sorted() keeps the report identical between runs; bare set order is arbitrary.
print('with statuses ' + ",".join(sorted(set(stati))).upper())

264 experiment sets
19 sets with processed files
with statuses RELEASED


In [14]:
# work on experiment set part 2
# For each set that has processed files: clean up the set itself, then each of
# its experiments.  sort_p_files patches the items as a side effect.
for a_set in pros_sets:
    exps = a_set['experiments_in_set']
    p_files = a_set['processed_files']
    # take care of sets first
    arch, new_p, rep = sort_p_files(p_files, a_set['accession'])
    print('set %s %s' % (a_set['accession'], rep))

    for exp in exps:
        # NOTE: despite its name, p_files_exp holds the experiment's full metadata.
        p_files_exp = ff_utils.get_metadata(exp, connection=ff)
        # .get(): an experiment without these fields must not abort the whole
        # run with a KeyError; an empty list makes sort_p_files a no-op.
        print(len(p_files_exp.get('files', [])))
        arch_e, new_p_e, rep_e = sort_p_files(p_files_exp.get('processed_files', []),
                                              p_files_exp['accession'])
        print('exp %s %s' % (p_files_exp['accession'], rep_e))
    # blank line between sets in the report
    print('')

set 4DNESB6MNCFE no change
2
exp 4DNEXITSUDKV no change
2
exp 4DNEXQ83CMK6 no change

set 4DNES98CI6GV no change
2
exp 4DNEXVP5U9YT no change
2
exp 4DNEXX3SJOG9 no change

set 4DNES21NPLZU no change
2
exp 4DNEX6DNUA7J no change
2
exp 4DNEX7X3XBWY no change

set 4DNESYTIFTEE no change
2
exp 4DNEX4USO4XW no change
2
exp 4DNEXGBVIBAD no change

set 4DNESIG4ELE4 no change
2
exp 4DNEX4U1R4HK no change
2
exp 4DNEXQ2XU5ZX no change

set 4DNESNHN919R no change
2
exp 4DNEXAQPT9EH no change
2
exp 4DNEXHMUQC5C no change

set 4DNES8ZUV5CQ no change
2
exp 4DNEXIQ7WY4X no change
2
exp 4DNEXFFYE93E no change

set 4DNESCCP4KTY no change
2
exp 4DNEXOJ1T4FS no change
2
exp 4DNEXMRUA2TD no change

set 4DNES7DFQZLI no change
2
exp 4DNEXCYOTRZ8 no change
2
exp 4DNEXECZBL99 no change

set 4DNESE3ICNE1 no change
6
exp 4DNEXYC25MK5 no change
6
exp 4DNEX92ZDG46 no change

set 4DNES4GSP9S4 no change
6
exp 4DNEXQT9BRMS no change
6
exp 4DNEXM7Q2UE8 no change

set 4DNESTAPSPUC no change
6
exp 4DNEX94NDZC8 no chang

KeyboardInterrupt: 

In [10]:
# Work on individual types of experiments
# Same clean-up as for experiment sets, applied directly to every experiment
# of one type.  Change `exp_type` to process a different experiment type.

exp_type = 'ExperimentHiC'
url_hic = '/search/?status=released&status=released%20to%20project&status=archived&status=submission%20in%20progress&type=' + exp_type
hic_exps = ff_utils.get_metadata(url_hic, connection=ff)['@graph']
print('%d experiment hic' % len(hic_exps))

# Only experiments with a non-empty `processed_files` need any work.
pros_hic = [i for i in hic_exps if i.get('processed_files')]
print('%d hics with processed files' % len(pros_hic))

stati_hic = [i['status'] for i in pros_hic]
# sorted() keeps the report identical between runs; bare set order is arbitrary.
print('with statuses ' + ",".join(sorted(set(stati_hic))).upper())

for a_hic in pros_hic:
    p_files = a_hic['processed_files']
    # archive / clean the processed files of this experiment (patches the item)
    arch, new_p, rep = sort_p_files(p_files, a_hic['accession'])
    print('hic %s %s' % (a_hic['accession'], rep))

324 experiment hic
56 hics with processed files
with statuses RELEASED
hic 4DNEX1OP4VCU cleaned deleted items
hic 4DNEXQ83CMK6 no change
hic 4DNEXITSUDKV no change
hic 4DNEXQ6NUVKG no change
hic 4DNEXX3SJOG9 no change
hic 4DNEXVP5U9YT no change
hic 4DNEXGBVIBAD no change
hic 4DNEX4USO4XW no change
hic 4DNEX52Q4NSZ no change
hic 4DNEX7X3XBWY no change
hic 4DNEX6DNUA7J no change
hic 4DNEXQ2XU5ZX no change
hic 4DNEX4U1R4HK no change
hic 4DNEXHMUQC5C no change
hic 4DNEXAQPT9EH no change
hic 4DNEXFFYE93E no change
hic 4DNEXIQ7WY4X no change
hic 4DNEXECZBL99 no change
hic 4DNEXCYOTRZ8 no change
hic 4DNEXMRUA2TD no change
hic 4DNEXOJ1T4FS no change
hic 4DNEX92ZDG46 no change
hic 4DNEXM7Q2UE8 no change
hic 4DNEXNT317AJ no change
hic 4DNEX8EKK377 no change
hic 4DNEXIENJVWW no change
hic 4DNEXIFHB9PJ no change
hic 4DNEXPCS5JFS no change
hic 4DNEXYC25MK5 no change
hic 4DNEXQT9BRMS no change
hic 4DNEX94NDZC8 no change
hic 4DNEXWV8J47A no change
hic 4DNEXGU61TBP no change
hic 4DNEXR37V4FC no change