### Change the status of Supplementary File (opf) collections on a list of ExperimentSets, Experiments, or both

#### You can restrict the change to specific collections by listing their titles in the WANTED list

For an opf collection, this notebook changes status of:
* all Files in the collection
* all input and output WorkflowRuns
* StaticContent for each file
* HiglassViewConfig for the collection
* QualityMetric for each file
* QualityMetricWorkflowrun for each WorkflowRun

WARNING: will not change status of QualityMetrics linked to other QualityMetrics via the qc_list property
    
    
### This first cell does the setup and is where you specify what items you want to start with and whether you have specific collections you want to include

In [None]:
from dcicutils import ff_utils
from functions.notebook_functions import get_key
import time

# fourfront connection
my_auth = get_key('')

# if you only want specific collections list them by title here
# if left empty all collections of opfs will be affected
WANTED = []  # eg. 'Compartment and insulation calls - Dekker Lab'

# if you only want to change items linked to sets and not the experiments of that set - unlikely but who knows
# change this to True
exclude_experiments = False


# lists of ids for items that have opf collections - ExperimentSets or Experiments - can use alias, accession, uuid ...
# or a search that returns the desired items
# id_list takes precedence: search_url is only used when id_list is empty
id_list = []  # ['4DNESX75DD7R', '23d688f3-11f4-4fbf-842d-07181c97a5f4']

search_url = ''  # '/search/?experiments_in_set.biosample.biosample_category=HFF+%28c6+or+hTERT%29&experiments_in_set.biosample.biosample_category=H1-hESC&experiments_in_set.biosample.biosample_category=WTC-11&experiments_in_set.biosample.biosource_summary=H1-hESC+%28Tier+1%29&experiments_in_set.biosample.biosource_summary=H1-hESC&experiments_in_set.experiment_type.assay_subclass_short=Hi-C&experimentset_type=replicate&status%21=archived&type=ExperimentSetReplicate' #

# default to an empty list so the cell still runs (and reports 0 items)
# when neither id_list nor search_url has been filled in
item_uuids = []
if id_list:
    # resolve every identifier (alias, accession, uuid ...) to its uuid
    item_uuids = [ff_utils.get_metadata(i, my_auth)['uuid'] for i in id_list]
elif search_url:
    item_uuids = [i['uuid'] for i in ff_utils.search_metadata(search_url, my_auth)]

print("We have {} items to check for opf collections".format(len(item_uuids)))

# accumulator for the uuids of every item whose status will be changed
uuids2upd = []

### Functions

In [None]:
def find_uuids_for_linked_items_in_fields(idlist, fields):
    """Collect the uuids of items linked from the given fields of some items.

    Fetches the embedded frame of every item in ``idlist`` through
    ``ff_utils.get_es_metadata`` (authenticated with the notebook-level
    ``my_auth`` key) and, for each name in ``fields`` that is present on an
    item, gathers the ``uuid`` of the linked item(s).

    Args:
        idlist: identifiers handed unchanged to ``ff_utils.get_es_metadata``.
        fields: names of top-level embedded fields to inspect; the value of a
            field may be a single dict or a list of dicts.

    Returns:
        dict mapping each field found on at least one item to the list of
        uuids collected for it. A list may contain duplicates, and ``None``
        for a linked entry that carries no ``uuid``; fields found on no item
        are absent from the dict.
    """
    # the whole embedded document is requested rather than one source per field
    items = ff_utils.get_es_metadata(idlist, is_generator=True, key=my_auth, sources=['embedded.*'])
    fld2uuids = {}
    for item in items:
        if not item:
            continue
        # a result without an 'embedded' section has no fields to inspect
        props = item.get('embedded') or {}
        for field in fields:
            if field not in props:
                continue
            finfo = props.get(field)
            if finfo is None:
                # field explicitly set to null: register it, but it links nothing
                finfo = []
            elif isinstance(finfo, dict):
                # single linked item - treat it like a one element list
                finfo = [finfo]
            fld2uuids.setdefault(field, []).extend(f.get('uuid') for f in finfo)
    return fld2uuids


def get_info_from_opfs(opf_colls):
    """Gather every item whose status must change for some opf collections.

    For each collection (optionally restricted to the titles listed in the
    notebook-level ``WANTED``) this collects the collection's files and its
    ``higlass_view_config`` if present, the uuids linked from the files via
    ``workflow_run_inputs``, ``workflow_run_outputs``, ``quality_metric`` and
    ``static_content``, and the ``quality_metric`` of those workflow runs.

    Args:
        opf_colls: iterable of collection dicts; the keys read here are
            ``title``, ``files`` and ``higlass_view_config``.

    Returns:
        list of item identifiers in discovery order. The same identifier can
        appear more than once if it is reachable through several paths.
    """
    # fields on a file that link to other items; identical for every collection
    linked_fields = ['workflow_run_inputs', 'workflow_run_outputs', 'quality_metric', 'static_content']
    items2upd = []
    for coll in opf_colls:
        if WANTED and (coll.get('title') not in WANTED):
            continue
        print("Gettings files, qc_metrics (and viewconfs) in {}".format(coll.get('title')))

        # want to get file and coll higlass_viewconf if present
        files = list(coll.get('files') or [])
        items2upd.extend(files)
        hgv = coll.get('higlass_view_config')
        if hgv:
            items2upd.append(hgv)

        # need to check the files for linked items
        fld2uuids = find_uuids_for_linked_items_in_fields(files, linked_fields)
        if not fld2uuids:
            print("nothing found linked to files")
            continue

        wfrs = []
        for fld, uuids in fld2uuids.items():
            # linked entries without a uuid come back as None - drop them
            uuids = [u for u in uuids if u]
            if not uuids:
                continue
            items2upd.extend(uuids)
            if fld.startswith('workflow_run'):
                # there is a possibility there are additional linked items
                wfrs.extend(uuids)

        if wfrs:
            # add qcmetrics from wfrs if any, dropping missing uuids just as
            # is done for the items linked directly to the files
            wfr_qcs = find_uuids_for_linked_items_in_fields(wfrs, ['quality_metric']).get('quality_metric', [])
            items2upd.extend(u for u in wfr_qcs if u)

    return items2upd

### Getting all the linked items

In [None]:
# get the opf collections from es_metadata for the items - if these are experiment sets will also get the experiment uuids
item_meta = ff_utils.get_es_metadata(item_uuids, is_generator=True, key=my_auth, sources=['properties.other_processed_files', 'properties.experiments_in_set'])

# start from an empty list so that re-running this cell does not add the same items a second time
uuids2upd = []
expt_uuids = []
for item in item_meta:
    # tolerate results without a 'properties' section, as the experiment loop below does
    props = item.get('properties') or {}
    opf_colls = props.get('other_processed_files', [])
    uuids2upd.extend(get_info_from_opfs(opf_colls))

    # check to see if this is a set with linked expts
    expt_uuids.extend(props.get('experiments_in_set', []))
print("{} items so far!".format(len(uuids2upd)))

if expt_uuids and not exclude_experiments:
    print('Checking linked experiments')
    # we have some experiments to check
    exp_meta = ff_utils.get_es_metadata(expt_uuids, is_generator=True, key=my_auth, sources=['properties.other_processed_files'])
    for emeta in exp_meta:
        eprops = emeta.get('properties') or {}
        eopf_colls = eprops.get('other_processed_files', [])
        uuids2upd.extend(get_info_from_opfs(eopf_colls))

print("now we have {}".format(len(uuids2upd)))
print(uuids2upd)

### And lastly update everything in your list with new status

### action=False is a dry run - change it to True to perform the patch

#### NOTE: currently status agnostic in that it will overwrite any existing statuses to the new one

Probably want to update to behave more like the standard release script

In [None]:
status = 'released to project'  # WARNING: this overwrites any other status, including archived, restricted, etc.
action = False  # set to True to patch

# the same item can be reached through several files or collections;
# patch each one only once (first-seen order kept) and skip empty ids
to_patch = []
seen = set()
for uid in uuids2upd:
    if not uid or uid in seen:
        continue
    seen.add(uid)
    to_patch.append(uid)

problems = []
success = 0
for uid in to_patch:
    if not action:
        print("DRY RUN will patch {} to {}".format(uid, status))
        continue
    try:
        res = ff_utils.patch_metadata({'status': status}, uid, key=my_auth)
    except Exception as e:
        # NOTE(review): patch_metadata is assumed to raise on a failed request -
        # confirm for the dcicutils version in use. Record the failure and
        # carry on so one bad item does not abort the remaining patches.
        problems.append(uid)
        print("failed to patch {}: {}".format(uid, e))
        continue
    if res.get('status') != 'success':
        problems.append(uid)
        print(res)
    else:
        success += 1
        print("success")
print('{} total items'.format(len(to_patch)))
print('patched {} items'.format(success))
print('{} problems'.format(len(problems)))
print(problems)