### Replace uploaded files
Use this notebook when you want to replace a file that has not yet been released. Replacement consists of setting the correct file status, deleting some properties on the File item that refer to the previously uploaded file, and deleting the wfr items (and their related quality metrics) that the previously uploaded file was an input for.

The new file upload needs to be handled separately.

The notebook checks the status of items before patching.

**NOTE 1:** only use this when a file is not yet released.

**NOTE 2:** you need to delete files from S3 if you don't immediately proceed with uploading a new file, since the hourly md5 check reverts the effect of running this notebook.

In [None]:
from dcicutils import ff_utils
from functions.notebook_functions import *

# Credentials passed as `key=` to every ff_utils call in this notebook.
# NOTE(review): get_key is provided by the star import from
# functions.notebook_functions; 'andrea_data' is presumably the name of a
# stored key entry - confirm, and replace it with your own key name.
my_auth = get_key('andrea_data')

In [None]:
# indicate files to reset (one file identifier per line)
files = '''
4DNFIXXXXXX1
4DNFIXXXXXX2
'''

# Strip every line so that pasted indentation or trailing spaces do not end up
# inside an identifier, and drop lines that are empty once stripped.
files = [line.strip() for line in files.splitlines() if line.strip()]
print(len(files), 'files')

In [None]:
# Load functions
def find_attached_items(file):
    """Collect the workflow runs that use a File as input, plus their QCs.

    For 'FileFastq' and 'FileProcessed' items the workflow runs are read from
    the file's own 'workflow_run_inputs' property (empty list if absent); for
    any other File type they are looked up with search_wfrs_with_input_file.

    Returns a list of (@id, status) tuples: one per workflow run, each
    followed by the tuple of its quality metric when the run has one.
    """
    types_with_wfr_inputs = ('FileFastq', 'FileProcessed')
    if file['@type'][0] in types_with_wfr_inputs:
        workflow_runs = file.get('workflow_run_inputs', [])
    else:
        # other File types do not carry workflow_run_inputs: search instead
        workflow_runs = search_wfrs_with_input_file(file)

    attached = []
    for wfr in workflow_runs:
        # the workflow run that has the file as input
        attached.append((wfr['@id'], wfr['status']))
        # the run is fetched again to read its quality metric, if any
        wfr_details = ff_utils.get_metadata(wfr['@id'], key=my_auth)
        qc = wfr_details.get('quality_metric')
        if qc:
            attached.append((qc['@id'], qc['status']))

    return attached


def search_wfrs_with_input_file(file):
    """Search for WorkflowRunAwsem items whose input files include `file`.

    The search is keyed on the file's 'accession'. Used for File types
    (FileReference and others) that do not have workflow_run_inputs.
    Returns whatever ff_utils.search_metadata returns for that query.
    """
    search_prefix = '/search/?type=WorkflowRunAwsem&input_files.value.accession='
    query = search_prefix + file['accession']
    return ff_utils.search_metadata(query, key=my_auth)


def delete_item(item_id, status):
    """Set an item's status to "deleted", if its current status allows it.

    Args:
        item_id: identifier of the item, passed to ff_utils.patch_metadata.
        status: current status of the item.

    Raises:
        AssertionError: if `status` is neither 'in review by lab' nor
            'pre-release'. Nothing is patched in that case.

    When the global ACTION flag is falsy this is a dry run: nothing is
    patched and the intended change is only printed.
    """
    # Raised explicitly instead of using an `assert` statement, so the guard
    # still protects the patch when Python runs with -O / PYTHONOPTIMIZE.
    if status not in ('in review by lab', 'pre-release'):
        raise AssertionError("Item status is incompatible with this change")

    if not ACTION:
        print(f"{item_id} will be deleted")
        return

    res = ff_utils.patch_metadata({"status": "deleted"}, item_id, key=my_auth)
    if res.get('status') == 'success':
        print(f"Deleted {item_id}")
    else:
        # do not stay silent when the patch did not report success
        print(f"WARNING: {item_id} was NOT deleted, response: {res}")
    return


def delete_file_fields(file):
    """Reset a File to "uploading" and delete the fields of the old upload.

    The patch sets status to "uploading" and deletes 'filename', 'md5sum',
    'content_md5sum' and 'file_size' through the `delete_fields` add-on.

    Args:
        file: File item metadata; 'status' and '@id' are read from it.

    Raises:
        AssertionError: if the file status is neither 'uploaded' nor
            'pre-release'. Nothing is patched in that case.

    When the global ACTION flag is falsy this is a dry run: nothing is
    patched and the intended change is only printed.
    """
    # Raised explicitly instead of using an `assert` statement, so the guard
    # still protects the patch when Python runs with -O / PYTHONOPTIMIZE.
    if file['status'] not in ('uploaded', 'pre-release'):
        raise AssertionError("File status is incompatible with this change")

    fields_to_be_removed = ['filename', 'md5sum', 'content_md5sum', 'file_size']
    del_add_on = 'delete_fields=' + ','.join(fields_to_be_removed)

    if not ACTION:
        print(f"{file['@id']} will be reset to uploading")
        return

    res = ff_utils.patch_metadata({"status": "uploading"}, file['@id'], key=my_auth, add_on=del_add_on)
    if res.get('status') == 'success':
        print(f"Reset {file['@id']} to uploading")
    else:
        # do not stay silent when the patch did not report success
        print(f"WARNING: {file['@id']} was NOT reset, response: {res}")
    return

In [None]:
# main

# set action to True to patch items, otherwise a dry run will be executed
ACTION = False

# Pass 1: fetch every file with its attached items and check all statuses
# first, so that nothing is patched unless the whole batch is eligible.
# Checking while patching could delete workflow runs and then stop on the
# file itself (or on a later file), leaving a half-applied change.
# The accepted statuses mirror the guards in delete_item / delete_file_fields.
to_process = []
not_allowed = []
for file_id in files:
    f = ff_utils.get_metadata(file_id, key=my_auth)
    additional_items = find_attached_items(f)
    if f['status'] not in ('uploaded', 'pre-release'):
        not_allowed.append((f['@id'], f['status']))
    for (item_id, status) in additional_items:
        if status not in ('in review by lab', 'pre-release'):
            not_allowed.append((item_id, status))
    to_process.append((f, additional_items))

if not_allowed:
    raise AssertionError(
        f"Status is incompatible with this change, nothing was patched: {not_allowed}"
    )

# Pass 2: delete the attached items, then reset the file itself
for (f, additional_items) in to_process:
    for (item_id, status) in additional_items:
        delete_item(item_id, status)
    delete_file_fields(f)