### PLEASE COPY NOTEBOOK TO YOUR FOLDERS TO PREVENT COMMIT CONFLICTS

#### This notebook can be used to copy metadata from one environment and post or patch items (if they already exist) into a different environment

#### *This notebook uses ES functions so needs to be run in a personal EC2*

This cell sets up the auth for the environment that you are retrieving metadata from. Starting from a list of items, provided either as IDs or as the results of a search, it then retrieves all linked items.

**Note:** some item types can be excluded by modifying the parameters to `expand_es_metadata`

In [None]:
from dcicutils import ff_utils
from functions.notebook_functions import *
import json
import time

# Collect the starting items and everything linked to them from the SOURCE environment.

# get auth from keypairs.json
my_auth = get_key('andyprod')

# or alternatively from env name
# transfer_from_env = 'fourfront-production-green'
# my_auth = ff_utils.get_authentication_with_server({}, ff_env=transfer_from_env)

schema_name = get_schema_names(my_auth)
print('WORKING ON', my_auth['server'], '\n')

# use either a list of IDS of starting items or search that retrieves them
items_in_scope = ['4DNESSNWXHXK']  # ['4DNACCCC', '4DNACCCCC']

# Always define search_url so the `elif` below cannot hit an undefined name;
# to use a search instead, empty items_in_scope and uncomment/edit the example.
search_url = ''
#search_url  = '/search/?award.project=4DN&experiments_in_set.experiment_type=dilution+Hi-C&experimentset_type=replicate&lab.display_title=Bing+Ren%2C+UCSD&status=pre-release&type=ExperimentSetReplicate'

time1 = time.time()
if items_in_scope:
    starting_item_uuids = [ff_utils.get_metadata(i, my_auth)['uuid'] for i in items_in_scope]
elif search_url:
    starting_item_uuids = [i['uuid'] for i in ff_utils.search_metadata(search_url, my_auth)]
else:
    # fail early with a clear message instead of an undefined-variable error further down
    raise ValueError('Provide either items_in_scope or search_url to select starting items')

# store: item type -> list of raw-frame items; item_uuids: uuids of everything collected
store, item_uuids = ff_utils.expand_es_metadata(
    starting_item_uuids, my_auth, store_frame='raw', add_pc_wfr=True, ignore_field=[]
)

# .get() so that a starting set without any replicate sets does not crash the report
print(len(store.get('experiment_set_replicate', [])), 'exp sets for status change')
print(len(item_uuids), 'items collected')
time2 = time.time()
print(round((time2-time1), 1), 'sec for collection')
for itype, found in store.items():
    print("{}\t{}".format(itype, len(found)))
    

#### In this section you specify the environment to which you wish to transfer the metadata

**NOTE:** here is where you should set parameters:

- if you want to actually do the updates in the specified env - `action=True` otherwise dry run
- if you want to overwrite existing items via patching then `overwrite_existing=True` otherwise they won't be touched

In [None]:
# --- run switches -----------------------------------------------------------
# action: False is a dry run; set True to post/patch in the indicated env
action = False
# overwrite_existing: when an item already exists in the target, True means it
# will be overwritten by patching (will include user/award etc)
overwrite_existing = False

# --- target environment -----------------------------------------------------
transfer_env = 'fourfront-webdev'
transfer_key = ff_utils.get_authentication_with_server({}, ff_env=transfer_env)

In [None]:
### The following cells should generally only be run once. Adjusting the parameters changes the behavior - items that already exist are either patched or left untouched (not overwritten) - nonetheless care should be taken

#### **WARNING** - running first with `action=False` is recommended, but if a conflict is encountered it will not be picked up by the dry run and you may end up with only partially posted items.

#### This does initial posting of items that do not yet exist in the target environment

##### NOTE: only required fields are posted for new items so if you stop here you will end up with partially posted items with only a few properties.  

### Round I - only post the required fields for new items - skip if exists already

In [None]:
# Round I: for every collected item that does not yet exist on the target,
# post only its required + identifying fields. Items that already exist are
# either queued for patching in Round II (overwrite_existing=True) or dropped.

# NOTE(review): the original loop stored schema_name[key] (i.e. `name` itself)
# under `name`, so this is an identity map rather than a true reverse map.
# Behaviour is kept as-is; confirm whether name -> key was intended.
rev_schema_name = {name: name for name in schema_name.values()}

# item types present in the store, in the posting order given by ORDER
my_types = [i for i in ORDER if i in store]

second_round_items = {}


def _find_on_target(an_item, id_props, auth):
    """Look an item up on the target environment.

    Tries the item's uuid first, then the item's VALUE for each identifying
    property (a list value, e.g. several aliases, is tried entry by entry).

    Returns a tuple (existing_item, matched_on) where matched_on is 'uuid' or
    the name of the identifying property that matched, or (None, None) when
    every lookup failed.
    """
    try:
        return ff_utils.get_metadata(an_item['uuid'], key=auth), 'uuid'
    except Exception:
        # a failed lookup is the "does not exist under this uuid" signal
        pass
    for prop in id_props:
        if prop == 'uuid':
            continue  # already tried above
        value = an_item.get(prop)
        if not value:
            continue
        candidates = value if isinstance(value, list) else [value]
        for candidate in candidates:
            try:
                return ff_utils.get_metadata(candidate, key=auth), prop
            except Exception:
                continue
    return None, None


for a_type in my_types:
    print(a_type)
    obj_type = rev_schema_name[a_type]
    # find required and identifying fields from the target's schema
    schema_info = ff_utils.get_metadata('/profiles/{}.json'.format(a_type), key=transfer_key)
    req_fields = schema_info.get('required', [])
    ids = schema_info.get('identifyingProperties', [])
    first_fields = set(req_fields) | set(ids)
    # uuids (as they are on the source) that must not go to Round II
    remove_existing_items = set()
    print(len(store[a_type]), 'items exist on source')
    for an_item in store[a_type]:
        # does the item exist on the target?
        existing, matched_on = _find_on_target(an_item, ids, transfer_key)
        if existing and matched_on != 'uuid':
            print("WARNING! uuid mismatch: {}\t{}".format(an_item.get('uuid'), existing.get('uuid')))
            print("Existence on {} based on retrieval with {} ID".format(transfer_key.get('server'), matched_on))

        if existing:
            if overwrite_existing:
                if existing.get('uuid') != an_item.get('uuid'):
                    print('WARNING - mismatched uuids: will patch {}!'.format(existing.get('uuid')))
                    # record the source uuid so it is never posted, then adopt
                    # the target's uuid so Round II patches the right item
                    # (an_item is mutated in place, so the store sees the swap)
                    remove_existing_items.add(an_item['uuid'])
                    an_item['uuid'] = existing.get('uuid')
                    print("uuid swapped")
                    print(an_item)
                # patch in second step will update the item
                print('existing item will be patched in second step')
            else:
                remove_existing_items.add(an_item['uuid'])
                print("{} {} can not post item".format(obj_type, an_item['uuid']))
            continue

        post_first = {key: value for (key, value) in an_item.items() if key in first_fields}
        print('posting')
        if action:
            ff_utils.post_metadata(post_first, obj_type, key=transfer_key)

    second_round_items[a_type] = [i for i in store[a_type] if i['uuid'] not in remove_existing_items]
    print(len(second_round_items[a_type]), 'items transfered to target')
    print()

    

### Round II - patch the rest of the metadata updating pre-existing items if `overwrite_existing=True`

In [None]:
# Round II: patch the full metadata onto every item kept by Round I.
for itype, found in second_round_items.items():
    print("{}\t{}".format(itype, len(found)))

counter = 0
for a_type in my_types:
    # .get() tolerates a Round I run that stopped before reaching this type
    for an_item in second_round_items.get(a_type, []):
        counter += 1
        if action:
            ff_utils.patch_metadata(an_item, obj_id=an_item['uuid'], key=transfer_key)

# only claim that items were patched when patching was actually enabled
if action:
    print("{} items patched in second round".format(counter))
else:
    print("{} items would be patched in second round (dry run)".format(counter))

## WARNING!

### Nothing below here has been updated or tested so may or may not work!!!

### Round III - move attachments

In [None]:
# Round III: copy attachment blobs from the source blob bucket to the target's.
import boto3
s3 = boto3.resource('s3')

# bucket names are read from each environment's /health page;
# my_auth is the source-environment auth set up in the first cell
source_health = ff_utils.get_metadata('/health', key=my_auth)
source_att = source_health['blob_bucket']

target_health = ff_utils.get_metadata('/health', key=transfer_key)
target_att = target_health['blob_bucket']

for a_type in my_types:
    for an_item in second_round_items.get(a_type, []):
        if 'attachment' not in an_item:
            continue
        at_key = an_item['attachment']['blob_id']
        copy_source = {'Bucket': source_att, 'Key': at_key}
        try:
            s3.meta.client.copy(copy_source, target_att, at_key)
        except Exception as e:
            # keep going, but show the real error: a missing key is only one
            # of the reasons a copy can fail
            print('Can not find attachment on source', an_item['uuid'], e)
            continue
        print('attachment copied')

## WARNING - not tested

### Round IV - move files

In [None]:
# Round IV: copy the file objects (main file + extra files) of transferred
# file items from the source buckets to the matching target buckets.
import boto3
s3 = boto3.resource('s3')

# bucket names are read from each environment's /health page;
# my_auth is the source-environment auth set up in the first cell
source_health = ff_utils.get_metadata('/health', key=my_auth)
source_raw = source_health['file_upload_bucket']
source_pf = source_health['processed_file_bucket']

target_health = ff_utils.get_metadata('/health', key=transfer_key)
target_raw = target_health['file_upload_bucket']
target_pf = target_health['processed_file_bucket']

for a_type in my_types:
    # pick the bucket pair by file type; non-file item types are skipped
    if a_type in ['file_processed', 'file_vistrack']:
        source_file_bucket = source_pf
        target_file_bucket = target_pf
    elif a_type in ['file_reference', 'file_fastq', 'file_microscopy', 'file_fasta', 'file_calibration']:
        source_file_bucket = source_raw
        target_file_bucket = target_raw
    else:
        continue

    for an_item in second_round_items.get(a_type, []):
        # the upload keys are taken from the item as served by the source
        file_resp = ff_utils.get_metadata(an_item['uuid'], key=my_auth)
        # extra file keys first, then the main file key
        files_to_upload = [extra['upload_key'] for extra in file_resp.get('extra_files') or []]
        files_to_upload.append(file_resp['upload_key'])

        for file_key in files_to_upload:
            copy_source = {'Bucket': source_file_bucket, 'Key': file_key}
            try:
                s3.meta.client.copy(copy_source, target_file_bucket, file_key)
            except Exception as e:
                # keep going, but show the real error: a missing key is only
                # one of the reasons a copy can fail
                print('Can not find file on source', file_key, e)
                continue
            print('file copied')
            