### PLEASE COPY NOTEBOOK TO YOUR FOLDERS TO PREVENT COMMIT CONFLICTS

#### This notebook can be used to copy metadata from one environment and post or patch items (if they already exist) into a different environment

#### *This notebook uses ES functions so needs to be run in a personal EC2*

This cell sets up the auth for the source environment (the one you are retrieving metadata from) and for the target environment. Then, starting from a list of items provided by ID or returned from a search, it retrieves all linked items.

**Note:** some item types can be excluded by modifying the parameters to `expand_es_metadata`

In [None]:
from dcicutils import ff_utils
from functions.notebook_functions import *
import json
import time

# options you may want to change
overwrite_existing = True  # if the item exists in the target, should it be overwritten
action = True  # set True to post/patch in the indicated env

# auth for source
# get auth from keypairs.json
src_auth = get_key('andyprod')

# or alternatively from env name
# source_env = 'fourfront-production-green'
# src_auth = ff_utils.get_authentication_with_server({}, ff_env=source_env)

# auth for target
# get auth from keypairs.json
# target_auth = get_key('andywebdev')

# or alternatively from env name
target_env = 'fourfront-webdev'
target_auth = ff_utils.get_authentication_with_server({}, ff_env=target_env)

print('TRANSFERRING envs:\nFROM: {}\nTO:{}\n'.format(src_auth['server'], target_auth['server']))

schema_name = get_schema_names(src_auth)

# use either a list of IDS of starting items or search that retrieves them
items_in_scope = ['4DNESSNWXHXK']  # ['4DNACCCC', '4DNACCCCC']

# search_url always needs a value so the `elif` below can not hit an undefined name;
# to start from a search instead, empty items_in_scope and uncomment the line below
search_url = None
# search_url = '/search/?award.project=4DN&experiments_in_set.experiment_type=dilution+Hi-C&experimentset_type=replicate&lab.display_title=Bing+Ren%2C+UCSD&status=pre-release&type=ExperimentSetReplicate'

time1 = time.time()
if items_in_scope:
    starting_item_uuids = [ff_utils.get_metadata(i, src_auth)['uuid'] for i in items_in_scope]
elif search_url:
    starting_item_uuids = [i['uuid'] for i in ff_utils.search_metadata(search_url, src_auth)]
else:
    # fail early with a clear message instead of a NameError further down
    raise ValueError('provide either items_in_scope or search_url to select the starting items')

# reset first so results from a previous run are not left behind if the expansion fails
store = {}
item_uuids = []
store, item_uuids = ff_utils.expand_es_metadata(
    starting_item_uuids, src_auth, store_frame='raw', add_pc_wfr=True, ignore_field=[])

print(len(starting_item_uuids), 'parent item(s) to transfer')
print(len(item_uuids), 'items collected')
time2 = time.time()
print(round((time2-time1), 1), 'sec for collection')
# report how many items of each type were collected
for itype, found in store.items():
    print("{}\t{}".format(itype, len(found)))

In [None]:
#### Set up some globalish variables
#### And helper functions

In [None]:
# set up some generally global variables
# NOTE(review): each value stored is `schema_name[key]`, which is the same string
# as the key it is stored under, so this looks like an identity mapping rather
# than a reversed lookup - confirm this is what is intended
rev_schema_name = {name: schema_name[key] for key, name in schema_name.items()}

# item types that were collected but have no position in ORDER
missing_types = [found_type for found_type in store if found_type not in ORDER]
print("MISSING FROM ORDER BUT IN STORE:")
print(missing_types)

# collected item types, in the sequence given by ORDER
my_types = [ordered_type for ordered_type in ORDER if ordered_type in store]

# filled in by the posting round
second_round_items = {}
id_swappers = {}


# helper functions
def camel_case(name):
    ''' title-cases the name and drops every underscore
        eg. file_processed -> FileProcessed
    '''
    return name.title().replace('_', '')

def search_for_existing(id_list, item, itype, transfer_auth):
    ''' if the uuid is not found tries to find the item by other identifying properties
        and if found will return it in raw frame

        id_list: names of the properties to search by ('uuid' is skipped)
        item: dict of properties of the item to look for
        itype: item type name, passed through camel_case to build the search
        transfer_auth: auth handed to the ff_utils calls

        returns the item found by the first search that gives exactly one hit
        or None if no search gives a single hit
    '''
    from urllib.parse import quote_plus

    base_query = 'search/?type={}&{}={}&frame=object'
    itype = camel_case(itype)
    for id2chk in id_list:
        if id2chk == 'uuid':
            continue
        val2chk = item.get(id2chk)
        if not val2chk:
            continue
        # a property holding a list needs one search per value - formatting the
        # list itself into the query would search for its python repr
        values = val2chk if isinstance(val2chk, list) else [val2chk]
        for value in values:
            # encode so characters like & + or spaces can not break the query string
            query = base_query.format(itype, id2chk, quote_plus(str(value)))
            try:
                srch_res = ff_utils.search_metadata(query, transfer_auth)
                if len(srch_res) == 1:
                    return ff_utils.get_metadata(srch_res[0].get('uuid'), transfer_auth, add_on='frame=raw')
            except Exception:
                # best effort - a failed search just means try the next value
                # (a bare except would also swallow KeyboardInterrupt)
                continue
    return None

def check_for_swaps(props_to_check, swaps):
    ''' replaces uuids that have swapped with the value to use taken from the swaps dict

        props_to_check: any value - dicts and lists are walked recursively
        swaps: dict of value to replace -> replacement

        returns a new structure in which every string found as a dict value,
        list element or as the value itself is replaced if it is a key of swaps;
        dict keys and non-string values are returned unchanged and the input
        is not modified
    '''
    if isinstance(props_to_check, dict):
        return {prop: check_for_swaps(val, swaps) for prop, val in props_to_check.items()}
    if isinstance(props_to_check, list):
        return [check_for_swaps(val, swaps) for val in props_to_check]
    if isinstance(props_to_check, str):
        return swaps.get(props_to_check, props_to_check)
    # numbers, booleans, None etc. can not be uuids so pass through untouched
    return props_to_check


### The following cells should generally only be run once but adjusting parameters can change the behavior so items are patched if they already exist or not over-written - nonetheless care should be taken

#### **WARNING** - running first with `action=False` is recommended, but if a conflict is encountered it will not be picked up by the dry run and you may end up with only partially posted items.

#### This does initial posting of items that do not yet exist in the target environment

##### NOTE: only required fields are posted for new items so if you stop here you will end up with partially posted items with only a few properties.  

### Round I - only post the required fields for new items - skip if exists already

In [None]:
# Round I - for every collected type (in ORDER sequence) post the required and
# identifying fields of the items that do not exist on the target yet and
# collect in second_round_items what should be patched afterwards
for a_type in my_types:
    print(a_type)
    # if a_type != 'experiment_type':
    #    continue
    obj_type = rev_schema_name[a_type]
    # find required field
    schema_info = ff_utils.get_metadata('/profiles/{}.json'.format(a_type), key=target_auth)
    req_fields = schema_info['required']
    ids = schema_info['identifyingProperties']
    first_fields = set(req_fields + ids)
    # uuids (as on the source) of items that must be left out of the second round
    remove_existing_items = []
    print(len(store[a_type]), 'items exist on source')
    for idx, an_item in enumerate(store[a_type]):
        # does the item exist on the target - try the uuid first and fall back
        # to searching by the other identifying properties
        try:
            existing = ff_utils.get_metadata(an_item['uuid'], key=target_auth, add_on='frame=raw')
        except Exception:
            # only real lookup failures mean "not found by uuid" - a bare except
            # would also turn a KeyboardInterrupt into a post of the item
            existing = search_for_existing(ids, an_item, a_type, target_auth)

        if existing:
            if existing == an_item:
                print("No updates needed")
                continue

            # the item exists but differs - it is never posted, only patched or skipped
            if overwrite_existing:
                if existing.get('uuid') != an_item.get('uuid'):
                    print('WARNING - mismatched uuids: will patch {}!'.format(existing.get('uuid')))
                    # remember the swap so linkTos using the source uuid can be
                    # rewritten to the uuid used on the target
                    id_swappers[an_item.get('uuid')] = existing.get('uuid')
                    remove_existing_items.append(an_item['uuid'])  # first make sure we don't try to post with the other uuid and get a conflict
                    an_item['uuid'] = existing.get('uuid')
                    print("uuid swapped")
                    store[a_type][idx] = an_item

                # patch in second step will update the item
                print('existing item will be patched in second step')
            else:
                remove_existing_items.append(an_item['uuid'])
                print("{} {} can not post item".format(obj_type, an_item['uuid']))
            continue

        post_first = {key: value for (key, value) in an_item.items() if key in first_fields}
        # for posting of required fields we need to assume ordering is respected so can just scan the post_first fields
        # for any swapped uuids
        post_first = check_for_swaps(post_first, id_swappers)
        print('posting')
        print(post_first)
        if action:
            ff_utils.post_metadata(post_first, obj_type, key=target_auth)

    second_round_items[a_type] = [i for i in store[a_type] if i['uuid'] not in remove_existing_items]
    print(len(second_round_items[a_type]), 'items transfered to target')
    print()
    print()

    

### Round II - patch the rest of the metadata updating pre-existing items if `overwrite_existing=True`

In [None]:
# summary of what the first round queued up, one line per item type
for itype, found in second_round_items.items():
    print(itype, len(found), sep='\t')

# Round II - patch the complete metadata of every queued item, type by type
counter = 0
for a_type in my_types:
    obj_type = rev_schema_name[a_type]
    queued = second_round_items[a_type]
    if not queued:
        continue
    for queued_item in queued:
        counter += 1
        # rewrite any uuids that were swapped in the first round
        an_item = check_for_swaps(queued_item, id_swappers)
        if not action:
            # dry run - nothing is sent to the target
            continue
        res = ff_utils.patch_metadata(an_item, obj_id=an_item['uuid'], key=target_auth)
        print(res.get('status'))
print("{} items patched in second round".format(counter))

## WARNING!

### Nothing below here has been updated or tested so may or may not work!!!

### Round III - move attachments

In [None]:
import boto3
s3 = boto3.resource('s3')

# bucket names are read from the health page of each environment
# source_addresses
# (src_auth / target_auth are the auths set up at the top of the notebook -
# the names my_key / transfer_key used here before are not defined anywhere in it)
source_health = ff_utils.get_metadata('/health', key=src_auth)
source_raw = source_health['file_upload_bucket']
source_pf = source_health['processed_file_bucket']
source_att = source_health['blob_bucket']

# target_addresses
target_health = ff_utils.get_metadata('/health', key=target_auth)
target_raw = target_health['file_upload_bucket']
target_pf = target_health['processed_file_bucket']
target_att = target_health['blob_bucket']

# Round III - move attachments
# copy the attachment blob of every transferred item from the source blob
# bucket to the target blob bucket under the same key
for a_type in my_types:
    for an_item in second_round_items[a_type]:
        if 'attachment' not in an_item:
            continue
        at_key = an_item['attachment']['blob_id']
        copy_source = {'Bucket': source_att, 'Key': at_key}
        try:
            s3.meta.client.copy(copy_source, target_att, at_key)
        except Exception as e:
            # best effort - report and move on to the next item; the error is
            # printed as the copy can fail for reasons other than a missing blob
            print('Can not find attachment on source', an_item['uuid'], e)
            continue
        print('attachment copied')

## WARNING - not tested

### Round IV - move files

In [None]:
import boto3
s3 = boto3.resource('s3')

# bucket names are read from the health page of each environment
# source_addresses
# (src_auth / target_auth are the auths set up at the top of the notebook -
# the names my_key / transfer_key used here before are not defined anywhere in it)
source_health = ff_utils.get_metadata('/health', key=src_auth)
source_raw = source_health['file_upload_bucket']
source_pf = source_health['processed_file_bucket']
source_att = source_health['blob_bucket']

# target_addresses
target_health = ff_utils.get_metadata('/health', key=target_auth)
target_raw = target_health['file_upload_bucket']
target_pf = target_health['processed_file_bucket']
target_att = target_health['blob_bucket']

# Round IV - copy the files (and extra files) of transferred file items from
# the source bucket to the matching target bucket under the same key
for a_type in my_types:
    # pick the bucket pair by file type - all other item types have no files to copy
    if a_type in ['file_processed', 'file_vistrack']:
        source_file_bucket = source_pf
        target_file_bucket = target_pf
    elif a_type in ['file_reference', 'file_fastq', 'file_microscopy', 'file_fasta', 'file_calibration']:
        source_file_bucket = source_raw
        target_file_bucket = target_raw
    else:
        continue

    for an_item in second_round_items[a_type]:
        # accumulate all keys from a file object to be uploaded
        # NOTE(review): Round I replaces an item's uuid with the uuid used on the
        # target when they differ, in which case this lookup on the source may
        # fail - confirm how swapped items should be handled here
        file_resp = ff_utils.get_metadata(an_item['uuid'], key=src_auth)
        # extra file keys first, then the main file key
        files_to_upload = [an_extra_file['upload_key'] for an_extra_file in file_resp.get('extra_files') or []]
        files_to_upload.append(file_resp['upload_key'])

        for file_key in files_to_upload:
            copy_source = {'Bucket': source_file_bucket, 'Key': file_key}
            try:
                s3.meta.client.copy(copy_source, target_file_bucket, file_key)
            except Exception as e:
                # best effort - report and move on to the next file; the error is
                # printed as the copy can fail for reasons other than a missing file
                print('Can not find file on source', file_key, e)
                continue
            print('file copied')
            