# Cloning via Backup

## Setting Up

In [None]:
from arcgis.gis import GIS, Item
import keyring
import pathlib
import json

In [None]:
# ID of the item in the source organization that this notebook clones.
source_itemId = '4e373608ba444a639bfaa0c893d3d99d'

#### Make a profile to store credentials

In [None]:
# Illustrative snippet only: it sits inside a string literal, so it is never
# executed. The username, password and profile name are placeholders.
'''
GIS(username='username', password='password',
                     profile='example_profile')
'''

#### Connect to our Organizations

In [None]:
# Connect to both organizations through stored profiles, so no credentials
# appear in the notebook. `source` is read from; `target` receives the clone.
source = GIS(profile="dyaw_geosaurus")
target = GIS(profile="dyaw_Arch")

## Gathering Artifacts

In [None]:
# Build a handle to the source item from its ID.
source_item = Item(source, source_itemId)

# Bare expression: shows the item as the cell output.
source_item

### Downloading Item

In [None]:
# Download the item's data into the backup folder. `data` is opened as a
# JSON file further down, so it is used as the path of the downloaded file.
data = source_item.download(r"C:\temp\clone_via_backup")

### Downloading Thumbnail

In [None]:
# Download the thumbnail into the backup folder; the result is passed to
# target.content.add(thumbnail=...) when the item is recreated.
thumbnail = source_item.download_thumbnail(r"C:\temp\clone_via_backup")

### Downloading Metadata

In [None]:
# Download the metadata into the backup folder; the result is passed to
# target.content.add(metadata=...) when the item is recreated.
metadata = source_item.download_metadata(r"C:\temp\clone_via_backup")

### Gathering + Downloading Item Properties

#### Define the properties we want to keep

In [None]:
# Item property names that are carried over when the item is recreated in the
# target. Each name appears exactly once ("culture" used to be listed twice).
VALID_PROPERTIES_PORTAL_ITEM_ADD = [
    "type",
    "dataUrl",
    "filename",
    "typeKeywords",
    "description",
    "title",
    "text",
    "tags",
    "snippet",
    "extent",
    "spatialReference",
    "accessInformation",
    "licenseInfo",
    "culture",
    "commentsEnabled",
    "overwrite",
    "url",
]

#### Grabbing the properties we want to keep
Using sets to compare our item's vars to VALID_PROPERTIES_PORTAL_ITEM_ADD

In [None]:
# Keep only the attributes of the source item whose names are on the
# allow-list, using a set intersection of the two name collections.
item_attributes = vars(source_item)
wanted_keys = set(VALID_PROPERTIES_PORTAL_ITEM_ADD) & set(item_attributes)
add_properties = {key: item_attributes[key] for key in wanted_keys}

#### Let's save that to JSON for future use

In [None]:
# The properties file lives next to the other downloaded artifacts.
add_properties_path = pathlib.PurePath(r"C:\temp\clone_via_backup") / "add_properties.json"

# Serialize the selected properties so they can be reloaded later.
with open(add_properties_path, mode="w") as properties_file:
    json.dump(add_properties, fp=properties_file)

## Adding the Item

### Grabbing the properties and data we saved earlier as JSON

In [None]:
# Read both backup files as UTF-8 explicitly. Without an encoding, open()
# falls back to the locale code page (e.g. cp1252 on Windows), which
# mis-decodes any non-ASCII text in the downloaded JSON.
with open(add_properties_path, "r", encoding="utf-8") as json_data:
    add_properties = json.load(json_data)

# The downloaded item data is parsed as JSON, so this cell only works for
# items whose data is a JSON document.
with open(data, "r", encoding="utf-8") as json_data:
    new_data = json.load(json_data)

### Create the item

In [None]:
# Create the item in the target from the saved properties, thumbnail and metadata.
result = target.content.add(item_properties=add_properties, thumbnail=thumbnail, metadata=metadata)

### Add the data

In [None]:
# Push the data loaded from the backup onto the item that was just created.
result.update(data=new_data)

## Let's throw this into a function
To make it easier for future use

In [None]:
# Item property names that migrate_via_backup copies from the source item.
# Each name appears exactly once ("culture" used to be listed twice).
VALID_PROPERTIES_PORTAL_ITEM_ADD = [
    "type",
    "dataUrl",
    "filename",
    "typeKeywords",
    "description",
    "title",
    "text",
    "tags",
    "snippet",
    "extent",
    "spatialReference",
    "accessInformation",
    "licenseInfo",
    "culture",
    "commentsEnabled",
    "overwrite",
    "url",
]

def migrate_via_backup(itemId: str, source: GIS, target: GIS, *,
                       export_root: str = r"C:\temp\clone_via_backup"):
    """Back up one item from ``source`` to disk and recreate it in ``target``.

    The item's data, thumbnail, metadata and a JSON file with its selected
    properties are written to ``<export_root>/<itemId>``. The target item is
    then created from those files.

    Args:
        itemId: ID of the item in the source organization.
        source: Connection the item is read from.
        target: Connection the item is created in.
        export_root: Folder under which the per-item backup folder is
            created. Defaults to the location used throughout the notebook.

    Returns:
        The item created by ``target.content.add``.

    Note:
        The downloaded item data is parsed with ``json.load``, so this only
        works for items whose data is a JSON document.
    """
    export_dir = pathlib.Path(export_root, itemId)
    # Writing add_properties.json fails if the folder is missing, so create
    # it up front instead of relying on the download calls to do so.
    export_dir.mkdir(parents=True, exist_ok=True)

    # Part 1: download the artifacts from the source.
    source_item = Item(source, itemId)
    data = source_item.download(export_dir)
    thumbnail = source_item.download_thumbnail(export_dir)
    metadata = source_item.download_metadata(export_dir)

    # Keep only the item attributes whose names are on the allow-list.
    item_attributes = vars(source_item)
    add_properties = {
        key: item_attributes[key]
        for key in set(VALID_PROPERTIES_PORTAL_ITEM_ADD) & set(item_attributes)
    }

    add_properties_path = export_dir / "add_properties.json"
    with open(add_properties_path, "w", encoding="utf-8") as outfile:
        json.dump(add_properties, outfile)

    # Part 2: recreate the item in the target from the files on disk, so the
    # target item is built from exactly what was backed up.
    # UTF-8 is given explicitly because open() otherwise uses the locale
    # code page (e.g. cp1252 on Windows) and mis-decodes non-ASCII JSON.
    with open(add_properties_path, "r", encoding="utf-8") as json_data:
        add_properties = json.load(json_data)

    with open(data, "r", encoding="utf-8") as json_data:
        new_data = json.load(json_data)

    # NOTE: owner and folder are deliberately not forwarded; the item is
    # created with whatever defaults target.content.add applies.
    result = target.content.add(
        item_properties=add_properties,
        thumbnail=thumbnail,
        metadata=metadata,
    )

    # The data is attached in a second step, after the item exists.
    result.update(data=new_data)

    return result
    
    

## Take a look at the result
### Something is missing here... where are the dependencies?

In [None]:
# Bare expression: shows the item created in the target as the cell output.
result

## Let's try that again
This time, we'll find and take care of the dependencies first.

### Finding the dependencies from the item data

The function below recursively walks nested dicts and lists and returns every substring that matches a regular expression.

In [None]:
from collections import OrderedDict
import itertools
import re


def _collect_regex_matches(node, regex, res):
    """Append the ``re.findall`` result of every matching string under ``node`` to ``res``."""
    if isinstance(node, dict):
        for value in node.values():
            _collect_regex_matches(value, regex, res)
    elif isinstance(node, list):
        for value in node:
            _collect_regex_matches(value, regex, res)
    elif isinstance(node, str):
        matches = re.findall(regex, node, re.MULTILINE)
        if matches:
            res.append(matches)
    # Any other type (numbers, None, ...) carries no text and is ignored.


def find_regex(i: "dict | list | str", regex: str, res=None):
    """Return every regex match found in the strings of a nested structure.

    Dict values and list elements are searched recursively; dict keys are
    not searched.

    Args:
        i: A string, or a dict/list nesting strings at any depth.
        regex: Pattern handed to ``re.findall`` (with ``re.MULTILINE``).
        res: Optional accumulator. One list of matches is appended to it per
            matching string. A new list is created when omitted, so results
            no longer leak from one call into the next.

    Returns:
        list: The matches with duplicates removed, in first-seen order. It
        also includes matches already present in a caller-supplied ``res``.
    """
    if res is None:
        res = []
    _collect_regex_matches(i, regex, res)
    # Flatten the per-string match lists once, then drop duplicates while
    # keeping the order in which matches were first seen.
    flattened = itertools.chain.from_iterable(res)
    return list(OrderedDict.fromkeys(flattened))

#### Here's a regex string to find GUIDs and URLs

In [None]:
# Matches 32 lowercase hex characters without hyphens, where the 13th
# character is 1-5 and the 17th is 8, 9, a or b.
regex_guid = r"[0-9a-f]{8}[0-9a-f]{4}[1-5][0-9a-f]{3}[89ab][0-9a-f]{3}[0-9a-f]{12}"

# Raw string so that \( and \) reach the regex engine without relying on
# Python's handling of invalid escape sequences. The pattern is unchanged.
regex_url = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'

### Now, we have what we need to find all itemIds in the JSON
Remember, itemIds are GUIDs

In [None]:
# Collect every GUID-shaped string in the item data. A fresh list is passed
# as the accumulator so earlier calls cannot contribute matches.
itemIds = find_regex(new_data, regex_guid, [])
# Bare expression: shows the IDs as the cell output.
itemIds

#### but wait... that itemId does not exist in our org!

In [None]:
# Report, for each ID, whether the target organization has such an item.
for itemId in itemIds:
    try:
        found_item = target.content.get(itemId)
    except Exception:
        # A failed lookup is treated the same as a missing item.
        found_item = None

    # content.get can return None instead of raising when the item does not
    # exist, so the result has to be checked as well.
    if found_item is None:
        print(f'{itemId} not found in target')
    else:
        print(f'{itemId} found in target')

## And what about the dependency's dependencies? And their dependencies?  

### Let's make a function to find dependencies, **recursively** 

In [None]:
def _collect_dependencies(item, portal, found, visited):
    """Append to ``found`` every not-yet-visited item referenced from ``item``'s data."""
    for candidate_id in find_regex(item.get_data(), regex_guid, []):
        if candidate_id in visited:
            # Already handled (or the root itself): skipping it prevents
            # duplicates and endless recursion on circular references.
            continue
        visited.add(candidate_id)
        try:
            candidate = portal.content.get(candidate_id)
            if candidate is None:
                # A GUID-shaped string that is not an item in this portal.
                continue
            found.append(candidate_id)
            _collect_dependencies(candidate, portal, found, visited)
        except Exception as exc:
            # Best effort: one unreadable dependency must not abort the walk.
            print(f"Skipping {candidate_id}: {exc}")


def find_dependencies(itemId: str, source_portal: GIS, dependencies_from_source=None):
    """Recursively find the items that ``itemId`` depends on.

    An item's dependencies are the GUID-shaped strings in its data that
    resolve to items in ``source_portal``. Each dependency is searched in the
    same way.

    Args:
        itemId: ID of the item whose dependencies are wanted.
        source_portal: Connection used for every lookup.
        dependencies_from_source: Optional list to extend; IDs already in it
            are not visited again. A new list is created when omitted.

    Returns:
        list: Dependency IDs without duplicates, each appended before its own
        dependencies. ``itemId`` itself is never included.

    Raises:
        ValueError: If ``itemId`` cannot be found in ``source_portal``.
    """
    if dependencies_from_source is None:
        dependencies_from_source = []

    item = source_portal.content.get(itemId)
    if item is None:
        raise ValueError(f"Item {itemId} was not found in the source portal")

    visited = {itemId, *dependencies_from_source}
    _collect_dependencies(item, source_portal, dependencies_from_source, visited)
    return dependencies_from_source

### But we don't need to migrate Esri-owned items!
This won't work, and we don't need to do it. For content owned by Esri built-in users, itemIds are the same across organizations.

In [None]:
# Owner names treated as Esri built-in users; items they own are skipped
# during migration. Each name appears once ("esri_nav" used to be listed twice).
ESRI_BUILTIN_USERS = [
    "esri",
    "esri_apps",
    "esri_ar",
    "esri_boundaries",
    "esri_bs",
    "esri_ca",
    "esri_cs",
    "esri_da",
    "esri_de",
    "esri_demographics",
    "esri_el",
    "esri_en",
    "esri_es",
    "esri_et",
    "esri_fi",
    "esri_fr",
    "esri_he",
    "esri_hi",
    "esri_hk",
    "esri_hr",
    "esri_hu",
    "esri_id",
    "esri_ind",
    "esri_it",
    "esri_ja",
    "esri_ko",
    "esri_livingatlas",
    "esri_lt",
    "esri_lv",
    "esri_nav",
    "esri_nb",
    "esri_nl",
    "esri_pl",
    "esri_po",
    "esri_pt",
    "esri_ro",
    "esri_ru",
    "esri_sl",
    "esri_sr",
    "esri_sv",
    "esri_th",
    "esri_tr",
    "esri_tw",
    "esri_vi",
    "esri_zh",
]

## Now, we'll pull this all together and try again

### First, we'll find everything we need to clone
These, along with the source item, will make a list of everything we want to migrate

In [None]:
source_item = Item(source, source_itemId)

# The dependencies come first and the source item itself is appended last.
all_items_to_migrate = find_dependencies(itemId = source_itemId, source_portal = source) + [source_itemId]

### Next, we'll migrate all of this

We'll throw the old and new itemIds into a dictionary we'll use in the next step

#### One thing...

We're going to use content.clone_items to clone the hosted feature services. Backing up these can be done, but is a bit more complicated.

In [None]:
# Maps each source item ID to the ID of its copy in the target.
replacement_dict = {}

for itemId in all_items_to_migrate:
    # An ID can be listed more than once; migrate each item a single time.
    if itemId in replacement_dict:
        continue

    # Grab the item object
    item = source.content.get(itemId)

    # content.get can return None for an ID that is not an item in the source.
    if item is None:
        print(f"Skipping {itemId}: not found in source")
        continue

    # Items owned by Esri built-in users are not migrated.
    if item.owner in ESRI_BUILTIN_USERS:
        continue

    print(f"Migrating {itemId} {item.type}")
    if item.type != 'Feature Service':
        result = migrate_via_backup(itemId, source, target)
    else:
        # Feature services are cloned directly instead of being backed up.
        result = target.content.clone_items(
            items=[item],
            search_existing_items=False,
        )[0]

    replacement_dict[itemId] = result.id

## Finally, we need to update JSON in the target
To have correct itemIds for migrated dependencies

### But first, some more helper code
This code will traverse the item's JSON and will use the itemId replacement dictionary to find and replace itemIds. 

In [None]:
def replace_deep(data, a, b):
    """Return a copy of ``data`` with ``a`` replaced by ``b`` in every string value.

    Lists and dicts are rebuilt recursively. Dict keys are left as they are,
    and values of any other type are returned unchanged.

    Args:
        data (dict, list, or str): Structure whose strings are rewritten.
        a (str): Text to find.
        b (str): Text to put in its place.

    Returns:
        dict, list, or str: The structure with the text replaced.
    """
    if isinstance(data, list):
        return [replace_deep(element, a, b) for element in data]

    if isinstance(data, dict):
        rebuilt = {}
        for key, value in data.items():
            rebuilt[key] = replace_deep(value, a, b)
        return rebuilt

    if isinstance(data, str):
        return data.replace(a, b)

    # Numbers, None, tuples, ...: nothing to rewrite.
    return data

### Finally, replace old itemIds with new itemIds

In [None]:
# Bare expression: shows the source-ID -> target-ID mapping as the cell output.
replacement_dict

In [None]:
# For every migrated item, rewrite all old item IDs in its data to the new ones.
for migrated_id in replacement_dict.values():
    target_item = target.content.get(migrated_id)
    print(target_item.homepage)
    target_item_data = target_item.get_data()
    # Distinct names for the inner loop: it used to rebind the outer loop's
    # k and v, which only worked because they were read before this point.
    for old_id, new_id in replacement_dict.items():
        target_item_data = replace_deep(target_item_data, old_id, new_id)
    target_item.update(data=target_item_data)
    

### Now, we can admire our work

In [None]:
# Look up the target ID recorded for the original source item...
final_result = replacement_dict[source_itemId]
# ...and show that item as the cell output.
target.content.get(final_result)