# In the cell below, add the full path to your CSV spreadsheet.

Also, if there are any additional types of preservation URLs, add a unique part of each URL to the list called `preservation_URLS_have`. The list should be formatted like ['a','b','c'].

In [1]:
# Full path to the CSV spreadsheet that lists the links to add.
donesheet = ''  # put file path here!

# A file_uri containing any of these substrings is treated as a link to a
# preservation copy; every other link is treated as an online copy.
preservation_URLS_have = [
    'preservation-storage1',
    'gwspec-digcol1',
]

In [2]:
import re
import pprint
import json
import requests
import csv
import string
import random

# In the cell below, add your username, password, and API host (GW Dev or Prod)

In [None]:
# ArchivesSpace login details; fill these in before running this cell.
USER = ''  # ArchivesSpace username
PASS = ''  # ArchivesSpace password
HOST = ''  # API host (GW Dev or Prod); API paths such as '/users/...' are appended to it
def aspace_auth(host, username, password):
    """Log in to the ArchivesSpace API and return the session header.

    Args:
        host: API host that the '/users/<username>/login' path is appended to.
        username: ArchivesSpace username.
        password: Password for that user.

    Returns:
        A dict of the form {'X-ArchivesSpace-Session': <token>} when the
        login request answers with HTTP 200, otherwise False.
    """
    # Build the URL from the ``host`` argument. The previous version read the
    # global HOST here, so the value passed by the caller was silently ignored.
    auth = requests.post(host + '/users/' + username + '/login',
                         params={'password': password})
    if auth.status_code != 200:
        return False
    return {'X-ArchivesSpace-Session': auth.json()['session']}

# Log in once; every later API request sends these headers.
headers = aspace_auth(HOST, USER, PASS)
if not headers:
    # aspace_auth returns False when the login request is rejected. Stop here
    # rather than letting the cells below send unauthenticated requests.
    raise Exception('ArchivesSpace login failed. Check USER, PASS and HOST, then run this cell again.')
# NOTE: this prints the live session token; clear the cell output before
# sharing the notebook.
print(headers)

# Don't edit anything below. To update ArchivesSpace, run the cells below.

In [None]:
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    Used to build the "faux code" that is appended to a digital object ID
    when the preferred ID is already taken. By default the result is six
    characters long and made of uppercase ASCII letters and digits.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)

def extract_key_value_pairs(csv_file):
    """Read a CSV file into a list of {column header: cell value} dicts.

    The first row of the file is taken as the header row. Each following
    non-blank row becomes one dict; when a row has fewer or more cells than
    there are headers, the extra headers or cells are dropped (``zip``
    stops at the shorter of the two).

    Args:
        csv_file: Path of the CSV file. It is read as UTF-8, and a leading
            byte-order mark is stripped.

    Returns:
        A list with one dict per data row, in file order. An empty file
        yields an empty list.
    """
    key_value_pairs = []

    # newline='' is what the csv module asks for: it lets the reader see the
    # original line endings, so line breaks inside quoted cells are preserved.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as file:
        reader = csv.reader(file)
        column_names = next(reader, None)  # header row, or None for an empty file
        if column_names is None:
            return key_value_pairs

        for row in reader:
            if not row:
                # A completely blank line (csv.reader yields []) carries no
                # data and would otherwise turn into a useless empty dict.
                continue
            key_value_pairs.append(dict(zip(column_names, row)))

    return key_value_pairs

# Main script: for every row of the CSV, create a new Digital Object that
# holds the row's link and attach it to the row's archival object, unless a
# Digital Object already linked to that archival object has the same link.
#
# Columns read from each row: file_uri, archival_object, publish_link,
# xlink_actuate_attribute, xlink_show_attribute, new_title.
# All API paths below use repository 2.
csv_file = donesheet
inputdata = extract_key_value_pairs(csv_file)

# Both patterns are raw strings (so \d and \. are real regex escapes) and
# are compiled once instead of on every row.
# The archival object ID is the run of digits after "archival_object_" at
# the very end of the link.
ao_link_pattern = re.compile(r'.+archival_object_(\d+)$')
# Captures the last path segment of the file URI, without an optional 3-4
# character extension and without an optional trailing "*".
do_id_pattern = re.compile(r'.+/(.+?)(\..{3,4})?\*?$')

for item in inputdata:
    #report to operator
    print('Starting: ' + item['file_uri'])
    print('Archival object: ' + item['archival_object'])

    #get the archival object ID
    ao_match = ao_link_pattern.search(item['archival_object'])
    if not ao_match:
        raise Exception('It looks like we found an archival object whose link isn\'t well formatted: ' + str(item['archival_object']))
    ao_id = ao_match.group(1)

    #create the file_version dictionary to add to digital object
    file_uri = item['file_uri']
    if item['publish_link'] == "TRUE":
        publish = True
    elif item['publish_link'] == "FALSE":
        publish = False
    else:
        print('ERROR: Please go back and check your publish column. Found a value other than True/False.')
        break
    xlink_actuate_attribute = item['xlink_actuate_attribute']
    xlink_show_attribute = item['xlink_show_attribute']

    file_version = {'file_uri':file_uri,'publish':publish,'xlink_actuate_attribute':xlink_actuate_attribute,'xlink_show_attribute':xlink_show_attribute}

    #retrieve the full archival object record via API
    ao_response = requests.get(HOST + '/repositories/2/archival_objects/' + ao_id, headers=headers)
    if ao_response.status_code == 404:
        raise Exception('This archival object couldn\'t be retrieved with the api. Something may be wrong with the URL?: ' + item['archival_object'])
    if ao_response.status_code != 200:
        # Any other failure returns an error body rather than a record. Stop
        # before anything is written, otherwise a Digital Object could be
        # created and then left unlinked when the update below fails.
        raise Exception('This archival object couldn\'t be retrieved with the api (HTTP status ' + str(ao_response.status_code) + '): ' + item['archival_object'])
    ao_record = ao_response.json()

    #Logic for picking title from AO or if overriding with CSV title
    if item['new_title'] == "":
        title = ao_record['title']
    else:
        title = item['new_title']

    #reset per-row state so nothing carries over from the previous row
    skipitem = False #flag: set when the link is already on a linked Digital Object
    digital_object_IDs = [] #IDs of the Digital Objects already linked to this archival object

    for instance in ao_record.get('instances', []): #for each existing instance, if any:
        if instance['instance_type'] != 'digital_object': #skip it if it isn't a digital object
            continue
        do_ref = instance['digital_object']['ref'] #API path of the linked Digital Object
        existing_do_response = requests.get(HOST + do_ref, headers=headers) #request the full record from the API
        if existing_do_response.status_code != 200: #if it can't retrieve successfully:
            print('There is a Digital Object linked to this archival object, but could not retrieve it: ' + do_ref)
            print(existing_do_response)
            raise Exception('Stopping script because there was an error retrieving an existing Digital Object linked to this archival object: ' + do_ref)
        existing_do_record = existing_do_response.json()
        #add the digital object to list of ids for this archival object
        digital_object_IDs.append(existing_do_record['digital_object_id'])
        #check if link is already in DO
        for existing_file_version in existing_do_record['file_versions']:
            if existing_file_version['file_uri'] == file_uri:
                print('This Digital Object already has your link: '+do_ref+' Skipping this row . . . ')
                skipitem = True

    if skipitem: #the link was found above, so don't make any edits for this row
        print('Skipped: your link was already there!\n')
        print('')
        continue

    #Create a new Digital Object
    new_digital_object = {}
    new_digital_object['file_versions'] = [file_version]
    # NOTE(review): this sends jsonmodel_type as a one-element list; a plain
    # string 'digital_object' is presumably what the API expects. Left
    # unchanged pending confirmation.
    new_digital_object['jsonmodel_type'] = ['digital_object']

    #generate a digital object ID based on the file name in the link
    do_id_match = do_id_pattern.search(file_uri)
    if not do_id_match:
        raise Exception('Could not work out a digital object ID from this file_uri (it has no "/"-separated file name): ' + file_uri)
    do_id = do_id_match.group(1)

    if any(x in file_uri for x in preservation_URLS_have): #if the link is to the preservation server
        new_digital_object['title'] = 'Preservation copy: ' + title
        do_id_preCheck = do_id + '_presCopy_01'
        taken_warning = ' ***** WARNING *****  DIGITAL OBJECT ID taken:'
    else:
        new_digital_object['title'] = 'Online copy: ' + title
        do_id_preCheck = do_id + '_onlineCopy_01'
        taken_warning = 'WARNING: DIGITAL OBJECT ID taken:'

    # NOTE(review): the suffixed ID (do_id_preCheck) is only used for the
    # collision check. When it is free, the ID that gets posted is the bare
    # do_id without the _presCopy_01 / _onlineCopy_01 suffix. Confirm whether
    # the suffixed ID was meant to be posted; behavior is left unchanged here.
    if do_id_preCheck in digital_object_IDs: #If that id is already taken by a DO attached to this item
        do_id = do_id_preCheck[:-2] + id_generator() #swap the trailing "01" for a random faux code
        print(taken_warning + do_id_preCheck + '\nFaux code was appended:' + do_id)
    new_digital_object['digital_object_id'] = do_id

    new_do_record_post = json.dumps(new_digital_object)
    new_digital_object_result = requests.post(HOST + '/repositories/2/digital_objects', headers=headers, data=new_do_record_post).json()
    print(new_digital_object_result)

    # Work out afresh, for this row only, whether the API rejected the ID as
    # a duplicate. Only the lookup failures that mean "no such error in the
    # response" are caught; the flag can no longer keep a value from an
    # earlier row.
    try:
        id_error = new_digital_object_result['error']['digital_object_id'][0]
    except (KeyError, IndexError, TypeError):
        id_error = None
    MustbeUnique_ErrorMessage = id_error == 'Must be unique'

    if MustbeUnique_ErrorMessage:
        do_id_try_again = do_id_preCheck[:-2] + id_generator()
        new_digital_object['digital_object_id'] = do_id_try_again
        print('The digital object id was not unique: ' + do_id +' \nTrying again with digital object id: ' + new_digital_object['digital_object_id'])
        new_do_try_again_post = json.dumps(new_digital_object)
        new_digital_object_result = requests.post(HOST + '/repositories/2/digital_objects', headers=headers, data=new_do_try_again_post).json()
        print(new_digital_object_result)

    if 'id' not in new_digital_object_result:
        # Without an id there is nothing to link; report the API response
        # instead of failing with a bare KeyError.
        raise Exception('The Digital Object could not be created. ArchivesSpace responded: ' + str(new_digital_object_result))

    do_id_uri = str(new_digital_object_result['id'])
    result_status = new_digital_object_result['status']
    print('Result: ' + result_status)
    print('New Digital Object: ' + str(new_digital_object_result['uri']))

    #update AO to link to DO
    add_to_ao = {'digital_object':{'ref': '/repositories/2/digital_objects/' + do_id_uri},'instance_type': 'digital_object'}
    ao_record.setdefault('instances', []).append(add_to_ao)
    ao_record_update = json.dumps(ao_record)
    archival_object_update = requests.post(HOST + '/repositories/2/archival_objects/' + ao_id, headers=headers, data=ao_record_update).json()
    if 'status' not in archival_object_update:
        # The Digital Object exists at this point but is not linked; say so
        # explicitly so the operator can fix it by hand.
        raise Exception('Created Digital Object ' + str(new_digital_object_result['uri']) + ' but could not link it to the archival object. ArchivesSpace responded: ' + str(archival_object_update))
    print('Linking archival object. Result: ' + str(archival_object_update['status']))

    print('')

print('Done!')
