Commit 5ebc405

Merge pull request #11 from mjanowiecki/master
ASpace & readme updates
2 parents db10e49 + acebb2c commit 5ebc405

File tree

3 files changed (+181, -6 lines)

README.md

Lines changed: 5 additions & 1 deletion
@@ -7,6 +7,7 @@ All of these scripts require a secrets.py file in the same directory that must c
 baseURL='[ArchivesSpace API URL]'
 user='[user name]'
 password='[password]'
+repository='[repository]'
 
 This secrets.py file will be ignored according to the repository's .gitignore file so that ArchivesSpace login details will not be inadvertently exposed through GitHub.
 
@@ -39,7 +40,7 @@ Retrieves a count of archival objects associated with a particular resource. Upo
 Extracts all of the archival objects associated with a particular resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
 
 #### [getArchivalObjectRefIdsForResource.py](/getArchivalObjectRefIdsForResource.py)
-Extracts the title, URI, ref_id, and date expression for all archival objects associated with a particular resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
+Extracts the title, URI, ref_id, date expression, and level for all archival objects associated with a particular resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
 
 #### [getArrayPropertiesFromAgentsPeopleCSV.py](/getArrayPropertiesFromAgentsPeopleCSV.py)
 Retrieves specific properties, including properties that have arrays as values, from the JSON of ArchivesSpace agent_people records. In this example, the 'dates_of_existence' property contains an array that must be iterated over. This requires a second level of iteration, 'for j in range(...)' on line 20, in addition to the 'for i in range(...)' loop on line 19 that also appears in the getPropertiesFromAgentsPeopleCSV.py script. As with the previous script, it writes the properties' values into a CSV file specified in the variable 'f' on line 17.
@@ -107,6 +108,9 @@ Prints the URIs to a CSV file of all resources in a repository without a bib num
 #### [searchForUnassociatedContainers.py](/searchForUnassociatedContainers.py)
 Prints the URIs to a CSV file of all top containers that are not associated with a resource or archival object.
 
+#### [transferAoDatesToDos.py](/transferAoDatesToDos.py)
+Transfers the date from an archival object to any attached digital objects.
+
 #### [unpublishArchivalObjectsByResource.py](/unpublishArchivalObjectsByResource.py)
 Unpublishes all archival objects associated with the specified resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
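
For illustration, a hypothetical filled-in secrets.py could look like the sketch below. The URL and credentials are placeholders, not real values; the only non-obvious point, grounded in the scripts themselves, is that repository holds the numeric repository ID (as a string) that gets spliced into the /repositories/<id>/ endpoints.

# secrets.py -- hypothetical example values; substitute your own instance's details
baseURL = 'https://aspace.example.edu/api'  # ArchivesSpace backend API URL (placeholder)
user = 'apiuser'                            # placeholder
password = 'change-me'                      # placeholder
repository = '2'                            # numeric repository ID, used in /repositories/2/... calls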

getArchivalObjectRefIdsForResource.py

Lines changed: 10 additions & 5 deletions
@@ -37,7 +37,7 @@ def findKey(d, key):
 resourceID= raw_input('Enter resource ID: ')
 
 f=csv.writer(open('archivalObjectRefIdForResource.csv', 'wb'))
-f.writerow(['title']+['uri']+['ref_id']+['date'])
+f.writerow(['title']+['uri']+['ref_id']+['dateExpression']+['dateBegin']+['level'])
 
 endpoint = '/repositories/'+repository+'/resources/'+resourceID+'/tree'
 
@@ -51,16 +51,21 @@ def findKey(d, key):
 print 'downloading aos'
 for archivalObject in archivalObjects:
     output = requests.get(baseURL + archivalObject, headers=headers).json()
-    print output
+    print json.dumps(output)
     title = output['title']
     uri = output['uri']
     ref_id = output['ref_id']
+    level = output['level']
     for date in output['dates']:
         try:
-            date = date['expression']
+            dateExpression = date['expression']
         except:
-            date = ''
-    f.writerow([title]+[uri]+[ref_id]+[date])
+            dateExpression = ''
+        try:
+            dateBegin = date['begin']
+        except:
+            dateBegin = ''
+    f.writerow([title]+[uri]+[ref_id]+[dateExpression]+[dateBegin]+[level])
 
 elapsedTime = time.time() - startTime
 m, s = divmod(elapsedTime, 60)
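
The try/except pairs in the new lines exist because an ArchivesSpace date sub-record does not have to carry every field: 'expression', 'begin', and 'end' are each optional. As a purely illustrative sketch (placeholder values, not taken from a real record), the 'dates' array on an archival object can mix fuller and sparser entries:

# Illustrative only -- shows why date['expression'] or date['begin'] may raise KeyError.
dates = [
    {'label': 'creation', 'date_type': 'inclusive',
     'expression': '1920-1935', 'begin': '1920', 'end': '1935'},
    {'label': 'creation', 'date_type': 'single', 'begin': '1942'},  # no 'expression' key
]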

transferAoDatesToDos.py

Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,166 @@
import json
import requests
import secrets
import time
import csv

# Choose which ArchivesSpace instance to edit: a named secrets file for the
# production server, or the default secrets.py for development.
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
if secretsVersion != '':
    try:
        secrets = __import__(secretsVersion)
        print 'Editing Production'
    except ImportError:
        print 'Editing Development'
else:
    print 'Editing Development'

startTime = time.time()

baseURL = secrets.baseURL
user = secrets.user
password = secrets.password
repository = secrets.repository

# Authenticate and store the session token in the headers used by every API call.
auth = requests.post(baseURL + '/users/'+user+'/login?password='+password).json()
session = auth["session"]
headers = {'X-ArchivesSpace-Session': session, 'Content-Type': 'application/json'}
print 'authenticated'

endpoint = '/repositories/'+repository+'/archival_objects?all_ids=true'

ids = requests.get(baseURL + endpoint, headers=headers).json()
ids.reverse()
print len(ids)

## Generates a text file of AOs with DOs. It takes 2+ hours to run, so this block
## is kept separate (commented out) so the main portion of the script can be run
## more quickly.

# f = csv.writer(open('archivalObjectsWithDigitalObjects.csv', 'wb'))
# f.writerow(['uri'])
# doAos = []
#
# for id in ids:
#     endpoint = '/repositories/'+repository+'/archival_objects/'+str(id)
#     output = requests.get(baseURL + endpoint, headers=headers).json()
#     try:
#         dates = output['dates']
#     except:
#         dates = ''
#     uri = output['uri']
#     instances = output['instances']
#     for instance in instances:
#         if instance['instance_type'] == 'digital_object':
#             doUri = instance['digital_object']['ref']
#             print doUri
#             f.writerow([uri])
#             doAos.append(uri)
#
# f2 = open('archivalObjectsWithDigitalObjectsList.txt', 'wb')
# f2.write(json.dumps(doAos))

# Log every digital object that is edited: its old date values, the archival
# object's values that were applied, and the API response.
f = csv.writer(open('DigitalObjectsDatesEdited.csv', 'wb'))
f.writerow(['doUri']+['oldBegin']+['oldEnd']+['oldExpression']+['oldLabel']+['aoUri']+['newBegin']+['newEnd']+['newExpression']+['newLabel']+['post'])

# Work through the previously generated list of archival objects that have
# digital object instances.
doAos = json.load(open('archivalObjectsWithDigitalObjectsList.txt', 'rb'))
for doAo in doAos:
    print doAo
    aoBegin = ''
    aoExpression = ''
    aoLabel = ''
    aoEnd = ''
    doBegin = ''
    doExpression = ''
    doLabel = ''
    doEnd = ''
    aoOutput = requests.get(baseURL + doAo, headers=headers).json()
    # Pull whatever date fields the archival object carries; each one is optional.
    try:
        aoDates = aoOutput['dates']
        for aoDate in aoDates:
            try:
                aoBegin = aoDate['begin']
            except:
                aoBegin = ''
            try:
                aoEnd = aoDate['end']
            except:
                aoEnd = ''
            try:
                aoExpression = aoDate['expression']
            except:
                aoExpression = ''
            try:
                aoLabel = aoDate['label']
            except:
                aoLabel = ''
    except:
        aoBegin = ''
        aoExpression = ''
        aoLabel = ''
        aoEnd = ''
    try:
        instances = aoOutput['instances']
    except:
        continue
    for instance in instances:
        if instance['instance_type'] == 'digital_object':
            # Only edit the digital object if the archival object has date information.
            if aoBegin+aoExpression+aoLabel != '':
                doUri = instance['digital_object']['ref']
                doOutput = requests.get(baseURL + str(doUri), headers=headers).json()
                print 'moving date from AO to DO'
                doDates = doOutput['dates']
                if doDates == []:
                    # The digital object has no dates: build a new date record
                    # from the archival object's values and post it.
                    print 'no date', doDates
                    doBegin = ''
                    doExpression = ''
                    doLabel = ''
                    doEnd = ''
                    doDates = []
                    doDate = {}
                    doDate['begin'] = aoBegin
                    doDate['expression'] = aoExpression
                    doDate['label'] = aoLabel
                    doDate['date_type'] = 'single'
                    if aoEnd != '':
                        doDate['end'] = aoEnd
                        doDate['date_type'] = 'range'
                    doDates.append(doDate)
                    doOutput['dates'] = doDates
                    output = json.dumps(doOutput)
                    doPost = requests.post(baseURL + doUri, headers=headers, data=output).json()
                    print doPost
                else:
                    # The digital object already has dates: record the existing
                    # values for the log, then overwrite each field the archival
                    # object supplies.
                    print 'existing date', doDates
                    for doDate in doDates:
                        try:
                            doBegin = doDate['begin']
                        except:
                            doBegin = ''
                        try:
                            doEnd = doDate['end']
                        except:
                            doEnd = ''
                        try:
                            doExpression = doDate['expression']
                        except:
                            doExpression = ''
                        try:
                            doLabel = doDate['label']
                        except:
                            doLabel = ''
                        if aoBegin != '':
                            doDate['begin'] = aoBegin
                        if aoExpression != '':
                            doDate['expression'] = aoExpression
                        if aoLabel != '':
                            doDate['label'] = aoLabel
                        if aoEnd != '':
                            doDate['end'] = aoEnd
                    doOutput['dates'] = doDates
                    output = json.dumps(doOutput)
                    doPost = requests.post(baseURL + doUri, headers=headers, data=output).json()
                    print doPost
                f.writerow([doUri]+[doBegin]+[doEnd]+[doExpression]+[doLabel]+[doAo]+[aoBegin]+[aoEnd]+[aoExpression]+[aoLabel]+[doPost])

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)
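
To make the core transformation easier to see at a glance, the date-building rule used in the empty-dates branch above can be restated as a small sketch (the helper name is hypothetical, not part of the script): the archival object's values become a 'single' date, promoted to a 'range' only when an end date is present.

# Hypothetical restatement of the script's empty-dates logic, for illustration only.
def buildDateFromAo(aoBegin, aoEnd, aoExpression, aoLabel):
    doDate = {'begin': aoBegin, 'expression': aoExpression,
              'label': aoLabel, 'date_type': 'single'}
    if aoEnd != '':
        doDate['end'] = aoEnd
        doDate['date_type'] = 'range'
    return doDate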
